e3.1.0 Refactored lifecycle (configuration, drop report, heartbeat).

This commit is contained in:
2025-07-21 21:28:45 -04:00
parent 98402cce37
commit ce6e3b7ffc
16 changed files with 615 additions and 200 deletions

View File

@ -2,8 +2,10 @@
package log
import (
"encoding/json"
"os"
"path/filepath"
"strings"
"testing"
"time"
@ -18,7 +20,8 @@ func TestLoggerHeartbeat(t *testing.T) {
cfg := logger.GetConfig()
cfg.HeartbeatLevel = 3 // All heartbeats
cfg.HeartbeatIntervalS = 1
logger.ApplyConfig(cfg)
err := logger.ApplyConfig(cfg)
require.NoError(t, err)
// Wait for heartbeats
time.Sleep(1500 * time.Millisecond)
@ -42,24 +45,55 @@ func TestDroppedLogs(t *testing.T) {
cfg := DefaultConfig()
cfg.Directory = t.TempDir()
cfg.BufferSize = 1 // Very small buffer
cfg.FlushIntervalMs = 1000 // Slow flush
cfg.FlushIntervalMs = 10 // Fast processing
cfg.HeartbeatLevel = 1 // Enable proc heartbeat
cfg.HeartbeatIntervalS = 1 // Fast heartbeat
err := logger.ApplyConfig(cfg)
require.NoError(t, err)
err = logger.Start()
require.NoError(t, err)
defer logger.Shutdown()
// Flood the logger
// Flood to guarantee drops
for i := 0; i < 100; i++ {
logger.Info("flood", i)
}
// Let it process
time.Sleep(100 * time.Millisecond)
// Wait for first heartbeat
time.Sleep(1500 * time.Millisecond)
// Check drop counter
dropped := logger.state.DroppedLogs.Load()
// Some logs should have been dropped with buffer size 1
assert.Greater(t, dropped, uint64(0))
// Flood again
for i := 0; i < 50; i++ {
logger.Info("flood2", i)
}
// Wait for second heartbeat
time.Sleep(1000 * time.Millisecond)
logger.Flush(time.Second)
// Read log file and verify heartbeats
content, err := os.ReadFile(filepath.Join(cfg.Directory, "log.log"))
require.NoError(t, err)
lines := strings.Split(string(content), "\n")
foundTotal := false
foundInterval := false
for _, line := range lines {
if strings.Contains(line, "PROC") {
if strings.Contains(line, "total_dropped_logs") {
foundTotal = true
}
if strings.Contains(line, "dropped_since_last") {
foundInterval = true
}
}
}
assert.True(t, foundTotal, "Expected PROC heartbeat with total_dropped_logs")
assert.True(t, foundInterval, "Expected PROC heartbeat with dropped_since_last")
}
func TestAdaptiveDiskCheck(t *testing.T) {
@ -71,7 +105,8 @@ func TestAdaptiveDiskCheck(t *testing.T) {
cfg.DiskCheckIntervalMs = 100
cfg.MinCheckIntervalMs = 50
cfg.MaxCheckIntervalMs = 500
logger.ApplyConfig(cfg)
err := logger.ApplyConfig(cfg)
require.NoError(t, err)
// Generate varying log rates and verify no panic
for i := 0; i < 10; i++ {
@ -85,4 +120,93 @@ func TestAdaptiveDiskCheck(t *testing.T) {
}
logger.Flush(time.Second)
}
// TestDroppedLogRecoveryOnDroppedHeartbeat documents the logger's current
// behavior when a heartbeat record is itself dropped: the per-interval drop
// count carried by that heartbeat ('dropped_since_last') is lost, while the
// cumulative 'total_dropped_logs' counter remains accurate.
//
// Sequence: flood past the buffer to force drops, let the first heartbeat be
// generated while the disk is reported full (so that heartbeat is dropped),
// restore the disk, then inspect the next successfully written heartbeat.
func TestDroppedLogRecoveryOnDroppedHeartbeat(t *testing.T) {
	logger := NewLogger()
	cfg := DefaultConfig()
	cfg.Directory = t.TempDir()
	cfg.BufferSize = 10       // Small buffer
	cfg.HeartbeatLevel = 1    // Enable proc heartbeat
	cfg.HeartbeatIntervalS = 1 // Fast heartbeat
	cfg.Format = "json"       // Use JSON for easy parsing
	err := logger.ApplyConfig(cfg)
	require.NoError(t, err)
	err = logger.Start()
	require.NoError(t, err)
	defer logger.Shutdown()
	// 1. Flood the logger to guarantee drops. Let's aim to drop exactly 50 logs.
	const floodCount = 50
	for i := 0; i < int(cfg.BufferSize)+floodCount; i++ {
		logger.Info("flood", i)
	}
	// Wait for the first heartbeat to be generated. It will carry the count of ~50 drops.
	time.Sleep(1100 * time.Millisecond)
	// 2. Immediately put the logger into a "disk full" state.
	// This will cause the processor to drop the first heartbeat record.
	diskFullCfg := logger.GetConfig()
	diskFullCfg.MinDiskFreeKB = 9999999999
	err = logger.ApplyConfig(diskFullCfg)
	require.NoError(t, err)
	// Force a disk check to ensure the state is updated to not OK.
	logger.performDiskCheck(true)
	assert.False(t, logger.state.DiskStatusOK.Load(), "Disk status should be not OK")
	// 3. Now, "fix" the disk so the next heartbeat can be written successfully.
	diskOKCfg := logger.GetConfig()
	diskOKCfg.MinDiskFreeKB = 0
	err = logger.ApplyConfig(diskOKCfg)
	require.NoError(t, err)
	logger.performDiskCheck(true) // Ensure state is updated back to OK.
	assert.True(t, logger.state.DiskStatusOK.Load(), "Disk status should be OK")
	// 4. Wait for the second heartbeat to be generated and written to the file.
	time.Sleep(1100 * time.Millisecond)
	logger.Flush(time.Second)
	// 5. Verify the log file content.
	content, err := os.ReadFile(filepath.Join(cfg.Directory, "log.log"))
	require.NoError(t, err)
	var foundHeartbeat bool
	var intervalDropCount, totalDropCount float64
	lines := strings.Split(string(content), "\n")
	for _, line := range lines {
		// Find the last valid heartbeat with drop stats.
		if strings.Contains(line, `"level":"PROC"`) && strings.Contains(line, "dropped_since_last") {
			foundHeartbeat = true
			var entry map[string]interface{}
			err := json.Unmarshal([]byte(line), &entry)
			require.NoError(t, err, "Failed to parse heartbeat log line: %s", line)
			// Use a checked assertion: a missing or malformed "fields" array
			// should fail the test with a message, not panic the test binary.
			fields, ok := entry["fields"].([]interface{})
			require.True(t, ok, "Heartbeat log line missing 'fields' array: %s", line)
			// Fields are encoded as a flat key/value list: [k1, v1, k2, v2, ...].
			for i := 0; i < len(fields)-1; i += 2 {
				if key, ok := fields[i].(string); ok {
					if key == "dropped_since_last" {
						intervalDropCount, _ = fields[i+1].(float64)
					}
					if key == "total_dropped_logs" {
						totalDropCount, _ = fields[i+1].(float64)
					}
				}
			}
		}
	}
	require.True(t, foundHeartbeat, "Did not find the final heartbeat with drop stats")
	// ASSERT THE CURRENT BEHAVIOR:
	// The 'dropped_since_last' count from the first heartbeat (~50) was lost when that heartbeat was dropped.
	// The only new drop in the next interval was the heartbeat record itself.
	assert.Equal(t, float64(1), intervalDropCount, "The interval drop count should only reflect the single dropped heartbeat from the previous interval.")
	// The 'total_dropped_logs' counter should be accurate, reflecting the initial flood (~50) + the one dropped heartbeat.
	assert.True(t, totalDropCount >= float64(floodCount), "Total drop count should be at least the number of flooded logs plus the dropped heartbeat.")
}