From bfed212f40a85b6e9903e63e9db48598408bf169875c341cf985192626ef8a32 Mon Sep 17 00:00:00 2001 From: LixenWraith Date: Wed, 23 Apr 2025 11:52:57 -0400 Subject: [PATCH] e1.2.0 Heartbeat feature added. --- README.md | 371 ++++++++--------- cmd/heartbeat/main.go | 80 ++++ cmd/simple/main.go | 4 +- cmd/stress/main.go | 6 +- config.go | 18 +- format.go | 8 +- go.mod | 2 +- go.sum | 4 +- interface.go | 7 + logger.go | 15 +- processor.go | 919 +++++++++++++++++++----------------------- state.go | 21 +- storage.go | 407 +++++++++++++++++++ utility.go | 8 +- 14 files changed, 1150 insertions(+), 720 deletions(-) create mode 100644 cmd/heartbeat/main.go create mode 100644 storage.go diff --git a/README.md b/README.md index e0ddccb..988efe0 100644 --- a/README.md +++ b/README.md @@ -1,274 +1,251 @@ # Log -A high-performance, buffered, rotating file logger for Go applications, configured via the [LixenWraith/config](https://github.com/LixenWraith/config) package. Designed for production-grade reliability with features like disk management, log retention, and lock-free asynchronous processing using atomic operations and channels. +A high-performance, buffered, rotating file logger for Go applications, configured via +the [LixenWraith/config](https://github.com/LixenWraith/config) package or simple overrides. Designed for +production-grade reliability with features like disk management, log retention, and lock-free asynchronous processing +using atomic operations and channels. + +**Note:** This logger requires creating an instance using `NewLogger()` and calling methods on that instance (e.g., +`l.Info(...)`). It does not use package-level logging functions. ## Features -- **Lock-free Asynchronous Logging:** Non-blocking log operations with minimal application impact. Logs are sent via a buffered channel, processed by a dedicated background goroutine. Uses atomic operations for state management, avoiding mutexes in the hot path. 
-- **External Configuration:** Fully configured using `github.com/LixenWraith/config`, supporting both TOML files and CLI overrides with centralized management. -- **Automatic File Rotation:** Seamlessly rotates log files when they reach configurable size limits (`max_size_mb`), generating timestamped filenames. -- **Comprehensive Disk Management:** - - Monitors total log directory size against configured limits (`max_total_size_mb`) - - Enforces minimum free disk space requirements (`min_disk_free_mb`) - - Automatically prunes oldest log files to maintain space constraints - - Implements recovery behavior when disk space is exhausted -- **Adaptive Resource Monitoring:** Dynamically adjusts disk check frequency based on logging volume (`enable_adaptive_interval`, `min_check_interval_ms`, `max_check_interval_ms`), optimizing performance under varying loads. -- **Reliable Buffer Management:** Periodic buffer flushing with configurable intervals (`flush_interval_ms`). Detects and reports dropped logs during high-volume scenarios. -- **Automated Log Retention:** Time-based log file cleanup with configurable retention periods (`retention_period_hrs`, `retention_check_mins`). -- **Structured Logging:** Support for both human-readable text (`txt`) and machine-parseable (`json`) output formats with consistent field handling. -- **Comprehensive Log Levels:** Standard severity levels (Debug, Info, Warn, Error) with numeric values compatible with other logging systems. -- **Function Call Tracing:** Optional function call stack traces with configurable depth (`trace_depth`) for debugging complex execution flows. -- **Clean API Design:** Straightforward logging methods that don't require `context.Context` parameters. -- **Graceful Shutdown:** Managed termination with best-effort flushing to minimize log data loss during application shutdown. +- **Instance-Based API:** Create logger instances via `NewLogger()` and use methods like `l.Info()`, `l.Warn()`, etc. 
+- **Lock-free Asynchronous Logging:** Non-blocking log operations with minimal application impact. Logs are sent via a + buffered channel, processed by a dedicated background goroutine. Uses atomic operations for state management, avoiding + mutexes in the hot path. +- **External Configuration:** Fully configured using `github.com/LixenWraith/config`, supporting both TOML files and CLI + overrides with centralized management. Also supports simple initialization with defaults and string overrides via + `InitWithDefaults`. +- **Automatic File Rotation:** Seamlessly rotates log files when they reach configurable size limits (`max_size_mb`), + generating timestamped filenames. +- **Comprehensive Disk Management:** + - Monitors total log directory size against configured limits (`max_total_size_mb`) + - Enforces minimum free disk space requirements (`min_disk_free_mb`) + - Automatically prunes oldest log files to maintain space constraints + - Implements recovery behavior when disk space is exhausted +- **Adaptive Resource Monitoring:** Dynamically adjusts disk check frequency based on logging volume ( + `enable_adaptive_interval`, `min_check_interval_ms`, `max_check_interval_ms`), optimizing performance under varying + loads. +- **Operational Heartbeats:** Multi-level periodic statistics messages (process, disk, system) that bypass level + filtering to ensure operational monitoring even with higher log levels. +- **Reliable Buffer Management:** Periodic buffer flushing with configurable intervals (`flush_interval_ms`). Detects + and reports dropped logs during high-volume scenarios. +- **Automated Log Retention:** Time-based log file cleanup with configurable retention periods (`retention_period_hrs`, + `retention_check_mins`). +- **Structured Logging:** Support for both human-readable text (`txt`) and machine-parseable (`json`) output formats + with consistent field handling. 
+- **Comprehensive Log Levels:** Standard severity levels (Debug, Info, Warn, Error) with numeric values compatible with + other logging systems. +- **Function Call Tracing:** Optional function call stack traces with configurable depth (`trace_depth`) for debugging + complex execution flows. +- **Clean API Design:** Straightforward logging methods on the logger instance that don't require `context.Context` + parameters. +- **Graceful Shutdown:** Managed termination with best-effort flushing to minimize log data loss during application + shutdown. ## Installation ```bash go get github.com/LixenWraith/log -go get github.com/LixenWraith/config +# If using full TOML/CLI config: +# go get github.com/LixenWraith/config ``` ## Basic Usage +This example shows minimal initialization using defaults with a single override, logging one message, and shutting down. + ```go package main import ( - "fmt" - "os" - "sync" - "time" - - "github.com/LixenWraith/config" // External config package - "github.com/LixenWraith/log" // This logger package + "github.com/LixenWraith/log" ) -const configFile = "app_config.toml" -const logConfigPath = "logging" // Base path for logger settings in TOML/config - -// Example app_config.toml content: -/* -[logging] - level = 0 # Info Level (0) - directory = "./app_logs" - format = "json" - extension = "log" - max_size_mb = 50 - flush_interval_ms = 100 - disk_check_interval_ms = 5000 # Check disk every 5s - enable_adaptive_interval = true - # Other settings will use defaults registered by log.Init -*/ - func main() { - // 1. Initialize the main config manager - cfg := config.New() - - // Optional: Create a dummy config file if it doesn't exist - if _, err := os.Stat(configFile); os.IsNotExist(err) { - content := fmt.Sprintf("[%s]\n level = 0\n directory = \"./app_logs\"\n", logConfigPath) - os.WriteFile(configFile, []byte(content), 0644) - } - - // 2. 
Load configuration (e.g., from file and/or CLI) - _, err := cfg.Load(configFile, os.Args[1:]) - if err != nil { - fmt.Fprintf(os.Stderr, "Warning: Failed to load config file '%s': %v. Using defaults.\n", configFile, err) - } - - // 3. Initialize the logger, passing the config instance and base path. - // log.Init registers necessary keys (e.g., "logging.level") with cfg. - err = log.Init(cfg, logConfigPath) - if err != nil { - fmt.Fprintf(os.Stderr, "Fatal: Failed to initialize logger: %v\n", err) - os.Exit(1) - } - fmt.Println("Logger initialized.") - - // 4. Optionally save the merged config (defaults + file/CLI overrides) - err = cfg.Save(configFile) // Save back to the file - if err != nil { - fmt.Fprintf(os.Stderr, "Warning: Failed to save config: %v\n", err) - } - - // 5. Use the logger - log.Info("Application started", "pid", os.Getpid()) - log.Debug("Debugging info", "value", 42) // Might be filtered by level - - // Example concurrent logging - var wg sync.WaitGroup - for i := 0; i < 3; i++ { - wg.Add(1) - go func(id int) { - defer wg.Done() - log.Info("Goroutine task started", "goroutine_id", id) - time.Sleep(time.Duration(id*10) * time.Millisecond) - log.InfoTrace(1, "Goroutine task finished", "goroutine_id", id) - }(i) - } - wg.Wait() - - // ... application logic ... - - // 6. Shutdown the logger gracefully before exit - fmt.Println("Shutting down...") - // Shutdown timeout is used internally for a brief wait, not a hard deadline for flushing. 
- shutdownTimeout := 2 * time.Second - err = log.Shutdown(shutdownTimeout) // Pass timeout (used for internal sleep) - if err != nil { - fmt.Fprintf(os.Stderr, "Logger shutdown error: %v\n", err) - } - fmt.Println("Shutdown complete.") + logger := log.NewLogger() + _ = logger.InitWithDefaults("directory=/var/log/myapp") + logger.Info("Application starting", "pid", 12345) + _ = logger.Shutdown() } ``` ## Configuration -The `log` package is configured via keys registered with the `config.Config` instance passed to `log.Init`. `log.Init` expects these keys relative to the `basePath` argument. +The `log` package is configured via keys registered with the `config.Config` instance passed to `(l *Logger) Init`, or +via string overrides passed to `(l *Logger) InitWithDefaults`. `Init` expects these keys relative to the `basePath` +argument. -| Key (`basePath` + Key) | Type | Description | Default Value | -|:---------------------------| :-------- |:-----------------------------------------------------------------|:--------------| -| `level` | `int64` | Minimum log level (-4=Debug, 0=Info, 4=Warn, 8=Error) | `0` (Info) | -| `name` | `string` | Base name for log files | `"log"` | -| `directory` | `string` | Directory to store log files | `"./logs"` | -| `format` | `string` | Log file format (`"txt"`, `"json"`) | `"txt"` | -| `extension` | `string` | Log file extension (without dot) | `"log"` | -| `show_timestamp` | `bool` | Show timestamp in log entries | `true` | -| `show_level` | `bool` | Show log level in entries | `true` | -| `buffer_size` | `int64` | Channel buffer capacity for log records | `1024` | -| `max_size_mb` | `int64` | Max size (MB) per log file before rotation | `10` | -| `max_total_size_mb` | `int64` | Max total size (MB) of log directory (0=unlimited) | `50` | -| `min_disk_free_mb` | `int64` | Min required free disk space (MB) (0=unlimited) | `100` | -| `flush_interval_ms` | `int64` | Interval (ms) to force flush buffer to disk via timer | `100` | -| 
`trace_depth` | `int64` | Function call trace depth (0=disabled, 1-10) | `0` | -| `retention_period_hrs` | `float64` | Hours to keep log files (0=disabled) | `0.0` | -| `retention_check_mins` | `float64` | Minutes between retention checks via timer (if enabled) | `60.0` | -| `disk_check_interval_ms` | `int64` | Base interval (ms) for periodic disk space checks via timer | `5000` | -| `enable_adaptive_interval` | `bool` | Adjust disk check interval based on load (within min/max bounds) | `true` | -| `enable_periodic_sync` | `bool` | Periodic sync with disk based on flush interval | `false` | -| `min_check_interval_ms` | `int64` | Minimum interval (ms) for adaptive disk checks | `100` | -| `max_check_interval_ms` | `int64` | Maximum interval (ms) for adaptive disk checks | `60000` | +| Key (`basePath` + Key) | Type | Description | Default Value | +|:---------------------------|:----------|:--------------------------------------------------------------------------|:--------------| +| `level` | `int64` | Minimum log level (-4=Debug, 0=Info, 4=Warn, 8=Error) | `0` (Info) | +| `name` | `string` | Base name for log files | `"log"` | +| `directory` | `string` | Directory to store log files | `"./logs"` | +| `format` | `string` | Log file format (`"txt"`, `"json"`) | `"txt"` | +| `extension` | `string` | Log file extension (without dot) | `"log"` | +| `show_timestamp` | `bool` | Show timestamp in log entries | `true` | +| `show_level` | `bool` | Show log level in entries | `true` | +| `buffer_size` | `int64` | Channel buffer capacity for log records | `1024` | +| `max_size_mb` | `int64` | Max size (MB) per log file before rotation | `10` | +| `max_total_size_mb` | `int64` | Max total size (MB) of log directory (0=unlimited) | `50` | +| `min_disk_free_mb` | `int64` | Min required free disk space (MB) (0=unlimited) | `100` | +| `flush_interval_ms` | `int64` | Interval (ms) to force flush buffer to disk via timer | `100` | +| `trace_depth` | `int64` | Function call trace depth 
(0=disabled, 1-10) | `0` | +| `retention_period_hrs` | `float64` | Hours to keep log files (0=disabled) | `0.0` | +| `retention_check_mins` | `float64` | Minutes between retention checks via timer (if enabled) | `60.0` | +| `disk_check_interval_ms` | `int64` | Base interval (ms) for periodic disk space checks via timer | `5000` | +| `enable_adaptive_interval` | `bool` | Adjust disk check interval based on load (within min/max bounds) | `true` | +| `enable_periodic_sync` | `bool` | Periodic sync with disk based on flush interval | `true` | +| `min_check_interval_ms` | `int64` | Minimum interval (ms) for adaptive disk checks | `100` | +| `max_check_interval_ms` | `int64` | Maximum interval (ms) for adaptive disk checks | `60000` | +| `heartbeat_level` | `int64` | Heartbeat detail level (0=disabled, 1=proc, 2=proc+disk, 3=proc+disk+sys) | `0` | +| `heartbeat_interval_s` | `int64` | Interval (s) between heartbeat messages | `60` | -**Example TOML Configuration (`app_config.toml`)** +**Example TOML Configuration** (Used with `(l *Logger) Init` and the `config` package) + +Assuming `basePath = "logging"` is passed to `Init`: ```toml -# Main application settings -app_name = "My Service" - -# Logger settings under the 'logging' base path [logging] - level = -4 # Debug - directory = "/var/log/my_service" - format = "json" - extension = "log" - max_size_mb = 100 - max_total_size_mb = 1024 # 1 GB total - min_disk_free_mb = 512 # 512 MB free required - flush_interval_ms = 100 - trace_depth = 2 - retention_period_hrs = 168.0 # 7 days (7 * 24) - retention_check_mins = 60.0 - disk_check_interval_ms = 10000 # Check disk every 10 seconds - enable_adaptive_interval = false # Disable adaptive checks - -# Other application settings -[database] - host = "db.example.com" +level = -4 # Debug +directory = "/var/log/my_service" +format = "json" +max_size_mb = 100 +retention_period_hrs = 168.0 # 7 days +heartbeat_level = 2 # Process + disk statistics ``` ## API Reference +**Note:** All logging 
and control functions are methods on a `*Logger` instance obtained via `NewLogger()`. + +### Creation + +- **`NewLogger() *Logger`** + Creates a new, uninitialized logger instance with default configuration parameters registered internally. + ### Initialization -- **`Init(cfg *config.Config, basePath string) error`** - Initializes or reconfigures the logger using settings from the provided `config.Config` instance under `basePath`. Registers required keys with defaults if not present. Thread-safe. -- **`InitWithDefaults(overrides ...string) error`** - Initializes the logger using built-in defaults, applying optional overrides provided as "key=value" strings. Thread-safe. +- **`(l *Logger) Init(cfg *config.Config, basePath string) error`** + Initializes the logger instance `l` using settings from the provided `config.Config` instance under `basePath`. Starts + the background processing goroutine. +- **`(l *Logger) InitWithDefaults(overrides ...string) error`** + Initializes the logger instance `l` using built-in defaults, applying optional overrides provided as "key=value" + strings (e.g., `"directory=/tmp/logs"`). Starts the background processing goroutine. ### Logging Functions -These methods accept `...any` arguments, typically used as key-value pairs for structured logging (e.g., `"user_id", 123, "status", "active"`). All logging functions are non-blocking and use atomic operations for state checks. +These methods accept `...any` arguments, typically used as key-value pairs for structured logging. They are called on an +initialized `*Logger` instance (e.g., `l.Info(...)`). -- **`Debug(args ...any)`**: Logs at Debug level (-4). -- **`Info(args ...any)`**: Logs at Info level (0). -- **`Warn(args ...any)`**: Logs at Warn level (4). -- **`Error(args ...any)`**: Logs at Error level (8). +- **`(l *Logger) Debug(args ...any)`**: Logs at Debug level (-4). +- **`(l *Logger) Info(args ...any)`**: Logs at Info level (0). 
+- **`(l *Logger) Warn(args ...any)`**: Logs at Warn level (4). +- **`(l *Logger) Error(args ...any)`**: Logs at Error level (8). ### Trace Logging Functions -Temporarily enable function call tracing for a single log entry, regardless of the configured `trace_depth`. +Temporarily enable function call tracing for a single log entry on an initialized `*Logger` instance. -- **`DebugTrace(depth int, args ...any)`**: Logs Debug with trace. -- **`InfoTrace(depth int, args ...any)`**: Logs Info with trace. -- **`WarnTrace(depth int, args ...any)`**: Logs Warn with trace. -- **`ErrorTrace(depth int, args ...any)`**: Logs Error with trace. - (`depth` specifies the number of stack frames, 0-10). +- **`(l *Logger) DebugTrace(depth int, args ...any)`**: Logs Debug with trace. +- **`(l *Logger) InfoTrace(depth int, args ...any)`**: Logs Info with trace. +- **`(l *Logger) WarnTrace(depth int, args ...any)`**: Logs Warn with trace. +- **`(l *Logger) ErrorTrace(depth int, args ...any)`**: Logs Error with trace. + (`depth` specifies the number of stack frames, 0-10). ### Other Logging Variants -- **`Log(args ...any)`**: Logs with timestamp only, no level (uses Info internally). -- **`Message(args ...any)`**: Logs raw message without timestamp or level. -- **`LogTrace(depth int, args ...any)`**: Logs with timestamp and trace, no level. +Called on an initialized `*Logger` instance. + +- **`(l *Logger) Log(args ...any)`**: Logs with timestamp only, no level (uses Info internally). +- **`(l *Logger) Message(args ...any)`**: Logs raw message without timestamp or level. +- **`(l *Logger) LogTrace(depth int, args ...any)`**: Logs with timestamp and trace, no level. ### Shutdown and Control -- **`Shutdown(timeout time.Duration) error`** - Gracefully shuts down the logger. Signals the processor to stop, waits briefly for pending logs to flush, then closes file handles. Returns error details if closing operations fail. +Called on an initialized `*Logger` instance. 
-- **`Flush(timeout time.Duration) error`** - Explicitly triggers a sync of the current log file buffer to disk and waits for completion or timeout. +- **`(l *Logger) Shutdown(timeout time.Duration) error`** + Gracefully shuts down the logger instance `l`. Signals the processor to stop, waits briefly for pending logs to flush, + then closes file handles. + +- **`(l *Logger) Flush(timeout time.Duration) error`** + Explicitly triggers a sync of the current log file buffer to disk for instance `l` and waits for completion or + timeout. ### Constants -- **`LevelDebug (-4)`, `LevelInfo (0)`, `LevelWarn (4)`, `LevelError (8)` (`int64`)**: Log level constants. -- **`FlagShowTimestamp`, `FlagShowLevel`, `FlagDefault`**: Record flag constants controlling output format. +- **`LevelDebug (-4)`, `LevelInfo (0)`, `LevelWarn (4)`, `LevelError (8)` (`int64`)**: Standard log level constants. +- **`LevelProc (12)`, `LevelDisk (16)`, `LevelSys (20)` (`int64`)**: Heartbeat log level constants. These levels bypass + the configured `level` filter. +- **`FlagShowTimestamp`, `FlagShowLevel`, `FlagDefault`**: Record flag constants controlling output format. ## Implementation Details -- **Lock-Free Hot Path:** Log methods (`Info`, `Debug`, etc.) operate without locks, using atomic operations to check logger state and non-blocking channel sends. Only initialization, reconfiguration, and shutdown use a mutex. +- **Lock-Free Hot Path:** Logging methods (`l.Info`, `l.Debug`, etc.) operate without locks, using atomic operations to + check logger state and non-blocking channel sends. Only initialization, reconfiguration, and shutdown use a mutex + internally. -- **Channel-Based Architecture:** Log records flow through a buffered channel from producer methods to a single consumer goroutine, preventing contention and serializing file I/O operations. 
+- **Channel-Based Architecture:** Log records flow through a buffered channel from producer methods to a single consumer + goroutine per logger instance, preventing contention and serializing file I/O operations. -- **Adaptive Resource Management:** - - Disk checks run periodically via timer and reactively when write volume thresholds are crossed - - Check frequency automatically adjusts based on logging rate when `enable_adaptive_interval` is enabled - - Intelligently backs off during low activity and increases responsiveness during high volume +- **Adaptive Resource Management:** + - Disk checks run periodically via timer and reactively when write volume thresholds are crossed. + - Check frequency automatically adjusts based on logging rate when `enable_adaptive_interval` is enabled. -- **File Management:** - - Log files are rotated when `max_size_mb` is exceeded, with new files named using timestamps - - Oldest files (by modification time) are automatically pruned when space limits are approached - - Files older than `retention_period_hrs` are periodically removed +- **Heartbeat Messages:** + - Periodic operational statistics that bypass log level filtering. + - Three levels of detail (`heartbeat_level`): + - Level 1 (PROC): Logger metrics (uptime, processed/dropped logs) + - Level 2 (DISK): Adds disk metrics (rotations, deletions, file counts, sizes) + - Level 3 (SYS): Adds system metrics (memory usage, goroutine count, GC stats) + - Ensures monitoring data is available regardless of the configured `level`. -- **Recovery Behavior:** When disk issues occur, the logger temporarily pauses new logs and attempts recovery on subsequent operations, logging one disk warning message to prevent error spam. +- **File Management:** + - Log files are rotated when `max_size_mb` is exceeded. + - Oldest files are automatically pruned when space limits (`max_total_size_mb`, `min_disk_free_mb`) are approached. 
+ - Files older than `retention_period_hrs` are periodically removed. -- **Graceful Shutdown Flow:** - 1. Sets atomic flags to prevent new logs - 2. Closes the active log channel to signal processor shutdown - 3. Waits briefly for processor to finish pending records - 4. Performs final sync and closes the file handle +- **Recovery Behavior:** When disk issues occur, the logger temporarily pauses new logs and attempts recovery on + subsequent operations, logging one disk warning message to prevent error spam. + +- **Graceful Shutdown Flow:** + 1. Sets atomic flags to prevent new logs on the specific instance. + 2. Closes the active log channel to signal processor shutdown for that instance. + 3. Waits briefly for the processor to finish pending records. + 4. Performs final sync and closes the file handle. ## Performance Considerations -- **Non-blocking Design:** The logger is designed to have minimal impact on application performance, with non-blocking log operations and buffered processing. +- **Non-blocking Design:** The logger is designed to have minimal impact on application performance, with non-blocking + log operations and buffered processing. -- **Memory Efficiency:** Uses a reusable buffer for serialization, avoiding unnecessary allocations when formatting log entries. +- **Memory Efficiency:** Uses a reusable buffer (`serializer`) per instance for serialization, avoiding unnecessary + allocations when formatting log entries. -- **Disk I/O Management:** Batches writes and intelligently schedules disk operations to minimize I/O overhead while maintaining data safety. +- **Disk I/O Management:** Batches writes and intelligently schedules disk operations to minimize I/O overhead while + maintaining data safety. -- **Concurrent Safety:** Thread-safe through careful use of atomic operations, minimizing mutex usage to initialization and shutdown paths only. 
+- **Concurrent Safety:** Thread-safe through careful use of atomic operations and channel-based processing, minimizing + mutex usage to initialization and shutdown paths only. Multiple `*Logger` instances operate independently. ## Caveats & Limitations -- **Log Loss Scenarios:** - - **Buffer Saturation:** Under extreme load, logs may be dropped if the internal buffer fills faster than records can be processed. A summary message will be logged once capacity is available again. - - **Shutdown Race:** The `Shutdown` function provides a best-effort attempt to process remaining logs, but cannot guarantee all buffered logs will be written if the application terminates quickly. - - **Persistent Disk Issues:** If disk space cannot be reclaimed through cleanup, logs will be dropped until the condition is resolved. +- **Log Loss Scenarios:** + - **Buffer Saturation:** Under extreme load, logs may be dropped if the internal buffer fills faster than records + can be processed by the background goroutine. A summary message will be logged once capacity is available again. + - **Shutdown Race:** The `Shutdown` function provides a best-effort attempt to process remaining logs, but cannot + guarantee all buffered logs will be written if the application terminates abruptly or the timeout is too short. + - **Persistent Disk Issues:** If disk space cannot be reclaimed through cleanup, logs will be dropped until the + condition is resolved. -- **Configuration Dependencies:** Requires the `github.com/LixenWraith/config` package for advanced configuration management. +- **Configuration Dependencies:** Requires the `github.com/LixenWraith/config` package for advanced configuration + management via TOML/CLI using the `Init` method. `InitWithDefaults` provides simpler initialization without this + dependency. -- **Retention Accuracy:** Log retention relies on file modification times, which could be affected by external file system operations. 
- -- **Reconfiguration Impact:** Changing buffer size during runtime requires restarting the background processor, which may cause a brief period where logs could be dropped. +- **Retention Accuracy:** Log retention relies on file modification times, which could potentially be affected by + external file system operations. ## License diff --git a/cmd/heartbeat/main.go b/cmd/heartbeat/main.go new file mode 100644 index 0000000..4edec7d --- /dev/null +++ b/cmd/heartbeat/main.go @@ -0,0 +1,80 @@ +package main + +import ( + "fmt" + "os" + "time" + + "github.com/LixenWraith/log" +) + +func main() { + // Create test log directory if it doesn't exist + if err := os.MkdirAll("./logs", 0755); err != nil { + fmt.Fprintf(os.Stderr, "Failed to create test logs directory: %v\n", err) + os.Exit(1) + } + + // Test cycle: disable -> PROC -> PROC+DISK -> PROC+DISK+SYS -> PROC+DISK -> PROC -> disable + levels := []struct { + level int64 + description string + }{ + {0, "Heartbeats disabled"}, + {1, "PROC heartbeats only"}, + {2, "PROC+DISK heartbeats"}, + {3, "PROC+DISK+SYS heartbeats"}, + {2, "PROC+DISK heartbeats (reducing from 3)"}, + {1, "PROC heartbeats only (reducing from 2)"}, + {0, "Heartbeats disabled (final)"}, + } + + // Create a single logger instance that we'll reconfigure + logger := log.NewLogger() + + for _, levelConfig := range levels { + // Set up configuration overrides + overrides := []string{ + "directory=./logs", + "level=-4", // Debug level to see everything + "format=txt", // Use text format for easier reading + "heartbeat_interval_s=5", // Short interval for testing + fmt.Sprintf("heartbeat_level=%d", levelConfig.level), + } + + // Initialize logger with the new configuration + // Note: InitWithDefaults handles reconfiguration of an existing logger + if err := logger.InitWithDefaults(overrides...); err != nil { + fmt.Fprintf(os.Stderr, "Failed to initialize logger: %v\n", err) + os.Exit(1) + } + + // Log the current test state + fmt.Printf("\n--- Testing 
heartbeat level %d: %s ---\n", levelConfig.level, levelConfig.description) + logger.Info("Heartbeat test started", "level", levelConfig.level, "description", levelConfig.description) + + // Generate some logs to trigger heartbeat counters + for j := 0; j < 10; j++ { + logger.Debug("Debug test log", "iteration", j, "level_test", levelConfig.level) + logger.Info("Info test log", "iteration", j, "level_test", levelConfig.level) + logger.Warn("Warning test log", "iteration", j, "level_test", levelConfig.level) + logger.Error("Error test log", "iteration", j, "level_test", levelConfig.level) + time.Sleep(100 * time.Millisecond) + } + + // Wait for heartbeats to generate (slightly longer than the interval) + waitTime := 6 * time.Second + fmt.Printf("Waiting %v for heartbeats to generate...\n", waitTime) + time.Sleep(waitTime) + + logger.Info("Heartbeat test completed for level", "level", levelConfig.level) + } + + // Final shutdown + if err := logger.Shutdown(2 * time.Second); err != nil { + fmt.Fprintf(os.Stderr, "Warning: Failed to shut down logger: %v\n", err) + } + + fmt.Println("\nHeartbeat test program completed successfully") + fmt.Println("Check logs directory for generated log files") +} \ No newline at end of file diff --git a/cmd/simple/main.go b/cmd/simple/main.go index 114e6cc..8bdd5ac 100644 --- a/cmd/simple/main.go +++ b/cmd/simple/main.go @@ -18,7 +18,7 @@ var tomlContent = ` # Example simple_config.toml [logging] level = -4 # Debug - directory = "./simple_logs" + directory = "./logs" format = "txt" extension = "log" show_timestamp = true @@ -113,5 +113,5 @@ func main() { // NO time.Sleep needed here - log.Shutdown waits. 
fmt.Println("--- Example Finished ---") - fmt.Printf("Check log files in './simple_logs' and the saved config '%s'.\n", configFile) + fmt.Printf("Check log files in './logs' and the saved config '%s'.\n", configFile) } \ No newline at end of file diff --git a/cmd/stress/main.go b/cmd/stress/main.go index 36d3dc2..d134a50 100644 --- a/cmd/stress/main.go +++ b/cmd/stress/main.go @@ -31,7 +31,7 @@ var tomlContent = ` [logstress] level = -4 # Debug name = "stress_test" - directory = "./stress_logs" # Log package will create this + directory = "./logs" # Log package will create this format = "txt" extension = "log" show_timestamp = true @@ -115,8 +115,8 @@ func main() { os.Exit(1) } fmt.Printf("Created dummy config file: %s\n", configFile) - logsDir := "./stress_logs" // Match config - _ = os.RemoveAll(logsDir) // Clean previous run's LOGS directory before starting + logsDir := "./logs" // Match config + _ = os.RemoveAll(logsDir) // Clean previous run's LOGS directory before starting // defer os.Remove(configFile) // Remove to keep the saved config file // defer os.RemoveAll(logsDir) // Remove to keep the log directory diff --git a/config.go b/config.go index 64e4165..804aa4b 100644 --- a/config.go +++ b/config.go @@ -36,6 +36,10 @@ type Config struct { EnablePeriodicSync bool `toml:"enable_periodic_sync"` // Periodic sync with disk MinCheckIntervalMs int64 `toml:"min_check_interval_ms"` // Minimum adaptive interval MaxCheckIntervalMs int64 `toml:"max_check_interval_ms"` // Maximum adaptive interval + + // Heartbeat configuration + HeartbeatLevel int64 `toml:"heartbeat_level"` // 0=disabled, 1=proc only, 2=proc+disk, 3=proc+disk+sys + HeartbeatIntervalS int64 `toml:"heartbeat_interval_s"` // Interval seconds for heartbeat } // defaultConfig is the single source of truth for all default values @@ -66,9 +70,13 @@ var defaultConfig = Config{ // Disk check settings DiskCheckIntervalMs: 5000, EnableAdaptiveInterval: true, - EnablePeriodicSync: false, + EnablePeriodicSync: 
true, MinCheckIntervalMs: 100, MaxCheckIntervalMs: 60000, + + // Heartbeat settings + HeartbeatLevel: 0, // Disabled by default + HeartbeatIntervalS: 60, // Default to 60 seconds if enabled } // DefaultConfig returns a copy of the default configuration @@ -123,7 +131,13 @@ func (c *Config) validate() error { return fmtErrorf("retention_period_hrs cannot be negative: %f", c.RetentionPeriodHrs) } if c.RetentionCheckMins < 0 { - // Allow 0 check interval (disables periodic check but not initial) + return fmtErrorf("retention_check_mins cannot be negative: %f", c.RetentionCheckMins) + } + if c.HeartbeatLevel < 0 || c.HeartbeatLevel > 3 { + return fmtErrorf("heartbeat_level must be between 0 and 3: %d", c.HeartbeatLevel) + } + if c.HeartbeatLevel > 0 && c.HeartbeatIntervalS <= 0 { + return fmtErrorf("heartbeat_interval_s must be positive when heartbeat is enabled: %d", c.HeartbeatIntervalS) } return nil } \ No newline at end of file diff --git a/format.go b/format.go index 4208258..850ac48 100644 --- a/format.go +++ b/format.go @@ -232,7 +232,7 @@ func (s *serializer) writeJSONValue(v any) { } } -// levelToString converts numeric levels to string representation. 
+// levelToString converts numeric levels, including the heartbeat levels, to their string representation. func levelToString(level int64) string { switch level { case LevelDebug: @@ -243,6 +243,12 @@ func levelToString(level int64) string { return "WARN" case LevelError: return "ERROR" + case LevelProc: + return "PROC" + case LevelDisk: + return "DISK" + case LevelSys: + return "SYS" default: return fmt.Sprintf("LEVEL(%d)", level) } diff --git a/go.mod b/go.mod index 614f610..bc7a4c7 100644 --- a/go.mod +++ b/go.mod @@ -2,7 +2,7 @@ module github.com/LixenWraith/log go 1.24.2 -require github.com/LixenWraith/config v0.0.0-20250423043415-925ccb5f1748 +require github.com/LixenWraith/config v0.0.0-20250423082047-b106c94c2c8b require ( github.com/LixenWraith/tinytoml v0.0.0-20250422065624-8aa28720f04a // indirect diff --git a/go.sum b/go.sum index a4daa2d..8ba5822 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,5 @@ -github.com/LixenWraith/config v0.0.0-20250423043415-925ccb5f1748 h1:d5Kq0OSqsJM8eSwA4xvoAOAWwniKBgZOy3h4e4fjiPo= -github.com/LixenWraith/config v0.0.0-20250423043415-925ccb5f1748/go.mod h1:LWz2FXeYAN1IxmPFAmbMZLhL/5LbHzJgnj4m7l5jGvc= +github.com/LixenWraith/config v0.0.0-20250423082047-b106c94c2c8b h1:IYhbozsDOhT1fiogABpomRq9IEonNmQs54ROPn3Xy4g= +github.com/LixenWraith/config v0.0.0-20250423082047-b106c94c2c8b/go.mod h1:LWz2FXeYAN1IxmPFAmbMZLhL/5LbHzJgnj4m7l5jGvc= github.com/LixenWraith/tinytoml v0.0.0-20250422065624-8aa28720f04a h1:m+lhpIexwlJa5m1QuEveRmaGIE+wp87T97PyX1IWbMw= github.com/LixenWraith/tinytoml v0.0.0-20250422065624-8aa28720f04a/go.mod h1:Vax79K0I//Klsa8POjua/XHbsMUiIdjJHr59VFbc0/8= github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= diff --git a/interface.go b/interface.go index 4b1b8bd..e22e2be 100644 --- a/interface.go +++ b/interface.go @@ -13,6 +13,13 @@ const ( LevelError int64 = 8 ) +// Heartbeat log levels +const ( + LevelProc int64 = 12 // Process/Logger Stats + LevelDisk int64 = 16 // Disk/File Stats + LevelSys int64 = 20 // 
System/Runtime Stats +) + // Record flags for controlling output structure const ( FlagShowTimestamp int64 = 0b01 diff --git a/logger.go b/logger.go index 0dd3273..f1748e2 100644 --- a/logger.go +++ b/logger.go @@ -13,10 +13,10 @@ import ( // Logger is the core struct that encapsulates all logger functionality type Logger struct { - config *config.Config // Config management + config *config.Config state State - initMu sync.Mutex // Only mutex we need to keep - serializer *serializer // Encapsulated serializer instance + initMu sync.Mutex + serializer *serializer } // NewLogger creates a new Logger instance with default settings @@ -39,6 +39,13 @@ func NewLogger() *Logger { l.state.CurrentSize.Store(0) l.state.EarliestFileTime.Store(time.Time{}) + // Initialize heartbeat counters + l.state.HeartbeatSequence.Store(0) + l.state.LoggerStartTime.Store(time.Now()) + l.state.TotalLogsProcessed.Store(0) + l.state.TotalRotations.Store(0) + l.state.TotalDeletions.Store(0) + // Create a closed channel initially to prevent nil pointer issues initialChan := make(chan logRecord) close(initialChan) @@ -245,6 +252,8 @@ func (l *Logger) loadCurrentConfig() *Config { cfg.MinCheckIntervalMs, _ = l.config.Int64("log.min_check_interval_ms") cfg.MaxCheckIntervalMs, _ = l.config.Int64("log.max_check_interval_ms") cfg.EnablePeriodicSync, _ = l.config.Bool("log.enable_periodic_sync") + cfg.HeartbeatLevel, _ = l.config.Int64("log.heartbeat_level") + cfg.HeartbeatIntervalS, _ = l.config.Int64("log.heartbeat_interval_s") return cfg } diff --git a/processor.go b/processor.go index 8f3f36a..f3bdf75 100644 --- a/processor.go +++ b/processor.go @@ -4,10 +4,7 @@ package log import ( "fmt" "os" - "path/filepath" - "sort" - "strings" - "syscall" + "runtime" "time" ) @@ -24,30 +21,148 @@ func (l *Logger) processLogs(ch <-chan logRecord) { l.state.ProcessorExited.Store(false) // Mark processor as running defer l.state.ProcessorExited.Store(true) // Ensure flag is set on exit - // Get configuration 
values for setup + // Set up timers and state variables + timers := l.setupProcessingTimers() + defer l.closeProcessingTimers(timers) + + // Perform an initial disk check on startup + l.performDiskCheck(true) // Force check and update status + + // Send initial heartbeats immediately instead of waiting for first tick + heartbeatLevel, _ := l.config.Int64("log.heartbeat_level") + if heartbeatLevel > 0 { + if heartbeatLevel >= 1 { + l.logProcHeartbeat() + } + if heartbeatLevel >= 2 { + l.logDiskHeartbeat() + } + if heartbeatLevel >= 3 { + l.logSysHeartbeat() + } + } + + // State variables for adaptive disk checks + var bytesSinceLastCheck int64 = 0 + var lastCheckTime time.Time = time.Now() + var logsSinceLastCheck int64 = 0 + + // --- Main Loop --- + for { + select { + case record, ok := <-ch: + if !ok { + // Channel closed: Perform final sync and exit + l.performSync() + return + } + + // Process the received log record + bytesWritten := l.processLogRecord(record) + if bytesWritten > 0 { + // Update adaptive check counters + bytesSinceLastCheck += bytesWritten + logsSinceLastCheck++ + + // Reactive Check Trigger + if bytesSinceLastCheck > reactiveCheckThresholdBytes { + if l.performDiskCheck(false) { // Check without forcing cleanup yet + bytesSinceLastCheck = 0 // Reset if check OK + logsSinceLastCheck = 0 + lastCheckTime = time.Now() + } + } + } + + case <-timers.flushTicker.C: + l.handleFlushTick() + + case <-timers.diskCheckTicker.C: + // Periodic disk check + if l.performDiskCheck(true) { // Periodic check, force cleanup if needed + l.adjustDiskCheckInterval(timers, lastCheckTime, logsSinceLastCheck) + // Reset counters after successful periodic check + bytesSinceLastCheck = 0 + logsSinceLastCheck = 0 + lastCheckTime = time.Now() + } + + case confirmChan := <-l.state.flushRequestChan: + l.handleFlushRequest(confirmChan) + + case <-timers.retentionChan: + l.handleRetentionCheck() + + case <-timers.heartbeatChan: + l.handleHeartbeat() + } + } +} + +// TimerSet 
holds all timers used in processLogs +type TimerSet struct { + flushTicker *time.Ticker + diskCheckTicker *time.Ticker + retentionTicker *time.Ticker + heartbeatTicker *time.Ticker + retentionChan <-chan time.Time + heartbeatChan <-chan time.Time +} + +// setupProcessingTimers creates and configures all necessary timers for the processor +func (l *Logger) setupProcessingTimers() *TimerSet { + timers := &TimerSet{} + + // Set up flush timer flushInterval, _ := l.config.Int64("log.flush_interval_ms") if flushInterval <= 0 { flushInterval = 100 } - flushTicker := time.NewTicker(time.Duration(flushInterval) * time.Millisecond) - defer flushTicker.Stop() + timers.flushTicker = time.NewTicker(time.Duration(flushInterval) * time.Millisecond) - // Retention Timer - var retentionTicker *time.Ticker - var retentionChan <-chan time.Time = nil + // Set up retention timer if enabled + timers.retentionChan = l.setupRetentionTimer(timers) + + // Set up disk check timer + timers.diskCheckTicker = l.setupDiskCheckTimer() + + // Set up heartbeat timer + timers.heartbeatChan = l.setupHeartbeatTimer(timers) + + return timers +} + +// closeProcessingTimers stops all active timers +func (l *Logger) closeProcessingTimers(timers *TimerSet) { + timers.flushTicker.Stop() + if timers.diskCheckTicker != nil { + timers.diskCheckTicker.Stop() + } + if timers.retentionTicker != nil { + timers.retentionTicker.Stop() + } + if timers.heartbeatTicker != nil { + timers.heartbeatTicker.Stop() + } +} + +// setupRetentionTimer configures the retention check timer if retention is enabled +func (l *Logger) setupRetentionTimer(timers *TimerSet) <-chan time.Time { retentionPeriodHrs, _ := l.config.Float64("log.retention_period_hrs") retentionCheckMins, _ := l.config.Float64("log.retention_check_mins") retentionDur := time.Duration(retentionPeriodHrs * float64(time.Hour)) retentionCheckInterval := time.Duration(retentionCheckMins * float64(time.Minute)) if retentionDur > 0 && retentionCheckInterval > 0 { - 
retentionTicker = time.NewTicker(retentionCheckInterval) - defer retentionTicker.Stop() - retentionChan = retentionTicker.C + timers.retentionTicker = time.NewTicker(retentionCheckInterval) l.updateEarliestFileTime() // Initial check + return timers.retentionTicker.C } + return nil +} - // Disk Check Timer +// setupDiskCheckTimer configures the disk check timer +func (l *Logger) setupDiskCheckTimer() *time.Ticker { diskCheckIntervalMs, _ := l.config.Int64("log.disk_check_interval_ms") if diskCheckIntervalMs <= 0 { diskCheckIntervalMs = 5000 @@ -67,522 +182,320 @@ func (l *Logger) processLogs(ch <-chan logRecord) { currentDiskCheckInterval = maxCheckInterval } - diskCheckTicker := time.NewTicker(currentDiskCheckInterval) - defer diskCheckTicker.Stop() - - // --- State Variables --- - var bytesSinceLastCheck int64 = 0 - var lastCheckTime time.Time = time.Now() - var logsSinceLastCheck int64 = 0 - - // Perform an initial disk check on startup - l.performDiskCheck(true) // Force check and update status - - // --- Main Loop --- - for { - select { - case record, ok := <-ch: - if !ok { - // Channel closed: Perform final sync and exit - l.performSync() - return - } - - // --- Process the received record --- - if !l.state.DiskStatusOK.Load() { - l.state.DroppedLogs.Add(1) - continue // Skip processing if disk known to be unavailable - } - - // Serialize the record - format, _ := l.config.String("log.format") - data := l.serializer.serialize( - format, - record.Flags, - record.TimeStamp, - record.Level, - record.Trace, - record.Args, - ) - dataLen := int64(len(data)) - - // Check for rotation - currentFileSize := l.state.CurrentSize.Load() - estimatedSize := currentFileSize + dataLen - - maxSizeMB, _ := l.config.Int64("log.max_size_mb") - if maxSizeMB > 0 && estimatedSize > maxSizeMB*1024*1024 { - if err := l.rotateLogFile(); err != nil { - fmtFprintf(os.Stderr, "log: failed to rotate log file: %v\n", err) - } - bytesSinceLastCheck = 0 // Reset counters after rotation - 
logsSinceLastCheck = 0 - } - - // Write to the current log file - cfPtr := l.state.CurrentFile.Load() - if currentLogFile, isFile := cfPtr.(*os.File); isFile && currentLogFile != nil { - n, err := currentLogFile.Write(data) - if err != nil { - fmtFprintf(os.Stderr, "log: failed to write to log file: %v\n", err) - l.state.DroppedLogs.Add(1) - l.performDiskCheck(true) // Force check if write fails - } else { - l.state.CurrentSize.Add(int64(n)) - bytesSinceLastCheck += int64(n) - logsSinceLastCheck++ - - // Reactive Check Trigger - if bytesSinceLastCheck > reactiveCheckThresholdBytes { - if l.performDiskCheck(false) { // Check without forcing cleanup yet - bytesSinceLastCheck = 0 // Reset if check OK - logsSinceLastCheck = 0 - lastCheckTime = time.Now() - } - } - } - } else { - l.state.DroppedLogs.Add(1) // File pointer somehow nil - } - - case <-flushTicker.C: - enableSync, _ := l.config.Bool("log.enable_periodic_sync") - if enableSync { - l.performSync() - } - - case <-diskCheckTicker.C: - // Periodic disk check - if l.performDiskCheck(true) { // Periodic check, force cleanup if needed - enableAdaptive, _ := l.config.Bool("log.enable_adaptive_interval") - if enableAdaptive { - elapsed := time.Since(lastCheckTime) - if elapsed < 10*time.Millisecond { - elapsed = 10 * time.Millisecond - } - - logsPerSecond := float64(logsSinceLastCheck) / elapsed.Seconds() - targetLogsPerSecond := float64(100) // Baseline - - if logsPerSecond < targetLogsPerSecond/2 { // Load low -> increase interval - currentDiskCheckInterval = time.Duration(float64(currentDiskCheckInterval) * adaptiveIntervalFactor) - } else if logsPerSecond > targetLogsPerSecond*2 { // Load high -> decrease interval - currentDiskCheckInterval = time.Duration(float64(currentDiskCheckInterval) * adaptiveSpeedUpFactor) - } - - // Clamp interval using current config - minCheckIntervalMs, _ := l.config.Int64("log.min_check_interval_ms") - maxCheckIntervalMs, _ := l.config.Int64("log.max_check_interval_ms") - 
minCheckInterval := time.Duration(minCheckIntervalMs) * time.Millisecond - maxCheckInterval := time.Duration(maxCheckIntervalMs) * time.Millisecond - - if currentDiskCheckInterval < minCheckInterval { - currentDiskCheckInterval = minCheckInterval - } - if currentDiskCheckInterval > maxCheckInterval { - currentDiskCheckInterval = maxCheckInterval - } - - diskCheckTicker.Reset(currentDiskCheckInterval) - } - // Reset counters after successful periodic check - bytesSinceLastCheck = 0 - logsSinceLastCheck = 0 - lastCheckTime = time.Now() - } - - case confirmChan := <-l.state.flushRequestChan: - l.performSync() - close(confirmChan) // Signal completion back to the Flush caller - - case <-retentionChan: - // Check file retention - retentionPeriodHrs, _ := l.config.Float64("log.retention_period_hrs") - retentionDur := time.Duration(retentionPeriodHrs * float64(time.Hour)) - - if retentionDur > 0 { - etPtr := l.state.EarliestFileTime.Load() - if earliest, ok := etPtr.(time.Time); ok && !earliest.IsZero() { - if time.Since(earliest) > retentionDur { - if err := l.cleanExpiredLogs(earliest); err == nil { - l.updateEarliestFileTime() - } else { - fmtFprintf(os.Stderr, "log: failed to clean expired logs: %v\n", err) - } - } - } else if !ok || earliest.IsZero() { - l.updateEarliestFileTime() - } - } - } - } + return time.NewTicker(currentDiskCheckInterval) } -// performSync syncs the current log file -func (l *Logger) performSync() { - cfPtr := l.state.CurrentFile.Load() - if cfPtr != nil { - if currentLogFile, isFile := cfPtr.(*os.File); isFile && currentLogFile != nil { - if err := currentLogFile.Sync(); err != nil { - // Log sync error - syncErrRecord := logRecord{ - Flags: FlagDefault, - TimeStamp: time.Now(), - Level: LevelWarn, - Args: []any{"Log file sync failed", "file", currentLogFile.Name(), "error", err.Error()}, - } - l.sendLogRecord(syncErrRecord) - } +// setupHeartbeatTimer configures the heartbeat timer if heartbeats are enabled +func (l *Logger) 
setupHeartbeatTimer(timers *TimerSet) <-chan time.Time { + heartbeatLevel, _ := l.config.Int64("log.heartbeat_level") + if heartbeatLevel > 0 { + intervalS, _ := l.config.Int64("log.heartbeat_interval_s") + // Make sure interval is positive + if intervalS <= 0 { + intervalS = 60 // Default to 60 seconds } - } -} - -// performDiskCheck checks disk space, triggers cleanup if needed, and updates status -// Returns true if disk is OK, false otherwise -func (l *Logger) performDiskCheck(forceCleanup bool) bool { - dir, _ := l.config.String("log.directory") - ext, _ := l.config.String("log.extension") - maxTotalMB, _ := l.config.Int64("log.max_total_size_mb") - minDiskFreeMB, _ := l.config.Int64("log.min_disk_free_mb") - maxTotal := maxTotalMB * 1024 * 1024 - minFreeRequired := minDiskFreeMB * 1024 * 1024 - - if maxTotal <= 0 && minFreeRequired <= 0 { - if !l.state.DiskStatusOK.Load() { - l.state.DiskStatusOK.Store(true) - l.state.DiskFullLogged.Store(false) - } - return true - } - - freeSpace, err := l.getDiskFreeSpace(dir) - if err != nil { - fmtFprintf(os.Stderr, "log: warning - failed to check free disk space for '%s': %v\n", dir, err) - if l.state.DiskStatusOK.Load() { - l.state.DiskStatusOK.Store(false) - } - return false - } - - needsCleanupCheck := false - spaceToFree := int64(0) - if minFreeRequired > 0 && freeSpace < minFreeRequired { - needsCleanupCheck = true - spaceToFree = minFreeRequired - freeSpace - } - - if maxTotal > 0 { - dirSize, err := l.getLogDirSize(dir, ext) - if err != nil { - fmtFprintf(os.Stderr, "log: warning - failed to check log directory size for '%s': %v\n", dir, err) - if l.state.DiskStatusOK.Load() { - l.state.DiskStatusOK.Store(false) - } - return false - } - if dirSize > maxTotal { - needsCleanupCheck = true - amountOver := dirSize - maxTotal - if amountOver > spaceToFree { - spaceToFree = amountOver - } - } - } - - if needsCleanupCheck && forceCleanup { - if err := l.cleanOldLogs(spaceToFree); err != nil { - if 
!l.state.DiskFullLogged.Swap(true) { - diskFullRecord := logRecord{ - Flags: FlagDefault, TimeStamp: time.Now(), Level: LevelError, - Args: []any{"Log directory full or disk space low, cleanup failed", "error", err.Error()}, - } - l.sendLogRecord(diskFullRecord) - } - if l.state.DiskStatusOK.Load() { - l.state.DiskStatusOK.Store(false) - } - return false - } - // Cleanup succeeded - l.state.DiskFullLogged.Store(false) - l.state.DiskStatusOK.Store(true) - l.updateEarliestFileTime() - return true - } else if needsCleanupCheck { - // Limits exceeded, but not forcing cleanup now - if l.state.DiskStatusOK.Load() { - l.state.DiskStatusOK.Store(false) - } - return false - } else { - // Limits OK - if !l.state.DiskStatusOK.Load() { - l.state.DiskStatusOK.Store(true) - l.state.DiskFullLogged.Store(false) - } - return true - } -} - -// getDiskFreeSpace retrieves available disk space for the given path -func (l *Logger) getDiskFreeSpace(path string) (int64, error) { - var stat syscall.Statfs_t - info, err := os.Stat(path) - if err != nil { - if os.IsNotExist(err) { - return 0, fmtErrorf("log directory '%s' does not exist for disk check: %w", path, err) - } - return 0, fmtErrorf("failed to stat log directory '%s': %w", path, err) - } - if !info.IsDir() { - path = filepath.Dir(path) - } - - if err := syscall.Statfs(path, &stat); err != nil { - return 0, fmtErrorf("failed to get disk stats for '%s': %w", path, err) - } - availableBytes := int64(stat.Bavail) * int64(stat.Bsize) - return availableBytes, nil -} - -// getLogDirSize calculates total size of log files matching the current extension -func (l *Logger) getLogDirSize(dir, fileExt string) (int64, error) { - var size int64 - entries, err := os.ReadDir(dir) - if err != nil { - if os.IsNotExist(err) { - return 0, nil - } - return 0, fmtErrorf("failed to read log directory '%s': %w", dir, err) - } - - targetExt := "." 
+ fileExt - for _, entry := range entries { - if entry.IsDir() { - continue - } - if filepath.Ext(entry.Name()) == targetExt { - info, errInfo := entry.Info() - if errInfo != nil { - continue - } - size += info.Size() - } - } - return size, nil -} - -// cleanOldLogs removes oldest log files until required space is freed -func (l *Logger) cleanOldLogs(required int64) error { - dir, _ := l.config.String("log.directory") - fileExt, _ := l.config.String("log.extension") - - entries, err := os.ReadDir(dir) - if err != nil { - return fmtErrorf("failed to read log directory '%s' for cleanup: %w", dir, err) - } - - currentLogFileName := "" - cfPtr := l.state.CurrentFile.Load() - if cfPtr != nil { - if clf, ok := cfPtr.(*os.File); ok && clf != nil { - currentLogFileName = filepath.Base(clf.Name()) - } - } - - type logFileMeta struct { - name string - modTime time.Time - size int64 - } - var logs []logFileMeta - targetExt := "." + fileExt - for _, entry := range entries { - if entry.IsDir() || filepath.Ext(entry.Name()) != targetExt || entry.Name() == currentLogFileName { - continue - } - info, errInfo := entry.Info() - if errInfo != nil { - continue - } - logs = append(logs, logFileMeta{name: entry.Name(), modTime: info.ModTime(), size: info.Size()}) - } - - if len(logs) == 0 { - if required > 0 { - return fmtErrorf("no old logs available to delete in '%s', needed %d bytes", dir, required) - } - return nil - } - - sort.Slice(logs, func(i, j int) bool { return logs[i].modTime.Before(logs[j].modTime) }) - - var freedSpace int64 - for _, log := range logs { - if required > 0 && freedSpace >= required { - break - } - filePath := filepath.Join(dir, log.name) - if err := os.Remove(filePath); err != nil { - fmtFprintf(os.Stderr, "log: failed to remove old log file '%s': %v\n", filePath, err) - continue - } - freedSpace += log.size - } - - if required > 0 && freedSpace < required { - return fmtErrorf("could not free enough space in '%s': freed %d bytes, needed %d bytes", dir, 
freedSpace, required) + // time.NewTicker delivers its first tick one full interval after creation; + // processLogs emits the initial heartbeats itself, so no offset is applied here + timers.heartbeatTicker = time.NewTicker(time.Duration(intervalS) * time.Second) + return timers.heartbeatTicker.C } return nil } -// updateEarliestFileTime scans the log directory for the oldest log file -func (l *Logger) updateEarliestFileTime() { - dir, _ := l.config.String("log.directory") - fileExt, _ := l.config.String("log.extension") - baseName, _ := l.config.String("log.name") +// processLogRecord handles individual log records, returning bytes written +func (l *Logger) processLogRecord(record logRecord) int64 { + if !l.state.DiskStatusOK.Load() { + l.state.DroppedLogs.Add(1) + return 0 // Skip processing if disk known to be unavailable + } - entries, err := os.ReadDir(dir) - if err != nil { - l.state.EarliestFileTime.Store(time.Time{}) + // Serialize the record + format, _ := l.config.String("log.format") + data := l.serializer.serialize( + format, + record.Flags, + record.TimeStamp, + record.Level, + record.Trace, + record.Args, + ) + dataLen := int64(len(data)) + + // Check for rotation + currentFileSize := l.state.CurrentSize.Load() + estimatedSize := currentFileSize + dataLen + + maxSizeMB, _ := l.config.Int64("log.max_size_mb") + if maxSizeMB > 0 && estimatedSize > maxSizeMB*1024*1024 { + if err := l.rotateLogFile(); err != nil { + fmtFprintf(os.Stderr, "log: failed to rotate log file: %v\n", err) + } + } + + // Write to the current log file + cfPtr := l.state.CurrentFile.Load() + if currentLogFile, isFile := cfPtr.(*os.File); isFile && currentLogFile != nil { + n, err := currentLogFile.Write(data) + if err != nil { + fmtFprintf(os.Stderr, "log: failed to write to log file: %v\n", err) + l.state.DroppedLogs.Add(1) + l.performDiskCheck(true) // Force check if write fails + return 0 + } else { + l.state.CurrentSize.Add(int64(n)) + l.state.TotalLogsProcessed.Add(1) + return 
int64(n) + } + } else { + l.state.DroppedLogs.Add(1) // File pointer somehow nil + return 0 + } +} + +// handleFlushTick handles the periodic flush timer tick +func (l *Logger) handleFlushTick() { + enableSync, _ := l.config.Bool("log.enable_periodic_sync") + if enableSync { + l.performSync() + } +} + +// handleFlushRequest handles an explicit flush request +func (l *Logger) handleFlushRequest(confirmChan chan struct{}) { + l.performSync() + close(confirmChan) // Signal completion back to the Flush caller +} + +// handleRetentionCheck performs file retention check and cleanup +func (l *Logger) handleRetentionCheck() { + retentionPeriodHrs, _ := l.config.Float64("log.retention_period_hrs") + retentionDur := time.Duration(retentionPeriodHrs * float64(time.Hour)) + + if retentionDur > 0 { + etPtr := l.state.EarliestFileTime.Load() + if earliest, ok := etPtr.(time.Time); ok && !earliest.IsZero() { + if time.Since(earliest) > retentionDur { + if err := l.cleanExpiredLogs(earliest); err == nil { + l.updateEarliestFileTime() + } else { + fmtFprintf(os.Stderr, "log: failed to clean expired logs: %v\n", err) + } + } + } else if !ok || earliest.IsZero() { + l.updateEarliestFileTime() + } + } +} + +// adjustDiskCheckInterval modifies the disk check interval based on logging activity +func (l *Logger) adjustDiskCheckInterval(timers *TimerSet, lastCheckTime time.Time, logsSinceLastCheck int64) { + enableAdaptive, _ := l.config.Bool("log.enable_adaptive_interval") + if !enableAdaptive { return } - var earliest time.Time - currentLogFileName := "" - cfPtr := l.state.CurrentFile.Load() - if cfPtr != nil { - if clf, ok := cfPtr.(*os.File); ok && clf != nil { - currentLogFileName = filepath.Base(clf.Name()) - } + elapsed := time.Since(lastCheckTime) + if elapsed < 10*time.Millisecond { // Min arbitrary reasonable value + elapsed = 10 * time.Millisecond } - targetExt := "." 
+ fileExt - prefix := baseName + "_" - for _, entry := range entries { - if entry.IsDir() { - continue - } - fname := entry.Name() - if !strings.HasPrefix(fname, prefix) || filepath.Ext(fname) != targetExt || fname == currentLogFileName { - continue - } - info, errInfo := entry.Info() - if errInfo != nil { - continue - } - if earliest.IsZero() || info.ModTime().Before(earliest) { - earliest = info.ModTime() - } + logsPerSecond := float64(logsSinceLastCheck) / elapsed.Seconds() + targetLogsPerSecond := float64(100) // Baseline + + // Get current disk check interval from config + diskCheckIntervalMs, _ := l.config.Int64("log.disk_check_interval_ms") + currentDiskCheckInterval := time.Duration(diskCheckIntervalMs) * time.Millisecond + + // Calculate the new interval + var newInterval time.Duration + if logsPerSecond < targetLogsPerSecond/2 { // Load low -> increase interval + newInterval = time.Duration(float64(currentDiskCheckInterval) * adaptiveIntervalFactor) + } else if logsPerSecond > targetLogsPerSecond*2 { // Load high -> decrease interval + newInterval = time.Duration(float64(currentDiskCheckInterval) * adaptiveSpeedUpFactor) + } else { + // No change needed if within normal range + return } - l.state.EarliestFileTime.Store(earliest) + + // Clamp interval using current config + minCheckIntervalMs, _ := l.config.Int64("log.min_check_interval_ms") + maxCheckIntervalMs, _ := l.config.Int64("log.max_check_interval_ms") + minCheckInterval := time.Duration(minCheckIntervalMs) * time.Millisecond + maxCheckInterval := time.Duration(maxCheckIntervalMs) * time.Millisecond + + if newInterval < minCheckInterval { + newInterval = minCheckInterval + } + if newInterval > maxCheckInterval { + newInterval = maxCheckInterval + } + + // Reset the ticker with the new interval + timers.diskCheckTicker.Reset(newInterval) } -// cleanExpiredLogs removes log files older than the retention period -func (l *Logger) cleanExpiredLogs(oldest time.Time) error { +// handleHeartbeat processes 
a heartbeat timer tick +func (l *Logger) handleHeartbeat() { + heartbeatLevel, _ := l.config.Int64("log.heartbeat_level") + + // Process heartbeat based on configured level + if heartbeatLevel >= 1 { + l.logProcHeartbeat() + } + + if heartbeatLevel >= 2 { + l.logDiskHeartbeat() + } + + if heartbeatLevel >= 3 { + l.logSysHeartbeat() + } +} + +// logProcHeartbeat logs process/logger statistics heartbeat +func (l *Logger) logProcHeartbeat() { + // 1. Gather process/logger stats + processed := l.state.TotalLogsProcessed.Load() + dropped := l.state.DroppedLogs.Load() + sequence := l.state.HeartbeatSequence.Add(1) // Increment and get sequence number + + // Calculate uptime + startTimeVal := l.state.LoggerStartTime.Load() + var uptimeHours float64 = 0 + if startTime, ok := startTimeVal.(time.Time); ok && !startTime.IsZero() { + uptime := time.Since(startTime) + uptimeHours = uptime.Hours() + } + + // 2. Format Args + procArgs := []any{ + "type", "proc", + "sequence", sequence, + "uptime_hours", fmt.Sprintf("%.2f", uptimeHours), + "processed_logs", processed, + "dropped_logs", dropped, + } + + // 3. 
Write the heartbeat record + l.writeHeartbeatRecord(LevelProc, procArgs) +} + +// logDiskHeartbeat logs disk/file statistics heartbeat +func (l *Logger) logDiskHeartbeat() { + sequence := l.state.HeartbeatSequence.Load() + rotations := l.state.TotalRotations.Load() + deletions := l.state.TotalDeletions.Load() + + // Get file system stats dir, _ := l.config.String("log.directory") - fileExt, _ := l.config.String("log.extension") - retentionPeriodHrs, _ := l.config.Float64("log.retention_period_hrs") - rpDuration := time.Duration(retentionPeriodHrs * float64(time.Hour)) - - if rpDuration <= 0 { - return nil - } - cutoffTime := time.Now().Add(-rpDuration) - if oldest.IsZero() || !oldest.Before(cutoffTime) { - return nil - } - - entries, err := os.ReadDir(dir) - if err != nil { - return fmtErrorf("failed to read log directory '%s' for retention cleanup: %w", dir, err) - } - - currentLogFileName := "" - cfPtr := l.state.CurrentFile.Load() - if cfPtr != nil { - if clf, ok := cfPtr.(*os.File); ok && clf != nil { - currentLogFileName = filepath.Base(clf.Name()) - } - } - - targetExt := "." 
+ fileExt - var deletedCount int - for _, entry := range entries { - if entry.IsDir() || filepath.Ext(entry.Name()) != targetExt || entry.Name() == currentLogFileName { - continue - } - info, errInfo := entry.Info() - if errInfo != nil { - continue - } - if info.ModTime().Before(cutoffTime) { - filePath := filepath.Join(dir, entry.Name()) - if err := os.Remove(filePath); err != nil { - fmtFprintf(os.Stderr, "log: failed to remove expired log file '%s': %v\n", filePath, err) - } else { - deletedCount++ - } - } - } - - if deletedCount == 0 && err != nil { - return err - } - return nil -} - -// generateLogFileName creates a unique log filename using a timestamp -func (l *Logger) generateLogFileName(timestamp time.Time) string { - name, _ := l.config.String("log.name") ext, _ := l.config.String("log.extension") - tsFormat := timestamp.Format("060102_150405") - nano := timestamp.Nanosecond() - return fmt.Sprintf("%s_%s_%d.%s", name, tsFormat, nano, ext) + currentSizeMB := float64(l.state.CurrentSize.Load()) / (1024 * 1024) // Current file size + totalSizeMB := float64(-1.0) // Default error value + fileCount := -1 // Default error value + + dirSize, err := l.getLogDirSize(dir, ext) + if err == nil { + totalSizeMB = float64(dirSize) / (1024 * 1024) + } else { + fmtFprintf(os.Stderr, "log: warning - heartbeat failed to get dir size: %v\n", err) + } + + count, err := l.getLogFileCount(dir, ext) + if err == nil { + fileCount = count + } else { + fmtFprintf(os.Stderr, "log: warning - heartbeat failed to get file count: %v\n", err) + } + + // Format Args + diskArgs := []any{ + "type", "disk", + "sequence", sequence, + "rotated_files", rotations, + "deleted_files", deletions, + "total_log_size_mb", fmt.Sprintf("%.2f", totalSizeMB), + "log_file_count", fileCount, + "current_file_size_mb", fmt.Sprintf("%.2f", currentSizeMB), + "disk_status_ok", l.state.DiskStatusOK.Load(), + } + + // Add disk free space if we can get it + freeSpace, err := l.getDiskFreeSpace(dir) + if err == nil 
{ + freeSpaceMB := float64(freeSpace) / (1024 * 1024) + diskArgs = append(diskArgs, "disk_free_mb", fmt.Sprintf("%.2f", freeSpaceMB)) + } + + // Write the heartbeat record + l.writeHeartbeatRecord(LevelDisk, diskArgs) } -// createNewLogFile generates a unique name and opens a new log file -func (l *Logger) createNewLogFile() (*os.File, error) { - dir, _ := l.config.String("log.directory") - filename := l.generateLogFileName(time.Now()) - fullPath := filepath.Join(dir, filename) +// logSysHeartbeat logs system/runtime statistics heartbeat +func (l *Logger) logSysHeartbeat() { + sequence := l.state.HeartbeatSequence.Load() - // Retry logic for potential collisions (rare) - for i := 0; i < 5; i++ { - if _, err := os.Stat(fullPath); os.IsNotExist(err) { - break - } - time.Sleep(1 * time.Millisecond) - filename := l.generateLogFileName(time.Now()) - fullPath = filepath.Join(dir, filename) + // Get memory stats + var memStats runtime.MemStats + runtime.ReadMemStats(&memStats) + + // Format Args + sysArgs := []any{ + "type", "sys", + "sequence", sequence, + "alloc_mb", fmt.Sprintf("%.2f", float64(memStats.Alloc)/(1024*1024)), + "sys_mb", fmt.Sprintf("%.2f", float64(memStats.Sys)/(1024*1024)), + "num_gc", memStats.NumGC, + "num_goroutine", runtime.NumGoroutine(), } - file, err := os.OpenFile(fullPath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) - if err != nil { - return nil, fmtErrorf("failed to open/create log file '%s': %w", fullPath, err) - } - return file, nil + // Write the heartbeat record + l.writeHeartbeatRecord(LevelSys, sysArgs) } -// rotateLogFile handles closing the current log file and opening a new one -func (l *Logger) rotateLogFile() error { - newFile, err := l.createNewLogFile() +// writeHeartbeatRecord handles the common logic for writing a heartbeat record +func (l *Logger) writeHeartbeatRecord(level int64, args []any) { + // Skip if logger disabled or shutting down + if l.state.LoggerDisabled.Load() || l.state.ShutdownCalled.Load() { + return + } + + // 
Skip if disk known to be unavailable + if !l.state.DiskStatusOK.Load() { + return + } + + // 1. Serialize the record + format, _ := l.config.String("log.format") + // Use FlagDefault | FlagShowLevel so Level appears in the output + hbData := l.serializer.serialize(format, FlagDefault|FlagShowLevel, time.Now(), level, "", args) + + // 2. Write the record + cfPtr := l.state.CurrentFile.Load() + if cfPtr == nil { + fmtFprintf(os.Stderr, "log: error - current file handle is nil during heartbeat\n") + return + } + + currentLogFile, isFile := cfPtr.(*os.File) + if !isFile || currentLogFile == nil { + fmtFprintf(os.Stderr, "log: error - invalid file handle type during heartbeat\n") + return + } + + // Write with a single retry attempt + n, err := currentLogFile.Write(hbData) if err != nil { - return fmtErrorf("failed to create new log file for rotation: %w", err) - } + fmtFprintf(os.Stderr, "log: failed to write heartbeat: %v\n", err) + l.performDiskCheck(true) // Force disk check on write failure - oldFilePtr := l.state.CurrentFile.Swap(newFile) - l.state.CurrentSize.Store(0) // Reset size for the new file - - if oldFilePtr != nil { - if oldFile, ok := oldFilePtr.(*os.File); ok && oldFile != nil { - if err := oldFile.Close(); err != nil { - fmtFprintf(os.Stderr, "log: failed to close old log file '%s': %v\n", oldFile.Name(), err) - // Continue with new file anyway - } + // One retry after disk check + n, err = currentLogFile.Write(hbData) + if err != nil { + fmtFprintf(os.Stderr, "log: failed to write heartbeat on retry: %v\n", err) + } else { + l.state.CurrentSize.Add(int64(n)) } + } else { + l.state.CurrentSize.Add(int64(n)) } - - l.updateEarliestFileTime() // Update earliest time after rotation - return nil } \ No newline at end of file diff --git a/state.go b/state.go index 1795d79..745f654 100644 --- a/state.go +++ b/state.go @@ -31,6 +31,13 @@ type State struct { LoggedDrops atomic.Uint64 // Counter for dropped logs message already logged ActiveLogChannel 
atomic.Value // stores chan logRecord + + // Heartbeat statistics + HeartbeatSequence atomic.Uint64 // Counter for heartbeat sequence numbers + LoggerStartTime atomic.Value // Stores time.Time for uptime calculation + TotalLogsProcessed atomic.Uint64 // Counter for non-heartbeat logs successfully processed + TotalRotations atomic.Uint64 // Counter for successful log rotations + TotalDeletions atomic.Uint64 // Counter for successful log deletions (cleanup/retention) } // Init initializes or reconfigures the logger using the provided config.Config instance @@ -124,7 +131,9 @@ func (l *Logger) InitWithDefaults(overrides ...string) error { } // Shutdown gracefully closes the logger, attempting to flush pending records -func (l *Logger) Shutdown(timeout time.Duration) error { +// If no timeout is provided, uses a default of 2x flush interval +func (l *Logger) Shutdown(timeout ...time.Duration) error { + // Ensure shutdown runs only once if !l.state.ShutdownCalled.CompareAndSwap(false, true) { return nil @@ -153,10 +162,12 @@ func (l *Logger) Shutdown(timeout time.Duration) error { } l.initMu.Unlock() - // Determine the maximum time to wait for the processor to finish - effectiveTimeout := timeout - if effectiveTimeout <= 0 { - // Use the configured flush interval as the default timeout if none provided + // Determine the effective timeout, if timeout is zero or negative, use a default based on flush interval + var effectiveTimeout time.Duration + if len(timeout) > 0 { + effectiveTimeout = timeout[0] + } else { + // Default to 2x flush interval flushMs, _ := l.config.Int64("log.flush_interval_ms") effectiveTimeout = 2 * time.Duration(flushMs) * time.Millisecond } diff --git a/storage.go b/storage.go new file mode 100644 index 0000000..f7f742e --- /dev/null +++ b/storage.go @@ -0,0 +1,407 @@ +package log + +import ( + "fmt" + "os" + "path/filepath" + "sort" + "strings" + "syscall" + "time" +) + +// performSync syncs the current log file +func (l *Logger) performSync() { + 
cfPtr := l.state.CurrentFile.Load() + if cfPtr != nil { + if currentLogFile, isFile := cfPtr.(*os.File); isFile && currentLogFile != nil { + if err := currentLogFile.Sync(); err != nil { + // Log sync error + syncErrRecord := logRecord{ + Flags: FlagDefault, + TimeStamp: time.Now(), + Level: LevelWarn, + Args: []any{"Log file sync failed", "file", currentLogFile.Name(), "error", err.Error()}, + } + l.sendLogRecord(syncErrRecord) + } + } + } +} + +// performDiskCheck checks disk space, triggers cleanup if needed, and updates status +// Returns true if disk is OK, false otherwise +func (l *Logger) performDiskCheck(forceCleanup bool) bool { + dir, _ := l.config.String("log.directory") + ext, _ := l.config.String("log.extension") + maxTotalMB, _ := l.config.Int64("log.max_total_size_mb") + minDiskFreeMB, _ := l.config.Int64("log.min_disk_free_mb") + maxTotal := maxTotalMB * 1024 * 1024 + minFreeRequired := minDiskFreeMB * 1024 * 1024 + + if maxTotal <= 0 && minFreeRequired <= 0 { + if !l.state.DiskStatusOK.Load() { + l.state.DiskStatusOK.Store(true) + l.state.DiskFullLogged.Store(false) + } + return true + } + + freeSpace, err := l.getDiskFreeSpace(dir) + if err != nil { + fmtFprintf(os.Stderr, "log: warning - failed to check free disk space for '%s': %v\n", dir, err) + if l.state.DiskStatusOK.Load() { + l.state.DiskStatusOK.Store(false) + } + return false + } + + needsCleanupCheck := false + spaceToFree := int64(0) + if minFreeRequired > 0 && freeSpace < minFreeRequired { + needsCleanupCheck = true + spaceToFree = minFreeRequired - freeSpace + } + + if maxTotal > 0 { + dirSize, err := l.getLogDirSize(dir, ext) + if err != nil { + fmtFprintf(os.Stderr, "log: warning - failed to check log directory size for '%s': %v\n", dir, err) + if l.state.DiskStatusOK.Load() { + l.state.DiskStatusOK.Store(false) + } + return false + } + if dirSize > maxTotal { + needsCleanupCheck = true + amountOver := dirSize - maxTotal + if amountOver > spaceToFree { + spaceToFree = amountOver + } 
+ } + } + + if needsCleanupCheck && forceCleanup { + if err := l.cleanOldLogs(spaceToFree); err != nil { + if !l.state.DiskFullLogged.Swap(true) { + diskFullRecord := logRecord{ + Flags: FlagDefault, TimeStamp: time.Now(), Level: LevelError, + Args: []any{"Log directory full or disk space low, cleanup failed", "error", err.Error()}, + } + l.sendLogRecord(diskFullRecord) + } + if l.state.DiskStatusOK.Load() { + l.state.DiskStatusOK.Store(false) + } + return false + } + // Cleanup succeeded + l.state.DiskFullLogged.Store(false) + l.state.DiskStatusOK.Store(true) + l.updateEarliestFileTime() + return true + } else if needsCleanupCheck { + // Limits exceeded, but not forcing cleanup now + if l.state.DiskStatusOK.Load() { + l.state.DiskStatusOK.Store(false) + } + return false + } else { + // Limits OK + if !l.state.DiskStatusOK.Load() { + l.state.DiskStatusOK.Store(true) + l.state.DiskFullLogged.Store(false) + } + return true + } +} + +// getDiskFreeSpace retrieves available disk space for the given path +func (l *Logger) getDiskFreeSpace(path string) (int64, error) { + var stat syscall.Statfs_t + info, err := os.Stat(path) + if err != nil { + if os.IsNotExist(err) { + return 0, fmtErrorf("log directory '%s' does not exist for disk check: %w", path, err) + } + return 0, fmtErrorf("failed to stat log directory '%s': %w", path, err) + } + if !info.IsDir() { + path = filepath.Dir(path) + } + + if err := syscall.Statfs(path, &stat); err != nil { + return 0, fmtErrorf("failed to get disk stats for '%s': %w", path, err) + } + availableBytes := int64(stat.Bavail) * int64(stat.Bsize) + return availableBytes, nil +} + +// getLogDirSize calculates total size of log files matching the current extension +func (l *Logger) getLogDirSize(dir, fileExt string) (int64, error) { + var size int64 + entries, err := os.ReadDir(dir) + if err != nil { + if os.IsNotExist(err) { + return 0, nil + } + return 0, fmtErrorf("failed to read log directory '%s': %w", dir, err) + } + + targetExt := "." 
+ fileExt + for _, entry := range entries { + if entry.IsDir() { + continue + } + if filepath.Ext(entry.Name()) == targetExt { + info, errInfo := entry.Info() + if errInfo != nil { + continue + } + size += info.Size() + } + } + return size, nil +} + +// cleanOldLogs removes oldest log files until required space is freed +func (l *Logger) cleanOldLogs(required int64) error { + dir, _ := l.config.String("log.directory") + fileExt, _ := l.config.String("log.extension") + + entries, err := os.ReadDir(dir) + if err != nil { + return fmtErrorf("failed to read log directory '%s' for cleanup: %w", dir, err) + } + + currentLogFileName := "" + cfPtr := l.state.CurrentFile.Load() + if cfPtr != nil { + if clf, ok := cfPtr.(*os.File); ok && clf != nil { + currentLogFileName = filepath.Base(clf.Name()) + } + } + + type logFileMeta struct { + name string + modTime time.Time + size int64 + } + var logs []logFileMeta + targetExt := "." + fileExt + for _, entry := range entries { + if entry.IsDir() || filepath.Ext(entry.Name()) != targetExt || entry.Name() == currentLogFileName { + continue + } + info, errInfo := entry.Info() + if errInfo != nil { + continue + } + logs = append(logs, logFileMeta{name: entry.Name(), modTime: info.ModTime(), size: info.Size()}) + } + + if len(logs) == 0 { + if required > 0 { + return fmtErrorf("no old logs available to delete in '%s', needed %d bytes", dir, required) + } + return nil + } + + sort.Slice(logs, func(i, j int) bool { return logs[i].modTime.Before(logs[j].modTime) }) + + var freedSpace int64 + for _, log := range logs { + if required > 0 && freedSpace >= required { + break + } + filePath := filepath.Join(dir, log.name) + if err := os.Remove(filePath); err != nil { + fmtFprintf(os.Stderr, "log: failed to remove old log file '%s': %v\n", filePath, err) + continue + } + freedSpace += log.size + l.state.TotalDeletions.Add(1) + } + + if required > 0 && freedSpace < required { + return fmtErrorf("could not free enough space in '%s': freed %d 
bytes, needed %d bytes", dir, freedSpace, required) + } + return nil +} + +// updateEarliestFileTime scans the log directory for the oldest log file +func (l *Logger) updateEarliestFileTime() { + dir, _ := l.config.String("log.directory") + fileExt, _ := l.config.String("log.extension") + baseName, _ := l.config.String("log.name") + + entries, err := os.ReadDir(dir) + if err != nil { + l.state.EarliestFileTime.Store(time.Time{}) + return + } + + var earliest time.Time + currentLogFileName := "" + cfPtr := l.state.CurrentFile.Load() + if cfPtr != nil { + if clf, ok := cfPtr.(*os.File); ok && clf != nil { + currentLogFileName = filepath.Base(clf.Name()) + } + } + + targetExt := "." + fileExt + prefix := baseName + "_" + for _, entry := range entries { + if entry.IsDir() { + continue + } + fname := entry.Name() + if !strings.HasPrefix(fname, prefix) || filepath.Ext(fname) != targetExt || fname == currentLogFileName { + continue + } + info, errInfo := entry.Info() + if errInfo != nil { + continue + } + if earliest.IsZero() || info.ModTime().Before(earliest) { + earliest = info.ModTime() + } + } + l.state.EarliestFileTime.Store(earliest) +} + +// cleanExpiredLogs removes log files older than the retention period +func (l *Logger) cleanExpiredLogs(oldest time.Time) error { + dir, _ := l.config.String("log.directory") + fileExt, _ := l.config.String("log.extension") + retentionPeriodHrs, _ := l.config.Float64("log.retention_period_hrs") + rpDuration := time.Duration(retentionPeriodHrs * float64(time.Hour)) + + if rpDuration <= 0 { + return nil + } + cutoffTime := time.Now().Add(-rpDuration) + if oldest.IsZero() || !oldest.Before(cutoffTime) { + return nil + } + + entries, err := os.ReadDir(dir) + if err != nil { + return fmtErrorf("failed to read log directory '%s' for retention cleanup: %w", dir, err) + } + + currentLogFileName := "" + cfPtr := l.state.CurrentFile.Load() + if cfPtr != nil { + if clf, ok := cfPtr.(*os.File); ok && clf != nil { + currentLogFileName = 
filepath.Base(clf.Name()) + } + } + + targetExt := "." + fileExt + var deletedCount int + for _, entry := range entries { + if entry.IsDir() || filepath.Ext(entry.Name()) != targetExt || entry.Name() == currentLogFileName { + continue + } + info, errInfo := entry.Info() + if errInfo != nil { + continue + } + if info.ModTime().Before(cutoffTime) { + filePath := filepath.Join(dir, entry.Name()) + if err := os.Remove(filePath); err != nil { + fmtFprintf(os.Stderr, "log: failed to remove expired log file '%s': %v\n", filePath, err) + } else { + deletedCount++ + l.state.TotalDeletions.Add(1) + } + } + } + + if deletedCount == 0 && err != nil { + return err + } + return nil +} + +// generateLogFileName creates a unique log filename using a timestamp +func (l *Logger) generateLogFileName(timestamp time.Time) string { + name, _ := l.config.String("log.name") + ext, _ := l.config.String("log.extension") + tsFormat := timestamp.Format("060102_150405") + nano := timestamp.Nanosecond() + return fmt.Sprintf("%s_%s_%d.%s", name, tsFormat, nano, ext) +} + +// createNewLogFile generates a unique name and opens a new log file +func (l *Logger) createNewLogFile() (*os.File, error) { + dir, _ := l.config.String("log.directory") + filename := l.generateLogFileName(time.Now()) + fullPath := filepath.Join(dir, filename) + + // Retry logic for potential collisions (rare) + for i := 0; i < 5; i++ { + if _, err := os.Stat(fullPath); os.IsNotExist(err) { + break + } + time.Sleep(1 * time.Millisecond) + filename := l.generateLogFileName(time.Now()) + fullPath = filepath.Join(dir, filename) + } + + file, err := os.OpenFile(fullPath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) + if err != nil { + return nil, fmtErrorf("failed to open/create log file '%s': %w", fullPath, err) + } + return file, nil +} + +// rotateLogFile handles closing the current log file and opening a new one +func (l *Logger) rotateLogFile() error { + newFile, err := l.createNewLogFile() + if err != nil { + return 
fmtErrorf("failed to create new log file for rotation: %w", err) + } + + oldFilePtr := l.state.CurrentFile.Swap(newFile) + l.state.CurrentSize.Store(0) // Reset size for the new file + + if oldFilePtr != nil { + if oldFile, ok := oldFilePtr.(*os.File); ok && oldFile != nil { + if err := oldFile.Close(); err != nil { + fmtFprintf(os.Stderr, "log: failed to close old log file '%s': %v\n", oldFile.Name(), err) + // Continue with new file anyway + } + } + } + + l.updateEarliestFileTime() + l.state.TotalRotations.Add(1) + return nil +} + +// getLogFileCount calculates the number of log files matching the current extension +func (l *Logger) getLogFileCount(dir, fileExt string) (int, error) { + count := 0 + entries, err := os.ReadDir(dir) + if err != nil { + if os.IsNotExist(err) { + return 0, nil + } + return -1, fmtErrorf("failed to read log directory '%s': %w", dir, err) + } + + targetExt := "." + fileExt + for _, entry := range entries { + if entry.IsDir() { + continue + } + // Count all files matching the extension, including the current one if present + if filepath.Ext(entry.Name()) == targetExt { + count++ + } + } + return count, nil +} \ No newline at end of file diff --git a/utility.go b/utility.go index ec39fcc..36d1928 100644 --- a/utility.go +++ b/utility.go @@ -153,7 +153,13 @@ func Level(levelStr string) (int64, error) { return LevelWarn, nil case "error": return LevelError, nil + case "proc": + return LevelProc, nil + case "disk": + return LevelDisk, nil + case "sys": + return LevelSys, nil default: - return 0, fmtErrorf("invalid level string: '%s' (use debug, info, warn, error)", levelStr) + return 0, fmtErrorf("invalid level string: '%s' (use debug, info, warn, error, proc, disk, sys)", levelStr) } } \ No newline at end of file