diff --git a/README.md b/README.md index 8af6463..82a6dd5 100644 --- a/README.md +++ b/README.md @@ -52,11 +52,6 @@ func main() { go get github.com/lixenwraith/log ``` -For configuration management support: -```bash -go get github.com/lixenwraith/config -``` - ## Documentation - **[Getting Started](doc/getting-started.md)** - Installation and basic usage @@ -67,7 +62,7 @@ go get github.com/lixenwraith/config - **[Disk Management](doc/storage.md)** - File rotation and cleanup - **[Heartbeat Monitoring](doc/heartbeat.md)** - Operational statistics - **[Compatibility Adapters](doc/adapters.md)** - Framework integrations -- **[LLM Guide](doc/llm-guide_lixenwraith_log.md)** - Guide for LLM usage without full codebase +- **[Quick Guide](doc/quick-guide_lixenwraith_log.md)** - Quick reference guide ## Architecture Overview diff --git a/builder.go b/builder.go index 0011f5c..7321c4d 100644 --- a/builder.go +++ b/builder.go @@ -1,6 +1,10 @@ // FILE: lixenwraith/log/builder.go package log +import ( + "github.com/lixenwraith/log/sanitizer" +) + // Builder provides a fluent API for building logger configurations // It wraps a Config instance and provides chainable methods for setting values type Builder struct { @@ -70,6 +74,12 @@ func (b *Builder) Format(format string) *Builder { return b } +// Sanitization sets the sanitization mode +func (b *Builder) Sanitization(mode sanitizer.Mode) *Builder { + b.cfg.Sanitization = mode + return b +} + // Extension sets the log level func (b *Builder) Extension(ext string) *Builder { b.cfg.Extension = ext diff --git a/builder_test.go b/builder_test.go index 82585ea..a4ec380 100644 --- a/builder_test.go +++ b/builder_test.go @@ -67,8 +67,8 @@ func TestBuilder_Build(t *testing.T) { t.Run("apply config validation error", func(t *testing.T) { // Use a configuration that will fail validation inside ApplyConfig, - // e.g., an invalid directory path that cannot be created. - // Note: on linux /root is not writable by non-root users. 
+ // e.g., an invalid directory path that cannot be created + // Note: on linux /root is not writable by non-root users invalidDir := filepath.Join("/root", "unwritable-log-test-dir") logger, err := NewBuilder(). Directory(invalidDir). diff --git a/config.go b/config.go index 52a3b59..aa0961b 100644 --- a/config.go +++ b/config.go @@ -6,6 +6,8 @@ import ( "strconv" "strings" "time" + + "github.com/lixenwraith/log/sanitizer" ) // Config holds all logger configuration values @@ -19,13 +21,14 @@ type Config struct { Level int64 `toml:"level"` // Log records at or above this Level will be logged Name string `toml:"name"` // Base name for log files Directory string `toml:"directory"` // Directory for log files - Format string `toml:"format"` // "txt", "raw", or "json" Extension string `toml:"extension"` // Log file extension // Formatting - ShowTimestamp bool `toml:"show_timestamp"` // Add timestamp to log records - ShowLevel bool `toml:"show_level"` // Add level to log record - TimestampFormat string `toml:"timestamp_format"` // Time format for log timestamps + Format string `toml:"format"` // "txt", "raw", or "json" + ShowTimestamp bool `toml:"show_timestamp"` // Add timestamp to log records + ShowLevel bool `toml:"show_level"` // Add level to log record + TimestampFormat string `toml:"timestamp_format"` // Time format for log timestamps + Sanitization sanitizer.Mode `toml:"sanitization"` // 0=None, 1=HexEncode, 2=Strip, 3=Escape // Buffer and size limits BufferSize int64 `toml:"buffer_size"` // Channel buffer size @@ -65,13 +68,14 @@ var defaultConfig = Config{ Level: LevelInfo, Name: "log", Directory: "./log", - Format: "txt", Extension: "log", // Formatting + Format: "txt", ShowTimestamp: true, ShowLevel: true, TimestampFormat: time.RFC3339Nano, + Sanitization: sanitizer.HexEncode, // Buffer and size limits BufferSize: 1024, @@ -123,6 +127,11 @@ func (c *Config) Validate() error { return fmtErrorf("invalid format: '%s' (use txt, json, or raw)", c.Format) } + // 
TODO: better bound check, implement validator in `sanitizer` + if c.Sanitization < 0 || c.Sanitization > sanitizer.Escape { + return fmtErrorf("invalid sanitization mode: '%d' (use 0=None, 1=HexEncode, 2=Strip, 3=Escape)", c.Sanitization) + } + if strings.HasPrefix(c.Extension, ".") { return fmtErrorf("extension should not start with dot: %s", c.Extension) } @@ -175,8 +184,8 @@ func (c *Config) Validate() error { return nil } -// applyConfigField applies a single key-value override to a Config. -// This is the core field mapping logic for string overrides. +// applyConfigField applies a single key-value override to a Config +// This is the core field mapping logic for string overrides func applyConfigField(cfg *Config, key, value string) error { switch key { // Basic settings @@ -196,12 +205,12 @@ func applyConfigField(cfg *Config, key, value string) error { cfg.Name = value case "directory": cfg.Directory = value - case "format": - cfg.Format = value case "extension": cfg.Extension = value // Formatting + case "format": + cfg.Format = value case "show_timestamp": boolVal, err := strconv.ParseBool(value) if err != nil { @@ -216,6 +225,12 @@ func applyConfigField(cfg *Config, key, value string) error { cfg.ShowLevel = boolVal case "timestamp_format": cfg.TimestampFormat = value + case "sanitization": + intVal, err := strconv.ParseInt(value, 10, 64) + if err != nil { + return fmtErrorf("invalid integer value for sanitization '%s': %w", value, err) + } + cfg.Sanitization = sanitizer.Mode(intVal) // Buffer and size limits case "buffer_size": diff --git a/constant.go b/constant.go index df4b63e..977ebfb 100644 --- a/constant.go +++ b/constant.go @@ -1,7 +1,9 @@ // FILE: lixenwraith/log/constant.go package log -import "time" +import ( + "time" +) // Log level constants const ( @@ -27,16 +29,19 @@ const ( FlagDefault = FlagShowTimestamp | FlagShowLevel ) +// Storage const ( // Threshold for triggering reactive disk check reactiveCheckThresholdBytes int64 = 10 * 1024 * 
1024 - // Factors to adjust check interval - adaptiveIntervalFactor float64 = 1.5 // Slow down - adaptiveSpeedUpFactor float64 = 0.8 // Speed up - // Minimum wait time used throughout the package - minWaitTime = 10 * time.Millisecond // Size multiplier for KB, MB sizeMultiplier = 1000 ) -const hexChars = "0123456789abcdef" \ No newline at end of file +// Timers +const ( + // Minimum wait time used throughout the package + minWaitTime = 10 * time.Millisecond + // Factors to adjust check interval + adaptiveIntervalFactor float64 = 1.5 // Slow down + adaptiveSpeedUpFactor float64 = 0.8 // Speed up +) \ No newline at end of file diff --git a/doc/llm-guide_lixenwraith_log.md b/doc/quick-guide_lixenwraith_log.md similarity index 97% rename from doc/llm-guide_lixenwraith_log.md rename to doc/quick-guide_lixenwraith_log.md index 37d52d1..aad5d48 100644 --- a/doc/llm-guide_lixenwraith_log.md +++ b/doc/quick-guide_lixenwraith_log.md @@ -1,4 +1,4 @@ -# lixenwraith/log LLM Usage Guide +# lixenwraith/log Quick Reference Guide This guide details the `lixenwraith/log` package, a high-performance, buffered, rotating file logger for Go with built-in disk management, operational monitoring, and framework compatibility adapters. 
@@ -161,10 +161,10 @@ func (l *Logger) GetConfig() *Config ### Lifecycle Methods ```go -func (l *Logger) Start() error // Start log processing -func (l *Logger) Stop(timeout ...time.Duration) error // Stop (can restart) +func (l *Logger) Start() error // Start log processing +func (l *Logger) Stop(timeout ...time.Duration) error // Stop (can restart) func (l *Logger) Shutdown(timeout ...time.Duration) error // Terminal shutdown -func (l *Logger) Flush(timeout time.Duration) error // Force buffer flush +func (l *Logger) Flush(timeout time.Duration) error // Force buffer flush ``` ### Standard Logging Methods diff --git a/format.go b/format.go index d5693c4..d00c471 100644 --- a/format.go +++ b/format.go @@ -2,500 +2,269 @@ package log import ( - "bytes" - "encoding/hex" "encoding/json" "fmt" "strconv" - "strings" "time" "unicode/utf8" - "github.com/davecgh/go-spew/spew" + "github.com/lixenwraith/log/sanitizer" ) -// serializer manages the buffered writing of log entries -type serializer struct { - buf []byte - timestampFormat string +// Formatter manages the buffered writing and formatting of log entries +type Formatter struct { + format string + buf []byte + timestampFormat string + sanitizationMode sanitizer.Mode + sanitizer *sanitizer.Sanitizer } -// newSerializer creates a serializer instance -func newSerializer() *serializer { - return &serializer{ - buf: make([]byte, 0, 4096), // Initial reasonable capacity - timestampFormat: time.RFC3339Nano, // Default until configured +// NewFormatter creates a formatter instance +func NewFormatter(format string, bufferSize int64, timestampFormat string, sanitizationMode sanitizer.Mode) *Formatter { + if timestampFormat == "" { + timestampFormat = time.RFC3339Nano + } + if format == "" { + format = "txt" + } + return &Formatter{ + format: format, + buf: make([]byte, 0, bufferSize), + timestampFormat: timestampFormat, + sanitizationMode: sanitizationMode, + sanitizer: sanitizer.New(sanitizationMode), } } -// reset clears the 
serializer buffer for reuse -func (s *serializer) reset() { - s.buf = s.buf[:0] +// Reset clears the formatter buffer for reuse +func (f *Formatter) Reset() { + f.buf = f.buf[:0] } -// serialize converts log entries to the configured format, JSON, raw, or (default) txt -func (s *serializer) serialize(format string, flags int64, timestamp time.Time, level int64, trace string, args []any) []byte { - s.reset() +// Format converts log entries to the configured format +func (f *Formatter) Format(format string, flags int64, timestamp time.Time, level int64, trace string, args []any) []byte { + f.Reset() - // 1. Prioritize the on-demand flag from Write() + // The FlagRaw acts as an override to the configured format + effectiveFormat := format if flags&FlagRaw != 0 { - return s.serializeRaw(args) + effectiveFormat = "raw" } - // 2. Check for structured JSON flag - if flags&FlagStructuredJSON != 0 && format == "json" { - return s.serializeStructuredJSON(flags, timestamp, level, trace, args) - } + // Create the handler based on the effective format + handler := sanitizer.NewUnifiedHandler(effectiveFormat, f.sanitizer) - // 3. 
Handle the instance-wide configuration setting - if format == "raw" { - return s.serializeRaw(args) - } - - if format == "json" { - return s.serializeJSON(flags, timestamp, level, trace, args) - } - return s.serializeTxt(flags, timestamp, level, trace, args) -} - -// serializeRaw formats args as space-separated strings without metadata or newline -// This is used for both format="raw" configuration and Logger.Write() calls -func (s *serializer) serializeRaw(args []any) []byte { - needsSpace := false - - for _, arg := range args { - if needsSpace { - s.buf = append(s.buf, ' ') + switch effectiveFormat { + case "raw": + // This dedicated path handles both format="raw" and FlagRaw + // It only serializes the arguments and adds NO metadata or newlines + for i, arg := range args { + f.convertValue(&f.buf, arg, handler, i > 0) } - s.writeRawValue(arg) - needsSpace = true + return f.buf + + case "json": + // The existing JSON serialization logic remains unchanged + return f.formatJSON(flags, timestamp, level, trace, args, handler) + + case "txt": + // The existing Txt serialization logic is now correctly isolated + return f.formatTxt(flags, timestamp, level, trace, args, handler) } - // No newline appended for raw format - return s.buf + return nil // forcing panic on unrecognized format } -// writeRawValue converts any value to its raw string representation -// fallback to go-spew/spew with data structure information for types that are not explicitly supported -func (s *serializer) writeRawValue(v any) { +// FormatValue formats a single value according to the formatter's configuration +func (f *Formatter) FormatValue(v any) []byte { + f.Reset() + handler := sanitizer.NewUnifiedHandler(f.format, f.sanitizer) + f.convertValue(&f.buf, v, handler, false) + return f.buf +} + +// FormatArgs formats multiple arguments as space-separated values +func (f *Formatter) FormatArgs(args ...any) []byte { + f.Reset() + handler := sanitizer.NewUnifiedHandler(f.format, f.sanitizer) + for 
i, arg := range args { + f.convertValue(&f.buf, arg, handler, i > 0) + } + return f.buf +} + +// convertValue provides unified type conversion +func (f *Formatter) convertValue(buf *[]byte, v any, handler *sanitizer.UnifiedHandler, needsSpace bool) { + if needsSpace && len(*buf) > 0 { + *buf = append(*buf, ' ') + } + switch val := v.(type) { case string: - s.appendSanitized(val) // prevent special character corruption - case rune: - // Single rune should be sanitized if non-printable - s.appendSanitizedRune(val) - case int: - s.buf = strconv.AppendInt(s.buf, int64(val), 10) - case int64: - s.buf = strconv.AppendInt(s.buf, val, 10) - case uint: - s.buf = strconv.AppendUint(s.buf, uint64(val), 10) - case uint64: - s.buf = strconv.AppendUint(s.buf, val, 10) - case float32: - s.buf = strconv.AppendFloat(s.buf, float64(val), 'f', -1, 32) - case float64: - s.buf = strconv.AppendFloat(s.buf, val, 'f', -1, 64) - case bool: - s.buf = strconv.AppendBool(s.buf, val) - case nil: - s.buf = append(s.buf, "nil"...) - case time.Time: - s.buf = val.AppendFormat(s.buf, s.timestampFormat) - case error: - s.buf = append(s.buf, val.Error()...) 
- case fmt.Stringer: - s.appendSanitized(val.String()) + handler.WriteString(buf, val) + case []byte: - s.appendSanitized(string(val)) // prevent special character corruption + handler.WriteString(buf, string(val)) + + case rune: + var runeStr [utf8.UTFMax]byte + n := utf8.EncodeRune(runeStr[:], val) + handler.WriteString(buf, string(runeStr[:n])) + + case int: + num := strconv.AppendInt(nil, int64(val), 10) + handler.WriteNumber(buf, string(num)) + + case int64: + num := strconv.AppendInt(nil, val, 10) + handler.WriteNumber(buf, string(num)) + + case uint: + num := strconv.AppendUint(nil, uint64(val), 10) + handler.WriteNumber(buf, string(num)) + + case uint64: + num := strconv.AppendUint(nil, val, 10) + handler.WriteNumber(buf, string(num)) + + case float32: + num := strconv.AppendFloat(nil, float64(val), 'f', -1, 32) + handler.WriteNumber(buf, string(num)) + + case float64: + num := strconv.AppendFloat(nil, val, 'f', -1, 64) + handler.WriteNumber(buf, string(num)) + + case bool: + handler.WriteBool(buf, val) + + case nil: + handler.WriteNil(buf) + + case time.Time: + timeStr := val.Format(f.timestampFormat) + handler.WriteString(buf, timeStr) + + case error: + handler.WriteString(buf, val.Error()) + + case fmt.Stringer: + handler.WriteString(buf, val.String()) + default: - // For all other types (structs, maps, pointers, arrays, etc.), delegate to spew - // It is not the intended use of raw logging - // The output of such cases are structured and have type and size information set by spew - // Converting to string similar to non-raw logs is not used to avoid binary log corruption - var b bytes.Buffer - - // Use a custom dumper for log-friendly compact output - dumper := &spew.ConfigState{ - Indent: " ", - MaxDepth: 10, - DisablePointerAddresses: true, // Cleaner for logs - DisableCapacities: true, // Less noise - SortKeys: true, // Consistent map output - } - - dumper.Fdump(&b, val) - - // Trim trailing new line added by spew - s.buf = append(s.buf, 
bytes.TrimSpace(b.Bytes())...) + handler.WriteComplex(buf, val) } } -// serializeJSON formats log entries as JSON (time, level, trace, fields) -func (s *serializer) serializeJSON(flags int64, timestamp time.Time, level int64, trace string, args []any) []byte { - s.buf = append(s.buf, '{') +// formatJSON unifies JSON output +func (f *Formatter) formatJSON(flags int64, timestamp time.Time, level int64, trace string, args []any, handler *sanitizer.UnifiedHandler) []byte { + f.buf = append(f.buf, '{') needsComma := false if flags&FlagShowTimestamp != 0 { - s.buf = append(s.buf, `"time":"`...) - s.buf = timestamp.AppendFormat(s.buf, s.timestampFormat) - s.buf = append(s.buf, '"') + f.buf = append(f.buf, `"time":"`...) + f.buf = timestamp.AppendFormat(f.buf, f.timestampFormat) + f.buf = append(f.buf, '"') needsComma = true } if flags&FlagShowLevel != 0 { if needsComma { - s.buf = append(s.buf, ',') + f.buf = append(f.buf, ',') } - s.buf = append(s.buf, `"level":"`...) - s.buf = append(s.buf, levelToString(level)...) - s.buf = append(s.buf, '"') + f.buf = append(f.buf, `"level":"`...) + f.buf = append(f.buf, levelToString(level)...) + f.buf = append(f.buf, '"') needsComma = true } if trace != "" { if needsComma { - s.buf = append(s.buf, ',') + f.buf = append(f.buf, ',') } - s.buf = append(s.buf, `"trace":"`...) - s.writeString(trace) // Ensure trace string is escaped - s.buf = append(s.buf, '"') + f.buf = append(f.buf, `"trace":`...) + handler.WriteString(&f.buf, trace) needsComma = true } + // Handle structured JSON if flag is set and args match pattern + if flags&FlagStructuredJSON != 0 && len(args) >= 2 { + if message, ok := args[0].(string); ok { + if fields, ok := args[1].(map[string]any); ok { + if needsComma { + f.buf = append(f.buf, ',') + } + f.buf = append(f.buf, `"message":`...) + handler.WriteString(&f.buf, message) + + f.buf = append(f.buf, ',') + f.buf = append(f.buf, `"fields":`...) 
+ + marshaledFields, err := json.Marshal(fields) + if err != nil { + f.buf = append(f.buf, `{"_marshal_error":"`...) + handler.WriteString(&f.buf, err.Error()) + f.buf = append(f.buf, `"}`...) + } else { + f.buf = append(f.buf, marshaledFields...) + } + + f.buf = append(f.buf, '}', '\n') + return f.buf + } + } + } + + // Regular JSON with fields array if len(args) > 0 { if needsComma { - s.buf = append(s.buf, ',') + f.buf = append(f.buf, ',') } - s.buf = append(s.buf, `"fields":[`...) + f.buf = append(f.buf, `"fields":[`...) for i, arg := range args { if i > 0 { - s.buf = append(s.buf, ',') + f.buf = append(f.buf, ',') } - s.writeJSONValue(arg) + f.convertValue(&f.buf, arg, handler, false) } - s.buf = append(s.buf, ']') + f.buf = append(f.buf, ']') } - s.buf = append(s.buf, '}', '\n') - return s.buf + f.buf = append(f.buf, '}', '\n') + return f.buf } -// serializeTxt formats log entries as plain txt (time, level, trace, fields) -func (s *serializer) serializeTxt(flags int64, timestamp time.Time, level int64, trace string, args []any) []byte { +// formatTxt handles txt format output +func (f *Formatter) formatTxt(flags int64, timestamp time.Time, level int64, trace string, args []any, handler *sanitizer.UnifiedHandler) []byte { needsSpace := false if flags&FlagShowTimestamp != 0 { - s.buf = timestamp.AppendFormat(s.buf, s.timestampFormat) + f.buf = timestamp.AppendFormat(f.buf, f.timestampFormat) needsSpace = true } if flags&FlagShowLevel != 0 { if needsSpace { - s.buf = append(s.buf, ' ') + f.buf = append(f.buf, ' ') } - s.buf = append(s.buf, levelToString(level)...) + f.buf = append(f.buf, levelToString(level)...) needsSpace = true } if trace != "" { if needsSpace { - s.buf = append(s.buf, ' ') + f.buf = append(f.buf, ' ') } - s.buf = append(s.buf, trace...) + f.buf = append(f.buf, trace...) 
needsSpace = true } for _, arg := range args { - if needsSpace { - s.buf = append(s.buf, ' ') - } - s.writeTxtValue(arg) + f.convertValue(&f.buf, arg, handler, needsSpace) needsSpace = true } - s.buf = append(s.buf, '\n') - return s.buf -} - -// writeTxtValue converts any value to its txt representation -func (s *serializer) writeTxtValue(v any) { - switch val := v.(type) { - case string: - s.appendSanitized(val) // prevent special character corruption - case rune: - // Single rune should be sanitized if non-printable - s.appendSanitizedRune(val) - case int: - s.buf = strconv.AppendInt(s.buf, int64(val), 10) - case int64: - s.buf = strconv.AppendInt(s.buf, val, 10) - case uint: - s.buf = strconv.AppendUint(s.buf, uint64(val), 10) - case uint64: - s.buf = strconv.AppendUint(s.buf, val, 10) - case float32: - s.buf = strconv.AppendFloat(s.buf, float64(val), 'f', -1, 32) - case float64: - s.buf = strconv.AppendFloat(s.buf, val, 'f', -1, 64) - case bool: - s.buf = strconv.AppendBool(s.buf, val) - case nil: - s.buf = append(s.buf, "null"...) - case time.Time: - s.buf = val.AppendFormat(s.buf, s.timestampFormat) - case error: - str := val.Error() - if len(str) == 0 || strings.ContainsRune(str, ' ') { - s.buf = append(s.buf, '"') - s.writeString(str) - s.buf = append(s.buf, '"') - } else { - s.buf = append(s.buf, str...) 
- } - case fmt.Stringer: - str := val.String() - if len(str) == 0 || strings.ContainsRune(str, ' ') { - s.buf = append(s.buf, '"') - s.writeString(str) - s.buf = append(s.buf, '"') - } else { - s.appendSanitized(str) - } - case []byte: - s.appendSanitized(string(val)) // prevent special character corruption - default: - str := fmt.Sprintf("%+v", val) - if len(str) == 0 || strings.ContainsRune(str, ' ') { - s.buf = append(s.buf, '"') - // Sanitize - for _, r := range str { - s.appendSanitizedRune(r) - } - s.buf = append(s.buf, '"') - } else { - // Sanitize non-quoted complex values - s.appendSanitized(str) - } - } -} - -// writeJSONValue converts any value to its JSON representation -func (s *serializer) writeJSONValue(v any) { - switch val := v.(type) { - case string: - s.buf = append(s.buf, '"') - s.writeString(val) - s.buf = append(s.buf, '"') - case int: - s.buf = strconv.AppendInt(s.buf, int64(val), 10) - case int64: - s.buf = strconv.AppendInt(s.buf, val, 10) - case uint: - s.buf = strconv.AppendUint(s.buf, uint64(val), 10) - case uint64: - s.buf = strconv.AppendUint(s.buf, val, 10) - case float32: - s.buf = strconv.AppendFloat(s.buf, float64(val), 'f', -1, 32) - case float64: - s.buf = strconv.AppendFloat(s.buf, val, 'f', -1, 64) - case bool: - s.buf = strconv.AppendBool(s.buf, val) - case nil: - s.buf = append(s.buf, "null"...) 
- case time.Time: - s.buf = append(s.buf, '"') - s.buf = val.AppendFormat(s.buf, s.timestampFormat) - s.buf = append(s.buf, '"') - case error: - s.buf = append(s.buf, '"') - s.writeString(val.Error()) - s.buf = append(s.buf, '"') - case fmt.Stringer: - s.buf = append(s.buf, '"') - s.writeString(val.String()) - s.buf = append(s.buf, '"') - default: - s.buf = append(s.buf, '"') - s.writeString(fmt.Sprintf("%+v", val)) - s.buf = append(s.buf, '"') - } -} - -// serializeStructuredJSON formats log entries as structured JSON with proper field marshaling -func (s *serializer) serializeStructuredJSON(flags int64, timestamp time.Time, level int64, trace string, args []any) []byte { - // Validate args structure - if len(args) < 2 { - // Fallback to regular JSON if args are malformed - return s.serializeJSON(flags, timestamp, level, trace, args) - } - - message, ok := args[0].(string) - if !ok { - // Fallback if message is not a string - return s.serializeJSON(flags, timestamp, level, trace, args) - } - - fields, ok := args[1].(map[string]any) - if !ok { - // Fallback if fields is not a map - return s.serializeJSON(flags, timestamp, level, trace, args) - } - - s.buf = append(s.buf, '{') - needsComma := false - - // Add timestamp - if flags&FlagShowTimestamp != 0 { - s.buf = append(s.buf, `"time":"`...) - s.buf = timestamp.AppendFormat(s.buf, s.timestampFormat) - s.buf = append(s.buf, '"') - needsComma = true - } - - // Add level - if flags&FlagShowLevel != 0 { - if needsComma { - s.buf = append(s.buf, ',') - } - s.buf = append(s.buf, `"level":"`...) - s.buf = append(s.buf, levelToString(level)...) - s.buf = append(s.buf, '"') - needsComma = true - } - - // Add message - if needsComma { - s.buf = append(s.buf, ',') - } - s.buf = append(s.buf, `"message":"`...) - s.writeString(message) - s.buf = append(s.buf, '"') - - // Add trace if present - if trace != "" { - s.buf = append(s.buf, ',') - s.buf = append(s.buf, `"trace":"`...) 
- s.writeString(trace) - s.buf = append(s.buf, '"') - } - - // Marshal fields using encoding/json - if len(fields) > 0 { - s.buf = append(s.buf, ',') - s.buf = append(s.buf, `"fields":`...) - - // Use json.Marshal for proper encoding - marshaledFields, err := json.Marshal(fields) - if err != nil { - // SECURITY: Log marshaling error as a string to prevent log injection - s.buf = append(s.buf, `{"_marshal_error":"`...) - s.writeString(err.Error()) - s.buf = append(s.buf, `"}`...) - } else { - s.buf = append(s.buf, marshaledFields...) - } - } - - s.buf = append(s.buf, '}', '\n') - return s.buf -} - -// appendSanitized sanitizes a string by replacing non-printable runes with their hex representation -func (s *serializer) appendSanitized(data string) { - var builder strings.Builder - builder.Grow(len(data)) // Pre-allocate for efficiency - - for _, r := range data { - // Use the standard library's definition of a printable character - // This correctly handles Unicode, including high-bit characters like '│' and '世界' - if strconv.IsPrint(r) { - builder.WriteRune(r) - } else { - // For non-printable runes, encode them safely in a format - // This handles multi-byte control characters correctly - var runeBytes [utf8.UTFMax]byte - n := utf8.EncodeRune(runeBytes[:], r) - builder.WriteString("<") - builder.WriteString(hex.EncodeToString(runeBytes[:n])) - builder.WriteString(">") - } - } - s.buf = append(s.buf, builder.String()...) -} - -// appendSanitizedRune sanitizes a rune by replacing non-printable rune with its hex representation -func (s *serializer) appendSanitizedRune(data rune) { - if strconv.IsPrint(data) { - s.buf = utf8.AppendRune(s.buf, data) - } else { - var runeBytes [utf8.UTFMax]byte - n := utf8.EncodeRune(runeBytes[:], data) - s.buf = append(s.buf, '<') - s.buf = append(s.buf, hex.EncodeToString(runeBytes[:n])...) 
- s.buf = append(s.buf, '>') - } -} - -// levelToString converts integer level values to string -func levelToString(level int64) string { - switch level { - case LevelDebug: - return "DEBUG" - case LevelInfo: - return "INFO" - case LevelWarn: - return "WARN" - case LevelError: - return "ERROR" - case LevelProc: - return "PROC" - case LevelDisk: - return "DISK" - case LevelSys: - return "SYS" - default: - return fmt.Sprintf("LEVEL(%d)", level) - } -} - -// writeString appends a string to the buffer, escaping JSON special characters -func (s *serializer) writeString(str string) { - lenStr := len(str) - for i := 0; i < lenStr; { - if c := str[i]; c < ' ' || c == '"' || c == '\\' { - switch c { - case '\\', '"': - s.buf = append(s.buf, '\\', c) - case '\n': - s.buf = append(s.buf, '\\', 'n') - case '\r': - s.buf = append(s.buf, '\\', 'r') - case '\t': - s.buf = append(s.buf, '\\', 't') - case '\b': - s.buf = append(s.buf, '\\', 'b') - case '\f': - s.buf = append(s.buf, '\\', 'f') - default: - s.buf = append(s.buf, `\u00`...) - s.buf = append(s.buf, hexChars[c>>4], hexChars[c&0xF]) - } - i++ - } else { - start := i - for i < lenStr && str[i] >= ' ' && str[i] != '"' && str[i] != '\\' { - i++ - } - s.buf = append(s.buf, str[start:i]...) 
- } - } -} - -// setTimestampFormat updates the cached timestamp format in the serializer -func (s *serializer) setTimestampFormat(format string) { - if format == "" { - format = time.RFC3339Nano - } - s.timestampFormat = format + f.buf = append(f.buf, '\n') + return f.buf } \ No newline at end of file diff --git a/format_test.go b/format_test.go index cffe9db..e499d40 100644 --- a/format_test.go +++ b/format_test.go @@ -4,6 +4,8 @@ package log import ( "encoding/json" "errors" + "os" + "path/filepath" "strings" "testing" "time" @@ -12,13 +14,13 @@ import ( "github.com/stretchr/testify/require" ) -// TestSerializer tests the output of the serializer for txt, json, and raw formats -func TestSerializer(t *testing.T) { - s := newSerializer() +// TestFormatter tests the output of the formatter for txt, json, and raw formats +func TestFormatter(t *testing.T) { + f := NewFormatter("txt", 1024, time.RFC3339Nano, 0) timestamp := time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC) t.Run("txt format", func(t *testing.T) { - data := s.serialize("txt", FlagDefault, timestamp, LevelInfo, "", []any{"test message", 123}) + data := f.Format("txt", FlagDefault, timestamp, LevelInfo, "", []any{"test message", 123}) str := string(data) assert.Contains(t, str, "2024-01-01") @@ -28,8 +30,9 @@ func TestSerializer(t *testing.T) { assert.True(t, strings.HasSuffix(str, "\n")) }) + f = NewFormatter("json", 1024, time.RFC3339Nano, 0) t.Run("json format", func(t *testing.T) { - data := s.serialize("json", FlagDefault, timestamp, LevelWarn, "trace1", []any{"warning", true}) + data := f.Format("json", FlagDefault, timestamp, LevelWarn, "trace1", []any{"warning", true}) var result map[string]any err := json.Unmarshal(data[:len(data)-1], &result) // Remove trailing newline @@ -42,8 +45,9 @@ func TestSerializer(t *testing.T) { assert.Equal(t, true, fields[1]) }) + f = NewFormatter("raw", 1024, time.RFC3339Nano, 0) t.Run("raw format", func(t *testing.T) { - data := s.serialize("raw", 0, timestamp, 
LevelInfo, "", []any{"raw", "data", 42}) + data := f.Format("raw", 0, timestamp, LevelInfo, "", []any{"raw", "data", 42}) str := string(data) assert.Equal(t, "raw data 42", str) @@ -51,15 +55,16 @@ func TestSerializer(t *testing.T) { }) t.Run("flag override raw", func(t *testing.T) { - data := s.serialize("txt", FlagRaw, timestamp, LevelInfo, "", []any{"forced", "raw"}) + data := f.Format("txt", FlagRaw, timestamp, LevelInfo, "", []any{"forced", "raw"}) str := string(data) assert.Equal(t, "forced raw", str) }) + f = NewFormatter("json", 1024, time.RFC3339Nano, 0) t.Run("structured json", func(t *testing.T) { fields := map[string]any{"key1": "value1", "key2": 42} - data := s.serialize("json", FlagStructuredJSON|FlagDefault, timestamp, LevelInfo, "", + data := f.Format("json", FlagStructuredJSON|FlagDefault, timestamp, LevelInfo, "", []any{"structured message", fields}) var result map[string]any @@ -70,8 +75,9 @@ func TestSerializer(t *testing.T) { assert.Equal(t, map[string]any{"key1": "value1", "key2": float64(42)}, result["fields"]) }) + f = NewFormatter("json", 1024, time.RFC3339Nano, 3) t.Run("special characters escaping", func(t *testing.T) { - data := s.serialize("json", FlagDefault, timestamp, LevelInfo, "", + data := f.Format("json", FlagDefault, timestamp, LevelInfo, "", []any{"test\n\r\t\"\\message"}) str := string(data) @@ -80,7 +86,7 @@ func TestSerializer(t *testing.T) { t.Run("error type handling", func(t *testing.T) { err := errors.New("test error") - data := s.serialize("txt", FlagDefault, timestamp, LevelError, "", []any{err}) + data := f.Format("txt", FlagDefault, timestamp, LevelError, "", []any{err}) str := string(data) assert.Contains(t, str, "test error") @@ -108,4 +114,78 @@ func TestLevelToString(t *testing.T) { assert.Equal(t, tt.expected, levelToString(tt.level)) }) } +} + +// TestControlCharacterWrite verifies that control characters are safely handled in raw output +func TestControlCharacterWrite(t *testing.T) { + logger, tmpDir := 
createTestLogger(t) + defer logger.Shutdown() + + // Test various control characters + testCases := []struct { + name string + input string + }{ + {"null bytes", "test\x00data"}, + {"bell", "alert\x07message"}, + {"backspace", "back\x08space"}, + {"form feed", "page\x0Cbreak"}, + {"vertical tab", "vertical\x0Btab"}, + {"escape", "escape\x1B[31mcolor"}, + {"mixed", "\x00\x01\x02test\x1F\x7Fdata"}, + } + + for _, tc := range testCases { + logger.Write(tc.input) + } + + logger.Flush(time.Second) + + // Verify file contains hex-encoded control chars + content, err := os.ReadFile(filepath.Join(tmpDir, "log.log")) + require.NoError(t, err) + + // Control chars should be hex-encoded in raw output + assert.Contains(t, string(content), "test") + assert.Contains(t, string(content), "data") +} + +// TestRawSanitizedOutput verifies that raw output is correctly sanitized +func TestRawSanitizedOutput(t *testing.T) { + logger, tmpDir := createTestLogger(t) + defer logger.Shutdown() + + // 1. A string with valid multi-byte UTF-8 should be unchanged + utf8String := "Hello │ 世界" + + // 2. A string with single-byte control chars should have them encoded + stringWithControl := "start-\x07-end" + expectedStringOutput := "start-<07>-end" + + // 3. A []byte with control chars should have them encoded, not stripped + bytesWithControl := []byte("data\x00with\x08bytes") + expectedBytesOutput := "data<00>with<08>bytes" + + // 4. 
A string with a multi-byte non-printable rune (U+0085, NEXT LINE) + multiByteControl := "line1\u0085line2" + expectedMultiByteOutput := "line1<c285>line2" + + // Log all cases + logger.Write(utf8String, stringWithControl, bytesWithControl, multiByteControl) + logger.Flush(time.Second) + + // Read and verify the single line of output + content, err := os.ReadFile(filepath.Join(tmpDir, "log.log")) + require.NoError(t, err) + logOutput := string(content) + + // The output should be one line with spaces between the sanitized parts + expectedOutput := strings.Join([]string{ + utf8String, + expectedStringOutput, + expectedBytesOutput, + expectedMultiByteOutput, + }, " ") + + assert.Equal(t, expectedOutput, logOutput) } \ No newline at end of file diff --git a/go.mod b/go.mod index 03e79f3..b001199 100644 --- a/go.mod +++ b/go.mod @@ -11,5 +11,3 @@ require ( github.com/pmezard/go-difflib v1.0.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) - -replace github.com/mitchellh/mapstructure => github.com/go-viper/mapstructure v1.6.0 diff --git a/lifecycle_test.go b/lifecycle_test.go index fe8d624..678a7ee 100644 --- a/lifecycle_test.go +++ b/lifecycle_test.go @@ -100,7 +100,8 @@ func TestStopReconfigureRestart(t *testing.T) { require.NoError(t, err) strContent := string(content) - assert.Contains(t, strContent, "INFO first message", "Should contain the log from the first configuration") + // assert.Contains(t, strContent, "INFO first message", "Should contain the log from the first configuration") + assert.Contains(t, strContent, `INFO "first message"`, "Should contain the log from the first configuration") assert.Contains(t, strContent, `"fields":["second message"]`, "Should contain the log from the second (JSON) configuration") } diff --git a/logger.go b/logger.go index 9003b91..3de74ca 100644 --- a/logger.go +++ b/logger.go @@ -15,14 +15,12 @@ type Logger struct { currentConfig atomic.Value // stores *Config state State initMu sync.Mutex - serializer *serializer + formatter 
*Formatter } // NewLogger creates a new Logger instance with default settings func NewLogger() *Logger { - l := &Logger{ - serializer: newSerializer(), - } + l := &Logger{} // Set default configuration l.currentConfig.Store(DefaultConfig()) @@ -344,7 +342,7 @@ func (l *Logger) applyConfig(cfg *Config) error { oldCfg := l.getConfig() l.currentConfig.Store(cfg) - l.serializer.setTimestampFormat(cfg.TimestampFormat) + l.formatter = NewFormatter(cfg.Format, cfg.BufferSize, cfg.TimestampFormat, cfg.Sanitization) // Ensure log directory exists if file output is enabled if cfg.EnableFile { diff --git a/logger_test.go b/logger_test.go index 8b9e20a..395ff0c 100644 --- a/logger_test.go +++ b/logger_test.go @@ -28,7 +28,7 @@ func createTestLogger(t *testing.T) (*Logger, string) { err := logger.ApplyConfig(cfg) require.NoError(t, err) - // Start the logger, which is the new requirement. + // Start the logger err = logger.Start() require.NoError(t, err) @@ -40,7 +40,6 @@ func TestNewLogger(t *testing.T) { logger := NewLogger() assert.NotNil(t, logger) - assert.NotNil(t, logger.serializer) assert.False(t, logger.state.IsInitialized.Load()) assert.False(t, logger.state.LoggerDisabled.Load()) } @@ -157,9 +156,9 @@ func TestLoggerLoggingLevels(t *testing.T) { // Default level is INFO, so debug shouldn't appear assert.NotContains(t, string(content), "debug message") - assert.Contains(t, string(content), "INFO info message") - assert.Contains(t, string(content), "WARN warn message") - assert.Contains(t, string(content), "ERROR error message") + assert.Contains(t, string(content), `INFO "info message"`) + assert.Contains(t, string(content), `WARN "warn message"`) + assert.Contains(t, string(content), `ERROR "error message"`) } // TestLoggerWithTrace ensures that logging with a stack trace does not cause a panic @@ -188,7 +187,7 @@ func TestLoggerFormats(t *testing.T) { name: "txt format", format: "txt", check: func(t *testing.T, content string) { - assert.Contains(t, content, "INFO 
test message") + assert.Contains(t, content, `INFO "test message"`) }, }, { @@ -303,81 +302,4 @@ func TestLoggerWrite(t *testing.T) { assert.Contains(t, string(content), "raw output 123") assert.True(t, strings.HasSuffix(string(content), "raw output 123")) -} - -// TestControlCharacterWrite verifies that control characters are safely handled in raw output -func TestControlCharacterWrite(t *testing.T) { - logger, tmpDir := createTestLogger(t) - defer logger.Shutdown() - - // Test various control characters - testCases := []struct { - name string - input string - }{ - {"null bytes", "test\x00data"}, - {"bell", "alert\x07message"}, - {"backspace", "back\x08space"}, - {"form feed", "page\x0Cbreak"}, - {"vertical tab", "vertical\x0Btab"}, - {"escape", "escape\x1B[31mcolor"}, - {"mixed", "\x00\x01\x02test\x1F\x7Fdata"}, - } - - for _, tc := range testCases { - logger.Write(tc.input) - } - - logger.Flush(time.Second) - - // Verify file contains hex-encoded control chars - content, err := os.ReadFile(filepath.Join(tmpDir, "log.log")) - require.NoError(t, err) - - // Control chars should be hex-encoded in raw output - assert.Contains(t, string(content), "test") - assert.Contains(t, string(content), "data") - // Raw format preserves as-is, but reading back should work -} - -// TestRawSanitizedOutput verifies that raw output is correctly sanitized, -// preserving printable runes and hex-encoding non-printable ones -func TestRawSanitizedOutput(t *testing.T) { - logger, tmpDir := createTestLogger(t) - defer logger.Shutdown() - - // 1. A string with valid multi-byte UTF-8 should be unchanged - utf8String := "Hello │ 世界" - - // 2. A string with single-byte control chars should have them encoded - stringWithControl := "start-\x07-end" - expectedStringOutput := "start-<07>-end" - - // 3. A []byte with control chars should have them encoded, not stripped - bytesWithControl := []byte("data\x00with\x08bytes") - expectedBytesOutput := "data<00>with<08>bytes" - - // 4. 
A string with a multi-byte non-printable rune (U+0085, NEXT LINE) - // This proves Unicode control character handling is correct - multiByteControl := "line1\u0085line2" - expectedMultiByteOutput := "line1<c285>line2" - - // Log all cases - logger.Write(utf8String, stringWithControl, bytesWithControl, multiByteControl) - logger.Flush(time.Second) - - // Read and verify the single line of output - content, err := os.ReadFile(filepath.Join(tmpDir, "log.log")) - require.NoError(t, err) - logOutput := string(content) - - // The output should be one line with spaces between the sanitized parts - expectedOutput := strings.Join([]string{ - utf8String, - expectedStringOutput, - expectedBytesOutput, - expectedMultiByteOutput, - }, " ") - - assert.Equal(t, expectedOutput, logOutput) } \ No newline at end of file diff --git a/processor.go b/processor.go index 79c9c2c..181d169 100644 --- a/processor.go +++ b/processor.go @@ -102,9 +102,9 @@ func (l *Logger) processLogRecord(record logRecord) int64 { return 0 } - // Serialize the log entry once + // Format and serialize the log entry once format := c.Format - data := l.serializer.serialize( + data := l.formatter.Format( format, record.Flags, record.TimeStamp, diff --git a/sanitizer/sanitizer.go b/sanitizer/sanitizer.go new file mode 100644 index 0000000..df47316 --- /dev/null +++ b/sanitizer/sanitizer.go @@ -0,0 +1,229 @@ +// FILE: lixenwraith/log/sanitizer/sanitizer.go +package sanitizer + +import ( + "bytes" + "encoding/hex" + "fmt" + "strconv" + "unicode" + "unicode/utf8" + + "github.com/davecgh/go-spew/spew" +) + +// Mode controls how non-printable characters are handled +type Mode int + +// Sanitization modes +const ( + None Mode = iota // No sanitization + HexEncode // Encode as <XX> (current default) + Strip // Remove control characters + Escape // JSON-style escaping +) + +// Sanitizer provides centralized sanitization logic +type Sanitizer struct { + mode Mode + buf []byte // Reusable buffer +} + +func New(mode Mode) *Sanitizer { 
+ return &Sanitizer{ + mode: mode, + buf: make([]byte, 0, 256), + } +} + +func (s *Sanitizer) Reset() { + s.buf = s.buf[:0] +} + +func (s *Sanitizer) Sanitize(data string) string { + if s.mode == None { + return data + } + + s.Reset() + + for _, r := range data { + if strconv.IsPrint(r) { + s.buf = utf8.AppendRune(s.buf, r) + continue + } + + switch s.mode { + case HexEncode: + var runeBytes [utf8.UTFMax]byte + n := utf8.EncodeRune(runeBytes[:], r) + s.buf = append(s.buf, '<') + s.buf = append(s.buf, hex.EncodeToString(runeBytes[:n])...) + s.buf = append(s.buf, '>') + + case Strip: + // Skip non-printable + continue + + case Escape: + switch r { + case '\n': + s.buf = append(s.buf, '\\', 'n') + case '\r': + s.buf = append(s.buf, '\\', 'r') + case '\t': + s.buf = append(s.buf, '\\', 't') + case '\b': + s.buf = append(s.buf, '\\', 'b') + case '\f': + s.buf = append(s.buf, '\\', 'f') + default: + // Unicode escape for other control chars + s.buf = append(s.buf, '\\', 'u') + s.buf = append(s.buf, fmt.Sprintf("%04x", r)...) + } + } + } + + return string(s.buf) +} + +// UnifiedHandler implements all format behaviors in a single struct +type UnifiedHandler struct { + format string + sanitizer *Sanitizer +} + +func NewUnifiedHandler(format string, san *Sanitizer) *UnifiedHandler { + return &UnifiedHandler{ + format: format, + sanitizer: san, + } +} + +func (h *UnifiedHandler) WriteString(buf *[]byte, s string) { + switch h.format { + case "raw": + *buf = append(*buf, h.sanitizer.Sanitize(s)...) + + case "txt": + sanitized := h.sanitizer.Sanitize(s) + if h.NeedsQuotes(sanitized) { + *buf = append(*buf, '"') + // Escape quotes within quoted strings + for i := 0; i < len(sanitized); i++ { + if sanitized[i] == '"' || sanitized[i] == '\\' { + *buf = append(*buf, '\\') + } + *buf = append(*buf, sanitized[i]) + } + *buf = append(*buf, '"') + } else { + *buf = append(*buf, sanitized...) 
+ } + + case "json": + *buf = append(*buf, '"') + // Direct JSON escaping without pre-sanitization + for i := 0; i < len(s); { + c := s[i] + if c >= ' ' && c != '"' && c != '\\' { + start := i + for i < len(s) && s[i] >= ' ' && s[i] != '"' && s[i] != '\\' { + i++ + } + *buf = append(*buf, s[start:i]...) + } else { + switch c { + case '\\', '"': + *buf = append(*buf, '\\', c) + case '\n': + *buf = append(*buf, '\\', 'n') + case '\r': + *buf = append(*buf, '\\', 'r') + case '\t': + *buf = append(*buf, '\\', 't') + case '\b': + *buf = append(*buf, '\\', 'b') + case '\f': + *buf = append(*buf, '\\', 'f') + default: + *buf = append(*buf, fmt.Sprintf("\\u%04x", c)...) + } + i++ + } + } + *buf = append(*buf, '"') + } +} + +func (h *UnifiedHandler) WriteNumber(buf *[]byte, n string) { + *buf = append(*buf, n...) +} + +func (h *UnifiedHandler) WriteBool(buf *[]byte, b bool) { + *buf = strconv.AppendBool(*buf, b) +} + +func (h *UnifiedHandler) WriteNil(buf *[]byte) { + switch h.format { + case "raw": + *buf = append(*buf, "nil"...) + default: // txt, json + *buf = append(*buf, "null"...) + } +} + +func (h *UnifiedHandler) WriteComplex(buf *[]byte, v any) { + switch h.format { + case "raw": + // Use spew for complex types in raw mode, DEBUG use + var b bytes.Buffer + dumper := &spew.ConfigState{ + Indent: " ", + MaxDepth: 10, + DisablePointerAddresses: true, + DisableCapacities: true, + SortKeys: true, + } + dumper.Fdump(&b, v) + *buf = append(*buf, bytes.TrimSpace(b.Bytes())...) + + default: // txt, json + str := fmt.Sprintf("%+v", v) + h.WriteString(buf, str) + } +} + +func (h *UnifiedHandler) NeedsQuotes(s string) bool { + switch h.format { + case "json": + return true // JSON always quotes + case "txt": + // Quote strings that: + // 1. Are empty + if len(s) == 0 { + return true + } + for _, r := range s { + // 2. Contain whitespace (space, tab, newline, etc.) + if unicode.IsSpace(r) { + return true + } + // 3. 
Contain shell special characters (POSIX + common extensions) + switch r { + case '"', '\'', '\\', '$', '`', '!', '&', '|', ';', + '(', ')', '<', '>', '*', '?', '[', ']', '{', '}', + '~', '#', '%', '=', '\n', '\r', '\t': + return true + } + // 4. Non-print + if !unicode.IsPrint(r) { + return true + } + } + return false + default: // raw + return false + } +} \ No newline at end of file diff --git a/sanitizer/sanitizer_test.go b/sanitizer/sanitizer_test.go new file mode 100644 index 0000000..8fd857c --- /dev/null +++ b/sanitizer/sanitizer_test.go @@ -0,0 +1,206 @@ +// FILE: lixenwraith/log/sanitizer/sanitizer_test.go +package sanitizer + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestSanitizer(t *testing.T) { + testCases := []struct { + name string + input string + mode Mode + expected string + }{ + // None mode tests + { + name: "none mode passes through", + input: "hello\x00world\n", + mode: None, + expected: "hello\x00world\n", + }, + + // HexEncode tests + { + name: "hex encode null byte", + input: "test\x00data", + mode: HexEncode, + expected: "test<00>data", + }, + { + name: "hex encode control chars", + input: "bell\x07tab\x09form\x0c", + mode: HexEncode, + expected: "bell<07>tab<09>form<0c>", + }, + { + name: "hex encode preserves printable", + input: "Hello World 123!@#", + mode: HexEncode, + expected: "Hello World 123!@#", + }, + { + name: "hex encode multi-byte control", + input: "line1\u0085line2", // NEXT LINE (C2 85) + mode: HexEncode, + expected: "line1<c285>line2", + }, + { + name: "hex encode preserves UTF-8", + input: "Hello 世界 ✓", + mode: HexEncode, + expected: "Hello 世界 ✓", + }, + + // Strip tests + { + name: "strip removes control chars", + input: "clean\x00\x07\ntxt", + mode: Strip, + expected: "cleantxt", + }, + { + name: "strip preserves spaces", + input: "hello world", + mode: Strip, + expected: "hello world", + }, + + // Escape tests + { + name: "escape common control chars", + input: 
"line1\nline2\ttab\rreturn", + mode: Escape, + expected: "line1\\nline2\\ttab\\rreturn", + }, + { + name: "escape unicode control", + input: "text\x01\x1f", + mode: Escape, + expected: "text\\u0001\\u001f", + }, + { + name: "escape backspace and form feed", + input: "back\bspace form\ffeed", + mode: Escape, + expected: "back\\bspace form\\ffeed", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + s := New(tc.mode) + result := s.Sanitize(tc.input) + assert.Equal(t, tc.expected, result) + }) + } +} + +func TestUnifiedHandler(t *testing.T) { + t.Run("raw format", func(t *testing.T) { + san := New(HexEncode) + handler := NewUnifiedHandler("raw", san) + + var buf []byte + + // String handling + handler.WriteString(&buf, "test\x00data") + assert.Equal(t, "test<00>data", string(buf)) + + // Nil handling + buf = nil + handler.WriteNil(&buf) + assert.Equal(t, "nil", string(buf)) + + // No quotes needed + assert.False(t, handler.NeedsQuotes("any string")) + }) + + t.Run("txt format", func(t *testing.T) { + san := New(HexEncode) + handler := NewUnifiedHandler("txt", san) + + var buf []byte + + // String with spaces gets quoted + handler.WriteString(&buf, "hello world") + assert.Equal(t, `"hello world"`, string(buf)) + + // String without spaces unquoted + buf = nil + handler.WriteString(&buf, "single") + assert.Equal(t, "single", string(buf)) + + // Nil handling + buf = nil + handler.WriteNil(&buf) + assert.Equal(t, "null", string(buf)) + + // Quotes needed for empty or space-containing + assert.True(t, handler.NeedsQuotes("")) + assert.True(t, handler.NeedsQuotes("has space")) + assert.False(t, handler.NeedsQuotes("nospace")) + }) + + t.Run("json format", func(t *testing.T) { + san := New(Escape) // Not used for JSON, direct escaping + handler := NewUnifiedHandler("json", san) + + var buf []byte + + // JSON escaping + handler.WriteString(&buf, "line1\nline2\t\"quoted\"") + assert.Equal(t, `"line1\nline2\t\"quoted\""`, string(buf)) + + // 
Control char escaping + buf = nil + handler.WriteString(&buf, "null\x00byte") + assert.Equal(t, `"null\u0000byte"`, string(buf)) + + // Always quotes + assert.True(t, handler.NeedsQuotes("anything")) + }) + + t.Run("complex value handling", func(t *testing.T) { + san := New(HexEncode) + + // Raw uses spew + rawHandler := NewUnifiedHandler("raw", san) + var buf []byte + rawHandler.WriteComplex(&buf, map[string]int{"a": 1}) + assert.Contains(t, string(buf), "map[") + + // Txt/JSON use fmt.Sprintf + txtHandler := NewUnifiedHandler("txt", san) + buf = nil + txtHandler.WriteComplex(&buf, []int{1, 2, 3}) + assert.Contains(t, string(buf), "[1 2 3]") + }) +} + +func BenchmarkSanitizer(b *testing.B) { + input := strings.Repeat("normal text\x00\n\t", 100) + + benchmarks := []struct { + name string + mode Mode + }{ + {"None", None}, + {"HexEncode", HexEncode}, + {"Strip", Strip}, + {"Escape", Escape}, + } + + for _, bm := range benchmarks { + b.Run(bm.name, func(b *testing.B) { + s := New(bm.mode) + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = s.Sanitize(input) + } + }) + } +} \ No newline at end of file diff --git a/utility.go b/utility.go index f95fe0b..8efba20 100644 --- a/utility.go +++ b/utility.go @@ -112,4 +112,26 @@ func Level(levelStr string) (int64, error) { default: return 0, fmtErrorf("invalid level string: '%s' (use debug, info, warn, error, proc, disk, sys)", levelStr) } +} + +// levelToString converts integer level values to string +func levelToString(level int64) string { + switch level { + case LevelDebug: + return "DEBUG" + case LevelInfo: + return "INFO" + case LevelWarn: + return "WARN" + case LevelError: + return "ERROR" + case LevelProc: + return "PROC" + case LevelDisk: + return "DISK" + case LevelSys: + return "SYS" + default: + return fmt.Sprintf("LEVEL(%d)", level) + } } \ No newline at end of file