v0.1.2 sanitizer redesigned with policies and rules

This commit is contained in:
2025-11-15 13:23:18 -05:00
parent af162755dd
commit b2be5cec88
9 changed files with 496 additions and 338 deletions

View File

@ -75,8 +75,8 @@ func (b *Builder) Format(format string) *Builder {
} }
// Sanitization sets the sanitization mode // Sanitization sets the sanitization mode
func (b *Builder) Sanitization(mode sanitizer.Mode) *Builder { func (b *Builder) Sanitization(policy sanitizer.PolicyPreset) *Builder {
b.cfg.Sanitization = mode b.cfg.Sanitization = policy
return b return b
} }

View File

@ -28,7 +28,7 @@ type Config struct {
ShowTimestamp bool `toml:"show_timestamp"` // Add timestamp to log records ShowTimestamp bool `toml:"show_timestamp"` // Add timestamp to log records
ShowLevel bool `toml:"show_level"` // Add level to log record ShowLevel bool `toml:"show_level"` // Add level to log record
TimestampFormat string `toml:"timestamp_format"` // Time format for log timestamps TimestampFormat string `toml:"timestamp_format"` // Time format for log timestamps
Sanitization sanitizer.Mode `toml:"sanitization"` // 0=None, 1=HexEncode, 2=Strip, 3=Escape Sanitization sanitizer.PolicyPreset `toml:"sanitization"` // "default", "json", "txt", "shell"
// Buffer and size limits // Buffer and size limits
BufferSize int64 `toml:"buffer_size"` // Channel buffer size BufferSize int64 `toml:"buffer_size"` // Channel buffer size
@ -75,7 +75,7 @@ var defaultConfig = Config{
ShowTimestamp: true, ShowTimestamp: true,
ShowLevel: true, ShowLevel: true,
TimestampFormat: time.RFC3339Nano, TimestampFormat: time.RFC3339Nano,
Sanitization: sanitizer.HexEncode, Sanitization: sanitizer.PolicyTxt,
// Buffer and size limits // Buffer and size limits
BufferSize: 1024, BufferSize: 1024,
@ -127,9 +127,11 @@ func (c *Config) Validate() error {
return fmtErrorf("invalid format: '%s' (use txt, json, or raw)", c.Format) return fmtErrorf("invalid format: '%s' (use txt, json, or raw)", c.Format)
} }
// TODO: better bound check, implement validator in `sanitizer` switch c.Sanitization {
if c.Sanitization < 0 || c.Sanitization > sanitizer.Escape { case sanitizer.PolicyRaw, sanitizer.PolicyJSON, sanitizer.PolicyTxt, sanitizer.PolicyShell:
return fmtErrorf("invalid sanitization mode: '%d' (use 0=None, 1=HexEncode, 2=Strip, 3=Escape)", c.Sanitization) // valid policy
default:
return fmtErrorf("invalid sanitization policy: '%s' (use raw, json, txt, or shell)", c.Sanitization)
} }
if strings.HasPrefix(c.Extension, ".") { if strings.HasPrefix(c.Extension, ".") {
@ -226,11 +228,7 @@ func applyConfigField(cfg *Config, key, value string) error {
case "timestamp_format": case "timestamp_format":
cfg.TimestampFormat = value cfg.TimestampFormat = value
case "sanitization": case "sanitization":
intVal, err := strconv.ParseInt(value, 10, 64) cfg.Sanitization = sanitizer.PolicyPreset(value)
if err != nil {
return fmtErrorf("invalid integer value for sanitization '%s': %w", value, err)
}
cfg.Sanitization = sanitizer.Mode(intVal)
// Buffer and size limits // Buffer and size limits
case "buffer_size": case "buffer_size":

View File

@ -22,9 +22,9 @@ const (
// Record flags for controlling output structure // Record flags for controlling output structure
const ( const (
FlagShowTimestamp int64 = 0b0001 FlagRaw int64 = 0b0001
FlagShowLevel int64 = 0b0010 FlagShowTimestamp int64 = 0b0010
FlagRaw int64 = 0b0100 FlagShowLevel int64 = 0b0100
FlagStructuredJSON int64 = 0b1000 FlagStructuredJSON int64 = 0b1000
FlagDefault = FlagShowTimestamp | FlagShowLevel FlagDefault = FlagShowTimestamp | FlagShowLevel
) )

116
format.go
View File

@ -16,24 +16,27 @@ type Formatter struct {
format string format string
buf []byte buf []byte
timestampFormat string timestampFormat string
sanitizationMode sanitizer.Mode
sanitizer *sanitizer.Sanitizer sanitizer *sanitizer.Sanitizer
} }
// NewFormatter creates a formatter instance // NewFormatter creates a formatter instance
func NewFormatter(format string, bufferSize int64, timestampFormat string, sanitizationMode sanitizer.Mode) *Formatter { func NewFormatter(format string, bufferSize int64, timestampFormat string, sanitizationPolicy sanitizer.PolicyPreset) *Formatter {
if timestampFormat == "" { if timestampFormat == "" {
timestampFormat = time.RFC3339Nano timestampFormat = time.RFC3339Nano
} }
if format == "" { if format == "" {
format = "txt" format = "txt"
} }
if sanitizationPolicy == "" {
sanitizationPolicy = "raw"
}
s := (sanitizer.New()).Policy(sanitizationPolicy)
return &Formatter{ return &Formatter{
format: format, format: format,
buf: make([]byte, 0, bufferSize), buf: make([]byte, 0, bufferSize),
timestampFormat: timestampFormat, timestampFormat: timestampFormat,
sanitizationMode: sanitizationMode, sanitizer: s,
sanitizer: sanitizer.New(sanitizationMode),
} }
} }
@ -46,31 +49,45 @@ func (f *Formatter) Reset() {
func (f *Formatter) Format(format string, flags int64, timestamp time.Time, level int64, trace string, args []any) []byte { func (f *Formatter) Format(format string, flags int64, timestamp time.Time, level int64, trace string, args []any) []byte {
f.Reset() f.Reset()
// The FlagRaw acts as an override to the configured format // FlagRaw completely bypasses formatting and sanitization
effectiveFormat := format
if flags&FlagRaw != 0 { if flags&FlagRaw != 0 {
effectiveFormat = "raw" for i, arg := range args {
if i > 0 {
f.buf = append(f.buf, ' ')
}
// Direct conversion without sanitization
switch v := arg.(type) {
case string:
f.buf = append(f.buf, v...)
case []byte:
f.buf = append(f.buf, v...)
case fmt.Stringer:
f.buf = append(f.buf, v.String()...)
case error:
f.buf = append(f.buf, v.Error()...)
default:
f.buf = append(f.buf, fmt.Sprint(v)...)
}
}
return f.buf
} }
// Create the handler based on the effective format // Create the serializer based on the effective format
handler := sanitizer.NewUnifiedHandler(effectiveFormat, f.sanitizer) serializer := sanitizer.NewSerializer(format, f.sanitizer)
switch effectiveFormat { switch format {
case "raw": case "raw":
// This dedicated path handles both format="raw" and FlagRaw // Raw formatting serializes the arguments and adds NO metadata or newlines
// It only serializes the arguments and adds NO metadata or newlines
for i, arg := range args { for i, arg := range args {
f.convertValue(&f.buf, arg, handler, i > 0) f.convertValue(&f.buf, arg, serializer, i > 0)
} }
return f.buf return f.buf
case "json": case "json":
// The existing JSON serialization logic remains unchanged return f.formatJSON(flags, timestamp, level, trace, args, serializer)
return f.formatJSON(flags, timestamp, level, trace, args, handler)
case "txt": case "txt":
// The existing Txt serialization logic is now correctly isolated return f.formatTxt(flags, timestamp, level, trace, args, serializer)
return f.formatTxt(flags, timestamp, level, trace, args, handler)
} }
return nil // forcing panic on unrecognized format return nil // forcing panic on unrecognized format
@ -79,86 +96,86 @@ func (f *Formatter) Format(format string, flags int64, timestamp time.Time, leve
// FormatValue formats a single value according to the formatter's configuration // FormatValue formats a single value according to the formatter's configuration
func (f *Formatter) FormatValue(v any) []byte { func (f *Formatter) FormatValue(v any) []byte {
f.Reset() f.Reset()
handler := sanitizer.NewUnifiedHandler(f.format, f.sanitizer) serializer := sanitizer.NewSerializer(f.format, f.sanitizer)
f.convertValue(&f.buf, v, handler, false) f.convertValue(&f.buf, v, serializer, false)
return f.buf return f.buf
} }
// FormatArgs formats multiple arguments as space-separated values // FormatArgs formats multiple arguments as space-separated values
func (f *Formatter) FormatArgs(args ...any) []byte { func (f *Formatter) FormatArgs(args ...any) []byte {
f.Reset() f.Reset()
handler := sanitizer.NewUnifiedHandler(f.format, f.sanitizer) serializer := sanitizer.NewSerializer(f.format, f.sanitizer)
for i, arg := range args { for i, arg := range args {
f.convertValue(&f.buf, arg, handler, i > 0) f.convertValue(&f.buf, arg, serializer, i > 0)
} }
return f.buf return f.buf
} }
// convertValue provides unified type conversion // convertValue provides unified type conversion
func (f *Formatter) convertValue(buf *[]byte, v any, handler *sanitizer.UnifiedHandler, needsSpace bool) { func (f *Formatter) convertValue(buf *[]byte, v any, serializer *sanitizer.Serializer, needsSpace bool) {
if needsSpace && len(*buf) > 0 { if needsSpace && len(*buf) > 0 {
*buf = append(*buf, ' ') *buf = append(*buf, ' ')
} }
switch val := v.(type) { switch val := v.(type) {
case string: case string:
handler.WriteString(buf, val) serializer.WriteString(buf, val)
case []byte: case []byte:
handler.WriteString(buf, string(val)) serializer.WriteString(buf, string(val))
case rune: case rune:
var runeStr [utf8.UTFMax]byte var runeStr [utf8.UTFMax]byte
n := utf8.EncodeRune(runeStr[:], val) n := utf8.EncodeRune(runeStr[:], val)
handler.WriteString(buf, string(runeStr[:n])) serializer.WriteString(buf, string(runeStr[:n]))
case int: case int:
num := strconv.AppendInt(nil, int64(val), 10) num := strconv.AppendInt(nil, int64(val), 10)
handler.WriteNumber(buf, string(num)) serializer.WriteNumber(buf, string(num))
case int64: case int64:
num := strconv.AppendInt(nil, val, 10) num := strconv.AppendInt(nil, val, 10)
handler.WriteNumber(buf, string(num)) serializer.WriteNumber(buf, string(num))
case uint: case uint:
num := strconv.AppendUint(nil, uint64(val), 10) num := strconv.AppendUint(nil, uint64(val), 10)
handler.WriteNumber(buf, string(num)) serializer.WriteNumber(buf, string(num))
case uint64: case uint64:
num := strconv.AppendUint(nil, val, 10) num := strconv.AppendUint(nil, val, 10)
handler.WriteNumber(buf, string(num)) serializer.WriteNumber(buf, string(num))
case float32: case float32:
num := strconv.AppendFloat(nil, float64(val), 'f', -1, 32) num := strconv.AppendFloat(nil, float64(val), 'f', -1, 32)
handler.WriteNumber(buf, string(num)) serializer.WriteNumber(buf, string(num))
case float64: case float64:
num := strconv.AppendFloat(nil, val, 'f', -1, 64) num := strconv.AppendFloat(nil, val, 'f', -1, 64)
handler.WriteNumber(buf, string(num)) serializer.WriteNumber(buf, string(num))
case bool: case bool:
handler.WriteBool(buf, val) serializer.WriteBool(buf, val)
case nil: case nil:
handler.WriteNil(buf) serializer.WriteNil(buf)
case time.Time: case time.Time:
timeStr := val.Format(f.timestampFormat) timeStr := val.Format(f.timestampFormat)
handler.WriteString(buf, timeStr) serializer.WriteString(buf, timeStr)
case error: case error:
handler.WriteString(buf, val.Error()) serializer.WriteString(buf, val.Error())
case fmt.Stringer: case fmt.Stringer:
handler.WriteString(buf, val.String()) serializer.WriteString(buf, val.String())
default: default:
handler.WriteComplex(buf, val) serializer.WriteComplex(buf, val)
} }
} }
// formatJSON unifies JSON output // formatJSON unifies JSON output
func (f *Formatter) formatJSON(flags int64, timestamp time.Time, level int64, trace string, args []any, handler *sanitizer.UnifiedHandler) []byte { func (f *Formatter) formatJSON(flags int64, timestamp time.Time, level int64, trace string, args []any, serializer *sanitizer.Serializer) []byte {
f.buf = append(f.buf, '{') f.buf = append(f.buf, '{')
needsComma := false needsComma := false
@ -184,7 +201,7 @@ func (f *Formatter) formatJSON(flags int64, timestamp time.Time, level int64, tr
f.buf = append(f.buf, ',') f.buf = append(f.buf, ',')
} }
f.buf = append(f.buf, `"trace":`...) f.buf = append(f.buf, `"trace":`...)
handler.WriteString(&f.buf, trace) serializer.WriteString(&f.buf, trace)
needsComma = true needsComma = true
} }
@ -196,7 +213,7 @@ func (f *Formatter) formatJSON(flags int64, timestamp time.Time, level int64, tr
f.buf = append(f.buf, ',') f.buf = append(f.buf, ',')
} }
f.buf = append(f.buf, `"message":`...) f.buf = append(f.buf, `"message":`...)
handler.WriteString(&f.buf, message) serializer.WriteString(&f.buf, message)
f.buf = append(f.buf, ',') f.buf = append(f.buf, ',')
f.buf = append(f.buf, `"fields":`...) f.buf = append(f.buf, `"fields":`...)
@ -204,7 +221,7 @@ func (f *Formatter) formatJSON(flags int64, timestamp time.Time, level int64, tr
marshaledFields, err := json.Marshal(fields) marshaledFields, err := json.Marshal(fields)
if err != nil { if err != nil {
f.buf = append(f.buf, `{"_marshal_error":"`...) f.buf = append(f.buf, `{"_marshal_error":"`...)
handler.WriteString(&f.buf, err.Error()) serializer.WriteString(&f.buf, err.Error())
f.buf = append(f.buf, `"}`...) f.buf = append(f.buf, `"}`...)
} else { } else {
f.buf = append(f.buf, marshaledFields...) f.buf = append(f.buf, marshaledFields...)
@ -226,7 +243,7 @@ func (f *Formatter) formatJSON(flags int64, timestamp time.Time, level int64, tr
if i > 0 { if i > 0 {
f.buf = append(f.buf, ',') f.buf = append(f.buf, ',')
} }
f.convertValue(&f.buf, arg, handler, false) f.convertValue(&f.buf, arg, serializer, false)
} }
f.buf = append(f.buf, ']') f.buf = append(f.buf, ']')
} }
@ -236,7 +253,7 @@ func (f *Formatter) formatJSON(flags int64, timestamp time.Time, level int64, tr
} }
// formatTxt handles txt format output // formatTxt handles txt format output
func (f *Formatter) formatTxt(flags int64, timestamp time.Time, level int64, trace string, args []any, handler *sanitizer.UnifiedHandler) []byte { func (f *Formatter) formatTxt(flags int64, timestamp time.Time, level int64, trace string, args []any, serializer *sanitizer.Serializer) []byte {
needsSpace := false needsSpace := false
if flags&FlagShowTimestamp != 0 { if flags&FlagShowTimestamp != 0 {
@ -256,12 +273,21 @@ func (f *Formatter) formatTxt(flags int64, timestamp time.Time, level int64, tra
if needsSpace { if needsSpace {
f.buf = append(f.buf, ' ') f.buf = append(f.buf, ' ')
} }
f.buf = append(f.buf, trace...) // Sanitize trace to prevent terminal control sequence injection
traceHandler := sanitizer.NewSerializer("txt", f.sanitizer)
tempBuf := make([]byte, 0, len(trace)*2)
traceHandler.WriteString(&tempBuf, trace)
// Extract content without quotes if added by txt serializer
if len(tempBuf) > 2 && tempBuf[0] == '"' && tempBuf[len(tempBuf)-1] == '"' {
f.buf = append(f.buf, tempBuf[1:len(tempBuf)-1]...)
} else {
f.buf = append(f.buf, tempBuf...)
}
needsSpace = true needsSpace = true
} }
for _, arg := range args { for _, arg := range args {
f.convertValue(&f.buf, arg, handler, needsSpace) f.convertValue(&f.buf, arg, serializer, needsSpace)
needsSpace = true needsSpace = true
} }

View File

@ -10,13 +10,14 @@ import (
"testing" "testing"
"time" "time"
"github.com/lixenwraith/log/sanitizer"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
) )
// TestFormatter tests the output of the formatter for txt, json, and raw formats // TestFormatter tests the output of the formatter for txt, json, and raw formats
func TestFormatter(t *testing.T) { func TestFormatter(t *testing.T) {
f := NewFormatter("txt", 1024, time.RFC3339Nano, 0) f := NewFormatter("txt", 1024, time.RFC3339Nano, sanitizer.PolicyRaw)
timestamp := time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC) timestamp := time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC)
t.Run("txt format", func(t *testing.T) { t.Run("txt format", func(t *testing.T) {
@ -30,7 +31,7 @@ func TestFormatter(t *testing.T) {
assert.True(t, strings.HasSuffix(str, "\n")) assert.True(t, strings.HasSuffix(str, "\n"))
}) })
f = NewFormatter("json", 1024, time.RFC3339Nano, 0) f = NewFormatter("json", 1024, time.RFC3339Nano, sanitizer.PolicyRaw)
t.Run("json format", func(t *testing.T) { t.Run("json format", func(t *testing.T) {
data := f.Format("json", FlagDefault, timestamp, LevelWarn, "trace1", []any{"warning", true}) data := f.Format("json", FlagDefault, timestamp, LevelWarn, "trace1", []any{"warning", true})
@ -45,7 +46,7 @@ func TestFormatter(t *testing.T) {
assert.Equal(t, true, fields[1]) assert.Equal(t, true, fields[1])
}) })
f = NewFormatter("raw", 1024, time.RFC3339Nano, 0) f = NewFormatter("raw", 1024, time.RFC3339Nano, sanitizer.PolicyRaw)
t.Run("raw format", func(t *testing.T) { t.Run("raw format", func(t *testing.T) {
data := f.Format("raw", 0, timestamp, LevelInfo, "", []any{"raw", "data", 42}) data := f.Format("raw", 0, timestamp, LevelInfo, "", []any{"raw", "data", 42})
str := string(data) str := string(data)
@ -61,7 +62,7 @@ func TestFormatter(t *testing.T) {
assert.Equal(t, "forced raw", str) assert.Equal(t, "forced raw", str)
}) })
f = NewFormatter("json", 1024, time.RFC3339Nano, 0) f = NewFormatter("json", 1024, time.RFC3339Nano, sanitizer.PolicyJSON)
t.Run("structured json", func(t *testing.T) { t.Run("structured json", func(t *testing.T) {
fields := map[string]any{"key1": "value1", "key2": 42} fields := map[string]any{"key1": "value1", "key2": 42}
data := f.Format("json", FlagStructuredJSON|FlagDefault, timestamp, LevelInfo, "", data := f.Format("json", FlagStructuredJSON|FlagDefault, timestamp, LevelInfo, "",
@ -75,7 +76,7 @@ func TestFormatter(t *testing.T) {
assert.Equal(t, map[string]any{"key1": "value1", "key2": float64(42)}, result["fields"]) assert.Equal(t, map[string]any{"key1": "value1", "key2": float64(42)}, result["fields"])
}) })
f = NewFormatter("json", 1024, time.RFC3339Nano, 3) f = NewFormatter("json", 1024, time.RFC3339Nano, sanitizer.PolicyJSON)
t.Run("special characters escaping", func(t *testing.T) { t.Run("special characters escaping", func(t *testing.T) {
data := f.Format("json", FlagDefault, timestamp, LevelInfo, "", data := f.Format("json", FlagDefault, timestamp, LevelInfo, "",
[]any{"test\n\r\t\"\\message"}) []any{"test\n\r\t\"\\message"})
@ -121,33 +122,42 @@ func TestControlCharacterWrite(t *testing.T) {
logger, tmpDir := createTestLogger(t) logger, tmpDir := createTestLogger(t)
defer logger.Shutdown() defer logger.Shutdown()
// Test various control characters cfg := logger.GetConfig()
cfg.Format = "raw"
cfg.ShowTimestamp = false
cfg.ShowLevel = false
err := logger.ApplyConfig(cfg)
require.NoError(t, err)
// Test various control characters with expected sanitized output
testCases := []struct { testCases := []struct {
name string name string
input string input string
expected string
}{ }{
{"null bytes", "test\x00data"}, {"null bytes", "test\x00data", "test<00>data"},
{"bell", "alert\x07message"}, {"bell", "alert\x07message", "alert<07>message"},
{"backspace", "back\x08space"}, {"backspace", "back\x08space", "back<08>space"},
{"form feed", "page\x0Cbreak"}, {"form feed", "page\x0Cbreak", "page<0c>break"},
{"vertical tab", "vertical\x0Btab"}, {"vertical tab", "vertical\x0Btab", "vertical<0b>tab"},
{"escape", "escape\x1B[31mcolor"}, {"escape", "escape\x1B[31mcolor", "escape<1b>[31mcolor"},
{"mixed", "\x00\x01\x02test\x1F\x7Fdata"}, {"mixed", "\x00\x01\x02test\x1F\x7Fdata", "<00><01><02>test<1f><7f>data"},
} }
for _, tc := range testCases { for _, tc := range testCases {
logger.Write(tc.input) logger.Message(tc.input)
} }
logger.Flush(time.Second) logger.Flush(time.Second)
// Verify file contains hex-encoded control chars
content, err := os.ReadFile(filepath.Join(tmpDir, "log.log")) content, err := os.ReadFile(filepath.Join(tmpDir, "log.log"))
require.NoError(t, err) require.NoError(t, err)
// Control chars should be hex-encoded in raw output // Verify each test case produced correct sanitized output
assert.Contains(t, string(content), "test") for _, tc := range testCases {
assert.Contains(t, string(content), "data") assert.Contains(t, string(content), tc.expected,
"Test case '%s' should produce hex-encoded control chars", tc.name)
}
} }
// TestRawSanitizedOutput verifies that raw output is correctly sanitized // TestRawSanitizedOutput verifies that raw output is correctly sanitized
@ -155,6 +165,14 @@ func TestRawSanitizedOutput(t *testing.T) {
logger, tmpDir := createTestLogger(t) logger, tmpDir := createTestLogger(t)
defer logger.Shutdown() defer logger.Shutdown()
// Use raw format instead of Write() to test sanitization
cfg := logger.GetConfig()
cfg.Format = "raw"
cfg.ShowTimestamp = false
cfg.ShowLevel = false
err := logger.ApplyConfig(cfg)
require.NoError(t, err)
// 1. A string with valid multi-byte UTF-8 should be unchanged // 1. A string with valid multi-byte UTF-8 should be unchanged
utf8String := "Hello │ 世界" utf8String := "Hello │ 世界"
@ -171,7 +189,7 @@ func TestRawSanitizedOutput(t *testing.T) {
expectedMultiByteOutput := "line1<c285>line2" expectedMultiByteOutput := "line1<c285>line2"
// Log all cases // Log all cases
logger.Write(utf8String, stringWithControl, bytesWithControl, multiByteControl) logger.Message(utf8String, stringWithControl, bytesWithControl, multiByteControl)
logger.Flush(time.Second) logger.Flush(time.Second)
// Read and verify the single line of output // Read and verify the single line of output

View File

@ -326,8 +326,7 @@ func (l *Logger) LogStructured(level int64, message string, fields map[string]an
l.log(l.getFlags()|FlagStructuredJSON, level, 0, []any{message, fields}) l.log(l.getFlags()|FlagStructuredJSON, level, 0, []any{message, fields})
} }
// Write outputs raw, unformatted data regardless of configured format // Write outputs raw, unformatted data ignoring configured format and sanitization without trailing new line
// Writes args as space-separated strings without a trailing newline
func (l *Logger) Write(args ...any) { func (l *Logger) Write(args ...any) {
l.log(FlagRaw, LevelInfo, 0, args...) l.log(FlagRaw, LevelInfo, 0, args...)
} }

View File

@ -1,4 +1,6 @@
// FILE: lixenwraith/log/sanitizer/sanitizer.go // FILE: lixenwraith/log/sanitizer/sanitizer.go
// Package sanitizer provides a fluent and composable interface for sanitizing
// strings based on configurable rules using bitwise filter flags and transforms.
package sanitizer package sanitizer
import ( import (
@ -12,105 +14,186 @@ import (
"github.com/davecgh/go-spew/spew" "github.com/davecgh/go-spew/spew"
) )
// Mode controls how non-printable characters are handled // Filter flags for character matching
type Mode int
// Sanitization modes
const ( const (
None Mode = iota // No sanitization FilterNonPrintable uint64 = 1 << iota // Matches runes not classified as printable by strconv.IsPrint
HexEncode // Encode as <hex> (current default) FilterControl // Matches control characters (unicode.IsControl)
Strip // Remove control characters FilterWhitespace // Matches whitespace characters (unicode.IsSpace)
Escape // JSON-style escaping FilterShellSpecial // Matches common shell metacharacters: '`', '$', ';', '|', '&', '>', '<', '(', ')', '#'
) )
// Sanitizer provides centralized sanitization logic // Transform flags for character transformation
type Sanitizer struct { const (
mode Mode TransformStrip uint64 = 1 << iota // Removes the character
buf []byte // Reusable buffer TransformHexEncode // Encodes the character's UTF-8 bytes as "<XXYY>"
TransformJSONEscape // Escapes the character with JSON-style backslashes (e.g., '\n', '\u0000')
)
// PolicyPreset defines pre-configured sanitization policies
type PolicyPreset string
const (
PolicyRaw PolicyPreset = "raw" // Default is a no-op (passthrough)
PolicyJSON PolicyPreset = "json" // Policy for sanitizing strings to be embedded in JSON
PolicyTxt PolicyPreset = "txt" // Policy for sanitizing text written to log files
PolicyShell PolicyPreset = "shell" // Policy for sanitizing arguments passed to shell commands
)
// rule represents a single sanitization rule
type rule struct {
filter uint64
transform uint64
} }
func New(mode Mode) *Sanitizer { // policyRules contains pre-configured rules for each policy
var policyRules = map[PolicyPreset][]rule{
PolicyRaw: {},
PolicyTxt: {{filter: FilterNonPrintable, transform: TransformHexEncode}},
PolicyJSON: {{filter: FilterControl, transform: TransformJSONEscape}},
PolicyShell: {{filter: FilterShellSpecial | FilterWhitespace, transform: TransformStrip}},
}
// filterCheckers maps individual filter flags to their check functions
var filterCheckers = map[uint64]func(rune) bool{
FilterNonPrintable: func(r rune) bool { return !strconv.IsPrint(r) },
FilterControl: unicode.IsControl,
FilterWhitespace: unicode.IsSpace,
FilterShellSpecial: func(r rune) bool {
switch r {
case '`', '$', ';', '|', '&', '>', '<', '(', ')', '#':
return true
}
return false
},
}
// Sanitizer provides chainable text sanitization
type Sanitizer struct {
rules []rule
buf []byte
}
// New creates a new Sanitizer instance
func New() *Sanitizer {
return &Sanitizer{ return &Sanitizer{
mode: mode, rules: []rule{},
buf: make([]byte, 0, 256), buf: make([]byte, 0, 256),
} }
} }
func (s *Sanitizer) Reset() { // Rule adds a custom rule to the sanitizer (prepended for precedence)
s.buf = s.buf[:0] func (s *Sanitizer) Rule(filter uint64, transform uint64) *Sanitizer {
// Append rule in natural order
s.rules = append(s.rules, rule{filter: filter, transform: transform})
return s
} }
// Policy applies a pre-configured policy to the sanitizer (appended)
func (s *Sanitizer) Policy(preset PolicyPreset) *Sanitizer {
if rules, ok := policyRules[preset]; ok {
s.rules = append(s.rules, rules...)
}
return s
}
// Sanitize applies all configured rules to the input string
func (s *Sanitizer) Sanitize(data string) string { func (s *Sanitizer) Sanitize(data string) string {
if s.mode == None { // Reset buffer
return data s.buf = s.buf[:0]
}
s.Reset()
// Process each rune
for _, r := range data { for _, r := range data {
if strconv.IsPrint(r) { matched := false
// Check rules in order (first match wins)
for _, rl := range s.rules {
if matchesFilter(r, rl.filter) {
applyTransform(&s.buf, r, rl.transform)
matched = true
break
}
}
// If no rule matched, append original rune
if !matched {
s.buf = utf8.AppendRune(s.buf, r) s.buf = utf8.AppendRune(s.buf, r)
continue
}
switch s.mode {
case HexEncode:
var runeBytes [utf8.UTFMax]byte
n := utf8.EncodeRune(runeBytes[:], r)
s.buf = append(s.buf, '<')
s.buf = append(s.buf, hex.EncodeToString(runeBytes[:n])...)
s.buf = append(s.buf, '>')
case Strip:
// Skip non-printable
continue
case Escape:
switch r {
case '\n':
s.buf = append(s.buf, '\\', 'n')
case '\r':
s.buf = append(s.buf, '\\', 'r')
case '\t':
s.buf = append(s.buf, '\\', 't')
case '\b':
s.buf = append(s.buf, '\\', 'b')
case '\f':
s.buf = append(s.buf, '\\', 'f')
default:
// Unicode escape for other control chars
s.buf = append(s.buf, '\\', 'u')
s.buf = append(s.buf, fmt.Sprintf("%04x", r)...)
}
} }
} }
return string(s.buf) return string(s.buf)
} }
// UnifiedHandler implements all format behaviors in a single struct // matchesFilter checks if a rune matches any filter in the mask
type UnifiedHandler struct { func matchesFilter(r rune, filterMask uint64) bool {
for flag, checker := range filterCheckers {
if (filterMask&flag) != 0 && checker(r) {
return true
}
}
return false
}
// applyTransform applies the specified transform to the buffer
func applyTransform(buf *[]byte, r rune, transformMask uint64) {
switch {
case (transformMask & TransformStrip) != 0:
// Do nothing (strip)
case (transformMask & TransformHexEncode) != 0:
var runeBytes [utf8.UTFMax]byte
n := utf8.EncodeRune(runeBytes[:], r)
*buf = append(*buf, '<')
*buf = append(*buf, hex.EncodeToString(runeBytes[:n])...)
*buf = append(*buf, '>')
case (transformMask & TransformJSONEscape) != 0:
switch r {
case '\n':
*buf = append(*buf, '\\', 'n')
case '\r':
*buf = append(*buf, '\\', 'r')
case '\t':
*buf = append(*buf, '\\', 't')
case '\b':
*buf = append(*buf, '\\', 'b')
case '\f':
*buf = append(*buf, '\\', 'f')
case '"':
*buf = append(*buf, '\\', '"')
case '\\':
*buf = append(*buf, '\\', '\\')
default:
if r < 0x20 || r == 0x7f {
*buf = append(*buf, fmt.Sprintf("\\u%04x", r)...)
} else {
*buf = utf8.AppendRune(*buf, r)
}
}
}
}
// Serializer implements format-specific output behaviors
type Serializer struct {
format string format string
sanitizer *Sanitizer sanitizer *Sanitizer
} }
func NewUnifiedHandler(format string, san *Sanitizer) *UnifiedHandler { // NewSerializer creates a handler with format-specific behavior
return &UnifiedHandler{ func NewSerializer(format string, san *Sanitizer) *Serializer {
return &Serializer{
format: format, format: format,
sanitizer: san, sanitizer: san,
} }
} }
func (h *UnifiedHandler) WriteString(buf *[]byte, s string) { // WriteString writes a string with format-specific handling
switch h.format { func (se *Serializer) WriteString(buf *[]byte, s string) {
switch se.format {
case "raw": case "raw":
*buf = append(*buf, h.sanitizer.Sanitize(s)...) *buf = append(*buf, se.sanitizer.Sanitize(s)...)
case "txt": case "txt":
sanitized := h.sanitizer.Sanitize(s) sanitized := se.sanitizer.Sanitize(s)
if h.NeedsQuotes(sanitized) { if se.NeedsQuotes(sanitized) {
*buf = append(*buf, '"') *buf = append(*buf, '"')
// Escape quotes within quoted strings
for i := 0; i < len(sanitized); i++ { for i := 0; i < len(sanitized); i++ {
if sanitized[i] == '"' || sanitized[i] == '\\' { if sanitized[i] == '"' || sanitized[i] == '\\' {
*buf = append(*buf, '\\') *buf = append(*buf, '\\')
@ -124,12 +207,12 @@ func (h *UnifiedHandler) WriteString(buf *[]byte, s string) {
case "json": case "json":
*buf = append(*buf, '"') *buf = append(*buf, '"')
// Direct JSON escaping without pre-sanitization // Direct JSON escaping
for i := 0; i < len(s); { for i := 0; i < len(s); {
c := s[i] c := s[i]
if c >= ' ' && c != '"' && c != '\\' { if c >= ' ' && c != '"' && c != '\\' && c < 0x7f {
start := i start := i
for i < len(s) && s[i] >= ' ' && s[i] != '"' && s[i] != '\\' { for i < len(s) && s[i] >= ' ' && s[i] != '"' && s[i] != '\\' && s[i] < 0x7f {
i++ i++
} }
*buf = append(*buf, s[start:i]...) *buf = append(*buf, s[start:i]...)
@ -157,27 +240,30 @@ func (h *UnifiedHandler) WriteString(buf *[]byte, s string) {
} }
} }
func (h *UnifiedHandler) WriteNumber(buf *[]byte, n string) { // WriteNumber writes a number value
func (se *Serializer) WriteNumber(buf *[]byte, n string) {
*buf = append(*buf, n...) *buf = append(*buf, n...)
} }
func (h *UnifiedHandler) WriteBool(buf *[]byte, b bool) { // WriteBool writes a boolean value
func (se *Serializer) WriteBool(buf *[]byte, b bool) {
*buf = strconv.AppendBool(*buf, b) *buf = strconv.AppendBool(*buf, b)
} }
func (h *UnifiedHandler) WriteNil(buf *[]byte) { // WriteNil writes a nil value
switch h.format { func (se *Serializer) WriteNil(buf *[]byte) {
switch se.format {
case "raw": case "raw":
*buf = append(*buf, "nil"...) *buf = append(*buf, "nil"...)
default: // txt, json default:
*buf = append(*buf, "null"...) *buf = append(*buf, "null"...)
} }
} }
func (h *UnifiedHandler) WriteComplex(buf *[]byte, v any) { // WriteComplex writes complex types
switch h.format { func (se *Serializer) WriteComplex(buf *[]byte, v any) {
switch se.format {
case "raw": case "raw":
// Use spew for complex types in raw mode, DEBUG use
var b bytes.Buffer var b bytes.Buffer
dumper := &spew.ConfigState{ dumper := &spew.ConfigState{
Indent: " ", Indent: " ",
@ -189,41 +275,37 @@ func (h *UnifiedHandler) WriteComplex(buf *[]byte, v any) {
dumper.Fdump(&b, v) dumper.Fdump(&b, v)
*buf = append(*buf, bytes.TrimSpace(b.Bytes())...) *buf = append(*buf, bytes.TrimSpace(b.Bytes())...)
default: // txt, json default:
str := fmt.Sprintf("%+v", v) str := fmt.Sprintf("%+v", v)
h.WriteString(buf, str) se.WriteString(buf, str)
} }
} }
func (h *UnifiedHandler) NeedsQuotes(s string) bool { // NeedsQuotes determines if quoting is needed
switch h.format { func (se *Serializer) NeedsQuotes(s string) bool {
switch se.format {
case "json": case "json":
return true // JSON always quotes return true
case "txt": case "txt":
// Quote strings that:
// 1. Are empty
if len(s) == 0 { if len(s) == 0 {
return true return true
} }
for _, r := range s { for _, r := range s {
// 2. Contain whitespace (space, tab, newline, etc.)
if unicode.IsSpace(r) { if unicode.IsSpace(r) {
return true return true
} }
// 3. Contain shell special characters (POSIX + common extensions)
switch r { switch r {
case '"', '\'', '\\', '$', '`', '!', '&', '|', ';', case '"', '\'', '\\', '$', '`', '!', '&', '|', ';',
'(', ')', '<', '>', '*', '?', '[', ']', '{', '}', '(', ')', '<', '>', '*', '?', '[', ']', '{', '}',
'~', '#', '%', '=', '\n', '\r', '\t': '~', '#', '%', '=', '\n', '\r', '\t':
return true return true
} }
// 4. Non-print
if !unicode.IsPrint(r) { if !unicode.IsPrint(r) {
return true return true
} }
} }
return false return false
default: // raw default:
return false return false
} }
} }

View File

@ -8,177 +8,202 @@ import (
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
) )
func TestSanitizer(t *testing.T) { func TestNewSanitizer(t *testing.T) {
testCases := []struct { // Default passthrough behavior
name string s := New()
input string input := "abc\x00xyz"
mode Mode assert.Equal(t, input, s.Sanitize(input), "default sanitizer should pass through all characters")
expected string
}{
// None mode tests
{
name: "none mode passes through",
input: "hello\x00world\n",
mode: None,
expected: "hello\x00world\n",
},
// HexEncode tests
{
name: "hex encode null byte",
input: "test\x00data",
mode: HexEncode,
expected: "test<00>data",
},
{
name: "hex encode control chars",
input: "bell\x07tab\x09form\x0c",
mode: HexEncode,
expected: "bell<07>tab<09>form<0c>",
},
{
name: "hex encode preserves printable",
input: "Hello World 123!@#",
mode: HexEncode,
expected: "Hello World 123!@#",
},
{
name: "hex encode multi-byte control",
input: "line1\u0085line2", // NEXT LINE (C2 85)
mode: HexEncode,
expected: "line1<c285>line2",
},
{
name: "hex encode preserves UTF-8",
input: "Hello 世界 ✓",
mode: HexEncode,
expected: "Hello 世界 ✓",
},
// Strip tests
{
name: "strip removes control chars",
input: "clean\x00\x07\ntxt",
mode: Strip,
expected: "cleantxt",
},
{
name: "strip preserves spaces",
input: "hello world",
mode: Strip,
expected: "hello world",
},
// Escape tests
{
name: "escape common control chars",
input: "line1\nline2\ttab\rreturn",
mode: Escape,
expected: "line1\\nline2\\ttab\\rreturn",
},
{
name: "escape unicode control",
input: "text\x01\x1f",
mode: Escape,
expected: "text\\u0001\\u001f",
},
{
name: "escape backspace and form feed",
input: "back\bspace form\ffeed",
mode: Escape,
expected: "back\\bspace form\\ffeed",
},
} }
for _, tc := range testCases { func TestSingleRule(t *testing.T) {
t.Run(tc.name, func(t *testing.T) { t.Run("strip non-printable", func(t *testing.T) {
s := New(tc.mode) s := New().Rule(FilterNonPrintable, TransformStrip)
result := s.Sanitize(tc.input) assert.Equal(t, "ab", s.Sanitize("a\x00b"))
assert.Equal(t, tc.expected, result) assert.Equal(t, "test", s.Sanitize("test\x01\x02\x03"))
})
t.Run("hex encode non-printable", func(t *testing.T) {
s := New().Rule(FilterNonPrintable, TransformHexEncode)
assert.Equal(t, "a<00>b", s.Sanitize("a\x00b"))
assert.Equal(t, "bell<07>tab<09>", s.Sanitize("bell\x07tab\x09"))
})
t.Run("JSON escape control", func(t *testing.T) {
s := New().Rule(FilterControl, TransformJSONEscape)
assert.Equal(t, "line1\\nline2", s.Sanitize("line1\nline2"))
assert.Equal(t, "tab\\there", s.Sanitize("tab\there"))
assert.Equal(t, "null\\u0000byte", s.Sanitize("null\x00byte"))
})
t.Run("strip whitespace", func(t *testing.T) {
s := New().Rule(FilterWhitespace, TransformStrip)
assert.Equal(t, "nospaceshere", s.Sanitize("no spaces here"))
assert.Equal(t, "tabsgone", s.Sanitize("tabs\t\tgone"))
})
t.Run("strip shell special", func(t *testing.T) {
s := New().Rule(FilterShellSpecial, TransformStrip)
assert.Equal(t, "cmd echo test", s.Sanitize("cmd; echo test"))
assert.Equal(t, "no pipes", s.Sanitize("no | pipes"))
assert.Equal(t, "var", s.Sanitize("$var"))
}) })
} }
func TestPolicy(t *testing.T) {
t.Run("PolicyTxt", func(t *testing.T) {
s := New().Policy(PolicyTxt)
assert.Equal(t, "hello<07>world", s.Sanitize("hello\x07world"))
assert.Equal(t, "clean text", s.Sanitize("clean text"))
})
t.Run("PolicyJSON", func(t *testing.T) {
s := New().Policy(PolicyJSON)
assert.Equal(t, "line1\\nline2", s.Sanitize("line1\nline2"))
assert.Equal(t, "\\ttab", s.Sanitize("\ttab"))
})
t.Run("PolicyShellArg", func(t *testing.T) {
s := New().Policy(PolicyShell)
assert.Equal(t, "cmdecho", s.Sanitize("cmd; echo"))
assert.Equal(t, "nospaces", s.Sanitize("no spaces"))
})
} }
func TestUnifiedHandler(t *testing.T) { func TestRulePrecedence(t *testing.T) {
t.Run("raw format", func(t *testing.T) { // With append + forward iteration: Policy is checked before Rule
san := New(HexEncode) s := New().Policy(PolicyTxt).Rule(FilterControl, TransformStrip)
handler := NewUnifiedHandler("raw", san)
// \x07 is both control AND non-printable - matches PolicyTxt first
// \x00 is both control AND non-printable - matches PolicyTxt first
input := "a\x07b\x00c"
expected := "a<07>b<00>c" // FIXED: Policy wins now
result := s.Sanitize(input)
assert.Equal(t, expected, result,
"Policy() is now checked before Rule() - non-printable chars get hex encoded")
}
// TestCompositeFilter verifies that OR-ed filter flags act as one rule.
func TestCompositeFilter(t *testing.T) {
	s := New().Rule(FilterShellSpecial|FilterWhitespace, TransformStrip)
	assert.Equal(t, "cmdechohello", s.Sanitize("cmd; echo hello"))
	assert.Equal(t, "nopipesnospaces", s.Sanitize("no |pipes| no spaces"))
}
// TestChaining verifies rule ordering when rules are added fluently.
func TestChaining(t *testing.T) {
	s := New().
		Rule(FilterWhitespace, TransformStrip).
		Rule(FilterShellSpecial, TransformHexEncode)
	// Shell special chars are checked first (prepended), get hex encoded
	// Whitespace rule is second, strips spaces
	assert.Equal(t, "cmd<3b>echohello", s.Sanitize("cmd; echo hello"))
}
// TestMultipleRulesOrder ensures the first matching rule wins when two
// rules share the same filter.
func TestMultipleRulesOrder(t *testing.T) {
	// Test that first matching rule wins
	s := New().
		Rule(FilterControl, TransformStrip).
		Rule(FilterControl, TransformHexEncode) // This should never match
	assert.Equal(t, "ab", s.Sanitize("a\x00b"), "first rule should win")
}
// TestEdgeCases covers boundary inputs: empty strings, all-sanitizable
// input, and multi-byte UTF-8 handling.
func TestEdgeCases(t *testing.T) {
	t.Run("empty string", func(t *testing.T) {
		s := New().Rule(FilterNonPrintable, TransformStrip)
		assert.Equal(t, "", s.Sanitize(""))
	})
	t.Run("only sanitizable characters", func(t *testing.T) {
		s := New().Rule(FilterNonPrintable, TransformStrip)
		assert.Equal(t, "", s.Sanitize("\x00\x01\x02\x03"))
	})
	t.Run("multi-byte UTF-8", func(t *testing.T) {
		s := New().Rule(FilterNonPrintable, TransformHexEncode)
		input := "Hello 世界 ✓"
		assert.Equal(t, input, s.Sanitize(input), "UTF-8 should pass through")
	})
	t.Run("multi-byte control character", func(t *testing.T) {
		s := New().Rule(FilterNonPrintable, TransformHexEncode)
		// NEL (Next Line) is U+0085, encoded as C2 85 in UTF-8
		assert.Equal(t, "line1<c285>line2", s.Sanitize("line1\u0085line2"))
	})
}
func TestSerializer(t *testing.T) {
t.Run("raw format with sanitizer", func(t *testing.T) {
san := New().Rule(FilterNonPrintable, TransformHexEncode)
handler := NewSerializer("raw", san)
var buf []byte var buf []byte
// String handling
handler.WriteString(&buf, "test\x00data") handler.WriteString(&buf, "test\x00data")
assert.Equal(t, "test<00>data", string(buf)) assert.Equal(t, "test<00>data", string(buf))
// Nil handling
buf = nil
handler.WriteNil(&buf)
assert.Equal(t, "nil", string(buf))
// No quotes needed
assert.False(t, handler.NeedsQuotes("any string"))
}) })
t.Run("txt format", func(t *testing.T) { t.Run("txt format with quotes", func(t *testing.T) {
san := New(HexEncode) san := New() // No sanitization
handler := NewUnifiedHandler("txt", san) handler := NewSerializer("txt", san)
var buf []byte var buf []byte
// String with spaces gets quoted
handler.WriteString(&buf, "hello world") handler.WriteString(&buf, "hello world")
assert.Equal(t, `"hello world"`, string(buf)) assert.Equal(t, `"hello world"`, string(buf))
// String without spaces unquoted
buf = nil buf = nil
handler.WriteString(&buf, "single") handler.WriteString(&buf, "nospace")
assert.Equal(t, "single", string(buf)) assert.Equal(t, "nospace", string(buf))
// Nil handling
buf = nil
handler.WriteNil(&buf)
assert.Equal(t, "null", string(buf))
// Quotes needed for empty or space-containing
assert.True(t, handler.NeedsQuotes(""))
assert.True(t, handler.NeedsQuotes("has space"))
assert.False(t, handler.NeedsQuotes("nospace"))
}) })
t.Run("json format", func(t *testing.T) { t.Run("json format escaping", func(t *testing.T) {
san := New(Escape) // Not used for JSON, direct escaping san := New() // JSON handler does its own escaping
handler := NewUnifiedHandler("json", san) handler := NewSerializer("json", san)
var buf []byte var buf []byte
// JSON escaping
handler.WriteString(&buf, "line1\nline2\t\"quoted\"") handler.WriteString(&buf, "line1\nline2\t\"quoted\"")
assert.Equal(t, `"line1\nline2\t\"quoted\""`, string(buf)) assert.Equal(t, `"line1\nline2\t\"quoted\""`, string(buf))
// Control char escaping
buf = nil buf = nil
handler.WriteString(&buf, "null\x00byte") handler.WriteString(&buf, "null\x00byte")
assert.Equal(t, `"null\u0000byte"`, string(buf)) assert.Equal(t, `"null\u0000byte"`, string(buf))
// Always quotes
assert.True(t, handler.NeedsQuotes("anything"))
}) })
t.Run("complex value handling", func(t *testing.T) { t.Run("complex value handling", func(t *testing.T) {
san := New(HexEncode) san := New()
handler := NewSerializer("raw", san)
// Raw uses spew
rawHandler := NewUnifiedHandler("raw", san)
var buf []byte var buf []byte
rawHandler.WriteComplex(&buf, map[string]int{"a": 1}) handler.WriteComplex(&buf, map[string]int{"a": 1})
assert.Contains(t, string(buf), "map[") assert.Contains(t, string(buf), "map[")
// Txt/JSON use fmt.Sprintf
txtHandler := NewUnifiedHandler("txt", san)
buf = nil
txtHandler.WriteComplex(&buf, []int{1, 2, 3})
assert.Contains(t, string(buf), "[1 2 3]")
}) })
t.Run("nil handling", func(t *testing.T) {
san := New()
rawHandler := NewSerializer("raw", san)
var buf []byte
rawHandler.WriteNil(&buf)
assert.Equal(t, "nil", string(buf))
jsonHandler := NewSerializer("json", san)
buf = nil
jsonHandler.WriteNil(&buf)
assert.Equal(t, "null", string(buf))
})
}
func TestPolicyWithCustomRules(t *testing.T) {
s := New().
Policy(PolicyTxt).
Rule(FilterControl, TransformStrip).
Rule(FilterWhitespace, TransformJSONEscape)
// \x07 is non-printable AND control - matches PolicyTxt first (hex encode)
// \x7F is non-printable but NOT control - matches PolicyTxt (hex encode)
input := "a\x07b c\x7Fd"
result := s.Sanitize(input)
assert.Equal(t, "a<07>b c<7f>d", result) // FIXED: \x07 now hex encoded
} }
func BenchmarkSanitizer(b *testing.B) { func BenchmarkSanitizer(b *testing.B) {
@ -186,21 +211,31 @@ func BenchmarkSanitizer(b *testing.B) {
benchmarks := []struct { benchmarks := []struct {
name string name string
mode Mode sanitizer *Sanitizer
}{ }{
{"None", None}, {"Passthrough", New()},
{"HexEncode", HexEncode}, {"SingleRule", New().Rule(FilterNonPrintable, TransformHexEncode)},
{"Strip", Strip}, {"Policy", New().Policy(PolicyTxt)},
{"Escape", Escape}, {"Complex", New().
Policy(PolicyTxt).
Rule(FilterControl, TransformStrip).
Rule(FilterWhitespace, TransformJSONEscape)},
} }
for _, bm := range benchmarks { for _, bm := range benchmarks {
b.Run(bm.name, func(b *testing.B) { b.Run(bm.name, func(b *testing.B) {
s := New(bm.mode)
b.ResetTimer() b.ResetTimer()
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
_ = s.Sanitize(input) _ = bm.sanitizer.Sanitize(input)
} }
}) })
} }
} }
// TestTransformPriority asserts that when multiple transform flags are
// OR-ed into one rule, only the first flag checked is applied.
func TestTransformPriority(t *testing.T) {
	// Test that only one transform is applied per rule
	s := New().Rule(FilterControl, TransformStrip|TransformHexEncode)
	// Should strip (first flag checked), not hex encode
	assert.Equal(t, "ab", s.Sanitize("a\x00b"))
}

View File

@ -19,7 +19,7 @@ func TestLogRotation(t *testing.T) {
defer logger.Shutdown() defer logger.Shutdown()
cfg := logger.GetConfig() cfg := logger.GetConfig()
cfg.MaxSizeKB = 1000 // 1MB cfg.MaxSizeKB = 100 // 100KB
cfg.FlushIntervalMs = 10 // Fast flush for testing cfg.FlushIntervalMs = 10 // Fast flush for testing
logger.ApplyConfig(cfg) logger.ApplyConfig(cfg)
@ -27,11 +27,11 @@ func TestLogRotation(t *testing.T) {
// Account for timestamp, level, and other formatting overhead // Account for timestamp, level, and other formatting overhead
// A typical log line overhead is ~50-100 bytes // A typical log line overhead is ~50-100 bytes
const overhead = 100 const overhead = 100
const targetMessageSize = 50000 // 50KB per message const targetMessageSize = 5000 // 5KB per message
largeData := strings.Repeat("x", targetMessageSize) largeData := strings.Repeat("x", targetMessageSize)
// Write enough to exceed 1MB twice (should cause at least one rotation) // Write enough to exceed 1MB twice (should cause at least one rotation)
messagesNeeded := (2 * sizeMultiplier * 1000) / (targetMessageSize + overhead) // ~40 messages messagesNeeded := int((2 * sizeMultiplier * cfg.MaxSizeKB) / (targetMessageSize + overhead)) // ~40 messages
for i := 0; i < messagesNeeded; i++ { for i := 0; i < messagesNeeded; i++ {
logger.Info(fmt.Sprintf("msg%d:", i), largeData) logger.Info(fmt.Sprintf("msg%d:", i), largeData)