v0.1.2 sanitizer redesigned with policies and rules

This commit is contained in:
2025-11-15 13:23:18 -05:00
parent af162755dd
commit b2be5cec88
9 changed files with 496 additions and 338 deletions

View File

@ -75,8 +75,8 @@ func (b *Builder) Format(format string) *Builder {
} }
// Sanitization sets the sanitization mode // Sanitization sets the sanitization mode
func (b *Builder) Sanitization(mode sanitizer.Mode) *Builder { func (b *Builder) Sanitization(policy sanitizer.PolicyPreset) *Builder {
b.cfg.Sanitization = mode b.cfg.Sanitization = policy
return b return b
} }

View File

@ -28,7 +28,7 @@ type Config struct {
ShowTimestamp bool `toml:"show_timestamp"` // Add timestamp to log records ShowTimestamp bool `toml:"show_timestamp"` // Add timestamp to log records
ShowLevel bool `toml:"show_level"` // Add level to log record ShowLevel bool `toml:"show_level"` // Add level to log record
TimestampFormat string `toml:"timestamp_format"` // Time format for log timestamps TimestampFormat string `toml:"timestamp_format"` // Time format for log timestamps
Sanitization sanitizer.Mode `toml:"sanitization"` // 0=None, 1=HexEncode, 2=Strip, 3=Escape Sanitization sanitizer.PolicyPreset `toml:"sanitization"` // "default", "json", "txt", "shell"
// Buffer and size limits // Buffer and size limits
BufferSize int64 `toml:"buffer_size"` // Channel buffer size BufferSize int64 `toml:"buffer_size"` // Channel buffer size
@ -75,7 +75,7 @@ var defaultConfig = Config{
ShowTimestamp: true, ShowTimestamp: true,
ShowLevel: true, ShowLevel: true,
TimestampFormat: time.RFC3339Nano, TimestampFormat: time.RFC3339Nano,
Sanitization: sanitizer.HexEncode, Sanitization: sanitizer.PolicyTxt,
// Buffer and size limits // Buffer and size limits
BufferSize: 1024, BufferSize: 1024,
@ -127,9 +127,11 @@ func (c *Config) Validate() error {
return fmtErrorf("invalid format: '%s' (use txt, json, or raw)", c.Format) return fmtErrorf("invalid format: '%s' (use txt, json, or raw)", c.Format)
} }
// TODO: better bound check, implement validator in `sanitizer` switch c.Sanitization {
if c.Sanitization < 0 || c.Sanitization > sanitizer.Escape { case sanitizer.PolicyRaw, sanitizer.PolicyJSON, sanitizer.PolicyTxt, sanitizer.PolicyShell:
return fmtErrorf("invalid sanitization mode: '%d' (use 0=None, 1=HexEncode, 2=Strip, 3=Escape)", c.Sanitization) // valid policy
default:
return fmtErrorf("invalid sanitization policy: '%s' (use raw, json, txt, or shell)", c.Sanitization)
} }
if strings.HasPrefix(c.Extension, ".") { if strings.HasPrefix(c.Extension, ".") {
@ -226,11 +228,7 @@ func applyConfigField(cfg *Config, key, value string) error {
case "timestamp_format": case "timestamp_format":
cfg.TimestampFormat = value cfg.TimestampFormat = value
case "sanitization": case "sanitization":
intVal, err := strconv.ParseInt(value, 10, 64) cfg.Sanitization = sanitizer.PolicyPreset(value)
if err != nil {
return fmtErrorf("invalid integer value for sanitization '%s': %w", value, err)
}
cfg.Sanitization = sanitizer.Mode(intVal)
// Buffer and size limits // Buffer and size limits
case "buffer_size": case "buffer_size":

View File

@ -22,9 +22,9 @@ const (
// Record flags for controlling output structure // Record flags for controlling output structure
const ( const (
FlagShowTimestamp int64 = 0b0001 FlagRaw int64 = 0b0001
FlagShowLevel int64 = 0b0010 FlagShowTimestamp int64 = 0b0010
FlagRaw int64 = 0b0100 FlagShowLevel int64 = 0b0100
FlagStructuredJSON int64 = 0b1000 FlagStructuredJSON int64 = 0b1000
FlagDefault = FlagShowTimestamp | FlagShowLevel FlagDefault = FlagShowTimestamp | FlagShowLevel
) )

116
format.go
View File

@ -16,24 +16,27 @@ type Formatter struct {
format string format string
buf []byte buf []byte
timestampFormat string timestampFormat string
sanitizationMode sanitizer.Mode
sanitizer *sanitizer.Sanitizer sanitizer *sanitizer.Sanitizer
} }
// NewFormatter creates a formatter instance // NewFormatter creates a formatter instance
func NewFormatter(format string, bufferSize int64, timestampFormat string, sanitizationMode sanitizer.Mode) *Formatter { func NewFormatter(format string, bufferSize int64, timestampFormat string, sanitizationPolicy sanitizer.PolicyPreset) *Formatter {
if timestampFormat == "" { if timestampFormat == "" {
timestampFormat = time.RFC3339Nano timestampFormat = time.RFC3339Nano
} }
if format == "" { if format == "" {
format = "txt" format = "txt"
} }
if sanitizationPolicy == "" {
sanitizationPolicy = "raw"
}
s := (sanitizer.New()).Policy(sanitizationPolicy)
return &Formatter{ return &Formatter{
format: format, format: format,
buf: make([]byte, 0, bufferSize), buf: make([]byte, 0, bufferSize),
timestampFormat: timestampFormat, timestampFormat: timestampFormat,
sanitizationMode: sanitizationMode, sanitizer: s,
sanitizer: sanitizer.New(sanitizationMode),
} }
} }
@ -46,31 +49,45 @@ func (f *Formatter) Reset() {
func (f *Formatter) Format(format string, flags int64, timestamp time.Time, level int64, trace string, args []any) []byte { func (f *Formatter) Format(format string, flags int64, timestamp time.Time, level int64, trace string, args []any) []byte {
f.Reset() f.Reset()
// The FlagRaw acts as an override to the configured format // FlagRaw completely bypasses formatting and sanitization
effectiveFormat := format
if flags&FlagRaw != 0 { if flags&FlagRaw != 0 {
effectiveFormat = "raw" for i, arg := range args {
if i > 0 {
f.buf = append(f.buf, ' ')
}
// Direct conversion without sanitization
switch v := arg.(type) {
case string:
f.buf = append(f.buf, v...)
case []byte:
f.buf = append(f.buf, v...)
case fmt.Stringer:
f.buf = append(f.buf, v.String()...)
case error:
f.buf = append(f.buf, v.Error()...)
default:
f.buf = append(f.buf, fmt.Sprint(v)...)
}
}
return f.buf
} }
// Create the handler based on the effective format // Create the serializer based on the effective format
handler := sanitizer.NewUnifiedHandler(effectiveFormat, f.sanitizer) serializer := sanitizer.NewSerializer(format, f.sanitizer)
switch effectiveFormat { switch format {
case "raw": case "raw":
// This dedicated path handles both format="raw" and FlagRaw // Raw formatting serializes the arguments and adds NO metadata or newlines
// It only serializes the arguments and adds NO metadata or newlines
for i, arg := range args { for i, arg := range args {
f.convertValue(&f.buf, arg, handler, i > 0) f.convertValue(&f.buf, arg, serializer, i > 0)
} }
return f.buf return f.buf
case "json": case "json":
// The existing JSON serialization logic remains unchanged return f.formatJSON(flags, timestamp, level, trace, args, serializer)
return f.formatJSON(flags, timestamp, level, trace, args, handler)
case "txt": case "txt":
// The existing Txt serialization logic is now correctly isolated return f.formatTxt(flags, timestamp, level, trace, args, serializer)
return f.formatTxt(flags, timestamp, level, trace, args, handler)
} }
return nil // forcing panic on unrecognized format return nil // forcing panic on unrecognized format
@ -79,86 +96,86 @@ func (f *Formatter) Format(format string, flags int64, timestamp time.Time, leve
// FormatValue formats a single value according to the formatter's configuration // FormatValue formats a single value according to the formatter's configuration
func (f *Formatter) FormatValue(v any) []byte { func (f *Formatter) FormatValue(v any) []byte {
f.Reset() f.Reset()
handler := sanitizer.NewUnifiedHandler(f.format, f.sanitizer) serializer := sanitizer.NewSerializer(f.format, f.sanitizer)
f.convertValue(&f.buf, v, handler, false) f.convertValue(&f.buf, v, serializer, false)
return f.buf return f.buf
} }
// FormatArgs formats multiple arguments as space-separated values // FormatArgs formats multiple arguments as space-separated values
func (f *Formatter) FormatArgs(args ...any) []byte { func (f *Formatter) FormatArgs(args ...any) []byte {
f.Reset() f.Reset()
handler := sanitizer.NewUnifiedHandler(f.format, f.sanitizer) serializer := sanitizer.NewSerializer(f.format, f.sanitizer)
for i, arg := range args { for i, arg := range args {
f.convertValue(&f.buf, arg, handler, i > 0) f.convertValue(&f.buf, arg, serializer, i > 0)
} }
return f.buf return f.buf
} }
// convertValue provides unified type conversion // convertValue provides unified type conversion
func (f *Formatter) convertValue(buf *[]byte, v any, handler *sanitizer.UnifiedHandler, needsSpace bool) { func (f *Formatter) convertValue(buf *[]byte, v any, serializer *sanitizer.Serializer, needsSpace bool) {
if needsSpace && len(*buf) > 0 { if needsSpace && len(*buf) > 0 {
*buf = append(*buf, ' ') *buf = append(*buf, ' ')
} }
switch val := v.(type) { switch val := v.(type) {
case string: case string:
handler.WriteString(buf, val) serializer.WriteString(buf, val)
case []byte: case []byte:
handler.WriteString(buf, string(val)) serializer.WriteString(buf, string(val))
case rune: case rune:
var runeStr [utf8.UTFMax]byte var runeStr [utf8.UTFMax]byte
n := utf8.EncodeRune(runeStr[:], val) n := utf8.EncodeRune(runeStr[:], val)
handler.WriteString(buf, string(runeStr[:n])) serializer.WriteString(buf, string(runeStr[:n]))
case int: case int:
num := strconv.AppendInt(nil, int64(val), 10) num := strconv.AppendInt(nil, int64(val), 10)
handler.WriteNumber(buf, string(num)) serializer.WriteNumber(buf, string(num))
case int64: case int64:
num := strconv.AppendInt(nil, val, 10) num := strconv.AppendInt(nil, val, 10)
handler.WriteNumber(buf, string(num)) serializer.WriteNumber(buf, string(num))
case uint: case uint:
num := strconv.AppendUint(nil, uint64(val), 10) num := strconv.AppendUint(nil, uint64(val), 10)
handler.WriteNumber(buf, string(num)) serializer.WriteNumber(buf, string(num))
case uint64: case uint64:
num := strconv.AppendUint(nil, val, 10) num := strconv.AppendUint(nil, val, 10)
handler.WriteNumber(buf, string(num)) serializer.WriteNumber(buf, string(num))
case float32: case float32:
num := strconv.AppendFloat(nil, float64(val), 'f', -1, 32) num := strconv.AppendFloat(nil, float64(val), 'f', -1, 32)
handler.WriteNumber(buf, string(num)) serializer.WriteNumber(buf, string(num))
case float64: case float64:
num := strconv.AppendFloat(nil, val, 'f', -1, 64) num := strconv.AppendFloat(nil, val, 'f', -1, 64)
handler.WriteNumber(buf, string(num)) serializer.WriteNumber(buf, string(num))
case bool: case bool:
handler.WriteBool(buf, val) serializer.WriteBool(buf, val)
case nil: case nil:
handler.WriteNil(buf) serializer.WriteNil(buf)
case time.Time: case time.Time:
timeStr := val.Format(f.timestampFormat) timeStr := val.Format(f.timestampFormat)
handler.WriteString(buf, timeStr) serializer.WriteString(buf, timeStr)
case error: case error:
handler.WriteString(buf, val.Error()) serializer.WriteString(buf, val.Error())
case fmt.Stringer: case fmt.Stringer:
handler.WriteString(buf, val.String()) serializer.WriteString(buf, val.String())
default: default:
handler.WriteComplex(buf, val) serializer.WriteComplex(buf, val)
} }
} }
// formatJSON unifies JSON output // formatJSON unifies JSON output
func (f *Formatter) formatJSON(flags int64, timestamp time.Time, level int64, trace string, args []any, handler *sanitizer.UnifiedHandler) []byte { func (f *Formatter) formatJSON(flags int64, timestamp time.Time, level int64, trace string, args []any, serializer *sanitizer.Serializer) []byte {
f.buf = append(f.buf, '{') f.buf = append(f.buf, '{')
needsComma := false needsComma := false
@ -184,7 +201,7 @@ func (f *Formatter) formatJSON(flags int64, timestamp time.Time, level int64, tr
f.buf = append(f.buf, ',') f.buf = append(f.buf, ',')
} }
f.buf = append(f.buf, `"trace":`...) f.buf = append(f.buf, `"trace":`...)
handler.WriteString(&f.buf, trace) serializer.WriteString(&f.buf, trace)
needsComma = true needsComma = true
} }
@ -196,7 +213,7 @@ func (f *Formatter) formatJSON(flags int64, timestamp time.Time, level int64, tr
f.buf = append(f.buf, ',') f.buf = append(f.buf, ',')
} }
f.buf = append(f.buf, `"message":`...) f.buf = append(f.buf, `"message":`...)
handler.WriteString(&f.buf, message) serializer.WriteString(&f.buf, message)
f.buf = append(f.buf, ',') f.buf = append(f.buf, ',')
f.buf = append(f.buf, `"fields":`...) f.buf = append(f.buf, `"fields":`...)
@ -204,7 +221,7 @@ func (f *Formatter) formatJSON(flags int64, timestamp time.Time, level int64, tr
marshaledFields, err := json.Marshal(fields) marshaledFields, err := json.Marshal(fields)
if err != nil { if err != nil {
f.buf = append(f.buf, `{"_marshal_error":"`...) f.buf = append(f.buf, `{"_marshal_error":"`...)
handler.WriteString(&f.buf, err.Error()) serializer.WriteString(&f.buf, err.Error())
f.buf = append(f.buf, `"}`...) f.buf = append(f.buf, `"}`...)
} else { } else {
f.buf = append(f.buf, marshaledFields...) f.buf = append(f.buf, marshaledFields...)
@ -226,7 +243,7 @@ func (f *Formatter) formatJSON(flags int64, timestamp time.Time, level int64, tr
if i > 0 { if i > 0 {
f.buf = append(f.buf, ',') f.buf = append(f.buf, ',')
} }
f.convertValue(&f.buf, arg, handler, false) f.convertValue(&f.buf, arg, serializer, false)
} }
f.buf = append(f.buf, ']') f.buf = append(f.buf, ']')
} }
@ -236,7 +253,7 @@ func (f *Formatter) formatJSON(flags int64, timestamp time.Time, level int64, tr
} }
// formatTxt handles txt format output // formatTxt handles txt format output
func (f *Formatter) formatTxt(flags int64, timestamp time.Time, level int64, trace string, args []any, handler *sanitizer.UnifiedHandler) []byte { func (f *Formatter) formatTxt(flags int64, timestamp time.Time, level int64, trace string, args []any, serializer *sanitizer.Serializer) []byte {
needsSpace := false needsSpace := false
if flags&FlagShowTimestamp != 0 { if flags&FlagShowTimestamp != 0 {
@ -256,12 +273,21 @@ func (f *Formatter) formatTxt(flags int64, timestamp time.Time, level int64, tra
if needsSpace { if needsSpace {
f.buf = append(f.buf, ' ') f.buf = append(f.buf, ' ')
} }
f.buf = append(f.buf, trace...) // Sanitize trace to prevent terminal control sequence injection
traceHandler := sanitizer.NewSerializer("txt", f.sanitizer)
tempBuf := make([]byte, 0, len(trace)*2)
traceHandler.WriteString(&tempBuf, trace)
// Extract content without quotes if added by txt serializer
if len(tempBuf) > 2 && tempBuf[0] == '"' && tempBuf[len(tempBuf)-1] == '"' {
f.buf = append(f.buf, tempBuf[1:len(tempBuf)-1]...)
} else {
f.buf = append(f.buf, tempBuf...)
}
needsSpace = true needsSpace = true
} }
for _, arg := range args { for _, arg := range args {
f.convertValue(&f.buf, arg, handler, needsSpace) f.convertValue(&f.buf, arg, serializer, needsSpace)
needsSpace = true needsSpace = true
} }

View File

@ -10,13 +10,14 @@ import (
"testing" "testing"
"time" "time"
"github.com/lixenwraith/log/sanitizer"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
) )
// TestFormatter tests the output of the formatter for txt, json, and raw formats // TestFormatter tests the output of the formatter for txt, json, and raw formats
func TestFormatter(t *testing.T) { func TestFormatter(t *testing.T) {
f := NewFormatter("txt", 1024, time.RFC3339Nano, 0) f := NewFormatter("txt", 1024, time.RFC3339Nano, sanitizer.PolicyRaw)
timestamp := time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC) timestamp := time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC)
t.Run("txt format", func(t *testing.T) { t.Run("txt format", func(t *testing.T) {
@ -30,7 +31,7 @@ func TestFormatter(t *testing.T) {
assert.True(t, strings.HasSuffix(str, "\n")) assert.True(t, strings.HasSuffix(str, "\n"))
}) })
f = NewFormatter("json", 1024, time.RFC3339Nano, 0) f = NewFormatter("json", 1024, time.RFC3339Nano, sanitizer.PolicyRaw)
t.Run("json format", func(t *testing.T) { t.Run("json format", func(t *testing.T) {
data := f.Format("json", FlagDefault, timestamp, LevelWarn, "trace1", []any{"warning", true}) data := f.Format("json", FlagDefault, timestamp, LevelWarn, "trace1", []any{"warning", true})
@ -45,7 +46,7 @@ func TestFormatter(t *testing.T) {
assert.Equal(t, true, fields[1]) assert.Equal(t, true, fields[1])
}) })
f = NewFormatter("raw", 1024, time.RFC3339Nano, 0) f = NewFormatter("raw", 1024, time.RFC3339Nano, sanitizer.PolicyRaw)
t.Run("raw format", func(t *testing.T) { t.Run("raw format", func(t *testing.T) {
data := f.Format("raw", 0, timestamp, LevelInfo, "", []any{"raw", "data", 42}) data := f.Format("raw", 0, timestamp, LevelInfo, "", []any{"raw", "data", 42})
str := string(data) str := string(data)
@ -61,7 +62,7 @@ func TestFormatter(t *testing.T) {
assert.Equal(t, "forced raw", str) assert.Equal(t, "forced raw", str)
}) })
f = NewFormatter("json", 1024, time.RFC3339Nano, 0) f = NewFormatter("json", 1024, time.RFC3339Nano, sanitizer.PolicyJSON)
t.Run("structured json", func(t *testing.T) { t.Run("structured json", func(t *testing.T) {
fields := map[string]any{"key1": "value1", "key2": 42} fields := map[string]any{"key1": "value1", "key2": 42}
data := f.Format("json", FlagStructuredJSON|FlagDefault, timestamp, LevelInfo, "", data := f.Format("json", FlagStructuredJSON|FlagDefault, timestamp, LevelInfo, "",
@ -75,7 +76,7 @@ func TestFormatter(t *testing.T) {
assert.Equal(t, map[string]any{"key1": "value1", "key2": float64(42)}, result["fields"]) assert.Equal(t, map[string]any{"key1": "value1", "key2": float64(42)}, result["fields"])
}) })
f = NewFormatter("json", 1024, time.RFC3339Nano, 3) f = NewFormatter("json", 1024, time.RFC3339Nano, sanitizer.PolicyJSON)
t.Run("special characters escaping", func(t *testing.T) { t.Run("special characters escaping", func(t *testing.T) {
data := f.Format("json", FlagDefault, timestamp, LevelInfo, "", data := f.Format("json", FlagDefault, timestamp, LevelInfo, "",
[]any{"test\n\r\t\"\\message"}) []any{"test\n\r\t\"\\message"})
@ -121,33 +122,42 @@ func TestControlCharacterWrite(t *testing.T) {
logger, tmpDir := createTestLogger(t) logger, tmpDir := createTestLogger(t)
defer logger.Shutdown() defer logger.Shutdown()
// Test various control characters cfg := logger.GetConfig()
cfg.Format = "raw"
cfg.ShowTimestamp = false
cfg.ShowLevel = false
err := logger.ApplyConfig(cfg)
require.NoError(t, err)
// Test various control characters with expected sanitized output
testCases := []struct { testCases := []struct {
name string name string
input string input string
expected string
}{ }{
{"null bytes", "test\x00data"}, {"null bytes", "test\x00data", "test<00>data"},
{"bell", "alert\x07message"}, {"bell", "alert\x07message", "alert<07>message"},
{"backspace", "back\x08space"}, {"backspace", "back\x08space", "back<08>space"},
{"form feed", "page\x0Cbreak"}, {"form feed", "page\x0Cbreak", "page<0c>break"},
{"vertical tab", "vertical\x0Btab"}, {"vertical tab", "vertical\x0Btab", "vertical<0b>tab"},
{"escape", "escape\x1B[31mcolor"}, {"escape", "escape\x1B[31mcolor", "escape<1b>[31mcolor"},
{"mixed", "\x00\x01\x02test\x1F\x7Fdata"}, {"mixed", "\x00\x01\x02test\x1F\x7Fdata", "<00><01><02>test<1f><7f>data"},
} }
for _, tc := range testCases { for _, tc := range testCases {
logger.Write(tc.input) logger.Message(tc.input)
} }
logger.Flush(time.Second) logger.Flush(time.Second)
// Verify file contains hex-encoded control chars
content, err := os.ReadFile(filepath.Join(tmpDir, "log.log")) content, err := os.ReadFile(filepath.Join(tmpDir, "log.log"))
require.NoError(t, err) require.NoError(t, err)
// Control chars should be hex-encoded in raw output // Verify each test case produced correct sanitized output
assert.Contains(t, string(content), "test") for _, tc := range testCases {
assert.Contains(t, string(content), "data") assert.Contains(t, string(content), tc.expected,
"Test case '%s' should produce hex-encoded control chars", tc.name)
}
} }
// TestRawSanitizedOutput verifies that raw output is correctly sanitized // TestRawSanitizedOutput verifies that raw output is correctly sanitized
@ -155,6 +165,14 @@ func TestRawSanitizedOutput(t *testing.T) {
logger, tmpDir := createTestLogger(t) logger, tmpDir := createTestLogger(t)
defer logger.Shutdown() defer logger.Shutdown()
// Use raw format instead of Write() to test sanitization
cfg := logger.GetConfig()
cfg.Format = "raw"
cfg.ShowTimestamp = false
cfg.ShowLevel = false
err := logger.ApplyConfig(cfg)
require.NoError(t, err)
// 1. A string with valid multi-byte UTF-8 should be unchanged // 1. A string with valid multi-byte UTF-8 should be unchanged
utf8String := "Hello │ 世界" utf8String := "Hello │ 世界"
@ -171,7 +189,7 @@ func TestRawSanitizedOutput(t *testing.T) {
expectedMultiByteOutput := "line1<c285>line2" expectedMultiByteOutput := "line1<c285>line2"
// Log all cases // Log all cases
logger.Write(utf8String, stringWithControl, bytesWithControl, multiByteControl) logger.Message(utf8String, stringWithControl, bytesWithControl, multiByteControl)
logger.Flush(time.Second) logger.Flush(time.Second)
// Read and verify the single line of output // Read and verify the single line of output

View File

@ -326,8 +326,7 @@ func (l *Logger) LogStructured(level int64, message string, fields map[string]an
l.log(l.getFlags()|FlagStructuredJSON, level, 0, []any{message, fields}) l.log(l.getFlags()|FlagStructuredJSON, level, 0, []any{message, fields})
} }
// Write outputs raw, unformatted data regardless of configured format // Write outputs raw, unformatted data ignoring configured format and sanitization without trailing new line
// Writes args as space-separated strings without a trailing newline
func (l *Logger) Write(args ...any) { func (l *Logger) Write(args ...any) {
l.log(FlagRaw, LevelInfo, 0, args...) l.log(FlagRaw, LevelInfo, 0, args...)
} }

View File

@ -1,4 +1,6 @@
// FILE: lixenwraith/log/sanitizer/sanitizer.go // FILE: lixenwraith/log/sanitizer/sanitizer.go
// Package sanitizer provides a fluent and composable interface for sanitizing
// strings based on configurable rules using bitwise filter flags and transforms.
package sanitizer package sanitizer
import ( import (
@ -12,105 +14,186 @@ import (
"github.com/davecgh/go-spew/spew" "github.com/davecgh/go-spew/spew"
) )
// Mode controls how non-printable characters are handled // Filter flags for character matching
type Mode int
// Sanitization modes
const ( const (
None Mode = iota // No sanitization FilterNonPrintable uint64 = 1 << iota // Matches runes not classified as printable by strconv.IsPrint
HexEncode // Encode as <hex> (current default) FilterControl // Matches control characters (unicode.IsControl)
Strip // Remove control characters FilterWhitespace // Matches whitespace characters (unicode.IsSpace)
Escape // JSON-style escaping FilterShellSpecial // Matches common shell metacharacters: '`', '$', ';', '|', '&', '>', '<', '(', ')', '#'
) )
// Sanitizer provides centralized sanitization logic // Transform flags for character transformation
type Sanitizer struct { const (
mode Mode TransformStrip uint64 = 1 << iota // Removes the character
buf []byte // Reusable buffer TransformHexEncode // Encodes the character's UTF-8 bytes as "<XXYY>"
TransformJSONEscape // Escapes the character with JSON-style backslashes (e.g., '\n', '\u0000')
)
// PolicyPreset defines pre-configured sanitization policies
type PolicyPreset string
const (
PolicyRaw PolicyPreset = "raw" // Default is a no-op (passthrough)
PolicyJSON PolicyPreset = "json" // Policy for sanitizing strings to be embedded in JSON
PolicyTxt PolicyPreset = "txt" // Policy for sanitizing text written to log files
PolicyShell PolicyPreset = "shell" // Policy for sanitizing arguments passed to shell commands
)
// rule represents a single sanitization rule
type rule struct {
filter uint64
transform uint64
} }
func New(mode Mode) *Sanitizer { // policyRules contains pre-configured rules for each policy
var policyRules = map[PolicyPreset][]rule{
PolicyRaw: {},
PolicyTxt: {{filter: FilterNonPrintable, transform: TransformHexEncode}},
PolicyJSON: {{filter: FilterControl, transform: TransformJSONEscape}},
PolicyShell: {{filter: FilterShellSpecial | FilterWhitespace, transform: TransformStrip}},
}
// filterCheckers maps individual filter flags to their check functions
var filterCheckers = map[uint64]func(rune) bool{
FilterNonPrintable: func(r rune) bool { return !strconv.IsPrint(r) },
FilterControl: unicode.IsControl,
FilterWhitespace: unicode.IsSpace,
FilterShellSpecial: func(r rune) bool {
switch r {
case '`', '$', ';', '|', '&', '>', '<', '(', ')', '#':
return true
}
return false
},
}
// Sanitizer provides chainable text sanitization
type Sanitizer struct {
rules []rule
buf []byte
}
// New creates a new Sanitizer instance
func New() *Sanitizer {
return &Sanitizer{ return &Sanitizer{
mode: mode, rules: []rule{},
buf: make([]byte, 0, 256), buf: make([]byte, 0, 256),
} }
} }
func (s *Sanitizer) Reset() { // Rule adds a custom rule to the sanitizer (prepended for precedence)
s.buf = s.buf[:0] func (s *Sanitizer) Rule(filter uint64, transform uint64) *Sanitizer {
// Append rule in natural order
s.rules = append(s.rules, rule{filter: filter, transform: transform})
return s
} }
// Policy applies a pre-configured policy to the sanitizer (appended)
func (s *Sanitizer) Policy(preset PolicyPreset) *Sanitizer {
if rules, ok := policyRules[preset]; ok {
s.rules = append(s.rules, rules...)
}
return s
}
// Sanitize applies all configured rules to the input string
func (s *Sanitizer) Sanitize(data string) string { func (s *Sanitizer) Sanitize(data string) string {
if s.mode == None { // Reset buffer
return data s.buf = s.buf[:0]
}
s.Reset()
// Process each rune
for _, r := range data { for _, r := range data {
if strconv.IsPrint(r) { matched := false
// Check rules in order (first match wins)
for _, rl := range s.rules {
if matchesFilter(r, rl.filter) {
applyTransform(&s.buf, r, rl.transform)
matched = true
break
}
}
// If no rule matched, append original rune
if !matched {
s.buf = utf8.AppendRune(s.buf, r) s.buf = utf8.AppendRune(s.buf, r)
continue
}
switch s.mode {
case HexEncode:
var runeBytes [utf8.UTFMax]byte
n := utf8.EncodeRune(runeBytes[:], r)
s.buf = append(s.buf, '<')
s.buf = append(s.buf, hex.EncodeToString(runeBytes[:n])...)
s.buf = append(s.buf, '>')
case Strip:
// Skip non-printable
continue
case Escape:
switch r {
case '\n':
s.buf = append(s.buf, '\\', 'n')
case '\r':
s.buf = append(s.buf, '\\', 'r')
case '\t':
s.buf = append(s.buf, '\\', 't')
case '\b':
s.buf = append(s.buf, '\\', 'b')
case '\f':
s.buf = append(s.buf, '\\', 'f')
default:
// Unicode escape for other control chars
s.buf = append(s.buf, '\\', 'u')
s.buf = append(s.buf, fmt.Sprintf("%04x", r)...)
}
} }
} }
return string(s.buf) return string(s.buf)
} }
// UnifiedHandler implements all format behaviors in a single struct // matchesFilter checks if a rune matches any filter in the mask
type UnifiedHandler struct { func matchesFilter(r rune, filterMask uint64) bool {
for flag, checker := range filterCheckers {
if (filterMask&flag) != 0 && checker(r) {
return true
}
}
return false
}
// applyTransform applies the specified transform to the buffer
func applyTransform(buf *[]byte, r rune, transformMask uint64) {
switch {
case (transformMask & TransformStrip) != 0:
// Do nothing (strip)
case (transformMask & TransformHexEncode) != 0:
var runeBytes [utf8.UTFMax]byte
n := utf8.EncodeRune(runeBytes[:], r)
*buf = append(*buf, '<')
*buf = append(*buf, hex.EncodeToString(runeBytes[:n])...)
*buf = append(*buf, '>')
case (transformMask & TransformJSONEscape) != 0:
switch r {
case '\n':
*buf = append(*buf, '\\', 'n')
case '\r':
*buf = append(*buf, '\\', 'r')
case '\t':
*buf = append(*buf, '\\', 't')
case '\b':
*buf = append(*buf, '\\', 'b')
case '\f':
*buf = append(*buf, '\\', 'f')
case '"':
*buf = append(*buf, '\\', '"')
case '\\':
*buf = append(*buf, '\\', '\\')
default:
if r < 0x20 || r == 0x7f {
*buf = append(*buf, fmt.Sprintf("\\u%04x", r)...)
} else {
*buf = utf8.AppendRune(*buf, r)
}
}
}
}
// Serializer implements format-specific output behaviors
type Serializer struct {
format string format string
sanitizer *Sanitizer sanitizer *Sanitizer
} }
func NewUnifiedHandler(format string, san *Sanitizer) *UnifiedHandler { // NewSerializer creates a handler with format-specific behavior
return &UnifiedHandler{ func NewSerializer(format string, san *Sanitizer) *Serializer {
return &Serializer{
format: format, format: format,
sanitizer: san, sanitizer: san,
} }
} }
func (h *UnifiedHandler) WriteString(buf *[]byte, s string) { // WriteString writes a string with format-specific handling
switch h.format { func (se *Serializer) WriteString(buf *[]byte, s string) {
switch se.format {
case "raw": case "raw":
*buf = append(*buf, h.sanitizer.Sanitize(s)...) *buf = append(*buf, se.sanitizer.Sanitize(s)...)
case "txt": case "txt":
sanitized := h.sanitizer.Sanitize(s) sanitized := se.sanitizer.Sanitize(s)
if h.NeedsQuotes(sanitized) { if se.NeedsQuotes(sanitized) {
*buf = append(*buf, '"') *buf = append(*buf, '"')
// Escape quotes within quoted strings
for i := 0; i < len(sanitized); i++ { for i := 0; i < len(sanitized); i++ {
if sanitized[i] == '"' || sanitized[i] == '\\' { if sanitized[i] == '"' || sanitized[i] == '\\' {
*buf = append(*buf, '\\') *buf = append(*buf, '\\')
@ -124,12 +207,12 @@ func (h *UnifiedHandler) WriteString(buf *[]byte, s string) {
case "json": case "json":
*buf = append(*buf, '"') *buf = append(*buf, '"')
// Direct JSON escaping without pre-sanitization // Direct JSON escaping
for i := 0; i < len(s); { for i := 0; i < len(s); {
c := s[i] c := s[i]
if c >= ' ' && c != '"' && c != '\\' { if c >= ' ' && c != '"' && c != '\\' && c < 0x7f {
start := i start := i
for i < len(s) && s[i] >= ' ' && s[i] != '"' && s[i] != '\\' { for i < len(s) && s[i] >= ' ' && s[i] != '"' && s[i] != '\\' && s[i] < 0x7f {
i++ i++
} }
*buf = append(*buf, s[start:i]...) *buf = append(*buf, s[start:i]...)
@ -157,27 +240,30 @@ func (h *UnifiedHandler) WriteString(buf *[]byte, s string) {
} }
} }
func (h *UnifiedHandler) WriteNumber(buf *[]byte, n string) { // WriteNumber writes a number value
func (se *Serializer) WriteNumber(buf *[]byte, n string) {
*buf = append(*buf, n...) *buf = append(*buf, n...)
} }
func (h *UnifiedHandler) WriteBool(buf *[]byte, b bool) { // WriteBool writes a boolean value
func (se *Serializer) WriteBool(buf *[]byte, b bool) {
*buf = strconv.AppendBool(*buf, b) *buf = strconv.AppendBool(*buf, b)
} }
func (h *UnifiedHandler) WriteNil(buf *[]byte) { // WriteNil writes a nil value
switch h.format { func (se *Serializer) WriteNil(buf *[]byte) {
switch se.format {
case "raw": case "raw":
*buf = append(*buf, "nil"...) *buf = append(*buf, "nil"...)
default: // txt, json default:
*buf = append(*buf, "null"...) *buf = append(*buf, "null"...)
} }
} }
func (h *UnifiedHandler) WriteComplex(buf *[]byte, v any) { // WriteComplex writes complex types
switch h.format { func (se *Serializer) WriteComplex(buf *[]byte, v any) {
switch se.format {
case "raw": case "raw":
// Use spew for complex types in raw mode, DEBUG use
var b bytes.Buffer var b bytes.Buffer
dumper := &spew.ConfigState{ dumper := &spew.ConfigState{
Indent: " ", Indent: " ",
@ -189,41 +275,37 @@ func (h *UnifiedHandler) WriteComplex(buf *[]byte, v any) {
dumper.Fdump(&b, v) dumper.Fdump(&b, v)
*buf = append(*buf, bytes.TrimSpace(b.Bytes())...) *buf = append(*buf, bytes.TrimSpace(b.Bytes())...)
default: // txt, json default:
str := fmt.Sprintf("%+v", v) str := fmt.Sprintf("%+v", v)
h.WriteString(buf, str) se.WriteString(buf, str)
} }
} }
func (h *UnifiedHandler) NeedsQuotes(s string) bool { // NeedsQuotes determines if quoting is needed
switch h.format { func (se *Serializer) NeedsQuotes(s string) bool {
switch se.format {
case "json": case "json":
return true // JSON always quotes return true
case "txt": case "txt":
// Quote strings that:
// 1. Are empty
if len(s) == 0 { if len(s) == 0 {
return true return true
} }
for _, r := range s { for _, r := range s {
// 2. Contain whitespace (space, tab, newline, etc.)
if unicode.IsSpace(r) { if unicode.IsSpace(r) {
return true return true
} }
// 3. Contain shell special characters (POSIX + common extensions)
switch r { switch r {
case '"', '\'', '\\', '$', '`', '!', '&', '|', ';', case '"', '\'', '\\', '$', '`', '!', '&', '|', ';',
'(', ')', '<', '>', '*', '?', '[', ']', '{', '}', '(', ')', '<', '>', '*', '?', '[', ']', '{', '}',
'~', '#', '%', '=', '\n', '\r', '\t': '~', '#', '%', '=', '\n', '\r', '\t':
return true return true
} }
// 4. Non-print
if !unicode.IsPrint(r) { if !unicode.IsPrint(r) {
return true return true
} }
} }
return false return false
default: // raw default:
return false return false
} }
} }

View File

@ -8,177 +8,202 @@ import (
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
) )
func TestSanitizer(t *testing.T) { func TestNewSanitizer(t *testing.T) {
testCases := []struct { // Default passthrough behavior
name string s := New()
input string input := "abc\x00xyz"
mode Mode assert.Equal(t, input, s.Sanitize(input), "default sanitizer should pass through all characters")
expected string
}{
// None mode tests
{
name: "none mode passes through",
input: "hello\x00world\n",
mode: None,
expected: "hello\x00world\n",
},
// HexEncode tests
{
name: "hex encode null byte",
input: "test\x00data",
mode: HexEncode,
expected: "test<00>data",
},
{
name: "hex encode control chars",
input: "bell\x07tab\x09form\x0c",
mode: HexEncode,
expected: "bell<07>tab<09>form<0c>",
},
{
name: "hex encode preserves printable",
input: "Hello World 123!@#",
mode: HexEncode,
expected: "Hello World 123!@#",
},
{
name: "hex encode multi-byte control",
input: "line1\u0085line2", // NEXT LINE (C2 85)
mode: HexEncode,
expected: "line1<c285>line2",
},
{
name: "hex encode preserves UTF-8",
input: "Hello 世界 ✓",
mode: HexEncode,
expected: "Hello 世界 ✓",
},
// Strip tests
{
name: "strip removes control chars",
input: "clean\x00\x07\ntxt",
mode: Strip,
expected: "cleantxt",
},
{
name: "strip preserves spaces",
input: "hello world",
mode: Strip,
expected: "hello world",
},
// Escape tests
{
name: "escape common control chars",
input: "line1\nline2\ttab\rreturn",
mode: Escape,
expected: "line1\\nline2\\ttab\\rreturn",
},
{
name: "escape unicode control",
input: "text\x01\x1f",
mode: Escape,
expected: "text\\u0001\\u001f",
},
{
name: "escape backspace and form feed",
input: "back\bspace form\ffeed",
mode: Escape,
expected: "back\\bspace form\\ffeed",
},
} }
for _, tc := range testCases { func TestSingleRule(t *testing.T) {
t.Run(tc.name, func(t *testing.T) { t.Run("strip non-printable", func(t *testing.T) {
s := New(tc.mode) s := New().Rule(FilterNonPrintable, TransformStrip)
result := s.Sanitize(tc.input) assert.Equal(t, "ab", s.Sanitize("a\x00b"))
assert.Equal(t, tc.expected, result) assert.Equal(t, "test", s.Sanitize("test\x01\x02\x03"))
})
t.Run("hex encode non-printable", func(t *testing.T) {
s := New().Rule(FilterNonPrintable, TransformHexEncode)
assert.Equal(t, "a<00>b", s.Sanitize("a\x00b"))
assert.Equal(t, "bell<07>tab<09>", s.Sanitize("bell\x07tab\x09"))
})
t.Run("JSON escape control", func(t *testing.T) {
s := New().Rule(FilterControl, TransformJSONEscape)
assert.Equal(t, "line1\\nline2", s.Sanitize("line1\nline2"))
assert.Equal(t, "tab\\there", s.Sanitize("tab\there"))
assert.Equal(t, "null\\u0000byte", s.Sanitize("null\x00byte"))
})
t.Run("strip whitespace", func(t *testing.T) {
s := New().Rule(FilterWhitespace, TransformStrip)
assert.Equal(t, "nospaceshere", s.Sanitize("no spaces here"))
assert.Equal(t, "tabsgone", s.Sanitize("tabs\t\tgone"))
})
t.Run("strip shell special", func(t *testing.T) {
s := New().Rule(FilterShellSpecial, TransformStrip)
assert.Equal(t, "cmd echo test", s.Sanitize("cmd; echo test"))
assert.Equal(t, "no pipes", s.Sanitize("no | pipes"))
assert.Equal(t, "var", s.Sanitize("$var"))
}) })
} }
func TestPolicy(t *testing.T) {
t.Run("PolicyTxt", func(t *testing.T) {
s := New().Policy(PolicyTxt)
assert.Equal(t, "hello<07>world", s.Sanitize("hello\x07world"))
assert.Equal(t, "clean text", s.Sanitize("clean text"))
})
t.Run("PolicyJSON", func(t *testing.T) {
s := New().Policy(PolicyJSON)
assert.Equal(t, "line1\\nline2", s.Sanitize("line1\nline2"))
assert.Equal(t, "\\ttab", s.Sanitize("\ttab"))
})
t.Run("PolicyShellArg", func(t *testing.T) {
s := New().Policy(PolicyShell)
assert.Equal(t, "cmdecho", s.Sanitize("cmd; echo"))
assert.Equal(t, "nospaces", s.Sanitize("no spaces"))
})
} }
func TestUnifiedHandler(t *testing.T) { func TestRulePrecedence(t *testing.T) {
t.Run("raw format", func(t *testing.T) { // With append + forward iteration: Policy is checked before Rule
san := New(HexEncode) s := New().Policy(PolicyTxt).Rule(FilterControl, TransformStrip)
handler := NewUnifiedHandler("raw", san)
// \x07 is both control AND non-printable - matches PolicyTxt first
// \x00 is both control AND non-printable - matches PolicyTxt first
input := "a\x07b\x00c"
expected := "a<07>b<00>c" // FIXED: Policy wins now
result := s.Sanitize(input)
assert.Equal(t, expected, result,
"Policy() is now checked before Rule() - non-printable chars get hex encoded")
}
// TestCompositeFilter verifies that OR-ed filter flags act as one rule.
func TestCompositeFilter(t *testing.T) {
	s := New().Rule(FilterShellSpecial|FilterWhitespace, TransformStrip)
	assert.Equal(t, "cmdechohello", s.Sanitize("cmd; echo hello"))
	assert.Equal(t, "nopipesnospaces", s.Sanitize("no |pipes| no spaces"))
}
// TestChaining verifies rule ordering when rules are added fluently.
func TestChaining(t *testing.T) {
	s := New().
		Rule(FilterWhitespace, TransformStrip).
		Rule(FilterShellSpecial, TransformHexEncode)
	// Shell special chars are checked first (prepended), get hex encoded
	// Whitespace rule is second, strips spaces
	assert.Equal(t, "cmd<3b>echohello", s.Sanitize("cmd; echo hello"))
}
// TestMultipleRulesOrder ensures the first matching rule wins when two
// rules share the same filter.
func TestMultipleRulesOrder(t *testing.T) {
	// Test that first matching rule wins
	s := New().
		Rule(FilterControl, TransformStrip).
		Rule(FilterControl, TransformHexEncode) // This should never match
	assert.Equal(t, "ab", s.Sanitize("a\x00b"), "first rule should win")
}
// TestEdgeCases covers boundary inputs: empty strings, all-sanitizable
// input, and multi-byte UTF-8 handling.
func TestEdgeCases(t *testing.T) {
	t.Run("empty string", func(t *testing.T) {
		s := New().Rule(FilterNonPrintable, TransformStrip)
		assert.Equal(t, "", s.Sanitize(""))
	})
	t.Run("only sanitizable characters", func(t *testing.T) {
		s := New().Rule(FilterNonPrintable, TransformStrip)
		assert.Equal(t, "", s.Sanitize("\x00\x01\x02\x03"))
	})
	t.Run("multi-byte UTF-8", func(t *testing.T) {
		s := New().Rule(FilterNonPrintable, TransformHexEncode)
		input := "Hello 世界 ✓"
		assert.Equal(t, input, s.Sanitize(input), "UTF-8 should pass through")
	})
	t.Run("multi-byte control character", func(t *testing.T) {
		s := New().Rule(FilterNonPrintable, TransformHexEncode)
		// NEL (Next Line) is U+0085, encoded as C2 85 in UTF-8
		assert.Equal(t, "line1<c285>line2", s.Sanitize("line1\u0085line2"))
	})
}
func TestSerializer(t *testing.T) {
t.Run("raw format with sanitizer", func(t *testing.T) {
san := New().Rule(FilterNonPrintable, TransformHexEncode)
handler := NewSerializer("raw", san)
var buf []byte var buf []byte
// String handling
handler.WriteString(&buf, "test\x00data") handler.WriteString(&buf, "test\x00data")
assert.Equal(t, "test<00>data", string(buf)) assert.Equal(t, "test<00>data", string(buf))
// Nil handling
buf = nil
handler.WriteNil(&buf)
assert.Equal(t, "nil", string(buf))
// No quotes needed
assert.False(t, handler.NeedsQuotes("any string"))
}) })
t.Run("txt format", func(t *testing.T) { t.Run("txt format with quotes", func(t *testing.T) {
san := New(HexEncode) san := New() // No sanitization
handler := NewUnifiedHandler("txt", san) handler := NewSerializer("txt", san)
var buf []byte var buf []byte
// String with spaces gets quoted
handler.WriteString(&buf, "hello world") handler.WriteString(&buf, "hello world")
assert.Equal(t, `"hello world"`, string(buf)) assert.Equal(t, `"hello world"`, string(buf))
// String without spaces unquoted
buf = nil buf = nil
handler.WriteString(&buf, "single") handler.WriteString(&buf, "nospace")
assert.Equal(t, "single", string(buf)) assert.Equal(t, "nospace", string(buf))
// Nil handling
buf = nil
handler.WriteNil(&buf)
assert.Equal(t, "null", string(buf))
// Quotes needed for empty or space-containing
assert.True(t, handler.NeedsQuotes(""))
assert.True(t, handler.NeedsQuotes("has space"))
assert.False(t, handler.NeedsQuotes("nospace"))
}) })
t.Run("json format", func(t *testing.T) { t.Run("json format escaping", func(t *testing.T) {
san := New(Escape) // Not used for JSON, direct escaping san := New() // JSON handler does its own escaping
handler := NewUnifiedHandler("json", san) handler := NewSerializer("json", san)
var buf []byte var buf []byte
// JSON escaping
handler.WriteString(&buf, "line1\nline2\t\"quoted\"") handler.WriteString(&buf, "line1\nline2\t\"quoted\"")
assert.Equal(t, `"line1\nline2\t\"quoted\""`, string(buf)) assert.Equal(t, `"line1\nline2\t\"quoted\""`, string(buf))
// Control char escaping
buf = nil buf = nil
handler.WriteString(&buf, "null\x00byte") handler.WriteString(&buf, "null\x00byte")
assert.Equal(t, `"null\u0000byte"`, string(buf)) assert.Equal(t, `"null\u0000byte"`, string(buf))
// Always quotes
assert.True(t, handler.NeedsQuotes("anything"))
}) })
t.Run("complex value handling", func(t *testing.T) { t.Run("complex value handling", func(t *testing.T) {
san := New(HexEncode) san := New()
handler := NewSerializer("raw", san)
// Raw uses spew
rawHandler := NewUnifiedHandler("raw", san)
var buf []byte var buf []byte
rawHandler.WriteComplex(&buf, map[string]int{"a": 1}) handler.WriteComplex(&buf, map[string]int{"a": 1})
assert.Contains(t, string(buf), "map[") assert.Contains(t, string(buf), "map[")
// Txt/JSON use fmt.Sprintf
txtHandler := NewUnifiedHandler("txt", san)
buf = nil
txtHandler.WriteComplex(&buf, []int{1, 2, 3})
assert.Contains(t, string(buf), "[1 2 3]")
}) })
t.Run("nil handling", func(t *testing.T) {
san := New()
rawHandler := NewSerializer("raw", san)
var buf []byte
rawHandler.WriteNil(&buf)
assert.Equal(t, "nil", string(buf))
jsonHandler := NewSerializer("json", san)
buf = nil
jsonHandler.WriteNil(&buf)
assert.Equal(t, "null", string(buf))
})
}
func TestPolicyWithCustomRules(t *testing.T) {
s := New().
Policy(PolicyTxt).
Rule(FilterControl, TransformStrip).
Rule(FilterWhitespace, TransformJSONEscape)
// \x07 is non-printable AND control - matches PolicyTxt first (hex encode)
// \x7F is non-printable but NOT control - matches PolicyTxt (hex encode)
input := "a\x07b c\x7Fd"
result := s.Sanitize(input)
assert.Equal(t, "a<07>b c<7f>d", result) // FIXED: \x07 now hex encoded
} }
func BenchmarkSanitizer(b *testing.B) { func BenchmarkSanitizer(b *testing.B) {
@ -186,21 +211,31 @@ func BenchmarkSanitizer(b *testing.B) {
benchmarks := []struct { benchmarks := []struct {
name string name string
mode Mode sanitizer *Sanitizer
}{ }{
{"None", None}, {"Passthrough", New()},
{"HexEncode", HexEncode}, {"SingleRule", New().Rule(FilterNonPrintable, TransformHexEncode)},
{"Strip", Strip}, {"Policy", New().Policy(PolicyTxt)},
{"Escape", Escape}, {"Complex", New().
Policy(PolicyTxt).
Rule(FilterControl, TransformStrip).
Rule(FilterWhitespace, TransformJSONEscape)},
} }
for _, bm := range benchmarks { for _, bm := range benchmarks {
b.Run(bm.name, func(b *testing.B) { b.Run(bm.name, func(b *testing.B) {
s := New(bm.mode)
b.ResetTimer() b.ResetTimer()
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
_ = s.Sanitize(input) _ = bm.sanitizer.Sanitize(input)
} }
}) })
} }
} }
// TestTransformPriority asserts that when multiple transform flags are
// OR-ed into one rule, only the first flag checked is applied.
func TestTransformPriority(t *testing.T) {
	// Test that only one transform is applied per rule
	s := New().Rule(FilterControl, TransformStrip|TransformHexEncode)
	// Should strip (first flag checked), not hex encode
	assert.Equal(t, "ab", s.Sanitize("a\x00b"))
}

View File

@ -19,7 +19,7 @@ func TestLogRotation(t *testing.T) {
defer logger.Shutdown() defer logger.Shutdown()
cfg := logger.GetConfig() cfg := logger.GetConfig()
cfg.MaxSizeKB = 1000 // 1MB cfg.MaxSizeKB = 100 // 100KB
cfg.FlushIntervalMs = 10 // Fast flush for testing cfg.FlushIntervalMs = 10 // Fast flush for testing
logger.ApplyConfig(cfg) logger.ApplyConfig(cfg)
@ -27,11 +27,11 @@ func TestLogRotation(t *testing.T) {
// Account for timestamp, level, and other formatting overhead // Account for timestamp, level, and other formatting overhead
// A typical log line overhead is ~50-100 bytes // A typical log line overhead is ~50-100 bytes
const overhead = 100 const overhead = 100
const targetMessageSize = 50000 // 50KB per message const targetMessageSize = 5000 // 5KB per message
largeData := strings.Repeat("x", targetMessageSize) largeData := strings.Repeat("x", targetMessageSize)
// Write enough to exceed 1MB twice (should cause at least one rotation) // Write enough to exceed 1MB twice (should cause at least one rotation)
messagesNeeded := (2 * sizeMultiplier * 1000) / (targetMessageSize + overhead) // ~40 messages messagesNeeded := int((2 * sizeMultiplier * cfg.MaxSizeKB) / (targetMessageSize + overhead)) // ~40 messages
for i := 0; i < messagesNeeded; i++ { for i := 0; i < messagesNeeded; i++ {
logger.Info(fmt.Sprintf("msg%d:", i), largeData) logger.Info(fmt.Sprintf("msg%d:", i), largeData)