v0.1.2 sanitizer redesigned with policies and rules

2025-11-15 13:23:18 -05:00
parent af162755dd
commit b2be5cec88
9 changed files with 496 additions and 338 deletions
--- a/sanitizer/sanitizer.go
+++ b/sanitizer/sanitizer.go
@ -1,4 +1,6 @@
 // FILE: lixenwraith/log/sanitizer/sanitizer.go
+// Package sanitizer provides a fluent and composable interface for sanitizing
+// strings based on configurable rules using bitwise filter flags and transforms.
 package sanitizer

 import (
@ -12,105 +14,186 @@ import (
 	"github.com/davecgh/go-spew/spew"
 )

-// Mode controls how non-printable characters are handled
-type Mode int
-
-// Sanitization modes
+// Filter flags for character matching
 const (
-	None      Mode = iota // No sanitization
-	HexEncode             // Encode as <hex> (current default)
-	Strip                 // Remove control characters
-	Escape                // JSON-style escaping
+	FilterNonPrintable uint64 = 1 << iota // Matches runes not classified as printable by strconv.IsPrint
+	FilterControl                         // Matches control characters (unicode.IsControl)
+	FilterWhitespace                      // Matches whitespace characters (unicode.IsSpace)
+	FilterShellSpecial                    // Matches common shell metacharacters: '`', '$', ';', '|', '&', '>', '<', '(', ')', '#'
 )

-// Sanitizer provides centralized sanitization logic
-type Sanitizer struct {
-	mode Mode
-	buf  []byte // Reusable buffer
+// Transform flags for character transformation
+const (
+	TransformStrip      uint64 = 1 << iota // Removes the character
+	TransformHexEncode                     // Encodes the character's UTF-8 bytes as lowercase hex in angle brackets (e.g., "<c285>")
+	TransformJSONEscape                    // Escapes the character with JSON-style backslashes (e.g., '\n', '\u0000')
+)
+
+// PolicyPreset defines pre-configured sanitization policies
+type PolicyPreset string
+
+const (
+	PolicyRaw   PolicyPreset = "raw"   // No-op policy: adds no rules (passthrough)
+	PolicyJSON  PolicyPreset = "json"  // Policy for sanitizing strings to be embedded in JSON
+	PolicyTxt   PolicyPreset = "txt"   // Policy for sanitizing text written to log files
+	PolicyShell PolicyPreset = "shell" // Policy for sanitizing arguments passed to shell commands
+)
+
+// rule represents a single sanitization rule
+type rule struct {
+	filter    uint64
+	transform uint64
 }

-func New(mode Mode) *Sanitizer {
-	return &Sanitizer{
-		mode: mode,
-		buf:  make([]byte, 0, 256),
-	}
+// policyRules contains pre-configured rules for each policy
+var policyRules = map[PolicyPreset][]rule{
+	PolicyRaw:   {},
+	PolicyTxt:   {{filter: FilterNonPrintable, transform: TransformHexEncode}},
+	PolicyJSON:  {{filter: FilterControl, transform: TransformJSONEscape}},
+	PolicyShell: {{filter: FilterShellSpecial | FilterWhitespace, transform: TransformStrip}},
 }

-func (s *Sanitizer) Reset() {
-	s.buf = s.buf[:0]
-}
-
-func (s *Sanitizer) Sanitize(data string) string {
-	if s.mode == None {
-		return data
-	}
-
-	s.Reset()
-
-	for _, r := range data {
-		if strconv.IsPrint(r) {
-			s.buf = utf8.AppendRune(s.buf, r)
-			continue
+// filterCheckers maps individual filter flags to their check functions
+var filterCheckers = map[uint64]func(rune) bool{
+	FilterNonPrintable: func(r rune) bool { return !strconv.IsPrint(r) },
+	FilterControl:      unicode.IsControl,
+	FilterWhitespace:   unicode.IsSpace,
+	FilterShellSpecial: func(r rune) bool {
+		switch r {
+		case '`', '$', ';', '|', '&', '>', '<', '(', ')', '#':
+			return true
 		}
+		return false
+	},
+}

-		switch s.mode {
-		case HexEncode:
-			var runeBytes [utf8.UTFMax]byte
-			n := utf8.EncodeRune(runeBytes[:], r)
-			s.buf = append(s.buf, '<')
-			s.buf = append(s.buf, hex.EncodeToString(runeBytes[:n])...)
-			s.buf = append(s.buf, '>')
+// Sanitizer provides chainable text sanitization
+type Sanitizer struct {
+	rules []rule
+	buf   []byte
+}

-		case Strip:
-			// Skip non-printable
-			continue
+// New creates a new Sanitizer instance
+func New() *Sanitizer {
+	return &Sanitizer{
+		rules: []rule{},
+		buf:   make([]byte, 0, 256),
+	}
+}

-		case Escape:
-			switch r {
-			case '\n':
-				s.buf = append(s.buf, '\\', 'n')
-			case '\r':
-				s.buf = append(s.buf, '\\', 'r')
-			case '\t':
-				s.buf = append(s.buf, '\\', 't')
-			case '\b':
-				s.buf = append(s.buf, '\\', 'b')
-			case '\f':
-				s.buf = append(s.buf, '\\', 'f')
-			default:
-				// Unicode escape for other control chars
-				s.buf = append(s.buf, '\\', 'u')
-				s.buf = append(s.buf, fmt.Sprintf("%04x", r)...)
+// Rule appends a custom rule to the sanitizer; rules are checked in insertion order and the first match wins
+func (s *Sanitizer) Rule(filter uint64, transform uint64) *Sanitizer {
+	// Append rule in natural order
+	s.rules = append(s.rules, rule{filter: filter, transform: transform})
+	return s
+}
+
+// Policy appends the rules of a pre-configured policy to the sanitizer; unknown presets are ignored
+func (s *Sanitizer) Policy(preset PolicyPreset) *Sanitizer {
+	if rules, ok := policyRules[preset]; ok {
+		s.rules = append(s.rules, rules...)
+	}
+	return s
+}
+
+// Sanitize applies all configured rules to the input string
+func (s *Sanitizer) Sanitize(data string) string {
+	// Reset buffer
+	s.buf = s.buf[:0]
+
+	// Process each rune
+	for _, r := range data {
+		matched := false
+		// Check rules in order (first match wins)
+		for _, rl := range s.rules {
+			if matchesFilter(r, rl.filter) {
+				applyTransform(&s.buf, r, rl.transform)
+				matched = true
+				break
 			}
 		}
+		// If no rule matched, append original rune
+		if !matched {
+			s.buf = utf8.AppendRune(s.buf, r)
+		}
 	}

 	return string(s.buf)
 }

-// UnifiedHandler implements all format behaviors in a single struct
-type UnifiedHandler struct {
+// matchesFilter checks if a rune matches any filter in the mask
+func matchesFilter(r rune, filterMask uint64) bool {
+	for flag, checker := range filterCheckers {
+		if (filterMask&flag) != 0 && checker(r) {
+			return true
+		}
+	}
+	return false
+}
+
+// applyTransform applies exactly one transform from the mask to the buffer (priority: Strip, then HexEncode, then JSONEscape)
+func applyTransform(buf *[]byte, r rune, transformMask uint64) {
+	switch {
+	case (transformMask & TransformStrip) != 0:
+		// Do nothing (strip)
+
+	case (transformMask & TransformHexEncode) != 0:
+		var runeBytes [utf8.UTFMax]byte
+		n := utf8.EncodeRune(runeBytes[:], r)
+		*buf = append(*buf, '<')
+		*buf = append(*buf, hex.EncodeToString(runeBytes[:n])...)
+		*buf = append(*buf, '>')
+
+	case (transformMask & TransformJSONEscape) != 0:
+		switch r {
+		case '\n':
+			*buf = append(*buf, '\\', 'n')
+		case '\r':
+			*buf = append(*buf, '\\', 'r')
+		case '\t':
+			*buf = append(*buf, '\\', 't')
+		case '\b':
+			*buf = append(*buf, '\\', 'b')
+		case '\f':
+			*buf = append(*buf, '\\', 'f')
+		case '"':
+			*buf = append(*buf, '\\', '"')
+		case '\\':
+			*buf = append(*buf, '\\', '\\')
+		default:
+			if r < 0x20 || r == 0x7f {
+				*buf = append(*buf, fmt.Sprintf("\\u%04x", r)...)
+			} else {
+				*buf = utf8.AppendRune(*buf, r)
+			}
+		}
+	}
+}
+
+// Serializer implements format-specific output behaviors
+type Serializer struct {
 	format    string
 	sanitizer *Sanitizer
 }

-func NewUnifiedHandler(format string, san *Sanitizer) *UnifiedHandler {
-	return &UnifiedHandler{
+// NewSerializer creates a handler with format-specific behavior
+func NewSerializer(format string, san *Sanitizer) *Serializer {
+	return &Serializer{
 		format:    format,
 		sanitizer: san,
 	}
 }

-func (h *UnifiedHandler) WriteString(buf *[]byte, s string) {
-	switch h.format {
+// WriteString writes a string with format-specific handling
+func (se *Serializer) WriteString(buf *[]byte, s string) {
+	switch se.format {
 	case "raw":
-		*buf = append(*buf, h.sanitizer.Sanitize(s)...)
+		*buf = append(*buf, se.sanitizer.Sanitize(s)...)

 	case "txt":
-		sanitized := h.sanitizer.Sanitize(s)
-		if h.NeedsQuotes(sanitized) {
+		sanitized := se.sanitizer.Sanitize(s)
+		if se.NeedsQuotes(sanitized) {
 			*buf = append(*buf, '"')
-			// Escape quotes within quoted strings
 			for i := 0; i < len(sanitized); i++ {
 				if sanitized[i] == '"' || sanitized[i] == '\\' {
 					*buf = append(*buf, '\\')
@ -124,12 +207,12 @@ func (h *UnifiedHandler) WriteString(buf *[]byte, s string) {

 	case "json":
 		*buf = append(*buf, '"')
-		// Direct JSON escaping without pre-sanitization
+		// Direct JSON escaping
 		for i := 0; i < len(s); {
 			c := s[i]
-			if c >= ' ' && c != '"' && c != '\\' {
+			if c >= ' ' && c != '"' && c != '\\' && c < 0x7f {
 				start := i
-				for i < len(s) && s[i] >= ' ' && s[i] != '"' && s[i] != '\\' {
+				for i < len(s) && s[i] >= ' ' && s[i] != '"' && s[i] != '\\' && s[i] < 0x7f {
 					i++
 				}
 				*buf = append(*buf, s[start:i]...)
@ -157,27 +240,30 @@ func (h *UnifiedHandler) WriteString(buf *[]byte, s string) {
 	}
 }

-func (h *UnifiedHandler) WriteNumber(buf *[]byte, n string) {
+// WriteNumber writes a number value
+func (se *Serializer) WriteNumber(buf *[]byte, n string) {
 	*buf = append(*buf, n...)
 }

-func (h *UnifiedHandler) WriteBool(buf *[]byte, b bool) {
+// WriteBool writes a boolean value
+func (se *Serializer) WriteBool(buf *[]byte, b bool) {
 	*buf = strconv.AppendBool(*buf, b)
 }

-func (h *UnifiedHandler) WriteNil(buf *[]byte) {
-	switch h.format {
+// WriteNil writes a nil value
+func (se *Serializer) WriteNil(buf *[]byte) {
+	switch se.format {
 	case "raw":
 		*buf = append(*buf, "nil"...)
-	default: // txt, json
+	default:
 		*buf = append(*buf, "null"...)
 	}
 }

-func (h *UnifiedHandler) WriteComplex(buf *[]byte, v any) {
-	switch h.format {
+// WriteComplex writes complex types
+func (se *Serializer) WriteComplex(buf *[]byte, v any) {
+	switch se.format {
 	case "raw":
-		// Use spew for complex types in raw mode, DEBUG use
 		var b bytes.Buffer
 		dumper := &spew.ConfigState{
 			Indent:                  " ",
@ -189,41 +275,37 @@ func (h *UnifiedHandler) WriteComplex(buf *[]byte, v any) {
 		dumper.Fdump(&b, v)
 		*buf = append(*buf, bytes.TrimSpace(b.Bytes())...)

-	default: // txt, json
+	default:
 		str := fmt.Sprintf("%+v", v)
-		h.WriteString(buf, str)
+		se.WriteString(buf, str)
 	}
 }

-func (h *UnifiedHandler) NeedsQuotes(s string) bool {
-	switch h.format {
+// NeedsQuotes determines if quoting is needed
+func (se *Serializer) NeedsQuotes(s string) bool {
+	switch se.format {
 	case "json":
-		return true // JSON always quotes
+		return true
 	case "txt":
-		// Quote strings that:
-		// 1. Are empty
 		if len(s) == 0 {
 			return true
 		}
 		for _, r := range s {
-			// 2. Contain whitespace (space, tab, newline, etc.)
 			if unicode.IsSpace(r) {
 				return true
 			}
-			// 3. Contain shell special characters (POSIX + common extensions)
 			switch r {
 			case '"', '\'', '\\', '$', '`', '!', '&', '|', ';',
 				'(', ')', '<', '>', '*', '?', '[', ']', '{', '}',
 				'~', '#', '%', '=', '\n', '\r', '\t':
 				return true
 			}
-			// 4. Non-print
 			if !unicode.IsPrint(r) {
 				return true
 			}
 		}
 		return false
-	default: // raw
+	default:
 		return false
 	}
 }
--- a/sanitizer/sanitizer_test.go
+++ b/sanitizer/sanitizer_test.go
@ -8,199 +8,234 @@ import (
 	"github.com/stretchr/testify/assert"
 )

-func TestSanitizer(t *testing.T) {
-	testCases := []struct {
-		name     string
-		input    string
-		mode     Mode
-		expected string
-	}{
-		// None mode tests
-		{
-			name:     "none mode passes through",
-			input:    "hello\x00world\n",
-			mode:     None,
-			expected: "hello\x00world\n",
-		},
-
-		// HexEncode tests
-		{
-			name:     "hex encode null byte",
-			input:    "test\x00data",
-			mode:     HexEncode,
-			expected: "test<00>data",
-		},
-		{
-			name:     "hex encode control chars",
-			input:    "bell\x07tab\x09form\x0c",
-			mode:     HexEncode,
-			expected: "bell<07>tab<09>form<0c>",
-		},
-		{
-			name:     "hex encode preserves printable",
-			input:    "Hello World 123!@#",
-			mode:     HexEncode,
-			expected: "Hello World 123!@#",
-		},
-		{
-			name:     "hex encode multi-byte control",
-			input:    "line1\u0085line2", // NEXT LINE (C2 85)
-			mode:     HexEncode,
-			expected: "line1<c285>line2",
-		},
-		{
-			name:     "hex encode preserves UTF-8",
-			input:    "Hello 世界 ✓",
-			mode:     HexEncode,
-			expected: "Hello 世界 ✓",
-		},
-
-		// Strip tests
-		{
-			name:     "strip removes control chars",
-			input:    "clean\x00\x07\ntxt",
-			mode:     Strip,
-			expected: "cleantxt",
-		},
-		{
-			name:     "strip preserves spaces",
-			input:    "hello world",
-			mode:     Strip,
-			expected: "hello world",
-		},
-
-		// Escape tests
-		{
-			name:     "escape common control chars",
-			input:    "line1\nline2\ttab\rreturn",
-			mode:     Escape,
-			expected: "line1\\nline2\\ttab\\rreturn",
-		},
-		{
-			name:     "escape unicode control",
-			input:    "text\x01\x1f",
-			mode:     Escape,
-			expected: "text\\u0001\\u001f",
-		},
-		{
-			name:     "escape backspace and form feed",
-			input:    "back\bspace form\ffeed",
-			mode:     Escape,
-			expected: "back\\bspace form\\ffeed",
-		},
-	}
-
-	for _, tc := range testCases {
-		t.Run(tc.name, func(t *testing.T) {
-			s := New(tc.mode)
-			result := s.Sanitize(tc.input)
-			assert.Equal(t, tc.expected, result)
-		})
-	}
+func TestNewSanitizer(t *testing.T) {
+	// Default passthrough behavior
+	s := New()
+	input := "abc\x00xyz"
+	assert.Equal(t, input, s.Sanitize(input), "default sanitizer should pass through all characters")
 }

-func TestUnifiedHandler(t *testing.T) {
-	t.Run("raw format", func(t *testing.T) {
-		san := New(HexEncode)
-		handler := NewUnifiedHandler("raw", san)
-
-		var buf []byte
-
-		// String handling
-		handler.WriteString(&buf, "test\x00data")
-		assert.Equal(t, "test<00>data", string(buf))
-
-		// Nil handling
-		buf = nil
-		handler.WriteNil(&buf)
-		assert.Equal(t, "nil", string(buf))
-
-		// No quotes needed
-		assert.False(t, handler.NeedsQuotes("any string"))
+func TestSingleRule(t *testing.T) {
+	t.Run("strip non-printable", func(t *testing.T) {
+		s := New().Rule(FilterNonPrintable, TransformStrip)
+		assert.Equal(t, "ab", s.Sanitize("a\x00b"))
+		assert.Equal(t, "test", s.Sanitize("test\x01\x02\x03"))
 	})

-	t.Run("txt format", func(t *testing.T) {
-		san := New(HexEncode)
-		handler := NewUnifiedHandler("txt", san)
+	t.Run("hex encode non-printable", func(t *testing.T) {
+		s := New().Rule(FilterNonPrintable, TransformHexEncode)
+		assert.Equal(t, "a<00>b", s.Sanitize("a\x00b"))
+		assert.Equal(t, "bell<07>tab<09>", s.Sanitize("bell\x07tab\x09"))
+	})
+
+	t.Run("JSON escape control", func(t *testing.T) {
+		s := New().Rule(FilterControl, TransformJSONEscape)
+		assert.Equal(t, "line1\\nline2", s.Sanitize("line1\nline2"))
+		assert.Equal(t, "tab\\there", s.Sanitize("tab\there"))
+		assert.Equal(t, "null\\u0000byte", s.Sanitize("null\x00byte"))
+	})
+
+	t.Run("strip whitespace", func(t *testing.T) {
+		s := New().Rule(FilterWhitespace, TransformStrip)
+		assert.Equal(t, "nospaceshere", s.Sanitize("no spaces here"))
+		assert.Equal(t, "tabsgone", s.Sanitize("tabs\t\tgone"))
+	})
+
+	t.Run("strip shell special", func(t *testing.T) {
+		s := New().Rule(FilterShellSpecial, TransformStrip)
+		assert.Equal(t, "cmd echo test", s.Sanitize("cmd; echo test"))
+		assert.Equal(t, "no  pipes", s.Sanitize("no | pipes"))
+		assert.Equal(t, "var", s.Sanitize("$var"))
+	})
+}
+
+func TestPolicy(t *testing.T) {
+	t.Run("PolicyTxt", func(t *testing.T) {
+		s := New().Policy(PolicyTxt)
+		assert.Equal(t, "hello<07>world", s.Sanitize("hello\x07world"))
+		assert.Equal(t, "clean text", s.Sanitize("clean text"))
+	})
+
+	t.Run("PolicyJSON", func(t *testing.T) {
+		s := New().Policy(PolicyJSON)
+		assert.Equal(t, "line1\\nline2", s.Sanitize("line1\nline2"))
+		assert.Equal(t, "\\ttab", s.Sanitize("\ttab"))
+	})
+
+	t.Run("PolicyShellArg", func(t *testing.T) {
+		s := New().Policy(PolicyShell)
+		assert.Equal(t, "cmdecho", s.Sanitize("cmd; echo"))
+		assert.Equal(t, "nospaces", s.Sanitize("no spaces"))
+	})
+}
+
+func TestRulePrecedence(t *testing.T) {
+	// With append + forward iteration: Policy is checked before Rule
+	s := New().Policy(PolicyTxt).Rule(FilterControl, TransformStrip)
+
+	// \x07 is both control AND non-printable - matches PolicyTxt first
+	// \x00 is both control AND non-printable - matches PolicyTxt first
+	input := "a\x07b\x00c"
+	expected := "a<07>b<00>c" // FIXED: Policy wins now
+	result := s.Sanitize(input)
+
+	assert.Equal(t, expected, result,
+		"Policy() is now checked before Rule() - non-printable chars get hex encoded")
+}
+
+func TestCompositeFilter(t *testing.T) {
+	s := New().Rule(FilterShellSpecial|FilterWhitespace, TransformStrip)
+	assert.Equal(t, "cmdechohello", s.Sanitize("cmd; echo hello"))
+	assert.Equal(t, "nopipesnospaces", s.Sanitize("no |pipes| no spaces"))
+}
+
+func TestChaining(t *testing.T) {
+	s := New().
+		Rule(FilterWhitespace, TransformStrip).
+		Rule(FilterShellSpecial, TransformHexEncode)
+
+	// Whitespace rule was added first, so it is checked first and strips spaces
+	// Shell special rule is checked second and hex encodes ';'
+	assert.Equal(t, "cmd<3b>echohello", s.Sanitize("cmd; echo hello"))
+}
+
+func TestMultipleRulesOrder(t *testing.T) {
+	// Test that first matching rule wins
+	s := New().
+		Rule(FilterControl, TransformStrip).
+		Rule(FilterControl, TransformHexEncode) // This should never match
+
+	assert.Equal(t, "ab", s.Sanitize("a\x00b"), "first rule should win")
+}
+
+func TestEdgeCases(t *testing.T) {
+	t.Run("empty string", func(t *testing.T) {
+		s := New().Rule(FilterNonPrintable, TransformStrip)
+		assert.Equal(t, "", s.Sanitize(""))
+	})
+
+	t.Run("only sanitizable characters", func(t *testing.T) {
+		s := New().Rule(FilterNonPrintable, TransformStrip)
+		assert.Equal(t, "", s.Sanitize("\x00\x01\x02\x03"))
+	})
+
+	t.Run("multi-byte UTF-8", func(t *testing.T) {
+		s := New().Rule(FilterNonPrintable, TransformHexEncode)
+		input := "Hello 世界 ✓"
+		assert.Equal(t, input, s.Sanitize(input), "UTF-8 should pass through")
+	})
+
+	t.Run("multi-byte control character", func(t *testing.T) {
+		s := New().Rule(FilterNonPrintable, TransformHexEncode)
+		// NEL (Next Line) is U+0085, encoded as C2 85 in UTF-8
+		assert.Equal(t, "line1<c285>line2", s.Sanitize("line1\u0085line2"))
+	})
+}
+
+func TestSerializer(t *testing.T) {
+	t.Run("raw format with sanitizer", func(t *testing.T) {
+		san := New().Rule(FilterNonPrintable, TransformHexEncode)
+		handler := NewSerializer("raw", san)

 		var buf []byte
+		handler.WriteString(&buf, "test\x00data")
+		assert.Equal(t, "test<00>data", string(buf))
+	})

-		// String with spaces gets quoted
+	t.Run("txt format with quotes", func(t *testing.T) {
+		san := New() // No sanitization
+		handler := NewSerializer("txt", san)
+
+		var buf []byte
 		handler.WriteString(&buf, "hello world")
 		assert.Equal(t, `"hello world"`, string(buf))

-		// String without spaces unquoted
 		buf = nil
-		handler.WriteString(&buf, "single")
-		assert.Equal(t, "single", string(buf))
-
-		// Nil handling
-		buf = nil
-		handler.WriteNil(&buf)
-		assert.Equal(t, "null", string(buf))
-
-		// Quotes needed for empty or space-containing
-		assert.True(t, handler.NeedsQuotes(""))
-		assert.True(t, handler.NeedsQuotes("has space"))
-		assert.False(t, handler.NeedsQuotes("nospace"))
+		handler.WriteString(&buf, "nospace")
+		assert.Equal(t, "nospace", string(buf))
 	})

-	t.Run("json format", func(t *testing.T) {
-		san := New(Escape) // Not used for JSON, direct escaping
-		handler := NewUnifiedHandler("json", san)
+	t.Run("json format escaping", func(t *testing.T) {
+		san := New() // JSON handler does its own escaping
+		handler := NewSerializer("json", san)

 		var buf []byte
-
-		// JSON escaping
 		handler.WriteString(&buf, "line1\nline2\t\"quoted\"")
 		assert.Equal(t, `"line1\nline2\t\"quoted\""`, string(buf))

-		// Control char escaping
 		buf = nil
 		handler.WriteString(&buf, "null\x00byte")
 		assert.Equal(t, `"null\u0000byte"`, string(buf))
-
-		// Always quotes
-		assert.True(t, handler.NeedsQuotes("anything"))
 	})

 	t.Run("complex value handling", func(t *testing.T) {
-		san := New(HexEncode)
+		san := New()
+		handler := NewSerializer("raw", san)

-		// Raw uses spew
-		rawHandler := NewUnifiedHandler("raw", san)
 		var buf []byte
-		rawHandler.WriteComplex(&buf, map[string]int{"a": 1})
+		handler.WriteComplex(&buf, map[string]int{"a": 1})
 		assert.Contains(t, string(buf), "map[")
-
-		// Txt/JSON use fmt.Sprintf
-		txtHandler := NewUnifiedHandler("txt", san)
-		buf = nil
-		txtHandler.WriteComplex(&buf, []int{1, 2, 3})
-		assert.Contains(t, string(buf), "[1 2 3]")
 	})
+
+	t.Run("nil handling", func(t *testing.T) {
+		san := New()
+
+		rawHandler := NewSerializer("raw", san)
+		var buf []byte
+		rawHandler.WriteNil(&buf)
+		assert.Equal(t, "nil", string(buf))
+
+		jsonHandler := NewSerializer("json", san)
+		buf = nil
+		jsonHandler.WriteNil(&buf)
+		assert.Equal(t, "null", string(buf))
+	})
+}
+
+func TestPolicyWithCustomRules(t *testing.T) {
+	s := New().
+		Policy(PolicyTxt).
+		Rule(FilterControl, TransformStrip).
+		Rule(FilterWhitespace, TransformJSONEscape)
+
+	// \x07 is non-printable AND control - matches PolicyTxt first (hex encode)
+	// \x7F (DEL) is also non-printable AND control (unicode.IsControl is true for U+007F) - matches PolicyTxt first (hex encode)
+	input := "a\x07b c\x7Fd"
+	result := s.Sanitize(input)
+
+	assert.Equal(t, "a<07>b c<7f>d", result) // FIXED: \x07 now hex encoded
 }

 func BenchmarkSanitizer(b *testing.B) {
 	input := strings.Repeat("normal text\x00\n\t", 100)

 	benchmarks := []struct {
-		name string
-		mode Mode
+		name      string
+		sanitizer *Sanitizer
 	}{
-		{"None", None},
-		{"HexEncode", HexEncode},
-		{"Strip", Strip},
-		{"Escape", Escape},
+		{"Passthrough", New()},
+		{"SingleRule", New().Rule(FilterNonPrintable, TransformHexEncode)},
+		{"Policy", New().Policy(PolicyTxt)},
+		{"Complex", New().
+			Policy(PolicyTxt).
+			Rule(FilterControl, TransformStrip).
+			Rule(FilterWhitespace, TransformJSONEscape)},
 	}

 	for _, bm := range benchmarks {
 		b.Run(bm.name, func(b *testing.B) {
-			s := New(bm.mode)
 			b.ResetTimer()
 			for i := 0; i < b.N; i++ {
-				_ = s.Sanitize(input)
+				_ = bm.sanitizer.Sanitize(input)
 			}
 		})
 	}
+}
+
+func TestTransformPriority(t *testing.T) {
+	// Test that only one transform is applied per rule
+	s := New().Rule(FilterControl, TransformStrip|TransformHexEncode)
+
+	// Should strip (first flag checked), not hex encode
+	assert.Equal(t, "ab", s.Sanitize("a\x00b"))
 }