v0.1.2 sanitizer redesigned with policies and rules
This commit is contained in:
@ -1,4 +1,6 @@
|
||||
// FILE: lixenwraith/log/sanitizer/sanitizer.go
|
||||
// Package sanitizer provides a fluent and composable interface for sanitizing
|
||||
// strings based on configurable rules using bitwise filter flags and transforms.
|
||||
package sanitizer
|
||||
|
||||
import (
|
||||
@ -12,105 +14,186 @@ import (
|
||||
"github.com/davecgh/go-spew/spew"
|
||||
)
|
||||
|
||||
// Mode controls how non-printable characters are handled
|
||||
type Mode int
|
||||
|
||||
// Sanitization modes
|
||||
// Filter flags for character matching
|
||||
const (
|
||||
None Mode = iota // No sanitization
|
||||
HexEncode // Encode as <hex> (current default)
|
||||
Strip // Remove control characters
|
||||
Escape // JSON-style escaping
|
||||
FilterNonPrintable uint64 = 1 << iota // Matches runes not classified as printable by strconv.IsPrint
|
||||
FilterControl // Matches control characters (unicode.IsControl)
|
||||
FilterWhitespace // Matches whitespace characters (unicode.IsSpace)
|
||||
FilterShellSpecial // Matches common shell metacharacters: '`', '$', ';', '|', '&', '>', '<', '(', ')', '#'
|
||||
)
|
||||
|
||||
// Sanitizer provides centralized sanitization logic
|
||||
type Sanitizer struct {
|
||||
mode Mode
|
||||
buf []byte // Reusable buffer
|
||||
// Transform flags for character transformation.
//
// A rule carries one transform mask. applyTransform tests the flags in the
// order Strip, HexEncode, JSONEscape and applies only the first one that is
// set, so combining flags in a single rule does not chain transforms.
const (
	TransformStrip      uint64 = 1 << iota // Removes the character
	TransformHexEncode                     // Encodes the character's UTF-8 bytes as lowercase hex in angle brackets, e.g. "<c285>"
	TransformJSONEscape                    // Escapes the character with JSON-style backslashes (e.g., '\n', '\u0000')
)
|
||||
|
||||
// PolicyPreset defines pre-configured sanitization policies.
// Each preset is a key into policyRules; Sanitizer.Policy appends the rules
// registered for the preset to the sanitizer.
type PolicyPreset string

// Built-in policy presets.
const (
	PolicyRaw   PolicyPreset = "raw"   // Default is a no-op (passthrough)
	PolicyJSON  PolicyPreset = "json"  // Policy for sanitizing strings to be embedded in JSON
	PolicyTxt   PolicyPreset = "txt"   // Policy for sanitizing text written to log files
	PolicyShell PolicyPreset = "shell" // Policy for sanitizing arguments passed to shell commands
)
|
||||
|
||||
// rule represents a single sanitization rule: a rune that matches any flag
// set in filter is rewritten according to transform.
type rule struct {
	filter    uint64 // bitmask of Filter* flags; a rune matches if any flagged checker accepts it
	transform uint64 // bitmask of Transform* flags; see applyTransform for precedence
}
|
||||
|
||||
func New(mode Mode) *Sanitizer {
|
||||
return &Sanitizer{
|
||||
mode: mode,
|
||||
buf: make([]byte, 0, 256),
|
||||
}
|
||||
// policyRules contains pre-configured rules for each policy.
// Sanitizer.Policy copies the listed rules onto the sanitizer's rule list;
// presets missing from this map are ignored by Policy.
var policyRules = map[PolicyPreset][]rule{
	// No rules: nothing is matched, so every rune is left as-is.
	PolicyRaw: {},
	// Non-printable runes become "<hex>" sequences.
	PolicyTxt: {{filter: FilterNonPrintable, transform: TransformHexEncode}},
	// Control characters become JSON backslash escapes.
	PolicyJSON: {{filter: FilterControl, transform: TransformJSONEscape}},
	// Shell metacharacters and whitespace are removed.
	PolicyShell: {{filter: FilterShellSpecial | FilterWhitespace, transform: TransformStrip}},
}
|
||||
|
||||
func (s *Sanitizer) Reset() {
|
||||
s.buf = s.buf[:0]
|
||||
}
|
||||
|
||||
func (s *Sanitizer) Sanitize(data string) string {
|
||||
if s.mode == None {
|
||||
return data
|
||||
}
|
||||
|
||||
s.Reset()
|
||||
|
||||
for _, r := range data {
|
||||
if strconv.IsPrint(r) {
|
||||
s.buf = utf8.AppendRune(s.buf, r)
|
||||
continue
|
||||
// filterCheckers maps individual filter flags to their check functions.
// matchesFilter consults every entry whose flag is set in a rule's filter
// mask and reports a match as soon as one checker accepts the rune.
var filterCheckers = map[uint64]func(rune) bool{
	FilterNonPrintable: func(r rune) bool { return !strconv.IsPrint(r) },
	FilterControl:      unicode.IsControl,
	FilterWhitespace:   unicode.IsSpace,
	FilterShellSpecial: func(r rune) bool {
		// NOTE(review): this set omits quotes, backslash and glob characters
		// ('"', '\'', '\\', '*', '?'); confirm that is intended before relying
		// on it for shell safety.
		switch r {
		case '`', '$', ';', '|', '&', '>', '<', '(', ')', '#':
			return true
		}
		return false
	},
}
|
||||
|
||||
switch s.mode {
|
||||
case HexEncode:
|
||||
var runeBytes [utf8.UTFMax]byte
|
||||
n := utf8.EncodeRune(runeBytes[:], r)
|
||||
s.buf = append(s.buf, '<')
|
||||
s.buf = append(s.buf, hex.EncodeToString(runeBytes[:n])...)
|
||||
s.buf = append(s.buf, '>')
|
||||
// Sanitizer provides chainable text sanitization.
//
// Rules are evaluated in the order they were added and the first matching
// rule decides how a rune is transformed. Sanitize reuses buf across calls,
// so a single Sanitizer should not be shared between goroutines without
// external synchronization.
type Sanitizer struct {
	rules []rule // ordered rule list, built up by Rule and Policy
	buf   []byte // scratch buffer reset and refilled by Sanitize
}
|
||||
|
||||
case Strip:
|
||||
// Skip non-printable
|
||||
continue
|
||||
// New creates a new Sanitizer instance
|
||||
func New() *Sanitizer {
|
||||
return &Sanitizer{
|
||||
rules: []rule{},
|
||||
buf: make([]byte, 0, 256),
|
||||
}
|
||||
}
|
||||
|
||||
case Escape:
|
||||
switch r {
|
||||
case '\n':
|
||||
s.buf = append(s.buf, '\\', 'n')
|
||||
case '\r':
|
||||
s.buf = append(s.buf, '\\', 'r')
|
||||
case '\t':
|
||||
s.buf = append(s.buf, '\\', 't')
|
||||
case '\b':
|
||||
s.buf = append(s.buf, '\\', 'b')
|
||||
case '\f':
|
||||
s.buf = append(s.buf, '\\', 'f')
|
||||
default:
|
||||
// Unicode escape for other control chars
|
||||
s.buf = append(s.buf, '\\', 'u')
|
||||
s.buf = append(s.buf, fmt.Sprintf("%04x", r)...)
|
||||
// Rule adds a custom rule to the sanitizer (prepended for precedence)
|
||||
func (s *Sanitizer) Rule(filter uint64, transform uint64) *Sanitizer {
|
||||
// Append rule in natural order
|
||||
s.rules = append(s.rules, rule{filter: filter, transform: transform})
|
||||
return s
|
||||
}
|
||||
|
||||
// Policy applies a pre-configured policy to the sanitizer (appended)
|
||||
func (s *Sanitizer) Policy(preset PolicyPreset) *Sanitizer {
|
||||
if rules, ok := policyRules[preset]; ok {
|
||||
s.rules = append(s.rules, rules...)
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
// Sanitize applies all configured rules to the input string
|
||||
func (s *Sanitizer) Sanitize(data string) string {
|
||||
// Reset buffer
|
||||
s.buf = s.buf[:0]
|
||||
|
||||
// Process each rune
|
||||
for _, r := range data {
|
||||
matched := false
|
||||
// Check rules in order (first match wins)
|
||||
for _, rl := range s.rules {
|
||||
if matchesFilter(r, rl.filter) {
|
||||
applyTransform(&s.buf, r, rl.transform)
|
||||
matched = true
|
||||
break
|
||||
}
|
||||
}
|
||||
// If no rule matched, append original rune
|
||||
if !matched {
|
||||
s.buf = utf8.AppendRune(s.buf, r)
|
||||
}
|
||||
}
|
||||
|
||||
return string(s.buf)
|
||||
}
|
||||
|
||||
// UnifiedHandler implements all format behaviors in a single struct
|
||||
type UnifiedHandler struct {
|
||||
// matchesFilter checks if a rune matches any filter in the mask
|
||||
func matchesFilter(r rune, filterMask uint64) bool {
|
||||
for flag, checker := range filterCheckers {
|
||||
if (filterMask&flag) != 0 && checker(r) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// applyTransform applies the specified transform to the buffer
|
||||
func applyTransform(buf *[]byte, r rune, transformMask uint64) {
|
||||
switch {
|
||||
case (transformMask & TransformStrip) != 0:
|
||||
// Do nothing (strip)
|
||||
|
||||
case (transformMask & TransformHexEncode) != 0:
|
||||
var runeBytes [utf8.UTFMax]byte
|
||||
n := utf8.EncodeRune(runeBytes[:], r)
|
||||
*buf = append(*buf, '<')
|
||||
*buf = append(*buf, hex.EncodeToString(runeBytes[:n])...)
|
||||
*buf = append(*buf, '>')
|
||||
|
||||
case (transformMask & TransformJSONEscape) != 0:
|
||||
switch r {
|
||||
case '\n':
|
||||
*buf = append(*buf, '\\', 'n')
|
||||
case '\r':
|
||||
*buf = append(*buf, '\\', 'r')
|
||||
case '\t':
|
||||
*buf = append(*buf, '\\', 't')
|
||||
case '\b':
|
||||
*buf = append(*buf, '\\', 'b')
|
||||
case '\f':
|
||||
*buf = append(*buf, '\\', 'f')
|
||||
case '"':
|
||||
*buf = append(*buf, '\\', '"')
|
||||
case '\\':
|
||||
*buf = append(*buf, '\\', '\\')
|
||||
default:
|
||||
if r < 0x20 || r == 0x7f {
|
||||
*buf = append(*buf, fmt.Sprintf("\\u%04x", r)...)
|
||||
} else {
|
||||
*buf = utf8.AppendRune(*buf, r)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Serializer implements format-specific output behaviors.
// Its methods switch on format to decide quoting, escaping and nil
// representation, and use sanitizer to clean string values.
type Serializer struct {
	format    string     // output format name; the methods branch on "raw", "txt" and "json"
	sanitizer *Sanitizer // sanitizer applied to string values by WriteString
}
|
||||
|
||||
func NewUnifiedHandler(format string, san *Sanitizer) *UnifiedHandler {
|
||||
return &UnifiedHandler{
|
||||
// NewSerializer creates a handler with format-specific behavior
|
||||
func NewSerializer(format string, san *Sanitizer) *Serializer {
|
||||
return &Serializer{
|
||||
format: format,
|
||||
sanitizer: san,
|
||||
}
|
||||
}
|
||||
|
||||
func (h *UnifiedHandler) WriteString(buf *[]byte, s string) {
|
||||
switch h.format {
|
||||
// WriteString writes a string with format-specific handling
|
||||
func (se *Serializer) WriteString(buf *[]byte, s string) {
|
||||
switch se.format {
|
||||
case "raw":
|
||||
*buf = append(*buf, h.sanitizer.Sanitize(s)...)
|
||||
*buf = append(*buf, se.sanitizer.Sanitize(s)...)
|
||||
|
||||
case "txt":
|
||||
sanitized := h.sanitizer.Sanitize(s)
|
||||
if h.NeedsQuotes(sanitized) {
|
||||
sanitized := se.sanitizer.Sanitize(s)
|
||||
if se.NeedsQuotes(sanitized) {
|
||||
*buf = append(*buf, '"')
|
||||
// Escape quotes within quoted strings
|
||||
for i := 0; i < len(sanitized); i++ {
|
||||
if sanitized[i] == '"' || sanitized[i] == '\\' {
|
||||
*buf = append(*buf, '\\')
|
||||
@ -124,12 +207,12 @@ func (h *UnifiedHandler) WriteString(buf *[]byte, s string) {
|
||||
|
||||
case "json":
|
||||
*buf = append(*buf, '"')
|
||||
// Direct JSON escaping without pre-sanitization
|
||||
// Direct JSON escaping
|
||||
for i := 0; i < len(s); {
|
||||
c := s[i]
|
||||
if c >= ' ' && c != '"' && c != '\\' {
|
||||
if c >= ' ' && c != '"' && c != '\\' && c < 0x7f {
|
||||
start := i
|
||||
for i < len(s) && s[i] >= ' ' && s[i] != '"' && s[i] != '\\' {
|
||||
for i < len(s) && s[i] >= ' ' && s[i] != '"' && s[i] != '\\' && s[i] < 0x7f {
|
||||
i++
|
||||
}
|
||||
*buf = append(*buf, s[start:i]...)
|
||||
@ -157,27 +240,30 @@ func (h *UnifiedHandler) WriteString(buf *[]byte, s string) {
|
||||
}
|
||||
}
|
||||
|
||||
func (h *UnifiedHandler) WriteNumber(buf *[]byte, n string) {
|
||||
// WriteNumber writes a number value
|
||||
func (se *Serializer) WriteNumber(buf *[]byte, n string) {
|
||||
*buf = append(*buf, n...)
|
||||
}
|
||||
|
||||
func (h *UnifiedHandler) WriteBool(buf *[]byte, b bool) {
|
||||
// WriteBool writes a boolean value
|
||||
func (se *Serializer) WriteBool(buf *[]byte, b bool) {
|
||||
*buf = strconv.AppendBool(*buf, b)
|
||||
}
|
||||
|
||||
func (h *UnifiedHandler) WriteNil(buf *[]byte) {
|
||||
switch h.format {
|
||||
// WriteNil writes a nil value
|
||||
func (se *Serializer) WriteNil(buf *[]byte) {
|
||||
switch se.format {
|
||||
case "raw":
|
||||
*buf = append(*buf, "nil"...)
|
||||
default: // txt, json
|
||||
default:
|
||||
*buf = append(*buf, "null"...)
|
||||
}
|
||||
}
|
||||
|
||||
func (h *UnifiedHandler) WriteComplex(buf *[]byte, v any) {
|
||||
switch h.format {
|
||||
// WriteComplex writes complex types
|
||||
func (se *Serializer) WriteComplex(buf *[]byte, v any) {
|
||||
switch se.format {
|
||||
case "raw":
|
||||
// Use spew for complex types in raw mode, DEBUG use
|
||||
var b bytes.Buffer
|
||||
dumper := &spew.ConfigState{
|
||||
Indent: " ",
|
||||
@ -189,41 +275,37 @@ func (h *UnifiedHandler) WriteComplex(buf *[]byte, v any) {
|
||||
dumper.Fdump(&b, v)
|
||||
*buf = append(*buf, bytes.TrimSpace(b.Bytes())...)
|
||||
|
||||
default: // txt, json
|
||||
default:
|
||||
str := fmt.Sprintf("%+v", v)
|
||||
h.WriteString(buf, str)
|
||||
se.WriteString(buf, str)
|
||||
}
|
||||
}
|
||||
|
||||
func (h *UnifiedHandler) NeedsQuotes(s string) bool {
|
||||
switch h.format {
|
||||
// NeedsQuotes determines if quoting is needed
|
||||
func (se *Serializer) NeedsQuotes(s string) bool {
|
||||
switch se.format {
|
||||
case "json":
|
||||
return true // JSON always quotes
|
||||
return true
|
||||
case "txt":
|
||||
// Quote strings that:
|
||||
// 1. Are empty
|
||||
if len(s) == 0 {
|
||||
return true
|
||||
}
|
||||
for _, r := range s {
|
||||
// 2. Contain whitespace (space, tab, newline, etc.)
|
||||
if unicode.IsSpace(r) {
|
||||
return true
|
||||
}
|
||||
// 3. Contain shell special characters (POSIX + common extensions)
|
||||
switch r {
|
||||
case '"', '\'', '\\', '$', '`', '!', '&', '|', ';',
|
||||
'(', ')', '<', '>', '*', '?', '[', ']', '{', '}',
|
||||
'~', '#', '%', '=', '\n', '\r', '\t':
|
||||
return true
|
||||
}
|
||||
// 4. Non-print
|
||||
if !unicode.IsPrint(r) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
default: // raw
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
@ -8,199 +8,234 @@ import (
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestSanitizer(t *testing.T) {
|
||||
testCases := []struct {
|
||||
name string
|
||||
input string
|
||||
mode Mode
|
||||
expected string
|
||||
}{
|
||||
// None mode tests
|
||||
{
|
||||
name: "none mode passes through",
|
||||
input: "hello\x00world\n",
|
||||
mode: None,
|
||||
expected: "hello\x00world\n",
|
||||
},
|
||||
|
||||
// HexEncode tests
|
||||
{
|
||||
name: "hex encode null byte",
|
||||
input: "test\x00data",
|
||||
mode: HexEncode,
|
||||
expected: "test<00>data",
|
||||
},
|
||||
{
|
||||
name: "hex encode control chars",
|
||||
input: "bell\x07tab\x09form\x0c",
|
||||
mode: HexEncode,
|
||||
expected: "bell<07>tab<09>form<0c>",
|
||||
},
|
||||
{
|
||||
name: "hex encode preserves printable",
|
||||
input: "Hello World 123!@#",
|
||||
mode: HexEncode,
|
||||
expected: "Hello World 123!@#",
|
||||
},
|
||||
{
|
||||
name: "hex encode multi-byte control",
|
||||
input: "line1\u0085line2", // NEXT LINE (C2 85)
|
||||
mode: HexEncode,
|
||||
expected: "line1<c285>line2",
|
||||
},
|
||||
{
|
||||
name: "hex encode preserves UTF-8",
|
||||
input: "Hello 世界 ✓",
|
||||
mode: HexEncode,
|
||||
expected: "Hello 世界 ✓",
|
||||
},
|
||||
|
||||
// Strip tests
|
||||
{
|
||||
name: "strip removes control chars",
|
||||
input: "clean\x00\x07\ntxt",
|
||||
mode: Strip,
|
||||
expected: "cleantxt",
|
||||
},
|
||||
{
|
||||
name: "strip preserves spaces",
|
||||
input: "hello world",
|
||||
mode: Strip,
|
||||
expected: "hello world",
|
||||
},
|
||||
|
||||
// Escape tests
|
||||
{
|
||||
name: "escape common control chars",
|
||||
input: "line1\nline2\ttab\rreturn",
|
||||
mode: Escape,
|
||||
expected: "line1\\nline2\\ttab\\rreturn",
|
||||
},
|
||||
{
|
||||
name: "escape unicode control",
|
||||
input: "text\x01\x1f",
|
||||
mode: Escape,
|
||||
expected: "text\\u0001\\u001f",
|
||||
},
|
||||
{
|
||||
name: "escape backspace and form feed",
|
||||
input: "back\bspace form\ffeed",
|
||||
mode: Escape,
|
||||
expected: "back\\bspace form\\ffeed",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
s := New(tc.mode)
|
||||
result := s.Sanitize(tc.input)
|
||||
assert.Equal(t, tc.expected, result)
|
||||
})
|
||||
}
|
||||
func TestNewSanitizer(t *testing.T) {
|
||||
// Default passthrough behavior
|
||||
s := New()
|
||||
input := "abc\x00xyz"
|
||||
assert.Equal(t, input, s.Sanitize(input), "default sanitizer should pass through all characters")
|
||||
}
|
||||
|
||||
func TestUnifiedHandler(t *testing.T) {
|
||||
t.Run("raw format", func(t *testing.T) {
|
||||
san := New(HexEncode)
|
||||
handler := NewUnifiedHandler("raw", san)
|
||||
|
||||
var buf []byte
|
||||
|
||||
// String handling
|
||||
handler.WriteString(&buf, "test\x00data")
|
||||
assert.Equal(t, "test<00>data", string(buf))
|
||||
|
||||
// Nil handling
|
||||
buf = nil
|
||||
handler.WriteNil(&buf)
|
||||
assert.Equal(t, "nil", string(buf))
|
||||
|
||||
// No quotes needed
|
||||
assert.False(t, handler.NeedsQuotes("any string"))
|
||||
func TestSingleRule(t *testing.T) {
|
||||
t.Run("strip non-printable", func(t *testing.T) {
|
||||
s := New().Rule(FilterNonPrintable, TransformStrip)
|
||||
assert.Equal(t, "ab", s.Sanitize("a\x00b"))
|
||||
assert.Equal(t, "test", s.Sanitize("test\x01\x02\x03"))
|
||||
})
|
||||
|
||||
t.Run("txt format", func(t *testing.T) {
|
||||
san := New(HexEncode)
|
||||
handler := NewUnifiedHandler("txt", san)
|
||||
t.Run("hex encode non-printable", func(t *testing.T) {
|
||||
s := New().Rule(FilterNonPrintable, TransformHexEncode)
|
||||
assert.Equal(t, "a<00>b", s.Sanitize("a\x00b"))
|
||||
assert.Equal(t, "bell<07>tab<09>", s.Sanitize("bell\x07tab\x09"))
|
||||
})
|
||||
|
||||
t.Run("JSON escape control", func(t *testing.T) {
|
||||
s := New().Rule(FilterControl, TransformJSONEscape)
|
||||
assert.Equal(t, "line1\\nline2", s.Sanitize("line1\nline2"))
|
||||
assert.Equal(t, "tab\\there", s.Sanitize("tab\there"))
|
||||
assert.Equal(t, "null\\u0000byte", s.Sanitize("null\x00byte"))
|
||||
})
|
||||
|
||||
t.Run("strip whitespace", func(t *testing.T) {
|
||||
s := New().Rule(FilterWhitespace, TransformStrip)
|
||||
assert.Equal(t, "nospaceshere", s.Sanitize("no spaces here"))
|
||||
assert.Equal(t, "tabsgone", s.Sanitize("tabs\t\tgone"))
|
||||
})
|
||||
|
||||
t.Run("strip shell special", func(t *testing.T) {
|
||||
s := New().Rule(FilterShellSpecial, TransformStrip)
|
||||
assert.Equal(t, "cmd echo test", s.Sanitize("cmd; echo test"))
|
||||
assert.Equal(t, "no pipes", s.Sanitize("no | pipes"))
|
||||
assert.Equal(t, "var", s.Sanitize("$var"))
|
||||
})
|
||||
}
|
||||
|
||||
func TestPolicy(t *testing.T) {
|
||||
t.Run("PolicyTxt", func(t *testing.T) {
|
||||
s := New().Policy(PolicyTxt)
|
||||
assert.Equal(t, "hello<07>world", s.Sanitize("hello\x07world"))
|
||||
assert.Equal(t, "clean text", s.Sanitize("clean text"))
|
||||
})
|
||||
|
||||
t.Run("PolicyJSON", func(t *testing.T) {
|
||||
s := New().Policy(PolicyJSON)
|
||||
assert.Equal(t, "line1\\nline2", s.Sanitize("line1\nline2"))
|
||||
assert.Equal(t, "\\ttab", s.Sanitize("\ttab"))
|
||||
})
|
||||
|
||||
t.Run("PolicyShellArg", func(t *testing.T) {
|
||||
s := New().Policy(PolicyShell)
|
||||
assert.Equal(t, "cmdecho", s.Sanitize("cmd; echo"))
|
||||
assert.Equal(t, "nospaces", s.Sanitize("no spaces"))
|
||||
})
|
||||
}
|
||||
|
||||
func TestRulePrecedence(t *testing.T) {
|
||||
// With append + forward iteration: Policy is checked before Rule
|
||||
s := New().Policy(PolicyTxt).Rule(FilterControl, TransformStrip)
|
||||
|
||||
// \x07 is both control AND non-printable - matches PolicyTxt first
|
||||
// \x00 is both control AND non-printable - matches PolicyTxt first
|
||||
input := "a\x07b\x00c"
|
||||
expected := "a<07>b<00>c" // FIXED: Policy wins now
|
||||
result := s.Sanitize(input)
|
||||
|
||||
assert.Equal(t, expected, result,
|
||||
"Policy() is now checked before Rule() - non-printable chars get hex encoded")
|
||||
}
|
||||
|
||||
func TestCompositeFilter(t *testing.T) {
|
||||
s := New().Rule(FilterShellSpecial|FilterWhitespace, TransformStrip)
|
||||
assert.Equal(t, "cmdechohello", s.Sanitize("cmd; echo hello"))
|
||||
assert.Equal(t, "nopipesnospaces", s.Sanitize("no |pipes| no spaces"))
|
||||
}
|
||||
|
||||
// TestChaining verifies that several Rule calls can be chained and that each
// rune is handled by the rule whose filter matches it.
func TestChaining(t *testing.T) {
	s := New().
		Rule(FilterWhitespace, TransformStrip).
		Rule(FilterShellSpecial, TransformHexEncode)

	// Rules are appended, so the whitespace rule is checked first and strips spaces.
	// ';' is not whitespace, falls through to the shell-special rule and is hex encoded.
	assert.Equal(t, "cmd<3b>echohello", s.Sanitize("cmd; echo hello"))
}
|
||||
|
||||
func TestMultipleRulesOrder(t *testing.T) {
|
||||
// Test that first matching rule wins
|
||||
s := New().
|
||||
Rule(FilterControl, TransformStrip).
|
||||
Rule(FilterControl, TransformHexEncode) // This should never match
|
||||
|
||||
assert.Equal(t, "ab", s.Sanitize("a\x00b"), "first rule should win")
|
||||
}
|
||||
|
||||
func TestEdgeCases(t *testing.T) {
|
||||
t.Run("empty string", func(t *testing.T) {
|
||||
s := New().Rule(FilterNonPrintable, TransformStrip)
|
||||
assert.Equal(t, "", s.Sanitize(""))
|
||||
})
|
||||
|
||||
t.Run("only sanitizable characters", func(t *testing.T) {
|
||||
s := New().Rule(FilterNonPrintable, TransformStrip)
|
||||
assert.Equal(t, "", s.Sanitize("\x00\x01\x02\x03"))
|
||||
})
|
||||
|
||||
t.Run("multi-byte UTF-8", func(t *testing.T) {
|
||||
s := New().Rule(FilterNonPrintable, TransformHexEncode)
|
||||
input := "Hello 世界 ✓"
|
||||
assert.Equal(t, input, s.Sanitize(input), "UTF-8 should pass through")
|
||||
})
|
||||
|
||||
t.Run("multi-byte control character", func(t *testing.T) {
|
||||
s := New().Rule(FilterNonPrintable, TransformHexEncode)
|
||||
// NEL (Next Line) is U+0085, encoded as C2 85 in UTF-8
|
||||
assert.Equal(t, "line1<c285>line2", s.Sanitize("line1\u0085line2"))
|
||||
})
|
||||
}
|
||||
|
||||
func TestSerializer(t *testing.T) {
|
||||
t.Run("raw format with sanitizer", func(t *testing.T) {
|
||||
san := New().Rule(FilterNonPrintable, TransformHexEncode)
|
||||
handler := NewSerializer("raw", san)
|
||||
|
||||
var buf []byte
|
||||
handler.WriteString(&buf, "test\x00data")
|
||||
assert.Equal(t, "test<00>data", string(buf))
|
||||
})
|
||||
|
||||
// String with spaces gets quoted
|
||||
t.Run("txt format with quotes", func(t *testing.T) {
|
||||
san := New() // No sanitization
|
||||
handler := NewSerializer("txt", san)
|
||||
|
||||
var buf []byte
|
||||
handler.WriteString(&buf, "hello world")
|
||||
assert.Equal(t, `"hello world"`, string(buf))
|
||||
|
||||
// String without spaces unquoted
|
||||
buf = nil
|
||||
handler.WriteString(&buf, "single")
|
||||
assert.Equal(t, "single", string(buf))
|
||||
|
||||
// Nil handling
|
||||
buf = nil
|
||||
handler.WriteNil(&buf)
|
||||
assert.Equal(t, "null", string(buf))
|
||||
|
||||
// Quotes needed for empty or space-containing
|
||||
assert.True(t, handler.NeedsQuotes(""))
|
||||
assert.True(t, handler.NeedsQuotes("has space"))
|
||||
assert.False(t, handler.NeedsQuotes("nospace"))
|
||||
handler.WriteString(&buf, "nospace")
|
||||
assert.Equal(t, "nospace", string(buf))
|
||||
})
|
||||
|
||||
t.Run("json format", func(t *testing.T) {
|
||||
san := New(Escape) // Not used for JSON, direct escaping
|
||||
handler := NewUnifiedHandler("json", san)
|
||||
t.Run("json format escaping", func(t *testing.T) {
|
||||
san := New() // JSON handler does its own escaping
|
||||
handler := NewSerializer("json", san)
|
||||
|
||||
var buf []byte
|
||||
|
||||
// JSON escaping
|
||||
handler.WriteString(&buf, "line1\nline2\t\"quoted\"")
|
||||
assert.Equal(t, `"line1\nline2\t\"quoted\""`, string(buf))
|
||||
|
||||
// Control char escaping
|
||||
buf = nil
|
||||
handler.WriteString(&buf, "null\x00byte")
|
||||
assert.Equal(t, `"null\u0000byte"`, string(buf))
|
||||
|
||||
// Always quotes
|
||||
assert.True(t, handler.NeedsQuotes("anything"))
|
||||
})
|
||||
|
||||
t.Run("complex value handling", func(t *testing.T) {
|
||||
san := New(HexEncode)
|
||||
san := New()
|
||||
handler := NewSerializer("raw", san)
|
||||
|
||||
// Raw uses spew
|
||||
rawHandler := NewUnifiedHandler("raw", san)
|
||||
var buf []byte
|
||||
rawHandler.WriteComplex(&buf, map[string]int{"a": 1})
|
||||
handler.WriteComplex(&buf, map[string]int{"a": 1})
|
||||
assert.Contains(t, string(buf), "map[")
|
||||
|
||||
// Txt/JSON use fmt.Sprintf
|
||||
txtHandler := NewUnifiedHandler("txt", san)
|
||||
buf = nil
|
||||
txtHandler.WriteComplex(&buf, []int{1, 2, 3})
|
||||
assert.Contains(t, string(buf), "[1 2 3]")
|
||||
})
|
||||
|
||||
t.Run("nil handling", func(t *testing.T) {
|
||||
san := New()
|
||||
|
||||
rawHandler := NewSerializer("raw", san)
|
||||
var buf []byte
|
||||
rawHandler.WriteNil(&buf)
|
||||
assert.Equal(t, "nil", string(buf))
|
||||
|
||||
jsonHandler := NewSerializer("json", san)
|
||||
buf = nil
|
||||
jsonHandler.WriteNil(&buf)
|
||||
assert.Equal(t, "null", string(buf))
|
||||
})
|
||||
}
|
||||
|
||||
// TestPolicyWithCustomRules verifies that rules from a policy added first
// take precedence over custom rules added afterwards.
func TestPolicyWithCustomRules(t *testing.T) {
	s := New().
		Policy(PolicyTxt).
		Rule(FilterControl, TransformStrip).
		Rule(FilterWhitespace, TransformJSONEscape)

	// \x07 is non-printable AND control - matches PolicyTxt first (hex encode)
	// \x7F (DEL) is likewise non-printable AND control - PolicyTxt still wins (hex encode)
	// ' ' is printable, so only the whitespace rule matches; JSON escaping leaves a space unchanged
	input := "a\x07b c\x7Fd"
	result := s.Sanitize(input)

	assert.Equal(t, "a<07>b c<7f>d", result) // control chars are hex encoded by the policy, not stripped
}
|
||||
|
||||
func BenchmarkSanitizer(b *testing.B) {
|
||||
input := strings.Repeat("normal text\x00\n\t", 100)
|
||||
|
||||
benchmarks := []struct {
|
||||
name string
|
||||
mode Mode
|
||||
name string
|
||||
sanitizer *Sanitizer
|
||||
}{
|
||||
{"None", None},
|
||||
{"HexEncode", HexEncode},
|
||||
{"Strip", Strip},
|
||||
{"Escape", Escape},
|
||||
{"Passthrough", New()},
|
||||
{"SingleRule", New().Rule(FilterNonPrintable, TransformHexEncode)},
|
||||
{"Policy", New().Policy(PolicyTxt)},
|
||||
{"Complex", New().
|
||||
Policy(PolicyTxt).
|
||||
Rule(FilterControl, TransformStrip).
|
||||
Rule(FilterWhitespace, TransformJSONEscape)},
|
||||
}
|
||||
|
||||
for _, bm := range benchmarks {
|
||||
b.Run(bm.name, func(b *testing.B) {
|
||||
s := New(bm.mode)
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
_ = s.Sanitize(input)
|
||||
_ = bm.sanitizer.Sanitize(input)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestTransformPriority(t *testing.T) {
|
||||
// Test that only one transform is applied per rule
|
||||
s := New().Rule(FilterControl, TransformStrip|TransformHexEncode)
|
||||
|
||||
// Should strip (first flag checked), not hex encode
|
||||
assert.Equal(t, "ab", s.Sanitize("a\x00b"))
|
||||
}
|
||||
Reference in New Issue
Block a user