v0.1.1 format refactored, sanitizer added

This commit is contained in:
2025-11-15 04:32:39 -05:00
parent 1379455528
commit af162755dd
16 changed files with 794 additions and 544 deletions

229
sanitizer/sanitizer.go Normal file
View File

@ -0,0 +1,229 @@
// FILE: lixenwraith/log/sanitizer/sanitizer.go
package sanitizer
import (
"bytes"
"encoding/hex"
"fmt"
"strconv"
"unicode"
"unicode/utf8"
"github.com/davecgh/go-spew/spew"
)
// Mode controls how non-printable characters are handled.
type Mode int

// Sanitization modes.
const (
	None      Mode = iota // No sanitization
	HexEncode             // Encode as <hex> (current default)
	Strip                 // Remove control characters
	Escape                // JSON-style escaping
)

// Sanitizer provides centralized sanitization logic.
//
// Sanitize rewrites the shared scratch buffer on every call and takes no lock,
// so one Sanitizer must not be used by several goroutines at the same time.
type Sanitizer struct {
	mode Mode
	buf  []byte // Reusable buffer
}

// New returns a Sanitizer that applies mode. The scratch buffer starts empty
// with room for 256 bytes.
func New(mode Mode) *Sanitizer {
	return &Sanitizer{
		mode: mode,
		buf:  make([]byte, 0, 256),
	}
}

// Reset truncates the scratch buffer to zero length while keeping its capacity.
func (s *Sanitizer) Reset() {
	s.buf = s.buf[:0]
}

// Sanitize returns data with every rune that strconv.IsPrint rejects rewritten
// according to the sanitizer's mode:
//
//   - None:      data is returned unchanged (no scan is performed).
//   - HexEncode: the rune's UTF-8 bytes as lowercase hex between '<' and '>'.
//   - Strip:     the rune is dropped.
//   - Escape:    \n, \r, \t, \b, \f for those characters, \uXXXX otherwise;
//     runes above U+FFFF become a UTF-16 surrogate pair (\uXXXX\uXXXX) so the
//     escape is always exactly four hex digits wide.
//
// Printable runes are copied through as-is. With a mode value outside the four
// constants, non-printable runes are dropped. The result is a fresh copy and
// stays valid after later calls.
func (s *Sanitizer) Sanitize(data string) string {
	if s.mode == None {
		return data
	}
	s.Reset()
	for _, r := range data {
		if strconv.IsPrint(r) {
			s.buf = utf8.AppendRune(s.buf, r)
			continue
		}
		switch s.mode {
		case HexEncode:
			s.buf = appendHexRune(s.buf, r)
		case Strip:
			// Non-printable runes are simply omitted.
		case Escape:
			s.buf = appendEscapedRune(s.buf, r)
		}
	}
	return string(s.buf)
}

// appendHexRune appends "<hh...>" to dst, where hh... is the lowercase hex of
// r's UTF-8 encoding. Fixed-size arrays are used as scratch space so no
// intermediate string is built.
func appendHexRune(dst []byte, r rune) []byte {
	var raw [utf8.UTFMax]byte
	var digits [2 * utf8.UTFMax]byte
	n := utf8.EncodeRune(raw[:], r)
	m := hex.Encode(digits[:], raw[:n])
	dst = append(dst, '<')
	dst = append(dst, digits[:m]...)
	return append(dst, '>')
}

// appendEscapedRune appends the JSON-style escape for the non-printable rune r.
func appendEscapedRune(dst []byte, r rune) []byte {
	switch r {
	case '\n':
		return append(dst, '\\', 'n')
	case '\r':
		return append(dst, '\\', 'r')
	case '\t':
		return append(dst, '\\', 't')
	case '\b':
		return append(dst, '\\', 'b')
	case '\f':
		return append(dst, '\\', 'f')
	}
	if r > 0xFFFF {
		// A \u escape carries exactly four hex digits, so code points outside
		// the Basic Multilingual Plane are written as a surrogate pair.
		r -= 0x10000
		dst = appendU16Escape(dst, 0xD800+(r>>10))
		return appendU16Escape(dst, 0xDC00+(r&0x3FF))
	}
	return appendU16Escape(dst, r)
}

// appendU16Escape appends `\u` followed by the four lowercase hex digits of v.
// v must fit in 16 bits.
func appendU16Escape(dst []byte, v rune) []byte {
	const hexDigits = "0123456789abcdef"
	return append(dst, '\\', 'u',
		hexDigits[(v>>12)&0xF],
		hexDigits[(v>>8)&0xF],
		hexDigits[(v>>4)&0xF],
		hexDigits[v&0xF])
}
// UnifiedHandler implements all format behaviors in a single struct.
// Its methods branch on the format string; "raw", "txt" and "json" are the
// values they test for.
type UnifiedHandler struct {
	format    string     // output flavor the Write* methods switch on
	sanitizer *Sanitizer // applied to strings by the raw and txt paths
}

// NewUnifiedHandler builds a handler for the given format that sanitizes
// through san. Both arguments are stored as given, without validation.
func NewUnifiedHandler(format string, san *Sanitizer) *UnifiedHandler {
	handler := new(UnifiedHandler)
	handler.format = format
	handler.sanitizer = san
	return handler
}
// WriteString appends s to *buf, encoded for the handler's format:
//
//   - "raw":  the sanitized string, unquoted.
//   - "txt":  the sanitized string; when NeedsQuotes reports true it is wrapped
//     in double quotes with '"' and '\' backslash-escaped.
//   - "json": a double-quoted JSON string built directly from s, without
//     consulting the sanitizer. Bytes that are not valid UTF-8 are written as
//     \ufffd so the output is always valid UTF-8.
//
// Any other format leaves *buf untouched.
func (h *UnifiedHandler) WriteString(buf *[]byte, s string) {
	switch h.format {
	case "raw":
		*buf = append(*buf, h.sanitizer.Sanitize(s)...)
	case "txt":
		sanitized := h.sanitizer.Sanitize(s)
		if !h.NeedsQuotes(sanitized) {
			*buf = append(*buf, sanitized...)
			return
		}
		*buf = appendTxtQuoted(*buf, sanitized)
	case "json":
		*buf = appendJSONString(*buf, s)
	}
}

// appendTxtQuoted appends s wrapped in double quotes, putting a backslash in
// front of every '"' and '\' byte.
func appendTxtQuoted(dst []byte, s string) []byte {
	dst = append(dst, '"')
	for i := 0; i < len(s); i++ {
		if c := s[i]; c == '"' || c == '\\' {
			dst = append(dst, '\\')
		}
		dst = append(dst, s[i])
	}
	return append(dst, '"')
}

// appendJSONString appends s as a double-quoted JSON string. Quotes,
// backslashes and control bytes below 0x20 are escaped, valid multi-byte UTF-8
// sequences are copied through, and invalid bytes become \ufffd.
func appendJSONString(dst []byte, s string) []byte {
	const hexDigits = "0123456789abcdef"

	dst = append(dst, '"')
	start := 0 // first byte of the pending run that needs no escaping
	for i := 0; i < len(s); {
		c := s[i]
		if c >= utf8.RuneSelf {
			r, size := utf8.DecodeRuneInString(s[i:])
			if r == utf8.RuneError && size == 1 {
				// Invalid byte: flush the clean run and substitute U+FFFD.
				dst = append(dst, s[start:i]...)
				dst = append(dst, `\ufffd`...)
				i++
				start = i
				continue
			}
			i += size
			continue
		}
		if c >= ' ' && c != '"' && c != '\\' {
			i++
			continue
		}
		dst = append(dst, s[start:i]...)
		switch c {
		case '\\', '"':
			dst = append(dst, '\\', c)
		case '\n':
			dst = append(dst, '\\', 'n')
		case '\r':
			dst = append(dst, '\\', 'r')
		case '\t':
			dst = append(dst, '\\', 't')
		case '\b':
			dst = append(dst, '\\', 'b')
		case '\f':
			dst = append(dst, '\\', 'f')
		default:
			// Remaining control bytes are all below 0x20: two hex digits suffice.
			dst = append(dst, '\\', 'u', '0', '0', hexDigits[c>>4], hexDigits[c&0xF])
		}
		i++
		start = i
	}
	dst = append(dst, s[start:]...)
	return append(dst, '"')
}
// WriteNumber appends the already-formatted number n to *buf verbatim.
// The handler's format plays no role and n is not validated.
func (h *UnifiedHandler) WriteNumber(buf *[]byte, n string) {
	out := append(*buf, n...)
	*buf = out
}
// WriteBool appends "true" or "false" to *buf, regardless of format.
func (h *UnifiedHandler) WriteBool(buf *[]byte, b bool) {
	out := strconv.AppendBool(*buf, b)
	*buf = out
}
// WriteNil appends the nil literal for the handler's format: "nil" for raw,
// "null" for every other format (txt, json).
func (h *UnifiedHandler) WriteNil(buf *[]byte) {
	literal := "null" // txt, json
	if h.format == "raw" {
		literal = "nil"
	}
	*buf = append(*buf, literal...)
}
// WriteComplex appends a rendering of the arbitrary value v to *buf.
//
// For every format except "raw" (txt, json) the value is formatted with %+v
// and handed to WriteString, so it receives that format's quoting/escaping.
// In raw mode, intended for debugging, v is dumped with spew and the
// whitespace-trimmed dump is appended as-is, without going through the
// sanitizer.
func (h *UnifiedHandler) WriteComplex(buf *[]byte, v any) {
	if h.format != "raw" {
		// txt, json
		h.WriteString(buf, fmt.Sprintf("%+v", v))
		return
	}

	// Use spew for complex types in raw mode, DEBUG use
	cfg := spew.ConfigState{
		Indent:                  " ",
		MaxDepth:                10,
		DisablePointerAddresses: true,
		DisableCapacities:       true,
		SortKeys:                true,
	}
	var dump bytes.Buffer
	cfg.Fdump(&dump, v)
	*buf = append(*buf, bytes.TrimSpace(dump.Bytes())...)
}
// NeedsQuotes reports whether s must be wrapped in quotes for the handler's
// format. JSON always quotes. Any format other than "json" and "txt" (raw)
// never does. For txt, quoting is required when s is empty or contains
// whitespace, a non-printable rune, or a shell special character.
func (h *UnifiedHandler) NeedsQuotes(s string) bool {
	if h.format == "json" {
		return true // JSON always quotes
	}
	if h.format != "txt" {
		return false // raw
	}

	// txt: empty strings are quoted so the value stays visible.
	if s == "" {
		return true
	}
	for _, r := range s {
		// Whitespace (space, tab, newline, etc.) or non-printable.
		if unicode.IsSpace(r) || !unicode.IsPrint(r) {
			return true
		}
		// Shell special characters (POSIX + common extensions). Newline,
		// carriage return and tab are already caught by the whitespace check.
		switch r {
		case '"', '\'', '\\', '$', '`', '!', '&', '|', ';',
			'(', ')', '<', '>', '*', '?', '[', ']', '{', '}',
			'~', '#', '%', '=':
			return true
		}
	}
	return false
}

206
sanitizer/sanitizer_test.go Normal file
View File

@ -0,0 +1,206 @@
// FILE: lixenwraith/log/sanitizer/sanitizer_test.go
package sanitizer
import (
"strings"
"testing"
"github.com/stretchr/testify/assert"
)
// TestSanitizer runs Sanitize over a table of inputs, one subtest per row,
// covering each Mode: pass-through, hex encoding, stripping and escaping.
func TestSanitizer(t *testing.T) {
	type row struct {
		label string
		mode  Mode
		in    string
		want  string
	}
	table := []row{
		// None mode
		{"none mode passes through", None, "hello\x00world\n", "hello\x00world\n"},

		// HexEncode
		{"hex encode null byte", HexEncode, "test\x00data", "test<00>data"},
		{"hex encode control chars", HexEncode, "bell\x07tab\x09form\x0c", "bell<07>tab<09>form<0c>"},
		{"hex encode preserves printable", HexEncode, "Hello World 123!@#", "Hello World 123!@#"},
		// NEXT LINE (U+0085) is encoded in UTF-8 as C2 85.
		{"hex encode multi-byte control", HexEncode, "line1\u0085line2", "line1<c285>line2"},
		{"hex encode preserves UTF-8", HexEncode, "Hello 世界 ✓", "Hello 世界 ✓"},

		// Strip
		{"strip removes control chars", Strip, "clean\x00\x07\ntxt", "cleantxt"},
		{"strip preserves spaces", Strip, "hello world", "hello world"},

		// Escape
		{"escape common control chars", Escape, "line1\nline2\ttab\rreturn", "line1\\nline2\\ttab\\rreturn"},
		{"escape unicode control", Escape, "text\x01\x1f", "text\\u0001\\u001f"},
		{"escape backspace and form feed", Escape, "back\bspace form\ffeed", "back\\bspace form\\ffeed"},
	}

	for i := range table {
		c := table[i]
		t.Run(c.label, func(t *testing.T) {
			got := New(c.mode).Sanitize(c.in)
			assert.Equal(t, c.want, got)
		})
	}
}
func TestUnifiedHandler(t *testing.T) {
t.Run("raw format", func(t *testing.T) {
san := New(HexEncode)
handler := NewUnifiedHandler("raw", san)
var buf []byte
// String handling
handler.WriteString(&buf, "test\x00data")
assert.Equal(t, "test<00>data", string(buf))
// Nil handling
buf = nil
handler.WriteNil(&buf)
assert.Equal(t, "nil", string(buf))
// No quotes needed
assert.False(t, handler.NeedsQuotes("any string"))
})
t.Run("txt format", func(t *testing.T) {
san := New(HexEncode)
handler := NewUnifiedHandler("txt", san)
var buf []byte
// String with spaces gets quoted
handler.WriteString(&buf, "hello world")
assert.Equal(t, `"hello world"`, string(buf))
// String without spaces unquoted
buf = nil
handler.WriteString(&buf, "single")
assert.Equal(t, "single", string(buf))
// Nil handling
buf = nil
handler.WriteNil(&buf)
assert.Equal(t, "null", string(buf))
// Quotes needed for empty or space-containing
assert.True(t, handler.NeedsQuotes(""))
assert.True(t, handler.NeedsQuotes("has space"))
assert.False(t, handler.NeedsQuotes("nospace"))
})
t.Run("json format", func(t *testing.T) {
san := New(Escape) // Not used for JSON, direct escaping
handler := NewUnifiedHandler("json", san)
var buf []byte
// JSON escaping
handler.WriteString(&buf, "line1\nline2\t\"quoted\"")
assert.Equal(t, `"line1\nline2\t\"quoted\""`, string(buf))
// Control char escaping
buf = nil
handler.WriteString(&buf, "null\x00byte")
assert.Equal(t, `"null\u0000byte"`, string(buf))
// Always quotes
assert.True(t, handler.NeedsQuotes("anything"))
})
t.Run("complex value handling", func(t *testing.T) {
san := New(HexEncode)
// Raw uses spew
rawHandler := NewUnifiedHandler("raw", san)
var buf []byte
rawHandler.WriteComplex(&buf, map[string]int{"a": 1})
assert.Contains(t, string(buf), "map[")
// Txt/JSON use fmt.Sprintf
txtHandler := NewUnifiedHandler("txt", san)
buf = nil
txtHandler.WriteComplex(&buf, []int{1, 2, 3})
assert.Contains(t, string(buf), "[1 2 3]")
})
}
// BenchmarkSanitizer measures Sanitize for each Mode on a payload of 100
// repetitions of text followed by a NUL, a newline and a tab.
func BenchmarkSanitizer(b *testing.B) {
	payload := strings.Repeat("normal text\x00\n\t", 100)

	type variant struct {
		label string
		mode  Mode
	}
	variants := []variant{
		{label: "None", mode: None},
		{label: "HexEncode", mode: HexEncode},
		{label: "Strip", mode: Strip},
		{label: "Escape", mode: Escape},
	}

	for _, v := range variants {
		b.Run(v.label, func(b *testing.B) {
			san := New(v.mode)
			// Keep construction of the sanitizer out of the measurement.
			b.ResetTimer()
			for n := 0; n < b.N; n++ {
				_ = san.Sanitize(payload)
			}
		})
	}
}