v0.1.1 format refactored, sanitizer added

This commit is contained in:
2025-11-15 04:32:39 -05:00
parent 1379455528
commit af162755dd
16 changed files with 794 additions and 544 deletions

229
sanitizer/sanitizer.go Normal file
View File

@ -0,0 +1,229 @@
// FILE: lixenwraith/log/sanitizer/sanitizer.go
package sanitizer
import (
"bytes"
"encoding/hex"
"fmt"
"strconv"
"unicode"
"unicode/utf8"
"github.com/davecgh/go-spew/spew"
)
// Mode selects how Sanitizer.Sanitize rewrites runes that strconv.IsPrint
// reports as non-printable.
type Mode int
// Sanitization modes. Values are assigned by iota in declaration order, so
// None is the zero value of Mode.
const (
None Mode = iota // Return the input unchanged, without scanning it
HexEncode // Replace each non-printable rune with its UTF-8 bytes in hex wrapped in <>, e.g. <0a> (noted by the author as the current default)
Strip // Drop every non-printable rune (not only control characters)
Escape // Backslash-escape: \n \r \t \b \f by name, any other non-printable rune as a \u hex escape
)
// Sanitizer rewrites non-printable runes in strings according to its mode.
// Sanitize builds its result in buf, and nothing in this file synchronizes
// access to that buffer.
// NOTE(review): a single Sanitizer therefore looks unsafe for concurrent
// use — confirm that callers serialize access.
type Sanitizer struct {
mode Mode // Rewrite policy applied by Sanitize; set by New
buf []byte // Reusable scratch buffer, truncated by Reset at the start of each rewriting pass
}
// New returns a Sanitizer that applies mode. The sanitizer starts with an
// empty scratch buffer that has 256 bytes of capacity.
func New(mode Mode) *Sanitizer {
	s := new(Sanitizer)
	s.mode = mode
	s.buf = make([]byte, 0, 256)
	return s
}
// Reset empties the scratch buffer. The slice is truncated to length zero in
// place, so its backing array and capacity are kept for the next pass.
func (s *Sanitizer) Reset() {
	scratch := s.buf
	s.buf = scratch[:0]
}
// Sanitize returns data with every rune that strconv.IsPrint rejects rewritten
// according to the sanitizer's mode:
//
//   - None: data is returned unchanged, without being scanned.
//   - HexEncode: the rune's UTF-8 bytes are written as lowercase hex between
//     angle brackets, e.g. "\n" becomes "<0a>".
//   - Strip: the rune is dropped.
//   - Escape: "\n", "\r", "\t", "\b" and "\f" become their two-character
//     backslash escapes; any other rune becomes a JSON-style \uXXXX escape,
//     using a UTF-16 surrogate pair for runes above U+FFFF.
//
// Printable runes, including '"' and '\', are copied through untouched, so any
// quoting is left to the caller. Bytes that are not valid UTF-8 are decoded by
// the range loop as U+FFFD before the printability check. A mode outside the
// four declared values drops non-printable runes.
//
// The result is assembled in the sanitizer's scratch buffer, which is reset on
// every rewriting pass, so the method must not run concurrently on the same
// Sanitizer. The returned string is a copy and stays valid across later calls.
func (s *Sanitizer) Sanitize(data string) string {
	if s.mode == None {
		return data
	}

	s.Reset()
	for _, r := range data {
		if strconv.IsPrint(r) {
			s.buf = utf8.AppendRune(s.buf, r)
			continue
		}

		switch s.mode {
		case HexEncode:
			// Encode into fixed-size arrays so no per-rune heap allocation
			// is needed.
			var raw [utf8.UTFMax]byte
			var digits [2 * utf8.UTFMax]byte
			n := utf8.EncodeRune(raw[:], r)
			hex.Encode(digits[:], raw[:n])
			s.buf = append(s.buf, '<')
			s.buf = append(s.buf, digits[:2*n]...)
			s.buf = append(s.buf, '>')
		case Strip:
			// Drop the rune.
		case Escape:
			s.buf = appendEscapedRune(s.buf, r)
		}
	}
	return string(s.buf)
}

// appendEscapedRune appends the Escape-mode form of the non-printable rune r
// to dst and returns the extended slice.
func appendEscapedRune(dst []byte, r rune) []byte {
	switch r {
	case '\n':
		return append(dst, '\\', 'n')
	case '\r':
		return append(dst, '\\', 'r')
	case '\t':
		return append(dst, '\\', 't')
	case '\b':
		return append(dst, '\\', 'b')
	case '\f':
		return append(dst, '\\', 'f')
	}
	if r <= 0xFFFF {
		return appendU16Escape(dst, uint16(r))
	}
	// A \u escape carries exactly 16 bits, so a supplementary-plane rune is
	// written as a UTF-16 surrogate pair instead of five or six hex digits.
	v := r - 0x10000
	dst = appendU16Escape(dst, uint16(0xD800+(v>>10)))
	return appendU16Escape(dst, uint16(0xDC00+(v&0x3FF)))
}

// appendU16Escape appends v as a backslash-u escape with four lowercase hex
// digits.
func appendU16Escape(dst []byte, v uint16) []byte {
	const digits = "0123456789abcdef"
	return append(dst, '\\', 'u',
		digits[v>>12], digits[(v>>8)&0xF], digits[(v>>4)&0xF], digits[v&0xF])
}
// UnifiedHandler appends formatted values to a caller-supplied byte buffer.
// Its methods branch on format, recognizing "raw", "txt" and "json", so a
// single struct covers every output format.
type UnifiedHandler struct {
format string // Output format name, compared against "raw", "txt" and "json"
sanitizer *Sanitizer // Applied to strings written in the "raw" and "txt" formats; the "json" path escapes directly instead
}
// NewUnifiedHandler returns a handler that writes in the given format and
// sanitizes strings with san. Both arguments are stored as given; neither is
// validated here.
func NewUnifiedHandler(format string, san *Sanitizer) *UnifiedHandler {
	h := new(UnifiedHandler)
	h.format, h.sanitizer = format, san
	return h
}
// WriteString appends s to *buf in the handler's format. buf must be non-nil.
//
//   - "json": s is written as a double-quoted JSON string. The sanitizer is
//     not consulted; escaping is done directly (see appendJSONString).
//   - "txt": s is sanitized first. If NeedsQuotes reports true for the
//     sanitized text it is wrapped in double quotes with every '"' and '\'
//     preceded by a backslash; otherwise it is appended bare.
//   - "raw" and any unrecognized format: the sanitized text is appended as is.
//     Falling back to raw handling keeps a string from being silently dropped
//     when the format is unknown.
func (h *UnifiedHandler) WriteString(buf *[]byte, s string) {
	switch h.format {
	case "json":
		*buf = appendJSONString(*buf, s)
	case "txt":
		clean := h.sanitizer.Sanitize(s)
		if !h.NeedsQuotes(clean) {
			*buf = append(*buf, clean...)
			return
		}
		out := *buf
		out = append(out, '"')
		// Byte-wise is safe here: '"' and '\' never occur inside a
		// multi-byte UTF-8 sequence.
		for i := 0; i < len(clean); i++ {
			if c := clean[i]; c == '"' || c == '\\' {
				out = append(out, '\\')
			}
			out = append(out, clean[i])
		}
		*buf = append(out, '"')
	default: // "raw", plus any unrecognized format
		*buf = append(*buf, h.sanitizer.Sanitize(s)...)
	}
}

// appendJSONString appends s to dst as a double-quoted JSON string and returns
// the extended slice. '"' and '\' are backslash-escaped, control bytes below
// 0x20 use the short escapes \n \r \t \b \f or \u00XX, and each byte that is
// not part of a valid UTF-8 sequence is replaced by \ufffd so the output is
// always valid UTF-8. Everything else is copied through in runs.
func appendJSONString(dst []byte, s string) []byte {
	const hexDigits = "0123456789abcdef"

	dst = append(dst, '"')
	start := 0 // beginning of the pending run of bytes that need no escaping
	for i := 0; i < len(s); {
		c := s[i]
		if c < utf8.RuneSelf {
			if c >= ' ' && c != '"' && c != '\\' {
				i++
				continue
			}
			dst = append(dst, s[start:i]...)
			switch c {
			case '"', '\\':
				dst = append(dst, '\\', c)
			case '\n':
				dst = append(dst, '\\', 'n')
			case '\r':
				dst = append(dst, '\\', 'r')
			case '\t':
				dst = append(dst, '\\', 't')
			case '\b':
				dst = append(dst, '\\', 'b')
			case '\f':
				dst = append(dst, '\\', 'f')
			default:
				dst = append(dst, '\\', 'u', '0', '0', hexDigits[c>>4], hexDigits[c&0xF])
			}
			i++
			start = i
			continue
		}

		r, size := utf8.DecodeRuneInString(s[i:])
		if r == utf8.RuneError && size == 1 {
			// Invalid byte: emit the replacement character as an escape
			// rather than copying a byte that would corrupt the document.
			dst = append(dst, s[start:i]...)
			dst = append(dst, `\ufffd`...)
			i++
			start = i
			continue
		}
		i += size
	}
	dst = append(dst, s[start:]...)
	return append(dst, '"')
}
// WriteNumber appends the text n to *buf verbatim. The handler's format is
// not consulted, and n is neither validated, quoted nor sanitized.
func (h *UnifiedHandler) WriteNumber(buf *[]byte, n string) {
	out := *buf
	out = append(out, n...)
	*buf = out
}
// WriteBool appends "true" or "false" to *buf according to b. The output is
// the same for every format.
func (h *UnifiedHandler) WriteBool(buf *[]byte, b bool) {
	out := strconv.AppendBool(*buf, b)
	*buf = out
}
// WriteNil appends the textual form of a nil value to *buf: "nil" in the
// "raw" format and "null" in every other format (txt, json).
func (h *UnifiedHandler) WriteNil(buf *[]byte) {
	literal := "null" // txt, json
	if h.format == "raw" {
		literal = "nil"
	}
	*buf = append(*buf, literal...)
}
// WriteComplex appends a rendering of the arbitrary value v to *buf.
//
// In the "raw" format v is dumped with spew (intended for debugging), using a
// configuration with a depth limit of 10, sorted map keys, and pointer
// addresses and capacities disabled. Surrounding whitespace is trimmed from
// the dump, which is then appended directly without passing through the
// sanitizer.
//
// In every other format (txt, json) v is formatted with %+v and the resulting
// text is handed to WriteString.
func (h *UnifiedHandler) WriteComplex(buf *[]byte, v any) {
	if h.format != "raw" {
		// txt, json
		h.WriteString(buf, fmt.Sprintf("%+v", v))
		return
	}

	cfg := &spew.ConfigState{
		Indent:                  " ",
		MaxDepth:                10,
		DisablePointerAddresses: true,
		DisableCapacities:       true,
		SortKeys:                true,
	}
	var dump bytes.Buffer
	cfg.Fdump(&dump, v)
	*buf = append(*buf, bytes.TrimSpace(dump.Bytes())...)
}
// NeedsQuotes reports whether s must be wrapped in quotes in the handler's
// format.
//
//   - "json": always true.
//   - "txt": true when s is empty, or when any rune in it is whitespace, is
//     non-printable, or is one of the shell special characters listed below.
//   - any other format (raw): always false.
func (h *UnifiedHandler) NeedsQuotes(s string) bool {
	if h.format == "json" {
		return true // JSON always quotes
	}
	if h.format != "txt" {
		return false // raw
	}

	if s == "" {
		return true
	}
	for _, r := range s {
		// Whitespace (which already covers '\n', '\r' and '\t') and
		// non-printable runes always force quoting.
		if unicode.IsSpace(r) || !unicode.IsPrint(r) {
			return true
		}
		// Shell special characters (POSIX + common extensions).
		switch r {
		case '"', '\'', '\\', '$', '`', '!', '&', '|', ';',
			'(', ')', '<', '>', '*', '?', '[', ']', '{', '}',
			'~', '#', '%', '=':
			return true
		}
	}
	return false
}