v0.1.9 pre-stream log regex filtering added

This commit is contained in:
2025-07-08 12:54:39 -04:00
parent d7f2c0d54d
commit 44d9921e80
15 changed files with 828 additions and 107 deletions

125
README.md
View File

@ -4,7 +4,7 @@
<img src="assets/logwisp-logo.svg" alt="LogWisp Logo" width="200"/>
</p>
A high-performance log streaming service with multi-stream architecture, supporting both TCP and HTTP/SSE protocols with real-time file monitoring, rotation detection, and rate limiting.
A high-performance log streaming service with multi-stream architecture, supporting both TCP and HTTP/SSE protocols with real-time file monitoring, rotation detection, regex-based filtering, and rate limiting.
## Features
@ -12,11 +12,12 @@ A high-performance log streaming service with multi-stream architecture, support
- **Dual Protocol Support**: TCP (raw streaming) and HTTP/SSE (browser-friendly)
- **Real-time Monitoring**: Instant updates with per-stream configurable check intervals
- **File Rotation Detection**: Automatic detection and handling of log rotation
- **Regex-based Filtering**: Include/exclude patterns with AND/OR logic per stream
- **Path-based Routing**: Optional HTTP router for consolidated access
- **Rate Limiting**: Per-IP or global rate limiting with token bucket algorithm
- **Connection Limiting**: Configurable concurrent connection limits per IP
- **Per-Stream Configuration**: Independent settings including check intervals and rate limits
- **Connection Statistics**: Real-time monitoring of active connections and rate limit metrics
- **Per-Stream Configuration**: Independent settings including check intervals, filters, and rate limits
- **Connection Statistics**: Real-time monitoring of active connections, filter metrics, and rate limit metrics
- **Flexible Targets**: Monitor individual files or entire directories
- **Version Management**: Git tag-based versioning with build information
- **Configurable Heartbeats**: Keep connections alive with customizable formats
@ -49,11 +50,13 @@ LogWisp uses a service-oriented architecture where each stream is an independent
LogStream Service
├── Stream["app-logs"]
│ ├── Monitor (watches files)
│ ├── Filter Chain (optional)
│ ├── Rate Limiter (optional)
│ ├── TCP Server (optional)
│ └── HTTP Server (optional)
├── Stream["system-logs"]
│ ├── Monitor
│ ├── Filter Chain (optional)
│ ├── Rate Limiter (optional)
│ └── HTTP Server
└── HTTP Router (optional, for path-based routing)
@ -78,6 +81,16 @@ targets = [
{ path = "/var/log/myapp/app.log", is_file = true }
]
# Filter configuration (optional)
[[streams.filters]]
type = "include" # Only show matching logs
logic = "or" # Match any pattern
patterns = [
"(?i)error", # Case-insensitive error
"(?i)warn", # Case-insensitive warning
"(?i)fatal" # Fatal errors
]
[streams.httpserver]
enabled = true
port = 8080
@ -115,6 +128,11 @@ targets = [
{ path = "/var/log/auth.log", is_file = true }
]
# Exclude debug logs
[[streams.filters]]
type = "exclude"
patterns = ["DEBUG", "TRACE"]
[streams.tcpserver]
enabled = true
port = 9090
@ -150,6 +168,44 @@ Monitor targets support both files and directories:
{ path = "./logs", pattern = "*.log", is_file = false }
```
### Filter Configuration
Control which logs are streamed using regex patterns:
```toml
# Include filter - only matching logs pass
[[streams.filters]]
type = "include"
logic = "or" # Match ANY pattern
patterns = [
"ERROR",
"WARN",
"CRITICAL"
]
# Exclude filter - matching logs are dropped
[[streams.filters]]
type = "exclude"
logic = "or" # Drop if ANY pattern matches
patterns = [
"DEBUG",
"healthcheck",
"/metrics"
]
# Complex filter with AND logic
[[streams.filters]]
type = "include"
logic = "and" # Must match ALL patterns
patterns = [
"database", # Must contain "database"
"error", # AND must contain "error"
"connection" # AND must contain "connection"
]
```
Multiple filters are applied sequentially - all must pass for a log to be streamed.
### Check Interval Configuration
Each stream can have its own check interval based on log update frequency:
@ -235,7 +291,7 @@ All HTTP streams share ports with path-based routing:
# Connect to a stream
curl -N http://localhost:8080/stream
# Check stream status (includes rate limit stats)
# Check stream status (includes filter and rate limit stats)
curl http://localhost:8080/status
# With authentication (when implemented)
@ -329,6 +385,21 @@ All log entries are streamed as JSON:
"total_entries": 15420,
"dropped_entries": 0
},
"filters": {
"filter_count": 2,
"total_processed": 15420,
"total_passed": 1234,
"filters": [
{
"type": "include",
"logic": "or",
"pattern_count": 3,
"total_processed": 15420,
"total_matched": 1234,
"total_dropped": 0
}
]
},
"features": {
"rate_limit": {
"enabled": true,
@ -352,6 +423,7 @@ LogWisp provides comprehensive statistics at multiple levels:
- **Per-Stream Stats**: Monitor performance, connection counts, data throughput
- **Per-Watcher Stats**: File size, position, entries read, rotation count
- **Filter Stats**: Processed entries, matched patterns, dropped logs
- **Rate Limit Stats**: Total requests, blocked requests, active IPs
- **Global Stats**: Aggregated view of all streams (in router mode)
@ -365,6 +437,21 @@ Access statistics via status endpoints or watch the console output:
## Advanced Features
### Log Filtering
LogWisp implements powerful regex-based filtering:
- **Include Filters**: Whitelist patterns - only matching logs pass
- **Exclude Filters**: Blacklist patterns - matching logs are dropped
- **Logic Options**: OR (match any) or AND (match all) for pattern combinations
- **Filter Chains**: Multiple filters applied sequentially
- **Performance**: Patterns compiled once at startup for efficiency
Filter statistics help monitor effectiveness:
```bash
# Watch filter statistics
watch -n 1 'curl -s http://localhost:8080/status | jq .filters'
```
### Rate Limiting
LogWisp implements token bucket rate limiting with:
@ -423,6 +510,12 @@ check_interval_ms = 60000 # Check every minute
- Configure per-stream based on expected update frequency
- Use 10000ms+ for archival or slowly updating logs
### Filter Optimization
- Place most selective filters first
- Use simple patterns when possible
- Consider combining patterns: `"ERROR|WARN"` vs separate patterns
- Monitor filter statistics to identify bottlenecks
### Rate Limiting
- `requests_per_second`: Balance between protection and availability
- `burst_size`: Set to 2-3x the per-second rate for traffic spikes
@ -547,11 +640,19 @@ services:
### Current Implementation
- Read-only file access
- Regex pattern validation at startup
- Rate limiting for DDoS protection
- Connection limits to prevent resource exhaustion
- No authentication (placeholder configuration only)
- No TLS/SSL support (placeholder configuration only)
### Filter Security
⚠️ **SECURITY**: Be aware of potential ReDoS (Regular Expression Denial of Service) attacks:
- Complex nested patterns can cause CPU spikes
- Patterns are validated at startup but not for complexity
- Monitor filter processing time in production
- Consider pattern complexity limits for public-facing streams
### Planned Security Features
- **Authentication**: Basic, Bearer/JWT, mTLS
- **TLS/SSL**: For both HTTP and TCP streams
@ -566,6 +667,8 @@ services:
4. Place behind a reverse proxy for production HTTPS
5. Monitor rate limit statistics for potential attacks
6. Regularly update dependencies
7. Test filter patterns for performance impact
8. Limit regex complexity in production environments
### Rate Limiting Best Practices
- Start with conservative limits and adjust based on monitoring
@ -576,6 +679,13 @@ services:
## Troubleshooting
### Filter Issues
1. Check filter statistics to see matched/dropped counts
2. Test patterns with sample log entries
3. Verify filter type (include vs exclude)
4. Check filter logic (or vs and)
5. Monitor CPU usage for complex patterns
### Rate Limit Issues
1. Check rate limit statistics in status endpoint
2. Verify appropriate `requests_per_second` for your use case
@ -588,6 +698,7 @@ services:
3. Ensure files match the specified patterns
4. Check monitor statistics in status endpoint
5. Verify check_interval_ms is appropriate for log update frequency
6. Review filter configuration - logs might be filtered out
### High Memory Usage
1. Reduce buffer sizes in configuration
@ -595,6 +706,7 @@ services:
3. Enable rate limiting to prevent connection floods
4. Increase check interval for less critical logs
5. Use TCP instead of HTTP for high-volume streams
6. Check for complex regex patterns causing backtracking
### Connection Drops
1. Check heartbeat configuration
@ -627,8 +739,9 @@ Contributions are welcome! Please read our contributing guidelines and submit pu
- [x] Version management
- [x] Configurable heartbeats
- [x] Rate and connection limiting
- [ ] Log filtering and transformation
- [ ] Configurable logging support
- [x] Regex-based log filtering
- [ ] Log transformation (field extraction, formatting)
- [ ] Configurable logging/stdout support
- [ ] Authentication (Basic, JWT, mTLS)
- [ ] TLS/SSL support
- [ ] Prometheus metrics export

View File

@ -30,6 +30,16 @@ targets = [
{ path = "./", pattern = "*.log", is_file = false },
]
# Filter configuration (optional) - controls which logs are streamed
# Multiple filters are applied sequentially - all must pass
# [[streams.filters]]
# type = "include" # "include" (whitelist) or "exclude" (blacklist)
# logic = "or" # "or" (match any) or "and" (match all)
# patterns = [
# "(?i)error", # Case-insensitive error matching
# "(?i)warn" # Case-insensitive warning matching
# ]
# HTTP Server configuration (SSE/Server-Sent Events)
[streams.httpserver]
enabled = true
@ -57,7 +67,7 @@ enabled = false
# max_connections_per_ip = 5 # Max SSE connections per IP
# ------------------------------------------------------------------------------
# Example: Application Logs Stream
# Example: Application Logs Stream with Error Filtering
# ------------------------------------------------------------------------------
# [[streams]]
# name = "app"
@ -71,6 +81,28 @@ enabled = false
# { path = "/var/log/myapp/app.log", is_file = true },
# ]
#
# # Filter 1: Include only errors and warnings
# [[streams.filters]]
# type = "include"
# logic = "or" # Match ANY of these patterns
# patterns = [
# "(?i)\\berror\\b", # Word boundary error (case-insensitive)
# "(?i)\\bwarn(ing)?\\b", # warn or warning
# "(?i)\\bfatal\\b", # fatal
# "(?i)\\bcritical\\b", # critical
# "(?i)exception", # exception anywhere
# "(?i)fail(ed|ure)?", # fail, failed, failure
# ]
#
# # Filter 2: Exclude health check noise
# [[streams.filters]]
# type = "exclude"
# patterns = [
# "/health",
# "/metrics",
# "GET /ping"
# ]
#
# [streams.httpserver]
# enabled = true
# port = 8081 # Different port for each stream
@ -95,7 +127,7 @@ enabled = false
# max_connections_per_ip = 10
# ------------------------------------------------------------------------------
# Example: System Logs Stream (TCP + HTTP)
# Example: System Logs Stream (TCP + HTTP) with Security Filtering
# ------------------------------------------------------------------------------
# [[streams]]
# name = "system"
@ -108,6 +140,21 @@ enabled = false
# { path = "/var/log/kern.log", is_file = true },
# ]
#
# # Include only security-relevant logs
# [[streams.filters]]
# type = "include"
# logic = "or"
# patterns = [
# "(?i)auth",
# "(?i)sudo",
# "(?i)ssh",
# "(?i)login",
# "(?i)permission",
# "(?i)denied",
# "(?i)unauthorized",
# "kernel:.*audit"
# ]
#
# # TCP Server for high-performance streaming
# [streams.tcpserver]
# enabled = true
@ -137,7 +184,7 @@ enabled = false
# status_path = "/status"
# ------------------------------------------------------------------------------
# Example: High-Volume Debug Logs
# Example: High-Volume Debug Logs with Filtering
# ------------------------------------------------------------------------------
# [[streams]]
# name = "debug"
@ -148,6 +195,25 @@ enabled = false
# { path = "/tmp/debug", pattern = "*.debug", is_file = false },
# ]
#
# # Exclude verbose debug output
# [[streams.filters]]
# type = "exclude"
# patterns = [
# "TRACE",
# "VERBOSE",
# "entering function",
# "exiting function",
# "memory dump"
# ]
#
# # Include only specific modules
# [[streams.filters]]
# type = "include"
# patterns = [
# "module:(api|database|auth)",
# "component:(router|handler)"
# ]
#
# [streams.httpserver]
# enabled = true
# port = 8083
@ -168,31 +234,78 @@ enabled = false
# max_connections_per_ip = 1 # One connection per IP
# ------------------------------------------------------------------------------
# Example: Archived Logs (Slow Monitoring)
# Example: Database Logs with Complex Filtering
# ------------------------------------------------------------------------------
# [[streams]]
# name = "archive"
# name = "database"
#
# [streams.monitor]
# check_interval_ms = 60000 # Check once per minute
# check_interval_ms = 200
# targets = [
# { path = "/var/log/archive", pattern = "*.gz", is_file = false },
# { path = "/var/log/postgresql", pattern = "*.log", is_file = false },
# ]
#
# # Complex AND filter - must match all patterns
# [[streams.filters]]
# type = "include"
# logic = "and" # Must match ALL patterns
# patterns = [
# "(?i)error|fail", # Must contain error or fail
# "(?i)connection|query", # AND must be about connections or queries
# "(?i)timeout|deadlock" # AND must involve timeout or deadlock
# ]
#
# # Exclude routine maintenance
# [[streams.filters]]
# type = "exclude"
# patterns = [
# "VACUUM",
# "ANALYZE",
# "checkpoint"
# ]
#
# [streams.tcpserver]
# enabled = true
# port = 9091
# buffer_size = 500 # Small buffer for archived logs
#
# # Infrequent heartbeat
# [streams.tcpserver.heartbeat]
# enabled = true
# interval_seconds = 300 # Every 5 minutes
# include_timestamp = false
# include_stats = false
# buffer_size = 2000
# ------------------------------------------------------------------------------
# Example: Security/Audit Logs with Strict Limits
# Example: API Access Logs with Pattern Extraction
# ------------------------------------------------------------------------------
# [[streams]]
# name = "api-access"
#
# [streams.monitor]
# check_interval_ms = 100
# targets = [
# { path = "/var/log/nginx/access.log", is_file = true },
# ]
#
# # Include only API endpoints
# [[streams.filters]]
# type = "include"
# patterns = [
# '"/api/v[0-9]+/', # API versioned endpoints
# '"(GET|POST|PUT|DELETE) /api/' # API requests
# ]
#
# # Exclude specific status codes
# [[streams.filters]]
# type = "exclude"
# patterns = [
# '" 200 ', # Success responses
# '" 204 ', # No content
# '" 304 ', # Not modified
# 'OPTIONS ' # CORS preflight
# ]
#
# [streams.httpserver]
# enabled = true
# port = 8084
# buffer_size = 3000
# ------------------------------------------------------------------------------
# Example: Security/Audit Logs with Strict Filtering
# ------------------------------------------------------------------------------
# [[streams]]
# name = "security"
@ -203,6 +316,25 @@ enabled = false
# { path = "/var/log/audit", pattern = "audit.log*", is_file = false },
# ]
#
# # Security-focused patterns
# [[streams.filters]]
# type = "include"
# logic = "or"
# patterns = [
# "type=USER_AUTH",
# "type=USER_LOGIN",
# "type=USER_LOGOUT",
# "type=USER_ERR",
# "type=CRED_", # All credential operations
# "type=PRIV_", # All privilege operations
# "type=ANOM_", # All anomalies
# "type=RESP_", # All responses
# "failed|failure",
# "denied|unauthorized",
# "violation",
# "attack|intrusion"
# ]
#
# [streams.httpserver]
# enabled = true
# port = 8443 # HTTPS port (for future TLS)
@ -235,29 +367,67 @@ enabled = false
# # realm = "Security Logs"
# ------------------------------------------------------------------------------
# Example: Public API Logs with Global Rate Limiting
# Example: Multi-Application Logs with Service Filtering
# ------------------------------------------------------------------------------
# [[streams]]
# name = "api-public"
# name = "microservices"
#
# [streams.monitor]
# check_interval_ms = 100
# targets = [
# { path = "/var/log/api", pattern = "access.log*", is_file = false },
# { path = "/var/log/containers", pattern = "*.log", is_file = false },
# ]
#
# # Filter by service name
# [[streams.filters]]
# type = "include"
# patterns = [
# "service=(api|auth|user|order)", # Specific services
# "pod=(api|auth|user|order)-" # Kubernetes pods
# ]
#
# # Exclude Kubernetes noise
# [[streams.filters]]
# type = "exclude"
# patterns = [
# "kube-system",
# "kube-proxy",
# "Readiness probe",
# "Liveness probe"
# ]
#
# [streams.httpserver]
# enabled = true
# port = 8084
# buffer_size = 2000
# port = 8085
# buffer_size = 5000
# ==============================================================================
# FILTER PATTERN EXAMPLES
# ==============================================================================
#
# # Global rate limiting (all clients share limit)
# [streams.httpserver.rate_limit]
# enabled = true
# requests_per_second = 100.0 # 100 req/s total
# burst_size = 200
# limit_by = "global" # All clients share this limit
# max_total_connections = 50 # Max 50 connections total
# Basic Patterns:
# - "ERROR" # Exact match
# - "(?i)error" # Case-insensitive
# - "\\berror\\b" # Word boundary (won't match "errorCode")
# - "error|warn|fatal" # Multiple options
#
# Complex Patterns:
# - "^\\[ERROR\\]" # Line starts with [ERROR]
# - "status=[4-5][0-9]{2}" # HTTP 4xx or 5xx status codes
# - "duration>[0-9]{4}ms" # Durations of 1000-9999ms (exactly four digits followed by "ms")
# - "user_id=\"[^\"]+\"" # Match lines containing a quoted user_id value
#
# Performance Tips:
# - Avoid nested quantifiers: "((a+)+)+" can cause catastrophic backtracking
# - Use anchors when possible: "^ERROR" is faster than "ERROR"
# - Prefer character classes: "[0-9]" over "\\d" for clarity
# - Test complex patterns with sample data before deployment
#
# Security Considerations:
# - Be aware of ReDoS (Regular Expression Denial of Service)
# - Limit pattern complexity for public-facing streams
# - Monitor filter processing time in statistics
# - Consider pre-filtering very high volume streams
# ==============================================================================
# USAGE EXAMPLES
@ -273,6 +443,7 @@ enabled = false
# - Uncomment additional [[streams]] sections above
# - Each stream runs independently on its own port
# - Different check intervals for different log types
# - Different filters for each stream
# 3. Router mode (consolidated access):
# ./logwisp --router
@ -281,6 +452,7 @@ enabled = false
# - Example: http://localhost:8080/app/stream
# 4. Production deployment:
# - Enable filters to reduce noise and bandwidth
# - Enable rate limiting on public-facing streams
# - Use TCP for internal high-volume streams
# - Set appropriate check intervals (higher = less CPU)
@ -289,6 +461,7 @@ enabled = false
# 5. Monitoring:
# curl http://localhost:8080/status | jq .
# - Check active connections
# - Monitor filter statistics (matched/dropped)
# - Monitor rate limit statistics
# - Track log entry counts
@ -299,10 +472,14 @@ enabled = false
# LOGWISP_STREAMS_0_MONITOR_CHECK_INTERVAL_MS=50
# LOGWISP_STREAMS_0_HTTPSERVER_PORT=8090
# LOGWISP_STREAMS_0_HTTPSERVER_RATE_LIMIT_ENABLED=true
# LOGWISP_STREAMS_0_FILTERS_0_TYPE=include
# LOGWISP_STREAMS_0_FILTERS_0_PATTERNS='["ERROR","WARN"]'
# ==============================================================================
# NOTES
# ==============================================================================
# - Filters are processed sequentially - all must pass
# - An empty filter pattern list means "pass everything"
# - Rate limiting is disabled by default for backward compatibility
# - Each stream can have different rate limit settings
# - TCP connections are silently dropped when rate limited
@ -310,3 +487,5 @@ enabled = false
# - IP tracking is cleaned up after 5 minutes of inactivity
# - Token bucket algorithm provides smooth rate limiting
# - Connection limits prevent resource exhaustion
# - Regex patterns are compiled once at startup for performance
# - Complex patterns can impact performance - monitor statistics

View File

@ -11,6 +11,11 @@ targets = [
{ path = "/var/log/myapp", pattern = "*.log", is_file = false }
]
# Optional: Filter for errors and warnings only
# [[streams.filters]]
# type = "include"
# patterns = ["ERROR", "WARN", "CRITICAL"]
[streams.httpserver]
enabled = true
port = 8080

View File

@ -14,8 +14,6 @@ logwisp/
│ └── logwisp.toml.minimal # Minimal configuration template
├── doc/
│ └── architecture.md # This file - architecture documentation
├── test_router.sh # Router functionality test suite
├── test_ratelimit.sh # Rate limiting test suite
└── src/
├── cmd/
│ └── logwisp/
@ -27,8 +25,11 @@ logwisp/
│ ├── loader.go # Configuration loading with lixenwraith/config
│ ├── server.go # TCP/HTTP server configurations with rate limiting
│ ├── ssl.go # SSL/TLS configuration structures
│ ├── stream.go # Stream-specific configurations
│ └── validation.go # Configuration validation including rate limits
│ ├── stream.go # Stream-specific configurations with filters
│ └── validation.go # Configuration validation including filters and rate limits
├── filter/
│ ├── filter.go # Regex-based log filtering implementation
│ └── chain.go # Sequential filter chain management
├── logstream/
│ ├── httprouter.go # HTTP router for path-based routing
│ ├── logstream.go # Stream lifecycle management
@ -92,6 +93,11 @@ LOGWISP_STREAMS_0_HTTPSERVER_BUFFER_SIZE=2000
LOGWISP_STREAMS_0_HTTPSERVER_HEARTBEAT_ENABLED=true
LOGWISP_STREAMS_0_HTTPSERVER_HEARTBEAT_FORMAT=json
# Filter configuration
LOGWISP_STREAMS_0_FILTERS_0_TYPE=include
LOGWISP_STREAMS_0_FILTERS_0_LOGIC=or
LOGWISP_STREAMS_0_FILTERS_0_PATTERNS='["ERROR","WARN"]'
# Rate limiting configuration
LOGWISP_STREAMS_0_HTTPSERVER_RATE_LIMIT_ENABLED=true
LOGWISP_STREAMS_0_HTTPSERVER_RATE_LIMIT_REQUESTS_PER_SECOND=10.0
@ -116,9 +122,9 @@ LOGWISP_STREAMS_1_TCPSERVER_PORT=9090
2. **LogStream (`logstream.LogStream`)**
- Represents a single log monitoring pipeline
- Contains: Monitor + Rate Limiter + Servers (TCP/HTTP)
- Contains: Monitor + Filter Chain + Rate Limiter + Servers (TCP/HTTP)
- Independent configuration
- Per-stream statistics with rate limit metrics
- Per-stream statistics with filter and rate limit metrics
3. **Monitor (`monitor.Monitor`)**
- Watches files and directories
@ -126,14 +132,25 @@ LOGWISP_STREAMS_1_TCPSERVER_PORT=9090
- Publishes log entries to subscribers
- Configurable check intervals
4. **Rate Limiter (`ratelimit.Limiter`)**
4. **Filter (`filter.Filter`)**
- Regex-based log filtering
- Include (whitelist) or Exclude (blacklist) modes
- OR/AND logic for multiple patterns
- Per-filter statistics (processed, matched, dropped)
5. **Filter Chain (`filter.Chain`)**
- Sequential application of multiple filters
- All filters must pass for entry to be streamed
- Aggregate statistics across filter chain
6. **Rate Limiter (`ratelimit.Limiter`)**
- Token bucket algorithm for smooth rate limiting
- Per-IP or global limiting strategies
- Connection tracking and limits
- Automatic cleanup of stale entries
- Non-blocking rejection of excess requests
5. **Streamers**
7. **Streamers**
- **HTTPStreamer**: SSE-based streaming over HTTP
- Rate limit enforcement before request handling
- Connection tracking for per-IP limits
@ -144,7 +161,7 @@ LOGWISP_STREAMS_1_TCPSERVER_PORT=9090
- Both support configurable heartbeats
- Non-blocking client management
6. **HTTPRouter (`logstream.HTTPRouter`)**
8. **HTTPRouter (`logstream.HTTPRouter`)**
- Optional component for path-based routing
- Consolidates multiple HTTP streams on shared ports
- Provides global status endpoint
@ -154,11 +171,22 @@ LOGWISP_STREAMS_1_TCPSERVER_PORT=9090
### Data Flow
```
File System → Monitor → LogEntry Channel → [Rate Limiter] → Streamer → Network Client
↑ ↓ ↓
└── Rotation Detection Rate Limit Check
Accept/Reject
File System → Monitor → LogEntry Channel → Filter Chain → [Rate Limiter] → Streamer → Network Client
↑ ↓ ↓
└── Rotation Detection Pattern Match Rate Limit Check
Pass/Drop Accept/Reject
```
### Filter Architecture
```
Log Entry → Filter Chain → Filter 1 → Filter 2 → ... → Output
↓ ↓
Include? Exclude?
↓ ↓
OR/AND OR/AND
Logic Logic
```
### Rate Limiting Architecture
@ -184,6 +212,19 @@ targets = [
{ path = "/path/to/file.log", is_file = true }
]
# Filter configuration (optional)
[[streams.filters]]
type = "include" # "include" or "exclude"
logic = "or" # "or" or "and"
patterns = [
"(?i)error", # Case-insensitive error matching
"(?i)warn" # Case-insensitive warning matching
]
[[streams.filters]]
type = "exclude"
patterns = ["DEBUG", "TRACE"]
[streams.httpserver]
enabled = true
port = 8080
@ -226,6 +267,26 @@ burst_size = 10
limit_by = "ip"
```
## Filter Implementation
### Filter Types
1. **Include Filter**: Only logs matching patterns are streamed (whitelist)
2. **Exclude Filter**: Logs matching patterns are dropped (blacklist)
### Pattern Logic
- **OR Logic**: Log matches if ANY pattern matches
- **AND Logic**: Log matches only if ALL patterns match
### Filter Chain
- Multiple filters are applied sequentially
- All filters must pass for a log to be streamed
- Efficient short-circuit evaluation
### Performance Considerations
- Regex patterns compiled once at startup
- Cached for efficient matching
- Statistics tracked without locks in hot path
## Rate Limiting Implementation
### Token Bucket Algorithm
@ -308,6 +369,13 @@ go build -ldflags "-X 'logwisp/src/internal/version.Version=v1.0.0'" \
- Statistics accuracy
- Stress testing
3. **Filter Testing** (recommended)
- Pattern matching accuracy
- Include/exclude logic
- OR/AND combination logic
- Performance with complex patterns
- Filter chain behavior
### Running Tests
```bash
@ -323,6 +391,12 @@ make test
## Performance Considerations
### Filter Overhead
- Regex compilation: One-time cost at startup
- Pattern matching: O(n*m) where n=patterns, m=text length
- Use simple patterns when possible
- Consider pattern order (most likely matches first)
### Rate Limiting Overhead
- Token bucket checks: O(1) time complexity
- Memory: ~100 bytes per tracked IP
@ -330,6 +404,8 @@ make test
- Minimal impact when disabled
### Optimization Guidelines
- Use specific patterns to reduce regex complexity
- Place most selective filters first in chain
- Use per-IP limiting for fairness
- Use global limiting for resource protection
- Set burst size to 2-3x requests_per_second
@ -343,17 +419,4 @@ make test
- Rate limiting for DDoS protection
- Connection limits for resource protection
- Non-blocking request rejection
### Future Security Roadmap
- Authentication (Basic, JWT, mTLS)
- TLS/SSL support
- IP whitelisting/blacklisting
- Audit logging
- RBAC per stream
### Security Best Practices
- Run with minimal privileges
- Enable rate limiting on public endpoints
- Use connection limits to prevent exhaustion
- Deploy behind reverse proxy for HTTPS
- Monitor rate limit statistics for attacks
- Regex pattern validation at startup

View File

@ -1,6 +1,8 @@
// FILE: src/internal/config/stream.go
package config
import "logwisp/src/internal/filter"
type StreamConfig struct {
// Stream identifier (used in logs and metrics)
Name string `toml:"name"`
@ -8,6 +10,9 @@ type StreamConfig struct {
// Monitor configuration for this stream
Monitor *StreamMonitorConfig `toml:"monitor"`
// Filter configuration
Filters []filter.Config `toml:"filters"`
// Server configurations
TCPServer *TCPConfig `toml:"tcpserver"`
HTTPServer *HTTPConfig `toml:"httpserver"`

View File

@ -3,6 +3,8 @@ package config
import (
"fmt"
"logwisp/src/internal/filter"
"regexp"
"strings"
)
@ -36,6 +38,7 @@ func (c *Config) validate() error {
stream.Name, stream.Monitor.CheckIntervalMs)
}
// Validate targets
for j, target := range stream.Monitor.Targets {
if target.Path == "" {
return fmt.Errorf("stream '%s' target %d: empty path", stream.Name, j)
@ -45,6 +48,13 @@ func (c *Config) validate() error {
}
}
// Validate filters
for j, filterCfg := range stream.Filters {
if err := validateFilter(stream.Name, j, &filterCfg); err != nil {
return err
}
}
// Validate TCP server
if stream.TCPServer != nil && stream.TCPServer.Enabled {
if stream.TCPServer.Port < 1 || stream.TCPServer.Port > 65535 {
@ -224,3 +234,38 @@ func validateRateLimit(serverType, streamName string, rl *RateLimitConfig) error
return nil
}
// validateFilter checks a single filter configuration for a stream:
// the type and logic must each be one of the recognized values (or
// empty, which falls back to the filter package defaults), and every
// pattern must compile as a regex. An empty pattern list is valid and
// passes everything.
func validateFilter(streamName string, filterIndex int, cfg *filter.Config) error {
	// Type must be include, exclude, or empty (defaulted later).
	typeOK := cfg.Type == filter.TypeInclude || cfg.Type == filter.TypeExclude || cfg.Type == ""
	if !typeOK {
		return fmt.Errorf("stream '%s' filter[%d]: invalid type '%s' (must be 'include' or 'exclude')",
			streamName, filterIndex, cfg.Type)
	}

	// Logic must be or, and, or empty (defaulted later).
	logicOK := cfg.Logic == filter.LogicOr || cfg.Logic == filter.LogicAnd || cfg.Logic == ""
	if !logicOK {
		return fmt.Errorf("stream '%s' filter[%d]: invalid logic '%s' (must be 'or' or 'and')",
			streamName, filterIndex, cfg.Logic)
	}

	// Every pattern must be a compilable regex; iterating an empty list
	// is a no-op, so "no patterns" is implicitly accepted.
	for i, pattern := range cfg.Patterns {
		if _, err := regexp.Compile(pattern); err != nil {
			return fmt.Errorf("stream '%s' filter[%d] pattern[%d] '%s': invalid regex: %w",
				streamName, filterIndex, i, pattern, err)
		}
	}
	return nil
}

View File

@ -0,0 +1,72 @@
// FILE: src/internal/filter/chain.go
package filter
import (
"fmt"
"sync/atomic"
"logwisp/src/internal/monitor"
)
// Chain manages multiple filters in sequence.
// An entry must pass every filter in the chain to be streamed;
// a chain with no filters passes everything (see Apply).
type Chain struct {
	// filters are applied in order; Apply short-circuits on the first rejection.
	filters []*Filter

	// Statistics — atomic counters so the hot path stays lock-free.
	totalProcessed atomic.Uint64 // entries seen by Apply
	totalPassed    atomic.Uint64 // entries that passed all filters
}
// NewChain builds a filter chain from the given configurations.
// It fails with a position-tagged error if any individual filter
// cannot be constructed (e.g. an invalid regex pattern).
func NewChain(configs []Config) (*Chain, error) {
	built := make([]*Filter, 0, len(configs))
	for i, cfg := range configs {
		f, err := New(cfg)
		if err != nil {
			return nil, fmt.Errorf("filter[%d]: %w", i, err)
		}
		built = append(built, f)
	}
	return &Chain{filters: built}, nil
}
// Apply runs the entry through every filter in order and reports
// whether it passed them all. An empty chain passes everything.
// Chain-level statistics are updated atomically on every call.
func (c *Chain) Apply(entry monitor.LogEntry) bool {
	c.totalProcessed.Add(1)

	// Short-circuit: the first rejecting filter drops the entry.
	// (Iterating an empty slice falls straight through to "passed".)
	for _, f := range c.filters {
		if !f.Apply(entry) {
			return false
		}
	}

	c.totalPassed.Add(1)
	return true
}
// GetStats returns aggregate chain counters plus the per-filter
// statistics of each filter in the chain, for the status endpoints.
func (c *Chain) GetStats() map[string]interface{} {
	perFilter := make([]map[string]interface{}, 0, len(c.filters))
	for _, f := range c.filters {
		perFilter = append(perFilter, f.GetStats())
	}
	return map[string]interface{}{
		"filter_count":    len(c.filters),
		"total_processed": c.totalProcessed.Load(),
		"total_passed":    c.totalPassed.Load(),
		"filters":         perFilter,
	}
}

View File

@ -0,0 +1,173 @@
// FILE: src/internal/filter/filter.go
package filter
import (
"fmt"
"regexp"
"sync"
"sync/atomic"
"logwisp/src/internal/monitor"
)
// Type represents the filter type: whether entries matching the
// patterns are kept or discarded.
type Type string

const (
	TypeInclude Type = "include" // Whitelist - only matching logs pass
	TypeExclude Type = "exclude" // Blacklist - matching logs are dropped
)

// Logic represents how multiple patterns are combined when a filter
// has more than one pattern.
type Logic string

const (
	LogicOr  Logic = "or"  // Match any pattern
	LogicAnd Logic = "and" // Match all patterns
)

// Config represents filter configuration as loaded from TOML.
// An empty Type defaults to include and an empty Logic to "or"
// (defaults applied in New); an empty Patterns list passes everything.
type Config struct {
	Type     Type     `toml:"type"`     // "include" or "exclude"
	Logic    Logic    `toml:"logic"`    // "or" or "and"
	Patterns []string `toml:"patterns"` // regex patterns (Go regexp / RE2 syntax)
}
// Filter applies regex-based filtering to log entries.
// An include filter acts as a whitelist (only matching entries pass);
// an exclude filter acts as a blacklist (matching entries are dropped).
type Filter struct {
	config Config // type, logic, and raw patterns as configured

	// patterns holds the compiled regexes (compiled once in New).
	patterns []*regexp.Regexp

	// NOTE(review): mu appears intended to guard patterns for dynamic
	// replacement via UpdatePatterns — verify that every reader of
	// patterns honors this lock.
	mu sync.RWMutex

	// Statistics — atomic counters, updated lock-free on the hot path.
	totalProcessed atomic.Uint64 // entries seen by Apply
	totalMatched   atomic.Uint64 // entries whose text matched the patterns
	totalDropped   atomic.Uint64 // entries Apply decided to drop
}
// New creates a Filter from cfg, compiling every regex pattern up
// front so matching in the hot path needs no further compilation.
// An empty Type defaults to TypeInclude and an empty Logic to LogicOr.
// It returns an error if any pattern is not a valid regex.
func New(cfg Config) (*Filter, error) {
	// Apply defaults for unset fields.
	if cfg.Type == "" {
		cfg.Type = TypeInclude
	}
	if cfg.Logic == "" {
		cfg.Logic = LogicOr
	}

	// Compile all patterns before constructing the filter.
	compiled := make([]*regexp.Regexp, 0, len(cfg.Patterns))
	for i, p := range cfg.Patterns {
		re, err := regexp.Compile(p)
		if err != nil {
			return nil, fmt.Errorf("invalid regex pattern[%d] '%s': %w", i, p, err)
		}
		compiled = append(compiled, re)
	}

	return &Filter{
		config:   cfg,
		patterns: compiled,
	}, nil
}
// Apply checks if a log entry should be passed through.
// The matched text is the entry's Message, prefixed with Level and
// Source when present. For TypeInclude a match passes the entry; for
// TypeExclude a match drops it.
//
// Fix: hold the read lock while consulting f.patterns/f.config —
// UpdatePatterns replaces both under f.mu.Lock(), so the previously
// unguarded reads here were a data race.
func (f *Filter) Apply(entry monitor.LogEntry) bool {
	f.totalProcessed.Add(1)

	// Guard against concurrent UpdatePatterns; matches() reads
	// f.patterns and f.config.Logic, so the lock spans that call too.
	f.mu.RLock()
	defer f.mu.RUnlock()

	// No patterns means pass everything
	if len(f.patterns) == 0 {
		return true
	}

	// Check against all fields that might contain the log content
	text := entry.Message
	if entry.Level != "" {
		text = entry.Level + " " + text
	}
	if entry.Source != "" {
		text = entry.Source + " " + text
	}

	matched := f.matches(text)
	if matched {
		f.totalMatched.Add(1)
	}

	// Determine if we should pass or drop
	shouldPass := false
	switch f.config.Type {
	case TypeInclude:
		shouldPass = matched
	case TypeExclude:
		shouldPass = !matched
	}
	if !shouldPass {
		f.totalDropped.Add(1)
	}
	return shouldPass
}
// matches reports whether text satisfies the configured pattern set:
// LogicOr requires at least one pattern to match, LogicAnd requires
// all of them. An unrecognized logic value matches nothing (New
// normalizes the value, so this branch is defensive only).
func (f *Filter) matches(text string) bool {
	switch f.config.Logic {
	case LogicAnd:
		// Every pattern must match.
		for _, rx := range f.patterns {
			if !rx.MatchString(text) {
				return false
			}
		}
		return true
	case LogicOr:
		// One matching pattern is enough.
		for _, rx := range f.patterns {
			if rx.MatchString(text) {
				return true
			}
		}
		return false
	default:
		return false
	}
}
// GetStats returns filter statistics: configuration (type, logic,
// pattern count) plus the processed/matched/dropped counters.
//
// Fix: snapshot f.config and len(f.patterns) under the read lock —
// UpdatePatterns mutates both under f.mu.Lock(), so unguarded reads
// raced with it. Also returns map[string]any for consistency with the
// rest of the codebase (identical type to map[string]interface{}).
func (f *Filter) GetStats() map[string]any {
	f.mu.RLock()
	ftype := f.config.Type
	logic := f.config.Logic
	patternCount := len(f.patterns)
	f.mu.RUnlock()

	return map[string]any{
		"type":            ftype,
		"logic":           logic,
		"pattern_count":   patternCount,
		"total_processed": f.totalProcessed.Load(),
		"total_matched":   f.totalMatched.Load(),
		"total_dropped":   f.totalDropped.Load(),
	}
}
// UpdatePatterns replaces the filter's pattern set at runtime.
// All patterns are compiled first; on any compile error the filter is
// left unchanged. On success both the compiled set and the raw config
// strings are swapped under the write lock.
// NOTE(review): the caller's slice is retained as-is, not cloned —
// callers should not mutate it afterwards; confirm this is intended.
func (f *Filter) UpdatePatterns(patterns []string) error {
	// Validate and compile everything before touching shared state.
	compiled := make([]*regexp.Regexp, 0, len(patterns))
	for i, p := range patterns {
		rx, err := regexp.Compile(p)
		if err != nil {
			return fmt.Errorf("invalid regex pattern[%d] '%s': %w", i, p, err)
		}
		compiled = append(compiled, rx)
	}

	// Swap in the new set atomically with respect to readers.
	f.mu.Lock()
	defer f.mu.Unlock()
	f.patterns = compiled
	f.config.Patterns = patterns
	return nil
}

View File

@ -135,11 +135,11 @@ func (r *HTTPRouter) Shutdown() {
fmt.Println("[ROUTER] Router shutdown complete")
}
func (r *HTTPRouter) GetStats() map[string]interface{} {
func (r *HTTPRouter) GetStats() map[string]any {
r.mu.RLock()
defer r.mu.RUnlock()
serverStats := make(map[int]interface{})
serverStats := make(map[int]any)
totalRoutes := 0
for port, rs := range r.servers {
@ -151,14 +151,14 @@ func (r *HTTPRouter) GetStats() map[string]interface{} {
}
rs.routeMu.RUnlock()
serverStats[port] = map[string]interface{}{
serverStats[port] = map[string]any{
"routes": routes,
"requests": rs.requests.Load(),
"uptime": int(time.Since(rs.startTime).Seconds()),
}
}
return map[string]interface{}{
return map[string]any{
"uptime_seconds": int(time.Since(r.startTime).Seconds()),
"total_requests": r.totalRequests.Load(),
"routed_requests": r.routedRequests.Load(),

View File

@ -40,22 +40,26 @@ func (ls *LogStream) Shutdown() {
ls.Monitor.Stop()
}
func (ls *LogStream) GetStats() map[string]interface{} {
func (ls *LogStream) GetStats() map[string]any {
monStats := ls.Monitor.GetStats()
stats := map[string]interface{}{
stats := map[string]any{
"name": ls.Name,
"uptime_seconds": int(time.Since(ls.Stats.StartTime).Seconds()),
"monitor": monStats,
}
if ls.FilterChain != nil {
stats["filters"] = ls.FilterChain.GetStats()
}
if ls.TCPServer != nil {
currentConnections := ls.TCPServer.GetActiveConnections()
stats["tcp"] = map[string]interface{}{
"enabled": true,
"port": ls.Config.TCPServer.Port,
"connections": currentConnections, // Use current value
"connections": currentConnections,
}
}
@ -65,7 +69,7 @@ func (ls *LogStream) GetStats() map[string]interface{} {
stats["http"] = map[string]interface{}{
"enabled": true,
"port": ls.Config.HTTPServer.Port,
"connections": currentConnections, // Use current value
"connections": currentConnections,
"stream_path": ls.Config.HTTPServer.StreamPath,
"status_path": ls.Config.HTTPServer.StatusPath,
}

View File

@ -101,12 +101,12 @@ func (rs *routerServer) handleGlobalStatus(ctx *fasthttp.RequestCtx) {
ctx.SetContentType("application/json")
rs.routeMu.RLock()
streams := make(map[string]interface{})
streams := make(map[string]any)
for prefix, stream := range rs.routes {
streamStats := stream.GetStats()
// Add routing information
streamStats["routing"] = map[string]interface{}{
streamStats["routing"] = map[string]any{
"path_prefix": prefix,
"endpoints": map[string]string{
"stream": prefix + stream.Config.HTTPServer.StreamPath,
@ -121,7 +121,7 @@ func (rs *routerServer) handleGlobalStatus(ctx *fasthttp.RequestCtx) {
// Get router stats
routerStats := rs.router.GetStats()
status := map[string]interface{}{
status := map[string]any{
"service": "LogWisp Router",
"version": version.String(),
"port": rs.port,
@ -155,7 +155,7 @@ func (rs *routerServer) handleNotFound(ctx *fasthttp.RequestCtx) {
}
rs.routeMu.RUnlock()
response := map[string]interface{}{
response := map[string]any{
"error": "Not Found",
"requested_path": string(ctx.Path()),
"available_routes": availableRoutes,

View File

@ -4,6 +4,7 @@ package logstream
import (
"context"
"fmt"
"logwisp/src/internal/filter"
"sync"
"time"
@ -24,6 +25,7 @@ type LogStream struct {
Name string
Config config.StreamConfig
Monitor monitor.Monitor
FilterChain *filter.Chain
TCPServer *stream.TCPStreamer
HTTPServer *stream.HTTPStreamer
Stats *StreamStats
@ -39,6 +41,7 @@ type StreamStats struct {
HTTPConnections int32
TotalBytesServed uint64
TotalEntriesServed uint64
FilterStats map[string]any
}
func New(ctx context.Context) *Service {
@ -79,11 +82,23 @@ func (s *Service) CreateStream(cfg config.StreamConfig) error {
return fmt.Errorf("failed to start monitor: %w", err)
}
// Create filter chain
var filterChain *filter.Chain
if len(cfg.Filters) > 0 {
chain, err := filter.NewChain(cfg.Filters)
if err != nil {
streamCancel()
return fmt.Errorf("failed to create filter chain: %w", err)
}
filterChain = chain
}
// Create log stream
ls := &LogStream{
Name: cfg.Name,
Config: cfg,
Monitor: mon,
FilterChain: filterChain,
Stats: &StreamStats{
StartTime: time.Now(),
},
@ -93,7 +108,18 @@ func (s *Service) CreateStream(cfg config.StreamConfig) error {
// Start TCP server if configured
if cfg.TCPServer != nil && cfg.TCPServer.Enabled {
tcpChan := mon.Subscribe()
// Create filtered channel
rawChan := mon.Subscribe()
tcpChan := make(chan monitor.LogEntry, cfg.TCPServer.BufferSize)
// Start filter goroutine for TCP
s.wg.Add(1)
go func() {
defer s.wg.Done()
defer close(tcpChan)
s.filterLoop(streamCtx, rawChan, tcpChan, filterChain)
}()
ls.TCPServer = stream.NewTCPStreamer(tcpChan, *cfg.TCPServer)
if err := s.startTCPServer(ls); err != nil {
@ -104,7 +130,18 @@ func (s *Service) CreateStream(cfg config.StreamConfig) error {
// Start HTTP server if configured
if cfg.HTTPServer != nil && cfg.HTTPServer.Enabled {
httpChan := mon.Subscribe()
// Create filtered channel
rawChan := mon.Subscribe()
httpChan := make(chan monitor.LogEntry, cfg.HTTPServer.BufferSize)
// Start filter goroutine for HTTP
s.wg.Add(1)
go func() {
defer s.wg.Done()
defer close(httpChan)
s.filterLoop(streamCtx, rawChan, httpChan, filterChain)
}()
ls.HTTPServer = stream.NewHTTPStreamer(httpChan, *cfg.HTTPServer)
if err := s.startHTTPServer(ls); err != nil {
@ -119,6 +156,31 @@ func (s *Service) CreateStream(cfg config.StreamConfig) error {
return nil
}
// filterLoop pumps entries from in to out, applying the filter chain
// when one is configured (a nil chain passes everything). It exits
// when ctx is cancelled or in is closed. The forward is non-blocking:
// if out's buffer is full the entry is dropped rather than stalling
// the upstream monitor.
func (s *Service) filterLoop(ctx context.Context, in <-chan monitor.LogEntry, out chan<- monitor.LogEntry, chain *filter.Chain) {
	for {
		select {
		case <-ctx.Done():
			return
		case e, open := <-in:
			if !open {
				return
			}
			if chain != nil && !chain.Apply(e) {
				continue // entry rejected by the filter chain
			}
			select {
			case out <- e:
			case <-ctx.Done():
				return
			default:
				// Drop if output buffer is full
			}
		}
	}
}
func (s *Service) GetStream(name string) (*LogStream, error) {
s.mu.RLock()
defer s.mu.RUnlock()
@ -178,17 +240,17 @@ func (s *Service) Shutdown() {
s.wg.Wait()
}
func (s *Service) GetGlobalStats() map[string]interface{} {
func (s *Service) GetGlobalStats() map[string]any {
s.mu.RLock()
defer s.mu.RUnlock()
stats := map[string]interface{}{
"streams": make(map[string]interface{}),
stats := map[string]any{
"streams": make(map[string]any),
"total_streams": len(s.streams),
}
for name, stream := range s.streams {
stats["streams"].(map[string]interface{})[name] = stream.GetStats()
stats["streams"].(map[string]any)[name] = stream.GetStats()
}
return stats

View File

@ -192,9 +192,9 @@ func (l *Limiter) RemoveConnection(remoteAddr string) {
}
// Returns rate limiter statistics
func (l *Limiter) GetStats() map[string]interface{} {
func (l *Limiter) GetStats() map[string]any {
if l == nil {
return map[string]interface{}{
return map[string]any{
"enabled": false,
}
}
@ -210,13 +210,13 @@ func (l *Limiter) GetStats() map[string]interface{} {
}
l.connMu.RUnlock()
return map[string]interface{}{
return map[string]any{
"enabled": true,
"total_requests": l.totalRequests.Load(),
"blocked_requests": l.blockedRequests.Load(),
"active_ips": activeIPs,
"total_connections": totalConnections,
"config": map[string]interface{}{
"config": map[string]any{
"requests_per_second": l.config.RequestsPerSecond,
"burst_size": l.config.BurstSize,
"limit_by": l.config.LimitBy,

View File

@ -132,7 +132,7 @@ func (h *HTTPStreamer) requestHandler(ctx *fasthttp.RequestCtx) {
if allowed, statusCode, message := h.rateLimiter.CheckHTTP(remoteAddr); !allowed {
ctx.SetStatusCode(statusCode)
ctx.SetContentType("application/json")
json.NewEncoder(ctx).Encode(map[string]interface{}{
json.NewEncoder(ctx).Encode(map[string]any{
"error": message,
"retry_after": "60", // seconds
})
@ -149,7 +149,7 @@ func (h *HTTPStreamer) requestHandler(ctx *fasthttp.RequestCtx) {
default:
ctx.SetStatusCode(fasthttp.StatusNotFound)
ctx.SetContentType("application/json")
json.NewEncoder(ctx).Encode(map[string]interface{}{
json.NewEncoder(ctx).Encode(map[string]any{
"error": "Not Found",
"message": fmt.Sprintf("Available endpoints: %s (SSE stream), %s (status)",
h.streamPath, h.statusPath),
@ -218,7 +218,7 @@ func (h *HTTPStreamer) handleStream(ctx *fasthttp.RequestCtx) {
// Send initial connected event
clientID := fmt.Sprintf("%d", time.Now().UnixNano())
connectionInfo := map[string]interface{}{
connectionInfo := map[string]any{
"client_id": clientID,
"stream_path": h.streamPath,
"status_path": h.statusPath,
@ -280,7 +280,7 @@ func (h *HTTPStreamer) formatHeartbeat() string {
}
if h.config.Heartbeat.Format == "json" {
data := make(map[string]interface{})
data := make(map[string]any)
data["type"] = "heartbeat"
if h.config.Heartbeat.IncludeTimestamp {
@ -315,19 +315,19 @@ func (h *HTTPStreamer) formatHeartbeat() string {
func (h *HTTPStreamer) handleStatus(ctx *fasthttp.RequestCtx) {
ctx.SetContentType("application/json")
var rateLimitStats interface{}
var rateLimitStats any
if h.rateLimiter != nil {
rateLimitStats = h.rateLimiter.GetStats()
} else {
rateLimitStats = map[string]interface{}{
rateLimitStats = map[string]any{
"enabled": false,
}
}
status := map[string]interface{}{
status := map[string]any{
"service": "LogWisp",
"version": version.Short(),
"server": map[string]interface{}{
"server": map[string]any{
"type": "http",
"port": h.config.Port,
"active_clients": h.activeClients.Load(),
@ -339,8 +339,8 @@ func (h *HTTPStreamer) handleStatus(ctx *fasthttp.RequestCtx) {
"stream": h.streamPath,
"status": h.statusPath,
},
"features": map[string]interface{}{
"heartbeat": map[string]interface{}{
"features": map[string]any{
"heartbeat": map[string]any{
"enabled": h.config.Heartbeat.Enabled,
"interval": h.config.Heartbeat.IntervalSeconds,
"format": h.config.Heartbeat.Format,

View File

@ -116,7 +116,7 @@ func (t *TCPStreamer) broadcastLoop() {
}
data = append(data, '\n')
t.server.connections.Range(func(key, value interface{}) bool {
t.server.connections.Range(func(key, value any) bool {
conn := key.(gnet.Conn)
conn.AsyncWrite(data, nil)
return true
@ -124,7 +124,7 @@ func (t *TCPStreamer) broadcastLoop() {
case <-tickerChan:
if heartbeat := t.formatHeartbeat(); heartbeat != nil {
t.server.connections.Range(func(key, value interface{}) bool {
t.server.connections.Range(func(key, value any) bool {
conn := key.(gnet.Conn)
conn.AsyncWrite(heartbeat, nil)
return true
@ -142,7 +142,7 @@ func (t *TCPStreamer) formatHeartbeat() []byte {
return nil
}
data := make(map[string]interface{})
data := make(map[string]any)
data["type"] = "heartbeat"
if t.config.Heartbeat.IncludeTimestamp {