Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions plugin/action/hash/normalize/token_normalizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ func (n *tokenNormalizer) normalizeByScanner(out []byte, scanner *lexmachine.Sca
prevEnd := 0
for tokRaw, err, eos := scanner.Next(); !eos; tokRaw, err, eos = scanner.Next() {
if ui, is := err.(*machines.UnconsumedInput); is {
scanner.TC = ui.FailTC // skip
scanner.TC = max(scanner.TC+1, ui.FailTC-1) // skip
continue
} else if err != nil {
out = out[:0]
Expand Down Expand Up @@ -484,7 +484,7 @@ var builtinTokenPatterns = []TokenPattern{
},
{
Placeholder: placeholderByPattern[pFilepath],
RE: `(/[a-zA-Z0-9-_.]+)+`,
RE: `(/[a-zA-Z-_.][a-zA-Z0-9-_.]*)+`,
mask: pFilepath,
},
{
Expand All @@ -511,10 +511,10 @@ var builtinTokenPatterns = []TokenPattern{
mask: pHash,
},
{
// RFC3339, RFC3339Nano, DateTime, DateOnly, TimeOnly, Go time with monotonic clock
// RFC3339, RFC3339Nano, DateTime, DateOnly, TimeOnly, Go time with optional monotonic clock
Placeholder: placeholderByPattern[pDatetime],
RE: fmt.Sprintf(`(%s)|(%s)|(%s)|(%s)`,
`\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d\.\d+ [+\-]\d\d\d\d [A-Z]+ m=[+\-]\d+\.\d+`,
`\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d\.\d+ [+\-]\d\d\d\d [A-Z]+( m=[+\-]\d+\.\d+)?`,
`\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d(\.\d+)?(Z|[\+\-]\d\d:\d\d)`,
`\d\d:\d\d:\d\d`,
`\d\d\d\d-\d\d-\d\d( \d\d:\d\d:\d\d)?`,
Expand Down
34 changes: 19 additions & 15 deletions plugin/action/hash/normalize/token_normalizer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,8 @@ func TestTokenNormalizerBuiltin(t *testing.T) {
"some 2025-01-13 20:58:04.019973588 +0000 UTC m=+1417512.275697914 here",
"some 2025-01-13 20:58:04.019973588 -0700 MST m=-123.456789012 here",
"some 2025-01-13 20:58:04.019973588 +0300 MSK m=+0.123456789 here",
"some 2025-01-13 20:58:04.019973588 -0700 MST here",
"some 2025-01-13 20:58:04.019973588 +0300 MSK here",
"some 2025-01-13T10:20:40Z here",
"some 2025-01-13T10:20:40.999999999Z here",
"some 2025-01-13T10:20:40-06:00 here",
Expand Down Expand Up @@ -470,9 +472,9 @@ func TestTokenNormalizerCustom(t *testing.T) {
},
},
inputs: []string{
`2006/01/02 15:04:05 error occurred, client: 10.125.172.251, upstream: "http://10.117.246.15:84/download", host: "mpm-youtube-downloader-38.name.com:84"`,
`2006/01/02 15:04:05 error occurred, client: 10.125.172.251, upstream: "http://10.117.246.15:84/download", host: "mpm-youtube-downloader-38.name.com:84", part/offset: 10117/2461584`,
},
want: "<nginx_datetime> error occurred, client: <ip>, upstream: <double_quoted>, host: <double_quoted>",
want: "<nginx_datetime> error occurred, client: <ip>, upstream: <double_quoted>, host: <double_quoted>, part/offset: <int>/<int>",
},
{
name: "empty_patterns",
Expand Down Expand Up @@ -515,19 +517,21 @@ func TestTokenNormalizerCustom(t *testing.T) {

func genBenchInput(count int) []byte {
var examples = []string{
"s1mple falsehood", // no match
"test@host1.host2.com", // email
"http://some.host.com/page1?a=1", // url
"hello-world-123.COM", // host
"7c1811ed-e98f-4c9c-a9f9-58c757ff494f", // uuid
"a94a8fe5ccb19ba61c4c0873d391e987982fbbd3", // sha1
"098f6bcd4621d373cade4e832627b4f6", // md5
"2025-01-13T10:20:40Z", // datetime
"1.2.3.4", // ip
"-1.2m5s", // duration
"0x13eb85e69dfbc0758b12acdaae36287d", // hex
"-4.56", // float
"123", // int
"48757ec9f04efe7faacec8722f3476339b125a6b6172b8a69ff3aa329e0bd0ff", // hash(sha256)
"a94a8fe5ccb19ba61c4c0873d391e987982fbbd3", // hash(sha1)
"098f6bcd4621d373cade4e832627b4f6", // hash(md5)
"s1mple falsehood", // no match
"test@host1.host2.com", // email
"http://some.host.com/page1?a=1", // url
"hello-world-123.COM", // host
"7c1811ed-e98f-4c9c-a9f9-58c757ff494f", // uuid
"/home/user/photos", // filepath
"2025-01-13T10:20:40Z", // datetime
"1.2.3.4", // ip
"-1.2m5s", // duration
"0x13eb85e69dfbc0758b12acdaae36287d", // hex
"-4.56", // float
"123", // int
"truE faLse",
}

Expand Down
Loading