Skip to content

Commit

Permalink
perf: use a state machine based approach (#54)
Browse files Browse the repository at this point in the history
* perf: use iota switch for state to reduce branching

* perf: move state default behind switch case

* perf: skip count is only checked in state default

* fix: move skip count back to start
  • Loading branch information
ayuhito authored Jan 18, 2025
1 parent fd6789d commit 70d006a
Show file tree
Hide file tree
Showing 2 changed files with 93 additions and 91 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,11 @@ Benchmarks were performed against [`ua-parser/uap-go`](https://github.com/ua-par
cd ./benchmarks
go test -bench=. -benchmem ./...

MedamaParserGetSingle-12 2916867 408.5 ns/op 0 B/op 0 allocs/op
MedamaParserGetSingle-12 3871813 308.3 ns/op 0 B/op 0 allocs/op
MileusnaParserGetSingle-12 1322602 917.3 ns/op 600 B/op 16 allocs/op
UAPParserGetSingle-12 986428 1159 ns/op 233 B/op 8 allocs/op

MedamaParserGetAll-12 57078 20037 ns/op 0 B/op 0 allocs/op
MedamaParserGetAll-12 71804 15544 ns/op 0 B/op 0 allocs/op
MileusnaParserGetAll-12 28375 42301 ns/op 28031 B/op 716 allocs/op
UAPParserGetAll-12 18645 56951 ns/op 10179 B/op 344 allocs/op
```
Expand Down
180 changes: 91 additions & 89 deletions trie.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,19 @@ import (
"github.com/medama-io/go-useragent/internal"
)

// trieState identifies the parsing mode that RuneTrie.Get is currently in
// while it iterates over the runes of a key. Get switches on this value for
// each rune it processes.
type trieState int

const (
// stateDefault is the default parsing state of the trie.
stateDefault trieState = iota
// stateVersion is the state when we are looking for a version number.
stateVersion
// stateSkipWhitespace is the state when we are skipping whitespace.
stateSkipWhitespace
// stateSkipClosingParenthesis is the state when we are skipping until a closing parenthesis.
// This is used to skip over device IDs.
stateSkipClosingParenthesis
// This is the number of rune trie children to store in the array
// before switching to a map. Smaller arrays are faster to iterate
// and use less memory.
Expand Down Expand Up @@ -42,136 +54,121 @@ type RuneTrie struct {
result []resultItem
}

// NewRuneTrie returns a pointer to a freshly allocated, zero-valued RuneTrie.
func NewRuneTrie() *RuneTrie {
	return &RuneTrie{}
}

// Get returns the value stored at the given key. Returns nil for internal
// nodes or for nodes with a value of nil.
func (trie *RuneTrie) Get(key string) UserAgent {
state := stateDefault
node := trie
var ua UserAgent

// Flag to indicate if we are currently iterating over a version number.
var isVersion bool
// Number of runes to skip when iterating over the trie. This is used
// to skip over version numbers or language codes.
var skipCount uint8
// Skip until we encounter whitespace.
var skipUntilWhitespace bool
// Skip until we encounter a closing parenthesis, used for skipping over device IDs.
var skipUntilClosingParenthesis bool

for i, r := range key {
if skipUntilWhitespace {
if r == ' ' {
skipUntilWhitespace = false
} else {
continue
}
}

if skipCount > 0 {
skipCount--
continue
}

if skipUntilClosingParenthesis {
switch state {
case stateSkipWhitespace:
if r == ' ' {
state = stateDefault
}

case stateSkipClosingParenthesis:
if r == ')' {
skipUntilClosingParenthesis = false
} else {
continue
state = stateDefault
}
}

if isVersion {
case stateVersion:
// If we encounter any unknown characters, we can assume the version number is over.
if !internal.IsDigit(r) && r != '.' {
isVersion = false
state = stateDefault
} else {
// Add to rune buffer.
if ua.versionIndex < cap(ua.version) {
ua.version[ua.versionIndex] = r
ua.versionIndex++
}
continue
}
}

// Strip any other version numbers from other products to get more hits to the trie.
//
// Also do not use a switch here as Go does not generate a jump table for switch
// statements with no integral constants. Benchmarking shows that ops go down
// if we try to migrate statements like this to a switch.
if internal.IsDigit(r) || (r == '.' && len(key) > i+1 && internal.IsDigit(rune(key[i+1]))) {
continue
}

// Identify and skip language codes e.g. en-US, zh-cn, en_US, ZH_cn
if len(key) > i+6 && r == ' ' && internal.IsLetter(rune(key[i+1])) && internal.IsLetter(rune(key[i+2])) && (key[i+3] == '-' || key[i+3] == '_') && internal.IsLetter(rune(key[i+4])) && internal.IsLetter(rune(key[i+5])) && (key[i+6] == ' ' || key[i+6] == ')' || key[i+6] == ';') {
// Add the number of runes to skip to the skip count.
skipCount += 6
continue
}

switch r {
case ' ', ';', ')', '(', ',', '_', '-', '/':
continue
}

// If result exists, we can append it to the value.
for _, result := range node.result {
matched := ua.addMatch(result)

// If we matched a browser of the highest precedence, we can mark the
// next set of runes as the version number we want to store.
case stateDefault:
// Strip any other version numbers from other products to get more hits to the trie.
//
// We also reject any version numbers related to Safari since it has a
// separate key for its version number.
if (matched && result.Type == internal.MatchBrowser && result.Match != internal.Safari) || (result.Type == internal.MatchVersion && ua.versionIndex == 0) {
// Clear version buffer if it has old values.
if ua.versionIndex > 0 {
ua.version = [32]rune{}
ua.versionIndex = 0
}
// Also do not use a switch here as Go does not generate a jump table for switch
// statements with no integral constants. Benchmarking shows that ops go down
// if we try to migrate statements like this to a switch.
if internal.IsDigit(r) || (r == '.' && len(key) > i+1 && internal.IsDigit(rune(key[i+1]))) {
continue
}

// We want to omit the slash after the browser name.
skipCount = 1
isVersion = true
// Identify and skip language codes e.g. en-US, zh-cn, en_US, ZH_cn
if len(key) > i+6 && r == ' ' && internal.IsLetter(rune(key[i+1])) && internal.IsLetter(rune(key[i+2])) && (key[i+3] == '-' || key[i+3] == '_') && internal.IsLetter(rune(key[i+4])) && internal.IsLetter(rune(key[i+5])) && (key[i+6] == ' ' || key[i+6] == ')' || key[i+6] == ';') {
// Add the number of runes to skip to the skip count.
skipCount += 6
continue
}

// If we matched a mobile token, we want to strip everything after it
// until we reach whitespace to get around random device IDs.
// For example, "Mobile/14F89" should be "Mobile".
if matched && result.Match == internal.Mobile {
skipUntilWhitespace = true
switch r {
case ' ', ';', ')', '(', ',', '_', '-', '/':
continue
}

// If we matched an Android token, we want to strip everything after it until
// we reach a closing parenthesis to get around random device IDs.
if matched && result.Match == internal.Android {
skipUntilClosingParenthesis = true
// If result exists, we can append it to the value.
for _, result := range node.result {
matched := ua.addMatch(result)

// If we matched a browser of the highest precedence, we can mark the
// next set of runes as the version number we want to store.
//
// We also reject any version numbers related to Safari since it has a
// separate key for its version number.
if (matched && result.Type == internal.MatchBrowser && result.Match != internal.Safari) || (result.Type == internal.MatchVersion && ua.versionIndex == 0) {
// Clear version buffer if it has old values.
if ua.versionIndex > 0 {
ua.version = [32]rune{}
ua.versionIndex = 0
}

// We want to omit the slash after the browser name.
skipCount = 1
state = stateVersion
}

// If we matched a mobile token, we want to strip everything after it
// until we reach whitespace to get around random device IDs.
// For example, "Mobile/14F89" should be "Mobile".
if matched && result.Match == internal.Mobile {
state = stateSkipWhitespace
}

// If we matched an Android token, we want to strip everything after it until
// we reach a closing parenthesis to get around random device IDs.
if matched && result.Match == internal.Android {
state = stateSkipClosingParenthesis
}
}
}

// Set the next node to the child of the current node.
var next *RuneTrie
if len(node.childrenArr) != 0 {
for _, child := range node.childrenArr {
if child.r == r {
next = child.node
break
// Set the next node to the child of the current node.
var next *RuneTrie
if len(node.childrenArr) != 0 {
for _, child := range node.childrenArr {
if child.r == r {
next = child.node
break
}
}
} else {
next = node.childrenMap[r]
}
} else {
next = node.childrenMap[r]
}

if next == nil {
continue // No match found, but we can try to match the next rune.
if next == nil {
continue // No match found, but we can try to match the next rune.
}
node = next
}
node = next
}

return ua
Expand Down Expand Up @@ -344,3 +341,8 @@ func (ua *UserAgent) addMatch(result resultItem) bool {

return false
}

// NewRuneTrie returns a pointer to a freshly allocated, zero-valued RuneTrie.
func NewRuneTrie() *RuneTrie {
	return &RuneTrie{}
}

0 comments on commit 70d006a

Please sign in to comment.