Skip to content

Commit

Permalink
Add Contains() function
Browse files Browse the repository at this point in the history
This function short-circuits the normal matching process, exiting
early on the first string match.  This can give a practical speed-up in
cases where only the existence of a match matters, not which strings matched.

The go1.8 gofmt flattens the import statement.
  • Loading branch information
allenluce authored and Allen Luce committed Apr 2, 2021
1 parent d3b2751 commit 8b500e7
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 1 deletion.
26 changes: 26 additions & 0 deletions ahocorasick.go
Original file line number Diff line number Diff line change
Expand Up @@ -304,3 +304,29 @@ func (m *Matcher) MatchThreadSafe(in []byte) []int {
m.heap.Put(heap)
return hits
}

// Contains reports whether at least one dictionary string occurs in in.
//
// It walks the automaton one input byte at a time and returns as soon as
// the first match is detected, so it can be faster than Match() when you
// do not need to know which words matched. It returns false for empty
// input and when the whole input is consumed without a match.
func (m *Matcher) Contains(in []byte) bool {
	n := m.root
	for _, b := range in {
		c := int(b)

		// Away from the root, first take the per-byte fail transition
		// stored on the current node before looking for a child.
		if !n.root {
			n = n.fails[c]
		}

		next := n.child[c]
		if next == nil {
			// No transition on this byte; stay on n and move on.
			continue
		}
		n = next

		// A match ends at this byte if the node itself is an output node
		// or if its suffix link leads somewhere other than the root.
		// NOTE(review): presumably a non-root suffix always points at an
		// output node (a shorter dictionary string ending here) — confirm
		// against the trie construction.
		if next.output || !next.suffix.root {
			return true
		}
	}
	return false
}
16 changes: 15 additions & 1 deletion ahocorasick_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ func TestWikipediaConcurrently(t *testing.T) {
}

func TestMatch(t *testing.T) {
m := NewStringMatcher([]string{"Mozilla", "Mac", "Macintosh", "Safari", "Sausage"})
m := NewStringMatcher(dictionary)
hits := m.Match([]byte("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36"))
assert(t, len(hits) == 4)
assert(t, hits[0] == 0)
Expand Down Expand Up @@ -312,6 +312,20 @@ func TestLargeDictionaryMatchThreadSafeWorks(t *testing.T) {
*/
hits := precomputed6.MatchThreadSafe(bytes2)
assert(t, len(hits) == 105)

}

// TestContains checks Contains on an input that holds a dictionary word,
// on one that holds none, and on a pattern that occurs only inside a
// partially matched longer pattern.
func TestContains(t *testing.T) {
	m := NewStringMatcher(dictionary)

	// "Mozilla" is spelled correctly; "Moc" and "Sofari" are misspelled.
	// NOTE(review): assumes dictionary contains "Mozilla" and none of the
	// other substrings of this input — confirm against its definition.
	contains := m.Contains([]byte("Mozilla/5.0 (Moc; Intel Computer OS X 10_7_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Sofari/537.36"))
	assert(t, contains)

	// Same input with "Mozilla" changed to "Mazilla": no match expected.
	contains = m.Contains([]byte("Mazilla/5.0 (Moc; Intel Computer OS X 10_7_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Sofari/537.36"))
	assert(t, !contains)

	// "SupermanX" never completes in the input, but "per" occurs inside
	// "Superman", so a match must still be reported.
	m = NewStringMatcher([]string{"SupermanX", "per"})
	contains = m.Contains([]byte("The Man Of Steel: Superman"))
	assert(t, contains)
}

// bytes is a sample browser User-Agent string; it is the same text that TestMatch searches.
var bytes = []byte("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36")
Expand Down

0 comments on commit 8b500e7

Please sign in to comment.