|
@@ -4,6 +4,9 @@ import (
|
|
|
"regexp"
|
|
"regexp"
|
|
|
)
|
|
)
|
|
|
|
|
|
|
|
|
|
+// PrimeRK is the prime base used in Rabin-Karp algorithm.
|
|
|
|
|
+const PrimeRK = 16777619
|
|
|
|
|
+
|
|
|
// Matcher is the interface to determine a string matches a pattern.
|
|
// Matcher is the interface to determine a string matches a pattern.
|
|
|
type Matcher interface {
|
|
type Matcher interface {
|
|
|
// Match returns true if the given string matches a predefined pattern.
|
|
// Match returns true if the given string matches a predefined pattern.
|
|
@@ -61,20 +64,42 @@ type matcherEntry struct {
|
|
|
type ACAutomatonMatcherGroup struct {
|
|
type ACAutomatonMatcherGroup struct {
|
|
|
count uint32
|
|
count uint32
|
|
|
ac *ACAutomaton
|
|
ac *ACAutomaton
|
|
|
|
|
+ nonSubstrMap map[uint32]string
|
|
|
otherMatchers []matcherEntry
|
|
otherMatchers []matcherEntry
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
func NewACAutomatonMatcherGroup() *ACAutomatonMatcherGroup {
|
|
func NewACAutomatonMatcherGroup() *ACAutomatonMatcherGroup {
|
|
|
var g = new(ACAutomatonMatcherGroup)
|
|
var g = new(ACAutomatonMatcherGroup)
|
|
|
g.count = 1
|
|
g.count = 1
|
|
|
- g.ac = NewACAutomaton()
|
|
|
|
|
|
|
+ g.nonSubstrMap = map[uint32]string{}
|
|
|
return g
|
|
return g
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+// Add `full` or `domain` pattern to hashmap
|
|
|
|
|
+func (g *ACAutomatonMatcherGroup) AddFullOrDomainPattern(pattern string, t Type) {
|
|
|
|
|
+ h := uint32(0)
|
|
|
|
|
+ for i := len(pattern) - 1; i >= 0; i-- {
|
|
|
|
|
+ h = h*PrimeRK + uint32(pattern[i])
|
|
|
|
|
+ }
|
|
|
|
|
+ switch t {
|
|
|
|
|
+ case Full:
|
|
|
|
|
+ g.nonSubstrMap[h] = pattern
|
|
|
|
|
+ case Domain:
|
|
|
|
|
+ g.nonSubstrMap[h] = pattern
|
|
|
|
|
+ g.nonSubstrMap[h*PrimeRK+uint32('.')] = "." + pattern
|
|
|
|
|
+ default:
|
|
|
|
|
+ }
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
func (g *ACAutomatonMatcherGroup) AddPattern(pattern string, t Type) (uint32, error) {
|
|
func (g *ACAutomatonMatcherGroup) AddPattern(pattern string, t Type) (uint32, error) {
|
|
|
switch t {
|
|
switch t {
|
|
|
- case Full, Substr, Domain:
|
|
|
|
|
|
|
+ case Substr:
|
|
|
|
|
+ if g.ac == nil {
|
|
|
|
|
+ g.ac = NewACAutomaton()
|
|
|
|
|
+ }
|
|
|
g.ac.Add(pattern, t)
|
|
g.ac.Add(pattern, t)
|
|
|
|
|
+ case Full, Domain:
|
|
|
|
|
+ g.AddFullOrDomainPattern(pattern, t)
|
|
|
case Regex:
|
|
case Regex:
|
|
|
g.count++
|
|
g.count++
|
|
|
r, err := regexp.Compile(pattern)
|
|
r, err := regexp.Compile(pattern)
|
|
@@ -92,18 +117,36 @@ func (g *ACAutomatonMatcherGroup) AddPattern(pattern string, t Type) (uint32, er
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
func (g *ACAutomatonMatcherGroup) Build() {
|
|
func (g *ACAutomatonMatcherGroup) Build() {
|
|
|
- g.ac.Build()
|
|
|
|
|
|
|
+ if g.ac != nil {
|
|
|
|
|
+ g.ac.Build()
|
|
|
|
|
+ }
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
// Match implements IndexMatcher.Match.
|
|
// Match implements IndexMatcher.Match.
|
|
|
func (g *ACAutomatonMatcherGroup) Match(pattern string) []uint32 {
|
|
func (g *ACAutomatonMatcherGroup) Match(pattern string) []uint32 {
|
|
|
result := []uint32{}
|
|
result := []uint32{}
|
|
|
- if g.ac.Match(pattern) {
|
|
|
|
|
|
|
+ hash := uint32(0)
|
|
|
|
|
+ for i := len(pattern) - 1; i >= 0; i-- {
|
|
|
|
|
+ hash = hash*PrimeRK + uint32(pattern[i])
|
|
|
|
|
+ if pattern[i] == '.' {
|
|
|
|
|
+ if v, ok := g.nonSubstrMap[hash]; ok && v == pattern[i:] {
|
|
|
|
|
+ result = append(result, 1)
|
|
|
|
|
+ return result
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ if v, ok := g.nonSubstrMap[hash]; ok && v == pattern {
|
|
|
|
|
+ result = append(result, 1)
|
|
|
|
|
+ return result
|
|
|
|
|
+ }
|
|
|
|
|
+ if g.ac != nil && g.ac.Match(pattern) {
|
|
|
result = append(result, 1)
|
|
result = append(result, 1)
|
|
|
|
|
+ return result
|
|
|
}
|
|
}
|
|
|
for _, e := range g.otherMatchers {
|
|
for _, e := range g.otherMatchers {
|
|
|
if e.m.Match(pattern) {
|
|
if e.m.Match(pattern) {
|
|
|
result = append(result, e.id)
|
|
result = append(result, e.id)
|
|
|
|
|
+ return result
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
return result
|
|
return result
|