strmatcher.go 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200
  1. package strmatcher
  2. import (
  3. "regexp"
  4. )
  5. // PrimeRK is the prime multiplier of the Rabin-Karp style rolling hash that
  // ACAutomatonMatcherGroup uses to key its full and domain patterns. The hash
  // is accumulated in a uint32, so the products wrap around modulo 2^32.
  6. const PrimeRK = 16777619
  7. // Matcher is the interface that reports whether a string matches a pattern.
  8. type Matcher interface {
  9. // Match returns true if the given string matches the matcher's predefined pattern.
  10. Match(string) bool
  // String returns a textual form of the matcher; the exact format is up to
  // each implementation.
  11. String() string
  12. }
  13. // Type identifies the kind of matching a Matcher performs.
  14. type Type byte
  15. const (
  16. // Full is the type of matcher for which the input string must be exactly equal to the pattern.
  17. Full Type = iota
  18. // Substr is the type of matcher for which the input string must contain the pattern as a substring.
  19. Substr
  20. // Domain is the type of matcher for which the input string must be the pattern itself or a sub-domain of it.
  21. Domain
  22. // Regex is the type of matcher for which the input string must match the regular-expression pattern.
  23. Regex
  24. )
  25. // New creates a new Matcher based on the given pattern.
  26. func (t Type) New(pattern string) (Matcher, error) {
  27. switch t {
  28. case Full:
  29. return fullMatcher(pattern), nil
  30. case Substr:
  31. return substrMatcher(pattern), nil
  32. case Domain:
  33. return domainMatcher(pattern), nil
  34. case Regex:
  35. r, err := regexp.Compile(pattern)
  36. if err != nil {
  37. return nil, err
  38. }
  39. return &regexMatcher{
  40. pattern: r,
  41. }, nil
  42. default:
  43. panic("Unknown type")
  44. }
  45. }
  46. // IndexMatcher is the interface for matching an input against a group of matchers.
  47. type IndexMatcher interface {
  48. // Match returns the indices of the matchers that match the input. It returns an empty slice if no such matcher exists.
  49. Match(input string) []uint32
  50. }
  // matcherEntry pairs a Matcher with the index that is reported when the
  // matcher matches an input.
  51. type matcherEntry struct {
  52. m Matcher // matcher to evaluate
  53. id uint32 // index returned from Match when m matches
  54. }
  // ACAutomatonMatcherGroup matches an input against patterns registered through
  // AddPattern: substring patterns are handed to an ACAutomaton, full and domain
  // patterns are stored in a hash map keyed by a rolling hash, and regular
  // expressions are kept in a list that is scanned in order.
  55. type ACAutomatonMatcherGroup struct {
  56. count uint32 // set to 1 by NewACAutomatonMatcherGroup; incremented by AddPattern for Regex patterns
  57. ac *ACAutomaton // created by AddPattern on the first Substr pattern; nil until then
  58. nonSubstrMap map[uint32]string // rolling hash -> pattern, for Full and Domain patterns
  59. otherMatchers []matcherEntry // compiled Regex matchers together with their ids
  60. }
  61. func NewACAutomatonMatcherGroup() *ACAutomatonMatcherGroup {
  62. var g = new(ACAutomatonMatcherGroup)
  63. g.count = 1
  64. g.nonSubstrMap = map[uint32]string{}
  65. return g
  66. }
  67. // Add `full` or `domain` pattern to hashmap
  68. func (g *ACAutomatonMatcherGroup) AddFullOrDomainPattern(pattern string, t Type) {
  69. h := uint32(0)
  70. for i := len(pattern) - 1; i >= 0; i-- {
  71. h = h*PrimeRK + uint32(pattern[i])
  72. }
  73. switch t {
  74. case Full:
  75. g.nonSubstrMap[h] = pattern
  76. case Domain:
  77. g.nonSubstrMap[h] = pattern
  78. g.nonSubstrMap[h*PrimeRK+uint32('.')] = "." + pattern
  79. default:
  80. }
  81. }
  82. func (g *ACAutomatonMatcherGroup) AddPattern(pattern string, t Type) (uint32, error) {
  83. switch t {
  84. case Substr:
  85. if g.ac == nil {
  86. g.ac = NewACAutomaton()
  87. }
  88. g.ac.Add(pattern, t)
  89. case Full, Domain:
  90. g.AddFullOrDomainPattern(pattern, t)
  91. case Regex:
  92. g.count++
  93. r, err := regexp.Compile(pattern)
  94. if err != nil {
  95. return 0, err
  96. }
  97. g.otherMatchers = append(g.otherMatchers, matcherEntry{
  98. m: &regexMatcher{pattern: r},
  99. id: g.count,
  100. })
  101. default:
  102. panic("Unknown type")
  103. }
  104. return g.count, nil
  105. }
  106. func (g *ACAutomatonMatcherGroup) Build() {
  107. if g.ac != nil {
  108. g.ac.Build()
  109. }
  110. }
  111. // Match implements IndexMatcher.Match.
  112. func (g *ACAutomatonMatcherGroup) Match(pattern string) []uint32 {
  113. result := []uint32{}
  114. hash := uint32(0)
  115. for i := len(pattern) - 1; i >= 0; i-- {
  116. hash = hash*PrimeRK + uint32(pattern[i])
  117. if pattern[i] == '.' {
  118. if v, ok := g.nonSubstrMap[hash]; ok && v == pattern[i:] {
  119. result = append(result, 1)
  120. return result
  121. }
  122. }
  123. }
  124. if v, ok := g.nonSubstrMap[hash]; ok && v == pattern {
  125. result = append(result, 1)
  126. return result
  127. }
  128. if g.ac != nil && g.ac.Match(pattern) {
  129. result = append(result, 1)
  130. return result
  131. }
  132. for _, e := range g.otherMatchers {
  133. if e.m.Match(pattern) {
  134. result = append(result, e.id)
  135. return result
  136. }
  137. }
  138. return result
  139. }
  140. // MatcherGroup is an implementation of IndexMatcher.
  141. // The zero value is ready to use.
  142. type MatcherGroup struct {
  143. count uint32 // number of matchers added so far; also the index of the most recently added one
  144. fullMatcher FullMatcherGroup // receives matchers of type fullMatcher
  145. domainMatcher DomainMatcherGroup // receives matchers of type domainMatcher
  146. otherMatchers []matcherEntry // every other matcher, checked one by one in Match
  147. }
  148. // Add adds a new Matcher into the MatcherGroup, and returns its index. The index will never be 0.
  149. func (g *MatcherGroup) Add(m Matcher) uint32 {
  150. g.count++
  151. c := g.count
  152. switch tm := m.(type) {
  153. case fullMatcher:
  154. g.fullMatcher.addMatcher(tm, c)
  155. case domainMatcher:
  156. g.domainMatcher.addMatcher(tm, c)
  157. default:
  158. g.otherMatchers = append(g.otherMatchers, matcherEntry{
  159. m: m,
  160. id: c,
  161. })
  162. }
  163. return c
  164. }
  165. // Match implements IndexMatcher.Match.
  166. func (g *MatcherGroup) Match(pattern string) []uint32 {
  167. result := []uint32{}
  168. result = append(result, g.fullMatcher.Match(pattern)...)
  169. result = append(result, g.domainMatcher.Match(pattern)...)
  170. for _, e := range g.otherMatchers {
  171. if e.m.Match(pattern) {
  172. result = append(result, e.id)
  173. }
  174. }
  175. return result
  176. }
  177. // Size returns the number of matchers in the MatcherGroup.
  178. func (g *MatcherGroup) Size() uint32 {
  179. return g.count
  180. }