matchergroup_ac_automation.go 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237
  1. package strmatcher
  2. import (
  3. "container/list"
  4. )
  5. const validCharCount = 53
  6. type MatchType struct {
  7. matchType Type
  8. exist bool
  9. }
  10. const (
  11. TrieEdge bool = true
  12. FailEdge bool = false
  13. )
  14. type Edge struct {
  15. edgeType bool
  16. nextNode int
  17. }
  18. // ACAutoMationMatcherGroup is an implementation of MatcherGroup.
  19. // It uses an AC Automata to provide support for Full, Domain and Substr matcher. Trie node is char based.
  20. type ACAutomatonMatcherGroup struct {
  21. trie [][validCharCount]Edge
  22. fail []int
  23. exists []MatchType
  24. count int
  25. }
  26. func newNode() [validCharCount]Edge {
  27. var s [validCharCount]Edge
  28. for i := range s {
  29. s[i] = Edge{
  30. edgeType: FailEdge,
  31. nextNode: 0,
  32. }
  33. }
  34. return s
  35. }
  36. var char2Index = []int{
  37. 'A': 0,
  38. 'a': 0,
  39. 'B': 1,
  40. 'b': 1,
  41. 'C': 2,
  42. 'c': 2,
  43. 'D': 3,
  44. 'd': 3,
  45. 'E': 4,
  46. 'e': 4,
  47. 'F': 5,
  48. 'f': 5,
  49. 'G': 6,
  50. 'g': 6,
  51. 'H': 7,
  52. 'h': 7,
  53. 'I': 8,
  54. 'i': 8,
  55. 'J': 9,
  56. 'j': 9,
  57. 'K': 10,
  58. 'k': 10,
  59. 'L': 11,
  60. 'l': 11,
  61. 'M': 12,
  62. 'm': 12,
  63. 'N': 13,
  64. 'n': 13,
  65. 'O': 14,
  66. 'o': 14,
  67. 'P': 15,
  68. 'p': 15,
  69. 'Q': 16,
  70. 'q': 16,
  71. 'R': 17,
  72. 'r': 17,
  73. 'S': 18,
  74. 's': 18,
  75. 'T': 19,
  76. 't': 19,
  77. 'U': 20,
  78. 'u': 20,
  79. 'V': 21,
  80. 'v': 21,
  81. 'W': 22,
  82. 'w': 22,
  83. 'X': 23,
  84. 'x': 23,
  85. 'Y': 24,
  86. 'y': 24,
  87. 'Z': 25,
  88. 'z': 25,
  89. '!': 26,
  90. '$': 27,
  91. '&': 28,
  92. '\'': 29,
  93. '(': 30,
  94. ')': 31,
  95. '*': 32,
  96. '+': 33,
  97. ',': 34,
  98. ';': 35,
  99. '=': 36,
  100. ':': 37,
  101. '%': 38,
  102. '-': 39,
  103. '.': 40,
  104. '_': 41,
  105. '~': 42,
  106. '0': 43,
  107. '1': 44,
  108. '2': 45,
  109. '3': 46,
  110. '4': 47,
  111. '5': 48,
  112. '6': 49,
  113. '7': 50,
  114. '8': 51,
  115. '9': 52,
  116. }
  117. func NewACAutomatonMatcherGroup() *ACAutomatonMatcherGroup {
  118. ac := new(ACAutomatonMatcherGroup)
  119. ac.trie = append(ac.trie, newNode())
  120. ac.fail = append(ac.fail, 0)
  121. ac.exists = append(ac.exists, MatchType{
  122. matchType: Full,
  123. exist: false,
  124. })
  125. return ac
  126. }
  127. // AddFullMatcher implements MatcherGroupForFull.AddFullMatcher.
  128. func (ac *ACAutomatonMatcherGroup) AddFullMatcher(matcher FullMatcher, _ uint32) {
  129. ac.addPattern(0, matcher.Pattern(), matcher.Type())
  130. }
  131. // AddDomainMatcher implements MatcherGroupForDomain.AddDomainMatcher.
  132. func (ac *ACAutomatonMatcherGroup) AddDomainMatcher(matcher DomainMatcher, _ uint32) {
  133. node := ac.addPattern(0, matcher.Pattern(), Full)
  134. ac.addPattern(node, ".", Domain)
  135. }
  136. // AddSubstrMatcher implements MatcherGroupForSubstr.AddSubstrMatcher.
  137. func (ac *ACAutomatonMatcherGroup) AddSubstrMatcher(matcher SubstrMatcher, _ uint32) {
  138. ac.addPattern(0, matcher.Pattern(), matcher.Type())
  139. }
  140. func (ac *ACAutomatonMatcherGroup) addPattern(node int, pattern string, matcherType Type) int {
  141. for i := len(pattern) - 1; i >= 0; i-- {
  142. idx := char2Index[pattern[i]]
  143. if ac.trie[node][idx].nextNode == 0 {
  144. ac.count++
  145. if len(ac.trie) < ac.count+1 {
  146. ac.trie = append(ac.trie, newNode())
  147. ac.fail = append(ac.fail, 0)
  148. ac.exists = append(ac.exists, MatchType{
  149. matchType: Full,
  150. exist: false,
  151. })
  152. }
  153. ac.trie[node][idx] = Edge{
  154. edgeType: TrieEdge,
  155. nextNode: ac.count,
  156. }
  157. }
  158. node = ac.trie[node][idx].nextNode
  159. }
  160. ac.exists[node] = MatchType{
  161. matchType: matcherType,
  162. exist: true,
  163. }
  164. return node
  165. }
  166. func (ac *ACAutomatonMatcherGroup) Build() {
  167. queue := list.New()
  168. for i := 0; i < validCharCount; i++ {
  169. if ac.trie[0][i].nextNode != 0 {
  170. queue.PushBack(ac.trie[0][i])
  171. }
  172. }
  173. for {
  174. front := queue.Front()
  175. if front == nil {
  176. break
  177. } else {
  178. node := front.Value.(Edge).nextNode
  179. queue.Remove(front)
  180. for i := 0; i < validCharCount; i++ {
  181. if ac.trie[node][i].nextNode != 0 {
  182. ac.fail[ac.trie[node][i].nextNode] = ac.trie[ac.fail[node]][i].nextNode
  183. queue.PushBack(ac.trie[node][i])
  184. } else {
  185. ac.trie[node][i] = Edge{
  186. edgeType: FailEdge,
  187. nextNode: ac.trie[ac.fail[node]][i].nextNode,
  188. }
  189. }
  190. }
  191. }
  192. }
  193. }
  194. // Match implements MatcherGroup.Match.
  195. func (*ACAutomatonMatcherGroup) Match(_ string) []uint32 {
  196. return nil
  197. }
  198. // MatchAny implements MatcherGroup.MatchAny.
  199. func (ac *ACAutomatonMatcherGroup) MatchAny(s string) bool {
  200. node := 0
  201. fullMatch := true
  202. // 1. the match string is all through trie edge. FULL MATCH or DOMAIN
  203. // 2. the match string is through a fail edge. NOT FULL MATCH
  204. // 2.1 Through a fail edge, but there exists a valid node. SUBSTR
  205. for i := len(s) - 1; i >= 0; i-- {
  206. idx := char2Index[s[i]]
  207. fullMatch = fullMatch && ac.trie[node][idx].edgeType
  208. node = ac.trie[node][idx].nextNode
  209. switch ac.exists[node].matchType {
  210. case Substr:
  211. return true
  212. case Domain:
  213. if fullMatch {
  214. return true
  215. }
  216. default:
  217. break
  218. }
  219. }
  220. return fullMatch && ac.exists[node].exist
  221. }