strmatcher.go 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157
  1. package strmatcher
  2. import (
  3. "regexp"
  4. )
  // Matcher decides whether a string satisfies a single predefined pattern.
  type Matcher interface {
  	// Match reports whether the given string matches the pattern held by
  	// this Matcher.
  	Match(string) bool
  	// String has the same signature as fmt.Stringer; what it renders is
  	// defined by each implementation.
  	String() string
  }
  // Type identifies the kind of matching a Matcher performs.
  type Type byte

  // The supported matcher kinds. Their numeric values are assigned in
  // declaration order, starting at zero.
  const (
  	// Full requires the input string to be exactly equal to the pattern.
  	Full = Type(iota)
  	// Substr requires the input string to contain the pattern as a sub-string.
  	Substr
  	// Domain requires the input string to be the pattern itself or a
  	// sub-domain of it.
  	Domain
  	// Regex requires the input string to match the regular-expression pattern.
  	Regex
  )
  23. // New creates a new Matcher based on the given pattern.
  24. func (t Type) New(pattern string) (Matcher, error) {
  25. switch t {
  26. case Full:
  27. return fullMatcher(pattern), nil
  28. case Substr:
  29. return substrMatcher(pattern), nil
  30. case Domain:
  31. return domainMatcher(pattern), nil
  32. case Regex:
  33. r, err := regexp.Compile(pattern)
  34. if err != nil {
  35. return nil, err
  36. }
  37. return &regexMatcher{
  38. pattern: r,
  39. }, nil
  40. default:
  41. panic("Unknown type")
  42. }
  43. }
  // IndexMatcher matches one input against a whole group of matchers at once.
  type IndexMatcher interface {
  	// Match returns the indices of the matchers that match input. The result
  	// is empty when no matcher in the group matches.
  	Match(input string) []uint32
  }
  49. type matcherEntry struct {
  50. m Matcher
  51. id uint32
  52. }
  53. type ACAutomatonMatcherGroup struct {
  54. count uint32
  55. ac *ACAutomaton
  56. otherMatchers []matcherEntry
  57. }
  58. func NewACAutomatonMatcherGroup() *ACAutomatonMatcherGroup {
  59. var g = new(ACAutomatonMatcherGroup)
  60. g.count = 1
  61. g.ac = NewACAutomaton()
  62. return g
  63. }
  64. func (g *ACAutomatonMatcherGroup) AddPattern(pattern string, t Type) (uint32, error) {
  65. switch t {
  66. case Full, Substr, Domain:
  67. g.ac.Add(pattern, t)
  68. case Regex:
  69. g.count++
  70. r, err := regexp.Compile(pattern)
  71. if err != nil {
  72. return 0, err
  73. }
  74. g.otherMatchers = append(g.otherMatchers, matcherEntry{
  75. m: &regexMatcher{pattern: r},
  76. id: g.count,
  77. })
  78. default:
  79. panic("Unknown type")
  80. }
  81. return g.count, nil
  82. }
  83. func (g *ACAutomatonMatcherGroup) Build() {
  84. g.ac.Build()
  85. }
  86. // Match implements IndexMatcher.Match.
  87. func (g *ACAutomatonMatcherGroup) Match(pattern string) []uint32 {
  88. result := []uint32{}
  89. if g.ac.Match(pattern) {
  90. result = append(result, 1)
  91. }
  92. for _, e := range g.otherMatchers {
  93. if e.m.Match(pattern) {
  94. result = append(result, e.id)
  95. }
  96. }
  97. return result
  98. }
  99. // MatcherGroup is an implementation of IndexMatcher.
  100. // Empty initialization works.
  101. type MatcherGroup struct {
  102. count uint32
  103. fullMatcher FullMatcherGroup
  104. domainMatcher DomainMatcherGroup
  105. otherMatchers []matcherEntry
  106. }
  107. // Add adds a new Matcher into the MatcherGroup, and returns its index. The index will never be 0.
  108. func (g *MatcherGroup) Add(m Matcher) uint32 {
  109. g.count++
  110. c := g.count
  111. switch tm := m.(type) {
  112. case fullMatcher:
  113. g.fullMatcher.addMatcher(tm, c)
  114. case domainMatcher:
  115. g.domainMatcher.addMatcher(tm, c)
  116. default:
  117. g.otherMatchers = append(g.otherMatchers, matcherEntry{
  118. m: m,
  119. id: c,
  120. })
  121. }
  122. return c
  123. }
  124. // Match implements IndexMatcher.Match.
  125. func (g *MatcherGroup) Match(pattern string) []uint32 {
  126. result := []uint32{}
  127. result = append(result, g.fullMatcher.Match(pattern)...)
  128. result = append(result, g.domainMatcher.Match(pattern)...)
  129. for _, e := range g.otherMatchers {
  130. if e.m.Match(pattern) {
  131. result = append(result, e.id)
  132. }
  133. }
  134. return result
  135. }
  136. // Size returns the number of matchers in the MatcherGroup.
  137. func (g *MatcherGroup) Size() uint32 {
  138. return g.count
  139. }