matchers.go 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276
  1. package strmatcher
  2. import (
  3. "errors"
  4. "regexp"
  5. "strings"
  6. "unicode/utf8"
  7. )
  8. // FullMatcher is an implementation of Matcher.
  9. type FullMatcher string
  10. func (FullMatcher) Type() Type {
  11. return Full
  12. }
  13. func (m FullMatcher) Pattern() string {
  14. return string(m)
  15. }
  16. func (m FullMatcher) String() string {
  17. return "full:" + m.Pattern()
  18. }
  19. func (m FullMatcher) Match(s string) bool {
  20. return string(m) == s
  21. }
  22. // DomainMatcher is an implementation of Matcher.
  23. type DomainMatcher string
  24. func (DomainMatcher) Type() Type {
  25. return Domain
  26. }
  27. func (m DomainMatcher) Pattern() string {
  28. return string(m)
  29. }
  30. func (m DomainMatcher) String() string {
  31. return "domain:" + m.Pattern()
  32. }
  33. func (m DomainMatcher) Match(s string) bool {
  34. pattern := m.Pattern()
  35. if !strings.HasSuffix(s, pattern) {
  36. return false
  37. }
  38. return len(s) == len(pattern) || s[len(s)-len(pattern)-1] == '.'
  39. }
  40. // SubstrMatcher is an implementation of Matcher.
  41. type SubstrMatcher string
  42. func (SubstrMatcher) Type() Type {
  43. return Substr
  44. }
  45. func (m SubstrMatcher) Pattern() string {
  46. return string(m)
  47. }
  48. func (m SubstrMatcher) String() string {
  49. return "keyword:" + m.Pattern()
  50. }
  51. func (m SubstrMatcher) Match(s string) bool {
  52. return strings.Contains(s, m.Pattern())
  53. }
  54. // RegexMatcher is an implementation of Matcher.
  55. type RegexMatcher struct {
  56. pattern *regexp.Regexp
  57. }
  58. func (*RegexMatcher) Type() Type {
  59. return Regex
  60. }
  61. func (m *RegexMatcher) Pattern() string {
  62. return m.pattern.String()
  63. }
  64. func (m *RegexMatcher) String() string {
  65. return "regexp:" + m.Pattern()
  66. }
  67. func (m *RegexMatcher) Match(s string) bool {
  68. return m.pattern.MatchString(s)
  69. }
  70. // New creates a new Matcher based on the given pattern.
  71. func (t Type) New(pattern string) (Matcher, error) {
  72. switch t {
  73. case Full:
  74. return FullMatcher(pattern), nil
  75. case Substr:
  76. return SubstrMatcher(pattern), nil
  77. case Domain:
  78. pattern, err := ToDomain(pattern)
  79. if err != nil {
  80. return nil, err
  81. }
  82. return DomainMatcher(pattern), nil
  83. case Regex: // 1. regex matching is case-sensitive
  84. regex, err := regexp.Compile(pattern)
  85. if err != nil {
  86. return nil, err
  87. }
  88. return &RegexMatcher{pattern: regex}, nil
  89. default:
  90. return nil, errors.New("unknown matcher type")
  91. }
  92. }
  93. // NewDomainPattern creates a new Matcher based on the given domain pattern.
  94. // It works like `Type.New`, but will do validation and conversion to ensure it's a valid domain pattern.
  95. func (t Type) NewDomainPattern(pattern string) (Matcher, error) {
  96. switch t {
  97. case Full:
  98. pattern, err := ToDomain(pattern)
  99. if err != nil {
  100. return nil, err
  101. }
  102. return FullMatcher(pattern), nil
  103. case Substr:
  104. pattern, err := ToDomain(pattern)
  105. if err != nil {
  106. return nil, err
  107. }
  108. return SubstrMatcher(pattern), nil
  109. case Domain:
  110. pattern, err := ToDomain(pattern)
  111. if err != nil {
  112. return nil, err
  113. }
  114. return DomainMatcher(pattern), nil
  115. case Regex: // Regex's charset not in LDH subset
  116. regex, err := regexp.Compile(pattern)
  117. if err != nil {
  118. return nil, err
  119. }
  120. return &RegexMatcher{pattern: regex}, nil
  121. default:
  122. return nil, errors.New("unknown matcher type")
  123. }
  124. }
  125. // ToDomain converts input pattern to a domain string, and return error if such a conversion cannot be made.
  126. // 1. Conforms to Letter-Digit-Hyphen (LDH) subset (https://tools.ietf.org/html/rfc952):
  127. // * Letters A to Z (no distinction between uppercase and lowercase, we convert to lowers)
  128. // * Digits 0 to 9
  129. // * Hyphens(-) and Periods(.)
  130. // 2. Non-ASCII characters not supported for now.
  131. // * May support Internationalized domain name to Punycode if needed in the future.
  132. func ToDomain(pattern string) (string, error) {
  133. builder := strings.Builder{}
  134. builder.Grow(len(pattern))
  135. for i := 0; i < len(pattern); i++ {
  136. c := pattern[i]
  137. if c >= utf8.RuneSelf {
  138. return "", errors.New("non-ASCII characters not supported for now")
  139. }
  140. switch {
  141. case 'A' <= c && c <= 'Z':
  142. c += 'a' - 'A'
  143. case 'a' <= c && c <= 'z':
  144. case '0' <= c && c <= '9':
  145. case c == '-':
  146. case c == '.':
  147. default:
  148. return "", errors.New("pattern string does not conform to Letter-Digit-Hyphen (LDH) subset")
  149. }
  150. builder.WriteByte(c)
  151. }
  152. return builder.String(), nil
  153. }
  154. // MatcherGroupForAll is an interface indicating a MatcherGroup could accept all types of matchers.
  155. type MatcherGroupForAll interface {
  156. AddMatcher(matcher Matcher, value uint32)
  157. }
  158. // MatcherGroupForFull is an interface indicating a MatcherGroup could accept FullMatchers.
  159. type MatcherGroupForFull interface {
  160. AddFullMatcher(matcher FullMatcher, value uint32)
  161. }
  162. // MatcherGroupForDomain is an interface indicating a MatcherGroup could accept DomainMatchers.
  163. type MatcherGroupForDomain interface {
  164. AddDomainMatcher(matcher DomainMatcher, value uint32)
  165. }
  166. // MatcherGroupForSubstr is an interface indicating a MatcherGroup could accept SubstrMatchers.
  167. type MatcherGroupForSubstr interface {
  168. AddSubstrMatcher(matcher SubstrMatcher, value uint32)
  169. }
  170. // MatcherGroupForRegex is an interface indicating a MatcherGroup could accept RegexMatchers.
  171. type MatcherGroupForRegex interface {
  172. AddRegexMatcher(matcher *RegexMatcher, value uint32)
  173. }
  174. // AddMatcherToGroup is a helper function to try to add a Matcher to any kind of MatcherGroup.
  175. // It returns error if the MatcherGroup does not accept the provided Matcher's type.
  176. // This function is provided to help writing code to test a MatcherGroup.
  177. func AddMatcherToGroup(g MatcherGroup, matcher Matcher, value uint32) error {
  178. if g, ok := g.(IndexMatcher); ok {
  179. g.Add(matcher)
  180. return nil
  181. }
  182. if g, ok := g.(MatcherGroupForAll); ok {
  183. g.AddMatcher(matcher, value)
  184. return nil
  185. }
  186. switch matcher := matcher.(type) {
  187. case FullMatcher:
  188. if g, ok := g.(MatcherGroupForFull); ok {
  189. g.AddFullMatcher(matcher, value)
  190. return nil
  191. }
  192. case DomainMatcher:
  193. if g, ok := g.(MatcherGroupForDomain); ok {
  194. g.AddDomainMatcher(matcher, value)
  195. return nil
  196. }
  197. case SubstrMatcher:
  198. if g, ok := g.(MatcherGroupForSubstr); ok {
  199. g.AddSubstrMatcher(matcher, value)
  200. return nil
  201. }
  202. case *RegexMatcher:
  203. if g, ok := g.(MatcherGroupForRegex); ok {
  204. g.AddRegexMatcher(matcher, value)
  205. return nil
  206. }
  207. }
  208. return errors.New("cannot add matcher to matcher group")
  209. }
  210. // CompositeMatches flattens the matches slice to produce a single matched indices slice.
  211. // It is designed to avoid new memory allocation as possible.
  212. func CompositeMatches(matches [][]uint32) []uint32 {
  213. switch len(matches) {
  214. case 0:
  215. return nil
  216. case 1:
  217. return matches[0]
  218. default:
  219. result := make([]uint32, 0, 5)
  220. for i := 0; i < len(matches); i++ {
  221. result = append(result, matches[i]...)
  222. }
  223. return result
  224. }
  225. }
  226. // CompositeMatches flattens the matches slice to produce a single matched indices slice.
  227. // It is designed that:
  228. // 1. All matchers are concatenated in reverse order, so the matcher that matches further ranks higher.
  229. // 2. Indices in the same matcher keeps their original order.
  230. // 3. Avoid new memory allocation as possible.
  231. func CompositeMatchesReverse(matches [][]uint32) []uint32 {
  232. switch len(matches) {
  233. case 0:
  234. return nil
  235. case 1:
  236. return matches[0]
  237. default:
  238. result := make([]uint32, 0, 5)
  239. for i := len(matches) - 1; i >= 0; i-- {
  240. result = append(result, matches[i]...)
  241. }
  242. return result
  243. }
  244. }