|
@@ -4,6 +4,7 @@ import (
|
|
|
"errors"
|
|
"errors"
|
|
|
"regexp"
|
|
"regexp"
|
|
|
"strings"
|
|
"strings"
|
|
|
|
|
+ "unicode/utf8"
|
|
|
)
|
|
)
|
|
|
|
|
|
|
|
// FullMatcher is an implementation of Matcher.
|
|
// FullMatcher is an implementation of Matcher.
|
|
@@ -96,6 +97,10 @@ func (t Type) New(pattern string) (Matcher, error) {
|
|
|
case Substr:
|
|
case Substr:
|
|
|
return SubstrMatcher(pattern), nil
|
|
return SubstrMatcher(pattern), nil
|
|
|
case Domain:
|
|
case Domain:
|
|
|
|
|
+ pattern, err := ToDomain(pattern)
|
|
|
|
|
+ if err != nil {
|
|
|
|
|
+ return nil, err
|
|
|
|
|
+ }
|
|
|
return DomainMatcher(pattern), nil
|
|
return DomainMatcher(pattern), nil
|
|
|
case Regex: // 1. regex matching is case-sensitive
|
|
case Regex: // 1. regex matching is case-sensitive
|
|
|
regex, err := regexp.Compile(pattern)
|
|
regex, err := regexp.Compile(pattern)
|
|
@@ -104,10 +109,73 @@ func (t Type) New(pattern string) (Matcher, error) {
|
|
|
}
|
|
}
|
|
|
return &RegexMatcher{pattern: regex}, nil
|
|
return &RegexMatcher{pattern: regex}, nil
|
|
|
default:
|
|
default:
|
|
|
- panic("Unknown type")
|
|
|
|
|
|
|
+ return nil, errors.New("unknown matcher type")
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+// NewDomainPattern creates a new Matcher based on the given domain pattern.
|
|
|
|
|
+// It works like `Type.New`, but will do validation and conversion to ensure it's a valid domain pattern.
|
|
|
|
|
+func (t Type) NewDomainPattern(pattern string) (Matcher, error) {
|
|
|
|
|
+ switch t {
|
|
|
|
|
+ case Full:
|
|
|
|
|
+ pattern, err := ToDomain(pattern)
|
|
|
|
|
+ if err != nil {
|
|
|
|
|
+ return nil, err
|
|
|
|
|
+ }
|
|
|
|
|
+ return FullMatcher(pattern), nil
|
|
|
|
|
+ case Substr:
|
|
|
|
|
+ pattern, err := ToDomain(pattern)
|
|
|
|
|
+ if err != nil {
|
|
|
|
|
+ return nil, err
|
|
|
|
|
+ }
|
|
|
|
|
+ return SubstrMatcher(pattern), nil
|
|
|
|
|
+ case Domain:
|
|
|
|
|
+ pattern, err := ToDomain(pattern)
|
|
|
|
|
+ if err != nil {
|
|
|
|
|
+ return nil, err
|
|
|
|
|
+ }
|
|
|
|
|
+ return DomainMatcher(pattern), nil
|
|
|
|
|
+ case Regex: // Regex's charset not in LDH subset
|
|
|
|
|
+ regex, err := regexp.Compile(pattern)
|
|
|
|
|
+ if err != nil {
|
|
|
|
|
+ return nil, err
|
|
|
|
|
+ }
|
|
|
|
|
+ return &RegexMatcher{pattern: regex}, nil
|
|
|
|
|
+ default:
|
|
|
|
|
+ return nil, errors.New("unknown matcher type")
|
|
|
|
|
+ }
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
// ToDomain normalizes pattern into a lower-case domain string, or returns an
// error when the pattern contains a byte that is not permitted.
//
// Accepted input is the Letter-Digit-Hyphen (LDH) subset
// (https://tools.ietf.org/html/rfc952) plus the period:
//   - Letters A to Z and a to z (upper-case letters are converted to lower-case)
//   - Digits 0 to 9
//   - Hyphens (-) and periods (.)
//
// Non-ASCII bytes are rejected with a dedicated error; converting
// internationalized domain names to Punycode may be added in the future.
// An empty pattern is returned as an empty string with a nil error.
func ToDomain(pattern string) (string, error) {
	var out strings.Builder
	out.Grow(len(pattern))
	for _, b := range []byte(pattern) {
		switch {
		case b >= utf8.RuneSelf:
			// Checked first so multi-byte input gets the non-ASCII error
			// rather than the generic LDH one.
			return "", errors.New("non-ASCII characters not supported for now")
		case b >= 'A' && b <= 'Z':
			b += 'a' - 'A'
		case (b >= 'a' && b <= 'z') || (b >= '0' && b <= '9') || b == '-' || b == '.':
			// Already in canonical form; copied through unchanged.
		default:
			return "", errors.New("pattern string does not conform to Letter-Digit-Hyphen (LDH) subset")
		}
		out.WriteByte(b)
	}
	return out.String(), nil
}
|
|
|
|
|
+
|
|
|
// MatcherGroupForAll is an interface indicating a MatcherGroup could accept all types of matchers.
|
|
// MatcherGroupForAll is an interface indicating a MatcherGroup could accept all types of matchers.
|
|
|
type MatcherGroupForAll interface {
|
|
type MatcherGroupForAll interface {
|
|
|
AddMatcher(matcher Matcher, value uint32)
|
|
AddMatcher(matcher Matcher, value uint32)
|
|
@@ -137,6 +205,10 @@ type MatcherGroupForRegex interface {
|
|
|
// It returns error if the MatcherGroup does not accept the provided Matcher's type.
|
|
// It returns error if the MatcherGroup does not accept the provided Matcher's type.
|
|
|
// This function is provided to help writing code to test a MatcherGroup.
|
|
// This function is provided to help writing code to test a MatcherGroup.
|
|
|
func AddMatcherToGroup(g MatcherGroup, matcher Matcher, value uint32) error {
|
|
func AddMatcherToGroup(g MatcherGroup, matcher Matcher, value uint32) error {
|
|
|
|
|
+ if g, ok := g.(IndexMatcher); ok {
|
|
|
|
|
+ g.Add(matcher)
|
|
|
|
|
+ return nil
|
|
|
|
|
+ }
|
|
|
if g, ok := g.(MatcherGroupForAll); ok {
|
|
if g, ok := g.(MatcherGroupForAll); ok {
|
|
|
g.AddMatcher(matcher, value)
|
|
g.AddMatcher(matcher, value)
|
|
|
return nil
|
|
return nil
|
|
@@ -165,3 +237,40 @@ func AddMatcherToGroup(g MatcherGroup, matcher Matcher, value uint32) error {
|
|
|
}
|
|
}
|
|
|
return errors.New("cannot add matcher to matcher group")
|
|
return errors.New("cannot add matcher to matcher group")
|
|
|
}
|
|
}
|
|
|
|
|
+
|
|
|
|
|
// CompositeMatches flattens the matches slice to produce a single matched indices slice.
//
// It is designed to avoid new memory allocation where possible:
//   - no inner slices: nil is returned;
//   - exactly one inner slice: that slice is returned as-is (the result shares
//     its backing array with the input, nothing is copied);
//   - otherwise: a new slice is allocated once, sized to the exact total number
//     of indices, and the inner slices are concatenated in their original order.
func CompositeMatches(matches [][]uint32) []uint32 {
	switch len(matches) {
	case 0:
		return nil
	case 1:
		return matches[0]
	default:
		// Count first so the result is allocated exactly once instead of
		// growing repeatedly from a fixed guess.
		total := 0
		for _, m := range matches {
			total += len(m)
		}
		result := make([]uint32, 0, total)
		for _, m := range matches {
			result = append(result, m...)
		}
		return result
	}
}
|
|
|
|
|
+
|
|
|
|
|
// CompositeMatchesReverse flattens the matches slice to produce a single matched indices slice.
// It is designed that:
//  1. The inner slices are concatenated in reverse order, so a later inner
//     slice of matches ranks ahead of an earlier one in the result.
//  2. Indices within the same inner slice keep their original order.
//  3. New memory allocation is avoided where possible: nil is returned for no
//     inner slices, a single inner slice is returned as-is (sharing its backing
//     array with the input), and otherwise one slice sized to the exact total
//     is allocated.
func CompositeMatchesReverse(matches [][]uint32) []uint32 {
	switch len(matches) {
	case 0:
		return nil
	case 1:
		return matches[0]
	default:
		// Count first so the result is allocated exactly once instead of
		// growing repeatedly from a fixed guess.
		total := 0
		for _, m := range matches {
			total += len(m)
		}
		result := make([]uint32, 0, total)
		for i := len(matches) - 1; i >= 0; i-- {
			result = append(result, matches[i]...)
		}
		return result
	}
}
|