Browse Source

Refactor: strmatcher module (#1333)

* Reorganize strmatcher's package structure

* Rename types in strmatcher package according to their file names

* Stabilize strmatcher's Matcher interface

* Implement []matcherEntry as SimpleMatcherGroup

* Implement mph algorithm extracted from MphIndexMatcher as MphMatcherGroup

* Implement AddMatcher/AddFullMatcher/AddDomainMatcher/AddSubstrMatcher for each MatcherGroup

* Stabilize strmatcher's MatcherGroup interface

* Stabilize strmatcher's IndexMatcher interface

* Update strmatcher's benchmark

* Compatibility fix for app/router's DomainMatcher condition

* Fix code quality issue

* Fix basic matcher issues

* Update priority specification for Substr matcher
Ye Zhihao 4 years ago
parent
commit
d4da365c5f

+ 1 - 1
app/dns/dns.go

@@ -98,7 +98,7 @@ func New(ctx context.Context, config *Config) (*DNS, error) {
 
 	// MatcherInfos is ensured to cover the maximum index domainMatcher could return, where matcher's index starts from 1
 	matcherInfos := make([]DomainMatcherInfo, domainRuleCount+1)
-	domainMatcher := &strmatcher.MatcherGroup{}
+	domainMatcher := &strmatcher.LinearIndexMatcher{}
 	geoipContainer := router.GeoIPMatcherContainer{}
 
 	for _, endpoint := range config.NameServers {

+ 2 - 2
app/dns/hosts.go

@@ -11,12 +11,12 @@ import (
 // StaticHosts represents static domain-ip mapping in DNS server.
 type StaticHosts struct {
 	ips      [][]net.Address
-	matchers *strmatcher.MatcherGroup
+	matchers *strmatcher.LinearIndexMatcher
 }
 
 // NewStaticHosts creates a new StaticHosts instance.
 func NewStaticHosts(hosts []*HostMapping, legacy map[string]*net.IPOrDomain) (*StaticHosts, error) {
-	g := new(strmatcher.MatcherGroup)
+	g := new(strmatcher.LinearIndexMatcher)
 	sh := &StaticHosts{
 		ips:      make([][]net.Address, len(hosts)+len(legacy)+16),
 		matchers: g,

+ 22 - 32
app/router/condition.go

@@ -64,44 +64,34 @@ func domainToMatcher(domain *routercommon.Domain) (strmatcher.Matcher, error) {
 }
 
 type DomainMatcher struct {
-	matchers strmatcher.IndexMatcher
-}
-
-func NewMphMatcherGroup(domains []*routercommon.Domain) (*DomainMatcher, error) {
-	g := strmatcher.NewMphMatcherGroup()
-	for _, d := range domains {
-		matcherType, f := matcherTypeMap[d.Type]
-		if !f {
-			return nil, newError("unsupported domain type", d.Type)
-		}
-		_, err := g.AddPattern(d.Value, matcherType)
-		if err != nil {
-			return nil, err
-		}
+	matcher strmatcher.IndexMatcher
+}
+
+func NewDomainMatcher(matcherType string, domains []*routercommon.Domain) (*DomainMatcher, error) {
+	var indexMatcher strmatcher.IndexMatcher
+	switch matcherType {
+	case "mph", "hybrid":
+		indexMatcher = strmatcher.NewMphIndexMatcher()
+	case "linear":
+		indexMatcher = strmatcher.NewLinearIndexMatcher()
+	default:
+		indexMatcher = strmatcher.NewLinearIndexMatcher()
 	}
-	g.Build()
-	return &DomainMatcher{
-		matchers: g,
-	}, nil
-}
-
-func NewDomainMatcher(domains []*routercommon.Domain) (*DomainMatcher, error) {
-	g := new(strmatcher.MatcherGroup)
-	for _, d := range domains {
-		m, err := domainToMatcher(d)
+	for _, domain := range domains {
+		matcher, err := domainToMatcher(domain)
 		if err != nil {
 			return nil, err
 		}
-		g.Add(m)
+		indexMatcher.Add(matcher)
 	}
-
-	return &DomainMatcher{
-		matchers: g,
-	}, nil
+	if err := indexMatcher.Build(); err != nil {
+		return nil, err
+	}
+	return &DomainMatcher{matcher: indexMatcher}, nil
 }
 
-func (m *DomainMatcher) ApplyDomain(domain string) bool {
-	return len(m.matchers.Match(strings.ToLower(domain))) > 0
+func (m *DomainMatcher) Match(domain string) bool {
+	return m.matcher.MatchAny(domain)
 }
 
 // Apply implements Condition.
@@ -110,7 +100,7 @@ func (m *DomainMatcher) Apply(ctx routing.Context) bool {
 	if len(domain) == 0 {
 		return false
 	}
-	return m.ApplyDomain(domain)
+	return m.Match(domain)
 }
 
 type MultiGeoIPMatcher struct {

+ 8 - 8
app/router/condition_test.go

@@ -375,9 +375,9 @@ func TestChinaSites(t *testing.T) {
 	domains, err := loadGeoSite("CN")
 	common.Must(err)
 
-	matcher, err := router.NewDomainMatcher(domains)
+	matcher, err := router.NewDomainMatcher("linear", domains)
 	common.Must(err)
-	acMatcher, err := router.NewMphMatcherGroup(domains)
+	mphMatcher, err := router.NewDomainMatcher("mph", domains)
 	common.Must(err)
 
 	type TestCase struct {
@@ -408,8 +408,8 @@ func TestChinaSites(t *testing.T) {
 	}
 
 	for _, testCase := range testCases {
-		r1 := matcher.ApplyDomain(testCase.Domain)
-		r2 := acMatcher.ApplyDomain(testCase.Domain)
+		r1 := matcher.Match(testCase.Domain)
+		r2 := mphMatcher.Match(testCase.Domain)
 		if r1 != testCase.Output {
 			t.Error("DomainMatcher expected output ", testCase.Output, " for domain ", testCase.Domain, " but got ", r1)
 		} else if r2 != testCase.Output {
@@ -422,7 +422,7 @@ func BenchmarkMphDomainMatcher(b *testing.B) {
 	domains, err := loadGeoSite("CN")
 	common.Must(err)
 
-	matcher, err := router.NewMphMatcherGroup(domains)
+	matcher, err := router.NewDomainMatcher("mph", domains)
 	common.Must(err)
 
 	type TestCase struct {
@@ -455,7 +455,7 @@ func BenchmarkMphDomainMatcher(b *testing.B) {
 	b.ResetTimer()
 	for i := 0; i < b.N; i++ {
 		for _, testCase := range testCases {
-			_ = matcher.ApplyDomain(testCase.Domain)
+			_ = matcher.Match(testCase.Domain)
 		}
 	}
 }
@@ -464,7 +464,7 @@ func BenchmarkDomainMatcher(b *testing.B) {
 	domains, err := loadGeoSite("CN")
 	common.Must(err)
 
-	matcher, err := router.NewDomainMatcher(domains)
+	matcher, err := router.NewDomainMatcher("linear", domains)
 	common.Must(err)
 
 	type TestCase struct {
@@ -497,7 +497,7 @@ func BenchmarkDomainMatcher(b *testing.B) {
 	b.ResetTimer()
 	for i := 0; i < b.N; i++ {
 		for _, testCase := range testCases {
-			_ = matcher.ApplyDomain(testCase.Domain)
+			_ = matcher.Match(testCase.Domain)
 		}
 	}
 }

+ 4 - 16
app/router/config.go

@@ -39,23 +39,11 @@ func (rr *RoutingRule) BuildCondition() (Condition, error) {
 	conds := NewConditionChan()
 
 	if len(rr.Domain) > 0 {
-		switch rr.DomainMatcher {
-		case "mph", "hybrid":
-			matcher, err := NewMphMatcherGroup(rr.Domain)
-			if err != nil {
-				return nil, newError("failed to build domain condition with MphDomainMatcher").Base(err)
-			}
-			newError("MphDomainMatcher is enabled for ", len(rr.Domain), " domain rule(s)").AtDebug().WriteToLog()
-			conds.Add(matcher)
-		case "linear":
-			fallthrough
-		default:
-			matcher, err := NewDomainMatcher(rr.Domain)
-			if err != nil {
-				return nil, newError("failed to build domain condition").Base(err)
-			}
-			conds.Add(matcher)
+		cond, err := NewDomainMatcher(rr.DomainMatcher, rr.Domain)
+		if err != nil {
+			return nil, newError("failed to build domain condition").Base(err)
 		}
+		conds.Add(cond)
 	}
 
 	if len(rr.UserEmail) > 0 {

+ 1 - 1
app/stats/command/command.go

@@ -49,7 +49,7 @@ func (s *statsServer) GetStats(ctx context.Context, request *GetStatsRequest) (*
 }
 
 func (s *statsServer) QueryStats(ctx context.Context, request *QueryStatsRequest) (*QueryStatsResponse, error) {
-	mgroup := &strmatcher.MatcherGroup{}
+	mgroup := &strmatcher.LinearIndexMatcher{}
 	if request.Pattern != "" {
 		request.Patterns = append(request.Patterns, request.Pattern)
 	}

+ 108 - 9
common/strmatcher/benchmark_test.go

@@ -8,16 +8,18 @@ import (
 	. "github.com/v2fly/v2ray-core/v4/common/strmatcher"
 )
 
-func BenchmarkACAutomaton(b *testing.B) {
-	ac := NewACAutomaton()
+// Benchmark Domain Matcher Groups
+
+func BenchmarkSimpleMatcherGroupForDomain(b *testing.B) {
+	g := new(SimpleMatcherGroup)
+
 	for i := 1; i <= 1024; i++ {
-		ac.Add(strconv.Itoa(i)+".v2fly.org", Domain)
+		AddMatcherToGroup(g, DomainMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
 	}
-	ac.Build()
 
 	b.ResetTimer()
 	for i := 0; i < b.N; i++ {
-		_ = ac.Match("0.v2fly.org")
+		_ = g.Match("0.v2fly.org")
 	}
 }
 
@@ -25,7 +27,48 @@ func BenchmarkDomainMatcherGroup(b *testing.B) {
 	g := new(DomainMatcherGroup)
 
 	for i := 1; i <= 1024; i++ {
-		g.Add(strconv.Itoa(i)+".v2fly.org", uint32(i))
+		AddMatcherToGroup(g, DomainMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
+	}
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		_ = g.Match("0.v2fly.org")
+	}
+}
+
+func BenchmarkACAutomatonMatcherGroupForDomain(b *testing.B) {
+	ac := NewACAutomatonMatcherGroup()
+	for i := 1; i <= 1024; i++ {
+		AddMatcherToGroup(ac, DomainMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
+	}
+	ac.Build()
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		_ = ac.MatchAny("0.v2fly.org")
+	}
+}
+
+func BenchmarkMphMatcherGroupForDomain(b *testing.B) {
+	mph := NewMphMatcherGroup()
+	for i := 1; i <= 1024; i++ {
+		AddMatcherToGroup(mph, DomainMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
+	}
+	mph.Build()
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		_ = mph.MatchAny("0.v2fly.org")
+	}
+}
+
+// Benchmark Full Matcher Groups
+
+func BenchmarkSimpleMatcherGroupForFull(b *testing.B) {
+	g := new(SimpleMatcherGroup)
+
+	for i := 1; i <= 1024; i++ {
+		AddMatcherToGroup(g, FullMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
 	}
 
 	b.ResetTimer()
@@ -38,7 +81,48 @@ func BenchmarkFullMatcherGroup(b *testing.B) {
 	g := new(FullMatcherGroup)
 
 	for i := 1; i <= 1024; i++ {
-		g.Add(strconv.Itoa(i)+".v2fly.org", uint32(i))
+		AddMatcherToGroup(g, FullMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
+	}
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		_ = g.Match("0.v2fly.org")
+	}
+}
+
+func BenchmarkACAutomatonMatcherGroupForFull(b *testing.B) {
+	ac := NewACAutomatonMatcherGroup()
+	for i := 1; i <= 1024; i++ {
+		AddMatcherToGroup(ac, FullMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
+	}
+	ac.Build()
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		_ = ac.MatchAny("0.v2fly.org")
+	}
+}
+
+func BenchmarkMphMatcherGroupFull(b *testing.B) {
+	mph := NewMphMatcherGroup()
+	for i := 1; i <= 1024; i++ {
+		AddMatcherToGroup(mph, FullMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
+	}
+	mph.Build()
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		_ = mph.MatchAny("0.v2fly.org")
+	}
+}
+
+// Benchmark Substr Matcher Groups
+
+func BenchmarkSimpleMatcherGroupForSubstr(b *testing.B) {
+	g := new(SimpleMatcherGroup)
+
+	for i := 1; i <= 1024; i++ {
+		AddMatcherToGroup(g, SubstrMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
 	}
 
 	b.ResetTimer()
@@ -47,8 +131,23 @@ func BenchmarkFullMatcherGroup(b *testing.B) {
 	}
 }
 
-func BenchmarkMarchGroup(b *testing.B) {
-	g := new(MatcherGroup)
+func BenchmarkACAutomatonMatcherGroupForSubstr(b *testing.B) {
+	ac := NewACAutomatonMatcherGroup()
+	for i := 1; i <= 1024; i++ {
+		AddMatcherToGroup(ac, SubstrMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
+	}
+	ac.Build()
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		_ = ac.MatchAny("0.v2fly.org")
+	}
+}
+
+// Benchmark Index Matchers
+
+func BenchmarkLinearIndexMatcher(b *testing.B) {
+	g := new(LinearIndexMatcher)
 	for i := 1; i <= 1024; i++ {
 		m, err := Domain.New(strconv.Itoa(i) + ".v2fly.org")
 		common.Must(err)

+ 0 - 25
common/strmatcher/full_matcher.go

@@ -1,25 +0,0 @@
-package strmatcher
-
-type FullMatcherGroup struct {
-	matchers map[string][]uint32
-}
-
-func (g *FullMatcherGroup) Add(domain string, value uint32) {
-	if g.matchers == nil {
-		g.matchers = make(map[string][]uint32)
-	}
-
-	g.matchers[domain] = append(g.matchers[domain], value)
-}
-
-func (g *FullMatcherGroup) addMatcher(m fullMatcher, value uint32) {
-	g.Add(string(m), value)
-}
-
-func (g *FullMatcherGroup) Match(str string) []uint32 {
-	if g.matchers == nil {
-		return nil
-	}
-
-	return g.matchers[str]
-}

+ 59 - 0
common/strmatcher/indexmatcher_linear.go

@@ -0,0 +1,59 @@
+package strmatcher
+
+// LinearIndexMatcher is an implementation of IndexMatcher.
+// Empty initialization works.
+type LinearIndexMatcher struct {
+	count         uint32
+	fullMatcher   FullMatcherGroup
+	domainMatcher DomainMatcherGroup
+	substrMatcher SubstrMatcherGroup
+	otherMatchers SimpleMatcherGroup
+}
+
+func NewLinearIndexMatcher() *LinearIndexMatcher {
+	return new(LinearIndexMatcher)
+}
+
+// Add implements IndexMatcher.Add.
+func (g *LinearIndexMatcher) Add(matcher Matcher) uint32 {
+	g.count++
+	index := g.count
+
+	switch matcher := matcher.(type) {
+	case FullMatcher:
+		g.fullMatcher.AddFullMatcher(matcher, index)
+	case DomainMatcher:
+		g.domainMatcher.AddDomainMatcher(matcher, index)
+	case SubstrMatcher:
+		g.substrMatcher.AddSubstrMatcher(matcher, index)
+	default:
+		g.otherMatchers.AddMatcher(matcher, index)
+	}
+
+	return index
+}
+
+// Build implements IndexMatcher.Build.
+func (*LinearIndexMatcher) Build() error {
+	return nil
+}
+
+// Match implements IndexMatcher.Match.
+func (g *LinearIndexMatcher) Match(input string) []uint32 {
+	result := []uint32{}
+	result = append(result, g.fullMatcher.Match(input)...)
+	result = append(result, g.domainMatcher.Match(input)...)
+	result = append(result, g.substrMatcher.Match(input)...)
+	result = append(result, g.otherMatchers.Match(input)...)
+	return result
+}
+
+// MatchAny implements IndexMatcher.MatchAny.
+func (g *LinearIndexMatcher) MatchAny(input string) bool {
+	return len(g.Match(input)) > 0
+}
+
+// Size implements IndexMatcher.Size.
+func (g *LinearIndexMatcher) Size() uint32 {
+	return g.count
+}

+ 4 - 3
common/strmatcher/strmatcher_test.go → common/strmatcher/indexmatcher_linear_test.go

@@ -9,7 +9,7 @@ import (
 )
 
 // See https://github.com/v2fly/v2ray-core/issues/92#issuecomment-673238489
-func TestMatcherGroup(t *testing.T) {
+func TestLinearIndexMatcher(t *testing.T) {
 	rules := []struct {
 		Type   Type
 		Domain string
@@ -73,19 +73,20 @@ func TestMatcherGroup(t *testing.T) {
 		},
 		{
 			Input:  "testapis.us",
-			Output: []uint32{1, 2, 6},
+			Output: []uint32{2, 6, 1},
 		},
 		{
 			Input:  "example.com",
 			Output: []uint32{10, 4},
 		},
 	}
-	matcherGroup := &MatcherGroup{}
+	matcherGroup := NewLinearIndexMatcher()
 	for _, rule := range rules {
 		matcher, err := rule.Type.New(rule.Domain)
 		common.Must(err)
 		matcherGroup.Add(matcher)
 	}
+	matcherGroup.Build()
 	for _, test := range cases {
 		if m := matcherGroup.Match(test.Input); !reflect.DeepEqual(m, test.Output) {
 			t.Error("unexpected output: ", m, " for test case ", test)

+ 80 - 0
common/strmatcher/indexmatcher_mph.go

@@ -0,0 +1,80 @@
+package strmatcher
+
+// A MphIndexMatcher is divided into three parts:
+// 1. `full` and `domain` patterns are matched by Rabin-Karp algorithm and minimal perfect hash table;
+// 2. `substr` patterns are matched by ac automaton;
+// 3. `regex` patterns are matched with the regex library.
+type MphIndexMatcher struct {
+	count uint32
+	mph   *MphMatcherGroup
+	ac    *ACAutomatonMatcherGroup
+	regex SimpleMatcherGroup
+}
+
+func NewMphIndexMatcher() *MphIndexMatcher {
+	return &MphIndexMatcher{
+		mph:   nil,
+		ac:    nil,
+		regex: SimpleMatcherGroup{},
+	}
+}
+
+// Add implements IndexMatcher.Add.
+func (g *MphIndexMatcher) Add(matcher Matcher) uint32 {
+	g.count++
+	index := g.count
+
+	switch matcher := matcher.(type) {
+	case FullMatcher:
+		if g.mph == nil {
+			g.mph = NewMphMatcherGroup()
+		}
+		g.mph.AddFullMatcher(matcher, index)
+	case DomainMatcher:
+		if g.mph == nil {
+			g.mph = NewMphMatcherGroup()
+		}
+		g.mph.AddDomainMatcher(matcher, index)
+	case SubstrMatcher:
+		if g.ac == nil {
+			g.ac = NewACAutomatonMatcherGroup()
+		}
+		g.ac.AddSubstrMatcher(matcher, index)
+	case *RegexMatcher:
+		g.regex.AddMatcher(matcher, index)
+	}
+
+	return index
+}
+
+// Build implements IndexMatcher.Build.
+func (g *MphIndexMatcher) Build() error {
+	if g.mph != nil {
+		g.mph.Build()
+	}
+	if g.ac != nil {
+		g.ac.Build()
+	}
+	return nil
+}
+
+// Match implements IndexMatcher.Match.
+func (*MphIndexMatcher) Match(string) []uint32 {
+	return nil
+}
+
+// MatchAny implements IndexMatcher.MatchAny.
+func (g *MphIndexMatcher) MatchAny(input string) bool {
+	if g.mph != nil && g.mph.MatchAny(input) {
+		return true
+	}
+	if g.ac != nil && g.ac.MatchAny(input) {
+		return true
+	}
+	return g.regex.MatchAny(input)
+}
+
+// Size implements IndexMatcher.Size.
+func (g *MphIndexMatcher) Size() uint32 {
+	return g.count
+}

+ 34 - 40
common/strmatcher/ac_automaton_matcher.go → common/strmatcher/matchergroup_ac_automation.go

@@ -21,7 +21,9 @@ type Edge struct {
 	nextNode int
 }
 
-type ACAutomaton struct {
+// ACAutomatonMatcherGroup is an implementation of MatcherGroup.
+// It uses an AC automaton to provide support for Full, Domain and Substr matchers. Trie nodes are char based.
+type ACAutomatonMatcherGroup struct {
 	trie   [][validCharCount]Edge
 	fail   []int
 	exists []MatchType
@@ -121,8 +123,8 @@ var char2Index = []int{
 	'9':  52,
 }
 
-func NewACAutomaton() *ACAutomaton {
-	ac := new(ACAutomaton)
+func NewACAutomatonMatcherGroup() *ACAutomatonMatcherGroup {
+	ac := new(ACAutomatonMatcherGroup)
 	ac.trie = append(ac.trie, newNode())
 	ac.fail = append(ac.fail, 0)
 	ac.exists = append(ac.exists, MatchType{
@@ -132,10 +134,25 @@ func NewACAutomaton() *ACAutomaton {
 	return ac
 }
 
-func (ac *ACAutomaton) Add(domain string, t Type) {
-	node := 0
-	for i := len(domain) - 1; i >= 0; i-- {
-		idx := char2Index[domain[i]]
+// AddFullMatcher implements MatcherGroupForFull.AddFullMatcher.
+func (ac *ACAutomatonMatcherGroup) AddFullMatcher(matcher FullMatcher, _ uint32) {
+	ac.addPattern(0, matcher.Pattern(), matcher.Type())
+}
+
+// AddDomainMatcher implements MatcherGroupForDomain.AddDomainMatcher.
+func (ac *ACAutomatonMatcherGroup) AddDomainMatcher(matcher DomainMatcher, _ uint32) {
+	node := ac.addPattern(0, matcher.Pattern(), Full)
+	ac.addPattern(node, ".", Domain)
+}
+
+// AddSubstrMatcher implements MatcherGroupForSubstr.AddSubstrMatcher.
+func (ac *ACAutomatonMatcherGroup) AddSubstrMatcher(matcher SubstrMatcher, _ uint32) {
+	ac.addPattern(0, matcher.Pattern(), matcher.Type())
+}
+
+func (ac *ACAutomatonMatcherGroup) addPattern(node int, pattern string, matcherType Type) int {
+	for i := len(pattern) - 1; i >= 0; i-- {
+		idx := char2Index[pattern[i]]
 		if ac.trie[node][idx].nextNode == 0 {
 			ac.count++
 			if len(ac.trie) < ac.count+1 {
@@ -154,42 +171,13 @@ func (ac *ACAutomaton) Add(domain string, t Type) {
 		node = ac.trie[node][idx].nextNode
 	}
 	ac.exists[node] = MatchType{
-		matchType: t,
+		matchType: matcherType,
 		exist:     true,
 	}
-	switch t {
-	case Domain:
-		ac.exists[node] = MatchType{
-			matchType: Full,
-			exist:     true,
-		}
-		idx := char2Index['.']
-		if ac.trie[node][idx].nextNode == 0 {
-			ac.count++
-			if len(ac.trie) < ac.count+1 {
-				ac.trie = append(ac.trie, newNode())
-				ac.fail = append(ac.fail, 0)
-				ac.exists = append(ac.exists, MatchType{
-					matchType: Full,
-					exist:     false,
-				})
-			}
-			ac.trie[node][idx] = Edge{
-				edgeType: TrieEdge,
-				nextNode: ac.count,
-			}
-		}
-		node = ac.trie[node][idx].nextNode
-		ac.exists[node] = MatchType{
-			matchType: t,
-			exist:     true,
-		}
-	default:
-		break
-	}
+	return node
 }
 
-func (ac *ACAutomaton) Build() {
+func (ac *ACAutomatonMatcherGroup) Build() {
 	queue := list.New()
 	for i := 0; i < validCharCount; i++ {
 		if ac.trie[0][i].nextNode != 0 {
@@ -218,7 +206,13 @@ func (ac *ACAutomaton) Build() {
 	}
 }
 
-func (ac *ACAutomaton) Match(s string) bool {
+// Match implements MatcherGroup.Match.
+func (*ACAutomatonMatcherGroup) Match(_ string) []uint32 {
+	return nil
+}
+
+// MatchAny implements MatcherGroup.MatchAny.
+func (ac *ACAutomatonMatcherGroup) MatchAny(s string) bool {
 	node := 0
 	fullMatch := true
 	// 1. the match string is all through trie edge. FULL MATCH or DOMAIN

+ 183 - 0
common/strmatcher/matchergroup_ac_automation_test.go

@@ -0,0 +1,183 @@
+package strmatcher_test
+
+import (
+	"testing"
+
+	"github.com/v2fly/v2ray-core/v4/common"
+	. "github.com/v2fly/v2ray-core/v4/common/strmatcher"
+)
+
+func TestACAutomatonMatcherGroup(t *testing.T) {
+	cases1 := []struct {
+		pattern string
+		mType   Type
+		input   string
+		output  bool
+	}{
+		{
+			pattern: "v2fly.org",
+			mType:   Domain,
+			input:   "www.v2fly.org",
+			output:  true,
+		},
+		{
+			pattern: "v2fly.org",
+			mType:   Domain,
+			input:   "v2fly.org",
+			output:  true,
+		},
+		{
+			pattern: "v2fly.org",
+			mType:   Domain,
+			input:   "www.v3fly.org",
+			output:  false,
+		},
+		{
+			pattern: "v2fly.org",
+			mType:   Domain,
+			input:   "2fly.org",
+			output:  false,
+		},
+		{
+			pattern: "v2fly.org",
+			mType:   Domain,
+			input:   "xv2fly.org",
+			output:  false,
+		},
+		{
+			pattern: "v2fly.org",
+			mType:   Full,
+			input:   "v2fly.org",
+			output:  true,
+		},
+		{
+			pattern: "v2fly.org",
+			mType:   Full,
+			input:   "xv2fly.org",
+			output:  false,
+		},
+	}
+	for _, test := range cases1 {
+		ac := NewACAutomatonMatcherGroup()
+		matcher, err := test.mType.New(test.pattern)
+		common.Must(err)
+		common.Must(AddMatcherToGroup(ac, matcher, 0))
+		ac.Build()
+		if m := ac.MatchAny(test.input); m != test.output {
+			t.Error("unexpected output: ", m, " for test case ", test)
+		}
+	}
+	{
+		cases2Input := []struct {
+			pattern string
+			mType   Type
+		}{
+			{
+				pattern: "163.com",
+				mType:   Domain,
+			},
+			{
+				pattern: "m.126.com",
+				mType:   Full,
+			},
+			{
+				pattern: "3.com",
+				mType:   Full,
+			},
+			{
+				pattern: "google.com",
+				mType:   Substr,
+			},
+			{
+				pattern: "vgoogle.com",
+				mType:   Substr,
+			},
+		}
+		ac := NewACAutomatonMatcherGroup()
+		for _, test := range cases2Input {
+			matcher, err := test.mType.New(test.pattern)
+			common.Must(err)
+			common.Must(AddMatcherToGroup(ac, matcher, 0))
+		}
+		ac.Build()
+		cases2Output := []struct {
+			pattern string
+			res     bool
+		}{
+			{
+				pattern: "126.com",
+				res:     false,
+			},
+			{
+				pattern: "m.163.com",
+				res:     true,
+			},
+			{
+				pattern: "mm163.com",
+				res:     false,
+			},
+			{
+				pattern: "m.126.com",
+				res:     true,
+			},
+			{
+				pattern: "163.com",
+				res:     true,
+			},
+			{
+				pattern: "63.com",
+				res:     false,
+			},
+			{
+				pattern: "oogle.com",
+				res:     false,
+			},
+			{
+				pattern: "vvgoogle.com",
+				res:     true,
+			},
+		}
+		for _, test := range cases2Output {
+			if m := ac.MatchAny(test.pattern); m != test.res {
+				t.Error("unexpected output: ", m, " for test case ", test)
+			}
+		}
+	}
+
+	{
+		cases3Input := []struct {
+			pattern string
+			mType   Type
+		}{
+			{
+				pattern: "video.google.com",
+				mType:   Domain,
+			},
+			{
+				pattern: "gle.com",
+				mType:   Domain,
+			},
+		}
+		ac := NewACAutomatonMatcherGroup()
+		for _, test := range cases3Input {
+			matcher, err := test.mType.New(test.pattern)
+			common.Must(err)
+			common.Must(AddMatcherToGroup(ac, matcher, 0))
+		}
+		ac.Build()
+		cases3Output := []struct {
+			pattern string
+			res     bool
+		}{
+			{
+				pattern: "google.com",
+				res:     false,
+			},
+		}
+		for _, test := range cases3Output {
+			if m := ac.MatchAny(test.pattern); m != test.res {
+				t.Error("unexpected output: ", m, " for test case ", test)
+			}
+		}
+	}
+}

+ 11 - 8
common/strmatcher/domain_matcher.go → common/strmatcher/matchergroup_domain.go

@@ -11,19 +11,20 @@ type node struct {
 	sub    map[string]*node
 }
 
-// DomainMatcherGroup is a IndexMatcher for a large set of Domain matchers.
-// Visible for testing only.
+// DomainMatcherGroup is an implementation of MatcherGroup.
+// It uses a trie to optimize both memory consumption and lookup speed. Trie nodes are domain label based.
 type DomainMatcherGroup struct {
 	root *node
 }
 
-func (g *DomainMatcherGroup) Add(domain string, value uint32) {
+// AddDomainMatcher implements MatcherGroupForDomain.AddDomainMatcher.
+func (g *DomainMatcherGroup) AddDomainMatcher(matcher DomainMatcher, value uint32) {
 	if g.root == nil {
 		g.root = new(node)
 	}
 
 	current := g.root
-	parts := breakDomain(domain)
+	parts := breakDomain(matcher.Pattern())
 	for i := len(parts) - 1; i >= 0; i-- {
 		part := parts[i]
 		if current.sub == nil {
@@ -40,10 +41,7 @@ func (g *DomainMatcherGroup) Add(domain string, value uint32) {
 	current.values = append(current.values, value)
 }
 
-func (g *DomainMatcherGroup) addMatcher(m domainMatcher, value uint32) {
-	g.Add(string(m), value)
-}
-
+// Match implements MatcherGroup.Match.
 func (g *DomainMatcherGroup) Match(domain string) []uint32 {
 	if domain == "" {
 		return nil
@@ -96,3 +94,8 @@ func (g *DomainMatcherGroup) Match(domain string) []uint32 {
 		return result
 	}
 }
+
+// MatchAny implements MatcherGroup.MatchAny.
+func (g *DomainMatcherGroup) MatchAny(domain string) bool {
+	return len(g.Match(domain)) > 0
+}

+ 37 - 10
common/strmatcher/domain_matcher_test.go → common/strmatcher/matchergroup_domain_test.go

@@ -8,15 +8,39 @@ import (
 )
 
 func TestDomainMatcherGroup(t *testing.T) {
-	g := new(DomainMatcherGroup)
-	g.Add("v2fly.org", 1)
-	g.Add("google.com", 2)
-	g.Add("x.a.com", 3)
-	g.Add("a.b.com", 4)
-	g.Add("c.a.b.com", 5)
-	g.Add("x.y.com", 4)
-	g.Add("x.y.com", 6)
-
+	patterns := []struct {
+		Pattern string
+		Value   uint32
+	}{
+		{
+			Pattern: "v2fly.org",
+			Value:   1,
+		},
+		{
+			Pattern: "google.com",
+			Value:   2,
+		},
+		{
+			Pattern: "x.a.com",
+			Value:   3,
+		},
+		{
+			Pattern: "a.b.com",
+			Value:   4,
+		},
+		{
+			Pattern: "c.a.b.com",
+			Value:   5,
+		},
+		{
+			Pattern: "x.y.com",
+			Value:   4,
+		},
+		{
+			Pattern: "x.y.com",
+			Value:   6,
+		},
+	}
 	testCases := []struct {
 		Domain string
 		Result []uint32
@@ -58,7 +82,10 @@ func TestDomainMatcherGroup(t *testing.T) {
 			Result: []uint32{4, 6},
 		},
 	}
-
+	g := new(DomainMatcherGroup)
+	for _, pattern := range patterns {
+		AddMatcherToGroup(g, DomainMatcher(pattern.Pattern), pattern.Value)
+	}
 	for _, testCase := range testCases {
 		r := g.Match(testCase.Domain)
 		if !reflect.DeepEqual(r, testCase.Result) {

+ 30 - 0
common/strmatcher/matchergroup_full.go

@@ -0,0 +1,30 @@
+package strmatcher
+
+// FullMatcherGroup is an implementation of MatcherGroup.
+// It uses a hash table to facilitate exact match lookup.
+type FullMatcherGroup struct {
+	matchers map[string][]uint32
+}
+
+// AddFullMatcher implements MatcherGroupForFull.AddFullMatcher.
+func (g *FullMatcherGroup) AddFullMatcher(matcher FullMatcher, value uint32) {
+	if g.matchers == nil {
+		g.matchers = make(map[string][]uint32)
+	}
+
+	domain := matcher.Pattern()
+	g.matchers[domain] = append(g.matchers[domain], value)
+}
+
+// Match implements MatcherGroup.Match.
+func (g *FullMatcherGroup) Match(input string) []uint32 {
+	if g.matchers == nil {
+		return nil
+	}
+	return g.matchers[input]
+}
+
+// MatchAny implements MatcherGroup.MatchAny.
+func (g *FullMatcherGroup) MatchAny(input string) bool {
+	return len(g.Match(input)) > 0
+}

+ 29 - 8
common/strmatcher/full_matcher_test.go → common/strmatcher/matchergroup_full_test.go

@@ -8,13 +8,31 @@ import (
 )
 
 func TestFullMatcherGroup(t *testing.T) {
-	g := new(FullMatcherGroup)
-	g.Add("v2fly.org", 1)
-	g.Add("google.com", 2)
-	g.Add("x.a.com", 3)
-	g.Add("x.y.com", 4)
-	g.Add("x.y.com", 6)
-
+	patterns := []struct {
+		Pattern string
+		Value   uint32
+	}{
+		{
+			Pattern: "v2fly.org",
+			Value:   1,
+		},
+		{
+			Pattern: "google.com",
+			Value:   2,
+		},
+		{
+			Pattern: "x.a.com",
+			Value:   3,
+		},
+		{
+			Pattern: "x.y.com",
+			Value:   4,
+		},
+		{
+			Pattern: "x.y.com",
+			Value:   6,
+		},
+	}
 	testCases := []struct {
 		Domain string
 		Result []uint32
@@ -32,7 +50,10 @@ func TestFullMatcherGroup(t *testing.T) {
 			Result: []uint32{4, 6},
 		},
 	}
-
+	g := new(FullMatcherGroup)
+	for _, pattern := range patterns {
+		AddMatcherToGroup(g, FullMatcher(pattern.Pattern), pattern.Value)
+	}
 	for _, testCase := range testCases {
 		r := g.Match(testCase.Domain)
 		if !reflect.DeepEqual(r, testCase.Result) {

+ 47 - 94
common/strmatcher/mph_matcher.go → common/strmatcher/matchergroup_mph.go

@@ -2,7 +2,6 @@ package strmatcher
 
 import (
 	"math/bits"
-	"regexp"
 	"sort"
 	"strings"
 	"unsafe"
@@ -20,79 +19,44 @@ func RollingHash(s string) uint32 {
 	return h
 }
 
-// A MphMatcherGroup is divided into three parts:
-// 1. `full` and `domain` patterns are matched by Rabin-Karp algorithm and minimal perfect hash table;
-// 2. `substr` patterns are matched by ac automaton;
-// 3. `regex` patterns are matched with the regex library.
+// MphMatcherGroup is an implementation of MatcherGroup.
+// It implements the Rabin-Karp algorithm and a minimal perfect hash table for Full and Domain matchers.
 type MphMatcherGroup struct {
-	ac            *ACAutomaton
-	otherMatchers []matcherEntry
-	rules         []string
-	level0        []uint32
-	level0Mask    int
-	level1        []uint32
-	level1Mask    int
-	count         uint32
-	ruleMap       *map[string]uint32
-}
-
-func (g *MphMatcherGroup) AddFullOrDomainPattern(pattern string, t Type) {
-	h := RollingHash(pattern)
-	switch t {
-	case Domain:
-		(*g.ruleMap)["."+pattern] = h*PrimeRK + uint32('.')
-		fallthrough
-	case Full:
-		(*g.ruleMap)[pattern] = h
-	default:
-	}
+	rules      []string
+	level0     []uint32
+	level0Mask int
+	level1     []uint32
+	level1Mask int
+	ruleMap    *map[string]uint32
 }
 
 func NewMphMatcherGroup() *MphMatcherGroup {
 	return &MphMatcherGroup{
-		ac:            nil,
-		otherMatchers: nil,
-		rules:         nil,
-		level0:        nil,
-		level0Mask:    0,
-		level1:        nil,
-		level1Mask:    0,
-		count:         1,
-		ruleMap:       &map[string]uint32{},
+		rules:      nil,
+		level0:     nil,
+		level0Mask: 0,
+		level1:     nil,
+		level1Mask: 0,
+		ruleMap:    &map[string]uint32{},
 	}
 }
 
-// AddPattern adds a pattern to MphMatcherGroup
-func (g *MphMatcherGroup) AddPattern(pattern string, t Type) (uint32, error) {
-	switch t {
-	case Substr:
-		if g.ac == nil {
-			g.ac = NewACAutomaton()
-		}
-		g.ac.Add(pattern, t)
-	case Full, Domain:
-		pattern = strings.ToLower(pattern)
-		g.AddFullOrDomainPattern(pattern, t)
-	case Regex:
-		r, err := regexp.Compile(pattern)
-		if err != nil {
-			return 0, err
-		}
-		g.otherMatchers = append(g.otherMatchers, matcherEntry{
-			m:  &regexMatcher{pattern: r},
-			id: g.count,
-		})
-	default:
-		panic("Unknown type")
-	}
-	return g.count, nil
+// AddFullMatcher implements MatcherGroupForFull.AddFullMatcher.
+func (g *MphMatcherGroup) AddFullMatcher(matcher FullMatcher, _ uint32) {
+	pattern := strings.ToLower(matcher.Pattern())
+	(*g.ruleMap)[pattern] = RollingHash(pattern)
 }
 
-// Build builds a minimal perfect hash table and ac automaton from insert rules
+// AddDomainMatcher implements MatcherGroupForDomain.AddDomainMatcher.
+func (g *MphMatcherGroup) AddDomainMatcher(matcher DomainMatcher, _ uint32) {
+	pattern := strings.ToLower(matcher.Pattern())
+	h := RollingHash(pattern)
+	(*g.ruleMap)[pattern] = h
+	(*g.ruleMap)["."+pattern] = h*PrimeRK + uint32('.')
+}
+
+// Build builds a minimal perfect hash table from the inserted rules.
 func (g *MphMatcherGroup) Build() {
-	if g.ac != nil {
-		g.ac.Build()
-	}
 	keyLen := len(*g.ruleMap)
 	if keyLen == 0 {
 		keyLen = 1
@@ -127,7 +91,7 @@ func (g *MphMatcherGroup) Build() {
 			findSeed := true
 			tmpOcc = tmpOcc[:0]
 			for _, i := range bucket.vals {
-				n := int(strhashFallback(unsafe.Pointer(&g.rules[i]), uintptr(seed))) & g.level1Mask
+				n := int(strhashFallback(unsafe.Pointer(&g.rules[i]), uintptr(seed))) & g.level1Mask // nosemgrep
 				if occ[n] {
 					for _, n := range tmpOcc {
 						occ[n] = false
@@ -148,52 +112,41 @@ func (g *MphMatcherGroup) Build() {
 	}
 }
 
-func nextPow2(v int) int {
-	if v <= 1 {
-		return 1
-	}
-	const MaxUInt = ^uint(0)
-	n := (MaxUInt >> bits.LeadingZeros(uint(v))) + 1
-	return int(n)
-}
-
 // Lookup searches for s in t and returns its index and whether it was found.
 func (g *MphMatcherGroup) Lookup(h uint32, s string) bool {
 	i0 := int(h) & g.level0Mask
 	seed := g.level0[i0]
-	i1 := int(strhashFallback(unsafe.Pointer(&s), uintptr(seed))) & g.level1Mask
+	i1 := int(strhashFallback(unsafe.Pointer(&s), uintptr(seed))) & g.level1Mask // nosemgrep
 	n := g.level1[i1]
 	return s == g.rules[int(n)]
 }
 
-// Match implements IndexMatcher.Match.
-func (g *MphMatcherGroup) Match(pattern string) []uint32 {
-	result := []uint32{}
+// Match implements MatcherGroup.Match.
+func (*MphMatcherGroup) Match(_ string) []uint32 {
+	return nil
+}
+
+// MatchAny implements MatcherGroup.MatchAny.
+func (g *MphMatcherGroup) MatchAny(pattern string) bool {
 	hash := uint32(0)
 	for i := len(pattern) - 1; i >= 0; i-- {
 		hash = hash*PrimeRK + uint32(pattern[i])
 		if pattern[i] == '.' {
 			if g.Lookup(hash, pattern[i:]) {
-				result = append(result, 1)
-				return result
+				return true
 			}
 		}
 	}
-	if g.Lookup(hash, pattern) {
-		result = append(result, 1)
-		return result
-	}
-	if g.ac != nil && g.ac.Match(pattern) {
-		result = append(result, 1)
-		return result
-	}
-	for _, e := range g.otherMatchers {
-		if e.m.Match(pattern) {
-			result = append(result, e.id)
-			return result
-		}
+	return g.Lookup(hash, pattern)
+}
+
+func nextPow2(v int) int {
+	if v <= 1 {
+		return 1
 	}
-	return nil
+	const MaxUInt = ^uint(0)
+	n := (MaxUInt >> bits.LeadingZeros(uint(v))) + 1
+	return int(n)
 }
 
 type indexBucket struct {
@@ -286,7 +239,7 @@ tail:
 }
 
 func add(p unsafe.Pointer, x uintptr) unsafe.Pointer {
-	return unsafe.Pointer(uintptr(p) + x)
+	return unsafe.Pointer(uintptr(p) + x) // nosemgrep
 }
 
 func readUnaligned32(p unsafe.Pointer) uint32 {

+ 174 - 0
common/strmatcher/matchergroup_mph_test.go

@@ -0,0 +1,174 @@
+package strmatcher_test
+
+import (
+	"testing"
+
+	"github.com/v2fly/v2ray-core/v4/common"
+	. "github.com/v2fly/v2ray-core/v4/common/strmatcher"
+)
+
+func TestMphMatcherGroup(t *testing.T) {
+	cases1 := []struct {
+		pattern string
+		mType   Type
+		input   string
+		output  bool
+	}{
+		{
+			pattern: "v2fly.org",
+			mType:   Domain,
+			input:   "www.v2fly.org",
+			output:  true,
+		},
+		{
+			pattern: "v2fly.org",
+			mType:   Domain,
+			input:   "v2fly.org",
+			output:  true,
+		},
+		{
+			pattern: "v2fly.org",
+			mType:   Domain,
+			input:   "www.v3fly.org",
+			output:  false,
+		},
+		{
+			pattern: "v2fly.org",
+			mType:   Domain,
+			input:   "2fly.org",
+			output:  false,
+		},
+		{
+			pattern: "v2fly.org",
+			mType:   Domain,
+			input:   "xv2fly.org",
+			output:  false,
+		},
+		{
+			pattern: "v2fly.org",
+			mType:   Full,
+			input:   "v2fly.org",
+			output:  true,
+		},
+		{
+			pattern: "v2fly.org",
+			mType:   Full,
+			input:   "xv2fly.org",
+			output:  false,
+		},
+	}
+	for _, test := range cases1 {
+		mph := NewMphMatcherGroup()
+		matcher, err := test.mType.New(test.pattern)
+		common.Must(err)
+		common.Must(AddMatcherToGroup(mph, matcher, 0))
+		mph.Build()
+		if m := mph.MatchAny(test.input); m != test.output {
+			t.Error("unexpected output: ", m, " for test case ", test)
+		}
+	}
+	{
+		cases2Input := []struct {
+			pattern string
+			mType   Type
+		}{
+			{
+				pattern: "163.com",
+				mType:   Domain,
+			},
+			{
+				pattern: "m.126.com",
+				mType:   Full,
+			},
+			{
+				pattern: "3.com",
+				mType:   Full,
+			},
+		}
+		mph := NewMphMatcherGroup()
+		for _, test := range cases2Input {
+			matcher, err := test.mType.New(test.pattern)
+			common.Must(err)
+			common.Must(AddMatcherToGroup(mph, matcher, 0))
+		}
+		mph.Build()
+		cases2Output := []struct {
+			pattern string
+			res     bool
+		}{
+			{
+				pattern: "126.com",
+				res:     false,
+			},
+			{
+				pattern: "m.163.com",
+				res:     true,
+			},
+			{
+				pattern: "mm163.com",
+				res:     false,
+			},
+			{
+				pattern: "m.126.com",
+				res:     true,
+			},
+			{
+				pattern: "163.com",
+				res:     true,
+			},
+			{
+				pattern: "63.com",
+				res:     false,
+			},
+			{
+				pattern: "oogle.com",
+				res:     false,
+			},
+			{
+				pattern: "vvgoogle.com",
+				res:     false,
+			},
+		}
+		for _, test := range cases2Output {
+			if m := mph.MatchAny(test.pattern); m != test.res {
+				t.Error("unexpected output: ", m, " for test case ", test)
+			}
+		}
+	}
+	{
+		cases3Input := []struct {
+			pattern string
+			mType   Type
+		}{
+			{
+				pattern: "video.google.com",
+				mType:   Domain,
+			},
+			{
+				pattern: "gle.com",
+				mType:   Domain,
+			},
+		}
+		mph := NewMphMatcherGroup()
+		for _, test := range cases3Input {
+			matcher, err := test.mType.New(test.pattern)
+			common.Must(err)
+			common.Must(AddMatcherToGroup(mph, matcher, 0))
+		}
+		mph.Build()
+		cases3Output := []struct {
+			pattern string
+			res     bool
+		}{
+			{
+				pattern: "google.com",
+				res:     false,
+			},
+		}
+		for _, test := range cases3Output {
+			if m := mph.MatchAny(test.pattern); m != test.res {
+				t.Error("unexpected output: ", m, " for test case ", test)
+			}
+		}
+	}
+}

+ 36 - 0
common/strmatcher/matchergroup_simple.go

@@ -0,0 +1,36 @@
+package strmatcher
+
+type matcherEntry struct {
+	matcher Matcher
+	value   uint32
+}
+
+// SimpleMatcherGroup is an implementation of MatcherGroup.
+// It simply stores all matchers in an array and sequentially matches them.
+type SimpleMatcherGroup struct {
+	matchers []matcherEntry
+}
+
+// AddMatcher implements MatcherGroupForAll.AddMatcher.
+func (g *SimpleMatcherGroup) AddMatcher(matcher Matcher, value uint32) {
+	g.matchers = append(g.matchers, matcherEntry{
+		matcher: matcher,
+		value:   value,
+	})
+}
+
+// Match implements MatcherGroup.Match.
+func (g *SimpleMatcherGroup) Match(input string) []uint32 {
+	result := []uint32{}
+	for _, e := range g.matchers {
+		if e.matcher.Match(input) {
+			result = append(result, e.value)
+		}
+	}
+	return result
+}
+
+// MatchAny implements MatcherGroup.MatchAny.
+func (g *SimpleMatcherGroup) MatchAny(input string) bool {
+	return len(g.Match(input)) > 0
+}

+ 69 - 0
common/strmatcher/matchergroup_simple_test.go

@@ -0,0 +1,69 @@
+package strmatcher_test
+
+import (
+	"reflect"
+	"testing"
+
+	"github.com/v2fly/v2ray-core/v4/common"
+	. "github.com/v2fly/v2ray-core/v4/common/strmatcher"
+)
+
+func TestSimpleMatcherGroup(t *testing.T) {
+	patterns := []struct {
+		pattern string
+		mType   Type
+	}{
+		{
+			pattern: "v2fly.org",
+			mType:   Domain,
+		},
+		{
+			pattern: "v2fly.org",
+			mType:   Full,
+		},
+		{
+			pattern: "v2fly.org",
+			mType:   Regex,
+		},
+	}
+	cases := []struct {
+		input  string
+		output []uint32
+	}{
+		{
+			input:  "www.v2fly.org",
+			output: []uint32{0, 2},
+		},
+		{
+			input:  "v2fly.org",
+			output: []uint32{0, 1, 2},
+		},
+		{
+			input:  "www.v3fly.org",
+			output: []uint32{},
+		},
+		{
+			input:  "2fly.org",
+			output: []uint32{},
+		},
+		{
+			input:  "xv2fly.org",
+			output: []uint32{2},
+		},
+		{
+			input:  "v2flyxorg",
+			output: []uint32{2},
+		},
+	}
+	matcherGroup := &SimpleMatcherGroup{}
+	for id, entry := range patterns {
+		matcher, err := entry.mType.New(entry.pattern)
+		common.Must(err)
+		common.Must(AddMatcherToGroup(matcherGroup, matcher, uint32(id)))
+	}
+	for _, test := range cases {
+		if r := matcherGroup.Match(test.input); !reflect.DeepEqual(r, test.output) {
+			t.Error("unexpected output: ", r, " for test case ", test)
+		}
+	}
+}

+ 47 - 0
common/strmatcher/matchergroup_substr.go

@@ -0,0 +1,47 @@
+package strmatcher
+
+import (
+	"sort"
+	"strings"
+)
+
+// SubstrMatcherGroup is an implementation of MatcherGroup.
+// It is simply implemented to comply with the priority specification of Substr matchers.
+type SubstrMatcherGroup struct {
+	patterns []string
+	values   []uint32
+}
+
+// AddSubstrMatcher implements MatcherGroupForSubstr.AddSubstrMatcher.
+func (g *SubstrMatcherGroup) AddSubstrMatcher(matcher SubstrMatcher, value uint32) {
+	g.patterns = append(g.patterns, matcher.Pattern())
+	g.values = append(g.values, value)
+}
+
+// Match implements MatcherGroup.Match.
+func (g *SubstrMatcherGroup) Match(input string) []uint32 {
+	result := []uint32{}
+	for i, pattern := range g.patterns {
+		for j := strings.LastIndex(input, pattern); j != -1; j = strings.LastIndex(input[:j], pattern) {
+			result = append(result, uint32(j)<<16|uint32(i)&0xffff) // uint32: position (higher 16 bit) | patternIdx (lower 16 bit)
+		}
+	}
+	// Sort the match results in dictionary order, so that:
+	//   1. Pattern matched at smaller position (meaning matched further) takes precedence.
+	//   2. When patterns matched at same position, pattern with smaller index (meaning inserted early) takes precedence.
+	sort.Slice(result, func(i, j int) bool { return result[i] < result[j] })
+	for i, entry := range result {
+		result[i] = g.values[entry&0xffff] // Get pattern value from its index (the lower 16 bit)
+	}
+	return result
+}
+
+// MatchAny implements MatcherGroup.MatchAny.
+func (g *SubstrMatcherGroup) MatchAny(input string) bool {
+	for _, pattern := range g.patterns {
+		if strings.Contains(input, pattern) {
+			return true
+		}
+	}
+	return false
+}

+ 65 - 0
common/strmatcher/matchergroup_substr_test.go

@@ -0,0 +1,65 @@
+package strmatcher_test
+
+import (
+	"reflect"
+	"testing"
+
+	"github.com/v2fly/v2ray-core/v4/common"
+	. "github.com/v2fly/v2ray-core/v4/common/strmatcher"
+)
+
+func TestSubstrMatcherGroup(t *testing.T) {
+	patterns := []struct {
+		pattern string
+		mType   Type
+	}{
+		{
+			pattern: "apis",
+			mType:   Substr,
+		},
+		{
+			pattern: "google",
+			mType:   Substr,
+		},
+		{
+			pattern: "apis",
+			mType:   Substr,
+		},
+	}
+	cases := []struct {
+		input  string
+		output []uint32
+	}{
+		{
+			input:  "google.com",
+			output: []uint32{1},
+		},
+		{
+			input:  "apis.com",
+			output: []uint32{0, 2},
+		},
+		{
+			input:  "googleapis.com",
+			output: []uint32{1, 0, 2},
+		},
+		{
+			input:  "fonts.googleapis.com",
+			output: []uint32{1, 0, 2},
+		},
+		{
+			input:  "apis.googleapis.com",
+			output: []uint32{0, 2, 1, 0, 2},
+		},
+	}
+	matcherGroup := &SubstrMatcherGroup{}
+	for id, entry := range patterns {
+		matcher, err := entry.mType.New(entry.pattern)
+		common.Must(err)
+		common.Must(AddMatcherToGroup(matcherGroup, matcher, uint32(id)))
+	}
+	for _, test := range cases {
+		if r := matcherGroup.Match(test.input); !reflect.DeepEqual(r, test.output) {
+			t.Error("unexpected output: ", r, " for test case ", test)
+		}
+	}
+}

+ 134 - 19
common/strmatcher/matchers.go

@@ -1,52 +1,167 @@
 package strmatcher
 
 import (
+	"errors"
 	"regexp"
 	"strings"
 )
 
-type fullMatcher string
+// FullMatcher is an implementation of Matcher.
+type FullMatcher string
 
-func (m fullMatcher) Match(s string) bool {
-	return string(m) == s
+func (FullMatcher) Type() Type {
+	return Full
 }
 
-func (m fullMatcher) String() string {
-	return "full:" + string(m)
+func (m FullMatcher) Pattern() string {
+	return string(m)
 }
 
-type substrMatcher string
+func (m FullMatcher) String() string {
+	return "full:" + m.Pattern()
+}
 
-func (m substrMatcher) Match(s string) bool {
-	return strings.Contains(s, string(m))
+func (m FullMatcher) Match(s string) bool {
+	return string(m) == s
 }
 
-func (m substrMatcher) String() string {
-	return "keyword:" + string(m)
+// DomainMatcher is an implementation of Matcher.
+type DomainMatcher string
+
+func (DomainMatcher) Type() Type {
+	return Domain
 }
 
-type domainMatcher string
+func (m DomainMatcher) Pattern() string {
+	return string(m)
+}
 
-func (m domainMatcher) Match(s string) bool {
-	pattern := string(m)
+func (m DomainMatcher) String() string {
+	return "domain:" + m.Pattern()
+}
+
+func (m DomainMatcher) Match(s string) bool {
+	pattern := m.Pattern()
 	if !strings.HasSuffix(s, pattern) {
 		return false
 	}
 	return len(s) == len(pattern) || s[len(s)-len(pattern)-1] == '.'
 }
 
-func (m domainMatcher) String() string {
-	return "domain:" + string(m)
+// SubstrMatcher is an implementation of Matcher.
+type SubstrMatcher string
+
+func (SubstrMatcher) Type() Type {
+	return Substr
+}
+
+func (m SubstrMatcher) Pattern() string {
+	return string(m)
+}
+
+func (m SubstrMatcher) String() string {
+	return "keyword:" + m.Pattern()
+}
+
+func (m SubstrMatcher) Match(s string) bool {
+	return strings.Contains(s, m.Pattern())
 }
 
-type regexMatcher struct {
+// RegexMatcher is an implementation of Matcher.
+type RegexMatcher struct {
 	pattern *regexp.Regexp
 }
 
-func (m *regexMatcher) Match(s string) bool {
+func (*RegexMatcher) Type() Type {
+	return Regex
+}
+
+func (m *RegexMatcher) Pattern() string {
+	return m.pattern.String()
+}
+
+func (m *RegexMatcher) String() string {
+	return "regexp:" + m.Pattern()
+}
+
+func (m *RegexMatcher) Match(s string) bool {
 	return m.pattern.MatchString(s)
 }
 
-func (m *regexMatcher) String() string {
-	return "regexp:" + m.pattern.String()
+// New creates a new Matcher based on the given pattern.
+func (t Type) New(pattern string) (Matcher, error) {
+	switch t {
+	case Full:
+		return FullMatcher(pattern), nil
+	case Substr:
+		return SubstrMatcher(pattern), nil
+	case Domain:
+		return DomainMatcher(pattern), nil
+	case Regex: // 1. regex matching is case-sensitive
+		regex, err := regexp.Compile(pattern)
+		if err != nil {
+			return nil, err
+		}
+		return &RegexMatcher{pattern: regex}, nil
+	default:
+		panic("Unknown type")
+	}
+}
+
+// MatcherGroupForAll is an interface indicating a MatcherGroup could accept all types of matchers.
+type MatcherGroupForAll interface {
+	AddMatcher(matcher Matcher, value uint32)
+}
+
+// MatcherGroupForFull is an interface indicating a MatcherGroup could accept FullMatchers.
+type MatcherGroupForFull interface {
+	AddFullMatcher(matcher FullMatcher, value uint32)
+}
+
+// MatcherGroupForDomain is an interface indicating a MatcherGroup could accept DomainMatchers.
+type MatcherGroupForDomain interface {
+	AddDomainMatcher(matcher DomainMatcher, value uint32)
+}
+
+// MatcherGroupForSubstr is an interface indicating a MatcherGroup could accept SubstrMatchers.
+type MatcherGroupForSubstr interface {
+	AddSubstrMatcher(matcher SubstrMatcher, value uint32)
+}
+
+// MatcherGroupForRegex is an interface indicating a MatcherGroup could accept RegexMatchers.
+type MatcherGroupForRegex interface {
+	AddRegexMatcher(matcher *RegexMatcher, value uint32)
+}
+
+// AddMatcherToGroup is a helper function to try to add a Matcher to any kind of MatcherGroup.
+// It returns an error if the MatcherGroup does not accept the provided Matcher's type.
+// This function is provided to help write code that tests a MatcherGroup.
+func AddMatcherToGroup(g MatcherGroup, matcher Matcher, value uint32) error {
+	if g, ok := g.(MatcherGroupForAll); ok {
+		g.AddMatcher(matcher, value)
+		return nil
+	}
+	switch matcher := matcher.(type) {
+	case FullMatcher:
+		if g, ok := g.(MatcherGroupForFull); ok {
+			g.AddFullMatcher(matcher, value)
+			return nil
+		}
+	case DomainMatcher:
+		if g, ok := g.(MatcherGroupForDomain); ok {
+			g.AddDomainMatcher(matcher, value)
+			return nil
+		}
+	case SubstrMatcher:
+		if g, ok := g.(MatcherGroupForSubstr); ok {
+			g.AddSubstrMatcher(matcher, value)
+			return nil
+		}
+	case *RegexMatcher:
+		if g, ok := g.(MatcherGroupForRegex); ok {
+			g.AddRegexMatcher(matcher, value)
+			return nil
+		}
+	}
+	return errors.New("cannot add matcher to matcher group")
 }

+ 0 - 169
common/strmatcher/matchers_test.go

@@ -71,172 +71,3 @@ func TestMatcher(t *testing.T) {
 		}
 	}
 }
-
-func TestACAutomaton(t *testing.T) {
-	cases1 := []struct {
-		pattern string
-		mType   Type
-		input   string
-		output  bool
-	}{
-		{
-			pattern: "v2fly.org",
-			mType:   Domain,
-			input:   "www.v2fly.org",
-			output:  true,
-		},
-		{
-			pattern: "v2fly.org",
-			mType:   Domain,
-			input:   "v2fly.org",
-			output:  true,
-		},
-		{
-			pattern: "v2fly.org",
-			mType:   Domain,
-			input:   "www.v3fly.org",
-			output:  false,
-		},
-		{
-			pattern: "v2fly.org",
-			mType:   Domain,
-			input:   "2fly.org",
-			output:  false,
-		},
-		{
-			pattern: "v2fly.org",
-			mType:   Domain,
-			input:   "xv2fly.org",
-			output:  false,
-		},
-		{
-			pattern: "v2fly.org",
-			mType:   Full,
-			input:   "v2fly.org",
-			output:  true,
-		},
-		{
-			pattern: "v2fly.org",
-			mType:   Full,
-			input:   "xv2fly.org",
-			output:  false,
-		},
-	}
-	for _, test := range cases1 {
-		ac := NewACAutomaton()
-		ac.Add(test.pattern, test.mType)
-		ac.Build()
-		if m := ac.Match(test.input); m != test.output {
-			t.Error("unexpected output: ", m, " for test case ", test)
-		}
-	}
-	{
-		cases2Input := []struct {
-			pattern string
-			mType   Type
-		}{
-			{
-				pattern: "163.com",
-				mType:   Domain,
-			},
-			{
-				pattern: "m.126.com",
-				mType:   Full,
-			},
-			{
-				pattern: "3.com",
-				mType:   Full,
-			},
-			{
-				pattern: "google.com",
-				mType:   Substr,
-			},
-			{
-				pattern: "vgoogle.com",
-				mType:   Substr,
-			},
-		}
-		ac := NewACAutomaton()
-		for _, test := range cases2Input {
-			ac.Add(test.pattern, test.mType)
-		}
-		ac.Build()
-		cases2Output := []struct {
-			pattern string
-			res     bool
-		}{
-			{
-				pattern: "126.com",
-				res:     false,
-			},
-			{
-				pattern: "m.163.com",
-				res:     true,
-			},
-			{
-				pattern: "mm163.com",
-				res:     false,
-			},
-			{
-				pattern: "m.126.com",
-				res:     true,
-			},
-			{
-				pattern: "163.com",
-				res:     true,
-			},
-			{
-				pattern: "63.com",
-				res:     false,
-			},
-			{
-				pattern: "oogle.com",
-				res:     false,
-			},
-			{
-				pattern: "vvgoogle.com",
-				res:     true,
-			},
-		}
-		for _, test := range cases2Output {
-			if m := ac.Match(test.pattern); m != test.res {
-				t.Error("unexpected output: ", m, " for test case ", test)
-			}
-		}
-	}
-
-	{
-		cases3Input := []struct {
-			pattern string
-			mType   Type
-		}{
-			{
-				pattern: "video.google.com",
-				mType:   Domain,
-			},
-			{
-				pattern: "gle.com",
-				mType:   Domain,
-			},
-		}
-		ac := NewACAutomaton()
-		for _, test := range cases3Input {
-			ac.Add(test.pattern, test.mType)
-		}
-		ac.Build()
-		cases3Output := []struct {
-			pattern string
-			res     bool
-		}{
-			{
-				pattern: "google.com",
-				res:     false,
-			},
-		}
-		for _, test := range cases3Output {
-			if m := ac.Match(test.pattern); m != test.res {
-				t.Error("unexpected output: ", m, " for test case ", test)
-			}
-		}
-	}
-}

+ 52 - 85
common/strmatcher/strmatcher.go

@@ -1,107 +1,74 @@
 package strmatcher
 
-import (
-	"regexp"
-)
-
-// Matcher is the interface to determine a string matches a pattern.
-type Matcher interface {
-	// Match returns true if the given string matches a predefined pattern.
-	Match(string) bool
-	String() string
-}
-
 // Type is the type of the matcher.
 type Type byte
 
 const (
 	// Full is the type of matcher that the input string must exactly equal to the pattern.
-	Full Type = iota
-	// Substr is the type of matcher that the input string must contain the pattern as a sub-string.
-	Substr
+	Full Type = 0
 	// Domain is the type of matcher that the input string must be a sub-domain or itself of the pattern.
-	Domain
+	Domain Type = 1
+	// Substr is the type of matcher that the input string must contain the pattern as a sub-string.
+	Substr Type = 2
 	// Regex is the type of matcher that the input string must matches the regular-expression pattern.
-	Regex
+	Regex Type = 3
 )
 
-// New creates a new Matcher based on the given pattern.
-func (t Type) New(pattern string) (Matcher, error) {
-	// 1. regex matching is case-sensitive
-	switch t {
-	case Full:
-		return fullMatcher(pattern), nil
-	case Substr:
-		return substrMatcher(pattern), nil
-	case Domain:
-		return domainMatcher(pattern), nil
-	case Regex:
-		r, err := regexp.Compile(pattern)
-		if err != nil {
-			return nil, err
-		}
-		return &regexMatcher{
-			pattern: r,
-		}, nil
-	default:
-		panic("Unknown type")
-	}
-}
+// Matcher is the interface to determine whether a string matches a pattern.
+//   * This is a basic matcher to represent a certain kind of match semantic(full, substr, domain or regex).
+type Matcher interface {
+	// Type returns the matcher's type.
+	Type() Type
 
-// IndexMatcher is the interface for matching with a group of matchers.
-type IndexMatcher interface {
-	// Match returns the index of a matcher that matches the input. It returns empty array if no such matcher exists.
-	Match(input string) []uint32
-}
+	// Pattern returns the matcher's raw string representation.
+	Pattern() string
 
-type matcherEntry struct {
-	m  Matcher
-	id uint32
+	// String returns a string representation of the matcher containing its type and pattern.
+	String() string
+
+	// Match returns true if the given string matches a predefined pattern.
+	//   * This method is seldom used for performance reasons
+	//     and is generally taken over by their corresponding MatcherGroup.
+	Match(input string) bool
 }
 
-// MatcherGroup is an implementation of IndexMatcher.
-// Empty initialization works.
-type MatcherGroup struct {
-	count         uint32
-	fullMatcher   FullMatcherGroup
-	domainMatcher DomainMatcherGroup
-	otherMatchers []matcherEntry
+// MatcherGroup is an advanced type of matcher to accept a bunch of basic Matchers (of certain type, not all matcher types).
+// For example:
+//   * FullMatcherGroup accepts FullMatcher and uses a hash table to facilitate lookup.
+//   * DomainMatcherGroup accepts DomainMatcher and uses a trie to optimize both memory consumption and lookup speed.
+type MatcherGroup interface {
+	// Match returns all matched matchers with their corresponding values.
+	Match(input string) []uint32
+
+	// MatchAny returns true as soon as one matching matcher is found.
+	MatchAny(input string) bool
 }
 
-// Add adds a new Matcher into the MatcherGroup, and returns its index. The index will never be 0.
-func (g *MatcherGroup) Add(m Matcher) uint32 {
-	g.count++
-	c := g.count
+// IndexMatcher is a general type of matcher that accepts all kinds of basic matchers.
+// It should:
+//   * Accept all Matcher types with no exception.
+//   * Optimize string matching with a combination of MatcherGroups.
+//   * Obey certain priority order specification when returning matched Matchers.
+type IndexMatcher interface {
+	// Size returns number of matchers added to IndexMatcher.
+	Size() uint32
 
-	switch tm := m.(type) {
-	case fullMatcher:
-		g.fullMatcher.addMatcher(tm, c)
-	case domainMatcher:
-		g.domainMatcher.addMatcher(tm, c)
-	default:
-		g.otherMatchers = append(g.otherMatchers, matcherEntry{
-			m:  m,
-			id: c,
-		})
-	}
+	// Add adds a new Matcher to IndexMatcher, and returns its index. The index will never be 0.
+	Add(matcher Matcher) uint32
 
-	return c
-}
+	// Build builds the IndexMatcher to be ready for matching.
+	Build() error
 
-// Match implements IndexMatcher.Match.
-func (g *MatcherGroup) Match(pattern string) []uint32 {
-	result := []uint32{}
-	result = append(result, g.fullMatcher.Match(pattern)...)
-	result = append(result, g.domainMatcher.Match(pattern)...)
-	for _, e := range g.otherMatchers {
-		if e.m.Match(pattern) {
-			result = append(result, e.id)
-		}
-	}
-	return result
-}
+	// Match returns the indices of all matchers that match the input.
+	//   * Empty array is returned if no such matcher exists.
+	//   * The order of returned matchers should follow priority specification.
+	// Priority specification:
+	//   1. Priority between matcher types: full > domain > substr > regex.
+	//   2. Priority of same-priority matchers matching at same position: the early added takes precedence.
+	//   3. Priority of domain matchers matching at different levels: the further matched domain takes precedence.
+	//   4. Priority of substr matchers matching at different positions: the further matched substr takes precedence.
+	Match(input string) []uint32
 
-// Size returns the number of matchers in the MatcherGroup.
-func (g *MatcherGroup) Size() uint32 {
-	return g.count
+	// MatchAny returns true as soon as one matching matcher is found.
+	MatchAny(input string) bool
 }