Browse Source

feat: Implement Match and MatchAny for all MatcherGroup, IndexMatcher

[common/strmatcher] Implement Match and MatchAny for all MatcherGroup and IndexMatcher
Vigilans 3 years ago
parent
commit
f494df2567

+ 58 - 0
common/strmatcher/benchmark_indexmatcher_test.go

@@ -0,0 +1,58 @@
+package strmatcher_test
+
+import (
+	"testing"
+
+	. "github.com/v2fly/v2ray-core/v5/common/strmatcher"
+)
+
+// BenchmarkLinearIndexMatcher runs the shared IndexMatcher benchmark suite
+// against matchers created by NewLinearIndexMatcher.
+func BenchmarkLinearIndexMatcher(b *testing.B) {
+	newMatcher := func() IndexMatcher {
+		return NewLinearIndexMatcher()
+	}
+	benchmarkIndexMatcher(b, newMatcher)
+}
+
+// BenchmarkMphIndexMatcher runs the shared IndexMatcher benchmark suite
+// against matchers created by NewMphIndexMatcher.
+func BenchmarkMphIndexMatcher(b *testing.B) {
+	newMatcher := func() IndexMatcher {
+		return NewMphIndexMatcher()
+	}
+	benchmarkIndexMatcher(b, newMatcher)
+}
+
+// benchmarkIndexMatcher runs the Match, Match/Dotless and MatchAny benchmark suites
+// against a fresh IndexMatcher obtained from ctor for every scenario.
+// Each scenario maps a matcher type to whether that type should contain a rule
+// matching the benchmark input (see prepareMatchers).
+func benchmarkIndexMatcher(b *testing.B, ctor func() IndexMatcher) {
+	type scenario struct {
+		name    string
+		enabled map[Type]bool
+	}
+	suites := []struct {
+		name      string
+		run       func(*testing.B, MatcherGroup, map[Type]bool)
+		scenarios []scenario
+	}{
+		{"Match", benchmarkMatch, []scenario{
+			{"Domain------------", map[Type]bool{Domain: true}},
+			{"Domain+Full-------", map[Type]bool{Domain: true, Full: true}},
+			{"Domain+Full+Substr", map[Type]bool{Domain: true, Full: true, Substr: true}},
+			{"All-Fail----------", map[Type]bool{Domain: false, Full: false, Substr: false}},
+		}},
+		// Dotless domain matcher automatically inserted in DNS app when "localhost" DNS is used.
+		{"Match/Dotless", benchmarkMatch, []scenario{
+			{"All-Succ", map[Type]bool{Domain: true, Full: true, Substr: true, Regex: true}},
+			{"All-Fail", map[Type]bool{Domain: false, Full: false, Substr: false, Regex: false}},
+		}},
+		{"MatchAny", benchmarkMatchAny, []scenario{
+			{"First-Full--", map[Type]bool{Full: true, Domain: true, Substr: true}},
+			{"First-Domain", map[Type]bool{Full: false, Domain: true, Substr: true}},
+			{"First-Substr", map[Type]bool{Full: false, Domain: false, Substr: true}},
+			{"All-Fail----", map[Type]bool{Full: false, Domain: false, Substr: false}},
+		}},
+	}
+	for _, suite := range suites {
+		suite := suite // keep a per-iteration copy for the closure below
+		b.Run(suite.name, func(b *testing.B) {
+			for _, sc := range suite.scenarios {
+				sc := sc
+				b.Run(sc.name, func(b *testing.B) {
+					suite.run(b, ctor(), sc.enabled)
+				})
+			}
+		})
+	}
+}

+ 149 - 0
common/strmatcher/benchmark_matchers_test.go

@@ -0,0 +1,149 @@
+package strmatcher_test
+
+import (
+	"strconv"
+	"testing"
+
+	"github.com/v2fly/v2ray-core/v5/common"
+	. "github.com/v2fly/v2ray-core/v5/common/strmatcher"
+)
+
+// BenchmarkFullMatcher benchmarks Full-type matchers on every matcher group
+// listed below, one sub-benchmark per group implementation.
+func BenchmarkFullMatcher(b *testing.B) {
+	groups := []struct {
+		name string
+		ctor func() MatcherGroup
+	}{
+		{"SimpleMatcherGroup------", func() MatcherGroup { return new(SimpleMatcherGroup) }},
+		{"FullMatcherGroup--------", func() MatcherGroup { return NewFullMatcherGroup() }},
+		{"ACAutomationMatcherGroup", func() MatcherGroup { return NewACAutomatonMatcherGroup() }},
+		{"MphMatcherGroup---------", func() MatcherGroup { return NewMphMatcherGroup() }},
+	}
+	for _, group := range groups {
+		ctor := group.ctor
+		b.Run(group.name, func(b *testing.B) {
+			benchmarkMatcherType(b, Full, ctor)
+		})
+	}
+}
+
+// BenchmarkDomainMatcher benchmarks Domain-type matchers on every matcher group
+// listed below, one sub-benchmark per group implementation.
+func BenchmarkDomainMatcher(b *testing.B) {
+	groups := []struct {
+		name string
+		ctor func() MatcherGroup
+	}{
+		{"SimpleMatcherGroup------", func() MatcherGroup { return new(SimpleMatcherGroup) }},
+		{"DomainMatcherGroup------", func() MatcherGroup { return NewDomainMatcherGroup() }},
+		{"ACAutomationMatcherGroup", func() MatcherGroup { return NewACAutomatonMatcherGroup() }},
+		{"MphMatcherGroup---------", func() MatcherGroup { return NewMphMatcherGroup() }},
+	}
+	for _, group := range groups {
+		ctor := group.ctor
+		b.Run(group.name, func(b *testing.B) {
+			benchmarkMatcherType(b, Domain, ctor)
+		})
+	}
+}
+
+// BenchmarkSubstrMatcher benchmarks Substr-type matchers on every matcher group
+// listed below, one sub-benchmark per group implementation.
+func BenchmarkSubstrMatcher(b *testing.B) {
+	groups := []struct {
+		name string
+		ctor func() MatcherGroup
+	}{
+		{"SimpleMatcherGroup------", func() MatcherGroup { return new(SimpleMatcherGroup) }},
+		{"SubstrMatcherGroup------", func() MatcherGroup { return new(SubstrMatcherGroup) }},
+		{"ACAutomationMatcherGroup", func() MatcherGroup { return NewACAutomatonMatcherGroup() }},
+	}
+	for _, group := range groups {
+		ctor := group.ctor
+		b.Run(group.name, func(b *testing.B) {
+			benchmarkMatcherType(b, Substr, ctor)
+		})
+	}
+}
+
+// Utility functions for benchmark
+
+// benchmarkMatcherType benchmarks Match and MatchAny for matcher type t, each in a
+// "Succ" variant (a matching rule is present) and a "Fail" variant (no rule matches).
+// ctor is called once per leaf benchmark invocation to obtain a fresh group.
+func benchmarkMatcherType(b *testing.B, t Type, ctor func() MatcherGroup) {
+	modes := []struct {
+		name string
+		run  func(*testing.B, MatcherGroup, map[Type]bool)
+	}{
+		{"Match", benchmarkMatch},
+		{"MatchAny", benchmarkMatchAny},
+	}
+	outcomes := []struct {
+		name     string
+		hasMatch bool
+	}{
+		{"Succ", true},
+		{"Fail", false},
+	}
+	for _, mode := range modes {
+		mode := mode // keep a per-iteration copy for the closure below
+		b.Run(mode.name, func(b *testing.B) {
+			for _, outcome := range outcomes {
+				outcome := outcome
+				b.Run(outcome.name, func(b *testing.B) {
+					mode.run(b, ctor(), map[Type]bool{t: outcome.hasMatch})
+				})
+			}
+		})
+	}
+}
+
+// benchmarkMatch fills g via prepareMatchers, resets the timer so that setup is not
+// measured, and then calls g.Match on "0.v2fly.org" b.N times.
+func benchmarkMatch(b *testing.B, g MatcherGroup, enabledTypes map[Type]bool) {
+	const input = "0.v2fly.org"
+	prepareMatchers(g, enabledTypes)
+	b.ResetTimer()
+	for remaining := b.N; remaining > 0; remaining-- {
+		_ = g.Match(input)
+	}
+}
+
+// benchmarkMatchAny fills g via prepareMatchers, resets the timer so that setup is not
+// measured, and then calls g.MatchAny on "0.v2fly.org" b.N times.
+func benchmarkMatchAny(b *testing.B, g MatcherGroup, enabledTypes map[Type]bool) {
+	const input = "0.v2fly.org"
+	prepareMatchers(g, enabledTypes)
+	b.ResetTimer()
+	for remaining := b.N; remaining > 0; remaining-- {
+		_ = g.MatchAny(input)
+	}
+}
+
+// prepareMatchers populates g with benchmark rules for every matcher type listed in
+// enabledTypes and then builds g if it exposes a Build method.
+//
+// For Domain, Full and Substr, the map value decides whether one extra rule with value 0
+// is added ahead of the numbered "<i>.v2fly.org" filler rules ("v2fly.org" for Domain and
+// Substr, "0.v2fly.org" for Full). The Regex case always adds the single dotless pattern,
+// regardless of the map value.
+//
+// Errors from AddMatcherToGroup are passed to common.Must instead of being dropped, so a
+// group that rejects a matcher type is reported during setup rather than silently
+// benchmarked with missing rules.
+func prepareMatchers(g MatcherGroup, enabledTypes map[Type]bool) {
+	add := func(matcher Matcher, value uint32) {
+		common.Must(AddMatcherToGroup(g, matcher, value))
+	}
+	for matcherType, hasMatch := range enabledTypes {
+		switch matcherType {
+		case Domain:
+			if hasMatch {
+				add(DomainMatcher("v2fly.org"), 0)
+			}
+			for i := 1; i < 1024; i++ {
+				add(DomainMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
+			}
+		case Full:
+			if hasMatch {
+				add(FullMatcher("0.v2fly.org"), 0)
+			}
+			for i := 1; i < 64; i++ {
+				add(FullMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
+			}
+		case Substr:
+			if hasMatch {
+				add(SubstrMatcher("v2fly.org"), 0)
+			}
+			for i := 1; i < 4; i++ {
+				add(SubstrMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
+			}
+		case Regex:
+			matcher, err := Regex.New("^[^.]*$") // Dotless domain matcher automatically inserted in DNS app when "localhost" DNS is used.
+			common.Must(err)
+			add(matcher, 0)
+		}
+	}
+	// Some groups need an explicit build step after all matchers are added.
+	if g, ok := g.(buildable); ok {
+		common.Must(g.Build())
+	}
+}
+
+// buildable is satisfied by matcher groups that expose a Build step.
+// prepareMatchers uses it to call Build after all matchers have been added.
+type buildable interface {
+	Build() error
+}

+ 0 - 161
common/strmatcher/benchmark_test.go

@@ -1,161 +0,0 @@
-package strmatcher_test
-
-import (
-	"strconv"
-	"testing"
-
-	"github.com/v2fly/v2ray-core/v5/common"
-	. "github.com/v2fly/v2ray-core/v5/common/strmatcher"
-)
-
-// Benchmark Domain Matcher Groups
-
-func BenchmarkSimpleMatcherGroupForDomain(b *testing.B) {
-	g := new(SimpleMatcherGroup)
-
-	for i := 1; i <= 1024; i++ {
-		AddMatcherToGroup(g, DomainMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
-	}
-
-	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		_ = g.Match("0.v2fly.org")
-	}
-}
-
-func BenchmarkDomainMatcherGroup(b *testing.B) {
-	g := new(DomainMatcherGroup)
-
-	for i := 1; i <= 1024; i++ {
-		AddMatcherToGroup(g, DomainMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
-	}
-
-	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		_ = g.Match("0.v2fly.org")
-	}
-}
-
-func BenchmarkACAutomatonMatcherGroupForDomain(b *testing.B) {
-	ac := NewACAutomatonMatcherGroup()
-	for i := 1; i <= 1024; i++ {
-		AddMatcherToGroup(ac, DomainMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
-	}
-	ac.Build()
-
-	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		_ = ac.MatchAny("0.v2fly.org")
-	}
-}
-
-func BenchmarkMphMatcherGroupForDomain(b *testing.B) {
-	mph := NewMphMatcherGroup()
-	for i := 1; i <= 1024; i++ {
-		AddMatcherToGroup(mph, DomainMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
-	}
-	mph.Build()
-
-	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		_ = mph.MatchAny("0.v2fly.org")
-	}
-}
-
-// Benchmark Full Matcher Groups
-
-func BenchmarkSimpleMatcherGroupForFull(b *testing.B) {
-	g := new(SimpleMatcherGroup)
-
-	for i := 1; i <= 1024; i++ {
-		AddMatcherToGroup(g, FullMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
-	}
-
-	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		_ = g.Match("0.v2fly.org")
-	}
-}
-
-func BenchmarkFullMatcherGroup(b *testing.B) {
-	g := new(FullMatcherGroup)
-
-	for i := 1; i <= 1024; i++ {
-		AddMatcherToGroup(g, FullMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
-	}
-
-	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		_ = g.Match("0.v2fly.org")
-	}
-}
-
-func BenchmarkACAutomatonMatcherGroupForFull(b *testing.B) {
-	ac := NewACAutomatonMatcherGroup()
-	for i := 1; i <= 1024; i++ {
-		AddMatcherToGroup(ac, FullMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
-	}
-	ac.Build()
-
-	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		_ = ac.MatchAny("0.v2fly.org")
-	}
-}
-
-func BenchmarkMphMatcherGroupFull(b *testing.B) {
-	mph := NewMphMatcherGroup()
-	for i := 1; i <= 1024; i++ {
-		AddMatcherToGroup(mph, FullMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
-	}
-	mph.Build()
-
-	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		_ = mph.MatchAny("0.v2fly.org")
-	}
-}
-
-// Benchmark Substr Matcher Groups
-
-func BenchmarkSimpleMatcherGroupForSubstr(b *testing.B) {
-	g := new(SimpleMatcherGroup)
-
-	for i := 1; i <= 1024; i++ {
-		AddMatcherToGroup(g, SubstrMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
-	}
-
-	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		_ = g.Match("0.v2fly.org")
-	}
-}
-
-func BenchmarkACAutomatonMatcherGroupForSubstr(b *testing.B) {
-	ac := NewACAutomatonMatcherGroup()
-	for i := 1; i <= 1024; i++ {
-		AddMatcherToGroup(ac, SubstrMatcher(strconv.Itoa(i)+".v2fly.org"), uint32(i))
-	}
-	ac.Build()
-
-	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		_ = ac.MatchAny("0.v2fly.org")
-	}
-}
-
-// Benchmark Index Matchers
-
-func BenchmarkLinearIndexMatcher(b *testing.B) {
-	g := new(LinearIndexMatcher)
-	for i := 1; i <= 1024; i++ {
-		m, err := Domain.New(strconv.Itoa(i) + ".v2fly.org")
-		common.Must(err)
-		g.Add(m)
-	}
-
-	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		_ = g.Match("0.v2fly.org")
-	}
-}

+ 54 - 17
common/strmatcher/indexmatcher_linear.go

@@ -1,13 +1,12 @@
 package strmatcher
 
 // LinearIndexMatcher is an implementation of IndexMatcher.
-// Empty initialization works.
 type LinearIndexMatcher struct {
-	count         uint32
-	fullMatcher   FullMatcherGroup
-	domainMatcher DomainMatcherGroup
-	substrMatcher SubstrMatcherGroup
-	otherMatchers SimpleMatcherGroup
+	count  uint32
+	full   *FullMatcherGroup
+	domain *DomainMatcherGroup
+	substr *SubstrMatcherGroup
+	regex  *SimpleMatcherGroup
 }
 
 func NewLinearIndexMatcher() *LinearIndexMatcher {
@@ -21,13 +20,25 @@ func (g *LinearIndexMatcher) Add(matcher Matcher) uint32 {
 
 	switch matcher := matcher.(type) {
 	case FullMatcher:
-		g.fullMatcher.AddFullMatcher(matcher, index)
+		if g.full == nil {
+			g.full = NewFullMatcherGroup()
+		}
+		g.full.AddFullMatcher(matcher, index)
 	case DomainMatcher:
-		g.domainMatcher.AddDomainMatcher(matcher, index)
+		if g.domain == nil {
+			g.domain = NewDomainMatcherGroup()
+		}
+		g.domain.AddDomainMatcher(matcher, index)
 	case SubstrMatcher:
-		g.substrMatcher.AddSubstrMatcher(matcher, index)
+		if g.substr == nil {
+			g.substr = new(SubstrMatcherGroup)
+		}
+		g.substr.AddSubstrMatcher(matcher, index)
 	default:
-		g.otherMatchers.AddMatcher(matcher, index)
+		if g.regex == nil {
+			g.regex = new(SimpleMatcherGroup)
+		}
+		g.regex.AddMatcher(matcher, index)
 	}
 
 	return index
@@ -40,17 +51,43 @@ func (*LinearIndexMatcher) Build() error {
 
 // Match implements IndexMatcher.Match.
+// It queries the full, domain, substr and regex groups in that order, skipping groups
+// that were never created by Add, and merges the non-empty results with CompositeMatches.
 func (g *LinearIndexMatcher) Match(input string) []uint32 {
-	result := []uint32{}
-	result = append(result, g.fullMatcher.Match(input)...)
-	result = append(result, g.domainMatcher.Match(input)...)
-	result = append(result, g.substrMatcher.Match(input)...)
-	result = append(result, g.otherMatchers.Match(input)...)
-	return result
+	// Allocate capacity to prevent matches escaping to heap
+	result := make([][]uint32, 0, 5)
+	if g.full != nil {
+		if matches := g.full.Match(input); len(matches) > 0 {
+			result = append(result, matches)
+		}
+	}
+	if g.domain != nil {
+		if matches := g.domain.Match(input); len(matches) > 0 {
+			result = append(result, matches)
+		}
+	}
+	if g.substr != nil {
+		if matches := g.substr.Match(input); len(matches) > 0 {
+			result = append(result, matches)
+		}
+	}
+	if g.regex != nil {
+		if matches := g.regex.Match(input); len(matches) > 0 {
+			result = append(result, matches)
+		}
+	}
+	// Only non-empty groups were collected above; CompositeMatches flattens them in order.
+	return CompositeMatches(result)
 }
 
 // MatchAny implements IndexMatcher.MatchAny.
+// It asks the full, domain, substr and regex groups in that order and returns true at the
+// first group that reports a match; groups that were never created by Add are skipped.
 func (g *LinearIndexMatcher) MatchAny(input string) bool {
-	return len(g.Match(input)) > 0
+	if g.full != nil && g.full.MatchAny(input) {
+		return true
+	}
+	if g.domain != nil && g.domain.MatchAny(input) {
+		return true
+	}
+	if g.substr != nil && g.substr.MatchAny(input) {
+		return true
+	}
+	return g.regex != nil && g.regex.MatchAny(input)
 }
 
 // Size implements IndexMatcher.Size.

+ 24 - 9
common/strmatcher/indexmatcher_mph.go

@@ -8,15 +8,11 @@ type MphIndexMatcher struct {
 	count uint32
 	mph   *MphMatcherGroup
 	ac    *ACAutomatonMatcherGroup
-	regex SimpleMatcherGroup
+	regex *SimpleMatcherGroup
 }
 
 func NewMphIndexMatcher() *MphIndexMatcher {
-	return &MphIndexMatcher{
-		mph:   nil,
-		ac:    nil,
-		regex: SimpleMatcherGroup{},
-	}
+	return new(MphIndexMatcher)
 }
 
 // Add implements IndexMatcher.Add.
@@ -41,6 +37,9 @@ func (g *MphIndexMatcher) Add(matcher Matcher) uint32 {
 		}
 		g.ac.AddSubstrMatcher(matcher, index)
 	case *RegexMatcher:
+		if g.regex == nil {
+			g.regex = &SimpleMatcherGroup{}
+		}
 		g.regex.AddMatcher(matcher, index)
 	}
 
@@ -59,8 +58,24 @@ func (g *MphIndexMatcher) Build() error {
 }
 
 // Match implements IndexMatcher.Match.
+// It queries the mph, ac and regex groups in that order, skipping groups that are nil,
+// and merges the non-empty results with CompositeMatches.
-func (*MphIndexMatcher) Match(string) []uint32 {
-	return nil
+func (g *MphIndexMatcher) Match(input string) []uint32 {
+	// At most three groups are appended below, so the fixed capacity is never exceeded.
+	result := make([][]uint32, 0, 5)
+	if g.mph != nil {
+		if matches := g.mph.Match(input); len(matches) > 0 {
+			result = append(result, matches)
+		}
+	}
+	if g.ac != nil {
+		if matches := g.ac.Match(input); len(matches) > 0 {
+			result = append(result, matches)
+		}
+	}
+	if g.regex != nil {
+		if matches := g.regex.Match(input); len(matches) > 0 {
+			result = append(result, matches)
+		}
+	}
+	return CompositeMatches(result)
 }
 
 // MatchAny implements IndexMatcher.MatchAny.
@@ -71,7 +86,7 @@ func (g *MphIndexMatcher) MatchAny(input string) bool {
 	if g.ac != nil && g.ac.MatchAny(input) {
 		return true
 	}
-	return g.regex.MatchAny(input)
+	return g.regex != nil && g.regex.MatchAny(input)
 }
 
 // Size implements IndexMatcher.Size.

+ 94 - 0
common/strmatcher/indexmatcher_mph_test.go

@@ -0,0 +1,94 @@
+package strmatcher_test
+
+import (
+	"reflect"
+	"testing"
+
+	"github.com/v2fly/v2ray-core/v5/common"
+	. "github.com/v2fly/v2ray-core/v5/common/strmatcher"
+)
+
+// TestMphIndexMatcher adds a mixed list of Regex, Substr, Domain and Full rules to a
+// MphIndexMatcher, builds it, and checks the exact index slice returned by Match for
+// several inputs. Expected outputs refer to rules by their 1-based position in the
+// rules slice (the first rule is 1).
+func TestMphIndexMatcher(t *testing.T) {
+	rules := []struct {
+		Type   Type
+		Domain string
+	}{
+		{
+			Type:   Regex,
+			Domain: "apis\\.us$",
+		},
+		{
+			Type:   Substr,
+			Domain: "apis",
+		},
+		{
+			Type:   Domain,
+			Domain: "googleapis.com",
+		},
+		{
+			Type:   Domain,
+			Domain: "com",
+		},
+		{
+			Type:   Full,
+			Domain: "www.baidu.com",
+		},
+		{
+			Type:   Substr,
+			Domain: "apis",
+		},
+		{
+			Type:   Domain,
+			Domain: "googleapis.com",
+		},
+		{
+			Type:   Full,
+			Domain: "fonts.googleapis.com",
+		},
+		{
+			Type:   Full,
+			Domain: "www.baidu.com",
+		},
+		{
+			Type:   Domain,
+			Domain: "example.com",
+		},
+	}
+	cases := []struct {
+		Input  string
+		Output []uint32
+	}{
+		{
+			Input:  "www.baidu.com",
+			Output: []uint32{5, 9, 4},
+		},
+		{
+			Input:  "fonts.googleapis.com",
+			Output: []uint32{8, 3, 7, 4, 2, 6},
+		},
+		{
+			Input:  "example.googleapis.com",
+			Output: []uint32{3, 7, 4, 2, 6},
+		},
+		{
+			Input:  "testapis.us",
+			Output: []uint32{2, 6, 1},
+		},
+		{
+			Input:  "example.com",
+			Output: []uint32{10, 4},
+		},
+	}
+	matcherGroup := NewMphIndexMatcher()
+	for _, rule := range rules {
+		matcher, err := rule.Type.New(rule.Domain)
+		common.Must(err)
+		matcherGroup.Add(matcher)
+	}
+	// A failed build would make every assertion below meaningless, so do not ignore it.
+	common.Must(matcherGroup.Build())
+	for _, test := range cases {
+		if m := matcherGroup.Match(test.Input); !reflect.DeepEqual(m, test.Output) {
+			t.Error("unexpected output: ", m, " for test case ", test)
+		}
+	}
+}

+ 5 - 13
common/strmatcher/matchergroup_ac_automation.go

@@ -127,8 +127,8 @@ func (ac *ACAutomatonMatcherGroup) Build() error {
 
 // Match implements MatcherGroup.Match.
 func (ac *ACAutomatonMatcherGroup) Match(input string) []uint32 {
-	var suffixMatches [][]uint32
-	var substrMatches [][]uint32
+	suffixMatches := make([][]uint32, 0, 5)
+	substrMatches := make([][]uint32, 0, 5)
 	fullMatch := true    // fullMatch indicates no fail edge traversed so far.
 	node := &ac.nodes[0] // start from root node.
 	// 1. the match string is all through trie edge. FULL MATCH or DOMAIN
@@ -177,18 +177,10 @@ func (ac *ACAutomatonMatcherGroup) Match(input string) []uint32 {
 			suffixMatches = append(suffixMatches, values[Full])
 		}
 	}
-	switch matches := append(substrMatches, suffixMatches...); len(matches) { // nolint: gocritic
-	case 0:
-		return nil
-	case 1:
-		return matches[0]
-	default:
-		result := []uint32{}
-		for i := len(matches) - 1; i >= 0; i-- {
-			result = append(result, matches[i]...)
-		}
-		return result
+	if len(substrMatches) == 0 {
+		return CompositeMatchesReverse(suffixMatches)
 	}
+	return CompositeMatchesReverse(append(substrMatches, suffixMatches...))
 }
 
 // MatchAny implements MatcherGroup.MatchAny.

+ 79 - 71
common/strmatcher/matchergroup_domain.go

@@ -1,101 +1,109 @@
 package strmatcher
 
-import "strings"
-
-func breakDomain(domain string) []string {
-	return strings.Split(domain, ".")
-}
-
-type node struct {
-	values []uint32
-	sub    map[string]*node
+type trieNode struct {
+	values   []uint32
+	children map[string]*trieNode
 }
 
 // DomainMatcherGroup is an implementation of MatcherGroup.
 // It uses trie to optimize both memory consumption and lookup speed. Trie node is domain label based.
 type DomainMatcherGroup struct {
-	root *node
+	root *trieNode
 }
 
-// AddDomainMatcher implements MatcherGroupForDomain.AddDomainMatcher.
-func (g *DomainMatcherGroup) AddDomainMatcher(matcher DomainMatcher, value uint32) {
-	if g.root == nil {
-		g.root = new(node)
+// NewDomainMatcherGroup returns a DomainMatcherGroup whose trie already has an
+// empty root node, ready for AddDomainMatcher and Match.
+func NewDomainMatcherGroup() *DomainMatcherGroup {
+	g := new(DomainMatcherGroup)
+	g.root = &trieNode{}
+	return g
+}
 
-	current := g.root
-	parts := breakDomain(matcher.Pattern())
-	for i := len(parts) - 1; i >= 0; i-- {
-		part := parts[i]
-		if current.sub == nil {
-			current.sub = make(map[string]*node)
+// AddDomainMatcher implements MatcherGroupForDomain.AddDomainMatcher.
+func (g *DomainMatcherGroup) AddDomainMatcher(matcher DomainMatcher, value uint32) {
+	node := g.root
+	pattern := matcher.Pattern()
+	for i := len(pattern); i > 0; {
+		var part string
+		for j := i - 1; ; j-- {
+			if pattern[j] == '.' {
+				part = pattern[j+1 : i]
+				i = j
+				break
+			}
+			if j == 0 {
+				part = pattern[j:i]
+				i = j
+				break
+			}
 		}
-		next := current.sub[part]
+		if node.children == nil {
+			node.children = make(map[string]*trieNode)
+		}
+		next := node.children[part]
 		if next == nil {
-			next = new(node)
-			current.sub[part] = next
+			next = new(trieNode)
+			node.children[part] = next
 		}
-		current = next
+		node = next
 	}
 
-	current.values = append(current.values, value)
+	node.values = append(node.values, value)
 }
 
 // Match implements MatcherGroup.Match.
-func (g *DomainMatcherGroup) Match(domain string) []uint32 {
-	if domain == "" {
-		return nil
-	}
-
-	current := g.root
-	if current == nil {
-		return nil
-	}
-
-	nextPart := func(idx int) int {
-		for i := idx - 1; i >= 0; i-- {
-			if domain[i] == '.' {
-				return i
+func (g *DomainMatcherGroup) Match(input string) []uint32 {
+	matches := make([][]uint32, 0, 5)
+	node := g.root
+	for i := len(input); i > 0; {
+		for j := i - 1; ; j-- {
+			if input[j] == '.' { // Domain label found
+				node = node.children[input[j+1:i]]
+				i = j
+				break
+			}
+			if j == 0 { // The last part of domain label
+				node = node.children[input[j:i]]
+				i = j
+				break
 			}
 		}
-		return -1
-	}
-
-	matches := [][]uint32{}
-	idx := len(domain)
-	for {
-		if idx == -1 || current.sub == nil {
-			break
-		}
-
-		nidx := nextPart(idx)
-		part := domain[nidx+1 : idx]
-		next := current.sub[part]
-		if next == nil {
+		if node == nil { // No more match if no trie edge transition
 			break
 		}
-		current = next
-		idx = nidx
-		if len(current.values) > 0 {
-			matches = append(matches, current.values)
+		if len(node.values) > 0 { // Found matched matchers
+			matches = append(matches, node.values)
 		}
-	}
-	switch len(matches) {
-	case 0:
-		return nil
-	case 1:
-		return matches[0]
-	default:
-		result := []uint32{}
-		for idx := range matches {
-			// Insert reversely, the subdomain that matches further ranks higher
-			result = append(result, matches[len(matches)-1-idx]...)
+		if node.children == nil { // No more match if leaf node reached
+			break
 		}
-		return result
 	}
+	return CompositeMatchesReverse(matches)
 }
 
 // MatchAny implements MatcherGroup.MatchAny.
+// It walks the trie one domain label at a time, from the last label of input towards
+// the first, and returns true as soon as a visited node carries at least one value.
+// NOTE(review): g.root is dereferenced without a nil check, so the group is presumably
+// expected to come from NewDomainMatcherGroup — confirm against callers.
-func (g *DomainMatcherGroup) MatchAny(domain string) bool {
-	return len(g.Match(domain)) > 0
+func (g *DomainMatcherGroup) MatchAny(input string) bool {
+	node := g.root
+	// i is the exclusive end of the label currently being extracted.
+	for i := len(input); i > 0; {
+		// Scan backwards from i to find where the current label starts.
+		for j := i - 1; ; j-- {
+			if input[j] == '.' { // Label is input[j+1:i]; continue left of the dot
+				node = node.children[input[j+1:i]]
+				i = j
+				break
+			}
+			if j == 0 { // Reached the start of input: the label is input[0:i]
+				node = node.children[input[j:i]]
+				i = j
+				break
+			}
+		}
+		if node == nil { // No trie edge for this label
+			return false
+		}
+		if len(node.values) > 0 { // A pattern ends at this node
+			return true
+		}
+		if node.children == nil { // Leaf node: nothing deeper can match
+			return false
+		}
+	}
+	return false
 }

+ 2 - 2
common/strmatcher/matchergroup_domain_test.go

@@ -82,7 +82,7 @@ func TestDomainMatcherGroup(t *testing.T) {
 			Result: []uint32{4, 6},
 		},
 	}
-	g := new(DomainMatcherGroup)
+	g := NewDomainMatcherGroup()
 	for _, pattern := range patterns {
 		AddMatcherToGroup(g, DomainMatcher(pattern.Pattern), pattern.Value)
 	}
@@ -95,7 +95,7 @@ func TestDomainMatcherGroup(t *testing.T) {
 }
 
 func TestEmptyDomainMatcherGroup(t *testing.T) {
-	g := new(DomainMatcherGroup)
+	g := NewDomainMatcherGroup()
 	r := g.Match("v2fly.org")
 	if len(r) != 0 {
 		t.Error("Expect [], but ", r)

+ 8 - 8
common/strmatcher/matchergroup_full.go

@@ -6,25 +6,25 @@ type FullMatcherGroup struct {
 	matchers map[string][]uint32
 }
 
-// AddFullMatcher implements MatcherGroupForFull.AddFullMatcher.
-func (g *FullMatcherGroup) AddFullMatcher(matcher FullMatcher, value uint32) {
-	if g.matchers == nil {
-		g.matchers = make(map[string][]uint32)
+// NewFullMatcherGroup returns a FullMatcherGroup with its pattern map allocated,
+// ready for AddFullMatcher.
+func NewFullMatcherGroup() *FullMatcherGroup {
+	g := new(FullMatcherGroup)
+	g.matchers = map[string][]uint32{}
+	return g
+}
 
+// AddFullMatcher implements MatcherGroupForFull.AddFullMatcher.
+// It appends value to the list stored under the matcher's pattern.
+// The map is written without a nil check, so the group must have an allocated map
+// (as NewFullMatcherGroup provides); writing to a nil map panics in Go.
+func (g *FullMatcherGroup) AddFullMatcher(matcher FullMatcher, value uint32) {
 	domain := matcher.Pattern()
 	g.matchers[domain] = append(g.matchers[domain], value)
 }
 
 // Match implements MatcherGroup.Match.
 func (g *FullMatcherGroup) Match(input string) []uint32 {
-	if g.matchers == nil {
-		return nil
-	}
 	return g.matchers[input]
 }
 
-// MatchAny implements MatcherGroup.Any.
+// MatchAny implements MatcherGroup.MatchAny.
 func (g *FullMatcherGroup) MatchAny(input string) bool {
-	return len(g.Match(input)) > 0
+	_, found := g.matchers[input]
+	return found
 }

+ 2 - 2
common/strmatcher/matchergroup_full_test.go

@@ -50,7 +50,7 @@ func TestFullMatcherGroup(t *testing.T) {
 			Result: []uint32{4, 6},
 		},
 	}
-	g := new(FullMatcherGroup)
+	g := NewFullMatcherGroup()
 	for _, pattern := range patterns {
 		AddMatcherToGroup(g, FullMatcher(pattern.Pattern), pattern.Value)
 	}
@@ -63,7 +63,7 @@ func TestFullMatcherGroup(t *testing.T) {
 }
 
 func TestEmptyFullMatcherGroup(t *testing.T) {
-	g := new(FullMatcherGroup)
+	g := NewFullMatcherGroup()
 	r := g.Match("v2fly.org")
 	if len(r) != 0 {
 		t.Error("Expect [], but ", r)

+ 2 - 13
common/strmatcher/matchergroup_mph.go

@@ -152,7 +152,7 @@ func (g *MphMatcherGroup) Lookup(rollingHash uint32, input string) uint32 {
 
 // Match implements MatcherGroup.Match.
 func (g *MphMatcherGroup) Match(input string) []uint32 {
-	matches := [][]uint32{}
+	matches := make([][]uint32, 0, 5)
 	hash := uint32(0)
 	for i := len(input) - 1; i >= 0; i-- {
 		hash = hash*PrimeRK + uint32(input[i])
@@ -165,18 +165,7 @@ func (g *MphMatcherGroup) Match(input string) []uint32 {
 	if mphIdx := g.Lookup(hash, input); mphIdx != 0 {
 		matches = append(matches, g.values[mphIdx])
 	}
-	switch len(matches) {
-	case 0:
-		return nil
-	case 1:
-		return matches[0]
-	default:
-		result := []uint32{}
-		for i := len(matches) - 1; i >= 0; i-- {
-			result = append(result, matches[i]...)
-		}
-		return result
-	}
+	return CompositeMatchesReverse(matches)
 }
 
 // MatchAny implements MatcherGroup.MatchAny.

+ 6 - 1
common/strmatcher/matchergroup_simple.go

@@ -32,5 +32,10 @@ func (g *SimpleMatcherGroup) Match(input string) []uint32 {
 
 // MatchAny implements MatcherGroup.MatchAny.
+// It tests the stored matchers in order and returns true at the first one that
+// matches input, without building a result slice.
 func (g *SimpleMatcherGroup) MatchAny(input string) bool {
-	return len(g.Match(input)) > 0
+	for _, e := range g.matchers {
+		if e.matcher.Match(input) {
+			return true
+		}
+	}
+	return false
 }

+ 19 - 5
common/strmatcher/matchergroup_substr.go

@@ -20,16 +20,30 @@ func (g *SubstrMatcherGroup) AddSubstrMatcher(matcher SubstrMatcher, value uint3
 
 // Match implements MatcherGroup.Match.
 func (g *SubstrMatcherGroup) Match(input string) []uint32 {
-	result := []uint32{}
+	var result []uint32
 	for i, pattern := range g.patterns {
 		for j := strings.LastIndex(input, pattern); j != -1; j = strings.LastIndex(input[:j], pattern) {
 			result = append(result, uint32(j)<<16|uint32(i)&0xffff) // uint32: position (higher 16 bit) | patternIdx (lower 16 bit)
 		}
 	}
-	// Sort the match results in dictionary order, so that:
-	//   1. Pattern matched at smaller position (meaning matched further) takes precedence.
-	//   2. When patterns matched at same position, pattern with smaller index (meaning inserted early) takes precedence.
-	sort.Slice(result, func(i, j int) bool { return result[i] < result[j] })
+	// sort.Slice will trigger an allocation no matter what the input is. See https://github.com/golang/go/issues/17332
+	// We special-case the sorting by result length to avoid memory allocation as much as possible.
+	switch len(result) {
+	case 0:
+		return nil
+	case 1:
+		// No need to sort
+	case 2:
+		// Do a simple swap if unsorted
+		if result[0] > result[1] {
+			result[0], result[1] = result[1], result[0]
+		}
+	default:
+		// Sort the match results in dictionary order, so that:
+		//   1. Pattern matched at smaller position (meaning matched further) takes precedence.
+		//   2. When patterns matched at same position, pattern with smaller index (meaning inserted early) takes precedence.
+		sort.Slice(result, func(i, j int) bool { return result[i] < result[j] })
+	}
 	for i, entry := range result {
 		result[i] = g.values[entry&0xffff] // Get pattern value from its index (the lower 16 bit)
 	}

+ 110 - 1
common/strmatcher/matchers.go

@@ -4,6 +4,7 @@ import (
 	"errors"
 	"regexp"
 	"strings"
+	"unicode/utf8"
 )
 
 // FullMatcher is an implementation of Matcher.
@@ -96,6 +97,10 @@ func (t Type) New(pattern string) (Matcher, error) {
 	case Substr:
 		return SubstrMatcher(pattern), nil
 	case Domain:
+		pattern, err := ToDomain(pattern)
+		if err != nil {
+			return nil, err
+		}
 		return DomainMatcher(pattern), nil
 	case Regex: // 1. regex matching is case-sensitive
 		regex, err := regexp.Compile(pattern)
@@ -104,10 +109,73 @@ func (t Type) New(pattern string) (Matcher, error) {
 		}
 		return &RegexMatcher{pattern: regex}, nil
 	default:
-		panic("Unknown type")
+		return nil, errors.New("unknown matcher type")
 	}
 }
 
+// NewDomainPattern creates a new Matcher of type t from the given domain pattern.
+// Unlike `Type.New`, the Full, Substr and Domain types all pass the pattern through
+// ToDomain first, so the pattern is validated and lower-cased before the matcher is built.
+// Regex patterns are compiled as-is. An unknown type yields an error.
+func (t Type) NewDomainPattern(pattern string) (Matcher, error) {
+	switch t {
+	case Full, Substr, Domain:
+		domain, err := ToDomain(pattern)
+		if err != nil {
+			return nil, err
+		}
+		switch t {
+		case Full:
+			return FullMatcher(domain), nil
+		case Substr:
+			return SubstrMatcher(domain), nil
+		default: // Domain
+			return DomainMatcher(domain), nil
+		}
+	case Regex: // Regex's charset not in LDH subset
+		compiled, err := regexp.Compile(pattern)
+		if err != nil {
+			return nil, err
+		}
+		return &RegexMatcher{pattern: compiled}, nil
+	default:
+		return nil, errors.New("unknown matcher type")
+	}
+}
+
+// ToDomain converts the input pattern to a domain string and returns an error if the
+// conversion cannot be made.
+//  1. The pattern must conform to the Letter-Digit-Hyphen (LDH) subset (https://tools.ietf.org/html/rfc952):
+//     * Letters A to Z (case-insensitive; uppercase letters are converted to lowercase)
+//     * Digits 0 to 9
+//     * Hyphens(-) and Periods(.)
+//  2. Non-ASCII characters are not supported for now.
+//     * Internationalized domain names may be converted to Punycode in the future if needed.
+func ToDomain(pattern string) (string, error) {
+	var domain strings.Builder
+	domain.Grow(len(pattern))
+	for _, c := range []byte(pattern) {
+		switch {
+		case c >= utf8.RuneSelf:
+			return "", errors.New("non-ASCII characters not supported for now")
+		case 'A' <= c && c <= 'Z':
+			domain.WriteByte(c + ('a' - 'A')) // fold to lowercase
+		case ('a' <= c && c <= 'z') || ('0' <= c && c <= '9') || c == '-' || c == '.':
+			domain.WriteByte(c)
+		default:
+			return "", errors.New("pattern string does not conform to Letter-Digit-Hyphen (LDH) subset")
+		}
+	}
+	return domain.String(), nil
+}
+
 // MatcherGroupForAll is an interface indicating a MatcherGroup could accept all types of matchers.
 type MatcherGroupForAll interface {
 	AddMatcher(matcher Matcher, value uint32)
@@ -137,6 +205,10 @@ type MatcherGroupForRegex interface {
 // It returns error if the MatcherGroup does not accept the provided Matcher's type.
 // This function is provided to help writing code to test a MatcherGroup.
 func AddMatcherToGroup(g MatcherGroup, matcher Matcher, value uint32) error {
+	if g, ok := g.(IndexMatcher); ok {
+		g.Add(matcher)
+		return nil
+	}
 	if g, ok := g.(MatcherGroupForAll); ok {
 		g.AddMatcher(matcher, value)
 		return nil
@@ -165,3 +237,40 @@ func AddMatcherToGroup(g MatcherGroup, matcher Matcher, value uint32) error {
 	}
 	return errors.New("cannot add matcher to matcher group")
 }
+
+// CompositeMatches flattens the matches slice to produce a single matched indices slice.
+// It is designed to avoid new memory allocation whenever possible:
+//   - with no groups it returns nil;
+//   - with exactly one group it returns that group itself, so the result shares memory with the input;
+//   - otherwise it copies all groups, in their given order, into one slice that is
+//     allocated once with the exact total length.
+func CompositeMatches(matches [][]uint32) []uint32 {
+	switch len(matches) {
+	case 0:
+		return nil
+	case 1:
+		return matches[0]
+	default:
+		// Size the result exactly so append never has to grow it.
+		total := 0
+		for _, group := range matches {
+			total += len(group)
+		}
+		result := make([]uint32, 0, total)
+		for _, group := range matches {
+			result = append(result, group...)
+		}
+		return result
+	}
+}
+
+// CompositeMatchesReverse flattens the matches slice to produce a single matched indices slice.
+// It is designed so that:
+//  1. All groups are concatenated in reverse order, so the matcher that matches further ranks higher.
+//  2. Indices within the same group keep their original order.
+//  3. New memory allocation is avoided whenever possible: no groups yields nil, a single
+//     group is returned as-is (sharing memory with the input), and otherwise one slice
+//     is allocated with the exact total length.
+func CompositeMatchesReverse(matches [][]uint32) []uint32 {
+	switch len(matches) {
+	case 0:
+		return nil
+	case 1:
+		return matches[0]
+	default:
+		// Size the result exactly so append never has to grow it.
+		total := 0
+		for _, group := range matches {
+			total += len(group)
+		}
+		result := make([]uint32, 0, total)
+		for i := len(matches) - 1; i >= 0; i-- {
+			result = append(result, matches[i]...)
+		}
+		return result
+	}
+}