healthping.go 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241
  1. package burst
  2. import (
  3. "context"
  4. "fmt"
  5. "strings"
  6. "sync"
  7. "time"
  8. "github.com/v2fly/v2ray-core/v5/common/dice"
  9. )
  10. // HealthPingSettings holds settings for health Checker
  11. type HealthPingSettings struct {
  12. Destination string `json:"destination"`
  13. Connectivity string `json:"connectivity"`
  14. Interval time.Duration `json:"interval"`
  15. SamplingCount int `json:"sampling"`
  16. Timeout time.Duration `json:"timeout"`
  17. }
  18. // HealthPing is the health checker for balancers
  19. type HealthPing struct {
  20. ctx context.Context
  21. access sync.Mutex
  22. ticker *time.Ticker
  23. tickerClose chan struct{}
  24. Settings *HealthPingSettings
  25. Results map[string]*HealthPingRTTS
  26. }
  27. // NewHealthPing creates a new HealthPing with settings
  28. func NewHealthPing(ctx context.Context, config *HealthPingConfig) *HealthPing {
  29. settings := &HealthPingSettings{}
  30. if config != nil {
  31. settings = &HealthPingSettings{
  32. Connectivity: strings.TrimSpace(config.Connectivity),
  33. Destination: strings.TrimSpace(config.Destination),
  34. Interval: time.Duration(config.Interval),
  35. SamplingCount: int(config.SamplingCount),
  36. Timeout: time.Duration(config.Timeout),
  37. }
  38. }
  39. if settings.Destination == "" {
  40. settings.Destination = "http://www.google.com/gen_204"
  41. }
  42. if settings.Interval == 0 {
  43. settings.Interval = time.Duration(1) * time.Minute
  44. } else if settings.Interval < 10 {
  45. newError("health check interval is too small, 10s is applied").AtWarning().WriteToLog()
  46. settings.Interval = time.Duration(10) * time.Second
  47. }
  48. if settings.SamplingCount <= 0 {
  49. settings.SamplingCount = 10
  50. }
  51. if settings.Timeout <= 0 {
  52. // results are saved after all health pings finish,
  53. // a larger timeout could possibly makes checks run longer
  54. settings.Timeout = time.Duration(5) * time.Second
  55. }
  56. return &HealthPing{
  57. ctx: ctx,
  58. Settings: settings,
  59. Results: nil,
  60. }
  61. }
  62. // StartScheduler implements the HealthChecker
  63. func (h *HealthPing) StartScheduler(selector func() ([]string, error)) {
  64. if h.ticker != nil {
  65. return
  66. }
  67. interval := h.Settings.Interval * time.Duration(h.Settings.SamplingCount)
  68. ticker := time.NewTicker(interval)
  69. tickerClose := make(chan struct{})
  70. h.ticker = ticker
  71. h.tickerClose = tickerClose
  72. go func() {
  73. for {
  74. go func() {
  75. tags, err := selector()
  76. if err != nil {
  77. newError("error select outbounds for scheduled health check: ", err).AtWarning().WriteToLog()
  78. return
  79. }
  80. h.doCheck(tags, interval, h.Settings.SamplingCount)
  81. h.Cleanup(tags)
  82. }()
  83. select {
  84. case <-ticker.C:
  85. continue
  86. case <-tickerClose:
  87. return
  88. }
  89. }
  90. }()
  91. }
  92. // StopScheduler implements the HealthChecker
  93. func (h *HealthPing) StopScheduler() {
  94. if h.ticker == nil {
  95. return
  96. }
  97. h.ticker.Stop()
  98. h.ticker = nil
  99. close(h.tickerClose)
  100. h.tickerClose = nil
  101. }
  102. // Check implements the HealthChecker
  103. func (h *HealthPing) Check(tags []string) error {
  104. if len(tags) == 0 {
  105. return nil
  106. }
  107. newError("perform one-time health check for tags ", tags).AtInfo().WriteToLog()
  108. h.doCheck(tags, 0, 1)
  109. return nil
  110. }
  111. type rtt struct {
  112. handler string
  113. value time.Duration
  114. }
  115. // doCheck performs the 'rounds' amount checks in given 'duration'. You should make
  116. // sure all tags are valid for current balancer
  117. func (h *HealthPing) doCheck(tags []string, duration time.Duration, rounds int) {
  118. count := len(tags) * rounds
  119. if count == 0 {
  120. return
  121. }
  122. ch := make(chan *rtt, count)
  123. for _, tag := range tags {
  124. handler := tag
  125. client := newPingClient(
  126. h.ctx,
  127. h.Settings.Destination,
  128. h.Settings.Timeout,
  129. handler,
  130. )
  131. for i := 0; i < rounds; i++ {
  132. delay := time.Duration(0)
  133. if duration > 0 {
  134. delay = time.Duration(dice.Roll(int(duration)))
  135. }
  136. time.AfterFunc(delay, func() {
  137. newError("checking ", handler).AtDebug().WriteToLog()
  138. delay, err := client.MeasureDelay()
  139. if err == nil {
  140. ch <- &rtt{
  141. handler: handler,
  142. value: delay,
  143. }
  144. return
  145. }
  146. if !h.checkConnectivity() {
  147. newError("network is down").AtWarning().WriteToLog()
  148. ch <- &rtt{
  149. handler: handler,
  150. value: 0,
  151. }
  152. return
  153. }
  154. newError(fmt.Sprintf(
  155. "error ping %s with %s: %s",
  156. h.Settings.Destination,
  157. handler,
  158. err,
  159. )).AtWarning().WriteToLog()
  160. ch <- &rtt{
  161. handler: handler,
  162. value: rttFailed,
  163. }
  164. })
  165. }
  166. }
  167. for i := 0; i < count; i++ {
  168. rtt := <-ch
  169. if rtt.value > 0 {
  170. // should not put results when network is down
  171. h.PutResult(rtt.handler, rtt.value)
  172. }
  173. }
  174. }
  175. // PutResult puts a ping rtt to results
  176. func (h *HealthPing) PutResult(tag string, rtt time.Duration) {
  177. h.access.Lock()
  178. defer h.access.Unlock()
  179. if h.Results == nil {
  180. h.Results = make(map[string]*HealthPingRTTS)
  181. }
  182. r, ok := h.Results[tag]
  183. if !ok {
  184. // validity is 2 times to sampling period, since the check are
  185. // distributed in the time line randomly, in extreme cases,
  186. // previous checks are distributed on the left, and latters
  187. // on the right
  188. validity := h.Settings.Interval * time.Duration(h.Settings.SamplingCount) * 2
  189. r = NewHealthPingResult(h.Settings.SamplingCount, validity)
  190. h.Results[tag] = r
  191. }
  192. r.Put(rtt)
  193. }
  194. // Cleanup removes results of removed handlers,
  195. // tags should be all valid tags of the Balancer now
  196. func (h *HealthPing) Cleanup(tags []string) {
  197. h.access.Lock()
  198. defer h.access.Unlock()
  199. for tag := range h.Results {
  200. found := false
  201. for _, v := range tags {
  202. if tag == v {
  203. found = true
  204. break
  205. }
  206. }
  207. if !found {
  208. delete(h.Results, tag)
  209. }
  210. }
  211. }
  212. // checkConnectivity checks the network connectivity, it returns
  213. // true if network is good or "connectivity check url" not set
  214. func (h *HealthPing) checkConnectivity() bool {
  215. if h.Settings.Connectivity == "" {
  216. return true
  217. }
  218. tester := newDirectPingClient(
  219. h.Settings.Connectivity,
  220. h.Settings.Timeout,
  221. )
  222. if _, err := tester.MeasureDelay(); err != nil {
  223. return false
  224. }
  225. return true
  226. }