diff --git a/cmd/notifier/config.go b/cmd/notifier/config.go index eb737a798..0801d54bb 100644 --- a/cmd/notifier/config.go +++ b/cmd/notifier/config.go @@ -10,6 +10,7 @@ import ( "github.com/moira-alert/moira/cmd" "github.com/moira-alert/moira/notifier" "github.com/moira-alert/moira/notifier/selfstate" + "github.com/moira-alert/moira/notifier/selfstate/heartbeat" ) type config struct { @@ -43,7 +44,7 @@ type notifierConfig struct { // Senders configuration section. See https://moira.readthedocs.io/en/latest/installation/configuration.html for more explanation Senders []map[string]interface{} `yaml:"senders"` // Self state monitor configuration section. Note: No inner subscriptions is required. It's own notification mechanism will be used. - SelfState selfStateConfig `yaml:"moira_selfstate"` + Selfstate selfstateConfig `yaml:"moira_selfstate"` // Web-UI uri prefix for trigger links in notifications. For example: with 'http://localhost' every notification will contain link like 'http://localhost/trigger/triggerId' FrontURI string `yaml:"front_uri"` // Timezone to use to convert ticks. Default is UTC. See https://golang.org/pkg/time/#LoadLocation for more details. 
@@ -58,27 +59,118 @@ type notifierConfig struct { SetLogLevel setLogLevelConfig `yaml:"set_log_level"` } -type selfStateConfig struct { - // If true, Self state monitor will be enabled - Enabled bool `yaml:"enabled"` - // If true, Self state monitor will check remote checker status - RemoteTriggersEnabled bool `yaml:"remote_triggers_enabled"` - // Max Redis disconnect delay to send alert when reached - RedisDisconnectDelay string `yaml:"redis_disconect_delay"` - // Max Filter metrics receive delay to send alert when reached - LastMetricReceivedDelay string `yaml:"last_metric_received_delay"` - // Max Checker checks perform delay to send alert when reached - LastCheckDelay string `yaml:"last_check_delay"` - // Max Remote triggers Checker checks perform delay to send alert when reached - LastRemoteCheckDelay string `yaml:"last_remote_check_delay"` - // Contact list for Self state monitor alerts - Contacts []map[string]string `yaml:"contacts"` +type heartbeaterAlertConfig struct { + Name string `yaml:"name"` + Desc string `yaml:"desc"` +} + +type heartbeaterBaseConfig struct { + Enabled bool `yaml:"enabled"` + NeedTurnOffNotifier bool `yaml:"need_turn_off_notifier"` + + AlertCfg heartbeaterAlertConfig `yaml:"alert"` +} + +func (cfg heartbeaterBaseConfig) getSettings() heartbeat.HeartbeaterBaseConfig { + return heartbeat.HeartbeaterBaseConfig{ + Enabled: cfg.Enabled, + NeedTurnOffNotifier: cfg.NeedTurnOffNotifier, + + AlertCfg: heartbeat.AlertConfig{ + Name: cfg.AlertCfg.Name, + Desc: cfg.AlertCfg.Desc, + }, + } +} + +type databaseHeartbeaterConfig struct { + heartbeaterBaseConfig + + RedisDisconnectDelay string `yaml:"redis_disconnect_delay"` +} + +type filterHeartbeaterConfig struct { + heartbeaterBaseConfig + + MetricReceivedDelay string `yaml:"last_metric_received_delay"` +} + +type localCheckerHeartbeaterConfig struct { + heartbeaterBaseConfig + + LocalCheckDelay string `yaml:"last_check_delay"` +} + +type remoteCheckerHeartbeaterConfig struct { + 
heartbeaterBaseConfig + + RemoteCheckDelay string `yaml:"last_remote_check_delay"` +} + +type notifierHeartbeaterConfig struct { + heartbeaterBaseConfig +} + +type heartbeatsConfig struct { + DatabaseCfg databaseHeartbeaterConfig `yaml:"database"` + FilterCfg filterHeartbeaterConfig `yaml:"filter"` + LocalCheckerCfg localCheckerHeartbeaterConfig `yaml:"local_checker"` + RemoteCheckerCfg remoteCheckerHeartbeaterConfig `yaml:"remote_checker"` + NotifierCfg notifierHeartbeaterConfig `yaml:"notifier"` +} + +func (cfg heartbeatsConfig) getSettings() selfstate.HeartbeatsCfg { + return selfstate.HeartbeatsCfg{ + DatabaseCfg: heartbeat.DatabaseHeartbeaterConfig{ + HeartbeaterBaseConfig: cfg.DatabaseCfg.heartbeaterBaseConfig.getSettings(), + RedisDisconnectDelay: to.Duration(cfg.DatabaseCfg.RedisDisconnectDelay), + }, + FilterCfg: heartbeat.FilterHeartbeaterConfig{ + HeartbeaterBaseConfig: cfg.FilterCfg.heartbeaterBaseConfig.getSettings(), + MetricReceivedDelay: to.Duration(cfg.FilterCfg.MetricReceivedDelay), + }, + LocalCheckerCfg: heartbeat.LocalCheckerHeartbeaterConfig{ + HeartbeaterBaseConfig: cfg.LocalCheckerCfg.heartbeaterBaseConfig.getSettings(), + LocalCheckDelay: to.Duration(cfg.LocalCheckerCfg.LocalCheckDelay), + }, + RemoteCheckerCfg: heartbeat.RemoteCheckerHeartbeaterConfig{ + HeartbeaterBaseConfig: cfg.RemoteCheckerCfg.heartbeaterBaseConfig.getSettings(), + RemoteCheckDelay: to.Duration(cfg.RemoteCheckerCfg.RemoteCheckDelay), + }, + NotifierCfg: heartbeat.NotifierHeartbeaterConfig{ + HeartbeaterBaseConfig: cfg.NotifierCfg.heartbeaterBaseConfig.getSettings(), + }, + } +} + +type monitorBaseConfig struct { + Enabled bool `yaml:"enabled"` + HearbeatsCfg heartbeatsConfig `yaml:"heartbeats"` // Self state monitor alerting interval NoticeInterval string `yaml:"notice_interval"` // Self state monitor check interval CheckInterval string `yaml:"check_interval"` } +type adminMonitorConfig struct { + monitorBaseConfig + + AdminContacts []map[string]string 
`yaml:"contacts"` +} + +type userMonitorConfig struct { + monitorBaseConfig +} + +type monitorConfig struct { + AdminCfg adminMonitorConfig `yaml:"admin"` + UserCfg userMonitorConfig `yaml:"user"` +} + +type selfstateConfig struct { + Monitor monitorConfig `yaml:"monitor"` +} + func getDefault() config { return config{ Redis: cmd.RedisConfig{ @@ -105,12 +197,49 @@ func getDefault() config { SenderTimeout: "10s", ResendingTimeout: "1:00", ReschedulingDelay: "60s", - SelfState: selfStateConfig{ - Enabled: false, - RedisDisconnectDelay: "30s", - LastMetricReceivedDelay: "60s", - LastCheckDelay: "60s", - NoticeInterval: "300s", + Selfstate: selfstateConfig{ + Monitor: monitorConfig{ + AdminCfg: adminMonitorConfig{ + monitorBaseConfig: monitorBaseConfig{ + Enabled: false, + HearbeatsCfg: heartbeatsConfig{ + DatabaseCfg: databaseHeartbeaterConfig{ + RedisDisconnectDelay: "30s", + }, + FilterCfg: filterHeartbeaterConfig{ + MetricReceivedDelay: "60s", + }, + LocalCheckerCfg: localCheckerHeartbeaterConfig{ + LocalCheckDelay: "60s", + }, + RemoteCheckerCfg: remoteCheckerHeartbeaterConfig{ + RemoteCheckDelay: "300s", + }, + NotifierCfg: notifierHeartbeaterConfig{}, + }, + }, + }, + UserCfg: userMonitorConfig{ + monitorBaseConfig: monitorBaseConfig{ + Enabled: false, + HearbeatsCfg: heartbeatsConfig{ + DatabaseCfg: databaseHeartbeaterConfig{ + RedisDisconnectDelay: "30s", + }, + FilterCfg: filterHeartbeaterConfig{ + MetricReceivedDelay: "60s", + }, + LocalCheckerCfg: localCheckerHeartbeaterConfig{ + LocalCheckDelay: "60s", + }, + RemoteCheckerCfg: remoteCheckerHeartbeaterConfig{ + RemoteCheckDelay: "300s", + }, + NotifierCfg: notifierHeartbeaterConfig{}, + }, + }, + }, + }, }, FrontURI: "http://localhost", Timezone: "UTC", @@ -188,9 +317,10 @@ func (config *notifierConfig) getSettings(logger moira.Logger) notifier.Config { Int("subscriptions_count", len(subscriptions)). 
Msg("Found dynamic log rules in config for some contacts and subscriptions") + selfstateMonitorEnabled := config.Selfstate.Monitor.AdminCfg.Enabled || config.Selfstate.Monitor.UserCfg.Enabled + return notifier.Config{ - SelfStateEnabled: config.SelfState.Enabled, - SelfStateContacts: config.SelfState.Contacts, + SelfstateMonitorEnabled: selfstateMonitorEnabled, SendingTimeout: to.Duration(config.SenderTimeout), ResendingTimeout: to.Duration(config.ResendingTimeout), ReschedulingDelay: to.Duration(config.ReschedulingDelay), @@ -214,21 +344,26 @@ func checkDateTimeFormat(format string) error { return nil } -func (config *selfStateConfig) getSettings() selfstate.Config { - // 10 sec is default check value - checkInterval := 10 * time.Second - if config.CheckInterval != "" { - checkInterval = to.Duration(config.CheckInterval) - } - +func (cfg *selfstateConfig) getSettings() selfstate.Config { return selfstate.Config{ - Enabled: config.Enabled, - RedisDisconnectDelaySeconds: int64(to.Duration(config.RedisDisconnectDelay).Seconds()), - LastMetricReceivedDelaySeconds: int64(to.Duration(config.LastMetricReceivedDelay).Seconds()), - LastCheckDelaySeconds: int64(to.Duration(config.LastCheckDelay).Seconds()), - LastRemoteCheckDelaySeconds: int64(to.Duration(config.LastRemoteCheckDelay).Seconds()), - CheckInterval: checkInterval, - Contacts: config.Contacts, - NoticeIntervalSeconds: int64(to.Duration(config.NoticeInterval).Seconds()), + Monitor: selfstate.MonitorConfig{ + AdminCfg: selfstate.AdminMonitorConfig{ + MonitorBaseConfig: selfstate.MonitorBaseConfig{ + Enabled: cfg.Monitor.AdminCfg.Enabled, + HeartbeatsCfg: cfg.Monitor.AdminCfg.HearbeatsCfg.getSettings(), + NoticeInterval: to.Duration(cfg.Monitor.AdminCfg.NoticeInterval), + CheckInterval: to.Duration(cfg.Monitor.AdminCfg.CheckInterval), + }, + AdminContacts: cfg.Monitor.AdminCfg.AdminContacts, + }, + UserCfg: selfstate.UserMonitorConfig{ + MonitorBaseConfig: selfstate.MonitorBaseConfig{ + Enabled: 
cfg.Monitor.UserCfg.Enabled, + HeartbeatsCfg: cfg.Monitor.UserCfg.HearbeatsCfg.getSettings(), + NoticeInterval: to.Duration(cfg.Monitor.UserCfg.NoticeInterval), + CheckInterval: to.Duration(cfg.Monitor.UserCfg.CheckInterval), + }, + }, + }, } } diff --git a/cmd/notifier/main.go b/cmd/notifier/main.go index 37e1ac7f7..936bb28e1 100644 --- a/cmd/notifier/main.go +++ b/cmd/notifier/main.go @@ -17,7 +17,7 @@ import ( "github.com/moira-alert/moira/notifier" "github.com/moira-alert/moira/notifier/events" "github.com/moira-alert/moira/notifier/notifications" - "github.com/moira-alert/moira/notifier/selfstate" + selfstate "github.com/moira-alert/moira/notifier/selfstate/worker" _ "go.uber.org/automaxprocs" ) @@ -117,18 +117,14 @@ func main() { Msg("Can not configure senders") } - // Start moira self state checker - if config.Notifier.SelfState.getSettings().Enabled { - selfState := selfstate.NewSelfCheckWorker(logger, database, sender, config.Notifier.SelfState.getSettings(), metrics.ConfigureHeartBeatMetrics(telemetry.Metrics)) - if err := selfState.Start(); err != nil { - logger.Fatal(). - Error(err). - Msg("SelfState failed") - } - defer stopSelfStateChecker(selfState) - } else { - logger.Debug().Msg("Moira Self State Monitoring disabled") + selfstateCfg := config.Notifier.Selfstate.getSettings() + selfstateWorker, err := selfstate.NewSelfstateWorker(selfstateCfg, logger, database, sender, systemClock) + if err != nil { + logger.Fatal(). + Error(err). 
+ Msg("Failed to create a new selfstate worker") } + defer stopSelfstateWorker(selfstateWorker) // Start moira notification fetcher fetchNotificationsWorker := &notifications.FetchNotificationsWorker{ @@ -181,10 +177,10 @@ func stopNotificationsFetcher(worker *notifications.FetchNotificationsWorker) { } } -func stopSelfStateChecker(checker *selfstate.SelfCheckWorker) { - if err := checker.Stop(); err != nil { +func stopSelfstateWorker(selfstateWorker selfstate.SelfstateWorker) { + if err := selfstateWorker.Stop(); err != nil { logger.Error(). Error(err). - Msg("Failed to stop self check worker") + Msg("Failed to stop selfstate worker") } } diff --git a/go.mod b/go.mod index 91857a6eb..6ead8c423 100644 --- a/go.mod +++ b/go.mod @@ -46,7 +46,7 @@ require ( require github.com/prometheus/common v0.37.0 require ( - github.com/golang/mock v1.6.0 + github.com/go-playground/validator/v10 v10.4.1 github.com/hashicorp/golang-lru/v2 v2.0.7 github.com/mattermost/mattermost/server/public v0.1.1 github.com/mitchellh/mapstructure v1.5.0 @@ -184,12 +184,15 @@ require ( github.com/go-openapi/jsonreference v0.20.2 // indirect github.com/go-openapi/spec v0.20.9 // indirect github.com/go-openapi/swag v0.22.4 // indirect + github.com/go-playground/locales v0.13.0 // indirect + github.com/go-playground/universal-translator v0.17.0 // indirect github.com/hashicorp/go-hclog v1.6.3 // indirect github.com/hashicorp/go-plugin v1.6.0 // indirect github.com/hashicorp/yamux v0.1.1 // indirect github.com/huandu/xstrings v1.3.3 // indirect github.com/imdario/mergo v0.3.11 // indirect github.com/josharian/intern v1.0.0 // indirect + github.com/leodido/go-urn v1.2.0 // indirect github.com/mailru/easyjson v0.7.7 // indirect github.com/mitchellh/copystructure v1.0.0 // indirect github.com/mitchellh/go-testing-interface v1.14.1 // indirect diff --git a/go.sum b/go.sum index d95d23b39..ff6f4d1d0 100644 --- a/go.sum +++ b/go.sum @@ -637,9 +637,13 @@ github.com/go-openapi/swag v0.19.15/go.mod 
h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/ github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= github.com/go-openapi/swag v0.22.4 h1:QLMzNJnMGPRNDCbySlcj1x01tzU8/9LTTL9hZZZogBU= github.com/go-openapi/swag v0.22.4/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= +github.com/go-playground/assert/v2 v2.0.1 h1:MsBgLAaY856+nPRTKrp3/OZK38U/wa0CcBYNjji3q3A= github.com/go-playground/assert/v2 v2.0.1/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= +github.com/go-playground/locales v0.13.0 h1:HyWk6mgj5qFqCT5fjGBuRArbVDfE4hi8+e8ceBS/t7Q= github.com/go-playground/locales v0.13.0/go.mod h1:taPMhCMXrRLJO55olJkUXHZBHCxTMfnGwq/HNwmWNS8= +github.com/go-playground/universal-translator v0.17.0 h1:icxd5fm+REJzpZx7ZfpaD876Lmtgy7VtROAbHHXk8no= github.com/go-playground/universal-translator v0.17.0/go.mod h1:UkSxE5sNxxRwHyU+Scu5vgOQjsIJAF8j9muTVoKLVtA= +github.com/go-playground/validator/v10 v10.4.1 h1:pH2c5ADXtd66mxoE0Zm9SUhxE20r7aM3F26W0hOn+GE= github.com/go-playground/validator/v10 v10.4.1/go.mod h1:nlOn6nFhuKACm19sB/8EGNn9GlaMV7XkbRSipzJ0Ii4= github.com/go-redis/redis v6.15.9+incompatible h1:K0pv1D7EQUjfyoMql+r/jZqCLizCGKFlFgcHWWmHQjg= github.com/go-redis/redis v6.15.9+incompatible/go.mod h1:NAIEuMOZ/fxfXJIrKDQDz8wamY7mA7PouImQ2Jvg6kA= @@ -680,7 +684,6 @@ github.com/golang/mock v1.4.1/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt github.com/golang/mock v1.4.3/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= github.com/golang/mock v1.4.4/go.mod h1:l3mdAwkq5BuhzHwde/uurv3sEJeZMXNpwsxVWU71h+4= github.com/golang/mock v1.5.0/go.mod h1:CWnOUgYIOo4TcNZ0wHX3YZCqsaM1I1Jvs6v3mP3KVu8= -github.com/golang/mock v1.6.0 h1:ErTB+efbowRARo13NNdxyJji2egdxLGQhRaY+DUumQc= github.com/golang/mock v1.6.0/go.mod h1:p6yTPP+5HYm5mzsMV8JkE6ZKdX+/wYM6Hr+LicevLPs= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= @@ -899,6 +902,7 @@ 
github.com/kr/pty v1.1.3/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/leodido/go-urn v1.2.0 h1:hpXL4XnriNwQ/ABnpepYM/1vCLWNDfUNts8dX3xTG6Y= github.com/leodido/go-urn v1.2.0/go.mod h1:+8+nEpDfqqsY+g338gtMEUOtuK+4dEMhiQEgxpxOKII= github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw= github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= diff --git a/notifier/config.go b/notifier/config.go index a89b4b7ec..ae3960cfc 100644 --- a/notifier/config.go +++ b/notifier/config.go @@ -9,8 +9,7 @@ const NotificationsLimitUnlimited = int64(-1) // Config is sending settings including log settings. type Config struct { Enabled bool - SelfStateEnabled bool - SelfStateContacts []map[string]string + SelfstateMonitorEnabled bool SendingTimeout time.Duration ResendingTimeout time.Duration ReschedulingDelay time.Duration diff --git a/notifier/registrator.go b/notifier/registrator.go index d1c4bfebb..da894709c 100644 --- a/notifier/registrator.go +++ b/notifier/registrator.go @@ -28,7 +28,7 @@ const ( pushoverSender = "pushover" discordSender = "discord" scriptSender = "script" - selfStateSender = "selfstate" + selfstateSender = "selfstate" slackSender = "slack" telegramSender = "telegram" twilioSmsSender = "twilio sms" @@ -91,12 +91,14 @@ func (notifier *StandardNotifier) RegisterSenders(connector moira.Database) erro return err } } - if notifier.config.SelfStateEnabled { - selfStateSettings := map[string]interface{}{ - "sender_type": selfStateSender, - "contact_type": selfStateSender, + + if notifier.config.SelfstateMonitorEnabled { + selfstateSettings := map[string]interface{}{ + "sender_type": selfstateSender, + "contact_type": selfstateSender, } - if err = 
notifier.RegisterSender(selfStateSettings, &selfstate.Sender{Database: connector}); err != nil { + + if err = notifier.RegisterSender(selfstateSettings, &selfstate.Sender{Database: connector}); err != nil { notifier.logger.Warning(). Error(err). Msg("Failed to register selfstate sender") diff --git a/notifier/selfstate/check.go b/notifier/selfstate/check.go deleted file mode 100644 index 9b2ad2fe5..000000000 --- a/notifier/selfstate/check.go +++ /dev/null @@ -1,120 +0,0 @@ -package selfstate - -import ( - "encoding/json" - "sync" - "time" - - "github.com/moira-alert/moira" - "github.com/moira-alert/moira/notifier" -) - -func (selfCheck *SelfCheckWorker) selfStateChecker(stop <-chan struct{}) error { - selfCheck.Logger.Info().Msg("Moira Notifier Self State Monitor started") - - checkTicker := time.NewTicker(selfCheck.Config.CheckInterval) - defer checkTicker.Stop() - - nextSendErrorMessage := time.Now().Unix() - - for { - select { - case <-stop: - selfCheck.Logger.Info().Msg("Moira Notifier Self State Monitor stopped") - return nil - case <-checkTicker.C: - selfCheck.Logger.Debug(). - Int64("nextSendErrorMessage", nextSendErrorMessage). - Msg("call check") - - nextSendErrorMessage = selfCheck.check(time.Now().Unix(), nextSendErrorMessage) - } - } -} - -func (selfCheck *SelfCheckWorker) handleCheckServices(nowTS int64) []moira.NotificationEvent { - var events []moira.NotificationEvent - - for _, heartbeat := range selfCheck.heartbeats { - currentValue, hasErrors, err := heartbeat.Check(nowTS) - if err != nil { - selfCheck.Logger.Error(). - Error(err). 
- Msg("Heartbeat failed") - } - - if hasErrors { - events = append(events, generateNotificationEvent(heartbeat.GetErrorMessage(), currentValue)) - if heartbeat.NeedTurnOffNotifier() { - selfCheck.setNotifierState(moira.SelfStateERROR) - } - - if !heartbeat.NeedToCheckOthers() { - break - } - } - } - - return events -} - -func (selfCheck *SelfCheckWorker) sendNotification(events []moira.NotificationEvent, nowTS int64) int64 { - eventsJSON, _ := json.Marshal(events) - selfCheck.Logger.Error(). - Int("number_of_events", len(events)). - String("events_json", string(eventsJSON)). - Msg("Health check. Send package notification events") - selfCheck.sendErrorMessages(events) - return nowTS + selfCheck.Config.NoticeIntervalSeconds -} - -func (selfCheck *SelfCheckWorker) check(nowTS int64, nextSendErrorMessage int64) int64 { - events := selfCheck.handleCheckServices(nowTS) - if nextSendErrorMessage < nowTS && len(events) > 0 { - nextSendErrorMessage = selfCheck.sendNotification(events, nowTS) - } - - return nextSendErrorMessage -} - -func (selfCheck *SelfCheckWorker) sendErrorMessages(events []moira.NotificationEvent) { - var sendingWG sync.WaitGroup - - for _, adminContact := range selfCheck.Config.Contacts { - pkg := notifier.NotificationPackage{ - Contact: moira.ContactData{ - Type: adminContact["type"], - Value: adminContact["value"], - }, - Trigger: moira.TriggerData{ - Name: "Moira health check", - ErrorValue: float64(0), - }, - Events: events, - DontResend: true, - } - - selfCheck.Notifier.Send(&pkg, &sendingWG) - sendingWG.Wait() - } -} - -func generateNotificationEvent(message string, currentValue int64) moira.NotificationEvent { - val := float64(currentValue) - return moira.NotificationEvent{ - Timestamp: time.Now().Unix(), - OldState: moira.StateNODATA, - State: moira.StateERROR, - Metric: message, - Value: &val, - } -} - -func (selfCheck *SelfCheckWorker) setNotifierState(state string) { - err := selfCheck.Database.SetNotifierState(state) - if err != nil { - 
selfCheck.Logger.Error(). - Error(err). - Msg("Can't set notifier state") - } -} diff --git a/notifier/selfstate/config.go b/notifier/selfstate/config.go index ad25c7e7e..5538fe1d5 100644 --- a/notifier/selfstate/config.go +++ b/notifier/selfstate/config.go @@ -3,35 +3,85 @@ package selfstate import ( "fmt" "time" + + "github.com/go-playground/validator/v10" + "github.com/moira-alert/moira/notifier/selfstate/heartbeat" ) -// Config is representation of self state worker settings like moira admins contacts and threshold values for checked services. -type Config struct { - Enabled bool - RedisDisconnectDelaySeconds int64 - LastMetricReceivedDelaySeconds int64 - LastCheckDelaySeconds int64 - LastRemoteCheckDelaySeconds int64 - NoticeIntervalSeconds int64 - CheckInterval time.Duration - Contacts []map[string]string -} - -func (config *Config) checkConfig(senders map[string]bool) error { - if !config.Enabled { +type HeartbeatsCfg struct { + DatabaseCfg heartbeat.DatabaseHeartbeaterConfig + FilterCfg heartbeat.FilterHeartbeaterConfig + LocalCheckerCfg heartbeat.LocalCheckerHeartbeaterConfig + RemoteCheckerCfg heartbeat.RemoteCheckerHeartbeaterConfig + NotifierCfg heartbeat.NotifierHeartbeaterConfig +} + +type MonitorBaseConfig struct { + Enabled bool + HeartbeatsCfg HeartbeatsCfg + NoticeInterval time.Duration `validate:"required,gt=0"` + CheckInterval time.Duration `validate:"required,gt=0"` +} + +type AdminMonitorConfig struct { + MonitorBaseConfig + + AdminContacts []map[string]string `validate:"required,min=1"` +} + +func (cfg AdminMonitorConfig) validate(senders map[string]bool) error { + if !cfg.Enabled { return nil } - if len(config.Contacts) < 1 { - return fmt.Errorf("contacts must be specified") + + validator := validator.New() + if err := validator.Struct(cfg); err != nil { + return err } - for _, adminContact := range config.Contacts { - if _, ok := senders[adminContact["type"]]; !ok { - return fmt.Errorf("unknown contact type [%s]", adminContact["type"]) + + 
for _, contact := range cfg.AdminContacts { + if _, ok := senders[contact["type"]]; !ok { + return fmt.Errorf("unknown contact type [%s]", contact["type"]) } - if adminContact["value"] == "" { - return fmt.Errorf("value for [%s] must be present", adminContact["type"]) + + if contact["value"] == "" { + return fmt.Errorf("value for [%s] must be present", contact["type"]) } } return nil } + +type UserMonitorConfig struct { + MonitorBaseConfig +} + +func (cfg UserMonitorConfig) validate() error { + if !cfg.Enabled { + return nil + } + + validator := validator.New() + return validator.Struct(cfg) +} + +type MonitorConfig struct { + AdminCfg AdminMonitorConfig + UserCfg UserMonitorConfig +} + +type Config struct { + Monitor MonitorConfig +} + +func (cfg *Config) Validate(senders map[string]bool) error { + if err := cfg.Monitor.AdminCfg.validate(senders); err != nil { + return fmt.Errorf("admin config validation error: %w", err) + } + + if err := cfg.Monitor.UserCfg.validate(); err != nil { + return fmt.Errorf("user config validation error: %w", err) + } + + return nil +} diff --git a/notifier/selfstate/config_test.go b/notifier/selfstate/config_test.go index 438ae3927..ba2029c60 100644 --- a/notifier/selfstate/config_test.go +++ b/notifier/selfstate/config_test.go @@ -1,114 +1,296 @@ package selfstate import ( + "errors" "fmt" "testing" + "time" + "github.com/go-playground/validator/v10" + "github.com/moira-alert/moira/notifier/selfstate/heartbeat" . 
"github.com/smartystreets/goconvey/convey" ) -func TestConfigCheck(testing *testing.T) { - contactTypes := map[string]bool{ - "admin-mail": true, +var defaultHeartbeatersConfig = HeartbeatsCfg{ + DatabaseCfg: heartbeat.DatabaseHeartbeaterConfig{ + RedisDisconnectDelay: time.Minute, + }, + FilterCfg: heartbeat.FilterHeartbeaterConfig{ + MetricReceivedDelay: time.Minute, + }, + LocalCheckerCfg: heartbeat.LocalCheckerHeartbeaterConfig{ + LocalCheckDelay: time.Minute, + }, + RemoteCheckerCfg: heartbeat.RemoteCheckerHeartbeaterConfig{ + RemoteCheckDelay: time.Minute, + }, + NotifierCfg: heartbeat.NotifierHeartbeaterConfig{}, +} + +func TestValidateConfig(t *testing.T) { + senders := map[string]bool{ + "telegram": true, } - Convey("SelfCheck disabled", testing, func() { - config := Config{ - Enabled: false, - Contacts: []map[string]string{ - { - "type": "admin-mail", - "value": "admin@company.com", + validationErr := validator.ValidationErrors{} + + Convey("Test Validate", t, func() { + Convey("With disabled admin and user selfchecks", func() { + cfg := Config{ + Monitor: MonitorConfig{ + AdminCfg: AdminMonitorConfig{ + MonitorBaseConfig: MonitorBaseConfig{ + Enabled: false, + }, + }, + UserCfg: UserMonitorConfig{ + MonitorBaseConfig: MonitorBaseConfig{ + Enabled: false, + }, + }, }, - }, - } + } - Convey("all data valid, should return nil error", func() { - actual := config.checkConfig(contactTypes) - So(actual, ShouldBeNil) + err := cfg.Validate(senders) + So(err, ShouldBeNil) }) - Convey("contacts empty, should return nil error", func() { - config.Contacts = []map[string]string{} - actual := config.checkConfig(contactTypes) - So(actual, ShouldBeNil) + Convey("Without heartbeats config", func() { + cfg := Config{ + Monitor: MonitorConfig{ + AdminCfg: AdminMonitorConfig{ + MonitorBaseConfig: MonitorBaseConfig{ + Enabled: true, + }, + }, + UserCfg: UserMonitorConfig{ + MonitorBaseConfig: MonitorBaseConfig{ + Enabled: false, + }, + }, + }, + } + + err := 
cfg.Validate(senders) + So(errors.As(err, &validationErr), ShouldBeTrue) }) - Convey("admin sending type not registered, should return nil error", func() { - actual := config.checkConfig(make(map[string]bool)) - So(actual, ShouldBeNil) + Convey("Without admin notice interval", func() { + cfg := Config{ + Monitor: MonitorConfig{ + AdminCfg: AdminMonitorConfig{ + MonitorBaseConfig: MonitorBaseConfig{ + Enabled: true, + HeartbeatsCfg: defaultHeartbeatersConfig, + }, + }, + UserCfg: UserMonitorConfig{ + MonitorBaseConfig: MonitorBaseConfig{ + Enabled: false, + }, + }, + }, + } + + err := cfg.Validate(senders) + So(errors.As(err, &validationErr), ShouldBeTrue) }) - Convey("admin sending contact empty, should return nil error", func() { - config.Contacts = []map[string]string{ - { - "type": "admin-mail", - "value": "", + Convey("Without user notice interval", func() { + cfg := Config{ + Monitor: MonitorConfig{ + AdminCfg: AdminMonitorConfig{ + MonitorBaseConfig: MonitorBaseConfig{ + Enabled: false, + }, + }, + UserCfg: UserMonitorConfig{ + MonitorBaseConfig: MonitorBaseConfig{ + Enabled: true, + HeartbeatsCfg: defaultHeartbeatersConfig, + }, + }, }, } - actual := config.checkConfig(make(map[string]bool)) - So(actual, ShouldBeNil) + + err := cfg.Validate(senders) + So(errors.As(err, &validationErr), ShouldBeTrue) }) - }) - Convey("SelfCheck contacts empty, should return contacts must be specified error", testing, func() { - config := Config{ - Enabled: true, - } - actual := config.checkConfig(make(map[string]bool)) - So(actual, ShouldResemble, fmt.Errorf("contacts must be specified")) - }) + Convey("Without admin check interval", func() { + cfg := Config{ + Monitor: MonitorConfig{ + AdminCfg: AdminMonitorConfig{ + MonitorBaseConfig: MonitorBaseConfig{ + Enabled: true, + HeartbeatsCfg: defaultHeartbeatersConfig, + NoticeInterval: time.Minute, + }, + }, + UserCfg: UserMonitorConfig{ + MonitorBaseConfig: MonitorBaseConfig{ + Enabled: false, + }, + }, + }, + } - Convey("Admin 
sending type not registered, should not pass check without admin contact type", testing, func() { - config := Config{ - Enabled: true, - Contacts: []map[string]string{ - { - "type": "admin-mail", - "value": "admin@company.com", + err := cfg.Validate(senders) + So(errors.As(err, &validationErr), ShouldBeTrue) + }) + + Convey("Without user check interval", func() { + cfg := Config{ + Monitor: MonitorConfig{ + AdminCfg: AdminMonitorConfig{ + MonitorBaseConfig: MonitorBaseConfig{ + Enabled: false, + }, + }, + UserCfg: UserMonitorConfig{ + MonitorBaseConfig: MonitorBaseConfig{ + Enabled: true, + HeartbeatsCfg: defaultHeartbeatersConfig, + NoticeInterval: time.Minute, + }, + }, }, - }, - } + } - actual := config.checkConfig(make(map[string]bool)) - So(actual, ShouldResemble, fmt.Errorf("unknown contact type [admin-mail]")) - }) + err := cfg.Validate(senders) + So(errors.As(err, &validationErr), ShouldBeTrue) + }) - Convey("Admin sending contact empty, should not pass check without admin contact", testing, func() { - config := Config{ - Enabled: true, - Contacts: []map[string]string{ - { - "type": "admin-mail", - "value": "", + Convey("Without admin contacts", func() { + cfg := Config{ + Monitor: MonitorConfig{ + AdminCfg: AdminMonitorConfig{ + MonitorBaseConfig: MonitorBaseConfig{ + Enabled: true, + HeartbeatsCfg: defaultHeartbeatersConfig, + NoticeInterval: time.Minute, + CheckInterval: time.Minute, + }, + }, + UserCfg: UserMonitorConfig{ + MonitorBaseConfig: MonitorBaseConfig{ + Enabled: false, + }, + }, }, - }, - } + } - contactTypes := map[string]bool{ - "admin-mail": true, - } + err := cfg.Validate(senders) + So(errors.As(err, &validationErr), ShouldBeTrue) + }) - actual := config.checkConfig(contactTypes) - So(actual, ShouldResemble, fmt.Errorf("value for [admin-mail] must be present")) - }) + Convey("With empty admin contacts", func() { + cfg := Config{ + Monitor: MonitorConfig{ + AdminCfg: AdminMonitorConfig{ + MonitorBaseConfig: MonitorBaseConfig{ + Enabled: 
true, + HeartbeatsCfg: defaultHeartbeatersConfig, + NoticeInterval: time.Minute, + CheckInterval: time.Minute, + }, + AdminContacts: []map[string]string{}, + }, + UserCfg: UserMonitorConfig{ + MonitorBaseConfig: MonitorBaseConfig{ + Enabled: false, + }, + }, + }, + } + + err := cfg.Validate(senders) + So(errors.As(err, &validationErr), ShouldBeTrue) + }) - Convey("Has registered valid admin contact, should pass check", testing, func() { - config := Config{ - Enabled: true, - Contacts: []map[string]string{ - { - "type": "admin-mail", - "value": "admin@company.com", + Convey("With unknown contact type", func() { + cfg := Config{ + Monitor: MonitorConfig{ + AdminCfg: AdminMonitorConfig{ + MonitorBaseConfig: MonitorBaseConfig{ + Enabled: true, + HeartbeatsCfg: defaultHeartbeatersConfig, + NoticeInterval: time.Minute, + CheckInterval: time.Minute, + }, + AdminContacts: []map[string]string{ + { + "type": "test-contact-type", + }, + }, + }, + UserCfg: UserMonitorConfig{ + MonitorBaseConfig: MonitorBaseConfig{ + Enabled: false, + }, + }, }, - }, - } + } - contactTypes := map[string]bool{ - "admin-mail": true, - } + err := cfg.Validate(senders) + So(errors.Unwrap(err), ShouldResemble, fmt.Errorf("unknown contact type [%s]", cfg.Monitor.AdminCfg.AdminContacts[0]["type"])) + }) - actual := config.checkConfig(contactTypes) - So(actual, ShouldBeNil) + Convey("Without contact value", func() { + cfg := Config{ + Monitor: MonitorConfig{ + AdminCfg: AdminMonitorConfig{ + MonitorBaseConfig: MonitorBaseConfig{ + Enabled: true, + HeartbeatsCfg: defaultHeartbeatersConfig, + NoticeInterval: time.Minute, + CheckInterval: time.Minute, + }, + AdminContacts: []map[string]string{ + { + "type": "telegram", + }, + }, + }, + UserCfg: UserMonitorConfig{ + MonitorBaseConfig: MonitorBaseConfig{ + Enabled: false, + }, + }, + }, + } + + err := cfg.Validate(senders) + So(errors.Unwrap(err), ShouldResemble, fmt.Errorf("value for [%s] must be present", cfg.Monitor.AdminCfg.AdminContacts[0]["type"])) + 
}) + + Convey("With valid contact type and value", func() { + cfg := Config{ + Monitor: MonitorConfig{ + AdminCfg: AdminMonitorConfig{ + MonitorBaseConfig: MonitorBaseConfig{ + Enabled: true, + HeartbeatsCfg: defaultHeartbeatersConfig, + NoticeInterval: time.Minute, + CheckInterval: time.Minute, + }, + AdminContacts: []map[string]string{ + { + "type": "telegram", + "value": "@webcamsmodel", + }, + }, + }, + UserCfg: UserMonitorConfig{ + MonitorBaseConfig: MonitorBaseConfig{ + Enabled: false, + }, + }, + }, + } + + err := cfg.Validate(senders) + So(err, ShouldBeNil) + }) }) } diff --git a/notifier/selfstate/heartbeat/database.go b/notifier/selfstate/heartbeat/database.go index 5b8cd551f..21f465673 100644 --- a/notifier/selfstate/heartbeat/database.go +++ b/notifier/selfstate/heartbeat/database.go @@ -1,52 +1,67 @@ package heartbeat import ( + "fmt" "time" + "github.com/go-playground/validator/v10" "github.com/moira-alert/moira" ) -type databaseHeartbeat struct{ heartbeat } +var _ Heartbeater = (*databaseHeartbeater)(nil) -func GetDatabase(delay int64, logger moira.Logger, database moira.Database) Heartbeater { - if delay > 0 { - return &databaseHeartbeat{heartbeat{ - logger: logger, - database: database, - delay: delay, - lastSuccessfulCheck: time.Now().Unix(), - }} +type DatabaseHeartbeaterConfig struct { + HeartbeaterBaseConfig + + RedisDisconnectDelay time.Duration `validate:"required,gt=0"` +} + +func (cfg DatabaseHeartbeaterConfig) validate() error { + validator := validator.New() + return validator.Struct(cfg) +} + +type databaseHeartbeater struct { + *heartbeaterBase + + cfg DatabaseHeartbeaterConfig +} + +func NewDatabaseHeartbeater(cfg DatabaseHeartbeaterConfig, base *heartbeaterBase) (*databaseHeartbeater, error) { + if err := cfg.validate(); err != nil { + return nil, fmt.Errorf("database heartbeater configuration error: %w", err) } - return nil + + return &databaseHeartbeater{ + heartbeaterBase: base, + cfg: cfg, + }, nil } -func (check 
*databaseHeartbeat) Check(nowTS int64) (int64, bool, error) { - _, err := check.database.GetChecksUpdatesCount() +func (heartbeater *databaseHeartbeater) Check() (State, error) { + now := heartbeater.clock.NowUTC() + + _, err := heartbeater.database.GetChecksUpdatesCount() if err == nil { - check.lastSuccessfulCheck = nowTS - return 0, false, nil + heartbeater.lastSuccessfulCheck = now + return StateOK, nil } - if check.lastSuccessfulCheck < nowTS-check.delay { - check.logger.Error(). - String("error", check.GetErrorMessage()). - Int64("time_since_successful_check", nowTS-check.heartbeat.lastSuccessfulCheck). - Msg("Send message") - - return nowTS - check.lastSuccessfulCheck, true, nil + if now.Sub(heartbeater.lastSuccessfulCheck) > heartbeater.cfg.RedisDisconnectDelay { + return StateError, nil } - return 0, false, nil + return StateOK, err } -func (databaseHeartbeat) NeedTurnOffNotifier() bool { - return true +func (heartbeater databaseHeartbeater) NeedTurnOffNotifier() bool { + return heartbeater.cfg.NeedTurnOffNotifier } -func (databaseHeartbeat) NeedToCheckOthers() bool { - return false +func (databaseHeartbeater) Type() moira.EmergencyContactType { + return moira.EmergencyTypeRedisDisconnected } -func (databaseHeartbeat) GetErrorMessage() string { - return "Redis disconnected" +func (heartbeater databaseHeartbeater) AlertSettings() AlertConfig { + return heartbeater.cfg.AlertCfg } diff --git a/notifier/selfstate/heartbeat/database_test.go b/notifier/selfstate/heartbeat/database_test.go index a5b8b8a5a..1f68c4944 100644 --- a/notifier/selfstate/heartbeat/database_test.go +++ b/notifier/selfstate/heartbeat/database_test.go @@ -5,69 +5,164 @@ import ( "testing" "time" - mock_moira_alert "github.com/moira-alert/moira/mock/moira-alert" + "github.com/go-playground/validator/v10" + "github.com/moira-alert/moira" - logging "github.com/moira-alert/moira/logging/zerolog_adapter" . 
"github.com/smartystreets/goconvey/convey" - "go.uber.org/mock/gomock" ) -func TestDatabaseHeartbeat(t *testing.T) { - Convey("Test database heartbeat", t, func() { - now := time.Now().Unix() - err := errors.New("test database error") - check := createRedisDelayTest(t) - database := check.database.(*mock_moira_alert.MockDatabase) +const ( + defaultRedisDisconnectDelay = time.Minute +) + +func TestNewDatabaseHeartbeater(t *testing.T) { + _, _, _, heartbeaterBase := heartbeaterHelper(t) - Convey("Checking the created heartbeat database", func() { - expected := &databaseHeartbeat{heartbeat{database: check.database, logger: check.logger, delay: 1, lastSuccessfulCheck: now}} + validationErr := validator.ValidationErrors{} - So(GetDatabase(0, check.logger, check.database), ShouldBeNil) - So(GetDatabase(1, check.logger, check.database), ShouldResemble, expected) + Convey("Test NewDatabaseHeartbeater", t, func() { + Convey("With too low redis disconnect delay", func() { + cfg := DatabaseHeartbeaterConfig{ + HeartbeaterBaseConfig: HeartbeaterBaseConfig{ + Enabled: true, + }, + RedisDisconnectDelay: -1, + } + + databaseHeartbeater, err := NewDatabaseHeartbeater(cfg, heartbeaterBase) + So(errors.As(err, &validationErr), ShouldBeTrue) + So(databaseHeartbeater, ShouldBeNil) }) - Convey("Test update lastSuccessfulCheck", func() { - now += 1000 - database.EXPECT().GetChecksUpdatesCount().Return(int64(1), nil) + Convey("Without redis disconnect delay", func() { + cfg := DatabaseHeartbeaterConfig{} - value, needSend, errActual := check.Check(now) - So(errActual, ShouldBeNil) - So(needSend, ShouldBeFalse) - So(value, ShouldEqual, 0) - So(check.lastSuccessfulCheck, ShouldResemble, now) + databaseHeartbeater, err := NewDatabaseHeartbeater(cfg, heartbeaterBase) + So(errors.As(err, &validationErr), ShouldBeTrue) + So(databaseHeartbeater, ShouldBeNil) }) - Convey("Database error handling test", func() { - database.EXPECT().GetChecksUpdatesCount().Return(int64(1), err) + Convey("With 
correct database heartbeater config", func() { + cfg := DatabaseHeartbeaterConfig{ + RedisDisconnectDelay: 1, + } + + expected := &databaseHeartbeater{ + heartbeaterBase: heartbeaterBase, + cfg: cfg, + } - value, needSend, errActual := check.Check(now) - So(errActual, ShouldBeNil) - So(needSend, ShouldBeFalse) - So(value, ShouldEqual, 0) - So(check.lastSuccessfulCheck, ShouldResemble, now) + databaseHeartbeater, err := NewDatabaseHeartbeater(cfg, heartbeaterBase) + So(err, ShouldBeNil) + So(databaseHeartbeater, ShouldResemble, expected) }) + }) +} + +func TestDatabaseHeartbeaterCheck(t *testing.T) { + database, clock, testTime, heartbeaterBase := heartbeaterHelper(t) + + cfg := DatabaseHeartbeaterConfig{ + RedisDisconnectDelay: defaultRedisDisconnectDelay, + } - Convey("Check for notification", func() { - check.lastSuccessfulCheck = now - check.delay - 1 + databaseHeartbeater, _ := NewDatabaseHeartbeater(cfg, heartbeaterBase) - database.EXPECT().GetChecksUpdatesCount().Return(int64(0), err) + var ( + testErr = errors.New("test error") + checkUpdates int64 + ) - value, needSend, errActual := check.Check(now) - So(errActual, ShouldBeNil) - So(needSend, ShouldBeTrue) - So(value, ShouldEqual, now-check.lastSuccessfulCheck) + Convey("Test databaseHeartbeater.Check", t, func() { + Convey("With nil error in GetCheckUpdatedCount", func() { + database.EXPECT().GetChecksUpdatesCount().Return(checkUpdates, nil) + clock.EXPECT().NowUTC().Return(testTime) + + state, err := databaseHeartbeater.Check() + So(state, ShouldResemble, StateOK) + So(err, ShouldBeNil) }) - Convey("Test NeedToCheckOthers and NeedTurnOffNotifier", func() { - So(check.NeedTurnOffNotifier(), ShouldBeTrue) - So(check.NeedToCheckOthers(), ShouldBeFalse) + Convey("With too much time elapsed since the last successful check", func() { + heartbeaterBase.lastSuccessfulCheck = testTime.Add(-10 * defaultRedisDisconnectDelay) + defer func() { + heartbeaterBase.lastSuccessfulCheck = testTime + }() + + 
database.EXPECT().GetChecksUpdatesCount().Return(checkUpdates, testErr) + clock.EXPECT().NowUTC().Return(testTime) + + state, err := databaseHeartbeater.Check() + So(state, ShouldResemble, StateError) + So(err, ShouldBeNil) }) + + Convey("With only error from GetChecksUpdateCount", func() { + database.EXPECT().GetChecksUpdatesCount().Return(checkUpdates, testErr) + clock.EXPECT().NowUTC().Return(testTime) + + state, err := databaseHeartbeater.Check() + So(state, ShouldResemble, StateOK) + So(err, ShouldResemble, testErr) + }) + }) +} + +func TestDatabaseHeartbeaterNeedTurnOffNotifier(t *testing.T) { + _, _, _, heartbeaterBase := heartbeaterHelper(t) + + Convey("Test databaseHeartbeater.TurnOffNotifier", t, func() { + cfg := DatabaseHeartbeaterConfig{ + HeartbeaterBaseConfig: HeartbeaterBaseConfig{ + NeedTurnOffNotifier: true, + }, + RedisDisconnectDelay: defaultRedisDisconnectDelay, + } + + databaseHeartbeater, err := NewDatabaseHeartbeater(cfg, heartbeaterBase) + So(err, ShouldBeNil) + + needTurnOffNotifier := databaseHeartbeater.NeedTurnOffNotifier() + So(needTurnOffNotifier, ShouldBeTrue) }) } -func createRedisDelayTest(t *testing.T) *databaseHeartbeat { - mockCtrl := gomock.NewController(t) - logger, _ := logging.GetLogger("CheckDelay") +func TestDatabaseHeartbeaterType(t *testing.T) { + _, _, _, heartbeaterBase := heartbeaterHelper(t) + + Convey("Test databaseHeartbeater.Type", t, func() { + cfg := DatabaseHeartbeaterConfig{ + RedisDisconnectDelay: defaultRedisDisconnectDelay, + } - return GetDatabase(10, logger, mock_moira_alert.NewMockDatabase(mockCtrl)).(*databaseHeartbeat) + databaseHeartbeater, err := NewDatabaseHeartbeater(cfg, heartbeaterBase) + So(err, ShouldBeNil) + + databaseHeartbeaterType := databaseHeartbeater.Type() + So(databaseHeartbeaterType, ShouldResemble, moira.EmergencyTypeRedisDisconnected) + }) +} + +func TestDatabaseHeartbeaterAlertSettings(t *testing.T) { + _, _, _, heartbeaterBase := heartbeaterHelper(t) + + Convey("Test 
databaseHeartbeater.AlertSettings", t, func() { + alertCfg := AlertConfig{ + Name: "test name", + Desc: "test desc", + } + + cfg := DatabaseHeartbeaterConfig{ + HeartbeaterBaseConfig: HeartbeaterBaseConfig{ + AlertCfg: alertCfg, + }, + RedisDisconnectDelay: defaultRedisDisconnectDelay, + } + + databaseHeartbeater, err := NewDatabaseHeartbeater(cfg, heartbeaterBase) + So(err, ShouldBeNil) + + alertSettings := databaseHeartbeater.AlertSettings() + So(alertSettings, ShouldResemble, alertCfg) + }) } diff --git a/notifier/selfstate/heartbeat/filter.go b/notifier/selfstate/heartbeat/filter.go index 80f000e35..1cc42341a 100644 --- a/notifier/selfstate/heartbeat/filter.go +++ b/notifier/selfstate/heartbeat/filter.go @@ -1,70 +1,82 @@ package heartbeat import ( + "fmt" "time" + "github.com/go-playground/validator/v10" "github.com/moira-alert/moira" ) -type filter struct { - heartbeat - count int64 - firstCheckWasSuccessful bool +var ( + localClusterKey = moira.DefaultLocalCluster + + _ Heartbeater = (*filterHeartbeater)(nil) +) + +type FilterHeartbeaterConfig struct { + HeartbeaterBaseConfig + + MetricReceivedDelay time.Duration `validate:"required,gt=0"` } -func GetFilter(delay int64, logger moira.Logger, database moira.Database) Heartbeater { - if delay > 0 { - return &filter{ - heartbeat: heartbeat{ - logger: logger, - database: database, - delay: delay, - lastSuccessfulCheck: time.Now().Unix(), - }, - firstCheckWasSuccessful: false, - } +func (cfg FilterHeartbeaterConfig) validate() error { + validator := validator.New() + return validator.Struct(cfg) +} + +type filterHeartbeater struct { + *heartbeaterBase + + cfg FilterHeartbeaterConfig + lastMetricsCount int64 +} + +func NewFilterHeartbeater(cfg FilterHeartbeaterConfig, base *heartbeaterBase) (*filterHeartbeater, error) { + if err := cfg.validate(); err != nil { + return nil, fmt.Errorf("filter heartheater configuration error: %w", err) } - return nil + + return &filterHeartbeater{ + heartbeaterBase: base, + cfg: 
cfg, + }, nil } -func (check *filter) Check(nowTS int64) (int64, bool, error) { - defaultLocalCluster := moira.DefaultLocalCluster - triggersCount, err := check.database.GetTriggersToCheckCount(defaultLocalCluster) +func (heartbeater *filterHeartbeater) Check() (State, error) { + triggersCount, err := heartbeater.database.GetTriggersToCheckCount(localClusterKey) if err != nil { - return 0, false, err + return StateError, err } - metricsCount, err := check.database.GetMetricsUpdatesCount() + metricsCount, err := heartbeater.database.GetMetricsUpdatesCount() if err != nil { - return 0, false, err - } - if check.count != metricsCount || triggersCount == 0 { - check.count = metricsCount - check.lastSuccessfulCheck = nowTS - return 0, false, nil + return StateError, err } - if check.lastSuccessfulCheck < nowTS-check.heartbeat.delay { - check.logger.Error(). - String("error", check.GetErrorMessage()). - Int64("time_since_successful_check", nowTS-check.heartbeat.lastSuccessfulCheck). - Msg("Send message") + now := heartbeater.clock.NowUTC() + if heartbeater.lastMetricsCount != metricsCount || triggersCount == 0 { + heartbeater.lastMetricsCount = metricsCount + heartbeater.lastSuccessfulCheck = now + return StateOK, nil + } - check.firstCheckWasSuccessful = true - return nowTS - check.heartbeat.lastSuccessfulCheck, true, nil + if now.Sub(heartbeater.lastSuccessfulCheck) > heartbeater.cfg.MetricReceivedDelay { + return StateError, nil } - return 0, false, nil + + return StateOK, nil } // NeedTurnOffNotifier: turn off notifications if at least once the filter check was successful. 
-func (check filter) NeedTurnOffNotifier() bool { - return check.firstCheckWasSuccessful +func (heartbeater filterHeartbeater) NeedTurnOffNotifier() bool { + return heartbeater.cfg.NeedTurnOffNotifier } -func (check filter) NeedToCheckOthers() bool { - return true +func (filterHeartbeater) Type() moira.EmergencyContactType { + return moira.EmergencyTypeFilterNoMetricsReceived } -func (filter) GetErrorMessage() string { - return "Moira-Filter does not receive metrics" +func (heartbeater filterHeartbeater) AlertSettings() AlertConfig { + return heartbeater.cfg.AlertCfg } diff --git a/notifier/selfstate/heartbeat/filter_test.go b/notifier/selfstate/heartbeat/filter_test.go index 678c443d9..6fd851f52 100644 --- a/notifier/selfstate/heartbeat/filter_test.go +++ b/notifier/selfstate/heartbeat/filter_test.go @@ -5,92 +5,204 @@ import ( "testing" "time" + "github.com/go-playground/validator/v10" "github.com/moira-alert/moira" - mock_moira_alert "github.com/moira-alert/moira/mock/moira-alert" - - logging "github.com/moira-alert/moira/logging/zerolog_adapter" . 
"github.com/smartystreets/goconvey/convey" - "go.uber.org/mock/gomock" ) -func TestFilter(t *testing.T) { - Convey("Test filter heartbeat", t, func() { - err := errors.New("test filter error") - now := time.Now().Unix() - check, mockCtrl := createFilterTest(t) - defer mockCtrl.Finish() - database := check.database.(*mock_moira_alert.MockDatabase) - defaultLocalCluster := moira.MakeClusterKey(moira.GraphiteLocal, moira.DefaultCluster) - - Convey("Checking the created filter", func() { - expected := &filter{ - heartbeat: heartbeat{ - database: check.database, - logger: check.logger, - delay: 1, - lastSuccessfulCheck: now, - }, +const ( + defaultMetricReceivedDelay = time.Minute +) + +func TestNewFilterHeartbeater(t *testing.T) { + _, _, _, heartbeaterBase := heartbeaterHelper(t) + + validationErr := validator.ValidationErrors{} + + Convey("Test NewFilterHeartbeater", t, func() { + Convey("With too low metric received delay", func() { + cfg := FilterHeartbeaterConfig{ + MetricReceivedDelay: -1, } - So(GetFilter(0, check.logger, check.database), ShouldBeNil) - So(GetFilter(1, check.logger, check.database), ShouldResemble, expected) + filterHeartbeater, err := NewFilterHeartbeater(cfg, heartbeaterBase) + So(errors.As(err, &validationErr), ShouldBeTrue) + So(filterHeartbeater, ShouldBeNil) }) - Convey("Filter error handling test", func() { - database.EXPECT().GetTriggersToCheckCount(defaultLocalCluster).Return(int64(1), err) + Convey("Without metric received delay", func() { + cfg := FilterHeartbeaterConfig{} - value, needSend, errActual := check.Check(now) - So(errActual, ShouldEqual, err) - So(needSend, ShouldBeFalse) - So(value, ShouldEqual, 0) + filterHeartbeater, err := NewFilterHeartbeater(cfg, heartbeaterBase) + So(errors.As(err, &validationErr), ShouldBeTrue) + So(filterHeartbeater, ShouldBeNil) }) - Convey("Test update lastSuccessfulCheck", func() { - now += 1000 - database.EXPECT().GetMetricsUpdatesCount().Return(int64(1), nil) - 
database.EXPECT().GetTriggersToCheckCount(defaultLocalCluster).Return(int64(1), nil) + Convey("With correct filter heartbeater config", func() { + cfg := FilterHeartbeaterConfig{ + MetricReceivedDelay: 1, + } - value, needSend, errActual := check.Check(now) - So(errActual, ShouldBeNil) - So(needSend, ShouldBeFalse) - So(value, ShouldEqual, 0) - So(check.lastSuccessfulCheck, ShouldResemble, now) + expected := &filterHeartbeater{ + heartbeaterBase: heartbeaterBase, + cfg: cfg, + } + + filterHeartbeater, err := NewFilterHeartbeater(cfg, heartbeaterBase) + So(err, ShouldBeNil) + So(filterHeartbeater, ShouldResemble, expected) }) + }) +} + +func TestFilterHeartbeaterCheck(t *testing.T) { + database, clock, testTime, heartbeaterBase := heartbeaterHelper(t) - Convey("Check for notification", func() { - check.lastSuccessfulCheck = now - check.delay - 1 + cfg := FilterHeartbeaterConfig{ + MetricReceivedDelay: defaultMetricReceivedDelay, + } - database.EXPECT().GetMetricsUpdatesCount().Return(int64(0), nil) - database.EXPECT().GetTriggersToCheckCount(defaultLocalCluster).Return(int64(1), nil) + filterHeartbeater, _ := NewFilterHeartbeater(cfg, heartbeaterBase) - value, needSend, errActual := check.Check(now) - So(errActual, ShouldBeNil) - So(needSend, ShouldBeTrue) - So(value, ShouldEqual, now-check.lastSuccessfulCheck) + var ( + testErr = errors.New("test error") + triggersToCheckCount, metricsUpdatesCount int64 = 10, 10 + ) + + Convey("Test filterHeartbeater.Check", t, func() { + Convey("With GetTriggersToCheckCount error", func() { + database.EXPECT().GetTriggersToCheckCount(localClusterKey).Return(triggersToCheckCount, testErr) + + state, err := filterHeartbeater.Check() + So(err, ShouldResemble, testErr) + So(state, ShouldResemble, StateError) }) - Convey("Exit without action", func() { - database.EXPECT().GetMetricsUpdatesCount().Return(int64(0), nil) - database.EXPECT().GetTriggersToCheckCount(defaultLocalCluster).Return(int64(1), nil) + Convey("With 
GetMetricsUpdatesCount error", func() { + database.EXPECT().GetTriggersToCheckCount(localClusterKey).Return(triggersToCheckCount, nil) + database.EXPECT().GetMetricsUpdatesCount().Return(metricsUpdatesCount, testErr) - value, needSend, errActual := check.Check(now) - So(errActual, ShouldBeNil) - So(needSend, ShouldBeFalse) - So(value, ShouldEqual, 0) + state, err := filterHeartbeater.Check() + So(err, ShouldResemble, testErr) + So(state, ShouldResemble, StateError) }) - Convey("Test NeedToCheckOthers and NeedTurnOffNotifier", func() { - // TODO(litleleprikon): seems that this test checks nothing. Seems that NeedToCheckOthers and NeedTurnOffNotifier do not work. - So(check.NeedToCheckOthers(), ShouldBeTrue) + Convey("With last metrics count not equal current metrics count", func() { + defer func() { + filterHeartbeater.lastMetricsCount = 0 + }() + + database.EXPECT().GetTriggersToCheckCount(localClusterKey).Return(triggersToCheckCount, nil) + database.EXPECT().GetMetricsUpdatesCount().Return(metricsUpdatesCount, nil) + clock.EXPECT().NowUTC().Return(testTime) - So(check.NeedTurnOffNotifier(), ShouldBeFalse) + state, err := filterHeartbeater.Check() + So(err, ShouldBeNil) + So(state, ShouldResemble, StateOK) + So(filterHeartbeater.lastMetricsCount, ShouldResemble, metricsUpdatesCount) }) + + Convey("With zero triggers to check count", func() { + defer func() { + filterHeartbeater.lastMetricsCount = 0 + }() + + var zeroTriggersToCheckCount int64 + + database.EXPECT().GetTriggersToCheckCount(localClusterKey).Return(zeroTriggersToCheckCount, nil) + database.EXPECT().GetMetricsUpdatesCount().Return(metricsUpdatesCount, nil) + clock.EXPECT().NowUTC().Return(testTime) + + state, err := filterHeartbeater.Check() + So(err, ShouldBeNil) + So(state, ShouldResemble, StateOK) + So(filterHeartbeater.lastMetricsCount, ShouldResemble, metricsUpdatesCount) + }) + + filterHeartbeater.lastMetricsCount = metricsUpdatesCount + + Convey("With too much time elapsed since the last 
successful check", func() { + filterHeartbeater.lastSuccessfulCheck = testTime.Add(-10 * defaultMetricReceivedDelay) + defer func() { + filterHeartbeater.lastSuccessfulCheck = testTime + }() + + database.EXPECT().GetTriggersToCheckCount(localClusterKey).Return(triggersToCheckCount, nil) + database.EXPECT().GetMetricsUpdatesCount().Return(metricsUpdatesCount, nil) + clock.EXPECT().NowUTC().Return(testTime) + + state, err := filterHeartbeater.Check() + So(err, ShouldBeNil) + So(state, ShouldResemble, StateError) + }) + + Convey("With short time elapsed since the last successful check", func() { + database.EXPECT().GetTriggersToCheckCount(localClusterKey).Return(triggersToCheckCount, nil) + database.EXPECT().GetMetricsUpdatesCount().Return(metricsUpdatesCount, nil) + clock.EXPECT().NowUTC().Return(testTime) + + state, err := filterHeartbeater.Check() + So(err, ShouldBeNil) + So(state, ShouldResemble, StateOK) + }) + }) +} + +func TestFilterHeartbeaterNeedTurnOffNotifier(t *testing.T) { + _, _, _, heartbeaterBase := heartbeaterHelper(t) + + Convey("Test filterHeartbeater.TurnOffNotifier", t, func() { + cfg := FilterHeartbeaterConfig{ + HeartbeaterBaseConfig: HeartbeaterBaseConfig{ + NeedTurnOffNotifier: true, + }, + MetricReceivedDelay: defaultMetricReceivedDelay, + } + + filterHeartbeater, err := NewFilterHeartbeater(cfg, heartbeaterBase) + So(err, ShouldBeNil) + + needTurnOffNotifier := filterHeartbeater.NeedTurnOffNotifier() + So(needTurnOffNotifier, ShouldBeTrue) }) } -func createFilterTest(t *testing.T) (*filter, *gomock.Controller) { - mockCtrl := gomock.NewController(t) - logger, _ := logging.GetLogger("MetricDelay") +func TestFilterHeartbeaterType(t *testing.T) { + _, _, _, heartbeaterBase := heartbeaterHelper(t) - return GetFilter(60, logger, mock_moira_alert.NewMockDatabase(mockCtrl)).(*filter), mockCtrl + Convey("Test filterHeartbeater.Type", t, func() { + cfg := FilterHeartbeaterConfig{ + MetricReceivedDelay: defaultMetricReceivedDelay, + } + + 
filterHeartbeater, err := NewFilterHeartbeater(cfg, heartbeaterBase) + So(err, ShouldBeNil) + + filterHeartbeaterType := filterHeartbeater.Type() + So(filterHeartbeaterType, ShouldResemble, moira.EmergencyTypeFilterNoMetricsReceived) + }) +} + +func TestFilterHeartbeaterAlertSettings(t *testing.T) { + _, _, _, heartbeaterBase := heartbeaterHelper(t) + + Convey("Test filterHeartbeater.AlertSettings", t, func() { + alertCfg := AlertConfig{ + Name: "test name", + Desc: "test desc", + } + + cfg := FilterHeartbeaterConfig{ + HeartbeaterBaseConfig: HeartbeaterBaseConfig{ + AlertCfg: alertCfg, + }, + MetricReceivedDelay: defaultMetricReceivedDelay, + } + + filterHeartbeater, err := NewFilterHeartbeater(cfg, heartbeaterBase) + So(err, ShouldBeNil) + + alertSettings := filterHeartbeater.AlertSettings() + So(alertSettings, ShouldResemble, alertCfg) + }) } diff --git a/notifier/selfstate/heartbeat/heartbeat.go b/notifier/selfstate/heartbeat/heartbeat.go index 244de1bcd..61bccc40d 100644 --- a/notifier/selfstate/heartbeat/heartbeat.go +++ b/notifier/selfstate/heartbeat/heartbeat.go @@ -1,21 +1,66 @@ package heartbeat import ( + "time" + "github.com/moira-alert/moira" ) +type State string + +const ( + StateOK State = "heartbeat_state_ok" + StateError State = "heartbeat_state_error" +) + +func (lastState State) IsDegradated(newState State) bool { + return lastState == StateOK && newState == StateError +} + +func (lastState State) IsRecovered(newState State) bool { + return lastState == StateError && newState == StateOK +} + // Heartbeater is the interface for simplified events verification. 
type Heartbeater interface { - Check(int64) (int64, bool, error) + Check() (State, error) NeedTurnOffNotifier() bool - NeedToCheckOthers() bool - GetErrorMessage() string + AlertSettings() AlertConfig + Type() moira.EmergencyContactType +} + +type HeartbeaterBaseConfig struct { + Enabled bool + NeedTurnOffNotifier bool + NeedToCheckOthers bool + + AlertCfg AlertConfig `validate:"required_if=Enabled true"` +} + +type AlertConfig struct { + Name string `validate:"required_if=Enabled true"` + Desc string } // heartbeat basic structure for Heartbeater. -type heartbeat struct { +type heartbeaterBase struct { logger moira.Logger database moira.Database + clock moira.Clock + + lastSuccessfulCheck time.Time +} + +func NewHeartbeaterBase( + logger moira.Logger, + database moira.Database, + clock moira.Clock, +) *heartbeaterBase { + return &heartbeaterBase{ + logger: logger, + database: database, + clock: clock, - delay, lastSuccessfulCheck int64 + lastSuccessfulCheck: clock.NowUTC(), + } } diff --git a/notifier/selfstate/heartbeat/heartbeat_test.go b/notifier/selfstate/heartbeat/heartbeat_test.go new file mode 100644 index 000000000..1e84f0abe --- /dev/null +++ b/notifier/selfstate/heartbeat/heartbeat_test.go @@ -0,0 +1,96 @@ +package heartbeat + +import ( + "testing" + "time" + + logging "github.com/moira-alert/moira/logging/zerolog_adapter" + mock_clock "github.com/moira-alert/moira/mock/clock" + mock_moira_alert "github.com/moira-alert/moira/mock/moira-alert" + . 
"github.com/smartystreets/goconvey/convey" + "go.uber.org/mock/gomock" +) + +func heartbeaterHelper(t *testing.T) (*mock_moira_alert.MockDatabase, *mock_clock.MockClock, time.Time, *heartbeaterBase) { + t.Helper() + + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + logger, _ := logging.GetLogger("Test") + database := mock_moira_alert.NewMockDatabase(mockCtrl) + clock := mock_clock.NewMockClock(mockCtrl) + + testTime := time.Date(2022, time.June, 6, 10, 0, 0, 0, time.UTC) + + clock.EXPECT().NowUTC().Return(testTime) + heartbeaterBase := NewHeartbeaterBase(logger, database, clock) + + return database, clock, testTime, heartbeaterBase +} + +func TestStateIsDegradated(t *testing.T) { + Convey("Test state.IsDegradated", t, func() { + Convey("With degradated state", func() { + lastState := StateOK + newState := StateError + + degradated := lastState.IsDegradated(newState) + So(degradated, ShouldBeTrue) + }) + + Convey("Without degradated state", func() { + lastState := StateError + newState := StateOK + + degradated := lastState.IsDegradated(newState) + So(degradated, ShouldBeFalse) + }) + }) +} + +func TestStateIsRecovered(t *testing.T) { + Convey("Test state.IsRecovered", t, func() { + Convey("With recovered state", func() { + lastState := StateError + newState := StateOK + + recovered := lastState.IsRecovered(newState) + So(recovered, ShouldBeTrue) + }) + + Convey("Without recovered state", func() { + lastState := StateOK + newState := StateError + + recovered := lastState.IsRecovered(newState) + So(recovered, ShouldBeFalse) + }) + }) +} + +func TestNewHeartbeaterBase(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + logger, _ := logging.GetLogger("Test") + database := mock_moira_alert.NewMockDatabase(mockCtrl) + clock := mock_clock.NewMockClock(mockCtrl) + + testTime := time.Date(2022, time.June, 6, 10, 0, 0, 0, time.UTC) + + Convey("Test NewHeartbeaterBase", t, func() { + clock.EXPECT().NowUTC().Return(testTime) + 
+ expected := &heartbeaterBase{ + logger: logger, + database: database, + clock: clock, + + lastSuccessfulCheck: testTime, + } + + heartbeaterBase := NewHeartbeaterBase(logger, database, clock) + So(heartbeaterBase, ShouldResemble, expected) + }) +} diff --git a/notifier/selfstate/heartbeat/local_checker.go b/notifier/selfstate/heartbeat/local_checker.go index 7c4c88cfd..9dee798ad 100644 --- a/notifier/selfstate/heartbeat/local_checker.go +++ b/notifier/selfstate/heartbeat/local_checker.go @@ -1,62 +1,77 @@ package heartbeat import ( + "fmt" "time" + "github.com/go-playground/validator/v10" "github.com/moira-alert/moira" ) -type localChecker struct { - heartbeat - count int64 +var _ Heartbeater = (*localCheckerHeartbeater)(nil) + +type LocalCheckerHeartbeaterConfig struct { + HeartbeaterBaseConfig + + LocalCheckDelay time.Duration `validate:"required,gt=0"` +} + +func (cfg LocalCheckerHeartbeaterConfig) validate() error { + validator := validator.New() + return validator.Struct(cfg) +} + +type localCheckerHeartbeater struct { + *heartbeaterBase + + cfg LocalCheckerHeartbeaterConfig + lastChecksCount int64 } -func GetLocalChecker(delay int64, logger moira.Logger, database moira.Database) Heartbeater { - if delay > 0 { - return &localChecker{heartbeat: heartbeat{ - logger: logger, - database: database, - delay: delay, - lastSuccessfulCheck: time.Now().Unix(), - }} +func NewLocalCheckerHeartbeater(cfg LocalCheckerHeartbeaterConfig, base *heartbeaterBase) (*localCheckerHeartbeater, error) { + if err := cfg.validate(); err != nil { + return nil, fmt.Errorf("local checker heartbeater configuration error: %w", err) } - return nil + + return &localCheckerHeartbeater{ + heartbeaterBase: base, + cfg: cfg, + }, nil } -func (check *localChecker) Check(nowTS int64) (int64, bool, error) { - defaultLocalCluster := moira.DefaultLocalCluster - triggersCount, err := check.database.GetTriggersToCheckCount(defaultLocalCluster) +func (heartbeater *localCheckerHeartbeater) Check() 
(State, error) { + triggersCount, err := heartbeater.database.GetTriggersToCheckCount(localClusterKey) if err != nil { - return 0, false, err + return StateError, err } - checksCount, _ := check.database.GetChecksUpdatesCount() - if check.count != checksCount || triggersCount == 0 { - check.count = checksCount - check.lastSuccessfulCheck = nowTS - return 0, false, nil + checksCount, err := heartbeater.database.GetChecksUpdatesCount() + if err != nil { + return StateError, err } - if check.lastSuccessfulCheck < nowTS-check.delay { - check.logger.Error(). - String("error", check.GetErrorMessage()). - Int64("time_since_successful_check", nowTS-check.heartbeat.lastSuccessfulCheck). - Msg("Send message") + now := heartbeater.clock.NowUTC() + if heartbeater.lastChecksCount != checksCount || triggersCount == 0 { + heartbeater.lastChecksCount = checksCount + heartbeater.lastSuccessfulCheck = now + return StateOK, nil + } - return nowTS - check.lastSuccessfulCheck, true, nil + if now.Sub(heartbeater.lastSuccessfulCheck) > heartbeater.cfg.LocalCheckDelay { + return StateError, nil } - return 0, false, nil + return StateOK, nil } -func (localChecker) NeedToCheckOthers() bool { - return true +func (heartbeater localCheckerHeartbeater) NeedTurnOffNotifier() bool { + return heartbeater.cfg.NeedTurnOffNotifier } -func (check localChecker) NeedTurnOffNotifier() bool { - return false +func (localCheckerHeartbeater) Type() moira.EmergencyContactType { + return moira.EmergencyTypeCheckerNoTriggerCheck } -func (localChecker) GetErrorMessage() string { - return "Moira-Checker does not check triggers" +func (heartbeater localCheckerHeartbeater) AlertSettings() AlertConfig { + return heartbeater.cfg.AlertCfg } diff --git a/notifier/selfstate/heartbeat/local_checker_test.go b/notifier/selfstate/heartbeat/local_checker_test.go index 7c638907a..0ed400c94 100644 --- a/notifier/selfstate/heartbeat/local_checker_test.go +++ b/notifier/selfstate/heartbeat/local_checker_test.go @@ -5,84 +5,204 
@@ import ( "testing" "time" + "github.com/go-playground/validator/v10" "github.com/moira-alert/moira" - mock_moira_alert "github.com/moira-alert/moira/mock/moira-alert" - - logging "github.com/moira-alert/moira/logging/zerolog_adapter" . "github.com/smartystreets/goconvey/convey" - "go.uber.org/mock/gomock" ) -func TestCheckDelay_Check(t *testing.T) { - defaultLocalCluster := moira.MakeClusterKey(moira.GraphiteLocal, moira.DefaultCluster) - Convey("Test local checker heartbeat", t, func() { - err := errors.New("test error localChecker") - now := time.Now().Unix() - check, mockCtrl := createGraphiteLocalCheckerTest(t) - defer mockCtrl.Finish() - database := check.database.(*mock_moira_alert.MockDatabase) - - Convey("Test creation localChecker", func() { - expected := &localChecker{heartbeat: heartbeat{database: check.database, logger: check.logger, delay: 1, lastSuccessfulCheck: now}} - So(GetLocalChecker(0, check.logger, check.database), ShouldBeNil) - So(GetLocalChecker(1, check.logger, check.database), ShouldResemble, expected) +const ( + defaultLocalCheckDelay = time.Minute +) + +func TestNewLocalCheckerHeartbeater(t *testing.T) { + _, _, _, heartbeaterBase := heartbeaterHelper(t) + + validationErr := validator.ValidationErrors{} + + Convey("Test NewLocalCheckerHeartbeater", t, func() { + Convey("With too low local check delay", func() { + cfg := LocalCheckerHeartbeaterConfig{ + LocalCheckDelay: -1, + } + + localCheckerHeartbeater, err := NewLocalCheckerHeartbeater(cfg, heartbeaterBase) + So(errors.As(err, &validationErr), ShouldBeTrue) + So(localCheckerHeartbeater, ShouldBeNil) }) - Convey("GraphiteLocalChecker error handling test", func() { - database.EXPECT().GetTriggersToCheckCount(defaultLocalCluster).Return(int64(1), err) + Convey("Without local check delay", func() { + cfg := LocalCheckerHeartbeaterConfig{} - value, needSend, errActual := check.Check(now) - So(errActual, ShouldEqual, err) - So(needSend, ShouldBeFalse) - So(value, ShouldEqual, 0) + 
localCheckerHeartbeater, err := NewLocalCheckerHeartbeater(cfg, heartbeaterBase) + So(errors.As(err, &validationErr), ShouldBeTrue) + So(localCheckerHeartbeater, ShouldBeNil) }) - Convey("Test update lastSuccessfulCheck", func() { - now += 1000 - database.EXPECT().GetChecksUpdatesCount().Return(int64(1), nil) - database.EXPECT().GetTriggersToCheckCount(defaultLocalCluster).Return(int64(1), nil) + Convey("With correct local checker heartbeater config", func() { + cfg := LocalCheckerHeartbeaterConfig{ + LocalCheckDelay: 1, + } + + expected := &localCheckerHeartbeater{ + heartbeaterBase: heartbeaterBase, + cfg: cfg, + } + + localCheckerHeartbeater, err := NewLocalCheckerHeartbeater(cfg, heartbeaterBase) + So(err, ShouldBeNil) + So(localCheckerHeartbeater, ShouldResemble, expected) + }) + }) +} + +func TestLocalCheckerHeartbeaterCheck(t *testing.T) { + database, clock, testTime, heartbeaterBase := heartbeaterHelper(t) + + cfg := LocalCheckerHeartbeaterConfig{ + LocalCheckDelay: defaultMetricReceivedDelay, + } + + localCheckerHeartbeater, _ := NewLocalCheckerHeartbeater(cfg, heartbeaterBase) - value, needSend, errActual := check.Check(now) - So(errActual, ShouldBeNil) - So(needSend, ShouldBeFalse) - So(value, ShouldEqual, 0) - So(check.lastSuccessfulCheck, ShouldResemble, now) + var ( + testErr = errors.New("test error") + triggersToCheckCount, checksUpdatesCount int64 = 10, 10 + ) + + Convey("Test localCheckerHeartbeater.Check", t, func() { + Convey("With GetTriggersToCheckCount error", func() { + database.EXPECT().GetTriggersToCheckCount(localClusterKey).Return(triggersToCheckCount, testErr) + + state, err := localCheckerHeartbeater.Check() + So(err, ShouldResemble, testErr) + So(state, ShouldResemble, StateError) }) - Convey("Test get notification", func() { - check.lastSuccessfulCheck = now - check.delay - 1 - database.EXPECT().GetChecksUpdatesCount().Return(int64(0), nil) - database.EXPECT().GetTriggersToCheckCount(defaultLocalCluster).Return(int64(1), nil) + 
Convey("With GetChecksUpdatesCount error", func() { + database.EXPECT().GetTriggersToCheckCount(localClusterKey).Return(triggersToCheckCount, nil) + database.EXPECT().GetChecksUpdatesCount().Return(checksUpdatesCount, testErr) - value, needSend, errActual := check.Check(now) - So(errActual, ShouldBeNil) - So(needSend, ShouldBeTrue) - So(value, ShouldEqual, now-check.lastSuccessfulCheck) + state, err := localCheckerHeartbeater.Check() + So(err, ShouldResemble, testErr) + So(state, ShouldResemble, StateError) }) - Convey("Exit without action", func() { - database.EXPECT().GetChecksUpdatesCount().Return(int64(0), nil) - database.EXPECT().GetTriggersToCheckCount(defaultLocalCluster).Return(int64(1), nil) + Convey("With last checks count not equal current checks count", func() { + defer func() { + localCheckerHeartbeater.lastChecksCount = 0 + }() + + database.EXPECT().GetTriggersToCheckCount(localClusterKey).Return(triggersToCheckCount, nil) + database.EXPECT().GetChecksUpdatesCount().Return(checksUpdatesCount, nil) + clock.EXPECT().NowUTC().Return(testTime) - value, needSend, errActual := check.Check(now) - So(errActual, ShouldBeNil) - So(needSend, ShouldBeFalse) - So(value, ShouldEqual, 0) + state, err := localCheckerHeartbeater.Check() + So(err, ShouldBeNil) + So(state, ShouldResemble, StateOK) + So(localCheckerHeartbeater.lastChecksCount, ShouldResemble, checksUpdatesCount) }) - Convey("Test NeedToCheckOthers and NeedTurnOffNotifier", func() { - // TODO(litleleprikon): seems that this test checks nothing. Seems that NeedToCheckOthers and NeedTurnOffNotifier do not work. 
- needCheck := check.NeedToCheckOthers() - So(needCheck, ShouldBeTrue) + Convey("With zero triggers to check count", func() { + defer func() { + localCheckerHeartbeater.lastChecksCount = 0 + }() - So(check.NeedTurnOffNotifier(), ShouldBeFalse) + var zeroTriggersToCheckCount int64 + + database.EXPECT().GetTriggersToCheckCount(localClusterKey).Return(zeroTriggersToCheckCount, nil) + database.EXPECT().GetChecksUpdatesCount().Return(checksUpdatesCount, nil) + clock.EXPECT().NowUTC().Return(testTime) + + state, err := localCheckerHeartbeater.Check() + So(err, ShouldBeNil) + So(state, ShouldResemble, StateOK) + So(localCheckerHeartbeater.lastChecksCount, ShouldResemble, checksUpdatesCount) }) + + localCheckerHeartbeater.lastChecksCount = checksUpdatesCount + + Convey("With too much time elapsed since the last successful check", func() { + localCheckerHeartbeater.lastSuccessfulCheck = testTime.Add(-10 * defaultLocalCheckDelay) + defer func() { + localCheckerHeartbeater.lastSuccessfulCheck = testTime + }() + + database.EXPECT().GetTriggersToCheckCount(localClusterKey).Return(triggersToCheckCount, nil) + database.EXPECT().GetChecksUpdatesCount().Return(checksUpdatesCount, nil) + clock.EXPECT().NowUTC().Return(testTime) + + state, err := localCheckerHeartbeater.Check() + So(err, ShouldBeNil) + So(state, ShouldResemble, StateError) + }) + + Convey("With short time elapsed since the last successful check", func() { + database.EXPECT().GetTriggersToCheckCount(localClusterKey).Return(triggersToCheckCount, nil) + database.EXPECT().GetChecksUpdatesCount().Return(checksUpdatesCount, nil) + clock.EXPECT().NowUTC().Return(testTime) + + state, err := localCheckerHeartbeater.Check() + So(err, ShouldBeNil) + So(state, ShouldResemble, StateOK) + }) + }) +} + +func TestLocalCheckerHeartbeaterNeedTurnOffNotifier(t *testing.T) { + _, _, _, heartbeaterBase := heartbeaterHelper(t) + + Convey("Test localCheckerHeartbeater.TurnOffNotifier", t, func() { + cfg := LocalCheckerHeartbeaterConfig{ + 
HeartbeaterBaseConfig: HeartbeaterBaseConfig{ + NeedTurnOffNotifier: true, + }, + LocalCheckDelay: defaultLocalCheckDelay, + } + + localCheckerHeartbeater, err := NewLocalCheckerHeartbeater(cfg, heartbeaterBase) + So(err, ShouldBeNil) + + needTurnOffNotifier := localCheckerHeartbeater.NeedTurnOffNotifier() + So(needTurnOffNotifier, ShouldBeTrue) }) } -func createGraphiteLocalCheckerTest(t *testing.T) (*localChecker, *gomock.Controller) { - mockCtrl := gomock.NewController(t) - logger, _ := logging.GetLogger("CheckDelay") +func TestLocalCheckerHeartbeaterType(t *testing.T) { + _, _, _, heartbeaterBase := heartbeaterHelper(t) - return GetLocalChecker(120, logger, mock_moira_alert.NewMockDatabase(mockCtrl)).(*localChecker), mockCtrl + Convey("Test localCheckerHeartbeater.Type", t, func() { + cfg := LocalCheckerHeartbeaterConfig{ + LocalCheckDelay: defaultLocalCheckDelay, + } + + localCheckerHeartbeater, err := NewLocalCheckerHeartbeater(cfg, heartbeaterBase) + So(err, ShouldBeNil) + + localCheckerHeartbeaterType := localCheckerHeartbeater.Type() + So(localCheckerHeartbeaterType, ShouldResemble, moira.EmergencyTypeCheckerNoTriggerCheck) + }) +} + +func TestLocalCheckerHeartbeaterAlertSettings(t *testing.T) { + _, _, _, heartbeaterBase := heartbeaterHelper(t) + + Convey("Test localCheckerHeartbeater.AlertSettings", t, func() { + alertCfg := AlertConfig{ + Name: "test name", + Desc: "test desc", + } + + cfg := LocalCheckerHeartbeaterConfig{ + HeartbeaterBaseConfig: HeartbeaterBaseConfig{ + AlertCfg: alertCfg, + }, + LocalCheckDelay: defaultLocalCheckDelay, + } + + localCheckerHeartbeater, err := NewLocalCheckerHeartbeater(cfg, heartbeaterBase) + So(err, ShouldBeNil) + + alertSettings := localCheckerHeartbeater.AlertSettings() + So(alertSettings, ShouldResemble, alertCfg) + }) } diff --git a/notifier/selfstate/heartbeat/notifier.go b/notifier/selfstate/heartbeat/notifier.go index 7b6877be4..56750a3d9 100644 --- a/notifier/selfstate/heartbeat/notifier.go +++ 
b/notifier/selfstate/heartbeat/notifier.go @@ -1,56 +1,49 @@ package heartbeat import ( - "fmt" - - "github.com/moira-alert/moira/metrics" - "github.com/moira-alert/moira" ) -type notifier struct { - db moira.Database - log moira.Logger - metrics *metrics.HeartBeatMetrics -} +var _ Heartbeater = (*notifierHeartbeater)(nil) -func GetNotifier(logger moira.Logger, database moira.Database, metrics *metrics.HeartBeatMetrics) Heartbeater { - return ¬ifier{ - db: database, - log: logger, - metrics: metrics, - } +type NotifierHeartbeaterConfig struct { + HeartbeaterBaseConfig } -func (check notifier) Check(int64) (int64, bool, error) { - state, _ := check.db.GetNotifierState() - if state != moira.SelfStateOK { - check.metrics.MarkNotifierIsAlive(false) +type notifierHeartbeater struct { + *heartbeaterBase - check.log.Error(). - String("error", check.GetErrorMessage()). - Msg("Notifier is not healthy") + cfg NotifierHeartbeaterConfig +} + +func NewNotifierHeartbeater(cfg NotifierHeartbeaterConfig, base *heartbeaterBase) (*notifierHeartbeater, error) { + return ¬ifierHeartbeater{ + heartbeaterBase: base, + cfg: cfg, + }, nil +} - return 0, true, nil +func (heartbeater *notifierHeartbeater) Check() (State, error) { + notifierState, err := heartbeater.database.GetNotifierState() + if err != nil { + return StateError, err } - check.metrics.MarkNotifierIsAlive(true) - check.log.Debug(). - String("state", state). 
// AlertSettings returns the alert configuration (cfg.AlertCfg) that the
// heartbeater was constructed with.
func (heartbeater notifierHeartbeater) AlertSettings() AlertConfig {
	return heartbeater.cfg.AlertCfg
}
"github.com/smartystreets/goconvey/convey" - "go.uber.org/mock/gomock" ) -func TestNotifierState(t *testing.T) { - Convey("Test notifier delay heartbeat", t, func() { - now := time.Now().Unix() - check := createNotifierStateTest(t) +func TestNewNotifierHeartbeater(t *testing.T) { + _, _, _, heartbeaterBase := heartbeaterHelper(t) + + Convey("Test NewNotifierHeartbeater", t, func() { + Convey("With correct local checker heartbeater config", func() { + cfg := NotifierHeartbeaterConfig{} + + expected := ¬ifierHeartbeater{ + heartbeaterBase: heartbeaterBase, + cfg: cfg, + } + + notifierHeartbeater, err := NewNotifierHeartbeater(cfg, heartbeaterBase) + So(err, ShouldBeNil) + So(notifierHeartbeater, ShouldResemble, expected) + }) + }) +} + +func TestNotifierHeartbeaterCheck(t *testing.T) { + database, _, _, heartbeaterBase := heartbeaterHelper(t) + + cfg := NotifierHeartbeaterConfig{} + + notifierHeartbeater, _ := NewNotifierHeartbeater(cfg, heartbeaterBase) + + testErr := errors.New("test error") - Convey("Test get notifier delay", func() { - check.db.(*mock_moira_alert.MockDatabase).EXPECT().GetNotifierState().Return(moira.SelfStateOK, nil) + Convey("Test notifierHeartbeater.Check", t, func() { + Convey("With GetNotifierState error", func() { + database.EXPECT().GetNotifierState().Return(string(moira.SelfStateOK), testErr) - value, needSend, errActual := check.Check(now) - So(errActual, ShouldBeNil) - So(needSend, ShouldBeFalse) - So(value, ShouldEqual, 0) + state, err := notifierHeartbeater.Check() + So(err, ShouldResemble, testErr) + So(state, ShouldResemble, StateError) }) - Convey("Test get notification", func() { - check.db.(*mock_moira_alert.MockDatabase).EXPECT().GetNotifierState().Return(moira.SelfStateERROR, nil).Times(2) + Convey("With notifier state equals error", func() { + database.EXPECT().GetNotifierState().Return(moira.SelfStateERROR, nil) - value, needSend, errActual := check.Check(now) - So(errActual, ShouldBeNil) - So(needSend, ShouldBeTrue) - 
So(value, ShouldEqual, 0) + state, err := notifierHeartbeater.Check() + So(err, ShouldResemble, nil) + So(state, ShouldResemble, StateError) }) - Convey("Test NeedToCheckOthers and NeedTurnOffNotifier", func() { - So(check.NeedTurnOffNotifier(), ShouldBeFalse) - So(check.NeedToCheckOthers(), ShouldBeTrue) + Convey("With notifier state equals ok", func() { + database.EXPECT().GetNotifierState().Return(moira.SelfStateOK, nil) + + state, err := notifierHeartbeater.Check() + So(err, ShouldResemble, nil) + So(state, ShouldResemble, StateOK) }) }) } -func createNotifierStateTest(t *testing.T) *notifier { - mockCtrl := gomock.NewController(t) - logger, _ := logging.GetLogger("MetricDelay") - metric := metrics.ConfigureHeartBeatMetrics(metrics.NewDummyRegistry()) +func TestNotifierHeartbeaterNeedTurnOffNotifier(t *testing.T) { + _, _, _, heartbeaterBase := heartbeaterHelper(t) + + Convey("Test notifierHeartbeater.TurnOffNotifier", t, func() { + cfg := NotifierHeartbeaterConfig{ + HeartbeaterBaseConfig: HeartbeaterBaseConfig{ + NeedTurnOffNotifier: true, + }, + } + + notifierHeartbeater, err := NewNotifierHeartbeater(cfg, heartbeaterBase) + So(err, ShouldBeNil) + + needTurnOffNotifier := notifierHeartbeater.NeedTurnOffNotifier() + So(needTurnOffNotifier, ShouldBeTrue) + }) +} + +func TestNotifierHeartbeaterType(t *testing.T) { + _, _, _, heartbeaterBase := heartbeaterHelper(t) + + Convey("Test notifierHeartbeater.Type", t, func() { + cfg := NotifierHeartbeaterConfig{} + + notifierHeartbeater, err := NewNotifierHeartbeater(cfg, heartbeaterBase) + So(err, ShouldBeNil) + + notifierHeartbeaterType := notifierHeartbeater.Type() + So(notifierHeartbeaterType, ShouldResemble, moira.EmergencyTypeNotifierOff) + }) +} + +func TestNotifierHeartbeaterAlertSettings(t *testing.T) { + _, _, _, heartbeaterBase := heartbeaterHelper(t) + + Convey("Test notifierHeartbeater.AlertSettings", t, func() { + alertCfg := AlertConfig{ + Name: "test name", + Desc: "test desc", + } - return 
GetNotifier(logger, mock_moira_alert.NewMockDatabase(mockCtrl), metric).(*notifier) + cfg := NotifierHeartbeaterConfig{ + HeartbeaterBaseConfig: HeartbeaterBaseConfig{ + AlertCfg: alertCfg, + }, + } + + notifierHeartbeater, err := NewNotifierHeartbeater(cfg, heartbeaterBase) + So(err, ShouldBeNil) + + alertSettings := notifierHeartbeater.AlertSettings() + So(alertSettings, ShouldResemble, alertCfg) + }) } diff --git a/notifier/selfstate/heartbeat/remote_checker.go b/notifier/selfstate/heartbeat/remote_checker.go index 4b0e76113..bfc6c5901 100644 --- a/notifier/selfstate/heartbeat/remote_checker.go +++ b/notifier/selfstate/heartbeat/remote_checker.go @@ -1,60 +1,81 @@ package heartbeat import ( + "fmt" "time" + "github.com/go-playground/validator/v10" "github.com/moira-alert/moira" ) -type remoteChecker struct { - heartbeat - count int64 +var ( + remoteClusterKey = moira.DefaultGraphiteRemoteCluster + + _ Heartbeater = (*remoteCheckerHeartbeater)(nil) +) + +type RemoteCheckerHeartbeaterConfig struct { + HeartbeaterBaseConfig + + RemoteCheckDelay time.Duration `validate:"required,gt=0"` +} + +func (cfg RemoteCheckerHeartbeaterConfig) validate() error { + validator := validator.New() + return validator.Struct(cfg) +} + +type remoteCheckerHeartbeater struct { + *heartbeaterBase + + cfg RemoteCheckerHeartbeaterConfig + lastRemoteChecksCount int64 } -func GetRemoteChecker(delay int64, logger moira.Logger, database moira.Database) Heartbeater { - if delay > 0 { - return &remoteChecker{heartbeat: heartbeat{ - logger: logger, - database: database, - delay: delay, - lastSuccessfulCheck: time.Now().Unix(), - }} +func NewRemoteCheckerHeartbeater(cfg RemoteCheckerHeartbeaterConfig, base *heartbeaterBase) (*remoteCheckerHeartbeater, error) { + if err := cfg.validate(); err != nil { + return nil, fmt.Errorf("remote checker heartbeater configuration error: %w", err) } - return nil + + return &remoteCheckerHeartbeater{ + heartbeaterBase: base, + cfg: cfg, + }, nil } -func (check 
*remoteChecker) Check(nowTS int64) (int64, bool, error) { - defaultRemoteCluster := moira.DefaultGraphiteRemoteCluster - triggerCount, err := check.database.GetTriggersToCheckCount(defaultRemoteCluster) +func (heartbeater remoteCheckerHeartbeater) Check() (State, error) { + triggersCount, err := heartbeater.database.GetTriggersToCheckCount(remoteClusterKey) if err != nil { - return 0, false, err + return StateError, err } - remoteTriggersCount, _ := check.database.GetRemoteChecksUpdatesCount() - if check.count != remoteTriggersCount || triggerCount == 0 { - check.count = remoteTriggersCount - check.lastSuccessfulCheck = nowTS - return 0, false, nil + remoteChecksCount, err := heartbeater.database.GetRemoteChecksUpdatesCount() + if err != nil { + return StateError, err } - if check.lastSuccessfulCheck < nowTS-check.delay { - check.logger.Error(). - String("error", check.GetErrorMessage()). - Int64("time_since_successful_check", nowTS-check.heartbeat.lastSuccessfulCheck). - Msg("Send message") - return nowTS - check.lastSuccessfulCheck, true, nil + now := heartbeater.clock.NowUTC() + if heartbeater.lastRemoteChecksCount != remoteChecksCount || triggersCount == 0 { + heartbeater.lastRemoteChecksCount = remoteChecksCount + heartbeater.lastSuccessfulCheck = now + return StateOK, nil } - return 0, false, nil + + if now.Sub(heartbeater.lastSuccessfulCheck) > heartbeater.cfg.RemoteCheckDelay { + return StateError, nil + } + + return StateOK, nil } -func (check remoteChecker) NeedTurnOffNotifier() bool { - return false +func (heartbeater remoteCheckerHeartbeater) NeedTurnOffNotifier() bool { + return heartbeater.cfg.NeedTurnOffNotifier } -func (remoteChecker) NeedToCheckOthers() bool { - return true +func (remoteCheckerHeartbeater) Type() moira.EmergencyContactType { + return moira.EmergencyTypeRemoteCheckerNoTriggerCheck } -func (remoteChecker) GetErrorMessage() string { - return "Moira-Remote-Checker does not check remote triggers" +func (heartbeater 
remoteCheckerHeartbeater) AlertSettings() AlertConfig { + return heartbeater.cfg.AlertCfg } diff --git a/notifier/selfstate/heartbeat/remote_checker_test.go b/notifier/selfstate/heartbeat/remote_checker_test.go index a48ee30a8..5339c5243 100644 --- a/notifier/selfstate/heartbeat/remote_checker_test.go +++ b/notifier/selfstate/heartbeat/remote_checker_test.go @@ -5,84 +5,204 @@ import ( "testing" "time" + "github.com/go-playground/validator/v10" "github.com/moira-alert/moira" - mock_moira_alert "github.com/moira-alert/moira/mock/moira-alert" - - logging "github.com/moira-alert/moira/logging/zerolog_adapter" . "github.com/smartystreets/goconvey/convey" - "go.uber.org/mock/gomock" ) -func TestGraphiteRemoteChecker(t *testing.T) { - defaultRemoteCluster := moira.DefaultGraphiteRemoteCluster +const ( + defaultRemoteCheckDelay = time.Minute +) + +func TestNewRemoteCheckerHeartbeater(t *testing.T) { + _, _, _, heartbeaterBase := heartbeaterHelper(t) - Convey("Test remote checker heartbeat", t, func() { - err := errors.New("test error remoteChecker") - now := time.Now().Unix() - check, mockCtrl := createGraphiteRemoteCheckerTest(t) - defer mockCtrl.Finish() - database := check.database.(*mock_moira_alert.MockDatabase) + validationErr := validator.ValidationErrors{} - Convey("Checking the created graphite remote checker", func() { - expected := &remoteChecker{heartbeat: heartbeat{database: check.database, logger: check.logger, delay: 1, lastSuccessfulCheck: now}} - So(GetRemoteChecker(0, check.logger, check.database), ShouldBeNil) - So(GetRemoteChecker(1, check.logger, check.database), ShouldResemble, expected) + Convey("Test NewRemoteCheckerHeartbeater", t, func() { + Convey("With too low remote check delay", func() { + cfg := RemoteCheckerHeartbeaterConfig{ + RemoteCheckDelay: -1, + } + + remoteCheckerHeartbeater, err := NewRemoteCheckerHeartbeater(cfg, heartbeaterBase) + So(errors.As(err, &validationErr), ShouldBeTrue) + So(remoteCheckerHeartbeater, ShouldBeNil) }) - 
Convey("GraphiteRemoteChecker error handling test", func() { - database.EXPECT().GetTriggersToCheckCount(defaultRemoteCluster).Return(int64(0), err) + Convey("Without remote check delay", func() { + cfg := RemoteCheckerHeartbeaterConfig{} - value, needSend, errActual := check.Check(now) - So(errActual, ShouldEqual, err) - So(needSend, ShouldBeFalse) - So(value, ShouldEqual, 0) + remoteCheckerHeartbeater, err := NewRemoteCheckerHeartbeater(cfg, heartbeaterBase) + So(errors.As(err, &validationErr), ShouldBeTrue) + So(remoteCheckerHeartbeater, ShouldBeNil) }) - Convey("Test update lastSuccessfulCheck", func() { - now += 1000 - database.EXPECT().GetRemoteChecksUpdatesCount().Return(int64(1), nil) - database.EXPECT().GetTriggersToCheckCount(defaultRemoteCluster).Return(int64(1), nil) + Convey("With correct remote checker heartbeater config", func() { + cfg := RemoteCheckerHeartbeaterConfig{ + RemoteCheckDelay: 1, + } + + expected := &remoteCheckerHeartbeater{ + heartbeaterBase: heartbeaterBase, + cfg: cfg, + } - value, needSend, errActual := check.Check(now) - So(errActual, ShouldBeNil) - So(needSend, ShouldBeFalse) - So(value, ShouldEqual, 0) - So(check.lastSuccessfulCheck, ShouldResemble, now) + remoteCheckerHeartbeater, err := NewRemoteCheckerHeartbeater(cfg, heartbeaterBase) + So(err, ShouldBeNil) + So(remoteCheckerHeartbeater, ShouldResemble, expected) }) + }) +} + +func TestRemoteCheckerHeartbeaterCheck(t *testing.T) { + database, clock, testTime, heartbeaterBase := heartbeaterHelper(t) + + cfg := RemoteCheckerHeartbeaterConfig{ + RemoteCheckDelay: defaultRemoteCheckDelay, + } - Convey("Check for notification", func() { - check.lastSuccessfulCheck = now - check.delay - 1 + remoteCheckerHeartbeater, _ := NewRemoteCheckerHeartbeater(cfg, heartbeaterBase) - database.EXPECT().GetRemoteChecksUpdatesCount().Return(int64(0), nil) - database.EXPECT().GetTriggersToCheckCount(defaultRemoteCluster).Return(int64(1), nil) + var ( + testErr = errors.New("test error") + 
triggersToCheckCount, remoteChecksUpdatesCount int64 = 10, 10 + ) - value, needSend, errActual := check.Check(now) - So(errActual, ShouldBeNil) - So(needSend, ShouldBeTrue) - So(value, ShouldEqual, now-check.lastSuccessfulCheck) + Convey("Test remoteCheckerHeartbeater.Check", t, func() { + Convey("With GetTriggersToCheckCount error", func() { + database.EXPECT().GetTriggersToCheckCount(remoteClusterKey).Return(triggersToCheckCount, testErr) + + state, err := remoteCheckerHeartbeater.Check() + So(err, ShouldResemble, testErr) + So(state, ShouldResemble, StateError) }) - Convey("Exit without action", func() { - database.EXPECT().GetRemoteChecksUpdatesCount().Return(int64(0), nil) - database.EXPECT().GetTriggersToCheckCount(defaultRemoteCluster).Return(int64(1), nil) + Convey("With GetRemoteChecksUpdatesCount error", func() { + database.EXPECT().GetTriggersToCheckCount(remoteClusterKey).Return(triggersToCheckCount, nil) + database.EXPECT().GetRemoteChecksUpdatesCount().Return(remoteChecksUpdatesCount, testErr) - value, needSend, errActual := check.Check(now) - So(errActual, ShouldBeNil) - So(needSend, ShouldBeFalse) - So(value, ShouldEqual, 0) + state, err := remoteCheckerHeartbeater.Check() + So(err, ShouldResemble, testErr) + So(state, ShouldResemble, StateError) }) - Convey("Test NeedToCheckOthers and NeedTurnOffNotifier", func() { - // TODO(litleleprikon): seems that this test checks nothing. Seems that NeedToCheckOthers and NeedTurnOffNotifier do not work. 
- So(check.NeedToCheckOthers(), ShouldBeTrue) - So(check.NeedTurnOffNotifier(), ShouldBeFalse) + Convey("With last remote checks count not equal current remote checks count", func() { + defer func() { + remoteCheckerHeartbeater.lastRemoteChecksCount = 0 + }() + + database.EXPECT().GetTriggersToCheckCount(remoteClusterKey).Return(triggersToCheckCount, nil) + database.EXPECT().GetRemoteChecksUpdatesCount().Return(remoteChecksUpdatesCount, nil) + clock.EXPECT().NowUTC().Return(testTime) + + state, err := remoteCheckerHeartbeater.Check() + So(err, ShouldBeNil) + So(state, ShouldResemble, StateOK) + So(remoteCheckerHeartbeater.lastRemoteChecksCount, ShouldResemble, remoteChecksUpdatesCount) + }) + + Convey("With zero triggers to check count", func() { + defer func() { + remoteCheckerHeartbeater.lastRemoteChecksCount = 0 + }() + + var zeroTriggersToCheckCount int64 + + database.EXPECT().GetTriggersToCheckCount(remoteClusterKey).Return(zeroTriggersToCheckCount, nil) + database.EXPECT().GetRemoteChecksUpdatesCount().Return(remoteChecksUpdatesCount, nil) + clock.EXPECT().NowUTC().Return(testTime) + + state, err := remoteCheckerHeartbeater.Check() + So(err, ShouldBeNil) + So(state, ShouldResemble, StateOK) + So(remoteCheckerHeartbeater.lastRemoteChecksCount, ShouldResemble, remoteChecksUpdatesCount) }) + + remoteCheckerHeartbeater.lastRemoteChecksCount = remoteChecksUpdatesCount + + Convey("With too much time elapsed since the last successful check", func() { + remoteCheckerHeartbeater.lastSuccessfulCheck = testTime.Add(-10 * defaultRemoteCheckDelay) + defer func() { + remoteCheckerHeartbeater.lastSuccessfulCheck = testTime + }() + + database.EXPECT().GetTriggersToCheckCount(remoteClusterKey).Return(triggersToCheckCount, nil) + database.EXPECT().GetRemoteChecksUpdatesCount().Return(remoteChecksUpdatesCount, nil) + clock.EXPECT().NowUTC().Return(testTime) + + state, err := remoteCheckerHeartbeater.Check() + So(err, ShouldBeNil) + So(state, ShouldResemble, StateError) + }) + + 
Convey("With short time elapsed since the last successful check", func() { + database.EXPECT().GetTriggersToCheckCount(remoteClusterKey).Return(triggersToCheckCount, nil) + database.EXPECT().GetRemoteChecksUpdatesCount().Return(remoteChecksUpdatesCount, nil) + clock.EXPECT().NowUTC().Return(testTime) + + state, err := remoteCheckerHeartbeater.Check() + So(err, ShouldBeNil) + So(state, ShouldResemble, StateOK) + }) + }) +} + +func TestRemoteCheckerHeartbeaterNeedTurnOffNotifier(t *testing.T) { + _, _, _, heartbeaterBase := heartbeaterHelper(t) + + Convey("Test remoteCheckerHeartbeater.TurnOffNotifier", t, func() { + cfg := RemoteCheckerHeartbeaterConfig{ + HeartbeaterBaseConfig: HeartbeaterBaseConfig{ + NeedTurnOffNotifier: true, + }, + RemoteCheckDelay: defaultRemoteCheckDelay, + } + + remoteCheckerHeartbeater, err := NewRemoteCheckerHeartbeater(cfg, heartbeaterBase) + So(err, ShouldBeNil) + + needTurnOffNotifier := remoteCheckerHeartbeater.NeedTurnOffNotifier() + So(needTurnOffNotifier, ShouldBeTrue) }) } -func createGraphiteRemoteCheckerTest(t *testing.T) (*remoteChecker, *gomock.Controller) { - mockCtrl := gomock.NewController(t) - logger, _ := logging.GetLogger("MetricDelay") +func TestRemoteCheckerHeartbeaterType(t *testing.T) { + _, _, _, heartbeaterBase := heartbeaterHelper(t) - return GetRemoteChecker(120, logger, mock_moira_alert.NewMockDatabase(mockCtrl)).(*remoteChecker), mockCtrl + Convey("Test remoteCheckerHeartbeater.Type", t, func() { + cfg := RemoteCheckerHeartbeaterConfig{ + RemoteCheckDelay: defaultRemoteCheckDelay, + } + + remoteCheckerHeartbeater, err := NewRemoteCheckerHeartbeater(cfg, heartbeaterBase) + So(err, ShouldBeNil) + + remoteCheckerHeartbeaterType := remoteCheckerHeartbeater.Type() + So(remoteCheckerHeartbeaterType, ShouldResemble, moira.EmergencyTypeRemoteCheckerNoTriggerCheck) + }) +} + +func TestRemoteCheckerHeartbeaterAlertSettings(t *testing.T) { + _, _, _, heartbeaterBase := heartbeaterHelper(t) + + Convey("Test 
remoteCheckerHeartbeater.AlertSettings", t, func() { + alertCfg := AlertConfig{ + Name: "test name", + Desc: "test desc", + } + + cfg := RemoteCheckerHeartbeaterConfig{ + HeartbeaterBaseConfig: HeartbeaterBaseConfig{ + AlertCfg: alertCfg, + }, + RemoteCheckDelay: defaultRemoteCheckDelay, + } + + remoteCheckerHeartbeater, err := NewRemoteCheckerHeartbeater(cfg, heartbeaterBase) + So(err, ShouldBeNil) + + alertSettings := remoteCheckerHeartbeater.AlertSettings() + So(alertSettings, ShouldResemble, alertCfg) + }) } diff --git a/notifier/selfstate/monitor/admin.go b/notifier/selfstate/monitor/admin.go new file mode 100644 index 000000000..a7fee0d37 --- /dev/null +++ b/notifier/selfstate/monitor/admin.go @@ -0,0 +1,74 @@ +package monitor + +import ( + "sync" + "time" + + "github.com/moira-alert/moira" + "github.com/moira-alert/moira/notifier" + "github.com/moira-alert/moira/notifier/selfstate" +) + +const ( + adminMonitorName = "Moira Admin Selfstate Monitoring" + adminMonitorLockName = "moira-admin-selfstate-monitor" + adminMonitorLockTTL = 15 * time.Second +) + +type adminMonitor struct { + adminCfg selfstate.AdminMonitorConfig + database moira.Database + notifier notifier.Notifier +} + +func NewForAdmin( + adminCfg selfstate.AdminMonitorConfig, + logger moira.Logger, + database moira.Database, + clock moira.Clock, + notifier notifier.Notifier, +) (*monitor, error) { + adminMonitor := adminMonitor{ + adminCfg: adminCfg, + database: database, + notifier: notifier, + } + + cfg := monitorConfig{ + Name: adminMonitorName, + LockName: adminMonitorLockName, + LockTTL: adminMonitorLockTTL, + NoticeInterval: adminCfg.NoticeInterval, + CheckInterval: adminCfg.CheckInterval, + } + + heartbeaters := createHearbeaters(adminCfg.HeartbeatsCfg, logger, database, clock) + + return newMonitor( + cfg, + logger, + database, + clock, + notifier, + heartbeaters, + adminMonitor.sendNotifications, + ) +} + +func (am *adminMonitor) sendNotifications(pkgs []notifier.NotificationPackage) error 
{ + sendingWG := &sync.WaitGroup{} + + for _, pkg := range pkgs { + for _, adminContact := range am.adminCfg.AdminContacts { + contact := moira.ContactData{ + Type: adminContact["type"], + Value: adminContact["value"], + } + pkg.Contact = contact + am.notifier.Send(&pkg, sendingWG) + sendingWG.Wait() + } + } + + return nil +} diff --git a/notifier/selfstate/monitor/monitor.go b/notifier/selfstate/monitor/monitor.go new file mode 100644 index 000000000..0f0b7cb95 --- /dev/null +++ b/notifier/selfstate/monitor/monitor.go @@ -0,0 +1,289 @@ +package monitor + +import ( + "fmt" + "time" + + "github.com/go-playground/validator/v10" + "github.com/moira-alert/moira" + "github.com/moira-alert/moira/notifier" + "github.com/moira-alert/moira/notifier/selfstate" + "github.com/moira-alert/moira/notifier/selfstate/heartbeat" + w "github.com/moira-alert/moira/worker" + "gopkg.in/tomb.v2" +) + +var ( + okValue = 0.0 + errorValue = 1.0 + triggerErrorValue = 1.0 + + _ Monitor = (*monitor)(nil) +) + +type hearbeatInfo struct { + lastAlertTime time.Time + lastCheckState heartbeat.State +} + +type monitorConfig struct { + Name string `validate:"required"` + LockName string `validate:"required"` + LockTTL time.Duration `validate:"required,gt=0"` + NoticeInterval time.Duration `validate:"required,gt=0"` + CheckInterval time.Duration `validate:"required,gt=0"` +} + +func (cfg monitorConfig) validate() error { + validator := validator.New() + return validator.Struct(cfg) +} + +type Monitor interface { + Start() + Stop() error +} + +type monitor struct { + cfg monitorConfig + logger moira.Logger + database moira.Database + notifier notifier.Notifier + tomb tomb.Tomb + heartbeaters []heartbeat.Heartbeater + clock moira.Clock + heartbeatsInfo map[moira.EmergencyContactType]*hearbeatInfo + sendNotifications func(pkgs []notifier.NotificationPackage) error +} + +func newMonitor( + cfg monitorConfig, + logger moira.Logger, + database moira.Database, + clock moira.Clock, + notifier 
notifier.Notifier, + heartbeaters []heartbeat.Heartbeater, + sendNotifications func(pkgs []notifier.NotificationPackage) error, +) (*monitor, error) { + if err := cfg.validate(); err != nil { + return nil, fmt.Errorf("monitor configuration error: %w", err) + } + + hearbeatersInfo := make(map[moira.EmergencyContactType]*hearbeatInfo, len(heartbeaters)) + for _, heartbeater := range heartbeaters { + hearbeatersInfo[heartbeater.Type()] = &hearbeatInfo{ + lastCheckState: heartbeat.StateOK, + } + } + + return &monitor{ + cfg: cfg, + logger: logger, + database: database, + notifier: notifier, + heartbeaters: heartbeaters, + clock: clock, + heartbeatsInfo: hearbeatersInfo, + sendNotifications: sendNotifications, + }, nil +} + +func createHearbeaters( + heartbeatsCfg selfstate.HeartbeatsCfg, + logger moira.Logger, + database moira.Database, + clock moira.Clock, +) []heartbeat.Heartbeater { + hearbeaterBase := heartbeat.NewHeartbeaterBase(logger, database, clock) + + heartbeaters := make([]heartbeat.Heartbeater, 0) + + if heartbeatsCfg.DatabaseCfg.Enabled { + databaseHeartbeater, err := heartbeat.NewDatabaseHeartbeater(heartbeatsCfg.DatabaseCfg, hearbeaterBase) + if err != nil { + logger.Error(). + Error(err). + String("heartbeater", string(databaseHeartbeater.Type())). + Msg("Failed to create a new database heartbeater") + } else { + heartbeaters = append(heartbeaters, databaseHeartbeater) + } + } + + if heartbeatsCfg.FilterCfg.Enabled { + filterHeartbeater, err := heartbeat.NewFilterHeartbeater(heartbeatsCfg.FilterCfg, hearbeaterBase) + if err != nil { + logger.Error(). + Error(err). + String("heartbeater", string(filterHeartbeater.Type())). + Msg("Failed to create a new filter heartbeater") + } else { + heartbeaters = append(heartbeaters, filterHeartbeater) + } + } + + if heartbeatsCfg.LocalCheckerCfg.Enabled { + localCheckerHeartbeater, err := heartbeat.NewLocalCheckerHeartbeater(heartbeatsCfg.LocalCheckerCfg, hearbeaterBase) + if err != nil { + logger.Error(). 
+ Error(err). + String("heartbeater", string(localCheckerHeartbeater.Type())). + Msg("Failed to create a new local checker heartbeater") + } else { + heartbeaters = append(heartbeaters, localCheckerHeartbeater) + } + } + + if heartbeatsCfg.RemoteCheckerCfg.Enabled { + remoteCheckerHeartbeater, err := heartbeat.NewRemoteCheckerHeartbeater(heartbeatsCfg.RemoteCheckerCfg, hearbeaterBase) + if err != nil { + logger.Error(). + Error(err). + String("heartbeater", string(remoteCheckerHeartbeater.Type())). + Msg("Failed to create a new remote checker heartbeater") + } else { + heartbeaters = append(heartbeaters, remoteCheckerHeartbeater) + } + } + + if heartbeatsCfg.NotifierCfg.Enabled { + notifierHeartbeater, err := heartbeat.NewNotifierHeartbeater(heartbeatsCfg.NotifierCfg, hearbeaterBase) + if err != nil { + logger.Error(). + Error(err). + String("heartbeater", string(notifierHeartbeater.Type())). + Msg("Failed to create a new notifier heartbeater") + } else { + heartbeaters = append(heartbeaters, notifierHeartbeater) + } + } + + return heartbeaters +} + +func (m *monitor) Start() { + m.tomb.Go(func() error { + w.NewWorker( + m.cfg.Name, + m.logger, + m.database.NewLock(m.cfg.LockName, m.cfg.LockTTL), + m.selfstateCheck, + ) + return nil + }) +} + +func (m *monitor) selfstateCheck(stop <-chan struct{}) error { + m.logger.Info().Msg(fmt.Sprintf("%s started", m.cfg.Name)) + + checkTicker := time.NewTicker(m.cfg.CheckInterval) + defer checkTicker.Stop() + + for { + select { + case <-stop: + m.logger.Info().Msg(fmt.Sprintf("%s stopped", m.cfg.Name)) + return nil + case <-checkTicker.C: + m.logger.Debug().Msg(fmt.Sprintf("%s selfstate check", m.cfg.Name)) + + m.check() + } + } +} + +func (m *monitor) check() { + pkgs := m.checkHeartbeats() + if len(pkgs) > 0 { + if err := m.sendNotifications(pkgs); err != nil { + m.logger.Error(). + Error(err). + Interface("notification_packages", pkgs). 
+ Msg("Failed to send heartbeats notifications") + } + } +} + +func (m *monitor) checkHeartbeats() []notifier.NotificationPackage { + pkgs := make([]notifier.NotificationPackage, 0) + + for _, heartbeater := range m.heartbeaters { + heartbeatState, err := heartbeater.Check() + if err != nil { + m.logger.Error(). + Error(err). + String("heartbeater", string(heartbeater.Type())). + Msg("Heartbeat check failed") + } + + pkg := m.generateHeartbeatNotificationPackage(heartbeater, heartbeatState) + if pkg != nil { + pkgs = append(pkgs, *pkg) + } + } + + return pkgs +} + +func (m *monitor) generateHeartbeatNotificationPackage(heartbeater heartbeat.Heartbeater, heartbeatState heartbeat.State) *notifier.NotificationPackage { + heartbeatInfo := m.heartbeatsInfo[heartbeater.Type()] + + isDegradated := heartbeatInfo.lastCheckState.IsDegradated(heartbeatState) + isRecovered := heartbeatInfo.lastCheckState.IsRecovered(heartbeatState) + allowNotify := time.Since(heartbeatInfo.lastAlertTime) > m.cfg.NoticeInterval + + if isDegradated && allowNotify { + return createErrorNotificationPackage(heartbeater, m.clock) + } else if isRecovered { + return createOkNotificationPackage(heartbeater, m.clock) + } + + return nil +} + +func createErrorNotificationPackage(heartbeater heartbeat.Heartbeater, clock moira.Clock) *notifier.NotificationPackage { + event := moira.NotificationEvent{ + Timestamp: clock.NowUnix(), + OldState: moira.StateNODATA, + State: moira.StateERROR, + Metric: string(heartbeater.Type()), + Value: &errorValue, + } + + trigger := moira.TriggerData{ + Name: heartbeater.AlertSettings().Name, + Desc: heartbeater.AlertSettings().Desc, + ErrorValue: triggerErrorValue, + } + + return &notifier.NotificationPackage{ + Events: []moira.NotificationEvent{event}, + Trigger: trigger, + } +} + +func createOkNotificationPackage(heartbeater heartbeat.Heartbeater, clock moira.Clock) *notifier.NotificationPackage { + event := moira.NotificationEvent{ + Timestamp: clock.NowUnix(), + OldState: 
moira.StateERROR, + State: moira.StateOK, + Metric: string(heartbeater.Type()), + Value: &okValue, + } + + trigger := moira.TriggerData{ + Name: heartbeater.AlertSettings().Name, + Desc: heartbeater.AlertSettings().Desc, + ErrorValue: triggerErrorValue, + } + + return &notifier.NotificationPackage{ + Events: []moira.NotificationEvent{event}, + Trigger: trigger, + } +} + +func (m *monitor) Stop() error { + m.tomb.Kill(nil) + return m.tomb.Wait() +} diff --git a/notifier/selfstate/monitor/user.go b/notifier/selfstate/monitor/user.go new file mode 100644 index 000000000..c470ca6a7 --- /dev/null +++ b/notifier/selfstate/monitor/user.go @@ -0,0 +1,85 @@ +package monitor + +import ( + "fmt" + "sync" + "time" + + "github.com/moira-alert/moira" + "github.com/moira-alert/moira/notifier" + "github.com/moira-alert/moira/notifier/selfstate" +) + +const ( + userMonitorName = "Moira User Selfstate Monitoring" + userMonitorLockName = "moira-user-selfstate-monitor" + userMonitorLockTTL = 15 * time.Second +) + +type userMonitor struct { + userCfg selfstate.UserMonitorConfig + database moira.Database + notifier notifier.Notifier +} + +func NewForUser( + userCfg selfstate.UserMonitorConfig, + logger moira.Logger, + database moira.Database, + clock moira.Clock, + notifier notifier.Notifier, +) (*monitor, error) { + userMonitor := userMonitor{ + userCfg: userCfg, + database: database, + notifier: notifier, + } + + cfg := monitorConfig{ + Name: userMonitorName, + LockName: userMonitorLockName, + LockTTL: userMonitorLockTTL, + NoticeInterval: userCfg.NoticeInterval, + CheckInterval: userCfg.CheckInterval, + } + + heartbeaters := createHearbeaters(userCfg.HeartbeatsCfg, logger, database, clock) + + return newMonitor( + cfg, + logger, + database, + clock, + notifier, + heartbeaters, + userMonitor.sendNotifications, + ) +} + +func (um *userMonitor) sendNotifications(pkgs []notifier.NotificationPackage) error { + sendingWG := &sync.WaitGroup{} + + for _, pkg := range pkgs { + event := 
pkg.Events[0] + emergencyType := moira.EmergencyContactType(event.Metric) + contactIDs, err := um.database.GetEmergencyTypeContactIDs(emergencyType) + if err != nil { + return fmt.Errorf("failed to get emergency type contact ids: %w", err) + } + + contacts, err := um.database.GetContacts(contactIDs) + if err != nil { + return fmt.Errorf("failed to get contacts by ids: %w", err) + } + + for _, contact := range contacts { + if contact != nil { + pkg.Contact = *contact + um.notifier.Send(&pkg, sendingWG) + sendingWG.Wait() + } + } + } + + return nil +} diff --git a/notifier/selfstate/selfstate.go b/notifier/selfstate/selfstate.go deleted file mode 100644 index 99bba0ed0..000000000 --- a/notifier/selfstate/selfstate.go +++ /dev/null @@ -1,88 +0,0 @@ -package selfstate - -import ( - "time" - - "github.com/moira-alert/moira/metrics" - - "github.com/moira-alert/moira/notifier/selfstate/heartbeat" - - "gopkg.in/tomb.v2" - - "github.com/moira-alert/moira" - "github.com/moira-alert/moira/notifier" - w "github.com/moira-alert/moira/worker" -) - -const ( - selfStateLockName = "moira-self-state-monitor" - selfStateLockTTL = time.Second * 15 -) - -// SelfCheckWorker checks what all notifier services works correctly and send message when moira don't work. -type SelfCheckWorker struct { - Logger moira.Logger - Database moira.Database - Notifier notifier.Notifier - Config Config - tomb tomb.Tomb - heartbeats []heartbeat.Heartbeater -} - -// NewSelfCheckWorker creates SelfCheckWorker. -func NewSelfCheckWorker(logger moira.Logger, database moira.Database, notifier notifier.Notifier, config Config, metrics *metrics.HeartBeatMetrics) *SelfCheckWorker { - heartbeats := createStandardHeartbeats(logger, database, config, metrics) - return &SelfCheckWorker{Logger: logger, Database: database, Notifier: notifier, Config: config, heartbeats: heartbeats} -} - -// Start self check worker. 
-func (selfCheck *SelfCheckWorker) Start() error { - senders := selfCheck.Notifier.GetSenders() - if err := selfCheck.Config.checkConfig(senders); err != nil { - return err - } - - selfCheck.tomb.Go(func() error { - w.NewWorker( - "Moira Self State Monitoring", - selfCheck.Logger, - selfCheck.Database.NewLock(selfStateLockName, selfStateLockTTL), - selfCheck.selfStateChecker, - ).Run(selfCheck.tomb.Dying()) - return nil - }) - - return nil -} - -// Stop self check worker and wait for finish. -func (selfCheck *SelfCheckWorker) Stop() error { - selfCheck.tomb.Kill(nil) - return selfCheck.tomb.Wait() -} - -func createStandardHeartbeats(logger moira.Logger, database moira.Database, conf Config, metrics *metrics.HeartBeatMetrics) []heartbeat.Heartbeater { - heartbeats := make([]heartbeat.Heartbeater, 0) - - if hb := heartbeat.GetDatabase(conf.RedisDisconnectDelaySeconds, logger, database); hb != nil { - heartbeats = append(heartbeats, hb) - } - - if hb := heartbeat.GetFilter(conf.LastMetricReceivedDelaySeconds, logger, database); hb != nil { - heartbeats = append(heartbeats, hb) - } - - if hb := heartbeat.GetLocalChecker(conf.LastCheckDelaySeconds, logger, database); hb != nil && hb.NeedToCheckOthers() { - heartbeats = append(heartbeats, hb) - } - - if hb := heartbeat.GetRemoteChecker(conf.LastRemoteCheckDelaySeconds, logger, database); hb != nil && hb.NeedToCheckOthers() { - heartbeats = append(heartbeats, hb) - } - - if hb := heartbeat.GetNotifier(logger, database, metrics); hb != nil { - heartbeats = append(heartbeats, hb) - } - - return heartbeats -} diff --git a/notifier/selfstate/selfstate_test.go b/notifier/selfstate/selfstate_test.go deleted file mode 100644 index 71859dca9..000000000 --- a/notifier/selfstate/selfstate_test.go +++ /dev/null @@ -1,189 +0,0 @@ -package selfstate - -import ( - "errors" - "testing" - "time" - - "github.com/moira-alert/moira/metrics" - - mock_heartbeat "github.com/moira-alert/moira/mock/heartbeat" - 
"github.com/moira-alert/moira/notifier/selfstate/heartbeat" - - "github.com/moira-alert/moira" - - logging "github.com/moira-alert/moira/logging/zerolog_adapter" - mock_moira_alert "github.com/moira-alert/moira/mock/moira-alert" - mock_notifier "github.com/moira-alert/moira/mock/notifier" - . "github.com/smartystreets/goconvey/convey" - "go.uber.org/mock/gomock" -) - -type selfCheckWorkerMock struct { - selfCheckWorker *SelfCheckWorker - database *mock_moira_alert.MockDatabase - notif *mock_notifier.MockNotifier - conf Config - mockCtrl *gomock.Controller -} - -func TestSelfCheckWorker_selfStateChecker(t *testing.T) { - defaultLocalCluster := moira.MakeClusterKey(moira.GraphiteLocal, moira.DefaultCluster) - defaultRemoteCluster := moira.DefaultGraphiteRemoteCluster - - mock := configureWorker(t, true) - Convey("SelfCheckWorker should call all heartbeats checks", t, func() { - mock.database.EXPECT().GetChecksUpdatesCount().Return(int64(1), nil).Times(2) - mock.database.EXPECT().GetMetricsUpdatesCount().Return(int64(1), nil) - mock.database.EXPECT().GetRemoteChecksUpdatesCount().Return(int64(1), nil) - mock.database.EXPECT().GetNotifierState().Return(moira.SelfStateOK, nil) - mock.database.EXPECT().GetTriggersToCheckCount(defaultLocalCluster).Return(int64(1), nil).Times(2) - mock.database.EXPECT().GetTriggersToCheckCount(defaultRemoteCluster).Return(int64(1), nil) - - // Start worker after configuring Mock to avoid race conditions - err := mock.selfCheckWorker.Start() - So(err, ShouldBeNil) - - So(len(mock.selfCheckWorker.heartbeats), ShouldEqual, 5) - - const oneTickDelay = time.Millisecond * 1500 - time.Sleep(oneTickDelay) // wait for one tick of worker - - err = mock.selfCheckWorker.Stop() - So(err, ShouldBeNil) - }) - - mock.mockCtrl.Finish() -} - -func TestSelfCheckWorker_sendErrorMessages(t *testing.T) { - mock := configureWorker(t, true) - - Convey("Should call notifier send", t, func() { - err := mock.selfCheckWorker.Start() - So(err, ShouldBeNil) - - 
mock.notif.EXPECT().Send(gomock.Any(), gomock.Any()) - - var events []moira.NotificationEvent - mock.selfCheckWorker.sendErrorMessages(events) - - err = mock.selfCheckWorker.Stop() - So(err, ShouldBeNil) - }) - - mock.mockCtrl.Finish() -} - -func TestSelfCheckWorker_Start(t *testing.T) { - mock := configureWorker(t, false) - Convey("When Contact not corresponds to any Sender", t, func() { - mock.notif.EXPECT().GetSenders().Return(nil) - - Convey("Start should return error", func() { - err := mock.selfCheckWorker.Start() - So(err, ShouldNotBeNil) - }) - }) -} - -func TestSelfCheckWorker(t *testing.T) { - Convey("Test checked heartbeat", t, func() { - err := errors.New("test error") - now := time.Now().Unix() - - mock := configureWorker(t, false) - - Convey("Test handle error and no needed send events", func() { - check := mock_heartbeat.NewMockHeartbeater(mock.mockCtrl) - mock.selfCheckWorker.heartbeats = []heartbeat.Heartbeater{check} - - check.EXPECT().Check(now).Return(int64(0), false, err) - - events := mock.selfCheckWorker.handleCheckServices(now) - So(events, ShouldBeNil) - }) - - Convey("Test turn off notification", func() { - first := mock_heartbeat.NewMockHeartbeater(mock.mockCtrl) - second := mock_heartbeat.NewMockHeartbeater(mock.mockCtrl) - - mock.selfCheckWorker.heartbeats = []heartbeat.Heartbeater{first, second} - - first.EXPECT().NeedTurnOffNotifier().Return(true) - first.EXPECT().NeedToCheckOthers().Return(false) - first.EXPECT().GetErrorMessage().Return(moira.SelfStateERROR) - first.EXPECT().Check(now).Return(int64(0), true, nil) - mock.database.EXPECT().SetNotifierState(moira.SelfStateERROR) - - events := mock.selfCheckWorker.handleCheckServices(now) - So(len(events), ShouldEqual, 1) - }) - - Convey("Test of sending notifications from a check", func() { - now = time.Now().Unix() - first := mock_heartbeat.NewMockHeartbeater(mock.mockCtrl) - second := mock_heartbeat.NewMockHeartbeater(mock.mockCtrl) - - mock.selfCheckWorker.heartbeats = 
[]heartbeat.Heartbeater{first, second} - nextSendErrorMessage := time.Now().Unix() - time.Hour.Milliseconds() - - first.EXPECT().Check(now).Return(int64(0), true, nil) - first.EXPECT().GetErrorMessage().Return(moira.SelfStateERROR) - first.EXPECT().NeedTurnOffNotifier().Return(true) - first.EXPECT().NeedToCheckOthers().Return(false) - mock.database.EXPECT().SetNotifierState(moira.SelfStateERROR).Return(err) - mock.notif.EXPECT().Send(gomock.Any(), gomock.Any()) - - nextSendErrorMessage = mock.selfCheckWorker.check(now, nextSendErrorMessage) - So(nextSendErrorMessage, ShouldEqual, now+60) - }) - - mock.mockCtrl.Finish() - }) -} - -func configureWorker(t *testing.T, isStart bool) *selfCheckWorkerMock { - adminContact := map[string]string{ - "type": "admin-mail", - "value": "admin@company.com", - } - conf := Config{ - Enabled: true, - Contacts: []map[string]string{ - adminContact, - }, - RedisDisconnectDelaySeconds: 10, - LastMetricReceivedDelaySeconds: 60, - LastCheckDelaySeconds: 120, - NoticeIntervalSeconds: 60, - LastRemoteCheckDelaySeconds: 120, - CheckInterval: 1 * time.Second, - } - - mockCtrl := gomock.NewController(t) - database := mock_moira_alert.NewMockDatabase(mockCtrl) - logger, _ := logging.GetLogger("SelfState") - notif := mock_notifier.NewMockNotifier(mockCtrl) - if isStart { - senders := map[string]bool{ - "admin-mail": true, - } - notif.EXPECT().GetSenders().Return(senders).MinTimes(1) - - lock := mock_moira_alert.NewMockLock(mockCtrl) - lock.EXPECT().Acquire(gomock.Any()).Return(nil, nil) - lock.EXPECT().Release() - database.EXPECT().NewLock(gomock.Any(), gomock.Any()).Return(lock) - } - - metric := &metrics.HeartBeatMetrics{} - - return &selfCheckWorkerMock{ - selfCheckWorker: NewSelfCheckWorker(logger, database, notif, conf, metric), - database: database, - notif: notif, - conf: conf, - mockCtrl: mockCtrl, - } -} diff --git a/notifier/selfstate/worker/worker.go b/notifier/selfstate/worker/worker.go new file mode 100644 index 000000000..a45f0632c 
--- /dev/null +++ b/notifier/selfstate/worker/worker.go @@ -0,0 +1,95 @@ +package worker + +import ( + "errors" + "fmt" + + "github.com/moira-alert/moira" + "github.com/moira-alert/moira/notifier" + "github.com/moira-alert/moira/notifier/selfstate" + "github.com/moira-alert/moira/notifier/selfstate/monitor" +) + +var _ SelfstateWorker = (*selfstateWorker)(nil) + +type SelfstateWorker interface { + Start() + Stop() error +} + +type selfstateWorker struct { + monitors []monitor.Monitor +} + +func NewSelfstateWorker( + cfg selfstate.Config, + logger moira.Logger, + database moira.Database, + notifier notifier.Notifier, + clock moira.Clock, +) (*selfstateWorker, error) { + if err := cfg.Validate(notifier.GetSenders()); err != nil { + return nil, fmt.Errorf("selfstate worker configuration error: %w", err) + } + + adminMonitorEnabled := cfg.Monitor.AdminCfg.Enabled + userMonitorEnabled := cfg.Monitor.UserCfg.Enabled + + monitors := make([]monitor.Monitor, 0) + + if adminMonitorEnabled { + adminMonitor, err := monitor.NewForAdmin( + cfg.Monitor.AdminCfg, + logger, + database, + clock, + notifier, + ) + if err != nil { + logger.Error(). + Error(err). + Msg("Failed to create a new admin monitor") + } else { + monitors = append(monitors, adminMonitor) + } + } + + if userMonitorEnabled { + userMonitor, err := monitor.NewForUser( + cfg.Monitor.UserCfg, + logger, + database, + clock, + notifier, + ) + if err != nil { + logger.Error(). + Error(err). 
+ Msg("Failed to create a new user monitor") + } else { + monitors = append(monitors, userMonitor) + } + } + + return &selfstateWorker{ + monitors: monitors, + }, nil +} + +func (selfstateWorker *selfstateWorker) Start() { + for _, monitor := range selfstateWorker.monitors { + monitor.Start() + } +} + +func (selfstateWorker *selfstateWorker) Stop() error { + stopErrors := make([]error, 0) + + for _, monitor := range selfstateWorker.monitors { + if err := monitor.Stop(); err != nil { + stopErrors = append(stopErrors, err) + } + } + + return errors.Join(stopErrors...) +}