Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: remote graphite retries #1085

Draft
wants to merge 8 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions api/handler/triggers.go
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ func getTriggerFromRequest(request *http.Request) (*dto.Trigger, *api.ErrorRespo
return nil, api.ErrorInvalidRequest(err)
case remote.ErrRemoteTriggerResponse:
response := api.ErrorRemoteServerUnavailable(err)

middleware.GetLoggerEntry(request).Error().
String("status", response.StatusText).
Error(err).
Expand Down
9 changes: 7 additions & 2 deletions clock/clock.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,17 @@ func NewSystemClock() *SystemClock {
return &SystemClock{}
}

// NowUTC reports the current system time converted to the UTC location.
func (t *SystemClock) NowUTC() time.Time {
	current := time.Now()

	return current.UTC()
}

// Sleep pauses the current goroutine for at least the passed duration.
// It delegates directly to time.Sleep.
func (t *SystemClock) Sleep(duration time.Duration) {
time.Sleep(duration)
}

// NowUnix reports the current system time as a Unix timestamp in seconds.
func (t *SystemClock) NowUnix() int64 {
	current := time.Now()

	return current.Unix()
}
38 changes: 32 additions & 6 deletions cmd/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ import (
"errors"
"fmt"
"os"
"strconv"
"strings"
"time"

"github.com/moira-alert/moira"
"github.com/moira-alert/moira/metrics"
Expand Down Expand Up @@ -237,21 +239,45 @@ type GraphiteRemoteConfig struct {
User string `yaml:"user"`
// Password for basic auth
Password string `yaml:"password"`
// Retry seconds for remote requests divided by spaces
RetrySeconds string `yaml:"retry_seconds"`
// HealthCheckTimeout is timeout for remote api health check requests
HealthCheckTimeout string `yaml:"health_check_timeout"`
// Retry seconds for remote api health check requests divided by spaces
HealthCheckRetrySeconds string `yaml:"health_check_retry_seconds"`
}

// getRemoteCommon returns a pointer to the RemoteCommonConfig part of config.
// The receiver is taken by value, so the pointer refers to the copy's field.
func (config GraphiteRemoteConfig) getRemoteCommon() *RemoteCommonConfig {
	common := &config.RemoteCommonConfig

	return common
}

// ParseRetrySeconds converts a whitespace-separated list of whole seconds
// (for example "1 1 5") into the matching slice of time.Duration values.
//
// An empty or blank string yields an empty, non-nil slice. The function panics
// if any field is not a valid integer; the panic value is an error that names
// the whole config value and the offending field and wraps the strconv error.
func ParseRetrySeconds(retrySecondsString string) []time.Duration {
	fields := strings.Fields(retrySecondsString)
	delays := make([]time.Duration, 0, len(fields))

	for _, field := range fields {
		seconds, err := strconv.Atoi(field)
		if err != nil {
			// The signature has no error result, so a malformed value still
			// panics, but with enough context to locate the bad config entry.
			panic(fmt.Errorf("parse retry seconds %q: invalid value %q: %w", retrySecondsString, field, err))
		}

		delays = append(delays, time.Duration(seconds)*time.Second)
	}

	return delays
}

// GetRemoteSourceSettings builds the remote Graphite source config from the
// values parsed out of the moira config files.
//
// CheckInterval, MetricsTTL, Timeout and HealthCheckTimeout are converted with
// to.Duration; both retry lists are converted with ParseRetrySeconds, which
// panics when a list contains a value that is not an integer.
func (config *GraphiteRemoteConfig) GetRemoteSourceSettings() *graphiteRemoteSource.Config {
	// Each field is set exactly once: repeating a key in a composite literal
	// is a compile error.
	return &graphiteRemoteSource.Config{
		URL:                     config.URL,
		CheckInterval:           to.Duration(config.CheckInterval),
		MetricsTTL:              to.Duration(config.MetricsTTL),
		Timeout:                 to.Duration(config.Timeout),
		User:                    config.User,
		Password:                config.Password,
		RetrySeconds:            ParseRetrySeconds(config.RetrySeconds),
		HealthCheckTimeout:      to.Duration(config.HealthCheckTimeout),
		HealthCheckRetrySeconds: ParseRetrySeconds(config.HealthCheckRetrySeconds),
	}
}

Expand Down
1 change: 1 addition & 0 deletions interfaces.go
Original file line number Diff line number Diff line change
Expand Up @@ -229,5 +229,6 @@ type PlotTheme interface {
// Clock abstracts access to the current time and to pausing execution.
type Clock interface {
	// NowUTC returns the current time.
	NowUTC() time.Time
	// NowUnix returns the current time as an int64.
	NowUnix() int64

	// Sleep blocks the caller for the given duration.
	Sleep(duration time.Duration)
}
5 changes: 4 additions & 1 deletion local/checker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,11 @@ graphite_remote:
cluster_name: Graphite 1
url: "http://graphite:80/render"
check_interval: 60s
timeout: 60s
metrics_ttl: 168h
timeout: 60s
retry_seconds: 1 1 1
health_check_timeout: 6s
health_check_retry_seconds: 1 1 1
prometheus_remote:
- cluster_id: default
cluster_name: Prometheus 1
Expand Down
15 changes: 9 additions & 6 deletions metric_source/remote/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,13 @@ import "time"

// Config represents config from remote storage.
//
// Every field is declared exactly once; a repeated field name in a struct
// declaration is a compile error.
type Config struct {
	// URL is the address of the remote API that requests are sent to.
	URL string
	// CheckInterval is the configured check interval.
	// NOTE(review): how it is consumed is not visible here — confirm.
	CheckInterval time.Duration
	// MetricsTTL is the metrics time-to-live.
	MetricsTTL time.Duration
	// Timeout is the timeout for regular remote requests.
	Timeout time.Duration
	// User is the user name for basic auth.
	User string
	// Password is the password for basic auth.
	Password string
	// RetrySeconds lists the retry delays for regular remote requests.
	RetrySeconds []time.Duration
	// HealthCheckTimeout is the timeout for remote API health check requests.
	HealthCheckTimeout time.Duration
	// HealthCheckRetrySeconds lists the retry delays for health check requests.
	HealthCheckRetrySeconds []time.Duration
}
46 changes: 30 additions & 16 deletions metric_source/remote/remote.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ import (
"net/http"
"time"

"github.com/moira-alert/moira/clock"

"github.com/moira-alert/moira"
metricSource "github.com/moira-alert/moira/metric_source"
)
Expand All @@ -23,10 +25,22 @@ func (err ErrRemoteTriggerResponse) Error() string {
return err.InternalError.Error()
}

// ErrRemoteUnavailable is a custom error when remote trigger check fails
// because the remote source itself could not be reached.
type ErrRemoteUnavailable struct {
	// InternalError is the underlying error returned by the failed request.
	InternalError error
	// Target is the metric target that was being requested.
	Target string
}

// Error is a representation of Error interface method.
// It returns the message of the wrapped error, or a generic message when no
// underlying error was recorded, so a zero value never panics.
func (err ErrRemoteUnavailable) Error() string {
	if err.InternalError == nil {
		return "remote is unavailable"
	}

	return err.InternalError.Error()
}

// Unwrap exposes the underlying error so that errors.Is and errors.As can
// inspect the cause of the failure.
func (err ErrRemoteUnavailable) Unwrap() error {
	return err.InternalError
}

// Remote is implementation of MetricSource interface, which implements fetch metrics method from remote graphite installation.
type Remote struct {
// config holds the remote source settings (timeouts, retry delays, metrics TTL) read by the methods of Remote.
config *Config
// client is the HTTP client; Create builds it with config.Timeout as its timeout.
client *http.Client
// clock is the time source; Create sets it to the system clock.
// NOTE(review): presumably used to wait between retries — confirm against the request helpers.
clock moira.Clock
}

// Create configures remote metric source.
Expand All @@ -37,6 +51,7 @@ func Create(config *Config) (metricSource.MetricSource, error) {
return &Remote{
config: config,
client: &http.Client{Timeout: config.Timeout},
clock: clock.NewSystemClock(),
}, nil
}

Expand All @@ -53,9 +68,15 @@ func (remote *Remote) Fetch(target string, from, until int64, allowRealTimeAlert
Target: target,
}
}
body, err := remote.makeRequest(req)
body, isRemoteAvailable, err := remote.makeRequestWithRetries(req, remote.config.Timeout, remote.config.RetrySeconds)
if err != nil {
return nil, ErrRemoteTriggerResponse{
if isRemoteAvailable {
return nil, ErrRemoteTriggerResponse{
InternalError: err,
Target: target,
}
}
return nil, ErrRemoteUnavailable{
InternalError: err,
Target: target,
}
Expand All @@ -76,25 +97,18 @@ func (remote *Remote) GetMetricsTTLSeconds() int64 {
return int64(remote.config.MetricsTTL.Seconds())
}

// IsConfigured reports whether the remote source is configured.
// This implementation performs no checks: it always returns true and a nil error.
func (remote *Remote) IsConfigured() (bool, error) {
	const configured = true

	return configured, nil
}

// IsAvailable checks if graphite API is available and returns 200 response.
//
// It prepares a request for a deliberately non-existing target over the last
// 600 seconds and sends it through makeRequestWithRetries, using the
// health-check timeout and health-check retry delays from the config. The
// availability flag and the error reported by that helper are returned as is;
// a failure to prepare the request yields false and the preparation error.
func (remote *Remote) IsAvailable() (bool, error) {
	until := time.Now().Unix()
	from := until - 600 //nolint

	req, err := remote.prepareRequest(from, until, "NonExistingTarget")
	if err != nil {
		return false, err
	}

	// The configured retry policy is the only retry mechanism: a fixed
	// attempt loop ahead of this call would return before it is ever reached.
	_, isRemoteAvailable, err := remote.makeRequestWithRetries(
		req, remote.config.HealthCheckTimeout, remote.config.HealthCheckRetrySeconds)

	return isRemoteAvailable, err
}
Loading
Loading