Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for cluster validations in gRPC clients #10788

Draft
wants to merge 19 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
0161aef
Ingester client: configure cluster validation label via common config…
duricanikolic Mar 3, 2025
7c46af2
Merge remote-tracking branch 'origin/main' into yuri/grpc-cluster-val…
duricanikolic Mar 4, 2025
75e98b6
Improving common field inheritance
duricanikolic Mar 4, 2025
4f13b8b
Allow cluster validation in query-frontend and query-scheduler client…
duricanikolic Mar 4, 2025
e61b653
Allow cluster validation in query-frontend clients of scheduled proce…
duricanikolic Mar 5, 2025
940a665
Replace -common.cluster-validation-label with -common.client-cluster-…
duricanikolic Mar 5, 2025
12faa5c
Merge remote-tracking branch 'origin/main' into yuri/grpc-cluster-val…
duricanikolic Mar 5, 2025
06ea6f7
Upgrade to latest dskit
duricanikolic Mar 5, 2025
a166e0f
Merge remote-tracking branch 'origin/main' into yuri/grpc-cluster-val…
duricanikolic Mar 6, 2025
8f0b382
Upgrade to the latest dskit
duricanikolic Mar 6, 2025
9bddb85
Configure gRPC clients for block-builder->block-builder-scheduler com…
duricanikolic Mar 6, 2025
ed4205b
Configure gRPC clients for query-frontend->query-scheduler communicat…
duricanikolic Mar 6, 2025
80900b8
Configure gRPC clients for querier->store-gateway communications
duricanikolic Mar 6, 2025
2ca25b2
Upgrade dskit
duricanikolic Mar 6, 2025
fa9d0e4
Configure gRPC clients for query-scheduler->query-frontend communicat…
duricanikolic Mar 6, 2025
7a47cd8
Merge remote-tracking branch 'origin/main' into yuri/grpc-cluster-val…
duricanikolic Mar 7, 2025
841c5e0
Configure gRPC clients for ruler->ruler communications
duricanikolic Mar 7, 2025
5eb94ef
Merge remote-tracking branch 'origin/main' into yuri/grpc-cluster-val…
duricanikolic Mar 8, 2025
1923344
Configure gRPC clients for ruler->ruler-query-frontend communications
duricanikolic Mar 8, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 99 additions & 0 deletions cmd/mimir/config-descriptor.json
Original file line number Diff line number Diff line change
Expand Up @@ -2400,6 +2400,17 @@
"fieldFlag": "ingester.client.connect-backoff-max-delay",
"fieldType": "duration",
"fieldCategory": "advanced"
},
{
"kind": "field",
"name": "cluster_validation_label",
"required": false,
"desc": "Optionally define gRPC client's cluster validation label.",
"fieldValue": null,
"fieldDefaultValue": "",
"fieldFlag": "ingester.client.cluster-validation-label",
"fieldType": "string",
"fieldCategory": "experimental"
}
],
"fieldValue": null,
Expand Down Expand Up @@ -5654,6 +5665,17 @@
"fieldFlag": "querier.frontend-client.connect-backoff-max-delay",
"fieldType": "duration",
"fieldCategory": "advanced"
},
{
"kind": "field",
"name": "cluster_validation_label",
"required": false,
"desc": "Optionally define gRPC client's cluster validation label.",
"fieldValue": null,
"fieldDefaultValue": "",
"fieldFlag": "querier.frontend-client.cluster-validation-label",
"fieldType": "string",
"fieldCategory": "experimental"
}
],
"fieldValue": null,
Expand Down Expand Up @@ -5916,6 +5938,17 @@
"fieldFlag": "querier.scheduler-client.connect-backoff-max-delay",
"fieldType": "duration",
"fieldCategory": "advanced"
},
{
"kind": "field",
"name": "cluster_validation_label",
"required": false,
"desc": "Optionally define gRPC client's cluster validation label.",
"fieldValue": null,
"fieldDefaultValue": "",
"fieldFlag": "querier.scheduler-client.cluster-validation-label",
"fieldType": "string",
"fieldCategory": "experimental"
}
],
"fieldValue": null,
Expand Down Expand Up @@ -6307,6 +6340,17 @@
"fieldFlag": "query-frontend.grpc-client-config.connect-backoff-max-delay",
"fieldType": "duration",
"fieldCategory": "advanced"
},
{
"kind": "field",
"name": "cluster_validation_label",
"required": false,
"desc": "Optionally define gRPC client's cluster validation label.",
"fieldValue": null,
"fieldDefaultValue": "",
"fieldFlag": "query-frontend.grpc-client-config.cluster-validation-label",
"fieldType": "string",
"fieldCategory": "experimental"
}
],
"fieldValue": null,
Expand Down Expand Up @@ -12791,6 +12835,17 @@
"fieldFlag": "ruler.client.connect-backoff-max-delay",
"fieldType": "duration",
"fieldCategory": "advanced"
},
{
"kind": "field",
"name": "cluster_validation_label",
"required": false,
"desc": "Optionally define gRPC client's cluster validation label.",
"fieldValue": null,
"fieldDefaultValue": "",
"fieldFlag": "ruler.client.cluster-validation-label",
"fieldType": "string",
"fieldCategory": "experimental"
}
],
"fieldValue": null,
Expand Down Expand Up @@ -13828,6 +13883,17 @@
"fieldFlag": "ruler.query-frontend.grpc-client-config.connect-backoff-max-delay",
"fieldType": "duration",
"fieldCategory": "advanced"
},
{
"kind": "field",
"name": "cluster_validation_label",
"required": false,
"desc": "Optionally define gRPC client's cluster validation label.",
"fieldValue": null,
"fieldDefaultValue": "",
"fieldFlag": "ruler.query-frontend.grpc-client-config.cluster-validation-label",
"fieldType": "string",
"fieldCategory": "experimental"
}
],
"fieldValue": null,
Expand Down Expand Up @@ -16416,6 +16482,17 @@
"fieldFlag": "alertmanager.alertmanager-client.connect-backoff-max-delay",
"fieldType": "duration",
"fieldCategory": "advanced"
},
{
"kind": "field",
"name": "cluster_validation_label",
"required": false,
"desc": "Optionally define gRPC client's cluster validation label.",
"fieldValue": null,
"fieldDefaultValue": "",
"fieldFlag": "alertmanager.alertmanager-client.cluster-validation-label",
"fieldType": "string",
"fieldCategory": "experimental"
}
],
"fieldValue": null,
Expand Down Expand Up @@ -18321,6 +18398,17 @@
"fieldFlag": "query-scheduler.grpc-client-config.connect-backoff-max-delay",
"fieldType": "duration",
"fieldCategory": "advanced"
},
{
"kind": "field",
"name": "cluster_validation_label",
"required": false,
"desc": "Optionally define gRPC client's cluster validation label.",
"fieldValue": null,
"fieldDefaultValue": "",
"fieldFlag": "query-scheduler.grpc-client-config.cluster-validation-label",
"fieldType": "string",
"fieldCategory": "experimental"
}
],
"fieldValue": null,
Expand Down Expand Up @@ -20293,6 +20381,17 @@
],
"fieldValue": null,
"fieldDefaultValue": null
},
{
"kind": "field",
"name": "cluster_validation_label",
"required": false,
"desc": "Optionally define gRPC client's cluster validation label.",
"fieldValue": null,
"fieldDefaultValue": "",
"fieldFlag": "common.cluster-validation-label",
"fieldType": "string",
"fieldCategory": "experimental"
}
],
"fieldValue": null,
Expand Down
18 changes: 18 additions & 0 deletions cmd/mimir/help-all.txt.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,8 @@ Usage of ./cmd/mimir/mimir:
Enable backoff and retry when we hit rate limits.
-alertmanager.alertmanager-client.backoff-retries int
Number of times to backoff and retry before failing. (default 10)
-alertmanager.alertmanager-client.cluster-validation-label string
[experimental] Optionally define gRPC client's cluster validation label.
-alertmanager.alertmanager-client.connect-backoff-base-delay duration
Initial backoff delay after first connection failure. Only relevant if ConnectTimeout > 0. (default 1s)
-alertmanager.alertmanager-client.connect-backoff-max-delay duration
Expand Down Expand Up @@ -987,6 +989,8 @@ Usage of ./cmd/mimir/mimir:
Maximum number of CPUs that can simultaneously process WAL replay. If it is set to 0, then each TSDB is replayed with a concurrency equal to the number of CPU cores available on the machine.
-blocks-storage.tsdb.wal-segment-size-bytes int
TSDB WAL segments files max size (bytes). (default 134217728)
-common.cluster-validation-label string
[experimental] Optionally define gRPC client's cluster validation label.
-common.storage.azure.account-key string
Azure storage account key. If unset, Azure managed identities will be used for authentication instead.
-common.storage.azure.account-name string
Expand Down Expand Up @@ -1599,6 +1603,8 @@ Usage of ./cmd/mimir/mimir:
Enable backoff and retry when we hit rate limits.
-ingester.client.backoff-retries int
Number of times to backoff and retry before failing. (default 10)
-ingester.client.cluster-validation-label string
[experimental] Optionally define gRPC client's cluster validation label.
-ingester.client.connect-backoff-base-delay duration
Initial backoff delay after first connection failure. Only relevant if ConnectTimeout > 0. (default 1s)
-ingester.client.connect-backoff-max-delay duration
Expand Down Expand Up @@ -2115,6 +2121,8 @@ Usage of ./cmd/mimir/mimir:
Enable backoff and retry when we hit rate limits.
-querier.frontend-client.backoff-retries int
Number of times to backoff and retry before failing. (default 10)
-querier.frontend-client.cluster-validation-label string
[experimental] Optionally define gRPC client's cluster validation label.
-querier.frontend-client.connect-backoff-base-delay duration
Initial backoff delay after first connection failure. Only relevant if ConnectTimeout > 0. (default 1s)
-querier.frontend-client.connect-backoff-max-delay duration
Expand Down Expand Up @@ -2223,6 +2231,8 @@ Usage of ./cmd/mimir/mimir:
Enable backoff and retry when we hit rate limits.
-querier.scheduler-client.backoff-retries int
Number of times to backoff and retry before failing. (default 10)
-querier.scheduler-client.cluster-validation-label string
[experimental] Optionally define gRPC client's cluster validation label.
-querier.scheduler-client.connect-backoff-base-delay duration
Initial backoff delay after first connection failure. Only relevant if ConnectTimeout > 0. (default 1s)
-querier.scheduler-client.connect-backoff-max-delay duration
Expand Down Expand Up @@ -2305,6 +2315,8 @@ Usage of ./cmd/mimir/mimir:
Enable backoff and retry when we hit rate limits.
-query-frontend.grpc-client-config.backoff-retries int
Number of times to backoff and retry before failing. (default 10)
-query-frontend.grpc-client-config.cluster-validation-label string
[experimental] Optionally define gRPC client's cluster validation label.
-query-frontend.grpc-client-config.connect-backoff-base-delay duration
Initial backoff delay after first connection failure. Only relevant if ConnectTimeout > 0. (default 1s)
-query-frontend.grpc-client-config.connect-backoff-max-delay duration
Expand Down Expand Up @@ -2521,6 +2533,8 @@ Usage of ./cmd/mimir/mimir:
Enable backoff and retry when we hit rate limits.
-query-scheduler.grpc-client-config.backoff-retries int
Number of times to backoff and retry before failing. (default 10)
-query-scheduler.grpc-client-config.cluster-validation-label string
[experimental] Optionally define gRPC client's cluster validation label.
-query-scheduler.grpc-client-config.connect-backoff-base-delay duration
Initial backoff delay after first connection failure. Only relevant if ConnectTimeout > 0. (default 1s)
-query-scheduler.grpc-client-config.connect-backoff-max-delay duration
Expand Down Expand Up @@ -2955,6 +2969,8 @@ Usage of ./cmd/mimir/mimir:
Enable backoff and retry when we hit rate limits.
-ruler.client.backoff-retries int
Number of times to backoff and retry before failing. (default 10)
-ruler.client.cluster-validation-label string
[experimental] Optionally define gRPC client's cluster validation label.
-ruler.client.connect-backoff-base-delay duration
Initial backoff delay after first connection failure. Only relevant if ConnectTimeout > 0. (default 1s)
-ruler.client.connect-backoff-max-delay duration
Expand Down Expand Up @@ -3043,6 +3059,8 @@ Usage of ./cmd/mimir/mimir:
Enable backoff and retry when we hit rate limits.
-ruler.query-frontend.grpc-client-config.backoff-retries int
Number of times to backoff and retry before failing. (default 10)
-ruler.query-frontend.grpc-client-config.cluster-validation-label string
[experimental] Optionally define gRPC client's cluster validation label.
-ruler.query-frontend.grpc-client-config.connect-backoff-base-delay duration
Initial backoff delay after first connection failure. Only relevant if ConnectTimeout > 0. (default 1s)
-ruler.query-frontend.grpc-client-config.connect-backoff-max-delay duration
Expand Down
12 changes: 12 additions & 0 deletions docs/sources/mimir/configure/configuration-parameters/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -510,6 +510,10 @@ storage:
# system as object storage backend.
# The CLI flags prefix for this block configuration is: common.storage
[filesystem: <filesystem_storage_backend>]
# (experimental) Optionally define gRPC client's cluster validation label.
# CLI flag: -common.cluster-validation-label
[cluster_validation_label: <string> | default = ""]
```

### server
Expand Down Expand Up @@ -2669,6 +2673,10 @@ alertmanager_client:
# CLI flag: -alertmanager.alertmanager-client.connect-backoff-max-delay
[connect_backoff_max_delay: <duration> | default = 5s]
# (experimental) Optionally define gRPC client's cluster validation label.
# CLI flag: -alertmanager.alertmanager-client.cluster-validation-label
[cluster_validation_label: <string> | default = ""]
# (advanced) The interval between persisting the current alertmanager state
# (notification log and silences) to object storage. This is only used when
# sharding is enabled. This state is read when all replicas for a shard cannot
Expand Down Expand Up @@ -2927,6 +2935,10 @@ backoff_config:
# if ConnectTimeout > 0.
# CLI flag: -<prefix>.connect-backoff-max-delay
[connect_backoff_max_delay: <duration> | default = 5s]
# (experimental) Optionally define gRPC client's cluster validation label.
# CLI flag: -<prefix>.cluster-validation-label
[cluster_validation_label: <string> | default = ""]
```

### frontend_worker
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ require (
github.com/golang/snappy v0.0.4
github.com/google/gopacket v1.1.19
github.com/gorilla/mux v1.8.1
github.com/grafana/dskit v0.0.0-20250303172748-fd4441b85237
github.com/grafana/dskit v0.0.0-20250303214858-d23654211757
github.com/grafana/e2e v0.1.2-0.20240118170847-db90b84177fc
github.com/hashicorp/golang-lru v1.0.2 // indirect
github.com/influxdata/influxdb/v2 v2.7.11
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -1272,8 +1272,8 @@ github.com/grafana-tools/sdk v0.0.0-20220919052116-6562121319fc h1:PXZQA2WCxe85T
github.com/grafana-tools/sdk v0.0.0-20220919052116-6562121319fc/go.mod h1:AHHlOEv1+GGQ3ktHMlhuTUwo3zljV3QJbC0+8o2kn+4=
github.com/grafana/alerting v0.0.0-20250225150117-15e285d78df2 h1:kESrzm0FcRVLmGIQCgl1MCwDGLH4sLzWphr7mcFdbfI=
github.com/grafana/alerting v0.0.0-20250225150117-15e285d78df2/go.mod h1:hdGB3dSl8Ma9Rjo2YiAEAjMkZ5HiNJbNDqRKDefRZrM=
github.com/grafana/dskit v0.0.0-20250303172748-fd4441b85237 h1:VZagYtPcmjgazfPAuWN7lER6mprG20r51+1eYPpATkw=
github.com/grafana/dskit v0.0.0-20250303172748-fd4441b85237/go.mod h1:cu2zIOHhAgRaIDuECsERftSp1l7KHq1aX1jgihQCu0c=
github.com/grafana/dskit v0.0.0-20250303214858-d23654211757 h1:nAd6h3RfteaAMeTO4cJLcPQGm1X7uYxv5oAhZICkBNw=
github.com/grafana/dskit v0.0.0-20250303214858-d23654211757/go.mod h1:cu2zIOHhAgRaIDuECsERftSp1l7KHq1aX1jgihQCu0c=
github.com/grafana/e2e v0.1.2-0.20240118170847-db90b84177fc h1:BW+LjKJDz0So5LI8UZfW5neWeKpSkWqhmGjQFzcFfLM=
github.com/grafana/e2e v0.1.2-0.20240118170847-db90b84177fc/go.mod h1:JVmqPBe8A/pZWwRoJW5ZjyALeY5OXMzPl7LrVXOdZAI=
github.com/grafana/franz-go v0.0.0-20241009100846-782ba1442937 h1:fwwnG/NcygoS6XbAaEyK2QzMXI/BZIEJvQ3CD+7XZm8=
Expand Down
10 changes: 2 additions & 8 deletions integration/ingester_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -849,6 +849,7 @@ func TestInvalidClusterValidationLabel(t *testing.T) {
baseFlags := map[string]string{
"-distributor.ingestion-tenant-shard-size": "0",
"-ingester.ring.heartbeat-period": "1s",
"-common.cluster-validation-label": testCase.distributorClusterLabel,
}

flags := mergeFlags(
Expand All @@ -857,13 +858,6 @@ func TestInvalidClusterValidationLabel(t *testing.T) {
baseFlags,
)

distributorFlags := mergeFlags(
flags,
map[string]string{
"-server.cluster-validation.label": testCase.distributorClusterLabel,
},
)

ingesterFlags := mergeFlags(
flags,
map[string]string{
Expand All @@ -879,7 +873,7 @@ func TestInvalidClusterValidationLabel(t *testing.T) {
require.NoError(t, s.StartAndWaitReady(consul, minio))

// Start Mimir components.
distributor := e2emimir.NewDistributor("distributor", consul.NetworkHTTPEndpoint(), distributorFlags)
distributor := e2emimir.NewDistributor("distributor", consul.NetworkHTTPEndpoint(), flags)
ingester := e2emimir.NewIngester("ingester", consul.NetworkHTTPEndpoint(), ingesterFlags)
require.NoError(t, s.StartAndWaitReady(distributor, ingester))

Expand Down
7 changes: 3 additions & 4 deletions pkg/ingester/client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ func MakeIngesterClient(inst ring.InstanceDesc, cfg Config, metrics *Metrics, lo
reportGRPCStatusesOptions := []middleware.InstrumentationOption{middleware.ReportGRPCStatusOption}
unary, stream := grpcclient.Instrument(metrics.requestDuration, reportGRPCStatusesOptions...)
unary = append(unary, querierapi.ReadConsistencyClientUnaryInterceptor)
if cfg.ClusterValidationLabel != "" {
unary = append(unary, middleware.ClusterUnaryClientInterceptor(cfg.ClusterValidationLabel, metrics.invalidClusterVerificationLabels, logger))
if cfg.GRPCClientConfig.ClusterValidationLabel != "" {
unary = append(unary, middleware.ClusterUnaryClientInterceptor(cfg.GRPCClientConfig.ClusterValidationLabel, metrics.invalidClusterVerificationLabels, logger))
}
stream = append(stream, querierapi.ReadConsistencyClientStreamInterceptor)

Expand Down Expand Up @@ -70,8 +70,7 @@ func (c *closableHealthAndIngesterClient) Close() error {

// Config is the configuration struct for the ingester client
type Config struct {
GRPCClientConfig grpcclient.Config `yaml:"grpc_client_config" doc:"description=Configures the gRPC client used to communicate with ingesters from distributors, queriers and rulers."`
ClusterValidationLabel string `yaml:"-"`
GRPCClientConfig grpcclient.Config `yaml:"grpc_client_config" doc:"description=Configures the gRPC client used to communicate with ingesters from distributors, queriers and rulers."`
}

// RegisterFlags registers configuration settings used by the ingester client config.
Expand Down
Loading
Loading