Skip to content

Commit

Permalink
mixin: Remove usage of cortex_distributor_replication_factor from MimirIngesterInstanceHasNoTenants (grafana#8218)
Browse files Browse the repository at this point in the history

* mixin: Remove usage of cortex_distributor_replication_factor in MimirIngesterInstanceHasNoTenants

The metric was only used to remove false positives for Mimir clusters with no tenants. Using the zone with the most series should still be a decent approximation for this.

Signed-off-by: Dimitar Dimitrov <[email protected]>

* Account for cases which don't use zone awareness

Signed-off-by: Dimitar Dimitrov <[email protected]>

* Don't hardcode pod label

Signed-off-by: Dimitar Dimitrov <[email protected]>

---------

Signed-off-by: Dimitar Dimitrov <[email protected]>
  • Loading branch information
dimitarvdimitrov authored and narqo committed Jun 6, 2024
1 parent 6dada35 commit 569876b
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -163,11 +163,24 @@ spec:
expr: |
(min by(cluster, namespace, pod) (cortex_ingester_memory_users) == 0)
and on (cluster, namespace)
# Only if there are more time-series than would be expected due to continuous testing load
# Only if there are more timeseries than would be expected due to continuous testing load
(
sum by(cluster, namespace) (cortex_ingester_memory_series)
/
max by(cluster, namespace) (cortex_distributor_replication_factor)
( # Classic storage timeseries
sum by(cluster, namespace) (cortex_ingester_memory_series)
/
max by(cluster, namespace) (cortex_distributor_replication_factor)
)
or
( # Ingest storage timeseries
sum by(cluster, namespace) (
max by(ingester_id, cluster, namespace) (
label_replace(cortex_ingester_memory_series,
"ingester_id", "$1",
"pod", ".*-([0-9]+)$"
)
)
)
)
) > 100000
for: 1h
labels:
Expand Down
21 changes: 17 additions & 4 deletions operations/mimir-mixin-compiled-baremetal/alerts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -151,11 +151,24 @@ groups:
expr: |
(min by(cluster, namespace, instance) (cortex_ingester_memory_users) == 0)
and on (cluster, namespace)
# Only if there are more time-series than would be expected due to continuous testing load
# Only if there are more timeseries than would be expected due to continuous testing load
(
sum by(cluster, namespace) (cortex_ingester_memory_series)
/
max by(cluster, namespace) (cortex_distributor_replication_factor)
( # Classic storage timeseries
sum by(cluster, namespace) (cortex_ingester_memory_series)
/
max by(cluster, namespace) (cortex_distributor_replication_factor)
)
or
( # Ingest storage timeseries
sum by(cluster, namespace) (
max by(ingester_id, cluster, namespace) (
label_replace(cortex_ingester_memory_series,
"ingester_id", "$1",
"instance", ".*-([0-9]+)$"
)
)
)
)
) > 100000
for: 1h
labels:
Expand Down
21 changes: 17 additions & 4 deletions operations/mimir-mixin-compiled/alerts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -151,11 +151,24 @@ groups:
expr: |
(min by(cluster, namespace, pod) (cortex_ingester_memory_users) == 0)
and on (cluster, namespace)
# Only if there are more time-series than would be expected due to continuous testing load
# Only if there are more timeseries than would be expected due to continuous testing load
(
sum by(cluster, namespace) (cortex_ingester_memory_series)
/
max by(cluster, namespace) (cortex_distributor_replication_factor)
( # Classic storage timeseries
sum by(cluster, namespace) (cortex_ingester_memory_series)
/
max by(cluster, namespace) (cortex_distributor_replication_factor)
)
or
( # Ingest storage timeseries
sum by(cluster, namespace) (
max by(ingester_id, cluster, namespace) (
label_replace(cortex_ingester_memory_series,
"ingester_id", "$1",
"pod", ".*-([0-9]+)$"
)
)
)
)
) > 100000
for: 1h
labels:
Expand Down
21 changes: 17 additions & 4 deletions operations/mimir-mixin/alerts/alerts.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -253,11 +253,24 @@ local utils = import 'mixin-utils/utils.libsonnet';
expr: |||
(min by(%(alert_aggregation_labels)s, %(per_instance_label)s) (cortex_ingester_memory_users) == 0)
and on (%(alert_aggregation_labels)s)
# Only if there are more time-series than would be expected due to continuous testing load
# Only if there are more timeseries than would be expected due to continuous testing load
(
sum by(%(alert_aggregation_labels)s) (cortex_ingester_memory_series)
/
max by(%(alert_aggregation_labels)s) (cortex_distributor_replication_factor)
( # Classic storage timeseries
sum by(%(alert_aggregation_labels)s) (cortex_ingester_memory_series)
/
max by(%(alert_aggregation_labels)s) (cortex_distributor_replication_factor)
)
or
( # Ingest storage timeseries
sum by(%(alert_aggregation_labels)s) (
max by(ingester_id, %(alert_aggregation_labels)s) (
label_replace(cortex_ingester_memory_series,
"ingester_id", "$1",
"%(per_instance_label)s", ".*-([0-9]+)$"
)
)
)
)
) > 100000
||| % $._config,
labels: {
Expand Down

0 comments on commit 569876b

Please sign in to comment.