diff --git a/CHANGELOG.md b/CHANGELOG.md index c0a69acab59..a3db8af5aee 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ * [FEATURE] Ingester/Distributor: Add support for exporting cost attribution metrics (`cortex_ingester_attributed_active_series`, `cortex_distributor_received_attributed_samples_total`, and `cortex_discarded_attributed_samples_total`) with labels specified by customers to a custom Prometheus registry. This feature enables more flexible billing data tracking. #10269 #10702 * [FEATURE] Ruler: Added `/ruler/tenants` endpoints to list the discovered tenants with rule groups. #10738 +* [FEATURE] Distributor: Add experimental Influx handler. #10153 * [CHANGE] Querier: pass context to queryable `IsApplicable` hook. #10451 * [CHANGE] Distributor: OTLP and push handler replace all non-UTF8 characters with the unicode replacement character `\uFFFD` in error messages before propagating them. #10236 * [CHANGE] Querier: pass query matchers to queryable `IsApplicable` hook. #10256 @@ -17,7 +18,7 @@ * [CHANGE] Ruler: Add `user` and `reason` labels to `cortex_ruler_write_requests_failed_total` and `cortex_ruler_queries_failed_total`; add `user` to `cortex_ruler_write_requests_total` and `cortex_ruler_queries_total` metrics. #10536 * [CHANGE] Querier / Query-frontend: Remove experimental `-querier.promql-experimental-functions-enabled` and `-query-frontend.block-promql-experimental-functions` CLI flags and respective YAML configuration options to enable experimental PromQL functions. Instead access to experimental PromQL functions is always blocked. You can enable them using the per-tenant setting `enabled_promql_experimental_functions`. #10660 #10712 -* [FEATURE] Distributor: Add experimental Influx handler. #10153 +* [CHANGE] Store-gateway: Include posting sampling rate in sparse index headers. When the sampling rate isn't set in a sparse index header, store gateway will rebuild the sparse header with the configured `blocks-storage.bucket-store.posting-offsets-in-mem-sampling` value. If the sparse header's sampling rate is set, but doesn't match the configured rate, store gateway will either rebuild the sparse header or downsample to the configured sampling rate. #10684 * [ENHANCEMENT] Compactor: Expose `cortex_bucket_index_last_successful_update_timestamp_seconds` for all tenants assigned to the compactor before starting the block cleanup job. #10569 * [ENHANCEMENT] Query Frontend: Return server-side `samples_processed` statistics. #10103 * [ENHANCEMENT] Distributor: OTLP receiver now converts also metric metadata. See also https://github.com/prometheus/prometheus/pull/15416. #10168 @@ -64,6 +65,7 @@ * [ENHANCEMENT] All: Add `cortex_client_request_invalid_cluster_validation_labels_total` metrics, that is used by Mimir's gRPC clients to track invalid cluster validations. #10767 * [ENHANCEMENT] Ingester client: Add support to configure cluster validation for ingester clients. Failed cluster validations are tracked by `cortex_client_request_invalid_cluster_validation_labels_total` with label `client=ingester`. #10767 * [ENHANCEMENT] Add experimental metric `cortex_distributor_dropped_native_histograms_total` to measure native histograms silently dropped when native histograms are disabled for a tenant. #10760 +* [ENHANCEMENT] Compactor: Add experimental `-compactor.upload-sparse-index-headers` option. When enabled, the compactor will attempt to upload sparse index headers to object storage. This prevents latency spikes after adding store-gateway replicas. 
#10684 * [BUGFIX] Distributor: Use a boolean to track changes while merging the ReplicaDesc components, rather than comparing the objects directly. #10185 * [BUGFIX] Querier: fix timeout responding to query-frontend when response size is very close to `-querier.frontend-client.grpc-max-send-msg-size`. #10154 * [BUGFIX] Query-frontend and querier: show warning/info annotations in some cases where they were missing (if a lazy querier was used). #10277 @@ -84,6 +86,7 @@ * [BUGFIX] Distributor: Report partially converted OTLP requests with status 400 Bad Request. #10588 * [BUGFIX] Ruler: fix issue where rule evaluations could be missed while shutting down a ruler instance if that instance owns many rule groups. prometheus/prometheus#15804 #10762 * [BUGFIX] Ingester: Add additional check on reactive limiter queue sizes. #10722 +* [BUGFIX] TSDB: fix unknown series errors and possible lost data during WAL replay when series are removed from the head due to inactivity and reappear before the next WAL checkpoint. https://github.com/prometheus/prometheus/pull/16060 #10824 ### Mixin diff --git a/cmd/mimir/config-descriptor.json b/cmd/mimir/config-descriptor.json index 6571a906c77..8b42119c2b9 100644 --- a/cmd/mimir/config-descriptor.json +++ b/cmd/mimir/config-descriptor.json @@ -11723,6 +11723,17 @@ "fieldFlag": "compactor.max-lookback", "fieldType": "duration", "fieldCategory": "experimental" + }, + { + "kind": "field", + "name": "upload_sparse_index_headers", + "required": false, + "desc": "If enabled, the compactor constructs and uploads sparse index headers to object storage during each compaction cycle. This allows store-gateway instances to use the sparse headers from object storage instead of recreating them locally.", + "fieldValue": null, + "fieldDefaultValue": false, + "fieldFlag": "compactor.upload-sparse-index-headers", + "fieldType": "boolean", + "fieldCategory": "experimental" } ], "fieldValue": null, diff --git a/cmd/mimir/help-all.txt.tmpl b/cmd/mimir/help-all.txt.tmpl index 2075f2c46ec..c057dd926cf 100644 --- a/cmd/mimir/help-all.txt.tmpl +++ b/cmd/mimir/help-all.txt.tmpl @@ -1301,6 +1301,8 @@ Usage of ./cmd/mimir/mimir: Number of symbols flushers used when doing split compaction. (default 1) -compactor.tenant-cleanup-delay duration For tenants marked for deletion, this is the time between deletion of the last block, and doing final cleanup (marker files, debug files) of the tenant. (default 6h0m0s) + -compactor.upload-sparse-index-headers + [experimental] If enabled, the compactor constructs and uploads sparse index headers to object storage during each compaction cycle. This allows store-gateway instances to use the sparse headers from object storage instead of recreating them locally. -config.expand-env Expands ${var} or $var in config according to the values of the environment variables. -config.file value diff --git a/docs/sources/mimir/configure/about-versioning.md b/docs/sources/mimir/configure/about-versioning.md index 089aa3afb73..fecaa7a1f86 100644 --- a/docs/sources/mimir/configure/about-versioning.md +++ b/docs/sources/mimir/configure/about-versioning.md @@ -71,6 +71,8 @@ The following features are currently experimental: - `-compactor.in-memory-tenant-meta-cache-size` - Limit blocks processed in each compaction cycle. Blocks uploaded prior to the maximum lookback aren't processed. - `-compactor.max-lookback` + - Enable the compactor to upload sparse index headers to object storage during compaction cycles. 
+ - `-compactor.upload-sparse-index-headers` - Ruler - Aligning of evaluation timestamp on interval (`align_evaluation_time_on_interval`) - Allow defining limits on the maximum number of rules allowed in a rule group by namespace and the maximum number of rule groups by namespace. If set, this supersedes the `-ruler.max-rules-per-rule-group` and `-ruler.max-rule-groups-per-tenant` limits. diff --git a/docs/sources/mimir/configure/configuration-parameters/index.md b/docs/sources/mimir/configure/configuration-parameters/index.md index 61eb675d6a3..3d791404f29 100644 --- a/docs/sources/mimir/configure/configuration-parameters/index.md +++ b/docs/sources/mimir/configure/configuration-parameters/index.md @@ -4997,6 +4997,13 @@ sharding_ring: # blocks are considered regardless of their upload time. # CLI flag: -compactor.max-lookback [max_lookback: | default = 0s] + +# (experimental) If enabled, the compactor constructs and uploads sparse index +# headers to object storage during each compaction cycle. This allows +# store-gateway instances to use the sparse headers from object storage instead +# of recreating them locally. +# CLI flag: -compactor.upload-sparse-index-headers +[upload_sparse_index_headers: | default = false] ``` ### store_gateway diff --git a/go.mod b/go.mod index 7f7cf2df5cc..27ea1aabdc8 100644 --- a/go.mod +++ b/go.mod @@ -298,7 +298,7 @@ require ( sigs.k8s.io/yaml v1.4.0 // indirect ) -replace github.com/prometheus/prometheus => github.com/grafana/mimir-prometheus v0.0.0-20250305224633-8c45fc54920d +replace github.com/prometheus/prometheus => github.com/grafana/mimir-prometheus v0.0.0-20250306234455-f6f6f2cceada // Replace memberlist with our fork which includes some fixes that haven't been // merged upstream yet: diff --git a/go.sum b/go.sum index 6011d94ef53..6b1de7a9633 100644 --- a/go.sum +++ b/go.sum @@ -1284,8 +1284,8 @@ github.com/grafana/gomemcache v0.0.0-20250228145437-da7b95fd2ac1 h1:vR5nELq+KtGO github.com/grafana/gomemcache v0.0.0-20250228145437-da7b95fd2ac1/go.mod h1:j/s0jkda4UXTemDs7Pgw/vMT06alWc42CHisvYac0qw= github.com/grafana/memberlist v0.3.1-0.20220714140823-09ffed8adbbe h1:yIXAAbLswn7VNWBIvM71O2QsgfgW9fRXZNR0DXe6pDU= github.com/grafana/memberlist v0.3.1-0.20220714140823-09ffed8adbbe/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE= -github.com/grafana/mimir-prometheus v0.0.0-20250305224633-8c45fc54920d h1:ff6cIM9Z2ew3nbXVjwEttZFdIEAe6X1lMfMc+xCIjKM= -github.com/grafana/mimir-prometheus v0.0.0-20250305224633-8c45fc54920d/go.mod h1:jC5V3PuoN3nxpvsvZipB+iOf6H/Np1uW+e3r9TTxJMA= +github.com/grafana/mimir-prometheus v0.0.0-20250306234455-f6f6f2cceada h1:8MLoP1fblwE72Bk4G66nmhXwoHDcpHQcfjrC+kLoXAg= +github.com/grafana/mimir-prometheus v0.0.0-20250306234455-f6f6f2cceada/go.mod h1:jC5V3PuoN3nxpvsvZipB+iOf6H/Np1uW+e3r9TTxJMA= github.com/grafana/opentracing-contrib-go-stdlib v0.0.0-20230509071955-f410e79da956 h1:em1oddjXL8c1tL0iFdtVtPloq2hRPen2MJQKoAWpxu0= github.com/grafana/opentracing-contrib-go-stdlib v0.0.0-20230509071955-f410e79da956/go.mod h1:qtI1ogk+2JhVPIXVc6q+NHziSmy2W5GbdQZFUHADCBU= github.com/grafana/prometheus-alertmanager v0.25.1-0.20250211112812-e32be5e2a455 h1:yidC1xzk4fedLZ/iXEqSJopkw3jPZPwoMqqzue4eFEA= diff --git a/operations/helm/charts/mimir-distributed/CHANGELOG.md b/operations/helm/charts/mimir-distributed/CHANGELOG.md index 4e4d922405f..070f3ead92e 100644 --- a/operations/helm/charts/mimir-distributed/CHANGELOG.md +++ b/operations/helm/charts/mimir-distributed/CHANGELOG.md @@ -39,6 +39,7 @@ Entries should include a reference to the Pull 
Request that introduced the chang * [ENHANCEMENT] Individual mimir components can override their container images via the *.image values. The component's image definitions always override the values set in global `image` or `enterprise.image`. #10340 * [ENHANCEMENT] Alertmanager, compactor, ingester, and store-gateway StatefulSets can configure their PVC template name via the corresponding *.persistentVolume.name values. #10376 * [ENHANCEMENT] Set resources for smoke-test job. #10608 +* [ENHANCEMENT] All components can expose additional ports with their respective services via the *.service.extraPorts values. This allows exposing the containers that components declare in `extraContainers`. #10659 * [BUGFIX] Create proper in-cluster remote URLs when gateway and nginx are disabled. #10625 * [BUGFIX] Fix calculation of `mimir.siToBytes` and use floating point arithmetics. #10044 diff --git a/operations/helm/charts/mimir-distributed/templates/admin-api/admin-api-svc.yaml b/operations/helm/charts/mimir-distributed/templates/admin-api/admin-api-svc.yaml index 7216b017906..b56015671d1 100644 --- a/operations/helm/charts/mimir-distributed/templates/admin-api/admin-api-svc.yaml +++ b/operations/helm/charts/mimir-distributed/templates/admin-api/admin-api-svc.yaml @@ -25,6 +25,9 @@ spec: protocol: TCP name: grpc targetPort: grpc + {{- if .Values.admin_api.service.extraPorts }} + {{- toYaml .Values.admin_api.service.extraPorts | nindent 4 }} + {{- end }} selector: {{- include "mimir.selectorLabels" (dict "ctx" . "component" "admin-api" "memberlist" true) | nindent 4 }} {{- end -}} diff --git a/operations/helm/charts/mimir-distributed/templates/alertmanager/alertmanager-svc-headless.yaml b/operations/helm/charts/mimir-distributed/templates/alertmanager/alertmanager-svc-headless.yaml index 78a2267c522..927233d21b9 100644 --- a/operations/helm/charts/mimir-distributed/templates/alertmanager/alertmanager-svc-headless.yaml +++ b/operations/helm/charts/mimir-distributed/templates/alertmanager/alertmanager-svc-headless.yaml @@ -29,6 +29,9 @@ spec: - port: {{ $clusterPort }} protocol: TCP name: cluster + {{- if .Values.alertmanager.service.extraPorts }} + {{- toYaml .Values.alertmanager.service.extraPorts | nindent 4 }} + {{- end }} selector: {{- include "mimir.selectorLabels" (dict "ctx" . 
"component" "alertmanager" "memberlist" true) | nindent 4 }} {{- end -}} diff --git a/operations/helm/charts/mimir-distributed/templates/alertmanager/alertmanager-svc.yaml b/operations/helm/charts/mimir-distributed/templates/alertmanager/alertmanager-svc.yaml index a511f5c3bb7..8c93674bc71 100644 --- a/operations/helm/charts/mimir-distributed/templates/alertmanager/alertmanager-svc.yaml +++ b/operations/helm/charts/mimir-distributed/templates/alertmanager/alertmanager-svc.yaml @@ -34,6 +34,9 @@ spec: protocol: TCP name: grpc targetPort: grpc + {{- if .Values.alertmanager.service.extraPorts }} + {{- toYaml .Values.alertmanager.service.extraPorts | nindent 4 }} + {{- end }} selector: {{- include "mimir.selectorLabels" $args | nindent 4 }} diff --git a/operations/helm/charts/mimir-distributed/templates/compactor/compactor-svc.yaml b/operations/helm/charts/mimir-distributed/templates/compactor/compactor-svc.yaml index 5f768404f1e..50c6bc50851 100644 --- a/operations/helm/charts/mimir-distributed/templates/compactor/compactor-svc.yaml +++ b/operations/helm/charts/mimir-distributed/templates/compactor/compactor-svc.yaml @@ -25,6 +25,9 @@ spec: protocol: TCP name: grpc targetPort: grpc + {{- if .Values.compactor.service.extraPorts }} + {{- toYaml .Values.compactor.service.extraPorts | nindent 4 }} + {{- end }} selector: {{- include "mimir.selectorLabels" (dict "ctx" . "component" "compactor" "memberlist" true) | nindent 4 }} {{- end }} diff --git a/operations/helm/charts/mimir-distributed/templates/continuous_test/continuous-test-svc-headless.yaml b/operations/helm/charts/mimir-distributed/templates/continuous_test/continuous-test-svc-headless.yaml index 59aceb6d50a..65e38cd774d 100644 --- a/operations/helm/charts/mimir-distributed/templates/continuous_test/continuous-test-svc-headless.yaml +++ b/operations/helm/charts/mimir-distributed/templates/continuous_test/continuous-test-svc-headless.yaml @@ -20,6 +20,9 @@ spec: protocol: TCP name: http-metrics targetPort: http-metrics + {{- if .Values.continuous_test.service.extraPorts }} + {{- toYaml .Values.continuous_test.service.extraPorts | nindent 4 }} + {{- end }} selector: {{- include "mimir.selectorLabels" (dict "ctx" . "component" "continuous-test") | nindent 4 }} {{- end -}} diff --git a/operations/helm/charts/mimir-distributed/templates/distributor/distributor-svc-headless.yaml b/operations/helm/charts/mimir-distributed/templates/distributor/distributor-svc-headless.yaml index aae207d8596..a53b66f574a 100644 --- a/operations/helm/charts/mimir-distributed/templates/distributor/distributor-svc-headless.yaml +++ b/operations/helm/charts/mimir-distributed/templates/distributor/distributor-svc-headless.yaml @@ -24,6 +24,9 @@ spec: protocol: TCP name: grpc targetPort: grpc + {{- if .Values.distributor.service.extraPorts }} + {{- toYaml .Values.distributor.service.extraPorts | nindent 4 }} + {{- end }} selector: {{- include "mimir.selectorLabels" (dict "ctx" . 
"component" "distributor" "memberlist" true) | nindent 4 }} {{- end }} diff --git a/operations/helm/charts/mimir-distributed/templates/distributor/distributor-svc.yaml b/operations/helm/charts/mimir-distributed/templates/distributor/distributor-svc.yaml index 1f3741f2c80..4cbc4195bc1 100644 --- a/operations/helm/charts/mimir-distributed/templates/distributor/distributor-svc.yaml +++ b/operations/helm/charts/mimir-distributed/templates/distributor/distributor-svc.yaml @@ -25,6 +25,9 @@ spec: protocol: TCP name: grpc targetPort: grpc + {{- if .Values.distributor.service.extraPorts }} + {{- toYaml .Values.distributor.service.extraPorts | nindent 4 }} + {{- end }} selector: {{- include "mimir.selectorLabels" (dict "ctx" . "component" "distributor" "memberlist" true) | nindent 4 }} {{- end }} diff --git a/operations/helm/charts/mimir-distributed/templates/federation-frontend/federation-frontend-svc.yaml b/operations/helm/charts/mimir-distributed/templates/federation-frontend/federation-frontend-svc.yaml index 9cb1691da28..c4f8964b603 100644 --- a/operations/helm/charts/mimir-distributed/templates/federation-frontend/federation-frontend-svc.yaml +++ b/operations/helm/charts/mimir-distributed/templates/federation-frontend/federation-frontend-svc.yaml @@ -22,6 +22,9 @@ spec: protocol: TCP name: grpc targetPort: grpc + {{- if .Values.federation_frontend.service.extraPorts }} + {{- toYaml .Values.federation_frontend.service.extraPorts | nindent 4 }} + {{- end }} selector: {{- include "mimir.selectorLabels" (dict "ctx" . "component" "federation-frontend") | nindent 4 }} {{- end }} diff --git a/operations/helm/charts/mimir-distributed/templates/gateway/gateway-svc.yaml b/operations/helm/charts/mimir-distributed/templates/gateway/gateway-svc.yaml index 88032a5f723..ce59807de67 100644 --- a/operations/helm/charts/mimir-distributed/templates/gateway/gateway-svc.yaml +++ b/operations/helm/charts/mimir-distributed/templates/gateway/gateway-svc.yaml @@ -36,6 +36,9 @@ spec: name: legacy-http-metrics targetPort: http-metrics {{- end }} + {{- if .Values.gateway.service.extraPorts }} + {{- toYaml .Values.gateway.service.extraPorts | nindent 4 }} + {{- end }} selector: {{- include "mimir.selectorLabels" (dict "ctx" $ "component" "gateway") | nindent 4 }} {{- end -}} diff --git a/operations/helm/charts/mimir-distributed/templates/graphite-proxy/graphite-querier/graphite-querier-svc.yaml b/operations/helm/charts/mimir-distributed/templates/graphite-proxy/graphite-querier/graphite-querier-svc.yaml index b954ebe8155..e8e029ea48c 100644 --- a/operations/helm/charts/mimir-distributed/templates/graphite-proxy/graphite-querier/graphite-querier-svc.yaml +++ b/operations/helm/charts/mimir-distributed/templates/graphite-proxy/graphite-querier/graphite-querier-svc.yaml @@ -22,6 +22,9 @@ spec: protocol: TCP name: grpc targetPort: grpc + {{- if .Values.graphite.querier.service.extraPorts }} + {{- toYaml .Values.graphite.querier.service.extraPorts | nindent 4 }} + {{- end }} selector: {{- include "mimir.selectorLabels" (dict "ctx" . 
"component" "graphite-querier") | nindent 4 }} {{- end -}} \ No newline at end of file diff --git a/operations/helm/charts/mimir-distributed/templates/graphite-proxy/graphite-write-proxy/graphite-write-proxy-svc.yaml b/operations/helm/charts/mimir-distributed/templates/graphite-proxy/graphite-write-proxy/graphite-write-proxy-svc.yaml index 07c02f16867..f5c1b684d6e 100644 --- a/operations/helm/charts/mimir-distributed/templates/graphite-proxy/graphite-write-proxy/graphite-write-proxy-svc.yaml +++ b/operations/helm/charts/mimir-distributed/templates/graphite-proxy/graphite-write-proxy/graphite-write-proxy-svc.yaml @@ -22,6 +22,9 @@ spec: protocol: TCP name: grpc targetPort: grpc + {{- if .Values.graphite.write_proxy.service.extraPorts }} + {{- toYaml .Values.graphite.write_proxy.service.extraPorts | nindent 4 }} + {{- end }} selector: {{- include "mimir.selectorLabels" (dict "ctx" . "component" "graphite-write-proxy") | nindent 4 }} {{- end -}} diff --git a/operations/helm/charts/mimir-distributed/templates/ingester/ingester-svc-headless.yaml b/operations/helm/charts/mimir-distributed/templates/ingester/ingester-svc-headless.yaml index c0e572e42c9..677168e02a7 100644 --- a/operations/helm/charts/mimir-distributed/templates/ingester/ingester-svc-headless.yaml +++ b/operations/helm/charts/mimir-distributed/templates/ingester/ingester-svc-headless.yaml @@ -24,6 +24,9 @@ spec: protocol: TCP name: grpc targetPort: grpc + {{- if .Values.ingester.service.extraPorts }} + {{- toYaml .Values.ingester.service.extraPorts | nindent 4 }} + {{- end }} selector: {{- include "mimir.selectorLabels" (dict "ctx" . "component" "ingester" "memberlist" true) | nindent 4 }} {{- end }} diff --git a/operations/helm/charts/mimir-distributed/templates/ingester/ingester-svc.yaml b/operations/helm/charts/mimir-distributed/templates/ingester/ingester-svc.yaml index 7ed6bd2d66c..bfbdf017a7a 100644 --- a/operations/helm/charts/mimir-distributed/templates/ingester/ingester-svc.yaml +++ b/operations/helm/charts/mimir-distributed/templates/ingester/ingester-svc.yaml @@ -34,6 +34,9 @@ spec: protocol: TCP name: grpc targetPort: grpc + {{- if .Values.ingester.service.extraPorts }} + {{- toYaml .Values.ingester.service.extraPorts | nindent 4 }} + {{- end }} selector: {{- include "mimir.selectorLabels" $args | nindent 4 }} diff --git a/operations/helm/charts/mimir-distributed/templates/memcached/_memcached-svc.tpl b/operations/helm/charts/mimir-distributed/templates/memcached/_memcached-svc.tpl index cb41f7fa4e1..6bcb3a1d128 100644 --- a/operations/helm/charts/mimir-distributed/templates/memcached/_memcached-svc.tpl +++ b/operations/helm/charts/mimir-distributed/templates/memcached/_memcached-svc.tpl @@ -28,6 +28,9 @@ spec: port: 9150 targetPort: 9150 {{ end }} + {{- if .service.extraPorts }} + {{- toYaml .service.extraPorts | nindent 4 }} + {{- end }} selector: {{- include "mimir.selectorLabels" (dict "ctx" $.ctx "component" $.component) | nindent 4 }} {{- end -}} diff --git a/operations/helm/charts/mimir-distributed/templates/nginx/nginx-svc.yaml b/operations/helm/charts/mimir-distributed/templates/nginx/nginx-svc.yaml index cb87a760daf..1af4e9cb604 100644 --- a/operations/helm/charts/mimir-distributed/templates/nginx/nginx-svc.yaml +++ b/operations/helm/charts/mimir-distributed/templates/nginx/nginx-svc.yaml @@ -31,6 +31,9 @@ spec: nodePort: {{ .Values.nginx.service.nodePort }} {{- end }} protocol: TCP + {{- if .Values.nginx.service.extraPorts }} + {{- toYaml .Values.nginx.service.extraPorts | nindent 4 }} + {{- end }} selector: 
{{- include "mimir.selectorLabels" (dict "ctx" . "component" "nginx") | nindent 4 }} {{- end -}} diff --git a/operations/helm/charts/mimir-distributed/templates/overrides-exporter/overrides-exporter-svc.yaml b/operations/helm/charts/mimir-distributed/templates/overrides-exporter/overrides-exporter-svc.yaml index 3f26a9689c9..e4b3fb5f396 100644 --- a/operations/helm/charts/mimir-distributed/templates/overrides-exporter/overrides-exporter-svc.yaml +++ b/operations/helm/charts/mimir-distributed/templates/overrides-exporter/overrides-exporter-svc.yaml @@ -25,6 +25,9 @@ spec: protocol: TCP name: grpc targetPort: grpc + {{- if .Values.overrides_exporter.service.extraPorts }} + {{- toYaml .Values.overrides_exporter.service.extraPorts | nindent 4 }} + {{- end }} selector: {{- include "mimir.selectorLabels" (dict "ctx" . "component" "overrides-exporter") | nindent 4 }} {{- end -}} diff --git a/operations/helm/charts/mimir-distributed/templates/querier/querier-svc.yaml b/operations/helm/charts/mimir-distributed/templates/querier/querier-svc.yaml index 309b49730f6..faa68299e67 100644 --- a/operations/helm/charts/mimir-distributed/templates/querier/querier-svc.yaml +++ b/operations/helm/charts/mimir-distributed/templates/querier/querier-svc.yaml @@ -25,6 +25,9 @@ spec: protocol: TCP name: grpc targetPort: grpc + {{- if .Values.querier.service.extraPorts }} + {{- toYaml .Values.querier.service.extraPorts | nindent 4 }} + {{- end }} selector: {{- include "mimir.selectorLabels" (dict "ctx" . "component" "querier" "memberlist" true) | nindent 4 }} {{- end }} diff --git a/operations/helm/charts/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml b/operations/helm/charts/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml index 6fd0ef2c9a9..46e213973b2 100644 --- a/operations/helm/charts/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml +++ b/operations/helm/charts/mimir-distributed/templates/query-frontend/query-frontend-svc-headless.yaml @@ -25,6 +25,9 @@ spec: protocol: TCP name: grpc targetPort: grpc + {{- if .Values.query_frontend.service.extraPorts }} + {{- toYaml .Values.query_frontend.service.extraPorts | nindent 4 }} + {{- end }} selector: {{- include "mimir.selectorLabels" (dict "ctx" . "component" "query-frontend") | nindent 4 }} {{- end }} diff --git a/operations/helm/charts/mimir-distributed/templates/query-frontend/query-frontend-svc.yaml b/operations/helm/charts/mimir-distributed/templates/query-frontend/query-frontend-svc.yaml index b4a704bd39e..5fcfa412eb6 100644 --- a/operations/helm/charts/mimir-distributed/templates/query-frontend/query-frontend-svc.yaml +++ b/operations/helm/charts/mimir-distributed/templates/query-frontend/query-frontend-svc.yaml @@ -25,6 +25,9 @@ spec: protocol: TCP name: grpc targetPort: grpc + {{- if .Values.query_frontend.service.extraPorts }} + {{- toYaml .Values.query_frontend.service.extraPorts | nindent 4 }} + {{- end }} selector: {{- include "mimir.selectorLabels" (dict "ctx" . 
"component" "query-frontend") | nindent 4 }} {{- end }} diff --git a/operations/helm/charts/mimir-distributed/templates/query-scheduler/query-scheduler-svc-headless.yaml b/operations/helm/charts/mimir-distributed/templates/query-scheduler/query-scheduler-svc-headless.yaml index f79753c3ddd..4c659dadb77 100644 --- a/operations/helm/charts/mimir-distributed/templates/query-scheduler/query-scheduler-svc-headless.yaml +++ b/operations/helm/charts/mimir-distributed/templates/query-scheduler/query-scheduler-svc-headless.yaml @@ -25,6 +25,9 @@ spec: protocol: TCP name: grpc targetPort: grpc + {{- if .Values.query_scheduler.service.extraPorts }} + {{- toYaml .Values.query_scheduler.service.extraPorts | nindent 4 }} + {{- end }} selector: {{- include "mimir.selectorLabels" (dict "ctx" . "component" "query-scheduler") | nindent 4 }} {{- end }} diff --git a/operations/helm/charts/mimir-distributed/templates/query-scheduler/query-scheduler-svc.yaml b/operations/helm/charts/mimir-distributed/templates/query-scheduler/query-scheduler-svc.yaml index 0aebe04c39d..eb99539e651 100644 --- a/operations/helm/charts/mimir-distributed/templates/query-scheduler/query-scheduler-svc.yaml +++ b/operations/helm/charts/mimir-distributed/templates/query-scheduler/query-scheduler-svc.yaml @@ -25,6 +25,9 @@ spec: protocol: TCP name: grpc targetPort: grpc + {{- if .Values.query_scheduler.service.extraPorts }} + {{- toYaml .Values.query_scheduler.service.extraPorts | nindent 4 }} + {{- end }} selector: {{- include "mimir.selectorLabels" (dict "ctx" . "component" "query-scheduler") | nindent 4 }} {{- end }} diff --git a/operations/helm/charts/mimir-distributed/templates/ruler-querier/ruler-querier-svc.yaml b/operations/helm/charts/mimir-distributed/templates/ruler-querier/ruler-querier-svc.yaml index 99c1e5e3f6b..934151e0714 100644 --- a/operations/helm/charts/mimir-distributed/templates/ruler-querier/ruler-querier-svc.yaml +++ b/operations/helm/charts/mimir-distributed/templates/ruler-querier/ruler-querier-svc.yaml @@ -25,6 +25,9 @@ spec: protocol: TCP name: grpc targetPort: grpc + {{- if .Values.ruler_querier.service.extraPorts }} + {{- toYaml .Values.ruler_querier.service.extraPorts | nindent 4 }} + {{- end }} selector: {{- include "mimir.selectorLabels" (dict "ctx" . "component" "ruler-querier" "memberlist" true) | nindent 4 }} {{- end }} \ No newline at end of file diff --git a/operations/helm/charts/mimir-distributed/templates/ruler-query-frontend/ruler-query-frontend-svc.yaml b/operations/helm/charts/mimir-distributed/templates/ruler-query-frontend/ruler-query-frontend-svc.yaml index d259578daf7..014289b9dad 100644 --- a/operations/helm/charts/mimir-distributed/templates/ruler-query-frontend/ruler-query-frontend-svc.yaml +++ b/operations/helm/charts/mimir-distributed/templates/ruler-query-frontend/ruler-query-frontend-svc.yaml @@ -25,6 +25,9 @@ spec: protocol: TCP name: grpc targetPort: grpc + {{- if .Values.ruler_query_frontend.service.extraPorts }} + {{- toYaml .Values.ruler_query_frontend.service.extraPorts | nindent 4 }} + {{- end }} selector: {{- include "mimir.selectorLabels" (dict "ctx" . 
"component" "ruler-query-frontend") | nindent 4 }} {{- end }} diff --git a/operations/helm/charts/mimir-distributed/templates/ruler-query-scheduler/ruler-query-scheduler-svc-headless.yaml b/operations/helm/charts/mimir-distributed/templates/ruler-query-scheduler/ruler-query-scheduler-svc-headless.yaml index 073adbfd122..0461e7617ba 100644 --- a/operations/helm/charts/mimir-distributed/templates/ruler-query-scheduler/ruler-query-scheduler-svc-headless.yaml +++ b/operations/helm/charts/mimir-distributed/templates/ruler-query-scheduler/ruler-query-scheduler-svc-headless.yaml @@ -25,6 +25,9 @@ spec: protocol: TCP name: grpc targetPort: grpc + {{- if .Values.ruler_query_scheduler.service.extraPorts }} + {{- toYaml .Values.ruler_query_scheduler.service.extraPorts | nindent 4 }} + {{- end }} selector: {{- include "mimir.selectorLabels" (dict "ctx" . "component" "ruler-query-scheduler") | nindent 4 }} {{- end }} diff --git a/operations/helm/charts/mimir-distributed/templates/ruler-query-scheduler/ruler-query-scheduler-svc.yaml b/operations/helm/charts/mimir-distributed/templates/ruler-query-scheduler/ruler-query-scheduler-svc.yaml index fa0ee303937..454fcf8cb27 100644 --- a/operations/helm/charts/mimir-distributed/templates/ruler-query-scheduler/ruler-query-scheduler-svc.yaml +++ b/operations/helm/charts/mimir-distributed/templates/ruler-query-scheduler/ruler-query-scheduler-svc.yaml @@ -25,6 +25,9 @@ spec: protocol: TCP name: grpc targetPort: grpc + {{- if .Values.ruler_query_scheduler.service.extraPorts }} + {{- toYaml .Values.ruler_query_scheduler.service.extraPorts | nindent 4 }} + {{- end }} selector: {{- include "mimir.selectorLabels" (dict "ctx" . "component" "ruler-query-scheduler") | nindent 4 }} {{- end }} diff --git a/operations/helm/charts/mimir-distributed/templates/ruler/ruler-svc.yaml b/operations/helm/charts/mimir-distributed/templates/ruler/ruler-svc.yaml index 77709c445e0..5771d515fe0 100644 --- a/operations/helm/charts/mimir-distributed/templates/ruler/ruler-svc.yaml +++ b/operations/helm/charts/mimir-distributed/templates/ruler/ruler-svc.yaml @@ -21,6 +21,9 @@ spec: protocol: TCP name: http-metrics targetPort: http-metrics + {{- if .Values.ruler.service.extraPorts }} + {{- toYaml .Values.ruler.service.extraPorts | nindent 4 }} + {{- end }} selector: {{- include "mimir.selectorLabels" (dict "ctx" . "component" "ruler" "memberlist" true) | nindent 4 }} {{- end -}} diff --git a/operations/helm/charts/mimir-distributed/templates/store-gateway/store-gateway-svc-headless.yaml b/operations/helm/charts/mimir-distributed/templates/store-gateway/store-gateway-svc-headless.yaml index 0f55dc68057..c8ec148e8a7 100644 --- a/operations/helm/charts/mimir-distributed/templates/store-gateway/store-gateway-svc-headless.yaml +++ b/operations/helm/charts/mimir-distributed/templates/store-gateway/store-gateway-svc-headless.yaml @@ -24,6 +24,9 @@ spec: protocol: TCP name: grpc targetPort: grpc + {{- if .Values.store_gateway.service.extraPorts }} + {{- toYaml .Values.store_gateway.service.extraPorts | nindent 4 }} + {{- end }} selector: {{- include "mimir.selectorLabels" (dict "ctx" . 
"component" "store-gateway" "memberlist" true) | nindent 4 }} {{- end }} diff --git a/operations/helm/charts/mimir-distributed/templates/store-gateway/store-gateway-svc.yaml b/operations/helm/charts/mimir-distributed/templates/store-gateway/store-gateway-svc.yaml index ac805b66714..afae230e2c9 100644 --- a/operations/helm/charts/mimir-distributed/templates/store-gateway/store-gateway-svc.yaml +++ b/operations/helm/charts/mimir-distributed/templates/store-gateway/store-gateway-svc.yaml @@ -34,6 +34,9 @@ spec: protocol: TCP name: grpc targetPort: grpc + {{- if .Values.store_gateway.service.extraPorts }} + {{- toYaml .Values.store_gateway.service.extraPorts | nindent 4 }} + {{- end }} selector: {{- include "mimir.selectorLabels" $args | nindent 4 }} diff --git a/operations/helm/charts/mimir-distributed/values.yaml b/operations/helm/charts/mimir-distributed/values.yaml index b6613c9366e..de1b3a662bb 100644 --- a/operations/helm/charts/mimir-distributed/values.yaml +++ b/operations/helm/charts/mimir-distributed/values.yaml @@ -595,6 +595,11 @@ alertmanager: # -- https://kubernetes.io/docs/concepts/services-networking/service-traffic-policy/ internalTrafficPolicy: Cluster type: ClusterIP + extraPorts: [] + # - port: 11811 + # protocol: TCP + # name: reverse-proxy + # targetPort: 11811 # -- Optionally set the scheduler for pods of the alertmanager schedulerName: "" @@ -879,6 +884,7 @@ distributor: # -- https://kubernetes.io/docs/concepts/services-networking/service-traffic-policy/ internalTrafficPolicy: Cluster type: ClusterIP + extraPorts: [] resources: requests: @@ -982,6 +988,7 @@ ingester: # -- https://kubernetes.io/docs/concepts/services-networking/service-traffic-policy/ internalTrafficPolicy: Cluster type: ClusterIP + extraPorts: [] # -- Optionally set the scheduler for pods of the ingester schedulerName: "" @@ -1215,6 +1222,7 @@ overrides_exporter: # -- https://kubernetes.io/docs/concepts/services-networking/service-traffic-policy/ internalTrafficPolicy: Cluster type: ClusterIP + extraPorts: [] strategy: type: RollingUpdate @@ -1329,6 +1337,7 @@ ruler: # -- https://kubernetes.io/docs/concepts/services-networking/service-traffic-policy/ internalTrafficPolicy: Cluster type: ClusterIP + extraPorts: [] # -- Dedicated service account for ruler pods. # If not set, the default service account defined at the begining of this file will be used. 
@@ -1466,6 +1475,7 @@ ruler_querier: # -- https://kubernetes.io/docs/concepts/services-networking/service-traffic-policy/ internalTrafficPolicy: Cluster type: ClusterIP + extraPorts: [] resources: requests: @@ -1582,6 +1592,7 @@ ruler_query_frontend: # -- https://kubernetes.io/docs/concepts/services-networking/service-traffic-policy/ internalTrafficPolicy: Cluster type: ClusterIP + extraPorts: [] resources: requests: @@ -1675,6 +1686,7 @@ ruler_query_scheduler: # -- https://kubernetes.io/docs/concepts/services-networking/service-traffic-policy/ internalTrafficPolicy: Cluster type: ClusterIP + extraPorts: [] resources: requests: @@ -1812,6 +1824,7 @@ querier: # -- https://kubernetes.io/docs/concepts/services-networking/service-traffic-policy/ internalTrafficPolicy: Cluster type: ClusterIP + extraPorts: [] resources: requests: @@ -1930,6 +1943,7 @@ query_frontend: # -- https://kubernetes.io/docs/concepts/services-networking/service-traffic-policy/ internalTrafficPolicy: Cluster type: ClusterIP + extraPorts: [] resources: requests: @@ -2024,6 +2038,7 @@ query_scheduler: # -- https://kubernetes.io/docs/concepts/services-networking/service-traffic-policy/ internalTrafficPolicy: Cluster type: ClusterIP + extraPorts: [] resources: requests: @@ -2123,6 +2138,7 @@ store_gateway: # -- https://kubernetes.io/docs/concepts/services-networking/service-traffic-policy/ internalTrafficPolicy: Cluster type: ClusterIP + extraPorts: [] # -- Optionally set the scheduler for pods of the store-gateway schedulerName: "" @@ -2346,6 +2362,7 @@ compactor: # -- https://kubernetes.io/docs/concepts/services-networking/service-traffic-policy/ internalTrafficPolicy: Cluster type: ClusterIP + extraPorts: [] # -- Optionally set the scheduler for pods of the compactor schedulerName: "" @@ -2619,6 +2636,7 @@ chunks-cache: service: annotations: {} labels: {} + extraPorts: [] index-cache: # -- Specifies whether memcached based index-cache should be enabled @@ -2719,6 +2737,7 @@ index-cache: service: annotations: {} labels: {} + extraPorts: [] metadata-cache: # -- Specifies whether memcached based metadata-cache should be enabled @@ -2819,6 +2838,7 @@ metadata-cache: service: annotations: {} labels: {} + extraPorts: [] results-cache: # -- Specifies whether memcached based results-cache should be enabled @@ -2919,6 +2939,7 @@ results-cache: service: annotations: {} labels: {} + extraPorts: [] # -- Setting for the Grafana Rollout Operator https://github.com/grafana/helm-charts/tree/main/charts/rollout-operator rollout_operator: @@ -3088,6 +3109,7 @@ nginx: labels: {} # -- https://kubernetes.io/docs/concepts/services-networking/service-traffic-policy/ internalTrafficPolicy: Cluster + extraPorts: [] # Ingress configuration ingress: # -- Specifies whether an ingress for the nginx should be created @@ -3497,6 +3519,7 @@ gateway: nameOverride: "" # -- https://kubernetes.io/docs/concepts/services-networking/service-traffic-policy/ internalTrafficPolicy: Cluster + extraPorts: [] ingress: enabled: false @@ -4142,6 +4165,7 @@ admin_api: # -- https://kubernetes.io/docs/concepts/services-networking/service-traffic-policy/ internalTrafficPolicy: Cluster type: ClusterIP + extraPorts: [] initContainers: [] @@ -4293,6 +4317,7 @@ admin-cache: service: annotations: {} labels: {} + extraPorts: [] graphite: # -- If true, enables graphite querier and graphite write proxy functionality. 
@@ -4318,6 +4343,7 @@ graphite: service: annotations: {} labels: {} + extraPorts: [] # -- Resources for graphite-querier pods resources: @@ -4399,6 +4425,7 @@ graphite: service: annotations: {} labels: {} + extraPorts: [] # -- Resources for graphite-write-proxy pods resources: @@ -4539,6 +4566,7 @@ gr-aggr-cache: service: annotations: {} labels: {} + extraPorts: [] # Graphite's metric name cache. If you want to know more about it please check # https://grafana.com/docs/enterprise-metrics/latest/graphite/graphite_querier/#metric-name-cache @@ -4604,6 +4632,7 @@ gr-metricname-cache: service: annotations: {} labels: {} + extraPorts: [] federation_frontend: # -- Specifies whether the federation-frontend should be enabled @@ -4695,6 +4724,8 @@ federation_frontend: annotations: {} # -- Additional labels for the federation-frontend service labels: {} + extraPorts: [] + # -- Pod Disruption Budget configuration podDisruptionBudget: maxUnavailable: 1 @@ -4811,6 +4842,7 @@ continuous_test: service: annotations: {} labels: {} + extraPorts: [] # -- Upgrade strategy for the continuous test Deployment strategy: type: RollingUpdate diff --git a/pkg/compactor/bucket_compactor.go b/pkg/compactor/bucket_compactor.go index 1978f8ea892..626cbb5c538 100644 --- a/pkg/compactor/bucket_compactor.go +++ b/pkg/compactor/bucket_compactor.go @@ -29,8 +29,10 @@ import ( "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/tsdb" "github.com/thanos-io/objstore" + "github.com/thanos-io/objstore/providers/filesystem" "go.uber.org/atomic" + "github.com/grafana/mimir/pkg/storage/indexheader" "github.com/grafana/mimir/pkg/storage/sharding" mimir_tsdb "github.com/grafana/mimir/pkg/storage/tsdb" "github.com/grafana/mimir/pkg/storage/tsdb/block" @@ -394,11 +396,10 @@ func (c *BucketCompactor) runCompactionJob(ctx context.Context, job *Job) (shoul } blocksToUpload := convertCompactionResultToForEachJobs(compIDs, job.UseSplitting(), jobLogger) + + // update labels and verify all blocks err = concurrency.ForEachJob(ctx, len(blocksToUpload), c.blockSyncConcurrency, func(ctx context.Context, idx int) error { blockToUpload := blocksToUpload[idx] - - uploadedBlocks.Inc() - bdir := filepath.Join(subDir, blockToUpload.ulid.String()) // When splitting is enabled, we need to inject the shard ID as an external label. @@ -406,6 +407,7 @@ func (c *BucketCompactor) runCompactionJob(ctx context.Context, job *Job) (shoul if job.UseSplitting() { newLabels[mimir_tsdb.CompactorShardIDExternalLabel] = sharding.FormatShardIDLabelValue(uint64(blockToUpload.shardIndex), uint64(job.SplittingShards())) } + blocksToUpload[idx].labels = newLabels newMeta, err := block.InjectThanosMeta(jobLogger, bdir, block.ThanosMeta{ Labels: newLabels, @@ -413,6 +415,7 @@ func (c *BucketCompactor) runCompactionJob(ctx context.Context, job *Job) (shoul Source: block.CompactorSource, SegmentFiles: block.GetSegmentFiles(bdir), }, nil) + if err != nil { return errors.Wrapf(err, "failed to finalize the block %s", bdir) } @@ -421,18 +424,47 @@ func (c *BucketCompactor) runCompactionJob(ctx context.Context, job *Job) (shoul return errors.Wrap(err, "remove tombstones") } - // Ensure the compacted block is valid. if err := block.VerifyBlock(ctx, jobLogger, bdir, newMeta.MinTime, newMeta.MaxTime, false); err != nil { return errors.Wrapf(err, "invalid result block %s", bdir) } + return nil + }) + if err != nil { + return false, nil, err + } + + // Optionally build sparse-index-headers. 
Building sparse-index-headers is best effort; we do not skip uploading a + // compacted block if there's an error affecting sparse-index-headers. + if c.uploadSparseIndexHeaders { + // Create a bucket backed by the local compaction directory; this allows calls to prepareSparseIndexHeader to + // construct sparse-index-headers without making requests to object storage. + fsbkt, err := filesystem.NewBucket(subDir) + if err != nil { + level.Warn(jobLogger).Log("msg", "failed to create filesystem bucket, skipping sparse header upload", "err", err) + } else { + _ = concurrency.ForEachJob(ctx, len(blocksToUpload), c.blockSyncConcurrency, func(ctx context.Context, idx int) error { + blockToUpload := blocksToUpload[idx] + err := prepareSparseIndexHeader(ctx, jobLogger, fsbkt, subDir, blockToUpload.ulid, c.sparseIndexHeaderSamplingRate, c.sparseIndexHeaderconfig) + if err != nil { + level.Warn(jobLogger).Log("msg", "failed to create sparse index headers", "block", blockToUpload.ulid.String(), "shard", blockToUpload.shardIndex, "err", err) + } + return nil + }) + } + } + // upload all blocks + err = concurrency.ForEachJob(ctx, len(blocksToUpload), c.blockSyncConcurrency, func(ctx context.Context, idx int) error { + blockToUpload := blocksToUpload[idx] + uploadedBlocks.Inc() + bdir := filepath.Join(subDir, blockToUpload.ulid.String()) begin := time.Now() if err := block.Upload(ctx, jobLogger, c.bkt, bdir, nil); err != nil { return errors.Wrapf(err, "upload of %s failed", blockToUpload.ulid) } elapsed := time.Since(begin) - level.Info(jobLogger).Log("msg", "uploaded block", "result_block", blockToUpload.ulid, "duration", elapsed, "duration_ms", elapsed.Milliseconds(), "external_labels", labels.FromMap(newLabels)) + level.Info(jobLogger).Log("msg", "uploaded block", "result_block", blockToUpload.ulid, "duration", elapsed, "duration_ms", elapsed.Milliseconds(), "external_labels", labels.FromMap(blockToUpload.labels)) return nil }) if err != nil { @@ -457,10 +489,19 @@ func (c *BucketCompactor) runCompactionJob(ctx context.Context, job *Job) (shoul return false, nil, errors.Wrapf(err, "mark old block for deletion from bucket") } } - return true, compIDs, nil } +func prepareSparseIndexHeader(ctx context.Context, logger log.Logger, bkt objstore.Bucket, dir string, id ulid.ULID, sampling int, cfg indexheader.Config) error { + // Calling NewStreamBinaryReader reads a block's index and writes a sparse-index-header to disk. + mets := indexheader.NewStreamBinaryReaderMetrics(nil) + br, err := indexheader.NewStreamBinaryReader(ctx, logger, bkt, dir, id, sampling, mets, cfg) + if err != nil { + return err + } + return br.Close() +} + // verifyCompactedBlocksTimeRanges does a full run over the compacted blocks // and verifies that they satisfy the min/maxTime from the source blocks func verifyCompactedBlocksTimeRanges(compIDs []ulid.ULID, sourceBlocksMinTime, sourceBlocksMaxTime int64, subDir string) error { @@ -530,6 +571,7 @@ func convertCompactionResultToForEachJobs(compactedBlocks []ulid.ULID, splitJob type ulidWithShardIndex struct { ulid ulid.ULID shardIndex int + labels map[string]string } // issue347Error is a type wrapper for errors that should invoke the repair process for broken block. @@ -747,20 +789,23 @@ var ownAllJobs = func(*Job) (bool, error) { // BucketCompactor compacts blocks in a bucket. 
type BucketCompactor struct { - logger log.Logger - sy *metaSyncer - grouper Grouper - comp Compactor - planner Planner - compactDir string - bkt objstore.Bucket - concurrency int - skipUnhealthyBlocks bool - ownJob ownCompactionJobFunc - sortJobs JobsOrderFunc - waitPeriod time.Duration - blockSyncConcurrency int - metrics *BucketCompactorMetrics + logger log.Logger + sy *metaSyncer + grouper Grouper + comp Compactor + planner Planner + compactDir string + bkt objstore.Bucket + concurrency int + skipUnhealthyBlocks bool + uploadSparseIndexHeaders bool + sparseIndexHeaderSamplingRate int + sparseIndexHeaderconfig indexheader.Config + ownJob ownCompactionJobFunc + sortJobs JobsOrderFunc + waitPeriod time.Duration + blockSyncConcurrency int + metrics *BucketCompactorMetrics } // NewBucketCompactor creates a new bucket compactor. @@ -779,25 +824,31 @@ func NewBucketCompactor( waitPeriod time.Duration, blockSyncConcurrency int, metrics *BucketCompactorMetrics, + uploadSparseIndexHeaders bool, + sparseIndexHeaderSamplingRate int, + sparseIndexHeaderconfig indexheader.Config, ) (*BucketCompactor, error) { if concurrency <= 0 { return nil, errors.Errorf("invalid concurrency level (%d), concurrency level must be > 0", concurrency) } return &BucketCompactor{ - logger: logger, - sy: sy, - grouper: grouper, - planner: planner, - comp: comp, - compactDir: compactDir, - bkt: bkt, - concurrency: concurrency, - skipUnhealthyBlocks: skipUnhealthyBlocks, - ownJob: ownJob, - sortJobs: sortJobs, - waitPeriod: waitPeriod, - blockSyncConcurrency: blockSyncConcurrency, - metrics: metrics, + logger: logger, + sy: sy, + grouper: grouper, + planner: planner, + comp: comp, + compactDir: compactDir, + bkt: bkt, + concurrency: concurrency, + skipUnhealthyBlocks: skipUnhealthyBlocks, + ownJob: ownJob, + sortJobs: sortJobs, + waitPeriod: waitPeriod, + blockSyncConcurrency: blockSyncConcurrency, + metrics: metrics, + uploadSparseIndexHeaders: uploadSparseIndexHeaders, + sparseIndexHeaderSamplingRate: sparseIndexHeaderSamplingRate, + sparseIndexHeaderconfig: sparseIndexHeaderconfig, }, nil } diff --git a/pkg/compactor/bucket_compactor_e2e_test.go b/pkg/compactor/bucket_compactor_e2e_test.go index 1acc4b7814f..3ff3984f7fa 100644 --- a/pkg/compactor/bucket_compactor_e2e_test.go +++ b/pkg/compactor/bucket_compactor_e2e_test.go @@ -37,6 +37,7 @@ import ( "github.com/thanos-io/objstore/providers/filesystem" "golang.org/x/sync/errgroup" + "github.com/grafana/mimir/pkg/storage/indexheader" "github.com/grafana/mimir/pkg/storage/tsdb/block" util_log "github.com/grafana/mimir/pkg/util/log" ) @@ -240,7 +241,10 @@ func TestGroupCompactE2E(t *testing.T) { planner := NewSplitAndMergePlanner([]int64{1000, 3000}) grouper := NewSplitAndMergeGrouper("user-1", []int64{1000, 3000}, 0, 0, logger) metrics := NewBucketCompactorMetrics(blocksMarkedForDeletion, prometheus.NewPedanticRegistry()) - bComp, err := NewBucketCompactor(logger, sy, grouper, planner, comp, dir, bkt, 2, true, ownAllJobs, sortJobsByNewestBlocksFirst, 0, 4, metrics) + cfg := indexheader.Config{VerifyOnLoad: true} + bComp, err := NewBucketCompactor( + logger, sy, grouper, planner, comp, dir, bkt, 2, true, ownAllJobs, sortJobsByNewestBlocksFirst, 0, 4, metrics, true, 32, cfg, + ) require.NoError(t, err) // Compaction on empty should not fail. @@ -374,6 +378,21 @@ func TestGroupCompactE2E(t *testing.T) { return nil })) + // expect the blocks that are compacted to have sparse-index-headers in object storage. 
+ require.NoError(t, bkt.Iter(ctx, "", func(n string) error { + id, ok := block.IsBlockDir(n) + if !ok { + return nil + } + + if _, ok := others[id.String()]; ok { + p := path.Join(id.String(), block.SparseIndexHeaderFilename) + exists, _ := bkt.Exists(ctx, p) + assert.True(t, exists, "expected sparse index headers not found %s", p) + } + return nil + })) + for id, found := range nonCompactedExpected { assert.True(t, found, "not found expected block %s", id.String()) } diff --git a/pkg/compactor/bucket_compactor_test.go b/pkg/compactor/bucket_compactor_test.go index 7584db247ef..fe6b2d86213 100644 --- a/pkg/compactor/bucket_compactor_test.go +++ b/pkg/compactor/bucket_compactor_test.go @@ -23,6 +23,7 @@ import ( "github.com/stretchr/testify/require" "github.com/thanos-io/objstore" + "github.com/grafana/mimir/pkg/storage/indexheader" "github.com/grafana/mimir/pkg/storage/tsdb/block" "github.com/grafana/mimir/pkg/util/extprom" ) @@ -118,9 +119,10 @@ func TestBucketCompactor_FilterOwnJobs(t *testing.T) { } m := NewBucketCompactorMetrics(promauto.With(nil).NewCounter(prometheus.CounterOpts{}), nil) + cfg := indexheader.Config{VerifyOnLoad: true} for testName, testCase := range tests { t.Run(testName, func(t *testing.T) { - bc, err := NewBucketCompactor(log.NewNopLogger(), nil, nil, nil, nil, "", nil, 2, false, testCase.ownJob, nil, 0, 4, m) + bc, err := NewBucketCompactor(log.NewNopLogger(), nil, nil, nil, nil, "", nil, 2, false, testCase.ownJob, nil, 0, 4, m, false, 32, cfg) require.NoError(t, err) res, err := bc.filterOwnJobs(jobsFn()) @@ -155,8 +157,9 @@ func TestBlockMaxTimeDeltas(t *testing.T) { })) metrics := NewBucketCompactorMetrics(promauto.With(nil).NewCounter(prometheus.CounterOpts{}), nil) + cfg := indexheader.Config{VerifyOnLoad: true} now := time.UnixMilli(1500002900159) - bc, err := NewBucketCompactor(log.NewNopLogger(), nil, nil, nil, nil, "", nil, 2, false, nil, nil, 0, 4, metrics) + bc, err := NewBucketCompactor(log.NewNopLogger(), nil, nil, nil, nil, "", nil, 2, false, nil, nil, 0, 4, metrics, true, 32, cfg) require.NoError(t, err) deltas := bc.blockMaxTimeDeltas(now, []*Job{j1, j2}) diff --git a/pkg/compactor/compactor.go b/pkg/compactor/compactor.go index 83b751a6478..44d074e3e63 100644 --- a/pkg/compactor/compactor.go +++ b/pkg/compactor/compactor.go @@ -33,6 +33,7 @@ import ( "go.uber.org/atomic" "github.com/grafana/mimir/pkg/storage/bucket" + "github.com/grafana/mimir/pkg/storage/indexheader" mimir_tsdb "github.com/grafana/mimir/pkg/storage/tsdb" "github.com/grafana/mimir/pkg/storage/tsdb/block" "github.com/grafana/mimir/pkg/util" @@ -130,6 +131,11 @@ type Config struct { // Allow downstream projects to customise the blocks compactor. BlocksGrouperFactory BlocksGrouperFactory `yaml:"-"` BlocksCompactorFactory BlocksCompactorFactory `yaml:"-"` + + // Allow compactor to upload sparse-index-header files + UploadSparseIndexHeaders bool `yaml:"upload_sparse_index_headers" category:"experimental"` + SparseIndexHeadersSamplingRate int `yaml:"-"` + SparseIndexHeadersConfig indexheader.Config `yaml:"-"` } // RegisterFlags registers the MultitenantCompactor flags. 
@@ -158,6 +164,7 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet, logger log.Logger) { f.DurationVar(&cfg.TenantCleanupDelay, "compactor.tenant-cleanup-delay", 6*time.Hour, "For tenants marked for deletion, this is the time between deletion of the last block, and doing final cleanup (marker files, debug files) of the tenant.") f.BoolVar(&cfg.NoBlocksFileCleanupEnabled, "compactor.no-blocks-file-cleanup-enabled", false, "If enabled, will delete the bucket-index, markers and debug files in the tenant bucket when there are no blocks left in the index.") f.DurationVar(&cfg.MaxLookback, "compactor.max-lookback", 0*time.Second, "Blocks uploaded before the lookback aren't considered in compactor cycles. If set, this value should be larger than all values in `-blocks-storage.tsdb.block-ranges-period`. A value of 0s means that all blocks are considered regardless of their upload time.") + f.BoolVar(&cfg.UploadSparseIndexHeaders, "compactor.upload-sparse-index-headers", false, "If enabled, the compactor constructs and uploads sparse index headers to object storage during each compaction cycle. This allows store-gateway instances to use the sparse headers from object storage instead of recreating them locally.") // compactor concurrency options f.IntVar(&cfg.MaxOpeningBlocksConcurrency, "compactor.max-opening-blocks-concurrency", 1, "Number of goroutines opening blocks before compaction.") @@ -834,6 +841,9 @@ func (c *MultitenantCompactor) compactUser(ctx context.Context, userID string) e c.compactorCfg.CompactionWaitPeriod, c.compactorCfg.BlockSyncConcurrency, c.bucketCompactorMetrics, + c.compactorCfg.UploadSparseIndexHeaders, + c.compactorCfg.SparseIndexHeadersSamplingRate, + c.compactorCfg.SparseIndexHeadersConfig, ) if err != nil { return errors.Wrap(err, "failed to create bucket compactor") diff --git a/pkg/mimir/modules.go b/pkg/mimir/modules.go index cea5f08b186..d21e9de7212 100644 --- a/pkg/mimir/modules.go +++ b/pkg/mimir/modules.go @@ -1030,6 +1030,8 @@ func (t *Mimir) initAlertManager() (serv services.Service, err error) { func (t *Mimir) initCompactor() (serv services.Service, err error) { t.Cfg.Compactor.ShardingRing.Common.ListenPort = t.Cfg.Server.GRPCListenPort + t.Cfg.Compactor.SparseIndexHeadersConfig = t.Cfg.BlocksStorage.BucketStore.IndexHeader + t.Cfg.Compactor.SparseIndexHeadersSamplingRate = t.Cfg.BlocksStorage.BucketStore.PostingOffsetsInMemSampling t.Compactor, err = compactor.NewMultitenantCompactor(t.Cfg.Compactor, t.Cfg.BlocksStorage, t.Overrides, util_log.Logger, t.Registerer) if err != nil { diff --git a/pkg/storegateway/indexheader/binary_reader.go b/pkg/storage/indexheader/binary_reader.go similarity index 100% rename from pkg/storegateway/indexheader/binary_reader.go rename to pkg/storage/indexheader/binary_reader.go diff --git a/pkg/storegateway/indexheader/encoding/encoding.go b/pkg/storage/indexheader/encoding/encoding.go similarity index 100% rename from pkg/storegateway/indexheader/encoding/encoding.go rename to pkg/storage/indexheader/encoding/encoding.go diff --git a/pkg/storegateway/indexheader/encoding/encoding_test.go b/pkg/storage/indexheader/encoding/encoding_test.go similarity index 100% rename from pkg/storegateway/indexheader/encoding/encoding_test.go rename to pkg/storage/indexheader/encoding/encoding_test.go diff --git a/pkg/storegateway/indexheader/encoding/factory.go b/pkg/storage/indexheader/encoding/factory.go similarity index 100% rename from pkg/storegateway/indexheader/encoding/factory.go rename to 
pkg/storage/indexheader/encoding/factory.go diff --git a/pkg/storegateway/indexheader/encoding/factory_test.go b/pkg/storage/indexheader/encoding/factory_test.go similarity index 100% rename from pkg/storegateway/indexheader/encoding/factory_test.go rename to pkg/storage/indexheader/encoding/factory_test.go diff --git a/pkg/storegateway/indexheader/encoding/reader.go b/pkg/storage/indexheader/encoding/reader.go similarity index 100% rename from pkg/storegateway/indexheader/encoding/reader.go rename to pkg/storage/indexheader/encoding/reader.go diff --git a/pkg/storegateway/indexheader/encoding/reader_test.go b/pkg/storage/indexheader/encoding/reader_test.go similarity index 100% rename from pkg/storegateway/indexheader/encoding/reader_test.go rename to pkg/storage/indexheader/encoding/reader_test.go diff --git a/pkg/storegateway/indexheader/header.go b/pkg/storage/indexheader/header.go similarity index 98% rename from pkg/storegateway/indexheader/header.go rename to pkg/storage/indexheader/header.go index c018351d4d5..108311dc46e 100644 --- a/pkg/storegateway/indexheader/header.go +++ b/pkg/storage/indexheader/header.go @@ -13,7 +13,7 @@ import ( "github.com/pkg/errors" "github.com/prometheus/prometheus/tsdb/index" - streamindex "github.com/grafana/mimir/pkg/storegateway/indexheader/index" + streamindex "github.com/grafana/mimir/pkg/storage/indexheader/index" ) const ( diff --git a/pkg/storegateway/indexheader/header_test.go b/pkg/storage/indexheader/header_test.go similarity index 71% rename from pkg/storegateway/indexheader/header_test.go rename to pkg/storage/indexheader/header_test.go index 87ecde23d23..bb12e36698a 100644 --- a/pkg/storegateway/indexheader/header_test.go +++ b/pkg/storage/indexheader/header_test.go @@ -129,6 +129,169 @@ func TestReadersComparedToIndexHeader(t *testing.T) { } +func Test_DownsampleSparseIndexHeader(t *testing.T) { + tests := map[string]struct { + protoRate int + inMemSamplingRate int + expected map[string]int + }{ + "downsample_1_to_32": { + protoRate: 1, + inMemSamplingRate: 32, + expected: map[string]int{ + "__name__": 4, + "": 1, + "__blockgen_target__": 4, + }, + }, + "downsample_4_to_16": { + protoRate: 4, + inMemSamplingRate: 16, + expected: map[string]int{ + "__name__": 7, + "": 1, + "__blockgen_target__": 7, + }, + }, + "downsample_8_to_24": { + protoRate: 8, + inMemSamplingRate: 24, + expected: map[string]int{ + "__name__": 5, + "": 1, + "__blockgen_target__": 5, + }, + }, + "downsample_17_to_51": { + protoRate: 17, + inMemSamplingRate: 51, + expected: map[string]int{ + "__name__": 3, + "": 1, + "__blockgen_target__": 3, + }, + }, + "noop_on_same_sampling_rate": { + protoRate: 32, + inMemSamplingRate: 32, + }, + "rebuild_proto_sampling_rate_not_divisible": { + protoRate: 8, + inMemSamplingRate: 20, + }, + "rebuild_cannot_upsample_from_proto_48_to_32": { + protoRate: 48, + inMemSamplingRate: 32, + }, + "rebuild_cannot_upsample_from_proto_64_to_32": { + protoRate: 64, + inMemSamplingRate: 32, + }, + "downsample_to_low_frequency": { + protoRate: 4, + inMemSamplingRate: 16384, + expected: map[string]int{ + "__name__": 2, + "": 1, + "__blockgen_target__": 2, + }, + }, + } + + for name, tt := range tests { + t.Run(name, func(t *testing.T) { + m, err := block.ReadMetaFromDir("./testdata/index_format_v2") + require.NoError(t, err) + + tmpDir := t.TempDir() + test.Copy(t, "./testdata/index_format_v2", filepath.Join(tmpDir, m.ULID.String())) + + bkt, err := filesystem.NewBucket(tmpDir) + require.NoError(t, err) + + ctx := context.Background() + 
noopMetrics := NewStreamBinaryReaderMetrics(nil) + + // write a sparse index-header file to disk + br1, err := NewStreamBinaryReader(ctx, log.NewNopLogger(), bkt, tmpDir, m.ULID, tt.protoRate, noopMetrics, Config{}) + require.NoError(t, err) + require.Equal(t, tt.protoRate, br1.postingsOffsetTable.PostingOffsetInMemSampling()) + + origLabelNames, err := br1.postingsOffsetTable.LabelNames() + require.NoError(t, err) + + // a second call to NewStreamBinaryReader loads the previously written sparse index-header and downsamples + // the header from tt.protoRate to tt.inMemSamplingRate entries for each posting + br2, err := NewStreamBinaryReader(ctx, log.NewNopLogger(), bkt, tmpDir, m.ULID, tt.inMemSamplingRate, noopMetrics, Config{}) + require.NoError(t, err) + require.Equal(t, tt.inMemSamplingRate, br2.postingsOffsetTable.PostingOffsetInMemSampling()) + + downsampleLabelNames, err := br2.postingsOffsetTable.LabelNames() + require.NoError(t, err) + + // label names are equal between original and downsampled sparse index-headers + require.ElementsMatch(t, downsampleLabelNames, origLabelNames) + + origIdxpbTbl := br1.postingsOffsetTable.NewSparsePostingOffsetTable() + downsampleIdxpbTbl := br2.postingsOffsetTable.NewSparsePostingOffsetTable() + + for name, vals := range origIdxpbTbl.Postings { + downsampledOffsets := downsampleIdxpbTbl.Postings[name].Offsets + // downsampled postings are a subset of the original sparse index-header postings + if (tt.inMemSamplingRate > tt.protoRate) && (tt.inMemSamplingRate%tt.protoRate == 0) { + require.Equal(t, tt.expected[name], len(downsampledOffsets)) + require.Subset(t, vals.Offsets, downsampledOffsets, "downsampled offsets not a subset of original for name '%s'", name) + + require.Equal(t, downsampledOffsets[0], vals.Offsets[0], "downsampled offsets do not contain first value for name '%s'", name) + require.Equal(t, downsampledOffsets[len(downsampledOffsets)-1], vals.Offsets[len(vals.Offsets)-1], "downsampled offsets do not contain last value for name '%s'", name) + } + + // check first and last entry from the original postings in downsampled set + require.NotZero(t, downsampleIdxpbTbl.Postings[name].LastValOffset) + } + }) + } +} + +func compareIndexToHeaderPostings(t *testing.T, indexByteSlice index.ByteSlice, sbr *StreamBinaryReader) { + + ir, err := index.NewReader(indexByteSlice, index.DecodePostingsRaw) + require.NoError(t, err) + defer func() { + _ = ir.Close() + }() + + toc, err := index.NewTOCFromByteSlice(indexByteSlice) + require.NoError(t, err) + + tblOffsetBounds := make(map[string][2]int64) + + // Read the postings offset table and record first and last offset for each label. Adjust offsets in ReadPostingsOffsetTable + // by 4B (int32 count of postings in table) to align with postings in index headers. 
+ err = index.ReadPostingsOffsetTable(indexByteSlice, toc.PostingsTable, func(label []byte, _ []byte, _ uint64, offset int) error { + name := string(label) + off := int64(offset + 4) + if v, ok := tblOffsetBounds[name]; ok { + v[1] = off + tblOffsetBounds[name] = v + } else { + tblOffsetBounds[name] = [2]int64{off, off} + } + return nil + }) + require.NoError(t, err) + + tbl := sbr.postingsOffsetTable.NewSparsePostingOffsetTable() + + expLabelNames, err := ir.LabelNames(context.Background()) + require.NoError(t, err) + for _, lname := range expLabelNames { + offsets := tbl.Postings[lname].Offsets + assert.Equal(t, offsets[0].TableOff, tblOffsetBounds[lname][0]) + assert.Equal(t, offsets[len(offsets)-1].TableOff, tblOffsetBounds[lname][1]) + } +} + func compareIndexToHeader(t *testing.T, indexByteSlice index.ByteSlice, headerReader Reader) { ctx := context.Background() @@ -192,15 +355,22 @@ func compareIndexToHeader(t *testing.T, indexByteSlice index.ByteSlice, headerRe for _, v := range valOffsets { ptr, err := headerReader.PostingsOffset(ctx, lname, v.LabelValue) require.NoError(t, err) - assert.Equal(t, expRanges[labels.Label{Name: lname, Value: v.LabelValue}], ptr) - assert.Equal(t, expRanges[labels.Label{Name: lname, Value: v.LabelValue}], v.Off) + label := labels.Label{Name: lname, Value: v.LabelValue} + assert.Equal(t, expRanges[label], ptr) + assert.Equal(t, expRanges[label], v.Off) + delete(expRanges, label) } } + allPName, allPValue := index.AllPostingsKey() ptr, err := headerReader.PostingsOffset(ctx, allPName, allPValue) require.NoError(t, err) - require.Equal(t, expRanges[labels.Label{Name: "", Value: ""}].Start, ptr.Start) - require.Equal(t, expRanges[labels.Label{Name: "", Value: ""}].End, ptr.End) + + emptyLabel := labels.Label{Name: "", Value: ""} + require.Equal(t, expRanges[emptyLabel].Start, ptr.Start) + require.Equal(t, expRanges[emptyLabel].End, ptr.End) + delete(expRanges, emptyLabel) + require.Empty(t, expRanges) } func prepareIndexV2Block(t testing.TB, tmpDir string, bkt objstore.Bucket) *block.Meta { diff --git a/pkg/storegateway/indexheader/index/postings.go b/pkg/storage/indexheader/index/postings.go similarity index 91% rename from pkg/storegateway/indexheader/index/postings.go rename to pkg/storage/indexheader/index/postings.go index 8b3d2de39f7..28505dffbab 100644 --- a/pkg/storegateway/indexheader/index/postings.go +++ b/pkg/storage/indexheader/index/postings.go @@ -17,8 +17,8 @@ import ( "github.com/pkg/errors" "github.com/prometheus/prometheus/tsdb/index" - streamencoding "github.com/grafana/mimir/pkg/storegateway/indexheader/encoding" - "github.com/grafana/mimir/pkg/storegateway/indexheader/indexheaderpb" + streamencoding "github.com/grafana/mimir/pkg/storage/indexheader/encoding" + "github.com/grafana/mimir/pkg/storage/indexheader/indexheaderpb" ) const ( @@ -43,6 +43,10 @@ type PostingOffsetTable interface { LabelNames() ([]string, error) NewSparsePostingOffsetTable() (table *indexheaderpb.PostingOffsetTable) + + // PostingOffsetInMemSampling returns the inverse of the fraction of postings held in memory. A lower value indicates + // postings are sample more frequently. + PostingOffsetInMemSampling() int } // PostingListOffset contains the start and end offset of a posting list. 
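For context on the new PostingOffsetInMemSampling() accessor: the returned rate is the N in "keep every Nth posting offset in memory", so a sparse header persisted at a finer rate can only be reused when the configured rate is a positive whole multiple of it; anything else forces a rebuild from the full index-header. Below is a minimal, self-contained Go sketch of that thinning rule, mirroring the behaviour exercised by Test_DownsampleSparseIndexHeader above and implemented in NewPostingOffsetTableFromSparseHeader further down (function and variable names here are illustrative, not part of the patch):

package sparseheaderexample

// downsampleOffsets thins posting offsets written at sampling rate cur down to
// rate tgt, always retaining the first and the last entry. It reports false when
// tgt is not a positive whole multiple of cur, in which case the caller has to
// rebuild the table from the full index-header instead.
func downsampleOffsets(offsets []string, cur, tgt int) ([]string, bool) {
	if cur <= 0 || tgt <= 0 || cur > tgt || tgt%cur != 0 {
		return nil, false
	}
	step := tgt / cur
	n := (len(offsets) + step - 1) / step
	if len(offsets) > 1 && n == 1 {
		n = 2 // even very short lists keep both their first and last offset
	}
	out := make([]string, n)
	for i, v := range offsets {
		if i%step == 0 {
			out[i/step] = v
		}
		if i == len(offsets)-1 {
			out[n-1] = v // the final slot always holds the last original offset
		}
	}
	return out, true
}

For example, thinning offsets written at rate 4 to a configured rate of 16 keeps every 4th entry plus the last one, while a header written at rate 48 cannot serve a configured rate of 32 and is rebuilt.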
@@ -234,18 +238,42 @@ func NewPostingOffsetTableFromSparseHeader(factory *streamencoding.DecbufFactory postingOffsetsInMemSampling: postingOffsetsInMemSampling, } + pbSampling := int(postingsOffsetTable.GetPostingOffsetInMemorySampling()) + if pbSampling == 0 { + return nil, fmt.Errorf("sparse index-header sampling rate not set") + } + + if pbSampling > postingOffsetsInMemSampling { + return nil, fmt.Errorf("sparse index-header sampling rate exceeds in-mem-sampling rate") + } + + // if the sampling rate in the sparse index-header is set lower (more frequent) than + // the configured postingOffsetsInMemSampling we downsample to the configured rate + step, ok := stepSize(pbSampling, postingOffsetsInMemSampling) + if !ok { + return nil, fmt.Errorf("sparse index-header sampling rate not compatible with in-mem-sampling rate") + } + for sName, sOffsets := range postingsOffsetTable.Postings { - t.postings[sName] = &postingValueOffsets{ - offsets: make([]postingOffset, len(sOffsets.Offsets)), + + olen := len(sOffsets.Offsets) + downsampledLen := (olen + step - 1) / step + if (olen > 1) && (downsampledLen == 1) { + downsampledLen++ } + t.postings[sName] = &postingValueOffsets{offsets: make([]postingOffset, downsampledLen)} for i, sPostingOff := range sOffsets.Offsets { - t.postings[sName].offsets[i] = postingOffset{value: sPostingOff.Value, tableOff: int(sPostingOff.TableOff)} - } + if i%step == 0 { + t.postings[sName].offsets[i/step] = postingOffset{value: sPostingOff.Value, tableOff: int(sPostingOff.TableOff)} + } + if i == olen-1 { + t.postings[sName].offsets[downsampledLen-1] = postingOffset{value: sPostingOff.Value, tableOff: int(sPostingOff.TableOff)} + } + } t.postings[sName].lastValOffset = sOffsets.LastValOffset } - return &t, err } @@ -330,6 +358,10 @@ func (t *PostingOffsetTableV1) LabelNames() ([]string, error) { return labelNames, nil } +func (t *PostingOffsetTableV1) PostingOffsetInMemSampling() int { + return 0 +} + func (t *PostingOffsetTableV1) NewSparsePostingOffsetTable() (table *indexheaderpb.PostingOffsetTable) { return &indexheaderpb.PostingOffsetTable{} } @@ -608,10 +640,18 @@ func (t *PostingOffsetTableV2) LabelNames() ([]string, error) { return labelNames, nil } +func (t *PostingOffsetTableV2) PostingOffsetInMemSampling() int { + if t != nil { + return t.postingOffsetsInMemSampling + } + return 0 +} + // NewSparsePostingOffsetTable loads all postings offset table data into a sparse index-header to be persisted to disk func (t *PostingOffsetTableV2) NewSparsePostingOffsetTable() (table *indexheaderpb.PostingOffsetTable) { sparseHeaders := &indexheaderpb.PostingOffsetTable{ - Postings: make(map[string]*indexheaderpb.PostingValueOffsets, len(t.postings)), + Postings: make(map[string]*indexheaderpb.PostingValueOffsets, len(t.postings)), + PostingOffsetInMemorySampling: int64(t.postingOffsetsInMemSampling), } for name, offsets := range t.postings { @@ -640,3 +680,10 @@ func skipNAndName(d *streamencoding.Decbuf, buf *int) { } d.Skip(*buf) } + +func stepSize(cur, tgt int) (int, bool) { + if cur > tgt || cur <= 0 || tgt <= 0 || tgt%cur != 0 { + return 0, false + } + return tgt / cur, true +} diff --git a/pkg/storage/indexheader/index/postings_test.go b/pkg/storage/indexheader/index/postings_test.go new file mode 100644 index 00000000000..e3a76723391 --- /dev/null +++ b/pkg/storage/indexheader/index/postings_test.go @@ -0,0 +1,228 @@ +// SPDX-License-Identifier: AGPL-3.0-only + +package index + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/assert" + + 
streamencoding "github.com/grafana/mimir/pkg/storage/indexheader/encoding" + "github.com/grafana/mimir/pkg/storage/indexheader/indexheaderpb" +) + +func TestPostingValueOffsets(t *testing.T) { + testCases := map[string]struct { + existingOffsets []postingOffset + prefix string + expectedFound bool + expectedStart int + expectedEnd int + }{ + "prefix not found": { + existingOffsets: []postingOffset{ + {value: "010"}, + {value: "019"}, + {value: "030"}, + {value: "031"}, + }, + prefix: "a", + expectedFound: false, + }, + "prefix matches only one sampled offset": { + existingOffsets: []postingOffset{ + {value: "010"}, + {value: "019"}, + {value: "030"}, + {value: "031"}, + }, + prefix: "02", + expectedFound: true, + expectedStart: 1, + expectedEnd: 2, + }, + "prefix matches all offsets": { + existingOffsets: []postingOffset{ + {value: "010"}, + {value: "019"}, + {value: "030"}, + {value: "031"}, + }, + prefix: "0", + expectedFound: true, + expectedStart: 0, + expectedEnd: 4, + }, + "prefix matches only last offset": { + existingOffsets: []postingOffset{ + {value: "010"}, + {value: "019"}, + {value: "030"}, + {value: "031"}, + }, + prefix: "031", + expectedFound: true, + expectedStart: 3, + expectedEnd: 4, + }, + "prefix matches multiple offsets": { + existingOffsets: []postingOffset{ + {value: "010"}, + {value: "019"}, + {value: "020"}, + {value: "030"}, + {value: "031"}, + }, + prefix: "02", + expectedFound: true, + expectedStart: 1, + expectedEnd: 3, + }, + "prefix matches only first offset": { + existingOffsets: []postingOffset{ + {value: "010"}, + {value: "019"}, + {value: "020"}, + {value: "030"}, + {value: "031"}, + }, + prefix: "015", + expectedFound: true, + expectedStart: 0, + expectedEnd: 1, + }, + } + + for testName, testCase := range testCases { + t.Run(testName, func(t *testing.T) { + offsets := postingValueOffsets{offsets: testCase.existingOffsets} + start, end, found := offsets.prefixOffsets(testCase.prefix) + assert.Equal(t, testCase.expectedStart, start) + assert.Equal(t, testCase.expectedEnd, end) + assert.Equal(t, testCase.expectedFound, found) + }) + } +} + +func createPostingOffset(n int) []*indexheaderpb.PostingOffset { + offsets := make([]*indexheaderpb.PostingOffset, n) + for i := 0; i < n; i++ { + offsets[i] = &indexheaderpb.PostingOffset{Value: fmt.Sprintf("%d", i), TableOff: int64(i)} + } + return offsets +} + +func Test_NewPostingOffsetTableFromSparseHeader(t *testing.T) { + + testCases := map[string]struct { + existingOffsetsLen int + postingOffsetsInMemSamplingRate int + protoSamplingRate int64 + expectedLen int + expectErr bool + }{ + "downsample_noop_proto_has_equal_sampling_rate": { + existingOffsetsLen: 100, + postingOffsetsInMemSamplingRate: 32, + protoSamplingRate: 32, + expectedLen: 100, + }, + "downsample_noop_preserve": { + existingOffsetsLen: 1, + postingOffsetsInMemSamplingRate: 32, + protoSamplingRate: 16, + expectedLen: 1, + }, + "downsample_noop_retain_first_and_last_posting": { + existingOffsetsLen: 2, + postingOffsetsInMemSamplingRate: 32, + protoSamplingRate: 16, + expectedLen: 2, + }, + "downsample_noop_retain_first_and_last_posting_larger_sampling_rates_ratio": { + existingOffsetsLen: 2, + postingOffsetsInMemSamplingRate: 32, + protoSamplingRate: 8, + expectedLen: 2, + }, + "downsample_short_offsets": { + existingOffsetsLen: 2, + postingOffsetsInMemSamplingRate: 32, + protoSamplingRate: 16, + expectedLen: 2, + }, + "downsample_noop_short_offsets": { + existingOffsetsLen: 1, + postingOffsetsInMemSamplingRate: 32, + protoSamplingRate: 16, + 
expectedLen: 1, + }, + "downsample_proto_has_divisible_sampling_rate": { + existingOffsetsLen: 100, + postingOffsetsInMemSamplingRate: 32, + protoSamplingRate: 16, + expectedLen: 50, + }, + "cannot_downsample_proto_has_no_sampling_rate": { + existingOffsetsLen: 100, + postingOffsetsInMemSamplingRate: 32, + protoSamplingRate: 0, + expectErr: true, + }, + "cannot_upsample_proto_has_less_frequent_sampling_rate": { + existingOffsetsLen: 100, + postingOffsetsInMemSamplingRate: 32, + protoSamplingRate: 64, + expectErr: true, + }, + "cannot_downsample_proto_has_non_divisible_sampling_rate": { + existingOffsetsLen: 100, + postingOffsetsInMemSamplingRate: 32, + protoSamplingRate: 10, + expectErr: true, + }, + "downsample_sampling_rates_ratio_does_not_divide_offsets": { + existingOffsetsLen: 33, + postingOffsetsInMemSamplingRate: 32, + protoSamplingRate: 16, + expectedLen: 17, + }, + "downsample_sampling_rates_ratio_exceeds_offset_len": { + existingOffsetsLen: 10, + postingOffsetsInMemSamplingRate: 1024, + protoSamplingRate: 8, + expectedLen: 2, + }, + "downsample_sampling_rates_ratio_equals_offset_len": { + existingOffsetsLen: 100, + postingOffsetsInMemSamplingRate: 100, + protoSamplingRate: 1, + expectedLen: 2, + }, + } + + for testName, testCase := range testCases { + t.Run(testName, func(t *testing.T) { + factory := streamencoding.DecbufFactory{} + + postingsMap := make(map[string]*indexheaderpb.PostingValueOffsets) + postingsMap["__name__"] = &indexheaderpb.PostingValueOffsets{Offsets: createPostingOffset(testCase.existingOffsetsLen)} + + protoTbl := indexheaderpb.PostingOffsetTable{ + Postings: postingsMap, + PostingOffsetInMemorySampling: testCase.protoSamplingRate, + } + + tbl, err := NewPostingOffsetTableFromSparseHeader(&factory, &protoTbl, 0, testCase.postingOffsetsInMemSamplingRate) + if testCase.expectErr { + assert.Error(t, err) + } else { + assert.NoError(t, err) + assert.Equal(t, testCase.expectedLen, len(tbl.postings["__name__"].offsets)) + } + + }) + } + +} diff --git a/pkg/storegateway/indexheader/index/symbols.go b/pkg/storage/indexheader/index/symbols.go similarity index 98% rename from pkg/storegateway/indexheader/index/symbols.go rename to pkg/storage/indexheader/index/symbols.go index 649e2a43c33..e6268655501 100644 --- a/pkg/storegateway/indexheader/index/symbols.go +++ b/pkg/storage/indexheader/index/symbols.go @@ -16,8 +16,8 @@ import ( "github.com/grafana/dskit/runutil" "github.com/prometheus/prometheus/tsdb/index" - streamencoding "github.com/grafana/mimir/pkg/storegateway/indexheader/encoding" - "github.com/grafana/mimir/pkg/storegateway/indexheader/indexheaderpb" + streamencoding "github.com/grafana/mimir/pkg/storage/indexheader/encoding" + "github.com/grafana/mimir/pkg/storage/indexheader/indexheaderpb" ) // The table gets initialized with sync.Once but may still cause a race diff --git a/pkg/storegateway/indexheader/index/symbols_test.go b/pkg/storage/indexheader/index/symbols_test.go similarity index 98% rename from pkg/storegateway/indexheader/index/symbols_test.go rename to pkg/storage/indexheader/index/symbols_test.go index 81d1fc42a56..eb399f43330 100644 --- a/pkg/storegateway/indexheader/index/symbols_test.go +++ b/pkg/storage/indexheader/index/symbols_test.go @@ -18,7 +18,7 @@ import ( "github.com/prometheus/prometheus/tsdb/index" "github.com/stretchr/testify/require" - streamencoding "github.com/grafana/mimir/pkg/storegateway/indexheader/encoding" + streamencoding "github.com/grafana/mimir/pkg/storage/indexheader/encoding" 
"github.com/grafana/mimir/pkg/util/test" ) diff --git a/pkg/storegateway/indexheader/indexheaderpb/sparse.pb.go b/pkg/storage/indexheader/indexheaderpb/sparse.pb.go similarity index 90% rename from pkg/storegateway/indexheader/indexheaderpb/sparse.pb.go rename to pkg/storage/indexheader/indexheaderpb/sparse.pb.go index 4e96f6aeba7..2408ca63c3e 100644 --- a/pkg/storegateway/indexheader/indexheaderpb/sparse.pb.go +++ b/pkg/storage/indexheader/indexheaderpb/sparse.pb.go @@ -130,7 +130,8 @@ func (m *Symbols) GetSymbolsCount() int64 { type PostingOffsetTable struct { // Postings is a map of label names -> PostingValueOffsets - Postings map[string]*PostingValueOffsets `protobuf:"bytes,1,rep,name=postings,proto3" json:"postings,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` + Postings map[string]*PostingValueOffsets `protobuf:"bytes,1,rep,name=postings,proto3" json:"postings,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` + PostingOffsetInMemorySampling int64 `protobuf:"varint,2,opt,name=postingOffsetInMemorySampling,proto3" json:"postingOffsetInMemorySampling,omitempty"` } func (m *PostingOffsetTable) Reset() { *m = PostingOffsetTable{} } @@ -172,6 +173,13 @@ func (m *PostingOffsetTable) GetPostings() map[string]*PostingValueOffsets { return nil } +func (m *PostingOffsetTable) GetPostingOffsetInMemorySampling() int64 { + if m != nil { + return m.PostingOffsetInMemorySampling + } + return 0 +} + // PostingValueOffsets stores a list of the first, last, and every 32nd (config default) PostingOffset for this label name. type PostingValueOffsets struct { Offsets []*PostingOffset `protobuf:"bytes,1,rep,name=offsets,proto3" json:"offsets,omitempty"` @@ -289,33 +297,34 @@ func init() { func init() { proto.RegisterFile("sparse.proto", fileDescriptor_c442573753a956c7) } var fileDescriptor_c442573753a956c7 = []byte{ - // 402 bytes of a gzipped FileDescriptorProto - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x84, 0x52, 0x3f, 0x4f, 0xf2, 0x40, - 0x18, 0xef, 0xd1, 0xbc, 0xc0, 0x7b, 0x40, 0x42, 0x0e, 0x63, 0x08, 0x21, 0x17, 0x6d, 0x1c, 0x58, - 0x2c, 0x06, 0x13, 0x43, 0xdc, 0x84, 0x38, 0x31, 0x60, 0x8a, 0x61, 0x70, 0x31, 0xad, 0xb4, 0x85, - 0x58, 0x7a, 0x4d, 0xef, 0x6a, 0x64, 0xf3, 0x1b, 0xe8, 0xc7, 0xf0, 0x73, 0x38, 0x39, 0x32, 0x32, - 0x4a, 0x59, 0x1c, 0xf9, 0x08, 0xa6, 0x77, 0x2d, 0x5a, 0x25, 0xba, 0xdd, 0xf3, 0x3c, 0xbf, 0x3f, - 0xcf, 0xf3, 0xcb, 0xc1, 0x22, 0xf5, 0x74, 0x9f, 0x9a, 0xaa, 0xe7, 0x13, 0x46, 0x50, 0x69, 0xe2, - 0x8e, 0xcc, 0xfb, 0xb1, 0xa9, 0x8f, 0x4c, 0xdf, 0x33, 0x6a, 0x87, 0xf6, 0x84, 0x8d, 0x03, 0x43, - 0xbd, 0x21, 0xd3, 0xa6, 0x4d, 0x6c, 0xd2, 0xe4, 0x28, 0x23, 0xb0, 0x78, 0xc5, 0x0b, 0xfe, 0x12, - 0x6c, 0xe5, 0x11, 0xc0, 0xec, 0x80, 0xcb, 0xa1, 0x23, 0x98, 0xa3, 0xb3, 0xa9, 0x41, 0x1c, 0x5a, - 0x05, 0x7b, 0xa0, 0x51, 0x68, 0xed, 0xaa, 0x29, 0x69, 0x75, 0x20, 0xa6, 0x5a, 0x02, 0x43, 0x03, - 0x58, 0xf1, 0x08, 0x65, 0x13, 0xd7, 0xa6, 0x7d, 0xcb, 0xa2, 0x26, 0xbb, 0xd4, 0x0d, 0xc7, 0xac, - 0x66, 0x38, 0x7b, 0xff, 0x1b, 0xfb, 0x42, 0x20, 0xbf, 0x00, 0xb5, 0x6d, 0x6c, 0xa5, 0x07, 0x73, - 0xb1, 0x11, 0xaa, 0xc3, 0x1c, 0xe1, 0x93, 0x68, 0x23, 0xb9, 0x21, 0x77, 0x32, 0x65, 0xa0, 0x25, - 0x2d, 0xa4, 0xc0, 0x62, 0xbc, 0x48, 0x97, 0x04, 0x2e, 0xe3, 0xb6, 0xb2, 0x96, 0xea, 0x29, 0x2f, - 0x00, 0xa2, 0x9f, 0xc6, 0xa8, 0x07, 0xf3, 0x89, 0x35, 0x57, 0x2e, 0xb4, 0x9a, 0x7f, 0x6e, 0x9b, - 0xb4, 0xe8, 0xb9, 0xcb, 0xfc, 0x99, 0xb6, 0x11, 0xa8, 0x5d, 0xc3, 0x52, 0x6a, 0x84, 0xca, 0x50, - 
0xbe, 0x35, 0x67, 0x3c, 0xc4, 0xff, 0x5a, 0xf4, 0x44, 0x6d, 0xf8, 0xef, 0x4e, 0x77, 0x82, 0x24, - 0x1a, 0x65, 0xbb, 0xd9, 0x30, 0x82, 0x08, 0x47, 0xaa, 0x09, 0xc2, 0x69, 0xa6, 0x0d, 0x14, 0x0a, - 0x2b, 0x5b, 0x10, 0xe8, 0x24, 0x9d, 0x4e, 0xa1, 0x55, 0xff, 0xed, 0x86, 0xcf, 0xdc, 0x0e, 0x60, - 0xc9, 0xd1, 0x29, 0x1b, 0xea, 0x8e, 0x98, 0xc4, 0xc1, 0xa5, 0x9b, 0xca, 0xd9, 0xe6, 0x2a, 0xd1, - 0x40, 0x3b, 0xc9, 0x0d, 0xe2, 0x2e, 0x51, 0xa0, 0x1a, 0xcc, 0xb3, 0x28, 0x9d, 0xbe, 0x65, 0xc5, - 0x3a, 0x9b, 0xba, 0xd3, 0x9d, 0x2f, 0xb1, 0xb4, 0x58, 0x62, 0x69, 0xbd, 0xc4, 0xe0, 0x21, 0xc4, - 0xe0, 0x39, 0xc4, 0xe0, 0x35, 0xc4, 0x60, 0x1e, 0x62, 0xf0, 0x16, 0x62, 0xf0, 0x1e, 0x62, 0x69, - 0x1d, 0x62, 0xf0, 0xb4, 0xc2, 0xd2, 0x7c, 0x85, 0xa5, 0xc5, 0x0a, 0x4b, 0x57, 0xe9, 0xff, 0x6c, - 0x64, 0xf9, 0x3f, 0x3d, 0xfe, 0x08, 0x00, 0x00, 0xff, 0xff, 0x81, 0x2f, 0x9e, 0xda, 0xf5, 0x02, - 0x00, 0x00, + // 432 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x84, 0x53, 0xb1, 0x8e, 0xd3, 0x40, + 0x10, 0xf5, 0xda, 0xe2, 0x72, 0x4c, 0x2e, 0xd2, 0x69, 0x0f, 0xa1, 0x53, 0x74, 0xac, 0xc0, 0xa2, + 0xb8, 0x06, 0x07, 0x05, 0x09, 0x9d, 0xe8, 0xb8, 0x40, 0x81, 0x22, 0x14, 0x64, 0xa3, 0x14, 0x34, + 0xc8, 0x26, 0x6b, 0xc7, 0xc2, 0xf6, 0x5a, 0xde, 0x35, 0xc2, 0x1d, 0x25, 0x1d, 0x7c, 0x06, 0x9f, + 0x42, 0x99, 0x32, 0x25, 0x71, 0x1a, 0xca, 0x7c, 0x02, 0xf2, 0xae, 0x1d, 0x62, 0x88, 0x72, 0xdd, + 0xce, 0xcc, 0x9b, 0xf7, 0xde, 0xbc, 0xc4, 0x70, 0xc2, 0x53, 0x37, 0xe3, 0xd4, 0x4a, 0x33, 0x26, + 0x18, 0xee, 0x85, 0xc9, 0x8c, 0x7e, 0x9e, 0x53, 0x77, 0x46, 0xb3, 0xd4, 0xeb, 0x3f, 0x0a, 0x42, + 0x31, 0xcf, 0x3d, 0xeb, 0x03, 0x8b, 0x07, 0x01, 0x0b, 0xd8, 0x40, 0xa2, 0xbc, 0xdc, 0x97, 0x95, + 0x2c, 0xe4, 0x4b, 0x6d, 0x9b, 0xdf, 0x10, 0x1c, 0x39, 0x92, 0x0e, 0x3f, 0x86, 0x0e, 0x2f, 0x62, + 0x8f, 0x45, 0xfc, 0x1c, 0xdd, 0x47, 0x97, 0xdd, 0xe1, 0x5d, 0xab, 0x45, 0x6d, 0x39, 0x6a, 0x6a, + 0x37, 0x30, 0xec, 0xc0, 0x59, 0xca, 0xb8, 0x08, 0x93, 0x80, 0x4f, 0x7c, 0x9f, 0x53, 0xf1, 0xd6, + 0xf5, 0x22, 0x7a, 0xae, 0xcb, 0xed, 0x07, 0xff, 0x6c, 0xbf, 0x51, 0xc8, 0x1d, 0xa0, 0xbd, 0x6f, + 0xdb, 0x1c, 0x43, 0xa7, 0x16, 0xc2, 0x17, 0xd0, 0x61, 0x72, 0x52, 0x39, 0x32, 0x2e, 0x8d, 0x6b, + 0xfd, 0x14, 0xd9, 0x4d, 0x0b, 0x9b, 0x70, 0x52, 0x1b, 0x19, 0xb1, 0x3c, 0x11, 0x52, 0xd6, 0xb0, + 0x5b, 0x3d, 0xf3, 0xab, 0x0e, 0xf8, 0x7f, 0x61, 0x3c, 0x86, 0xe3, 0x46, 0x5a, 0x32, 0x77, 0x87, + 0x83, 0x1b, 0xdd, 0x36, 0x2d, 0xfe, 0x32, 0x11, 0x59, 0x61, 0x6f, 0x09, 0xf0, 0x0b, 0xb8, 0x97, + 0xee, 0xa2, 0x5f, 0x25, 0xaf, 0x69, 0xcc, 0xb2, 0xc2, 0x71, 0xe3, 0x34, 0x0a, 0x93, 0xa0, 0x36, + 0x76, 0x18, 0xd4, 0x7f, 0x0f, 0xbd, 0x96, 0x00, 0x3e, 0x05, 0xe3, 0x23, 0x2d, 0xe4, 0x4f, 0x71, + 0xdb, 0xae, 0x9e, 0xf8, 0x0a, 0x6e, 0x7d, 0x72, 0xa3, 0xbc, 0x09, 0xd8, 0xdc, 0x6f, 0x79, 0x5a, + 0x41, 0x94, 0x08, 0xb7, 0xd5, 0xc2, 0x33, 0xfd, 0x0a, 0x99, 0x1c, 0xce, 0xf6, 0x20, 0xf0, 0xd3, + 0x76, 0xc6, 0xdd, 0xe1, 0xc5, 0xa1, 0x24, 0xfe, 0xa6, 0xff, 0x10, 0x7a, 0x91, 0xcb, 0xc5, 0xd4, + 0x8d, 0xd4, 0xa4, 0xbe, 0xb2, 0xdd, 0x34, 0x9f, 0x6f, 0xaf, 0x52, 0x0d, 0x7c, 0xa7, 0xb9, 0x41, + 0xdd, 0xa5, 0x0a, 0xdc, 0x87, 0x63, 0x51, 0x65, 0x3c, 0xf1, 0xfd, 0x9a, 0x67, 0x5b, 0x5f, 0x8f, + 0x16, 0x2b, 0xa2, 0x2d, 0x57, 0x44, 0xdb, 0xac, 0x08, 0xfa, 0x52, 0x12, 0xf4, 0xa3, 0x24, 0xe8, + 0x67, 0x49, 0xd0, 0xa2, 0x24, 0xe8, 0x57, 0x49, 0xd0, 0xef, 0x92, 0x68, 0x9b, 0x92, 0xa0, 0xef, + 0x6b, 0xa2, 0x2d, 0xd6, 0x44, 0x5b, 0xae, 0x89, 0xf6, 0xae, 0xfd, 0x55, 0x78, 0x47, 0xf2, 0xdf, + 0xfe, 0xe4, 0x4f, 0x00, 0x00, 0x00, 0xff, 0xff, 0x4c, 0x91, 
0x1d, 0x5b, 0x3b, 0x03, 0x00, 0x00, } func (this *Sparse) Equal(that interface{}) bool { @@ -404,6 +413,9 @@ func (this *PostingOffsetTable) Equal(that interface{}) bool { return false } } + if this.PostingOffsetInMemorySampling != that1.PostingOffsetInMemorySampling { + return false + } return true } func (this *PostingValueOffsets) Equal(that interface{}) bool { @@ -495,7 +507,7 @@ func (this *PostingOffsetTable) GoString() string { if this == nil { return "nil" } - s := make([]string, 0, 5) + s := make([]string, 0, 6) s = append(s, "&indexheaderpb.PostingOffsetTable{") keysForPostings := make([]string, 0, len(this.Postings)) for k, _ := range this.Postings { @@ -510,6 +522,7 @@ func (this *PostingOffsetTable) GoString() string { if this.Postings != nil { s = append(s, "Postings: "+mapStringForPostings+",\n") } + s = append(s, "PostingOffsetInMemorySampling: "+fmt.Sprintf("%#v", this.PostingOffsetInMemorySampling)+",\n") s = append(s, "}") return strings.Join(s, "") } @@ -659,6 +672,11 @@ func (m *PostingOffsetTable) MarshalToSizedBuffer(dAtA []byte) (int, error) { _ = i var l int _ = l + if m.PostingOffsetInMemorySampling != 0 { + i = encodeVarintSparse(dAtA, i, uint64(m.PostingOffsetInMemorySampling)) + i-- + dAtA[i] = 0x10 + } if len(m.Postings) > 0 { for k := range m.Postings { v := m.Postings[k] @@ -831,6 +849,9 @@ func (m *PostingOffsetTable) Size() (n int) { n += mapEntrySize + 1 + sovSparse(uint64(mapEntrySize)) } } + if m.PostingOffsetInMemorySampling != 0 { + n += 1 + sovSparse(uint64(m.PostingOffsetInMemorySampling)) + } return n } @@ -912,6 +933,7 @@ func (this *PostingOffsetTable) String() string { mapStringForPostings += "}" s := strings.Join([]string{`&PostingOffsetTable{`, `Postings:` + mapStringForPostings + `,`, + `PostingOffsetInMemorySampling:` + fmt.Sprintf("%v", this.PostingOffsetInMemorySampling) + `,`, `}`, }, "") return s @@ -1382,6 +1404,25 @@ func (m *PostingOffsetTable) Unmarshal(dAtA []byte) error { } m.Postings[mapkey] = mapvalue iNdEx = postIndex + case 2: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field PostingOffsetInMemorySampling", wireType) + } + m.PostingOffsetInMemorySampling = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowSparse + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.PostingOffsetInMemorySampling |= int64(b&0x7F) << shift + if b < 0x80 { + break + } + } default: iNdEx = preIndex skippy, err := skipSparse(dAtA[iNdEx:]) diff --git a/pkg/storegateway/indexheader/indexheaderpb/sparse.proto b/pkg/storage/indexheader/indexheaderpb/sparse.proto similarity index 96% rename from pkg/storegateway/indexheader/indexheaderpb/sparse.proto rename to pkg/storage/indexheader/indexheaderpb/sparse.proto index c20c45f1ebd..6027e938f88 100644 --- a/pkg/storegateway/indexheader/indexheaderpb/sparse.proto +++ b/pkg/storage/indexheader/indexheaderpb/sparse.proto @@ -26,6 +26,7 @@ message Symbols { message PostingOffsetTable { // Postings is a map of label names -> PostingValueOffsets map postings = 1; + int64 postingOffsetInMemorySampling = 2; } // PostingValueOffsets stores a list of the first, last, and every 32nd (config default) PostingOffset for this label name. 
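With this addition, the PostingOffsetTable message in sparse.proto carries both the postings map (label name to PostingValueOffsets, per the generated Go type above) and the sampling rate the header was written at, so a reader can decide up front whether a persisted sparse header is usable for its configured rate. A rough sketch of that check against the generated indexheaderpb type follows; the helper name is illustrative, and the real decision lives in NewPostingOffsetTableFromSparseHeader:

import "github.com/grafana/mimir/pkg/storage/indexheader/indexheaderpb"

// canReuseSparseHeader reports whether a persisted sparse index-header can be used
// as-is or after downsampling, given the configured in-memory sampling rate.
func canReuseSparseHeader(tbl *indexheaderpb.PostingOffsetTable, configuredRate int) bool {
	written := int(tbl.GetPostingOffsetInMemorySampling())
	if written == 0 {
		// Headers written before this change carry no rate and must be rebuilt.
		return false
	}
	// Reuse is only possible when the configured rate is a whole multiple of the
	// written rate, so the persisted offsets can be thinned without gaps.
	return written <= configuredRate && configuredRate%written == 0
}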
diff --git a/pkg/storegateway/indexheader/lazy_binary_reader.go b/pkg/storage/indexheader/lazy_binary_reader.go similarity index 99% rename from pkg/storegateway/indexheader/lazy_binary_reader.go rename to pkg/storage/indexheader/lazy_binary_reader.go index 98c587ced42..40cb377a38e 100644 --- a/pkg/storegateway/indexheader/lazy_binary_reader.go +++ b/pkg/storage/indexheader/lazy_binary_reader.go @@ -24,8 +24,8 @@ import ( "github.com/thanos-io/objstore" "go.uber.org/atomic" + streamindex "github.com/grafana/mimir/pkg/storage/indexheader/index" "github.com/grafana/mimir/pkg/storage/tsdb/block" - streamindex "github.com/grafana/mimir/pkg/storegateway/indexheader/index" ) var ( diff --git a/pkg/storegateway/indexheader/lazy_binary_reader_test.go b/pkg/storage/indexheader/lazy_binary_reader_test.go similarity index 99% rename from pkg/storegateway/indexheader/lazy_binary_reader_test.go rename to pkg/storage/indexheader/lazy_binary_reader_test.go index 13e79f92b5e..ab038957835 100644 --- a/pkg/storegateway/indexheader/lazy_binary_reader_test.go +++ b/pkg/storage/indexheader/lazy_binary_reader_test.go @@ -28,8 +28,8 @@ import ( "github.com/thanos-io/objstore/providers/filesystem" "go.uber.org/atomic" + streamindex "github.com/grafana/mimir/pkg/storage/indexheader/index" "github.com/grafana/mimir/pkg/storage/tsdb/block" - streamindex "github.com/grafana/mimir/pkg/storegateway/indexheader/index" "github.com/grafana/mimir/pkg/util/test" ) diff --git a/pkg/storegateway/indexheader/reader_benchmarks_test.go b/pkg/storage/indexheader/reader_benchmarks_test.go similarity index 100% rename from pkg/storegateway/indexheader/reader_benchmarks_test.go rename to pkg/storage/indexheader/reader_benchmarks_test.go diff --git a/pkg/storegateway/indexheader/reader_pool.go b/pkg/storage/indexheader/reader_pool.go similarity index 100% rename from pkg/storegateway/indexheader/reader_pool.go rename to pkg/storage/indexheader/reader_pool.go diff --git a/pkg/storegateway/indexheader/reader_pool_test.go b/pkg/storage/indexheader/reader_pool_test.go similarity index 100% rename from pkg/storegateway/indexheader/reader_pool_test.go rename to pkg/storage/indexheader/reader_pool_test.go diff --git a/pkg/storegateway/indexheader/snapshotter.go b/pkg/storage/indexheader/snapshotter.go similarity index 100% rename from pkg/storegateway/indexheader/snapshotter.go rename to pkg/storage/indexheader/snapshotter.go diff --git a/pkg/storegateway/indexheader/snapshotter_test.go b/pkg/storage/indexheader/snapshotter_test.go similarity index 100% rename from pkg/storegateway/indexheader/snapshotter_test.go rename to pkg/storage/indexheader/snapshotter_test.go diff --git a/pkg/storegateway/indexheader/stream_binary_reader.go b/pkg/storage/indexheader/stream_binary_reader.go similarity index 85% rename from pkg/storegateway/indexheader/stream_binary_reader.go rename to pkg/storage/indexheader/stream_binary_reader.go index 88d9ab1657b..4fa3cafd444 100644 --- a/pkg/storegateway/indexheader/stream_binary_reader.go +++ b/pkg/storage/indexheader/stream_binary_reader.go @@ -24,10 +24,10 @@ import ( "github.com/prometheus/prometheus/tsdb/index" "github.com/thanos-io/objstore" + streamencoding "github.com/grafana/mimir/pkg/storage/indexheader/encoding" + streamindex "github.com/grafana/mimir/pkg/storage/indexheader/index" + "github.com/grafana/mimir/pkg/storage/indexheader/indexheaderpb" "github.com/grafana/mimir/pkg/storage/tsdb/block" - streamencoding "github.com/grafana/mimir/pkg/storegateway/indexheader/encoding" - streamindex 
"github.com/grafana/mimir/pkg/storegateway/indexheader/index" - "github.com/grafana/mimir/pkg/storegateway/indexheader/indexheaderpb" "github.com/grafana/mimir/pkg/util/atomicfs" "github.com/grafana/mimir/pkg/util/spanlogger" ) @@ -93,7 +93,9 @@ func NewStreamBinaryReader(ctx context.Context, logger log.Logger, bkt objstore. } // newFileStreamBinaryReader loads sparse index-headers from disk or constructs it from the index-header if not available. -func newFileStreamBinaryReader(binPath string, id ulid.ULID, sparseHeadersPath string, postingOffsetsInMemSampling int, logger *spanlogger.SpanLogger, metrics *StreamBinaryReaderMetrics, cfg Config) (bw *StreamBinaryReader, err error) { +func newFileStreamBinaryReader(binPath string, id ulid.ULID, sparseHeadersPath string, postingOffsetsInMemSampling int, logger log.Logger, metrics *StreamBinaryReaderMetrics, cfg Config) (bw *StreamBinaryReader, err error) { + logger = log.With(logger, "id", id, "path", sparseHeadersPath, "inmem_sampling_rate", postingOffsetsInMemSampling) + r := &StreamBinaryReader{ factory: streamencoding.NewDecbufFactory(binPath, cfg.MaxIdleFileHandles, metrics.decbufFactory), } @@ -141,33 +143,37 @@ func newFileStreamBinaryReader(binPath string, id ulid.ULID, sparseHeadersPath s // Load in sparse symbols and postings offset table; from disk if this is a v2 index. if r.indexVersion == index.FormatV2 { + var reconstruct bool sparseData, err := os.ReadFile(sparseHeadersPath) if err != nil && !os.IsNotExist(err) { - level.Warn(logger).Log("msg", "failed to read sparse index-headers from disk; recreating", "id", id, "err", err) + level.Warn(logger).Log("msg", "failed to read sparse index-headers from disk; recreating", "err", err) + } + + if err == nil { + if err = r.loadFromSparseIndexHeader(logger, sparseData, postingOffsetsInMemSampling); err != nil { + reconstruct = true + } } - // If sparseHeaders are not on disk, construct sparseHeaders and write to disk. - if err != nil { - if err = r.loadFromIndexHeader(logger, id, cfg, indexLastPostingListEndBound, postingOffsetsInMemSampling); err != nil { + // reconstruct from index if the sparse index-header file isn't on disk or if the sampling rate of the headers + // on disk can't be downsampled to the desired rate. + if err != nil || reconstruct { + if err = r.loadFromIndexHeader(logger, cfg, indexLastPostingListEndBound, postingOffsetsInMemSampling); err != nil { return nil, fmt.Errorf("cannot load sparse index-header: %w", err) } - if err := writeSparseHeadersToFile(logger, id, sparseHeadersPath, r); err != nil { + if err := writeSparseHeadersToFile(logger, sparseHeadersPath, r); err != nil { return nil, fmt.Errorf("cannot write sparse index-header to disk: %w", err) } - - level.Debug(logger).Log("msg", "built sparse index-header file", "id", id, "path", sparseHeadersPath) - } else { - // Otherwise, read persisted sparseHeaders from disk to memory. 
- if err = r.loadFromSparseIndexHeader(logger, id, sparseHeadersPath, sparseData, postingOffsetsInMemSampling); err != nil { - return nil, fmt.Errorf("cannot load sparse index-header from disk: %w", err) - } + level.Debug(logger).Log("msg", "built sparse index-header file") } + } else { - if err = r.loadFromIndexHeader(logger, id, cfg, indexLastPostingListEndBound, postingOffsetsInMemSampling); err != nil { + if err = r.loadFromIndexHeader(logger, cfg, indexLastPostingListEndBound, postingOffsetsInMemSampling); err != nil { return nil, fmt.Errorf("cannot load sparse index-header: %w", err) } } + level.Debug(logger).Log("msg", "built sparse index-header file") labelNames, err := r.postingsOffsetTable.LabelNames() if err != nil { return nil, fmt.Errorf("cannot load label names from postings offset table: %w", err) @@ -185,13 +191,13 @@ func newFileStreamBinaryReader(binPath string, id ulid.ULID, sparseHeadersPath s } // loadFromSparseIndexHeader load from sparse index-header on disk. -func (r *StreamBinaryReader) loadFromSparseIndexHeader(logger *spanlogger.SpanLogger, id ulid.ULID, sparseHeadersPath string, sparseData []byte, postingOffsetsInMemSampling int) (err error) { +func (r *StreamBinaryReader) loadFromSparseIndexHeader(logger log.Logger, sparseData []byte, postingOffsetsInMemSampling int) (err error) { start := time.Now() defer func() { - level.Info(logger).Log("msg", "loaded sparse index-header from disk", "id", id, "path", sparseHeadersPath, "elapsed", time.Since(start)) + level.Info(logger).Log("msg", "loaded sparse index-header from disk", "elapsed", time.Since(start)) }() - level.Info(logger).Log("msg", "loading sparse index-header from disk", "id", id, "path", sparseHeadersPath) + level.Info(logger).Log("msg", "loading sparse index-header from disk") sparseHeaders := &indexheaderpb.Sparse{} gzipped := bytes.NewReader(sparseData) @@ -224,13 +230,13 @@ func (r *StreamBinaryReader) loadFromSparseIndexHeader(logger *spanlogger.SpanLo } // loadFromIndexHeader loads in symbols and postings offset table from the index-header. -func (r *StreamBinaryReader) loadFromIndexHeader(logger *spanlogger.SpanLogger, id ulid.ULID, cfg Config, indexLastPostingListEndBound uint64, postingOffsetsInMemSampling int) (err error) { +func (r *StreamBinaryReader) loadFromIndexHeader(logger log.Logger, cfg Config, indexLastPostingListEndBound uint64, postingOffsetsInMemSampling int) (err error) { start := time.Now() defer func() { - level.Info(logger).Log("msg", "loaded sparse index-header from full index-header", "id", id, "elapsed", time.Since(start)) + level.Info(logger).Log("msg", "loaded sparse index-header from full index-header", "elapsed", time.Since(start)) }() - level.Info(logger).Log("msg", "loading sparse index-header from full index-header", "id", id) + level.Info(logger).Log("msg", "loading sparse index-header from full index-header") r.symbols, err = streamindex.NewSymbols(r.factory, r.indexVersion, int(r.toc.Symbols), cfg.VerifyOnLoad) if err != nil { @@ -246,13 +252,13 @@ func (r *StreamBinaryReader) loadFromIndexHeader(logger *spanlogger.SpanLogger, } // writeSparseHeadersToFile uses protocol buffer to write StreamBinaryReader to disk at sparseHeadersPath. 
-func writeSparseHeadersToFile(logger *spanlogger.SpanLogger, id ulid.ULID, sparseHeadersPath string, reader *StreamBinaryReader) error { +func writeSparseHeadersToFile(logger log.Logger, sparseHeadersPath string, reader *StreamBinaryReader) error { start := time.Now() defer func() { - level.Info(logger).Log("msg", "wrote sparse index-header to disk", "id", id, "path", sparseHeadersPath, "elapsed", time.Since(start)) + level.Info(logger).Log("msg", "wrote sparse index-header to disk", "elapsed", time.Since(start)) }() - level.Info(logger).Log("msg", "writing sparse index-header to disk", "id", id, "path", sparseHeadersPath) + level.Info(logger).Log("msg", "writing sparse index-header to disk") sparseHeaders := &indexheaderpb.Sparse{} sparseHeaders.Symbols = reader.symbols.NewSparseSymbol() diff --git a/pkg/storegateway/indexheader/stream_binary_reader_test.go b/pkg/storage/indexheader/stream_binary_reader_test.go similarity index 96% rename from pkg/storegateway/indexheader/stream_binary_reader_test.go rename to pkg/storage/indexheader/stream_binary_reader_test.go index 219b9b0c109..1335c3c4fb2 100644 --- a/pkg/storegateway/indexheader/stream_binary_reader_test.go +++ b/pkg/storage/indexheader/stream_binary_reader_test.go @@ -16,8 +16,8 @@ import ( "github.com/stretchr/testify/require" "github.com/thanos-io/objstore/providers/filesystem" + streamindex "github.com/grafana/mimir/pkg/storage/indexheader/index" "github.com/grafana/mimir/pkg/storage/tsdb/block" - streamindex "github.com/grafana/mimir/pkg/storegateway/indexheader/index" "github.com/grafana/mimir/pkg/util/spanlogger" ) @@ -50,7 +50,7 @@ func TestStreamBinaryReader_ShouldBuildSparseHeadersFromFileSimple(t *testing.T) require.NoError(t, err) logger := spanlogger.FromContext(context.Background(), log.NewNopLogger()) - err = r.loadFromSparseIndexHeader(logger, blockID, sparseHeadersPath, sparseData, 3) + err = r.loadFromSparseIndexHeader(logger, sparseData, 3) require.NoError(t, err) } @@ -91,6 +91,7 @@ func TestStreamBinaryReader_CheckSparseHeadersCorrectnessExtensive(t *testing.T) // Check correctness of sparse index headers. 
compareIndexToHeader(t, b, r2) + compareIndexToHeaderPostings(t, b, r2) }) } } diff --git a/pkg/storegateway/indexheader/testdata/index_format_v1/chunks/.gitkeep b/pkg/storage/indexheader/testdata/index_format_v1/chunks/.gitkeep similarity index 100% rename from pkg/storegateway/indexheader/testdata/index_format_v1/chunks/.gitkeep rename to pkg/storage/indexheader/testdata/index_format_v1/chunks/.gitkeep diff --git a/pkg/storegateway/indexheader/testdata/index_format_v1/index b/pkg/storage/indexheader/testdata/index_format_v1/index similarity index 100% rename from pkg/storegateway/indexheader/testdata/index_format_v1/index rename to pkg/storage/indexheader/testdata/index_format_v1/index diff --git a/pkg/storegateway/indexheader/testdata/index_format_v1/meta.json b/pkg/storage/indexheader/testdata/index_format_v1/meta.json similarity index 100% rename from pkg/storegateway/indexheader/testdata/index_format_v1/meta.json rename to pkg/storage/indexheader/testdata/index_format_v1/meta.json diff --git a/pkg/storegateway/indexheader/testdata/index_format_v2/chunks/.gitkeep b/pkg/storage/indexheader/testdata/index_format_v2/chunks/.gitkeep similarity index 100% rename from pkg/storegateway/indexheader/testdata/index_format_v2/chunks/.gitkeep rename to pkg/storage/indexheader/testdata/index_format_v2/chunks/.gitkeep diff --git a/pkg/storegateway/indexheader/testdata/index_format_v2/index b/pkg/storage/indexheader/testdata/index_format_v2/index similarity index 100% rename from pkg/storegateway/indexheader/testdata/index_format_v2/index rename to pkg/storage/indexheader/testdata/index_format_v2/index diff --git a/pkg/storegateway/indexheader/testdata/index_format_v2/meta.json b/pkg/storage/indexheader/testdata/index_format_v2/meta.json similarity index 100% rename from pkg/storegateway/indexheader/testdata/index_format_v2/meta.json rename to pkg/storage/indexheader/testdata/index_format_v2/meta.json diff --git a/pkg/storage/tsdb/block/block.go b/pkg/storage/tsdb/block/block.go index be8d04ee3ca..e53f8820fbb 100644 --- a/pkg/storage/tsdb/block/block.go +++ b/pkg/storage/tsdb/block/block.go @@ -121,6 +121,15 @@ func Upload(ctx context.Context, logger log.Logger, bkt objstore.Bucket, blockDi return cleanUp(logger, bkt, id, errors.Wrap(err, "upload index")) } + src := filepath.Join(blockDir, SparseIndexHeaderFilename) + dst := filepath.Join(id.String(), SparseIndexHeaderFilename) + if _, err := os.Stat(src); err == nil { + if err := objstore.UploadFile(ctx, logger, bkt, src, dst); err != nil { + // Don't call cleanUp. Uploading sparse index headers is best effort. + level.Warn(logger).Log("msg", "failed to upload sparse index headers", "block", id.String(), "err", err) + } + } + // Meta.json always need to be uploaded as a last item. This will allow to assume block directories without meta file to be pending uploads. if err := bkt.Upload(ctx, path.Join(id.String(), MetaFilename), strings.NewReader(metaEncoded.String())); err != nil { // Don't call cleanUp here. 
Despite getting error, meta.json may have been uploaded in certain cases, diff --git a/pkg/storage/tsdb/config.go b/pkg/storage/tsdb/config.go index 6c7dfd2d573..b7ea189b6e0 100644 --- a/pkg/storage/tsdb/config.go +++ b/pkg/storage/tsdb/config.go @@ -20,7 +20,7 @@ import ( "github.com/grafana/mimir/pkg/ingester/activeseries" "github.com/grafana/mimir/pkg/storage/bucket" - "github.com/grafana/mimir/pkg/storegateway/indexheader" + "github.com/grafana/mimir/pkg/storage/indexheader" ) const ( diff --git a/pkg/storegateway/bucket.go b/pkg/storegateway/bucket.go index 1fd9dd72758..bd7ac467701 100644 --- a/pkg/storegateway/bucket.go +++ b/pkg/storegateway/bucket.go @@ -45,14 +45,14 @@ import ( "google.golang.org/grpc/status" "github.com/grafana/mimir/pkg/mimirpb" + "github.com/grafana/mimir/pkg/storage/indexheader" + streamindex "github.com/grafana/mimir/pkg/storage/indexheader/index" "github.com/grafana/mimir/pkg/storage/sharding" "github.com/grafana/mimir/pkg/storage/tsdb" "github.com/grafana/mimir/pkg/storage/tsdb/block" "github.com/grafana/mimir/pkg/storage/tsdb/bucketcache" "github.com/grafana/mimir/pkg/storegateway/hintspb" "github.com/grafana/mimir/pkg/storegateway/indexcache" - "github.com/grafana/mimir/pkg/storegateway/indexheader" - streamindex "github.com/grafana/mimir/pkg/storegateway/indexheader/index" "github.com/grafana/mimir/pkg/storegateway/storegatewaypb" "github.com/grafana/mimir/pkg/storegateway/storepb" "github.com/grafana/mimir/pkg/util" diff --git a/pkg/storegateway/bucket_e2e_test.go b/pkg/storegateway/bucket_e2e_test.go index 72976f55b6e..32d3f384327 100644 --- a/pkg/storegateway/bucket_e2e_test.go +++ b/pkg/storegateway/bucket_e2e_test.go @@ -34,10 +34,10 @@ import ( "google.golang.org/grpc/codes" "github.com/grafana/mimir/pkg/mimirpb" + "github.com/grafana/mimir/pkg/storage/indexheader" mimir_tsdb "github.com/grafana/mimir/pkg/storage/tsdb" "github.com/grafana/mimir/pkg/storage/tsdb/block" "github.com/grafana/mimir/pkg/storegateway/indexcache" - "github.com/grafana/mimir/pkg/storegateway/indexheader" "github.com/grafana/mimir/pkg/storegateway/storepb" "github.com/grafana/mimir/pkg/util/test" ) diff --git a/pkg/storegateway/bucket_index_postings.go b/pkg/storegateway/bucket_index_postings.go index 6111374c6cc..98b7022f4ba 100644 --- a/pkg/storegateway/bucket_index_postings.go +++ b/pkg/storegateway/bucket_index_postings.go @@ -17,10 +17,10 @@ import ( "github.com/prometheus/prometheus/tsdb/encoding" "github.com/prometheus/prometheus/tsdb/index" + "github.com/grafana/mimir/pkg/storage/indexheader" + streamindex "github.com/grafana/mimir/pkg/storage/indexheader/index" "github.com/grafana/mimir/pkg/storage/sharding" "github.com/grafana/mimir/pkg/storage/tsdb" - "github.com/grafana/mimir/pkg/storegateway/indexheader" - streamindex "github.com/grafana/mimir/pkg/storegateway/indexheader/index" ) // rawPostingGroup keeps posting keys for single matcher. 
It is raw because there is no guarantee diff --git a/pkg/storegateway/bucket_index_postings_test.go b/pkg/storegateway/bucket_index_postings_test.go index 3f0eccdb667..12e1d566708 100644 --- a/pkg/storegateway/bucket_index_postings_test.go +++ b/pkg/storegateway/bucket_index_postings_test.go @@ -15,7 +15,7 @@ import ( "github.com/prometheus/prometheus/tsdb/index" "github.com/stretchr/testify/assert" - streamindex "github.com/grafana/mimir/pkg/storegateway/indexheader/index" + streamindex "github.com/grafana/mimir/pkg/storage/indexheader/index" ) func TestBigEndianPostingsCount(t *testing.T) { diff --git a/pkg/storegateway/bucket_index_reader.go b/pkg/storegateway/bucket_index_reader.go index f97d1507235..8b744661a45 100644 --- a/pkg/storegateway/bucket_index_reader.go +++ b/pkg/storegateway/bucket_index_reader.go @@ -31,10 +31,10 @@ import ( "github.com/prometheus/prometheus/tsdb/index" "golang.org/x/sync/errgroup" + "github.com/grafana/mimir/pkg/storage/indexheader" + streamindex "github.com/grafana/mimir/pkg/storage/indexheader/index" "github.com/grafana/mimir/pkg/storage/tsdb" "github.com/grafana/mimir/pkg/storegateway/indexcache" - "github.com/grafana/mimir/pkg/storegateway/indexheader" - streamindex "github.com/grafana/mimir/pkg/storegateway/indexheader/index" "github.com/grafana/mimir/pkg/util" "github.com/grafana/mimir/pkg/util/pool" "github.com/grafana/mimir/pkg/util/spanlogger" diff --git a/pkg/storegateway/bucket_store_metrics.go b/pkg/storegateway/bucket_store_metrics.go index 4a411be1dff..e6b3255494f 100644 --- a/pkg/storegateway/bucket_store_metrics.go +++ b/pkg/storegateway/bucket_store_metrics.go @@ -12,7 +12,7 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" - "github.com/grafana/mimir/pkg/storegateway/indexheader" + "github.com/grafana/mimir/pkg/storage/indexheader" ) // BucketStoreMetrics holds all the metrics tracked by BucketStore. 
These metrics diff --git a/pkg/storegateway/bucket_test.go b/pkg/storegateway/bucket_test.go index 2ea825294db..1f0cab5c9e2 100644 --- a/pkg/storegateway/bucket_test.go +++ b/pkg/storegateway/bucket_test.go @@ -56,13 +56,13 @@ import ( "github.com/grafana/mimir/pkg/mimirpb" "github.com/grafana/mimir/pkg/storage/bucket" + "github.com/grafana/mimir/pkg/storage/indexheader" + "github.com/grafana/mimir/pkg/storage/indexheader/index" "github.com/grafana/mimir/pkg/storage/sharding" mimir_tsdb "github.com/grafana/mimir/pkg/storage/tsdb" "github.com/grafana/mimir/pkg/storage/tsdb/block" "github.com/grafana/mimir/pkg/storegateway/hintspb" "github.com/grafana/mimir/pkg/storegateway/indexcache" - "github.com/grafana/mimir/pkg/storegateway/indexheader" - "github.com/grafana/mimir/pkg/storegateway/indexheader/index" "github.com/grafana/mimir/pkg/storegateway/storepb" "github.com/grafana/mimir/pkg/util/pool" "github.com/grafana/mimir/pkg/util/test" diff --git a/pkg/storegateway/indexheader/index/postings_test.go b/pkg/storegateway/indexheader/index/postings_test.go deleted file mode 100644 index 2a7cd943265..00000000000 --- a/pkg/storegateway/indexheader/index/postings_test.go +++ /dev/null @@ -1,102 +0,0 @@ -// SPDX-License-Identifier: AGPL-3.0-only - -package index - -import ( - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestPostingValueOffsets(t *testing.T) { - testCases := map[string]struct { - existingOffsets []postingOffset - prefix string - expectedFound bool - expectedStart int - expectedEnd int - }{ - "prefix not found": { - existingOffsets: []postingOffset{ - {value: "010"}, - {value: "019"}, - {value: "030"}, - {value: "031"}, - }, - prefix: "a", - expectedFound: false, - }, - "prefix matches only one sampled offset": { - existingOffsets: []postingOffset{ - {value: "010"}, - {value: "019"}, - {value: "030"}, - {value: "031"}, - }, - prefix: "02", - expectedFound: true, - expectedStart: 1, - expectedEnd: 2, - }, - "prefix matches all offsets": { - existingOffsets: []postingOffset{ - {value: "010"}, - {value: "019"}, - {value: "030"}, - {value: "031"}, - }, - prefix: "0", - expectedFound: true, - expectedStart: 0, - expectedEnd: 4, - }, - "prefix matches only last offset": { - existingOffsets: []postingOffset{ - {value: "010"}, - {value: "019"}, - {value: "030"}, - {value: "031"}, - }, - prefix: "031", - expectedFound: true, - expectedStart: 3, - expectedEnd: 4, - }, - "prefix matches multiple offsets": { - existingOffsets: []postingOffset{ - {value: "010"}, - {value: "019"}, - {value: "020"}, - {value: "030"}, - {value: "031"}, - }, - prefix: "02", - expectedFound: true, - expectedStart: 1, - expectedEnd: 3, - }, - "prefix matches only first offset": { - existingOffsets: []postingOffset{ - {value: "010"}, - {value: "019"}, - {value: "020"}, - {value: "030"}, - {value: "031"}, - }, - prefix: "015", - expectedFound: true, - expectedStart: 0, - expectedEnd: 1, - }, - } - - for testName, testCase := range testCases { - t.Run(testName, func(t *testing.T) { - offsets := postingValueOffsets{offsets: testCase.existingOffsets} - start, end, found := offsets.prefixOffsets(testCase.prefix) - assert.Equal(t, testCase.expectedStart, start) - assert.Equal(t, testCase.expectedEnd, end) - assert.Equal(t, testCase.expectedFound, found) - }) - } -} diff --git a/pkg/streamingpromql/benchmarks/benchmarks.go b/pkg/streamingpromql/benchmarks/benchmarks.go index 31c066852cb..35d4c829b2d 100644 --- a/pkg/streamingpromql/benchmarks/benchmarks.go +++ b/pkg/streamingpromql/benchmarks/benchmarks.go @@ 
-275,6 +275,12 @@ func TestCases(metricSizes []int) []BenchCase { { Expr: "topk by (le) (5, h_X)", }, + { + Expr: "quantile(0.9, a_X)", + }, + { + Expr: "quantile by (le) (0.1, h_X)", + }, // Combinations. { Expr: "rate(a_X[1m]) + rate(b_X[1m])", diff --git a/pkg/streamingpromql/engine_test.go b/pkg/streamingpromql/engine_test.go index 6c2a6171b17..41ddf89d8bc 100644 --- a/pkg/streamingpromql/engine_test.go +++ b/pkg/streamingpromql/engine_test.go @@ -50,7 +50,7 @@ func TestUnsupportedPromQLFeatures(t *testing.T) { // The goal of this is not to list every conceivable expression that is unsupported, but to cover all the // different cases and make sure we produce a reasonable error message when these cases are encountered. unsupportedExpressions := map[string]string{ - "quantile(0.95, metric{})": "'quantile' aggregation with parameter", + "absent_over_time(nonexistent{}[1h])": "'absent_over_time' function", } for expression, expectedError := range unsupportedExpressions { @@ -2057,6 +2057,11 @@ func runAnnotationTests(t *testing.T, testCases map[string]annotationTestCase) { } func TestAnnotations(t *testing.T) { + floatData := ` + metric{type="float", series="1"} 0+1x3 + metric{type="float", series="2"} 1+1x3 + ` + mixedFloatHistogramData := ` metric{type="float", series="1"} 0+1x3 metric{type="float", series="2"} 1+1x3 @@ -2070,14 +2075,14 @@ func TestAnnotations(t *testing.T) { metric{series="custom-buckets-2"} {{schema:-53 sum:1 count:1 custom_values:[2 3] buckets:[1]}}+{{schema:-53 sum:5 count:4 custom_values:[2 3] buckets:[1 2 1]}}x3 metric{series="mixed-exponential-custom-buckets"} {{schema:0 sum:1 count:1 buckets:[1]}} {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}} {{schema:0 sum:5 count:4 buckets:[1 2 1]}} metric{series="incompatible-custom-buckets"} {{schema:-53 sum:1 count:1 custom_values:[5 10] buckets:[1]}} {{schema:-53 sum:1 count:1 custom_values:[2 3] buckets:[1]}} {{schema:-53 sum:5 count:4 custom_values:[5 10] buckets:[1 2 1]}} - ` + ` nativeHistogramsWithResetHintsMix := ` metric{reset_hint="unknown"} {{schema:0 sum:0 count:0}}+{{schema:0 sum:5 count:4 buckets:[1 2 1]}}x3 metric{reset_hint="gauge"} {{schema:0 sum:0 count:0 counter_reset_hint:gauge}}+{{schema:0 sum:5 count:4 buckets:[1 2 1] counter_reset_hint:gauge}}x3 metric{reset_hint="gauge-unknown"} {{schema:0 sum:0 count:0 counter_reset_hint:gauge}} {{schema:0 sum:0 count:0}}+{{schema:0 sum:5 count:4 buckets:[1 2 1]}}x3 metric{reset_hint="unknown-gauge"} {{schema:0 sum:0 count:0}}+{{schema:0 sum:5 count:4 buckets:[1 2 1] counter_reset_hint:gauge}}x3 - ` + ` testCases := map[string]annotationTestCase{ "sum() with float and native histogram at same step": { @@ -2344,6 +2349,21 @@ func TestAnnotations(t *testing.T) { `PromQL info: metric might not be a counter, name does not end in _total/_sum/_count/_bucket: "other_float_metric" (1:105)`, }, }, + + "quantile with mixed histograms": { + data: mixedFloatHistogramData, + expr: "quantile(0.9, metric)", + expectedInfoAnnotations: []string{ + `PromQL info: ignored histogram in quantile aggregation (1:15)`, + }, + }, + "quantile with invalid param": { + data: floatData, + expr: "quantile(1.5, metric)", + expectedWarningAnnotations: []string{ + `PromQL warning: quantile value should be between 0 and 1, got 1.5 (1:10)`, + }, + }, } for _, f := range []string{"min_over_time", "max_over_time", "stddev_over_time", "stdvar_over_time"} { @@ -2996,6 +3016,10 @@ func TestCompareVariousMixedMetricsAggregations(t *testing.T) { expressions = append(expressions, 
fmt.Sprintf(`%s by (group) (series{label=~"(%s)"})`, aggFunc, labelRegex)) expressions = append(expressions, fmt.Sprintf(`%s without (group) (series{label=~"(%s)"})`, aggFunc, labelRegex)) } + // NOTE(jhesketh): We do not test a changing quantile factor here as prometheus currently + // does not support it (https://github.com/prometheus/prometheus/issues/15971) + expressions = append(expressions, fmt.Sprintf(`quantile (0.9, series{label=~"(%s)"})`, labelRegex)) + expressions = append(expressions, fmt.Sprintf(`quantile by (group) (0.9, series{label=~"(%s)"})`, labelRegex)) expressions = append(expressions, fmt.Sprintf(`count_values("value", series{label="%s"})`, labelRegex)) } diff --git a/pkg/streamingpromql/operators/aggregations/aggregation.go b/pkg/streamingpromql/operators/aggregations/aggregation.go index 2bf902c148d..e6970be27b6 100644 --- a/pkg/streamingpromql/operators/aggregations/aggregation.go +++ b/pkg/streamingpromql/operators/aggregations/aggregation.go @@ -45,6 +45,11 @@ type Aggregation struct { remainingGroups []*group // One entry per group, in the order we want to return them haveEmittedMixedFloatsAndHistogramsWarning bool + + // If the aggregation has a parameter, its values are expected + // to be filled here by the wrapping operator. + // Currently only used by the quantile aggregation. + ParamData types.ScalarData } func NewAggregation( @@ -218,7 +223,7 @@ func (a *Aggregation) NextSeries(ctx context.Context) (types.InstantVectorSeries } // Construct the group and return it - seriesData, hasMixedData, err := thisGroup.aggregation.ComputeOutputSeries(a.TimeRange, a.MemoryConsumptionTracker) + seriesData, hasMixedData, err := thisGroup.aggregation.ComputeOutputSeries(a.ParamData, a.TimeRange, a.MemoryConsumptionTracker) if err != nil { return types.InstantVectorSeriesData{}, err } @@ -248,7 +253,7 @@ func (a *Aggregation) accumulateUntilGroupComplete(ctx context.Context, g *group thisSeriesGroup := a.remainingInnerSeriesToGroup[0] a.remainingInnerSeriesToGroup = a.remainingInnerSeriesToGroup[1:] - if err := thisSeriesGroup.aggregation.AccumulateSeries(s, a.TimeRange, a.MemoryConsumptionTracker, a.emitAnnotationFunc); err != nil { + if err := thisSeriesGroup.aggregation.AccumulateSeries(s, a.TimeRange, a.MemoryConsumptionTracker, a.emitAnnotationFunc, thisSeriesGroup.remainingSeriesCount); err != nil { return err } thisSeriesGroup.remainingSeriesCount-- @@ -264,6 +269,8 @@ func (a *Aggregation) emitAnnotation(generator types.AnnotationGenerator) { } func (a *Aggregation) Close() { + // The wrapping operator is responsible for returning any a.ParamData slice + // since it is responsible for setting them up. 
a.Inner.Close() } diff --git a/pkg/streamingpromql/operators/aggregations/aggregations_safety_test.go b/pkg/streamingpromql/operators/aggregations/aggregations_safety_test.go index 2dd25012c8f..fc78b01bc8d 100644 --- a/pkg/streamingpromql/operators/aggregations/aggregations_safety_test.go +++ b/pkg/streamingpromql/operators/aggregations/aggregations_safety_test.go @@ -44,7 +44,7 @@ func TestAggregationGroupNativeHistogramSafety(t *testing.T) { histograms = append(histograms, promql.HPoint{T: 4, H: h4}) series := types.InstantVectorSeriesData{Histograms: histograms} - require.NoError(t, group.AccumulateSeries(series, timeRange, memoryConsumptionTracker, nil)) + require.NoError(t, group.AccumulateSeries(series, timeRange, memoryConsumptionTracker, nil, 1)) require.Equal(t, []promql.HPoint{{T: 0, H: nil}, {T: 1, H: nil}, {T: 2, H: nil}, {T: 4, H: nil}}, series.Histograms, "all histograms retained should be nil-ed out after accumulating series") // Second series: all histograms that are not retained should be nil-ed out after returning. @@ -62,7 +62,7 @@ func TestAggregationGroupNativeHistogramSafety(t *testing.T) { histograms = append(histograms, promql.HPoint{T: 4, H: h9}) series = types.InstantVectorSeriesData{Histograms: histograms} - require.NoError(t, group.AccumulateSeries(series, timeRange, memoryConsumptionTracker, nil)) + require.NoError(t, group.AccumulateSeries(series, timeRange, memoryConsumptionTracker, nil, 1)) expected := []promql.HPoint{ {T: 0, H: h5}, // h5 not retained (added to h1) diff --git a/pkg/streamingpromql/operators/aggregations/avg.go b/pkg/streamingpromql/operators/aggregations/avg.go index a1e783d617f..1c9f31c386f 100644 --- a/pkg/streamingpromql/operators/aggregations/avg.go +++ b/pkg/streamingpromql/operators/aggregations/avg.go @@ -33,7 +33,7 @@ type AvgAggregationGroup struct { groupSeriesCounts []float64 } -func (g *AvgAggregationGroup) AccumulateSeries(data types.InstantVectorSeriesData, timeRange types.QueryTimeRange, memoryConsumptionTracker *limiting.MemoryConsumptionTracker, emitAnnotationFunc types.EmitAnnotationFunc) error { +func (g *AvgAggregationGroup) AccumulateSeries(data types.InstantVectorSeriesData, timeRange types.QueryTimeRange, memoryConsumptionTracker *limiting.MemoryConsumptionTracker, emitAnnotationFunc types.EmitAnnotationFunc, _ uint) error { defer types.PutInstantVectorSeriesData(data, memoryConsumptionTracker) if len(data.Floats) == 0 && len(data.Histograms) == 0 { // Nothing to do @@ -256,7 +256,7 @@ func (g *AvgAggregationGroup) reconcileAndCountFloatPoints() (int, bool) { return floatPointCount, haveMixedFloatsAndHistograms } -func (g *AvgAggregationGroup) ComputeOutputSeries(timeRange types.QueryTimeRange, memoryConsumptionTracker *limiting.MemoryConsumptionTracker) (types.InstantVectorSeriesData, bool, error) { +func (g *AvgAggregationGroup) ComputeOutputSeries(_ types.ScalarData, timeRange types.QueryTimeRange, memoryConsumptionTracker *limiting.MemoryConsumptionTracker) (types.InstantVectorSeriesData, bool, error) { floatPointCount, hasMixedData := g.reconcileAndCountFloatPoints() var floatPoints []promql.FPoint var err error diff --git a/pkg/streamingpromql/operators/aggregations/common.go b/pkg/streamingpromql/operators/aggregations/common.go index 297a0421fb5..fc1c7aa17d0 100644 --- a/pkg/streamingpromql/operators/aggregations/common.go +++ b/pkg/streamingpromql/operators/aggregations/common.go @@ -16,22 +16,24 @@ import ( // AggregationGroup accumulates series that have been grouped together and computes the output series 
data. type AggregationGroup interface { // AccumulateSeries takes in a series as part of the group - AccumulateSeries(data types.InstantVectorSeriesData, timeRange types.QueryTimeRange, memoryConsumptionTracker *limiting.MemoryConsumptionTracker, emitAnnotationFunc types.EmitAnnotationFunc) error + // remainingSeriesInGroup includes the current series (ie if data is the last series, then remainingSeriesInGroup is 1) + AccumulateSeries(data types.InstantVectorSeriesData, timeRange types.QueryTimeRange, memoryConsumptionTracker *limiting.MemoryConsumptionTracker, emitAnnotationFunc types.EmitAnnotationFunc, remainingSeriesInGroup uint) error // ComputeOutputSeries does any final calculations and returns the grouped series data - ComputeOutputSeries(timeRange types.QueryTimeRange, memoryConsumptionTracker *limiting.MemoryConsumptionTracker) (types.InstantVectorSeriesData, bool, error) + ComputeOutputSeries(param types.ScalarData, timeRange types.QueryTimeRange, memoryConsumptionTracker *limiting.MemoryConsumptionTracker) (types.InstantVectorSeriesData, bool, error) } type AggregationGroupFactory func() AggregationGroup var AggregationGroupFactories = map[parser.ItemType]AggregationGroupFactory{ - parser.AVG: func() AggregationGroup { return &AvgAggregationGroup{} }, - parser.COUNT: func() AggregationGroup { return NewCountGroupAggregationGroup(true) }, - parser.GROUP: func() AggregationGroup { return NewCountGroupAggregationGroup(false) }, - parser.MAX: func() AggregationGroup { return NewMinMaxAggregationGroup(true) }, - parser.MIN: func() AggregationGroup { return NewMinMaxAggregationGroup(false) }, - parser.STDDEV: func() AggregationGroup { return NewStddevStdvarAggregationGroup(true) }, - parser.STDVAR: func() AggregationGroup { return NewStddevStdvarAggregationGroup(false) }, - parser.SUM: func() AggregationGroup { return &SumAggregationGroup{} }, + parser.AVG: func() AggregationGroup { return &AvgAggregationGroup{} }, + parser.COUNT: func() AggregationGroup { return NewCountGroupAggregationGroup(true) }, + parser.GROUP: func() AggregationGroup { return NewCountGroupAggregationGroup(false) }, + parser.MAX: func() AggregationGroup { return NewMinMaxAggregationGroup(true) }, + parser.MIN: func() AggregationGroup { return NewMinMaxAggregationGroup(false) }, + parser.QUANTILE: func() AggregationGroup { return &QuantileAggregationGroup{} }, + parser.STDDEV: func() AggregationGroup { return NewStddevStdvarAggregationGroup(true) }, + parser.STDVAR: func() AggregationGroup { return NewStddevStdvarAggregationGroup(false) }, + parser.SUM: func() AggregationGroup { return &SumAggregationGroup{} }, } // Sentinel value used to indicate a sample has seen an invalid combination of histograms and should be ignored. 
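For orientation, a rough sketch (not part of this patch, and simplified relative to the real Aggregation operator) of how the two-phase AggregationGroup interface above is driven: every series in a group is accumulated with a decreasing remainingSeriesInGroup count, which lets implementations such as the new quantile group pre-size per-step buffers, and the scalar parameter is only consulted once all series have been seen. The helper name below is hypothetical.

package aggregations

import (
	"github.com/grafana/mimir/pkg/streamingpromql/limiting"
	"github.com/grafana/mimir/pkg/streamingpromql/types"
)

// computeGroup is a hypothetical helper shown only to illustrate the calling
// convention; buffering, ordering and error handling in the real operator differ.
func computeGroup(
	group AggregationGroup,
	seriesData []types.InstantVectorSeriesData,
	param types.ScalarData, // aggregation parameter (e.g. the quantile); ignored by most groups
	timeRange types.QueryTimeRange,
	tracker *limiting.MemoryConsumptionTracker,
	emitAnnotation types.EmitAnnotationFunc,
) (types.InstantVectorSeriesData, bool, error) {
	for i, data := range seriesData {
		// remainingSeriesInGroup includes the current series, so the last series passes 1.
		remaining := uint(len(seriesData) - i)
		if err := group.AccumulateSeries(data, timeRange, tracker, emitAnnotation, remaining); err != nil {
			return types.InstantVectorSeriesData{}, false, err
		}
	}
	return group.ComputeOutputSeries(param, timeRange, tracker)
}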
diff --git a/pkg/streamingpromql/operators/aggregations/count.go b/pkg/streamingpromql/operators/aggregations/count.go index 64b6d92d0d7..d78bf8ac312 100644 --- a/pkg/streamingpromql/operators/aggregations/count.go +++ b/pkg/streamingpromql/operators/aggregations/count.go @@ -37,7 +37,7 @@ func (g *CountGroupAggregationGroup) groupAccumulatePoint(idx int64) { g.values[idx] = 1 } -func (g *CountGroupAggregationGroup) AccumulateSeries(data types.InstantVectorSeriesData, timeRange types.QueryTimeRange, memoryConsumptionTracker *limiting.MemoryConsumptionTracker, _ types.EmitAnnotationFunc) error { +func (g *CountGroupAggregationGroup) AccumulateSeries(data types.InstantVectorSeriesData, timeRange types.QueryTimeRange, memoryConsumptionTracker *limiting.MemoryConsumptionTracker, _ types.EmitAnnotationFunc, _ uint) error { if (len(data.Floats) > 0 || len(data.Histograms) > 0) && g.values == nil { var err error // First series with values for this group, populate it. @@ -64,7 +64,7 @@ func (g *CountGroupAggregationGroup) AccumulateSeries(data types.InstantVectorSe return nil } -func (g *CountGroupAggregationGroup) ComputeOutputSeries(timeRange types.QueryTimeRange, memoryConsumptionTracker *limiting.MemoryConsumptionTracker) (types.InstantVectorSeriesData, bool, error) { +func (g *CountGroupAggregationGroup) ComputeOutputSeries(_ types.ScalarData, timeRange types.QueryTimeRange, memoryConsumptionTracker *limiting.MemoryConsumptionTracker) (types.InstantVectorSeriesData, bool, error) { floatPointCount := 0 for _, fv := range g.values { if fv > 0 { diff --git a/pkg/streamingpromql/operators/aggregations/min_max.go b/pkg/streamingpromql/operators/aggregations/min_max.go index 53d893e7f41..4a3eafdc93f 100644 --- a/pkg/streamingpromql/operators/aggregations/min_max.go +++ b/pkg/streamingpromql/operators/aggregations/min_max.go @@ -51,7 +51,7 @@ func (g *MinMaxAggregationGroup) minAccumulatePoint(idx int64, f float64) { } } -func (g *MinMaxAggregationGroup) AccumulateSeries(data types.InstantVectorSeriesData, timeRange types.QueryTimeRange, memoryConsumptionTracker *limiting.MemoryConsumptionTracker, emitAnnotation types.EmitAnnotationFunc) error { +func (g *MinMaxAggregationGroup) AccumulateSeries(data types.InstantVectorSeriesData, timeRange types.QueryTimeRange, memoryConsumptionTracker *limiting.MemoryConsumptionTracker, emitAnnotation types.EmitAnnotationFunc, _ uint) error { // Native histograms are ignored for min and max. 
if len(data.Histograms) > 0 { emitAnnotation(func(_ string, expressionPosition posrange.PositionRange) error { @@ -90,7 +90,7 @@ func (g *MinMaxAggregationGroup) AccumulateSeries(data types.InstantVectorSeries return nil } -func (g *MinMaxAggregationGroup) ComputeOutputSeries(timeRange types.QueryTimeRange, memoryConsumptionTracker *limiting.MemoryConsumptionTracker) (types.InstantVectorSeriesData, bool, error) { +func (g *MinMaxAggregationGroup) ComputeOutputSeries(_ types.ScalarData, timeRange types.QueryTimeRange, memoryConsumptionTracker *limiting.MemoryConsumptionTracker) (types.InstantVectorSeriesData, bool, error) { floatPointCount := 0 for _, p := range g.floatPresent { if p { diff --git a/pkg/streamingpromql/operators/aggregations/quantile.go b/pkg/streamingpromql/operators/aggregations/quantile.go new file mode 100644 index 00000000000..a97541f28d6 --- /dev/null +++ b/pkg/streamingpromql/operators/aggregations/quantile.go @@ -0,0 +1,180 @@ +// SPDX-License-Identifier: AGPL-3.0-only +// Provenance-includes-location: https://github.com/prometheus/prometheus/blob/main/promql/engine.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: The Prometheus Authors + +package aggregations + +import ( + "context" + "math" + "unsafe" + + "github.com/prometheus/prometheus/promql" + "github.com/prometheus/prometheus/promql/parser" + "github.com/prometheus/prometheus/promql/parser/posrange" + "github.com/prometheus/prometheus/util/annotations" + + "github.com/grafana/mimir/pkg/streamingpromql/limiting" + "github.com/grafana/mimir/pkg/streamingpromql/operators/functions" + "github.com/grafana/mimir/pkg/streamingpromql/types" + "github.com/grafana/mimir/pkg/util/pool" +) + +// QuantileAggregation is a small wrapper around Aggregation to pre-process and validate +// the quantile parameter and fill it into Aggregation.ParamData +type QuantileAggregation struct { + Param types.ScalarOperator + Aggregation *Aggregation + MemoryConsumptionTracker *limiting.MemoryConsumptionTracker + Annotations *annotations.Annotations +} + +func NewQuantileAggregation( + inner types.InstantVectorOperator, + param types.ScalarOperator, + timeRange types.QueryTimeRange, + grouping []string, + without bool, + memoryConsumptionTracker *limiting.MemoryConsumptionTracker, + annotations *annotations.Annotations, + expressionPosition posrange.PositionRange, +) (*QuantileAggregation, error) { + + a, err := NewAggregation( + inner, + timeRange, + grouping, + without, + parser.QUANTILE, + memoryConsumptionTracker, + annotations, + expressionPosition, + ) + if err != nil { + return nil, err + } + + q := &QuantileAggregation{ + Aggregation: a, + Param: param, + MemoryConsumptionTracker: memoryConsumptionTracker, + Annotations: annotations, + } + + return q, nil +} + +func (q *QuantileAggregation) SeriesMetadata(ctx context.Context) ([]types.SeriesMetadata, error) { + var err error + q.Aggregation.ParamData, err = q.Param.GetValues(ctx) + if err != nil { + return nil, err + } + // Validate the parameter now so we only have to do it once for each group + for _, p := range q.Aggregation.ParamData.Samples { + if math.IsNaN(p.F) || p.F < 0 || p.F > 1 { + q.Annotations.Add(annotations.NewInvalidQuantileWarning(p.F, q.Param.ExpressionPosition())) + } + } + + return q.Aggregation.SeriesMetadata(ctx) +} + +func (q *QuantileAggregation) NextSeries(ctx context.Context) (types.InstantVectorSeriesData, error) { + return q.Aggregation.NextSeries(ctx) +} + +func (q *QuantileAggregation) Close() { + if 
q.Aggregation.ParamData.Samples != nil { + types.FPointSlicePool.Put(q.Aggregation.ParamData.Samples, q.MemoryConsumptionTracker) + } + if q.Param != nil { + q.Param.Close() + } + q.Aggregation.Close() +} + +func (q *QuantileAggregation) ExpressionPosition() posrange.PositionRange { + return q.Aggregation.ExpressionPosition() +} + +type QuantileAggregationGroup struct { + qGroups []qGroup // A group per point in time +} + +type qGroup struct { + points []float64 // All of the floats for this group of series at a point in time +} + +const maxExpectedQuantileGroups = 64 // There isn't much science to this + +var qGroupPool = types.NewLimitingBucketedPool( + pool.NewBucketedPool(maxExpectedQuantileGroups, func(size int) []qGroup { + return make([]qGroup, 0, size) + }), + uint64(unsafe.Sizeof(qGroup{})), + false, + nil, +) + +func (q *QuantileAggregationGroup) AccumulateSeries(data types.InstantVectorSeriesData, timeRange types.QueryTimeRange, memoryConsumptionTracker *limiting.MemoryConsumptionTracker, emitAnnotationFunc types.EmitAnnotationFunc, remainingSeriesInGroup uint) error { + defer types.PutInstantVectorSeriesData(data, memoryConsumptionTracker) + + if len(data.Histograms) > 0 { + emitAnnotationFunc(func(_ string, expressionPosition posrange.PositionRange) error { + return annotations.NewHistogramIgnoredInAggregationInfo("quantile", expressionPosition) + }) + } + + if len(data.Floats) == 0 { + // Nothing to do + return nil + } + + var err error + if q.qGroups == nil { + q.qGroups, err = qGroupPool.Get(timeRange.StepCount, memoryConsumptionTracker) + if err != nil { + return err + } + q.qGroups = q.qGroups[:timeRange.StepCount] + } + + for _, p := range data.Floats { + idx := timeRange.PointIndex(p.T) + + if q.qGroups[idx].points == nil { + q.qGroups[idx].points, err = types.Float64SlicePool.Get(int(remainingSeriesInGroup), memoryConsumptionTracker) + if err != nil { + return err + } + } + q.qGroups[idx].points = append(q.qGroups[idx].points, p.F) + } + + return nil +} + +func (q *QuantileAggregationGroup) ComputeOutputSeries(param types.ScalarData, timeRange types.QueryTimeRange, memoryConsumptionTracker *limiting.MemoryConsumptionTracker) (types.InstantVectorSeriesData, bool, error) { + quantilePoints, err := types.FPointSlicePool.Get(timeRange.StepCount, memoryConsumptionTracker) + if err != nil { + return types.InstantVectorSeriesData{}, false, err + } + + for i, qGroup := range q.qGroups { + if qGroup.points == nil { + // No series have any points at this time step, so nothing to output + continue + } + p := param.Samples[i].F + t := timeRange.StartT + int64(i)*timeRange.IntervalMilliseconds + f := functions.Quantile(p, qGroup.points) + quantilePoints = append(quantilePoints, promql.FPoint{T: t, F: f}) + types.Float64SlicePool.Put(qGroup.points, memoryConsumptionTracker) + q.qGroups[i].points = nil + } + + qGroupPool.Put(q.qGroups, memoryConsumptionTracker) + return types.InstantVectorSeriesData{Floats: quantilePoints}, false, nil +} diff --git a/pkg/streamingpromql/operators/aggregations/stddev_stdvar.go b/pkg/streamingpromql/operators/aggregations/stddev_stdvar.go index 92308b9569b..0f025635540 100644 --- a/pkg/streamingpromql/operators/aggregations/stddev_stdvar.go +++ b/pkg/streamingpromql/operators/aggregations/stddev_stdvar.go @@ -35,7 +35,7 @@ type StddevStdvarAggregationGroup struct { groupSeriesCounts []float64 } -func (g *StddevStdvarAggregationGroup) AccumulateSeries(data types.InstantVectorSeriesData, timeRange types.QueryTimeRange, memoryConsumptionTracker 
*limiting.MemoryConsumptionTracker, emitAnnotation types.EmitAnnotationFunc) error { +func (g *StddevStdvarAggregationGroup) AccumulateSeries(data types.InstantVectorSeriesData, timeRange types.QueryTimeRange, memoryConsumptionTracker *limiting.MemoryConsumptionTracker, emitAnnotation types.EmitAnnotationFunc, _ uint) error { // Native histograms are ignored for stddev and stdvar. if len(data.Histograms) > 0 { emitAnnotation(func(_ string, expressionPosition posrange.PositionRange) error { @@ -83,7 +83,7 @@ func (g *StddevStdvarAggregationGroup) AccumulateSeries(data types.InstantVector return nil } -func (g *StddevStdvarAggregationGroup) ComputeOutputSeries(timeRange types.QueryTimeRange, memoryConsumptionTracker *limiting.MemoryConsumptionTracker) (types.InstantVectorSeriesData, bool, error) { +func (g *StddevStdvarAggregationGroup) ComputeOutputSeries(_ types.ScalarData, timeRange types.QueryTimeRange, memoryConsumptionTracker *limiting.MemoryConsumptionTracker) (types.InstantVectorSeriesData, bool, error) { floatPointCount := 0 for _, sc := range g.groupSeriesCounts { if sc > 0 { diff --git a/pkg/streamingpromql/operators/aggregations/sum.go b/pkg/streamingpromql/operators/aggregations/sum.go index d3bea361462..5a2f2d47dce 100644 --- a/pkg/streamingpromql/operators/aggregations/sum.go +++ b/pkg/streamingpromql/operators/aggregations/sum.go @@ -24,7 +24,7 @@ type SumAggregationGroup struct { histogramPointCount int } -func (g *SumAggregationGroup) AccumulateSeries(data types.InstantVectorSeriesData, timeRange types.QueryTimeRange, memoryConsumptionTracker *limiting.MemoryConsumptionTracker, emitAnnotationFunc types.EmitAnnotationFunc) error { +func (g *SumAggregationGroup) AccumulateSeries(data types.InstantVectorSeriesData, timeRange types.QueryTimeRange, memoryConsumptionTracker *limiting.MemoryConsumptionTracker, emitAnnotationFunc types.EmitAnnotationFunc, _ uint) error { defer types.PutInstantVectorSeriesData(data, memoryConsumptionTracker) if len(data.Floats) == 0 && len(data.Histograms) == 0 { // Nothing to do @@ -164,7 +164,7 @@ func (g *SumAggregationGroup) reconcileAndCountFloatPoints() (int, bool) { return floatPointCount, haveMixedFloatsAndHistograms } -func (g *SumAggregationGroup) ComputeOutputSeries(timeRange types.QueryTimeRange, memoryConsumptionTracker *limiting.MemoryConsumptionTracker) (types.InstantVectorSeriesData, bool, error) { +func (g *SumAggregationGroup) ComputeOutputSeries(_ types.ScalarData, timeRange types.QueryTimeRange, memoryConsumptionTracker *limiting.MemoryConsumptionTracker) (types.InstantVectorSeriesData, bool, error) { floatPointCount, hasMixedData := g.reconcileAndCountFloatPoints() var floatPoints []promql.FPoint var err error diff --git a/pkg/streamingpromql/operators/functions/quantile.go b/pkg/streamingpromql/operators/functions/quantile.go index e712f60b7e9..33c5e34d70f 100644 --- a/pkg/streamingpromql/operators/functions/quantile.go +++ b/pkg/streamingpromql/operators/functions/quantile.go @@ -476,14 +476,14 @@ func ensureMonotonicAndIgnoreSmallDeltas(buckets buckets, tolerance float64) (bo return forcedMonotonic, fixedPrecision } -// quantile calculates the given quantile of a vector of samples. +// Quantile calculates the given quantile of a vector of samples. // // values will be sorted in place. // If values has zero elements, NaN is returned. // If q==NaN, NaN is returned. // If q<0, -Inf is returned. // If q>1, +Inf is returned. 
-func quantile(q float64, values []float64) float64 { +func Quantile(q float64, values []float64) float64 { if len(values) == 0 || math.IsNaN(q) { return math.NaN() } diff --git a/pkg/streamingpromql/operators/functions/range_vectors.go b/pkg/streamingpromql/operators/functions/range_vectors.go index fc7de807637..41b19acd70e 100644 --- a/pkg/streamingpromql/operators/functions/range_vectors.go +++ b/pkg/streamingpromql/operators/functions/range_vectors.go @@ -728,5 +728,5 @@ func quantileOverTime(step *types.RangeVectorStepData, _ float64, args []types.S values = append(values, p.F) } - return quantile(q, values), true, nil, nil + return Quantile(q, values), true, nil, nil } diff --git a/pkg/streamingpromql/query.go b/pkg/streamingpromql/query.go index f351a4916ea..29db14633de 100644 --- a/pkg/streamingpromql/query.go +++ b/pkg/streamingpromql/query.go @@ -192,6 +192,21 @@ func (q *Query) convertToInstantVectorOperator(expr parser.Expr, timeRange types } return topkbottomk.New(inner, param, timeRange, e.Grouping, e.Without, e.Op == parser.TOPK, q.memoryConsumptionTracker, q.annotations, e.PosRange), nil + case parser.QUANTILE: + param, err := q.convertToScalarOperator(e.Param, timeRange) + if err != nil { + return nil, err + } + return aggregations.NewQuantileAggregation( + inner, + param, + timeRange, + e.Grouping, + e.Without, + q.memoryConsumptionTracker, + q.annotations, + e.PosRange, + ) case parser.COUNT_VALUES: param, err := q.convertToStringOperator(e.Param) if err != nil { diff --git a/pkg/streamingpromql/testdata/ours-only/aggregators.test b/pkg/streamingpromql/testdata/ours-only/aggregators.test index 223bdf5a7e9..ee189be2ed8 100644 --- a/pkg/streamingpromql/testdata/ours-only/aggregators.test +++ b/pkg/streamingpromql/testdata/ours-only/aggregators.test @@ -11,3 +11,15 @@ eval range from 0 to 30m step 6m topk(scalar(param), series) series{env="prod", instance="1"} _ 4 9 20 _ _ series{env="prod", instance="2"} 2 3 10 _ _ 1 series{env="prod", instance="3"} _ 0 _ _ _ _ + +clear + +# This case currently fails with Prometheus' engine due to https://github.com/prometheus/prometheus/issues/15971. +load 6m + series{env="prod", instance="1"} 1 4 9 20 _ _ _ _ + series{env="prod", instance="2"} 2 3 10 _ _ 1 _ _ + series{env="prod", instance="3"} 0 0 8 _ _ _ _ _ + param 0.5 0.1 0.9 0.1 0.2 0.3 _ Inf + +eval_warn range from 0 to 42m step 6m quantile(scalar(param), series) + {} 1 0.6000000000000001 9.799999999999999 20 _ 1 _ _ \ No newline at end of file diff --git a/pkg/streamingpromql/testdata/ours/aggregators.test b/pkg/streamingpromql/testdata/ours/aggregators.test index 2a265d40ef5..8396ff5bfd6 100644 --- a/pkg/streamingpromql/testdata/ours/aggregators.test +++ b/pkg/streamingpromql/testdata/ours/aggregators.test @@ -17,6 +17,9 @@ eval range from 0 to 4m step 1m sum(some_metric) eval range from 0 to 4m step 1m avg(some_metric) {} 0 2.5 5 7.5 10 +eval range from 0 to 4m step 1m quantile(0.5, some_metric) + {} 0 2.5 5 7.5 10 + # Range query, aggregating to multiple groups with 'by'. eval range from 0 to 4m step 1m sum by (env) (some_metric) {env="prod"} 0 3 6 9 12 @@ -26,6 +29,10 @@ eval range from 0 to 4m step 1m avg by (env) (some_metric) {env="prod"} 0 1.5 3 4.5 6 {env="test"} 0 3.5 7 10.5 14 +eval range from 0 to 4m step 1m quantile by (env) (0.5, some_metric) + {env="prod"} 0 1.5 3 4.5 6 + {env="test"} 0 3.5 7 10.5 14 + # Range query, aggregating to multiple groups with 'without'. 
eval range from 0 to 4m step 1m sum without (env) (some_metric) {cluster="eu"} 0 4 8 12 16 @@ -35,6 +42,10 @@ eval range from 0 to 4m step 1m avg without (env) (some_metric) {cluster="eu"} 0 2 4 6 8 {cluster="us"} 0 3 6 9 12 +eval range from 0 to 4m step 1m quantile without (env) (0.5, some_metric) + {cluster="eu"} 0 2 4 6 8 + {cluster="us"} 0 3 6 9 12 + # Range query, aggregating to a single group with 'without'. eval range from 0 to 4m step 1m sum without (env, cluster) (some_metric) {} 0 10 20 30 40 @@ -42,6 +53,9 @@ eval range from 0 to 4m step 1m sum without (env, cluster) (some_metric) eval range from 0 to 4m step 1m avg without (env, cluster) (some_metric) {} 0 2.5 5 7.5 10 +eval range from 0 to 4m step 1m quantile without (env, cluster) (0.5, some_metric) + {} 0 2.5 5 7.5 10 + # 'without' should always ignore the metric name. eval range from 0 to 4m step 1m sum without(cluster) ({cluster="us"}) {env="prod"} 0 2 4 6 8 @@ -51,6 +65,10 @@ eval range from 0 to 4m step 1m avg without(cluster) ({cluster="us"}) {env="prod"} 0 2 4 6 8 {env="test"} 0 4.5 9 13.5 18 +eval range from 0 to 4m step 1m quantile without(cluster) (0.5, {cluster="us"}) + {env="prod"} 0 2 4 6 8 + {env="test"} 0 4.5 9 13.5 18 + # If no series are matched, we shouldn't return any results. eval range from 0 to 4m step 1m sum(some_nonexistent_metric) # Should return no results. @@ -80,6 +98,13 @@ eval range from 0 to 4m step 1m avg without(env) (some_metric) {cluster="us", group="a", subgroup="1"} 0 4 8 12 16 {cluster="us", group="a", subgroup="2"} 0 5.5 11 16.5 22 +eval range from 0 to 4m step 1m quantile without(env) (0.5, some_metric) + {cluster="eu", group="a", subgroup="1"} 0 1 2 3 4 + {cluster="eu", group="a", subgroup="2"} 0 2 4 6 8 + {cluster="eu", group="b", subgroup="1"} 0 3 6 9 12 + {cluster="us", group="a", subgroup="1"} 0 4 8 12 16 + {cluster="us", group="a", subgroup="2"} 0 5.5 11 16.5 22 + eval range from 0 to 4m step 1m sum without(env, cluster) (some_metric) {group="a", subgroup="1"} 0 5 10 15 20 {group="a", subgroup="2"} 0 13 26 39 52 @@ -90,6 +115,11 @@ eval range from 0 to 4m step 1m avg without(env, cluster) (some_metric) {group="a", subgroup="2"} 0 4.333333333333333 8.666666666666666 13 17.333333333333332 {group="b", subgroup="1"} 0 3 6 9 12 +eval range from 0 to 4m step 1m quantile without(env, cluster) (0.5, some_metric) + {group="a", subgroup="1"} 0 2.5 5 7.5 10 + {group="a", subgroup="2"} 0 5 10 15 20 + {group="b", subgroup="1"} 0 3 6 9 12 + # 'without' with duplicate labels to remove. eval range from 0 to 4m step 1m sum without(env, cluster, env) (some_metric) {group="a", subgroup="1"} 0 5 10 15 20 @@ -101,6 +131,11 @@ eval range from 0 to 4m step 1m avg without(env, cluster, env) (some_metric) {group="a", subgroup="2"} 0 4.333333333333333 8.666666666666666 13 17.333333333333332 {group="b", subgroup="1"} 0 3 6 9 12 +eval range from 0 to 4m step 1m quantile without(env, cluster, env) (0.5, some_metric) + {group="a", subgroup="1"} 0 2.5 5 7.5 10 + {group="a", subgroup="2"} 0 5 10 15 20 + {group="b", subgroup="1"} 0 3 6 9 12 + # 'by' with duplicate grouping labels. 
eval range from 0 to 4m step 1m sum by(env, cluster, env) (some_metric) {cluster="eu", env="prod"} 0 6 12 18 24 @@ -112,6 +147,11 @@ eval range from 0 to 4m step 1m avg by(env, cluster, env) (some_metric) {cluster="us", env="prod"} 0 4.5 9 13.5 18 {cluster="us", env="test"} 0 6 12 18 24 +eval range from 0 to 4m step 1m quantile by(env, cluster, env) (0.5, some_metric) + {cluster="eu", env="prod"} 0 2 4 6 8 + {cluster="us", env="prod"} 0 4.5 9 13.5 18 + {cluster="us", env="test"} 0 6 12 18 24 + clear load 1m @@ -124,6 +164,9 @@ eval range from 1m to 1m30s step 1s sum(some_metric_with_staleness) eval range from 1m to 1m30s step 1s avg(some_metric_with_staleness) # Should return no results. +eval range from 1m to 1m30s step 1s quantile(0.5, some_metric_with_staleness) + # Should return no results. + clear # Test native histogram aggregations @@ -138,6 +181,8 @@ eval instant at 0m sum(single_histogram) eval instant at 0m avg(single_histogram) {} {{schema:0 sum:5.666666666666667 count:6 buckets:[1.6666666666666667 4 0.33333333333333337]}} +eval_info instant at 0m quantile(0.5, single_histogram) + eval instant at 0m sum by (label) (single_histogram) {label="value"} {{schema:0 count:4 sum:2 buckets:[1 2 1]}} {label="value2"} {{schema:1 count:14 sum:15 buckets:[4 6 4]}} @@ -171,20 +216,24 @@ load 1m # See: https://github.com/prometheus/prometheus/issues/14172 # What I would expect -# eval range from 0 to 4m step 1m sum by (label) (single_histogram) +# eval range from 0 to 4m step 1m sum by (label) (single_histogram) # {label="value"} 0 1 {{count:4 sum:2 buckets:[1 2 1]}} {{sum:2 count:4 buckets:[1 2 1]}} 2 # {label="value2"} 0 5 {{schema:1 count:14 sum:15 buckets:[4 6 4]}} {{schema:2 count:8 sum:4 buckets:[4 6 4]}} 10 # -# eval range from 0 to 4m step 1m avg by (label) (single_histogram) +# eval range from 0 to 4m step 1m avg by (label) (single_histogram) # {label="value"} 0 1 {{count:4 sum:2 buckets:[1 2 1]}} {{sum:2 count:4 buckets:[1 2 1]}} 2 # {label="value2"} 0 2.5 {{schema:1 count:7 sum:7.5 buckets:[2 3 2]}} {{schema:2 count:4 sum:2 buckets:[1 2 1]}} 5 # What both engines return -eval range from 0 to 4m step 1m sum by (label) (single_histogram) +eval range from 0 to 4m step 1m sum by (label) (single_histogram) {label="value"} 0 1 1 1 2 {label="value2"} 0 5 5 5 10 -eval range from 0 to 4m step 1m avg by (label) (single_histogram) +eval range from 0 to 4m step 1m avg by (label) (single_histogram) + {label="value"} 0 1 1 1 2 + {label="value2"} 0 2.5 2.5 2.5 5 + +eval range from 0 to 4m step 1m quantile by (label) (0.5, single_histogram) {label="value"} 0 1 1 1 2 {label="value2"} 0 2.5 2.5 2.5 5 @@ -197,12 +246,15 @@ load 1m # If a float is present, the histogram is ignored. 
# If a float comes after a histogram, a lookback'd float is used instead of the histogram (see: https://github.com/prometheus/prometheus/issues/14172) -eval range from 0 to 5m step 1m sum(single_histogram) +eval range from 0 to 5m step 1m sum(single_histogram) {} 0 1 1 5 8 {{sum:4 count:8 buckets:[2 4 2]}} -eval range from 0 to 5m step 1m avg(single_histogram) +eval range from 0 to 5m step 1m avg(single_histogram) {} 0 0.5 0.5 2.5 4 {{sum:2 count:4 buckets:[1 2 1]}} +eval_info range from 0 to 5m step 1m quantile(0.5, single_histogram) + {} 0 0.5 0.5 2.5 4 + clear # Test a mix of float and histogram values at the same point @@ -215,6 +267,9 @@ eval_warn instant at 1m sum(single_histogram) eval_warn instant at 1m avg(single_histogram) +eval_info instant at 1m quantile(0.5, single_histogram) + {} 3 + clear # Test a mix of float and histogram values at the same point @@ -227,6 +282,9 @@ eval_warn instant at 1m sum(single_histogram) eval_warn instant at 1m avg(single_histogram) +eval_info instant at 1m quantile(0.5, single_histogram) + {} 3 + clear # Test a mix of float and histogram values at the same point, where after adding 2 conflicting series and removing a point, @@ -241,6 +299,9 @@ eval_warn instant at 1m sum(single_histogram) eval_warn instant at 1m avg(single_histogram) +eval_info instant at 1m quantile(0.5, single_histogram) + {} 2 + clear # Test min/max aggregation with histograms and a mix of histogram+float values @@ -625,3 +686,26 @@ eval range from 0 to 12m step 6m count_values by (idx) ("idx", series) # Once that fix is vendored into Mimir, we can remove the two (\\")? groups below. eval_fail instant at 0 count_values("a\xc5z", series) expected_fail_regexp invalid label name "(\\")?a\\(\\)?xc5z(\\")?"( for count_values)? + +clear + +load 6m + series{idx="1"} 1 2 3 4 5 6 + series{idx="2"} 4 5 6 7 8 9 + series{idx="3"} 7 8 Inf NaN -Inf + +# Quantile value warning is emitted even when no series are returned +eval_warn range from 0 to 12m step 6m quantile(20, noseries) + +eval_warn range from 0 to 12m step 6m quantile(Inf, noseries) + +eval_warn range from 0 to 12m step 6m quantile(-Inf, noseries) + +eval range from 0 to 30m step 6m quantile(0.9, series) + {} 6.4 7.4 +Inf 6.4 7.4 8.7 + +eval range from 0 to 30m step 6m quantile(0, series) + {} 1 2 3 NaN -Inf 6 + +eval range from 0 to 30m step 6m quantile(1, series) + {} 7 8 NaN 7 8 9 diff --git a/pkg/streamingpromql/testdata/upstream/aggregators.test b/pkg/streamingpromql/testdata/upstream/aggregators.test index 5f657a647f3..223b6dc8ad1 100644 --- a/pkg/streamingpromql/testdata/upstream/aggregators.test +++ b/pkg/streamingpromql/testdata/upstream/aggregators.test @@ -450,42 +450,36 @@ load 10s data_histogram{test="histogram sample", point="c"} {{schema:2 count:4 sum:10 buckets:[1 0 0 0 1 0 0 1 1]}} foo .8 -# Unsupported by streaming engine. -# eval instant at 1m quantile without(point)(0.8, data) -# {test="two samples"} 0.8 -# {test="three samples"} 1.6 -# {test="uneven samples"} 2.8 +eval instant at 1m quantile without(point)(0.8, data) + {test="two samples"} 0.8 + {test="three samples"} 1.6 + {test="uneven samples"} 2.8 # The histogram is ignored here so the result doesn't change but it has an info annotation now. -# Unsupported by streaming engine. 
-# eval_info instant at 1m quantile without(point)(0.8, {__name__=~"data(_histogram)?"}) -# {test="two samples"} 0.8 -# {test="three samples"} 1.6 -# {test="uneven samples"} 2.8 +eval_info instant at 1m quantile without(point)(0.8, {__name__=~"data(_histogram)?"}) + {test="two samples"} 0.8 + {test="three samples"} 1.6 + {test="uneven samples"} 2.8 # The histogram is ignored here so there is no result but it has an info annotation now. -# Unsupported by streaming engine. -# eval_info instant at 1m quantile(0.8, data_histogram) +eval_info instant at 1m quantile(0.8, data_histogram) # Bug #5276. -# Unsupported by streaming engine. -# eval instant at 1m quantile without(point)(scalar(foo), data) -# {test="two samples"} 0.8 -# {test="three samples"} 1.6 -# {test="uneven samples"} 2.8 - - -# Unsupported by streaming engine. -# eval instant at 1m quantile without(point)((scalar(foo)), data) -# {test="two samples"} 0.8 -# {test="three samples"} 1.6 -# {test="uneven samples"} 2.8 - -# Unsupported by streaming engine. -# eval_warn instant at 1m quantile without(point)(NaN, data) -# {test="two samples"} NaN -# {test="three samples"} NaN -# {test="uneven samples"} NaN +eval instant at 1m quantile without(point)(scalar(foo), data) + {test="two samples"} 0.8 + {test="three samples"} 1.6 + {test="uneven samples"} 2.8 + + +eval instant at 1m quantile without(point)((scalar(foo)), data) + {test="two samples"} 0.8 + {test="three samples"} 1.6 + {test="uneven samples"} 2.8 + +eval_warn instant at 1m quantile without(point)(NaN, data) + {test="two samples"} NaN + {test="three samples"} NaN + {test="uneven samples"} NaN # Tests for group. clear diff --git a/vendor/github.com/prometheus/prometheus/tsdb/head.go b/vendor/github.com/prometheus/prometheus/tsdb/head.go index 8db86e78a0d..0fb7a1605a9 100644 --- a/vendor/github.com/prometheus/prometheus/tsdb/head.go +++ b/vendor/github.com/prometheus/prometheus/tsdb/head.go @@ -96,8 +96,8 @@ type Head struct { // All series addressable by their ID or hash. series *stripeSeries - deletedMtx sync.Mutex - deleted map[chunks.HeadSeriesRef]int // Deleted series, and what WAL segment they must be kept until. + walExpiriesMtx sync.Mutex + walExpiries map[chunks.HeadSeriesRef]int // Series no longer in the head, and what WAL segment they must be kept until. // TODO(codesome): Extend MemPostings to return only OOOPostings, Set OOOStatus, ... Like an additional map of ooo postings. postings *index.MemPostings // Postings lists for terms. @@ -362,7 +362,7 @@ func (h *Head) resetInMemoryState() error { h.exemplars = es h.postings = index.NewUnorderedMemPostings() h.tombstones = tombstones.NewMemTombstones() - h.deleted = map[chunks.HeadSeriesRef]int{} + h.walExpiries = map[chunks.HeadSeriesRef]int{} h.chunkRange.Store(h.opts.ChunkRange) h.minTime.Store(math.MaxInt64) h.maxTime.Store(math.MinInt64) @@ -785,7 +785,7 @@ func (h *Head) Init(minValidTime int64) error { // A corrupted checkpoint is a hard error for now and requires user // intervention. There's likely little data that can be recovered anyway. 
- if err := h.loadWAL(wlog.NewReader(sr), syms, multiRef, mmappedChunks, oooMmappedChunks); err != nil { + if err := h.loadWAL(wlog.NewReader(sr), syms, multiRef, mmappedChunks, oooMmappedChunks, endAt); err != nil { return fmt.Errorf("backfill checkpoint: %w", err) } h.updateWALReplayStatusRead(startFrom) @@ -818,7 +818,7 @@ func (h *Head) Init(minValidTime int64) error { if err != nil { return fmt.Errorf("segment reader (offset=%d): %w", offset, err) } - err = h.loadWAL(wlog.NewReader(sr), syms, multiRef, mmappedChunks, oooMmappedChunks) + err = h.loadWAL(wlog.NewReader(sr), syms, multiRef, mmappedChunks, oooMmappedChunks, endAt) if err := sr.Close(); err != nil { h.logger.Warn("Error while closing the wal segments reader", "err", err) } @@ -1285,6 +1285,34 @@ func (h *Head) IsQuerierCollidingWithTruncation(querierMint, querierMaxt int64) return false, false, 0 } +func (h *Head) getWALExpiry(id chunks.HeadSeriesRef) (int, bool) { + h.walExpiriesMtx.Lock() + defer h.walExpiriesMtx.Unlock() + + keepUntil, ok := h.walExpiries[id] + return keepUntil, ok +} + +func (h *Head) setWALExpiry(id chunks.HeadSeriesRef, keepUntil int) { + h.walExpiriesMtx.Lock() + defer h.walExpiriesMtx.Unlock() + + h.walExpiries[id] = keepUntil +} + +// keepSeriesInWALCheckpoint is used to determine whether a series record should be kept in the checkpoint +// last is the last WAL segment that was considered for checkpointing. +func (h *Head) keepSeriesInWALCheckpoint(id chunks.HeadSeriesRef, last int) bool { + // Keep the record if the series exists in the head. + if h.series.getByID(id) != nil { + return true + } + + // Keep the record if the series has an expiry set. + keepUntil, ok := h.getWALExpiry(id) + return ok && keepUntil > last +} + // truncateWAL removes old data before mint from the WAL. func (h *Head) truncateWAL(mint int64) error { h.chunkSnapshotMtx.Lock() @@ -1318,17 +1346,8 @@ func (h *Head) truncateWAL(mint int64) error { return nil } - keep := func(id chunks.HeadSeriesRef) bool { - if h.series.getByID(id) != nil { - return true - } - h.deletedMtx.Lock() - keepUntil, ok := h.deleted[id] - h.deletedMtx.Unlock() - return ok && keepUntil > last - } h.metrics.checkpointCreationTotal.Inc() - if _, err = wlog.Checkpoint(h.logger, h.wal, first, last, keep, mint); err != nil { + if _, err = wlog.Checkpoint(h.logger, h.wal, first, last, h.keepSeriesInWALCheckpoint, mint); err != nil { h.metrics.checkpointCreationFail.Inc() var cerr *chunks.CorruptionErr if errors.As(err, &cerr) { @@ -1343,15 +1362,15 @@ func (h *Head) truncateWAL(mint int64) error { h.logger.Error("truncating segments failed", "err", err) } - // The checkpoint is written and segments before it is truncated, so we no - // longer need to track deleted series that are before it. - h.deletedMtx.Lock() - for ref, segment := range h.deleted { + // The checkpoint is written and segments before it is truncated, so stop + // tracking expired series. 
+ h.walExpiriesMtx.Lock() + for ref, segment := range h.walExpiries { if segment <= last { - delete(h.deleted, ref) + delete(h.walExpiries, ref) } } - h.deletedMtx.Unlock() + h.walExpiriesMtx.Unlock() h.metrics.checkpointDeleteTotal.Inc() if err := wlog.DeleteCheckpoints(h.wal.Dir(), last); err != nil { @@ -1618,7 +1637,7 @@ func (h *Head) gc() (actualInOrderMint, minOOOTime int64, minMmapFile int) { if h.wal != nil { _, last, _ := wlog.Segments(h.wal.Dir()) - h.deletedMtx.Lock() + h.walExpiriesMtx.Lock() // Keep series records until we're past segment 'last' // because the WAL will still have samples records with // this ref ID. If we didn't keep these series records then @@ -1626,9 +1645,9 @@ func (h *Head) gc() (actualInOrderMint, minOOOTime int64, minMmapFile int) { // that reads the WAL, wouldn't be able to use those // samples since we would have no labels for that ref ID. for ref := range deleted { - h.deleted[chunks.HeadSeriesRef(ref)] = last + h.walExpiries[chunks.HeadSeriesRef(ref)] = last } - h.deletedMtx.Unlock() + h.walExpiriesMtx.Unlock() } return actualInOrderMint, minOOOTime, minMmapFile diff --git a/vendor/github.com/prometheus/prometheus/tsdb/head_wal.go b/vendor/github.com/prometheus/prometheus/tsdb/head_wal.go index 0cc56256d49..18260d97a78 100644 --- a/vendor/github.com/prometheus/prometheus/tsdb/head_wal.go +++ b/vendor/github.com/prometheus/prometheus/tsdb/head_wal.go @@ -52,7 +52,7 @@ type histogramRecord struct { fh *histogram.FloatHistogram } -func (h *Head) loadWAL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[chunks.HeadSeriesRef]chunks.HeadSeriesRef, mmappedChunks, oooMmappedChunks map[chunks.HeadSeriesRef][]*mmappedChunk) (err error) { +func (h *Head) loadWAL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[chunks.HeadSeriesRef]chunks.HeadSeriesRef, mmappedChunks, oooMmappedChunks map[chunks.HeadSeriesRef][]*mmappedChunk, lastSegment int) (err error) { // Track number of samples that referenced a series we don't know about // for error reporting. var unknownRefs atomic.Uint64 @@ -247,6 +247,8 @@ Outer: } if !created { multiRef[walSeries.Ref] = mSeries.ref + // Set the WAL expiry for the duplicate series, so it is kept in subsequent WAL checkpoints. + h.setWALExpiry(walSeries.Ref, lastSegment) } idx := uint64(mSeries.ref) % uint64(concurrency) diff --git a/vendor/github.com/prometheus/prometheus/tsdb/wlog/checkpoint.go b/vendor/github.com/prometheus/prometheus/tsdb/wlog/checkpoint.go index 5c607d70302..2c1b0c0534d 100644 --- a/vendor/github.com/prometheus/prometheus/tsdb/wlog/checkpoint.go +++ b/vendor/github.com/prometheus/prometheus/tsdb/wlog/checkpoint.go @@ -93,7 +93,7 @@ const CheckpointPrefix = "checkpoint." // segmented format as the original WAL itself. // This makes it easy to read it through the WAL package and concatenate // it with the original WAL. -func Checkpoint(logger *slog.Logger, w *WL, from, to int, keep func(id chunks.HeadSeriesRef) bool, mint int64) (*CheckpointStats, error) { +func Checkpoint(logger *slog.Logger, w *WL, from, to int, keep func(id chunks.HeadSeriesRef, last int) bool, mint int64) (*CheckpointStats, error) { stats := &CheckpointStats{} var sgmReader io.ReadCloser @@ -181,7 +181,7 @@ func Checkpoint(logger *slog.Logger, w *WL, from, to int, keep func(id chunks.He // Drop irrelevant series in place. 
repl := series[:0] for _, s := range series { - if keep(s.Ref) { + if keep(s.Ref, to) { repl = append(repl, s) } } @@ -323,7 +323,7 @@ func Checkpoint(logger *slog.Logger, w *WL, from, to int, keep func(id chunks.He // Only keep reference to the latest found metadata for each refID. repl := 0 for _, m := range metadata { - if keep(m.Ref) { + if keep(m.Ref, to) { if _, ok := latestMetadataMap[m.Ref]; !ok { repl++ } diff --git a/vendor/modules.txt b/vendor/modules.txt index 21c54455524..6d27c7b4f00 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -1076,7 +1076,7 @@ github.com/prometheus/exporter-toolkit/web github.com/prometheus/procfs github.com/prometheus/procfs/internal/fs github.com/prometheus/procfs/internal/util -# github.com/prometheus/prometheus v1.99.0 => github.com/grafana/mimir-prometheus v0.0.0-20250305224633-8c45fc54920d +# github.com/prometheus/prometheus v1.99.0 => github.com/grafana/mimir-prometheus v0.0.0-20250306234455-f6f6f2cceada ## explicit; go 1.22.7 github.com/prometheus/prometheus/config github.com/prometheus/prometheus/discovery @@ -1761,7 +1761,7 @@ sigs.k8s.io/kustomize/kyaml/yaml/walk sigs.k8s.io/yaml sigs.k8s.io/yaml/goyaml.v2 sigs.k8s.io/yaml/goyaml.v3 -# github.com/prometheus/prometheus => github.com/grafana/mimir-prometheus v0.0.0-20250305224633-8c45fc54920d +# github.com/prometheus/prometheus => github.com/grafana/mimir-prometheus v0.0.0-20250306234455-f6f6f2cceada # github.com/hashicorp/memberlist => github.com/grafana/memberlist v0.3.1-0.20220714140823-09ffed8adbbe # gopkg.in/yaml.v3 => github.com/colega/go-yaml-yaml v0.0.0-20220720105220-255a8d16d094 # github.com/grafana/regexp => github.com/grafana/regexp v0.0.0-20240531075221-3685f1377d7b
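To make the checkpoint retention rule introduced in head.go above concrete, here is a standalone illustration in plain Go (not Prometheus code; the types and values are invented for the example): a series record survives a checkpoint if the series is still in the head, or if its recorded WAL expiry points past the last segment being checkpointed, which is what keeps the duplicate series re-mapped during loadWAL replayable until their samples can no longer appear.

package main

import "fmt"

type seriesRef uint64

// keepSeriesInCheckpoint mirrors the keep decision sketched above: retain the
// record if the series is still live, or if later WAL segments may still
// reference it.
func keepSeriesInCheckpoint(inHead map[seriesRef]bool, walExpiries map[seriesRef]int, id seriesRef, last int) bool {
	if inHead[id] {
		return true
	}
	keepUntil, ok := walExpiries[id]
	return ok && keepUntil > last
}

func main() {
	inHead := map[seriesRef]bool{1: true}
	walExpiries := map[seriesRef]int{2: 7} // series 2 left the head; its samples may appear up to segment 7

	fmt.Println(keepSeriesInCheckpoint(inHead, walExpiries, 1, 5)) // true: still in the head
	fmt.Println(keepSeriesInCheckpoint(inHead, walExpiries, 2, 5)) // true: expiry (7) is beyond the last checkpointed segment (5)
	fmt.Println(keepSeriesInCheckpoint(inHead, walExpiries, 2, 7)) // false: safe to drop once segment 7 is checkpointed
}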