-
Notifications
You must be signed in to change notification settings - Fork 569
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
querier: allow to enforce series query request limit #10748
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -3635,6 +3635,13 @@ The `limits` block configures default and per-tenant limits imposed by component | |||||||||||||
# CLI flag: -store.max-labels-query-length | ||||||||||||||
[max_labels_query_length: <duration> | default = 0s] | ||||||||||||||
|
||||||||||||||
# Maximum number of items, series queries. This limit is enforced in the | ||||||||||||||
# querier. If the requested limit is outside the allowed value, the request will | ||||||||||||||
# not fail but will be manipulated to only query data up to allowed limit. 0 to | ||||||||||||||
# disable. | ||||||||||||||
Comment on lines
+3639
to
+3641
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||
# CLI flag: -querier.max-series-query-limit | ||||||||||||||
[max_series_query_limit: <int> | default = 0] | ||||||||||||||
|
||||||||||||||
# (advanced) Most recent allowed cacheable result per-tenant, to prevent caching | ||||||||||||||
# very recent results that might still be in flux. | ||||||||||||||
# CLI flag: -query-frontend.max-cache-freshness | ||||||||||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,6 +6,7 @@ | |
package querier | ||
|
||
import ( | ||
"cmp" | ||
"context" | ||
"errors" | ||
"flag" | ||
|
@@ -30,6 +31,7 @@ import ( | |
"github.com/grafana/mimir/pkg/querier/stats" | ||
"github.com/grafana/mimir/pkg/storage/chunk" | ||
"github.com/grafana/mimir/pkg/storage/lazyquery" | ||
"github.com/grafana/mimir/pkg/storage/series" | ||
"github.com/grafana/mimir/pkg/streamingpromql" | ||
"github.com/grafana/mimir/pkg/streamingpromql/compat" | ||
"github.com/grafana/mimir/pkg/util" | ||
|
@@ -318,7 +320,7 @@ func (mq multiQuerier) getQueriers(ctx context.Context, matchers ...*labels.Matc | |
|
||
// Select implements storage.Querier interface. | ||
// The bool passed is ignored because the series are always sorted. | ||
func (mq multiQuerier) Select(ctx context.Context, _ bool, sp *storage.SelectHints, matchers ...*labels.Matcher) storage.SeriesSet { | ||
func (mq multiQuerier) Select(ctx context.Context, _ bool, sp *storage.SelectHints, matchers ...*labels.Matcher) (set storage.SeriesSet) { | ||
spanLog, ctx := spanlogger.NewWithLogger(ctx, mq.logger, "querier.Select") | ||
defer spanLog.Span.Finish() | ||
|
||
|
@@ -356,21 +358,24 @@ func (mq multiQuerier) Select(ctx context.Context, _ bool, sp *storage.SelectHin | |
return storage.ErrSeriesSet(err) | ||
} | ||
|
||
// Validate query time range. Even if the time range has already been validated when we created | ||
// the querier, we need to check it again here because the time range specified in hints may be | ||
// different. | ||
// Validate query parameters. | ||
// Even if the time range has already been validated when we created the querier, we need to check it again here | ||
// because the time range specified in hints may be different. | ||
limit := sp.Limit | ||
startMs, endMs, err := validateQueryTimeRange(userID, sp.Start, sp.End, now.UnixMilli(), mq.limits, spanLog) | ||
if errors.Is(err, errEmptyTimeRange) { | ||
return storage.NoopSeriesSet() | ||
} else if err != nil { | ||
return storage.ErrSeriesSet(err) | ||
} | ||
if sp.Func == "series" { // Clamp max time range for series-only queries, before we check max length. | ||
if sp.Func == "series" { | ||
// Clamp max time range for series-only queries, before we check max length. | ||
startMs = clampToMaxLabelQueryLength(spanLog, startMs, endMs, now.UnixMilli(), mq.limits.MaxLabelsQueryLength(userID).Milliseconds()) | ||
// Clamp the limit for series-only queries. | ||
limit = clampToMaxSeriesQueryLimit(spanLog, limit, mq.limits.MaxSeriesQueryLimit(userID)) | ||
} | ||
|
||
// The time range may have been manipulated during the validation, | ||
// so we make sure changes are reflected back to hints. | ||
// The query parameters may have been manipulated during the validation, so we make sure changes are reflected back to hints. | ||
sp.Start = startMs | ||
sp.End = endMs | ||
|
||
|
@@ -382,6 +387,16 @@ func (mq multiQuerier) Select(ctx context.Context, _ bool, sp *storage.SelectHin | |
return storage.ErrSeriesSet(NewMaxQueryLengthError(endTime.Sub(startTime), maxQueryLength)) | ||
} | ||
|
||
// If the original request didn't have a limit but the limit was enforced, annotate the resulting series set with a warning. | ||
if sp.Limit == 0 && sp.Limit != limit { | ||
defer func() { | ||
var warning annotations.Annotations | ||
warning.Add(errors.New("results may be truncated due to limit")) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can we add more details - what is the limit and what was the requested limit There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This only adds a warning when the request didn't send a limit. My thinking was that when I pass a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. oh, i misread There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure. We don't do anything like that when we clamp other request parameters. Should we do that for the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i agree that putting a limit when the requester intended to get unlimited results is most surprising. But also getting N results when you limited to N+1 would imply there are only N results. That's also a lie and somewhat surprising. I think it's not a bad idea to add a warning when we clamp the start/end times (I suppose that's what you meant when you said "other request parameter"). I can do that next week There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Creating a new comment to not derail the conversation you're having with Dimitar. I suggest to be more verbose and clarify what we did, e.g. "the request had no limit, but a default limit of XXX has been enforced" (or something similar). |
||
set = series.NewSeriesSetWithWarnings(set, warning) | ||
}() | ||
} | ||
sp.Limit = limit | ||
|
||
if len(queriers) == 1 { | ||
return queriers[0].Select(ctx, true, sp, matchers...) | ||
} | ||
|
@@ -450,6 +465,24 @@ func clampToMaxLabelQueryLength(spanLog *spanlogger.SpanLogger, startMs, endMs, | |
return startMs | ||
} | ||
|
||
func clampToMaxSeriesQueryLimit(spanLog *spanlogger.SpanLogger, limit, maxSeriesQueryLimit int) int { | ||
if maxSeriesQueryLimit == 0 { | ||
// No request limit is enforced. | ||
return limit | ||
} | ||
|
||
// Request limit is enforced. | ||
newLimit := min(cmp.Or(limit, maxSeriesQueryLimit), maxSeriesQueryLimit) | ||
if newLimit != limit { | ||
spanLog.DebugLog( | ||
"msg", "the request limit of the query has been manipulated because of the 'max-series-query-limit' setting", | ||
"original", limit, | ||
"updated", newLimit, | ||
) | ||
} | ||
return newLimit | ||
} | ||
|
||
// LabelValues implements storage.Querier. | ||
func (mq multiQuerier) LabelValues(ctx context.Context, name string, hints *storage.LabelHints, matchers ...*labels.Matcher) ([]string, annotations.Annotations, error) { | ||
ctx, queriers, err := mq.getQueriers(ctx, matchers...) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm a little confused by the phrasing here. Is this the maximum number of series queries permitted? If so, we can remove the
items
and just sayMaximum number of series queries
.