diff --git a/go.mod b/go.mod index 3f26f40bb1d..af968561f7c 100644 --- a/go.mod +++ b/go.mod @@ -144,6 +144,7 @@ require ( github.com/spf13/cast v1.5.0 // indirect github.com/tklauser/go-sysconf v0.3.12 // indirect github.com/tklauser/numcpus v0.6.1 // indirect + github.com/tylertreat/BoomFilters v0.0.0-20210315201527-1a82519a3e43 // indirect github.com/yusufpapurcu/wmi v1.2.4 // indirect go.opentelemetry.io/auto/sdk v1.1.0 // indirect go.opentelemetry.io/collector/component v0.116.0 // indirect @@ -298,7 +299,7 @@ require ( sigs.k8s.io/yaml v1.4.0 // indirect ) -replace github.com/prometheus/prometheus => github.com/grafana/mimir-prometheus v0.0.0-20250302213708-bd234c29eed4 +replace github.com/prometheus/prometheus => github.com/grafana/mimir-prometheus v0.0.0-20250307115605-76de169c0ae4 // Replace memberlist with our fork which includes some fixes that haven't been // merged upstream yet: diff --git a/go.sum b/go.sum index f2c3023da17..73d82988800 100644 --- a/go.sum +++ b/go.sum @@ -945,6 +945,8 @@ github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8 github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/d4l3k/messagediff v1.2.1 h1:ZcAIMYsUg0EAp9X+tt8/enBE/Q8Yd5kzPynLyKptt9U= +github.com/d4l3k/messagediff v1.2.1/go.mod h1:Oozbb1TVXFac9FtSIxHBMnBCq2qeH/2KkEQxENCrlLo= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= @@ -1284,8 +1286,8 @@ github.com/grafana/gomemcache v0.0.0-20250228145437-da7b95fd2ac1 h1:vR5nELq+KtGO github.com/grafana/gomemcache v0.0.0-20250228145437-da7b95fd2ac1/go.mod h1:j/s0jkda4UXTemDs7Pgw/vMT06alWc42CHisvYac0qw= github.com/grafana/memberlist v0.3.1-0.20220714140823-09ffed8adbbe h1:yIXAAbLswn7VNWBIvM71O2QsgfgW9fRXZNR0DXe6pDU= github.com/grafana/memberlist v0.3.1-0.20220714140823-09ffed8adbbe/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE= -github.com/grafana/mimir-prometheus v0.0.0-20250302213708-bd234c29eed4 h1:z3cxARHOrF+l39pYXGhQ5ykMy35VP6xpavZHSeCv6Bw= -github.com/grafana/mimir-prometheus v0.0.0-20250302213708-bd234c29eed4/go.mod h1:TRDP3hIlMiItiCmzGthWfWxgsltR8keKOQW0MmUfkKk= +github.com/grafana/mimir-prometheus v0.0.0-20250307115605-76de169c0ae4 h1:J7lenrAXBCAPDLJqetZikQOrngJYHrwNWWbDfIUjbaM= +github.com/grafana/mimir-prometheus v0.0.0-20250307115605-76de169c0ae4/go.mod h1:GvDB7ooVHnRILyNkG4GlUWMAIjrEiNNkWB0K2WgUqr4= github.com/grafana/opentracing-contrib-go-stdlib v0.0.0-20230509071955-f410e79da956 h1:em1oddjXL8c1tL0iFdtVtPloq2hRPen2MJQKoAWpxu0= github.com/grafana/opentracing-contrib-go-stdlib v0.0.0-20230509071955-f410e79da956/go.mod h1:qtI1ogk+2JhVPIXVc6q+NHziSmy2W5GbdQZFUHADCBU= github.com/grafana/prometheus-alertmanager v0.25.1-0.20250211112812-e32be5e2a455 h1:yidC1xzk4fedLZ/iXEqSJopkw3jPZPwoMqqzue4eFEA= @@ -1761,6 +1763,8 @@ github.com/twmb/franz-go/plugin/kotel v1.5.0 h1:TiPfGUbQK384OO7ZYGdo7JuPCbJn+/8n github.com/twmb/franz-go/plugin/kotel v1.5.0/go.mod h1:wRXzRo76x1myOUMaVHAyraXoGBdEcvlLChGTVv5+DWU= github.com/twmb/franz-go/plugin/kprom v1.1.0 h1:grGeIJbm4llUBF8jkDjTb/b8rKllWSXjMwIqeCCcNYQ= github.com/twmb/franz-go/plugin/kprom v1.1.0/go.mod 
h1:cTDrPMSkyrO99LyGx3AtiwF9W6+THHjZrkDE2+TEBIU= +github.com/tylertreat/BoomFilters v0.0.0-20210315201527-1a82519a3e43 h1:QEePdg0ty2r0t1+qwfZmQ4OOl/MB2UXIeJSpIZv56lg= +github.com/tylertreat/BoomFilters v0.0.0-20210315201527-1a82519a3e43/go.mod h1:OYRfF6eb5wY9VRFkXJH8FFBi3plw2v+giaIu7P054pM= github.com/uber/jaeger-client-go v2.30.0+incompatible h1:D6wyKGCecFaSRUpo8lCVbaOOb6ThwMmTEbhRwtKR97o= github.com/uber/jaeger-client-go v2.30.0+incompatible/go.mod h1:WVhlPFC8FDjOFMMWRy2pZqQJSXxYSwNYOkTr/Z6d3Kk= github.com/uber/jaeger-lib v2.4.1+incompatible h1:td4jdvLcExb4cBISKIpHuGoVXh+dVKhn2Um6rjCsSsg= diff --git a/integration/compactor_test.go b/integration/compactor_test.go index 30b4e0ea7ca..665f3ef6059 100644 --- a/integration/compactor_test.go +++ b/integration/compactor_test.go @@ -6,6 +6,7 @@ package integration import ( "context" "encoding/json" + "fmt" "os" "path" "path/filepath" @@ -133,7 +134,7 @@ func TestCompactBlocksContainingNativeHistograms(t *testing.T) { chkReader, err := chunks.NewDirReader(filepath.Join(outDir, blockID, block.ChunksDirname), nil) require.NoError(t, err) - ixReader, err := index.NewFileReader(filepath.Join(outDir, blockID, block.IndexFilename), index.DecodePostingsRaw) + ixReader, err := index.NewFileReader(filepath.Join(outDir, blockID, block.IndexFilename), index.DecodePostingsRaw, emptyStats{}) require.NoError(t, err) n, v := index.AllPostingsKey() @@ -190,6 +191,21 @@ func TestCompactBlocksContainingNativeHistograms(t *testing.T) { require.Equal(t, expectedSeries, compactedSeries) } +type emptyStats struct { +} + +func (e emptyStats) TotalSeries() int64 { + return 0 +} + +func (e emptyStats) LabelValuesCount(context.Context, string) (int64, error) { + return 0, fmt.Errorf("not implemented") +} + +func (e emptyStats) LabelValuesCardinality(context.Context, string, ...string) (int64, error) { + return 0, fmt.Errorf("not implemented") +} + func isMarkedForDeletionDueToCompaction(t *testing.T, blockPath string) bool { deletionMarkFilePath := filepath.Join(blockPath, block.DeletionMarkFilename) b, err := os.ReadFile(deletionMarkFilePath) diff --git a/pkg/compactor/split_merge_compactor_test.go b/pkg/compactor/split_merge_compactor_test.go index 4a37abf698c..47db513e7b1 100644 --- a/pkg/compactor/split_merge_compactor_test.go +++ b/pkg/compactor/split_merge_compactor_test.go @@ -784,16 +784,26 @@ func TestMultitenantCompactor_ShouldGuaranteeSeriesShardingConsistencyOverTheTim require.NoError(t, err) // Find all series in the block. - postings, err := indexReader.PostingsForMatchers(ctx, false, labels.MustNewMatcher(labels.MatchRegexp, "series_id", ".+")) + postings, pendingMatchers, err := indexReader.PostingsForMatchers(ctx, false, labels.MustNewMatcher(labels.MatchRegexp, "series_id", ".+")) require.NoError(t, err) builder := labels.NewScratchBuilder(1) for postings.Next() { // Symbolize the series labels. require.NoError(t, indexReader.Series(postings.At(), &builder, nil)) + seriesLabels := builder.Labels() + allMatch := true + for _, m := range pendingMatchers { + if !m.Matches(seriesLabels.Get(m.Name)) { + allMatch = false + break + } + } + if !allMatch { + continue + } // Ensure the series below to the right shard. 
- seriesLabels := builder.Labels() seriesID, err := strconv.Atoi(seriesLabels.Get("series_id")) require.NoError(t, err) assert.Contains(t, expectedSeriesIDs, seriesID, "series:", seriesLabels.String()) diff --git a/pkg/ingester/active_series.go b/pkg/ingester/active_series.go index abf45e77adf..29a11d5bb14 100644 --- a/pkg/ingester/active_series.go +++ b/pkg/ingester/active_series.go @@ -61,7 +61,7 @@ func (i *Ingester) ActiveSeries(request *client.ActiveSeriesRequest, stream clie } isNativeHistogram := request.GetType() == client.NATIVE_HISTOGRAM_SERIES - postings, err := getPostings(ctx, db, idx, matchers, isNativeHistogram) + postings, pendingMatchers, err := getPostings(ctx, db, idx, matchers, isNativeHistogram) if err != nil { return fmt.Errorf("error listing active series: %w", err) } @@ -79,7 +79,19 @@ func (i *Ingester) ActiveSeries(request *client.ActiveSeriesRequest, stream clie } return fmt.Errorf("error getting series: %w", err) } - m := &mimirpb.Metric{Labels: mimirpb.FromLabelsToLabelAdapters(buf.Labels())} + lbsl := buf.Labels() + allMatch := true + for _, m := range pendingMatchers { + if !m.Matches(lbsl.Get(m.Name)) { + allMatch = false + break + } + } + if !allMatch { + continue + } + + m := &mimirpb.Metric{Labels: mimirpb.FromLabelsToLabelAdapters(lbsl)} mSize := m.Size() if isNativeHistogram { mSize += 8 // 8 bytes for the bucket count. @@ -110,19 +122,19 @@ func (i *Ingester) ActiveSeries(request *client.ActiveSeriesRequest, stream clie return nil } -func getPostings(ctx context.Context, db *userTSDB, idx tsdb.IndexReader, matchers []*labels.Matcher, isNativeHistogram bool) (activeseries.BucketCountPostings, error) { +func getPostings(ctx context.Context, db *userTSDB, idx tsdb.IndexReader, matchers []*labels.Matcher, isNativeHistogram bool) (activeseries.BucketCountPostings, []*labels.Matcher, error) { if db.activeSeries == nil { - return nil, fmt.Errorf("active series tracker is not initialized") + return nil, nil, fmt.Errorf("active series tracker is not initialized") } shard, matchers, err := sharding.RemoveShardFromMatchers(matchers) if err != nil { - return nil, fmt.Errorf("error removing shard matcher: %w", err) + return nil, nil, fmt.Errorf("error removing shard matcher: %w", err) } - postings, err := tsdb.PostingsForMatchers(ctx, idx, matchers...) + postings, pendingMatchers, err := tsdb.PostingsForMatchers(ctx, idx, matchers...) if err != nil { - return nil, fmt.Errorf("error getting postings: %w", err) + return nil, nil, fmt.Errorf("error getting postings: %w", err) } if shard != nil { @@ -130,10 +142,10 @@ func getPostings(ctx context.Context, db *userTSDB, idx tsdb.IndexReader, matche } if isNativeHistogram { - return activeseries.NewNativeHistogramPostings(db.activeSeries, postings), nil + return activeseries.NewNativeHistogramPostings(db.activeSeries, postings), pendingMatchers, nil } - return &ZeroBucketCountPostings{*activeseries.NewPostings(db.activeSeries, postings)}, nil + return &ZeroBucketCountPostings{*activeseries.NewPostings(db.activeSeries, postings)}, pendingMatchers, nil } // listActiveSeries is used for testing purposes, builds the whole array of active series in memory. 
@@ -142,10 +154,14 @@ func listActiveSeries(ctx context.Context, db *userTSDB, matchers []*labels.Matc if err != nil { return nil, fmt.Errorf("error getting index: %w", err) } - postings, err := getPostings(ctx, db, idx, matchers, false) + ctx = context.WithValue(ctx, "disable_optimized_index_lookup", true) + postings, pendingMatchers, err := getPostings(ctx, db, idx, matchers, false) if err != nil { return nil, err } + if len(pendingMatchers) > 0 { + return nil, fmt.Errorf("pending matchers: %v", pendingMatchers) + } return NewSeries(postings, idx), nil } diff --git a/pkg/ingester/ingester.go b/pkg/ingester/ingester.go index 5752bbebca4..05d114b8e18 100644 --- a/pkg/ingester/ingester.go +++ b/pkg/ingester/ingester.go @@ -2138,13 +2138,22 @@ func (i *Ingester) LabelValuesCardinality(req *client.LabelValuesCardinalityRequ var postingsForMatchersFn func(context.Context, tsdb.IndexPostingsReader, ...*labels.Matcher) (index.Postings, error) switch req.GetCountMethod() { case client.IN_MEMORY: - postingsForMatchersFn = tsdb.PostingsForMatchers + postingsForMatchersFn = func(ctx context.Context, reader tsdb.IndexPostingsReader, matcher ...*labels.Matcher) (index.Postings, error) { + postings, pendingMatchers, err := tsdb.PostingsForMatchers(context.WithValue(ctx, "disable_optimized_index_lookup", true), reader, matcher...) + if len(pendingMatchers) > 0 { + return nil, fmt.Errorf("unsupported pending matchers %v", pendingMatchers) + } + return postings, err + } case client.ACTIVE: postingsForMatchersFn = func(ctx context.Context, ix tsdb.IndexPostingsReader, ms ...*labels.Matcher) (index.Postings, error) { - postings, err := tsdb.PostingsForMatchers(ctx, ix, ms...) + postings, pendingMatchers, err := tsdb.PostingsForMatchers(context.WithValue(ctx, "disable_optimized_index_lookup", true), ix, ms...) if err != nil { return nil, err } + if len(pendingMatchers) > 0 { + return nil, fmt.Errorf("unsupported pending matchers %v", pendingMatchers) + } return activeseries.NewPostings(db.activeSeries, postings), nil } default: diff --git a/pkg/storage/tsdb/block/index.go b/pkg/storage/tsdb/block/index.go index 87911452bf2..3564b7cda38 100644 --- a/pkg/storage/tsdb/block/index.go +++ b/pkg/storage/tsdb/block/index.go @@ -210,6 +210,21 @@ func (n *minMaxSumInt64) Avg() int64 { return n.sum / n.cnt } +type emptyStats struct { +} + +func (e emptyStats) TotalSeries() int64 { + return 0 +} + +func (e emptyStats) LabelValuesCount(context.Context, string) (int64, error) { + return 0, fmt.Errorf("not implemented") +} + +func (e emptyStats) LabelValuesCardinality(context.Context, string, ...string) (int64, error) { + return 0, fmt.Errorf("not implemented") +} + // GatherBlockHealthStats returns useful counters as well as outsider chunks (chunks outside of block time range) that // helps to assess index and optionally chunk health. // It considers https://github.com/prometheus/tsdb/issues/347 as something that Thanos can handle. @@ -218,7 +233,8 @@ func GatherBlockHealthStats(ctx context.Context, logger log.Logger, blockDir str indexFn := filepath.Join(blockDir, IndexFilename) chunkDir := filepath.Join(blockDir, ChunksDirname) // index reader - r, err := index.NewFileReader(indexFn, index.DecodePostingsRaw) + // TODO dimitarvdimitrov do we need to fix this? 
+ r, err := index.NewFileReader(indexFn, index.DecodePostingsRaw, emptyStats{}) if err != nil { return stats, errors.Wrap(err, "open index file") } diff --git a/pkg/storage/tsdb/block/index_test.go b/pkg/storage/tsdb/block/index_test.go index 4089213eb12..04cd35d607d 100644 --- a/pkg/storage/tsdb/block/index_test.go +++ b/pkg/storage/tsdb/block/index_test.go @@ -36,7 +36,7 @@ func TestRewrite(t *testing.T) { }, 150, 0, 1000, labels.EmptyLabels()) require.NoError(t, err) - ir, err := index.NewFileReader(filepath.Join(tmpDir, b.String(), IndexFilename), index.DecodePostingsRaw) + ir, err := index.NewFileReader(filepath.Join(tmpDir, b.String(), IndexFilename), index.DecodePostingsRaw, emptyStats{}) require.NoError(t, err) defer func() { require.NoError(t, ir.Close()) }() @@ -78,7 +78,7 @@ func TestRewrite(t *testing.T) { require.NoError(t, iw.Close()) require.NoError(t, cw.Close()) - ir2, err := index.NewFileReader(filepath.Join(tmpDir, m.ULID.String(), IndexFilename), index.DecodePostingsRaw) + ir2, err := index.NewFileReader(filepath.Join(tmpDir, m.ULID.String(), IndexFilename), index.DecodePostingsRaw, emptyStats{}) require.NoError(t, err) defer func() { require.NoError(t, ir2.Close()) }() diff --git a/pkg/storegateway/indexheader/header_test.go b/pkg/storegateway/indexheader/header_test.go index 87ecde23d23..d7eaf0467ac 100644 --- a/pkg/storegateway/indexheader/header_test.go +++ b/pkg/storegateway/indexheader/header_test.go @@ -129,10 +129,25 @@ func TestReadersComparedToIndexHeader(t *testing.T) { } +type emptyStats struct { +} + +func (e emptyStats) TotalSeries() int64 { + return 0 +} + +func (e emptyStats) LabelValuesCount(context.Context, string) (int64, error) { + return 0, fmt.Errorf("not implemented") +} + +func (e emptyStats) LabelValuesCardinality(context.Context, string, ...string) (int64, error) { + return 0, fmt.Errorf("not implemented") +} + func compareIndexToHeader(t *testing.T, indexByteSlice index.ByteSlice, headerReader Reader) { ctx := context.Background() - indexReader, err := index.NewReader(indexByteSlice, index.DecodePostingsRaw) + indexReader, err := index.NewReader(indexByteSlice, index.DecodePostingsRaw, emptyStats{}) require.NoError(t, err) defer func() { _ = indexReader.Close() }() diff --git a/pkg/storegateway/prometheus_test.go b/pkg/storegateway/prometheus_test.go index ac3f5db2653..2dd86d793b0 100644 --- a/pkg/storegateway/prometheus_test.go +++ b/pkg/storegateway/prometheus_test.go @@ -38,10 +38,12 @@ func queryPromSeriesChunkMetas(t testing.TB, series labels.Labels, block promtsd series.Range(func(l labels.Label) { matchers = append(matchers, labels.MustNewMatcher(labels.MatchEqual, l.Name, l.Value)) }) - postings, err := promReader.PostingsForMatchers(ctx, false, matchers...) + ctx = context.WithValue(ctx, "disable_optimized_index_lookup", true) + postings, pendingMatchers, err := promReader.PostingsForMatchers(ctx, false, matchers...) 
if err != nil { require.NoError(t, err) } + require.Empty(t, pendingMatchers) if !postings.Next() { require.Truef(t, false, "selecting from prometheus returned no series for %s", util.MatchersStringer(matchers)) diff --git a/tools/splitblocks/main_test.go b/tools/splitblocks/main_test.go index 7c64fe413ae..6058d18e702 100644 --- a/tools/splitblocks/main_test.go +++ b/tools/splitblocks/main_test.go @@ -4,6 +4,7 @@ package main import ( "context" + "fmt" "os" "path" "path/filepath" @@ -200,6 +201,21 @@ func buildSeriesSpec(startOfDay time.Time) []*block.SeriesSpec { } } +type emptyStats struct { +} + +func (e emptyStats) TotalSeries() int64 { + return 0 +} + +func (e emptyStats) LabelValuesCount(context.Context, string) (int64, error) { + return 0, fmt.Errorf("not implemented") +} + +func (e emptyStats) LabelValuesCardinality(context.Context, string, ...string) (int64, error) { + return 0, fmt.Errorf("not implemented") +} + func listSeriesAndChunksFromBlock(t *testing.T, blockDir string) []*block.SeriesSpec { blk, err := tsdb.OpenBlock(promslog.NewNopLogger(), blockDir, nil, nil) require.NoError(t, err) @@ -208,7 +224,7 @@ func listSeriesAndChunksFromBlock(t *testing.T, blockDir string) []*block.Series defer require.NoError(t, chunkReader.Close()) allKey, allValue := index.AllPostingsKey() - r, err := index.NewFileReader(filepath.Join(blockDir, block.IndexFilename), index.DecodePostingsRaw) + r, err := index.NewFileReader(filepath.Join(blockDir, block.IndexFilename), index.DecodePostingsRaw, emptyStats{}) require.NoError(t, err) defer runutil.CloseWithErrCapture(&err, r, "gather index issue file reader") it, err := r.Postings(context.Background(), allKey, allValue) diff --git a/tools/tsdb-gaps/main.go b/tools/tsdb-gaps/main.go index af7e61603f2..033349abc9f 100644 --- a/tools/tsdb-gaps/main.go +++ b/tools/tsdb-gaps/main.go @@ -238,10 +238,14 @@ func analyzeBlockForGaps(ctx context.Context, cfg config, blockDir string, match } defer idx.Close() - p, err := idx.PostingsForMatchers(ctx, true, matchers...) + p, pendingMatchers, err := idx.PostingsForMatchers(ctx, true, matchers...) if err != nil { return blockStats, err } + if len(pendingMatchers) > 0 { + // TODO dimitarvdimitrov fix this + return blockStats, fmt.Errorf("didn't expect pending matchers: %v", pendingMatchers) + } var builder labels.ScratchBuilder for p.Next() { diff --git a/vendor/github.com/prometheus/prometheus/model/labels/matcher.go b/vendor/github.com/prometheus/prometheus/model/labels/matcher.go index a09c838e3f8..85c0b863385 100644 --- a/vendor/github.com/prometheus/prometheus/model/labels/matcher.go +++ b/vendor/github.com/prometheus/prometheus/model/labels/matcher.go @@ -168,3 +168,80 @@ func (m *Matcher) IsRegexOptimized() bool { } return m.re.IsOptimized() } + +const ( + estimatedStringEqualityCost = 1.0 + estimatedStringHasPrefixCost = 0.5 + estimatedSliceContainsCostPerElement = 1.0 + estimatedMapContainsCostPerElement = 0.01 + estimatedRegexMatchCost = 10.0 +) + +// FixedCost returns the fixed cost of running this matcher against an arbitrary label value.
+// TODO dimitarvdimitrov benchmark relative cost of different matchers +// TODO dimitarvdimitrov use the complexity of the regex string as a cost +func (m *Matcher) FixedCost() float64 { + switch m.Type { + case MatchEqual, MatchNotEqual: + // String equality/inequality comparison is simple + return estimatedStringEqualityCost + case MatchRegexp, MatchNotRegexp: + // If we have optimized set matches, use those + if len(m.re.setMatches) > 0 { + return estimatedSliceContainsCostPerElement * float64(len(m.re.setMatches)) + } + + // If we have a string matcher with a map, use that + if mm, ok := m.re.stringMatcher.(*equalMultiStringMapMatcher); ok { + return estimatedMapContainsCostPerElement*float64(len(mm.values)) + estimatedStringEqualityCost + } + + // If we have a prefix optimization, use that + if m.re.prefix != "" { + return estimatedStringHasPrefixCost + } + + // Fallback to default cost for unoptimized regex + return estimatedRegexMatchCost + } + + panic("labels.Matcher.FixedCost: invalid match type " + m.Type.String()) +} + +// EstimateSelectivity returns the estimated fraction of all label values that this matcher would match. +// For example: +// * namespace!="" will match all values, so its selectivity is 1 +// * namespace=~"foo" will match only a single value, so its selectivity across 100 values is 0.01 +// * namespace=~"foo|bar" will match two values, so its selectivity across 100 values is 0.02 +func (m *Matcher) EstimateSelectivity(totalLabelValues int64) float64 { + var selectivity float64 + switch m.Type { + case MatchEqual, MatchNotEqual: + // For exact match, we expect to match exactly one value + selectivity = 1.0 / float64(totalLabelValues) + + case MatchRegexp, MatchNotRegexp: + // If we have optimized set matches, we know exactly how many values we'll match. + // We assume that all of them will be present in the corpus we're testing against. + if setMatchesSize := len(m.re.setMatches); setMatchesSize > 0 { + selectivity = float64(setMatchesSize) / float64(totalLabelValues) + break + } + + // For prefix matches, estimate we'll match ~10% of values. + if m.re.prefix != "" { + selectivity = 0.1 + break + } + + // For unoptimized regex, assume we'll match ~10% of values + selectivity = 0.1 + break + } + + switch m.Type { + case MatchNotEqual, MatchNotRegexp: + selectivity = 1.0 - selectivity + } + return selectivity +} diff --git a/vendor/github.com/prometheus/prometheus/tsdb/block.go b/vendor/github.com/prometheus/prometheus/tsdb/block.go index 0d06971c1ef..ba9a5214eb6 100644 --- a/vendor/github.com/prometheus/prometheus/tsdb/block.go +++ b/vendor/github.com/prometheus/prometheus/tsdb/block.go @@ -62,6 +62,8 @@ type IndexWriter interface { // IndexReader provides reading access of serialized index data. type IndexReader interface { + index.Statistics + // Symbols return an iterator over sorted string symbols that may occur in // series' labels and indices. It is not safe to use the returned strings // beyond the lifetime of the index reader. @@ -91,7 +93,8 @@ type IndexReader interface { // The resulting postings are not ordered by series. // If concurrent hint is set to true, call will be optimized for a (most likely) concurrent call with same matchers, // avoiding same calculations twice, however this implementation may lead to a worse performance when called once. - PostingsForMatchers(ctx context.Context, concurrent bool, ms ...*labels.Matcher) (index.Postings, error) + // The returned pendingMatchers are matchers that have not been applied to the returned postings yet.
+ PostingsForMatchers(ctx context.Context, concurrent bool, ms ...*labels.Matcher) (index.Postings, []*labels.Matcher, error) // SortedPostings returns a postings list that is reordered to be sorted // by the label set of the underlying series. @@ -340,14 +343,29 @@ type Block struct { numBytesMeta int64 } +type emptyStats struct { +} + +func (e emptyStats) TotalSeries() int64 { + return 0 +} + +func (e emptyStats) LabelValuesCount(ctx context.Context, name string) (int64, error) { + return 0, errors.New("statistics not propagated") +} + +func (e emptyStats) LabelValuesCardinality(ctx context.Context, name string, values ...string) (int64, error) { + return 0, errors.New("statistics not propagated") +} + // OpenBlock opens the block in the directory. It can be passed a chunk pool, which is used // to instantiate chunk structs. func OpenBlock(logger *slog.Logger, dir string, pool chunkenc.Pool, postingsDecoderFactory PostingsDecoderFactory) (pb *Block, err error) { - return OpenBlockWithOptions(logger, dir, pool, postingsDecoderFactory, nil, DefaultPostingsForMatchersCacheTTL, DefaultPostingsForMatchersCacheMaxItems, DefaultPostingsForMatchersCacheMaxBytes, DefaultPostingsForMatchersCacheForce, NewPostingsForMatchersCacheMetrics(nil)) + return OpenBlockWithOptions(logger, dir, pool, postingsDecoderFactory, nil, DefaultPostingsForMatchersCacheTTL, DefaultPostingsForMatchersCacheMaxItems, DefaultPostingsForMatchersCacheMaxBytes, DefaultPostingsForMatchersCacheForce, NewPostingsForMatchersCacheMetrics(nil), emptyStats{}) } // OpenBlockWithOptions is like OpenBlock but allows to pass a cache provider and sharding function. -func OpenBlockWithOptions(logger *slog.Logger, dir string, pool chunkenc.Pool, postingsDecoderFactory PostingsDecoderFactory, cache index.ReaderCacheProvider, postingsCacheTTL time.Duration, postingsCacheMaxItems int, postingsCacheMaxBytes int64, postingsCacheForce bool, postingsCacheMetrics *PostingsForMatchersCacheMetrics) (pb *Block, err error) { +func OpenBlockWithOptions(logger *slog.Logger, dir string, pool chunkenc.Pool, postingsDecoderFactory PostingsDecoderFactory, cache index.ReaderCacheProvider, postingsCacheTTL time.Duration, postingsCacheMaxItems int, postingsCacheMaxBytes int64, postingsCacheForce bool, postingsCacheMetrics *PostingsForMatchersCacheMetrics, stats index.Statistics) (pb *Block, err error) { if logger == nil { logger = promslog.NewNopLogger() } @@ -372,7 +390,7 @@ func OpenBlockWithOptions(logger *slog.Logger, dir string, pool chunkenc.Pool, p if postingsDecoderFactory != nil { decoder = postingsDecoderFactory(meta) } - indexReader, err := index.NewFileReaderWithOptions(filepath.Join(dir, indexFilename), decoder, cache) + indexReader, err := index.NewFileReaderWithOptions(filepath.Join(dir, indexFilename), decoder, cache, stats) if err != nil { return nil, err } @@ -496,6 +514,18 @@ type blockIndexReader struct { b *Block } +func (r blockIndexReader) TotalSeries() int64 { + return r.ir.TotalSeries() +} + +func (r blockIndexReader) LabelValuesCount(ctx context.Context, name string) (int64, error) { + return r.ir.LabelValuesCount(ctx, name) +} + +func (r blockIndexReader) LabelValuesCardinality(ctx context.Context, name string, values ...string) (int64, error) { + return r.ir.LabelValuesCardinality(ctx, name, values...) 
+} + func (r blockIndexReader) Symbols() index.StringIter { return r.ir.Symbols() } @@ -554,8 +584,10 @@ func (r blockIndexReader) PostingsForAllLabelValues(ctx context.Context, name st return r.ir.PostingsForAllLabelValues(ctx, name) } -func (r blockIndexReader) PostingsForMatchers(ctx context.Context, concurrent bool, ms ...*labels.Matcher) (index.Postings, error) { - return r.ir.PostingsForMatchers(ctx, concurrent, ms...) +func (r blockIndexReader) PostingsForMatchers(ctx context.Context, concurrent bool, ms ...*labels.Matcher) (index.Postings, []*labels.Matcher, error) { + // Matchers that PostingsForMatchers could not apply to the postings are returned as pendingMatchers; the caller must apply them. + p, pendingMatchers, err := PostingsForMatchers(ctx, r, ms...) + return p, pendingMatchers, err } func (r blockIndexReader) SortedPostings(p index.Postings) index.Postings { @@ -629,11 +661,13 @@ func (pb *Block) Delete(ctx context.Context, mint, maxt int64, ms ...*labels.Mat return ErrClosing } - p, err := pb.indexr.PostingsForMatchers(ctx, false, ms...) + p, _, err := pb.indexr.PostingsForMatchers(ctx, false, ms...) if err != nil { return fmt.Errorf("select series: %w", err) } + // TODO dimitarvdimitrov handle pending matchers + ir := pb.indexr // Choose only valid postings which have chunks in the time-range. diff --git a/vendor/github.com/prometheus/prometheus/tsdb/compact.go b/vendor/github.com/prometheus/prometheus/tsdb/compact.go index 651736ba005..c8369a72d50 100644 --- a/vendor/github.com/prometheus/prometheus/tsdb/compact.go +++ b/vendor/github.com/prometheus/prometheus/tsdb/compact.go @@ -1086,7 +1086,7 @@ func (c DefaultBlockPopulator) PopulateBlock(ctx context.Context, metrics *Compa postings := postingsFunc(ctx, indexr) // Blocks meta is half open: [min, max), so subtract 1 to ensure we don't hold samples with exact meta.MaxTime timestamp. - sets = append(sets, NewBlockChunkSeriesSet(b.Meta().ULID, indexr, chunkr, tombsr, postings, minT, maxT-1, false)) + sets = append(sets, NewBlockChunkSeriesSet(b.Meta().ULID, indexr, chunkr, tombsr, postings, minT, maxT-1, false, nil)) if len(outBlocks) > 1 { // To iterate series when populating symbols, we cannot reuse postings we just got, but need to get a new copy. @@ -1098,7 +1098,7 @@ func (c DefaultBlockPopulator) PopulateBlock(ctx context.Context, metrics *Compa } all = indexr.SortedPostings(all) // Blocks meta is half open: [min, max), so subtract 1 to ensure we don't hold samples with exact meta.MaxTime timestamp.
- symbolsSets = append(symbolsSets, NewBlockChunkSeriesSet(b.Meta().ULID, indexr, chunkr, tombsr, all, minT, maxT-1, false)) + symbolsSets = append(symbolsSets, NewBlockChunkSeriesSet(b.Meta().ULID, indexr, chunkr, tombsr, all, minT, maxT-1, false, nil)) } else { syms := indexr.Symbols() if i == 0 { diff --git a/vendor/github.com/prometheus/prometheus/tsdb/db.go b/vendor/github.com/prometheus/prometheus/tsdb/db.go index 69278947aca..5b14d1869f5 100644 --- a/vendor/github.com/prometheus/prometheus/tsdb/db.go +++ b/vendor/github.com/prometheus/prometheus/tsdb/db.go @@ -359,6 +359,7 @@ type DB struct { blockQuerierFunc BlockQuerierFunc blockChunkQuerierFunc BlockChunkQuerierFunc + stats index.Statistics } type dbMetrics struct { @@ -710,7 +711,8 @@ func (db *DBReadOnly) Blocks() ([]BlockReader, error) { return nil, ErrClosed default: } - loadable, corrupted, err := openBlocks(db.logger, db.dir, nil, nil, DefaultPostingsDecoderFactory, nil, DefaultPostingsForMatchersCacheTTL, DefaultPostingsForMatchersCacheMaxItems, DefaultPostingsForMatchersCacheMaxBytes, DefaultPostingsForMatchersCacheForce, NewPostingsForMatchersCacheMetrics(nil)) + // TODO dimitarvdimitrov propagate this so utils also benefit from stats + loadable, corrupted, err := openBlocks(db.logger, db.dir, nil, nil, DefaultPostingsDecoderFactory, nil, DefaultPostingsForMatchersCacheTTL, DefaultPostingsForMatchersCacheMaxItems, DefaultPostingsForMatchersCacheMaxBytes, DefaultPostingsForMatchersCacheForce, NewPostingsForMatchersCacheMetrics(nil), emptyStats{}) if err != nil { return nil, err } @@ -1065,6 +1067,7 @@ func open(dir string, l *slog.Logger, r prometheus.Registerer, opts *Options, rn return nil, err } db.head.writeNotified = db.writeNotified + db.stats = db.head.postingsStats // Register metrics after assigning the head block. 
db.metrics = newDBMetrics(db, r) @@ -1118,6 +1121,10 @@ func open(dir string, l *slog.Logger, r prometheus.Registerer, opts *Options, rn go db.run(ctx) + // load statistics before we return + // TODO dimitarvdimitrov find a more normal way + _ = db.stats.TotalSeries() + return db, nil } @@ -1694,7 +1701,7 @@ func (db *DB) reloadBlocks() (err error) { db.mtx.Lock() defer db.mtx.Unlock() - loadable, corrupted, err := openBlocks(db.logger, db.dir, db.blocks, db.chunkPool, db.opts.PostingsDecoderFactory, db.opts.SeriesHashCache, db.opts.BlockPostingsForMatchersCacheTTL, db.opts.BlockPostingsForMatchersCacheMaxItems, db.opts.BlockPostingsForMatchersCacheMaxBytes, db.opts.BlockPostingsForMatchersCacheForce, db.opts.BlockPostingsForMatchersCacheMetrics) + loadable, corrupted, err := openBlocks(db.logger, db.dir, db.blocks, db.chunkPool, db.opts.PostingsDecoderFactory, db.opts.SeriesHashCache, db.opts.BlockPostingsForMatchersCacheTTL, db.opts.BlockPostingsForMatchersCacheMaxItems, db.opts.BlockPostingsForMatchersCacheMaxBytes, db.opts.BlockPostingsForMatchersCacheForce, db.opts.BlockPostingsForMatchersCacheMetrics, db.stats) if err != nil { return err } @@ -1789,7 +1796,20 @@ func (db *DB) reloadBlocks() (err error) { return nil } -func openBlocks(l *slog.Logger, dir string, loaded []*Block, chunkPool chunkenc.Pool, postingsDecoderFactory PostingsDecoderFactory, cache *hashcache.SeriesHashCache, postingsCacheTTL time.Duration, postingsCacheMaxItems int, postingsCacheMaxBytes int64, postingsCacheForce bool, postingsCacheMetrics *PostingsForMatchersCacheMetrics) (blocks []*Block, corrupted map[ulid.ULID]error, err error) { +func openBlocks( + l *slog.Logger, + dir string, + loaded []*Block, + chunkPool chunkenc.Pool, + postingsDecoderFactory PostingsDecoderFactory, + cache *hashcache.SeriesHashCache, + postingsCacheTTL time.Duration, + postingsCacheMaxItems int, + postingsCacheMaxBytes int64, + postingsCacheForce bool, + postingsCacheMetrics *PostingsForMatchersCacheMetrics, + stats index.Statistics, +) (blocks []*Block, corrupted map[ulid.ULID]error, err error) { bDirs, err := blockDirs(dir) if err != nil { return nil, nil, fmt.Errorf("find blocks: %w", err) @@ -1811,7 +1831,7 @@ func openBlocks(l *slog.Logger, dir string, loaded []*Block, chunkPool chunkenc. cacheProvider = cache.GetBlockCacheProvider(meta.ULID.String()) } - block, err = OpenBlockWithOptions(l, bDir, chunkPool, postingsDecoderFactory, cacheProvider, postingsCacheTTL, postingsCacheMaxItems, postingsCacheMaxBytes, postingsCacheForce, postingsCacheMetrics) + block, err = OpenBlockWithOptions(l, bDir, chunkPool, postingsDecoderFactory, cacheProvider, postingsCacheTTL, postingsCacheMaxItems, postingsCacheMaxBytes, postingsCacheForce, postingsCacheMetrics, stats) if err != nil { corrupted[meta.ULID] = err continue @@ -2437,6 +2457,36 @@ func (db *DB) SetWriteNotified(wn wlog.WriteNotified) { db.head.writeNotified = wn } +type reloadableStats struct { + lastReload time.Time + source func() index.Statistics + + stats index.Statistics +} + +func (r *reloadableStats) TotalSeries() int64 { + r.ensureLoaded() + return r.stats.TotalSeries() +} + +func (r *reloadableStats) LabelValuesCount(ctx context.Context, name string) (int64, error) { + r.ensureLoaded() + return r.stats.LabelValuesCount(ctx, name) +} + +func (r *reloadableStats) LabelValuesCardinality(ctx context.Context, name string, values ...string) (int64, error) { + r.ensureLoaded() + return r.stats.LabelValuesCardinality(ctx, name, values...) 
+} + +func (r *reloadableStats) ensureLoaded() { + // TODO dimitarvdimitrov make this reload in the background, we don't want to do it on the hot path + if time.Since(r.lastReload) > time.Hour || r.stats == nil { + r.stats = r.source() + r.lastReload = time.Now() + } +} + func isBlockDir(fi fs.DirEntry) bool { if !fi.IsDir() { return false diff --git a/vendor/github.com/prometheus/prometheus/tsdb/hashcache/series_hash_cache.go b/vendor/github.com/prometheus/prometheus/tsdb/hashcache/series_hash_cache.go index dd70212076e..d82da5ee32a 100644 --- a/vendor/github.com/prometheus/prometheus/tsdb/hashcache/series_hash_cache.go +++ b/vendor/github.com/prometheus/prometheus/tsdb/hashcache/series_hash_cache.go @@ -13,7 +13,7 @@ const ( // approxBytesPerEntry is the estimated memory footprint (in bytes) of 1 cache // entry, measured with TestSeriesHashCache_MeasureApproximateSizePerEntry(). - approxBytesPerEntry = 38 + approxBytesPerEntry = 36 ) // SeriesHashCache is a bounded cache mapping the per-block series ID with diff --git a/vendor/github.com/prometheus/prometheus/tsdb/head.go b/vendor/github.com/prometheus/prometheus/tsdb/head.go index 8f8b709be1d..6151e6136b2 100644 --- a/vendor/github.com/prometheus/prometheus/tsdb/head.go +++ b/vendor/github.com/prometheus/prometheus/tsdb/head.go @@ -100,8 +100,9 @@ type Head struct { deleted map[chunks.HeadSeriesRef]int // Deleted series, and what WAL segment they must be kept until. // TODO(codesome): Extend MemPostings to return only OOOPostings, Set OOOStatus, ... Like an additional map of ooo postings. - postings *index.MemPostings // Postings lists for terms. - pfmc *PostingsForMatchersCache + postings *index.MemPostings // Postings lists for terms. + pfmc *PostingsForMatchersCache + postingsStats index.Statistics tombstones *tombstones.MemTombstones @@ -300,6 +301,12 @@ func NewHead(r prometheus.Registerer, l *slog.Logger, wal, wbl *wlog.WL, opts *H secondaryHashFunc: shf, pfmc: NewPostingsForMatchersCache(opts.PostingsForMatchersCacheTTL, opts.PostingsForMatchersCacheMaxItems, opts.PostingsForMatchersCacheMaxBytes, opts.PostingsForMatchersCacheForce, opts.PostingsForMatchersCacheMetrics), } + h.postingsStats = &reloadableStats{source: func() index.Statistics { + return completeStatistics{ + numSeries: int64(h.numSeries.Load()), + LabelValuesSketches: h.postings.LabelValuesSketches(), + } + }} if err := h.resetInMemoryState(); err != nil { return nil, err } @@ -370,6 +377,7 @@ func (h *Head) resetInMemoryState() error { h.maxOOOTime.Store(math.MinInt64) h.lastWALTruncationTime.Store(math.MinInt64) h.lastMemoryTruncationTime.Store(math.MinInt64) + return nil } @@ -1540,10 +1548,11 @@ func (h *Head) Delete(ctx context.Context, mint, maxt int64, ms ...*labels.Match ir := h.indexRange(mint, maxt) - p, err := ir.PostingsForMatchers(ctx, false, ms...) + p, _, err := ir.PostingsForMatchers(ctx, false, ms...) 
if err != nil { return fmt.Errorf("select series: %w", err) } + // TODO dimitarvdimitrov handle pending matchers var stones []tombstones.Stone for p.Next() { @@ -2451,6 +2460,15 @@ func (h *Head) ForEachSecondaryHash(fn func(ref []chunks.HeadSeriesRef, secondar } } +type completeStatistics struct { + numSeries int64 // TODO dimitarvdimitrov change this throughout to be uint64 + index.LabelValuesSketches +} + +func (c completeStatistics) TotalSeries() int64 { + return c.numSeries +} + type pairOfSlices[T1, T2 any] struct { slice1 []T1 slice2 []T2 diff --git a/vendor/github.com/prometheus/prometheus/tsdb/head_read.go b/vendor/github.com/prometheus/prometheus/tsdb/head_read.go index 675639db0b0..209159f43c8 100644 --- a/vendor/github.com/prometheus/prometheus/tsdb/head_read.go +++ b/vendor/github.com/prometheus/prometheus/tsdb/head_read.go @@ -41,10 +41,12 @@ func (h *Head) indexRange(mint, maxt int64) *headIndexReader { if hmin := h.MinTime(); hmin > mint { mint = hmin } - return &headIndexReader{head: h, mint: mint, maxt: maxt} + return &headIndexReader{head: h, mint: mint, maxt: maxt, Statistics: h.postingsStats} } type headIndexReader struct { + index.Statistics + head *Head mint, maxt int64 } @@ -114,7 +116,7 @@ func (h *headIndexReader) PostingsForAllLabelValues(ctx context.Context, name st return h.head.postings.PostingsForAllLabelValues(ctx, name) } -func (h *headIndexReader) PostingsForMatchers(ctx context.Context, concurrent bool, ms ...*labels.Matcher) (index.Postings, error) { +func (h *headIndexReader) PostingsForMatchers(ctx context.Context, concurrent bool, ms ...*labels.Matcher) (index.Postings, []*labels.Matcher, error) { return h.head.pfmc.PostingsForMatchers(ctx, h, concurrent, ms...) } diff --git a/vendor/github.com/prometheus/prometheus/tsdb/index/index.go b/vendor/github.com/prometheus/prometheus/tsdb/index/index.go index 6a1064b356f..6e0ef293526 100644 --- a/vendor/github.com/prometheus/prometheus/tsdb/index/index.go +++ b/vendor/github.com/prometheus/prometheus/tsdb/index/index.go @@ -1114,7 +1114,15 @@ type ReaderCacheProvider interface { SeriesHashCache() *hashcache.BlockSeriesHashCache } +type Statistics interface { + TotalSeries() int64 + LabelValuesCount(ctx context.Context, name string) (int64, error) + LabelValuesCardinality(ctx context.Context, name string, values ...string) (int64, error) +} + type Reader struct { + Statistics + b ByteSlice toc *TOC @@ -1167,27 +1175,27 @@ func (b realByteSlice) Sub(start, end int) ByteSlice { // NewReader returns a new index reader on the given byte slice. It automatically // handles different format versions. -func NewReader(b ByteSlice, decoder PostingsDecoder) (*Reader, error) { - return newReader(b, io.NopCloser(nil), decoder, nil) +func NewReader(b ByteSlice, decoder PostingsDecoder, stats Statistics) (*Reader, error) { + return newReader(stats, b, io.NopCloser(nil), decoder, nil) } // NewReaderWithCache is like NewReader but allows to pass a cache provider. -func NewReaderWithCache(b ByteSlice, decoder PostingsDecoder, cacheProvider ReaderCacheProvider) (*Reader, error) { - return newReader(b, io.NopCloser(nil), decoder, cacheProvider) +func NewReaderWithCache(b ByteSlice, decoder PostingsDecoder, cacheProvider ReaderCacheProvider, stats Statistics) (*Reader, error) { + return newReader(stats, b, io.NopCloser(nil), decoder, cacheProvider) } // NewFileReader returns a new index reader against the given index file. 
-func NewFileReader(path string, decoder PostingsDecoder) (*Reader, error) { - return NewFileReaderWithOptions(path, decoder, nil) +func NewFileReader(path string, decoder PostingsDecoder, stats Statistics) (*Reader, error) { + return NewFileReaderWithOptions(path, decoder, nil, stats) } // NewFileReaderWithOptions is like NewFileReader but allows to pass a cache provider. -func NewFileReaderWithOptions(path string, decoder PostingsDecoder, cacheProvider ReaderCacheProvider) (*Reader, error) { +func NewFileReaderWithOptions(path string, decoder PostingsDecoder, cacheProvider ReaderCacheProvider, stats Statistics) (*Reader, error) { f, err := fileutil.OpenMmapFile(path) if err != nil { return nil, err } - r, err := newReader(realByteSlice(f.Bytes()), f, decoder, cacheProvider) + r, err := newReader(stats, realByteSlice(f.Bytes()), f, decoder, cacheProvider) if err != nil { return nil, tsdb_errors.NewMulti( err, @@ -1198,8 +1206,9 @@ func NewFileReaderWithOptions(path string, decoder PostingsDecoder, cacheProvide return r, nil } -func newReader(b ByteSlice, c io.Closer, postingsDecoder PostingsDecoder, cacheProvider ReaderCacheProvider) (*Reader, error) { +func newReader(stats Statistics, b ByteSlice, c io.Closer, postingsDecoder PostingsDecoder, cacheProvider ReaderCacheProvider) (*Reader, error) { r := &Reader{ + Statistics: stats, b: b, c: c, postings: map[string][]postingOffset{}, diff --git a/vendor/github.com/prometheus/prometheus/tsdb/index/postingsstats.go b/vendor/github.com/prometheus/prometheus/tsdb/index/postingsstats.go index f9ee640ff5e..5fe43e9995a 100644 --- a/vendor/github.com/prometheus/prometheus/tsdb/index/postingsstats.go +++ b/vendor/github.com/prometheus/prometheus/tsdb/index/postingsstats.go @@ -14,8 +14,13 @@ package index import ( + "context" + "fmt" "math" "slices" + "unsafe" + + "github.com/tylertreat/BoomFilters" ) // Stat holds values for a single cardinality statistic. 
@@ -74,3 +79,67 @@ func (m *maxHeap) get() []Stat { }) return m.Items } + +type labelValueSketch struct { + s *boom.CountMinSketch + distinctValues int64 +} + +type LabelValuesSketches struct { + labelNames map[string]labelValueSketch +} + +func (l LabelValuesSketches) LabelValuesCount(ctx context.Context, name string) (int64, error) { + s, ok := l.labelNames[name] + if !ok { + return 0, fmt.Errorf("no sketch found for label %q", name) + } + return int64(s.distinctValues), nil +} + +func (l LabelValuesSketches) LabelValuesCardinality(ctx context.Context, name string, values ...string) (int64, error) { + valueSketch, ok := l.labelNames[name] + if !ok { + return 0, fmt.Errorf("no sketch found for label %q", name) + } + totalCount := 0 + if len(values) == 0 { + return int64(valueSketch.s.TotalCount()), nil + } + for _, value := range values { + valBytes := yoloBytes(value) + totalCount += int(valueSketch.s.Count(valBytes)) + } + return int64(totalCount), nil +} + +func yoloBytes(s string) []byte { + return *(*[]byte)(unsafe.Pointer(&s)) +} + +func (p *MemPostings) LabelValuesSketches() LabelValuesSketches { + p.mtx.RLock() + defer p.mtx.RUnlock() + + sketches := LabelValuesSketches{} + + sketches.labelNames = make(map[string]labelValueSketch, len(p.m)) + + for name, m := range p.m { + if name == "" { + continue + } + sketch := labelValueSketch{ + s: boom.NewCountMinSketch(0.01, 0.01), + distinctValues: int64(len(m)), + } + for value, postings := range m { + valBytes := yoloBytes(value) + for range postings { + sketch.s.Add(valBytes) + } + } + sketches.labelNames[name] = sketch + } + return sketches +} diff --git a/vendor/github.com/prometheus/prometheus/tsdb/ooo_head_read.go b/vendor/github.com/prometheus/prometheus/tsdb/ooo_head_read.go index 745cd5d5fea..939338b417c 100644 --- a/vendor/github.com/prometheus/prometheus/tsdb/ooo_head_read.go +++ b/vendor/github.com/prometheus/prometheus/tsdb/ooo_head_read.go @@ -52,9 +52,10 @@ func (o mergedOOOChunks) Iterator(iterator chunkenc.Iterator) chunkenc.Iterator func NewHeadAndOOOIndexReader(head *Head, inoMint, mint, maxt int64, lastGarbageCollectedMmapRef chunks.ChunkDiskMapperRef) *HeadAndOOOIndexReader { hr := &headIndexReader{ - head: head, - mint: mint, - maxt: maxt, + Statistics: head.postingsStats, + head: head, + mint: mint, + maxt: maxt, } return &HeadAndOOOIndexReader{hr, inoMint, lastGarbageCollectedMmapRef} } @@ -170,7 +171,7 @@ func getOOOSeriesChunks(s *memSeries, mint, maxt int64, lastGarbageCollectedMmap // PostingsForMatchers needs to be overridden so that the right IndexReader // implementation gets passed down to the PostingsForMatchers call. -func (oh *HeadAndOOOIndexReader) PostingsForMatchers(ctx context.Context, concurrent bool, ms ...*labels.Matcher) (index.Postings, error) { +func (oh *HeadAndOOOIndexReader) PostingsForMatchers(ctx context.Context, concurrent bool, ms ...*labels.Matcher) (index.Postings, []*labels.Matcher, error) { return oh.head.pfmc.PostingsForMatchers(ctx, oh, concurrent, ms...) 
} @@ -294,6 +295,8 @@ func (cr *HeadAndOOOChunkReader) Close() error { } type OOOCompactionHead struct { + index.Statistics + head *Head lastMmapRef chunks.ChunkDiskMapperRef lastWBLFile int @@ -315,6 +318,7 @@ func NewOOOCompactionHead(ctx context.Context, head *Head) (*OOOCompactionHead, chunkRange: head.chunkRange.Load(), mint: math.MaxInt64, maxt: math.MinInt64, + Statistics: emptyStats{}, } if head.wbl != nil { @@ -325,7 +329,7 @@ func NewOOOCompactionHead(ctx context.Context, head *Head) (*OOOCompactionHead, ch.lastWBLFile = lastWBLFile } - hr := headIndexReader{head: head, mint: ch.mint, maxt: ch.maxt} + hr := headIndexReader{head: head, mint: ch.mint, maxt: ch.maxt, Statistics: ch.Statistics} n, v := index.AllPostingsKey() // TODO: filter to series with OOO samples, before sorting. p, err := hr.Postings(ctx, n, v) @@ -440,7 +444,7 @@ func NewOOOCompactionHeadIndexReader(ch *OOOCompactionHead) IndexReader { } func (ir *OOOCompactionHeadIndexReader) Symbols() index.StringIter { - hr := headIndexReader{head: ir.ch.head, mint: ir.ch.mint, maxt: ir.ch.maxt} + hr := headIndexReader{head: ir.ch.head, mint: ir.ch.mint, maxt: ir.ch.maxt, Statistics: ir.ch.Statistics} return hr.Symbols() } @@ -466,17 +470,17 @@ func (ir *OOOCompactionHeadIndexReader) SortedPostings(p index.Postings) index.P } func (ir *OOOCompactionHeadIndexReader) ShardedPostings(p index.Postings, shardIndex, shardCount uint64) index.Postings { - hr := headIndexReader{head: ir.ch.head, mint: ir.ch.mint, maxt: ir.ch.maxt} + hr := headIndexReader{Statistics: ir.ch.Statistics, head: ir.ch.head, mint: ir.ch.mint, maxt: ir.ch.maxt} return hr.ShardedPostings(p, shardIndex, shardCount) } func (ir *OOOCompactionHeadIndexReader) LabelValuesFor(postings index.Postings, name string) storage.LabelValues { - hr := headIndexReader{head: ir.ch.head, mint: ir.ch.mint, maxt: ir.ch.maxt} + hr := headIndexReader{Statistics: ir.ch.Statistics, head: ir.ch.head, mint: ir.ch.mint, maxt: ir.ch.maxt} return hr.LabelValuesFor(postings, name) } func (ir *OOOCompactionHeadIndexReader) LabelValuesExcluding(postings index.Postings, name string) storage.LabelValues { - hr := headIndexReader{head: ir.ch.head, mint: ir.ch.mint, maxt: ir.ch.maxt} + hr := headIndexReader{Statistics: ir.ch.Statistics, head: ir.ch.head, mint: ir.ch.mint, maxt: ir.ch.maxt} return hr.LabelValuesExcluding(postings, name) } @@ -508,8 +512,8 @@ func (ir *OOOCompactionHeadIndexReader) LabelValues(_ context.Context, name stri return nil, errors.New("not implemented") } -func (ir *OOOCompactionHeadIndexReader) PostingsForMatchers(_ context.Context, concurrent bool, ms ...*labels.Matcher) (index.Postings, error) { - return nil, errors.New("not implemented") +func (ir *OOOCompactionHeadIndexReader) PostingsForMatchers(_ context.Context, concurrent bool, ms ...*labels.Matcher) (index.Postings, []*labels.Matcher, error) { + return nil, nil, errors.New("not implemented") } func (ir *OOOCompactionHeadIndexReader) LabelNames(context.Context, ...*labels.Matcher) ([]string, error) { @@ -528,6 +532,18 @@ func (ir *OOOCompactionHeadIndexReader) Close() error { return nil } +func (ir *OOOCompactionHeadIndexReader) TotalSeries() int64 { + return ir.ch.Statistics.TotalSeries() +} + +func (ir *OOOCompactionHeadIndexReader) LabelValuesCount(ctx context.Context, name string) (int64, error) { + return ir.ch.Statistics.LabelValuesCount(ctx, name) +} + +func (ir *OOOCompactionHeadIndexReader) LabelValuesCardinality(ctx context.Context, name string, values ...string) (int64, error) { + return 
ir.ch.Statistics.LabelValuesCardinality(ctx, name, values...) +} + // HeadAndOOOQuerier queries both the head and the out-of-order head. type HeadAndOOOQuerier struct { mint, maxt int64 diff --git a/vendor/github.com/prometheus/prometheus/tsdb/plan.go b/vendor/github.com/prometheus/prometheus/tsdb/plan.go new file mode 100644 index 00000000000..75ee89679b0 --- /dev/null +++ b/vendor/github.com/prometheus/prometheus/tsdb/plan.go @@ -0,0 +1,270 @@ +package tsdb + +import ( + "context" + "fmt" + "slices" + + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/tsdb/index" ) + +const ( + // TODO dimitarvdimitrov establish relative costs here + costPerIteratedPosting = 0.01 + costPerPostingListRetrieval = 10.0 +) + +type planPredicate struct { + matcher *labels.Matcher + + // selectivity is between 0 and 1. 1 indicates that the matcher will match all label values, 0 indicates it will match no values. NB: label values, not series + selectivity float64 + // cardinality is an estimate of how many series this matcher matches on its own. + cardinality int64 + labelNameUniqueVals int64 + // perMatchCost is how much it costs to run this matcher against an arbitrary label value. + perMatchCost float64 + // indexScanCost is the cost of running the matcher against all label values (or at least enough to know all the values it matches). + // This is naively perMatchCost * labelNameUniqueVals, but it might be lower if the matcher is a prefix matcher or an exact matcher. + indexScanCost float64 +} + +type plan struct { + predicates []planPredicate + applied []bool + totalSeries int64 + + indexLookupCost float64 + intersectionCost float64 + filterCost float64 + totalCost float64 +} + +func estimatePlan(predicates []planPredicate, totalSeries int64) plan { + p := plan{ + predicates: predicates, + applied: make([]bool, len(predicates)), + totalSeries: totalSeries, + } + return estimateTotalCost(p) +} + +func (p plan) applyPredicate(predicateIdx int) plan { + p.applied = slices.Clone(p.applied) + p.applied[predicateIdx] = true + return estimateTotalCost(p) +} + +func (p plan) unapplyPredicate(predicateIdx int) plan { + p.applied = slices.Clone(p.applied) + p.applied[predicateIdx] = false + return estimateTotalCost(p) +} + +func estimateTotalCost(p plan) plan { + p.indexLookupCost = 0 + p.intersectionCost = 0 + p.filterCost = 0 + p.totalCost = 0 + + for i, pr := range p.predicates { + if p.applied[i] { + p.indexLookupCost += p.calculateIndexLookupCost(pr) + } + } + + p.intersectionCost = p.calculateIntersectionCost() + + fetchedSeries := p.intersectionSize() + + for i, m := range p.predicates { + // In reality we will apply all the predicates for each series and stop once one predicate doesn't match. + // But we calculate for the worst case where we have to run all predicates for all series. + if !p.applied[i] { + p.filterCost += p.calculateFilterCost(fetchedSeries, m) + } + } + + p.totalCost = p.indexLookupCost + p.intersectionCost + p.filterCost + + return p +} + +func (p plan) calculateIndexLookupCost(pr planPredicate) float64 { + cost := 0.0 + // Running the matcher against all label values. + cost += pr.indexScanCost + + // Retrieving each posting list (e.g.
checksumming, disk seeking) + cost += costPerPostingListRetrieval * float64(pr.labelNameUniqueVals) * pr.selectivity + + return cost +} + +func (p plan) calculateIntersectionCost() float64 { + iteratedPostings := int64(0) + for i, pr := range p.predicates { + if !p.applied[i] { + continue + } + + iteratedPostings += pr.cardinality + } + + return float64(iteratedPostings) * costPerIteratedPosting +} + +func (p plan) intersectionSize() int64 { + finalSelectivity := 1.0 + for i, pr := range p.predicates { + if !p.applied[i] { + continue + } + + // We use the selectivity across all series instead of the selectivity across label values. + // For example, if {protocol=~.*} matches all values, it doesn't mean it won't reduce the result set after intersection. + // + // We also assume independence between the predicates. This is a simplification. + // For example, the selectivity of {pod=~prometheus.*} doesn't depend on whether we have already applied {statefulset=prometheus}. + finalSelectivity *= float64(pr.cardinality) / float64(p.totalSeries) + } + return int64(finalSelectivity * float64(p.totalSeries)) +} + +// calculateFilterCost is the cost of running the matcher against all fetched series. +func (p plan) calculateFilterCost(series int64, m planPredicate) float64 { + return float64(series) * m.perMatchCost +} + +func (p plan) indexMatchers() []*labels.Matcher { + var matchers []*labels.Matcher + for i, pr := range p.predicates { + if p.applied[i] { + matchers = append(matchers, pr.matcher) + } + } + return matchers +} + +func (p plan) pendingMatchers() []*labels.Matcher { + var matchers []*labels.Matcher + for i, pr := range p.predicates { + if !p.applied[i] { + matchers = append(matchers, pr.matcher) + } + } + return matchers +} + +func planIndexLookup(ctx context.Context, ms []*labels.Matcher, ix IndexPostingsReader, isSubtractingMatcher func(m *labels.Matcher) bool) (plan, error) { + allPlans, err := generatePlans(ctx, ms, ix, isSubtractingMatcher) + if err != nil { + return plan{}, fmt.Errorf("error generating plans: %w", err) + } + + lowestCostPlan := allPlans[0] + for _, plan := range allPlans { + if plan.totalCost < lowestCostPlan.totalCost { + lowestCostPlan = plan + } + } + + return lowestCostPlan, nil +} + +func generatePlans(ctx context.Context, ms []*labels.Matcher, stats index.Statistics, isSubtractingMatcher func(m *labels.Matcher) bool) ([]plan, error) { + predicates := make([]planPredicate, 0, len(ms)) + for _, m := range ms { + predicate, err := matcherToPlanPredicate(ctx, m, stats, isSubtractingMatcher) + if err != nil { + return nil, fmt.Errorf("error converting matcher to plan predicate: %w", err) + } + predicates = append(predicates, predicate) + } + + allPlans := make([]plan, 0, 1< 0 { + seriesBehindSelectedValues, err = stats.LabelValuesCardinality(ctx, m.Name, setMatches...)
+ } else { + seriesBehindSelectedValues, err = stats.LabelValuesCardinality(ctx, m.Name) + seriesBehindSelectedValues = int64(float64(seriesBehindSelectedValues) * p.selectivity) + } + } + if isSubtractingMatcher(m) { + p.cardinality = stats.TotalSeries() - seriesBehindSelectedValues + } else { + p.cardinality = seriesBehindSelectedValues + } + if err != nil { + return p, fmt.Errorf("error getting series per label value for label %s: %w", m.Name, err) + } + + switch p.matcher.Type { + case labels.MatchEqual, labels.MatchNotEqual: + if m.Value == "" { + p.indexScanCost = p.perMatchCost * float64(p.labelNameUniqueVals) + } else { + p.indexScanCost = p.perMatchCost * 32 // for on-disk index we'd scan through 32 label values and compare them to the needle before returning. + } + case labels.MatchRegexp, labels.MatchNotRegexp: + // TODO dimitarvdimitrov benchmark relative cost + switch { + case p.matcher.Prefix() != "": + p.indexScanCost = p.perMatchCost * float64(p.labelNameUniqueVals) * 0.1 + case p.matcher.IsRegexOptimized(): + if len(setMatches) > 0 { + p.indexScanCost = p.perMatchCost * float64(len(setMatches)) + } else { + p.indexScanCost = p.perMatchCost * float64(p.labelNameUniqueVals) / 10 // Optimized regexes are expected to be faster. + } + default: + p.indexScanCost = p.perMatchCost * float64(p.labelNameUniqueVals) + } + } + + return p, nil +} + +func generatePredicateCombinations(plans []plan, currentPlan plan, decidedPredicates int) []plan { + if decidedPredicates == len(currentPlan.predicates) { + return append(plans, currentPlan) + } + + // Generate two plans, one with the current predicate applied and one without. + // This is done by copying the current plan and applying the predicate to the copy. + // The copy is then added to the list of plans to be returned. + plans = generatePredicateCombinations(plans, currentPlan, decidedPredicates+1) + + p := currentPlan.applyPredicate(decidedPredicates) + plans = generatePredicateCombinations(plans, p, decidedPredicates+1) + + return plans +} diff --git a/vendor/github.com/prometheus/prometheus/tsdb/postings_for_matchers_cache.go b/vendor/github.com/prometheus/prometheus/tsdb/postings_for_matchers_cache.go index b498eff19f6..9fc614c7485 100644 --- a/vendor/github.com/prometheus/prometheus/tsdb/postings_for_matchers_cache.go +++ b/vendor/github.com/prometheus/prometheus/tsdb/postings_for_matchers_cache.go @@ -5,6 +5,7 @@ import ( "context" "errors" "fmt" + "slices" "strings" "sync" "time" @@ -41,6 +42,8 @@ const ( // IndexPostingsReader is a subset of IndexReader methods, the minimum required to evaluate PostingsForMatchers. type IndexPostingsReader interface { + index.Statistics + // LabelValues returns possible label values which may not be sorted. LabelValues(ctx context.Context, name string, matchers ...*labels.Matcher) ([]string, error) @@ -110,7 +113,7 @@ type PostingsForMatchersCache struct { timeNow func() time.Time // postingsForMatchers can be replaced for testing purposes - postingsForMatchers func(ctx context.Context, ix IndexPostingsReader, ms ...*labels.Matcher) (index.Postings, error) + postingsForMatchers func(ctx context.Context, ix IndexPostingsReader, ms ...*labels.Matcher) (index.Postings, []*labels.Matcher, error) // onPromiseExecutionDoneBeforeHook is used for testing purposes. It allows to hook at the // beginning of onPromiseExecutionDone() execution. 
@@ -125,7 +128,7 @@ type PostingsForMatchersCache struct { forceAttrib attribute.KeyValue } -func (c *PostingsForMatchersCache) PostingsForMatchers(ctx context.Context, ix IndexPostingsReader, concurrent bool, ms ...*labels.Matcher) (index.Postings, error) { +func (c *PostingsForMatchersCache) PostingsForMatchers(ctx context.Context, ix IndexPostingsReader, concurrent bool, ms ...*labels.Matcher) (index.Postings, []*labels.Matcher, error) { c.metrics.requests.Inc() span := trace.SpanFromContext(ctx) @@ -140,22 +143,22 @@ func (c *PostingsForMatchersCache) PostingsForMatchers(ctx context.Context, ix I c.metrics.skipsBecauseIneligible.Inc() span.AddEvent("cache not used") - p, err := c.postingsForMatchers(ctx, ix, ms...) + p, pendingMatchers, err := c.postingsForMatchers(ctx, ix, ms...) if err != nil { span.SetStatus(codes.Error, "getting postings for matchers without cache failed") span.RecordError(err) } - return p, err + return p, pendingMatchers, err } span.AddEvent("using cache") c.expire() - p, err := c.postingsForMatchersPromise(ctx, ix, ms)(ctx) + p, pendingMatchers, err := c.postingsForMatchersPromise(ctx, ix, ms)(ctx) if err != nil { span.SetStatus(codes.Error, "getting postings for matchers with cache failed") span.RecordError(err) } - return p, err + return p, pendingMatchers, err } type postingsForMatcherPromise struct { @@ -165,27 +168,28 @@ type postingsForMatcherPromise struct { // The result of the promise is stored either in cloner or err (only of the two is valued). // Do not access these fields until the done channel is closed. - done chan struct{} - cloner *index.PostingsCloner - err error + done chan struct{} + cloner *index.PostingsCloner + pendingMatchers []*labels.Matcher + err error // Keep track of the time this promise completed evaluation. // Do not access this field until the done channel is closed. 
evaluationCompletedAt time.Time } -func (p *postingsForMatcherPromise) result(ctx context.Context) (index.Postings, error) { +func (p *postingsForMatcherPromise) result(ctx context.Context) (index.Postings, []*labels.Matcher, error) { select { case <-ctx.Done(): - return nil, fmt.Errorf("interrupting wait on postingsForMatchers promise due to context error: %w", ctx.Err()) + return nil, nil, fmt.Errorf("interrupting wait on postingsForMatchers promise due to context error: %w", ctx.Err()) case <-p.done: // Checking context error is necessary for deterministic tests, // as channel selection order is random if ctx.Err() != nil { - return nil, fmt.Errorf("completed postingsForMatchers promise, but context has error: %w", ctx.Err()) + return nil, nil, fmt.Errorf("completed postingsForMatchers promise, but context has error: %w", ctx.Err()) } if p.err != nil { - return nil, fmt.Errorf("postingsForMatchers promise completed with error: %w", p.err) + return nil, nil, fmt.Errorf("postingsForMatchers promise completed with error: %w", p.err) } trace.SpanFromContext(ctx).AddEvent("completed postingsForMatchers promise", trace.WithAttributes( @@ -193,14 +197,14 @@ func (p *postingsForMatcherPromise) result(ctx context.Context) (index.Postings, attribute.Int64("evaluation completed at (epoch seconds)", p.evaluationCompletedAt.Unix()), )) - return p.cloner.Clone(), nil + return p.cloner.Clone(), slices.Clone(p.pendingMatchers), nil } } -func (c *PostingsForMatchersCache) postingsForMatchersPromise(ctx context.Context, ix IndexPostingsReader, ms []*labels.Matcher) func(context.Context) (index.Postings, error) { +func (c *PostingsForMatchersCache) postingsForMatchersPromise(ctx context.Context, ix IndexPostingsReader, ms []*labels.Matcher) func(context.Context) (index.Postings, []*labels.Matcher, error) { span := trace.SpanFromContext(ctx) - promiseCallersCtxTracker, promiseExecCtx := newContextsTracker() + promiseCallersCtxTracker, promiseExecCtx := newContextsTracker(ctx) promise := &postingsForMatcherPromise{ done: make(chan struct{}), callersCtxTracker: promiseCallersCtxTracker, @@ -242,7 +246,7 @@ func (c *PostingsForMatchersCache) postingsForMatchersPromise(ctx context.Contex )) c.metrics.skipsBecauseStale.Inc() - return func(ctx context.Context) (index.Postings, error) { + return func(ctx context.Context) (index.Postings, []*labels.Matcher, error) { return c.postingsForMatchers(ctx, ix, ms...) } } @@ -264,7 +268,7 @@ func (c *PostingsForMatchersCache) postingsForMatchersPromise(ctx context.Contex attribute.String("cache_key", key), )) - return func(ctx context.Context) (index.Postings, error) { + return func(ctx context.Context) (index.Postings, []*labels.Matcher, error) { return c.postingsForMatchers(ctx, ix, ms...) } } @@ -289,10 +293,11 @@ func (c *PostingsForMatchersCache) postingsForMatchersPromise(ctx context.Contex // 2. Cancel postingsForMatchers() once all callers contexts have been canceled, so that we don't waist // resources computing postingsForMatchers() is all requests have been canceled (this is particularly // important if the postingsForMatchers() is very slow due to expensive regexp matchers). - if postings, err := c.postingsForMatchers(promiseExecCtx, ix, ms...); err != nil { + if postings, pendingMatchers, err := c.postingsForMatchers(promiseExecCtx, ix, ms...); err != nil { promise.err = err } else { promise.cloner = index.NewPostingsCloner(postings) + promise.pendingMatchers = pendingMatchers } // Keep track of when the evaluation completed. 
@@ -489,7 +494,7 @@ type indexReaderWithPostingsForMatchers struct { pfmc *PostingsForMatchersCache } -func (ir indexReaderWithPostingsForMatchers) PostingsForMatchers(ctx context.Context, concurrent bool, ms ...*labels.Matcher) (index.Postings, error) { +func (ir indexReaderWithPostingsForMatchers) PostingsForMatchers(ctx context.Context, concurrent bool, ms ...*labels.Matcher) (index.Postings, []*labels.Matcher, error) { return ir.pfmc.PostingsForMatchers(ctx, ir, concurrent, ms...) } @@ -527,12 +532,12 @@ type contextsTracker struct { trackedStopFuncs []func() bool // The stop watching functions for all tracked contexts. } -func newContextsTracker() (*contextsTracker, context.Context) { +func newContextsTracker(ctx context.Context) (*contextsTracker, context.Context) { t := &contextsTracker{} // Create a new execution context that will be canceled only once all tracked contexts have done. var execCtx context.Context - execCtx, t.cancelExecCtx = context.WithCancel(context.Background()) + execCtx, t.cancelExecCtx = context.WithCancel(context.WithoutCancel(ctx)) return t, execCtx } diff --git a/vendor/github.com/prometheus/prometheus/tsdb/querier.go b/vendor/github.com/prometheus/prometheus/tsdb/querier.go index 0d2f1ddcd89..e6601efa280 100644 --- a/vendor/github.com/prometheus/prometheus/tsdb/querier.go +++ b/vendor/github.com/prometheus/prometheus/tsdb/querier.go @@ -123,7 +123,7 @@ func selectSeriesSet(ctx context.Context, sortSeries bool, hints *storage.Select ) storage.SeriesSet { disableTrimming := false sharded := hints != nil && hints.ShardCount > 0 - p, err := index.PostingsForMatchers(ctx, sharded, ms...) + p, pendingMatchers, err := index.PostingsForMatchers(ctx, sharded, ms...) if err != nil { return storage.ErrSeriesSet(err) } @@ -140,11 +140,11 @@ func selectSeriesSet(ctx context.Context, sortSeries bool, hints *storage.Select disableTrimming = hints.DisableTrimming if hints.Func == "series" { // When you're only looking up metadata (for example series API), you don't need to load any chunks. - return newBlockSeriesSet(index, newNopChunkReader(), tombstones, p, mint, maxt, disableTrimming) + return newBlockSeriesSet(index, newNopChunkReader(), tombstones, p, mint, maxt, disableTrimming, pendingMatchers) } } - return newBlockSeriesSet(index, chunks, tombstones, p, mint, maxt, disableTrimming) + return newBlockSeriesSet(index, chunks, tombstones, p, mint, maxt, disableTrimming, pendingMatchers) } // blockChunkQuerier provides chunk querying access to a single block database. @@ -176,7 +176,7 @@ func selectChunkSeriesSet(ctx context.Context, sortSeries bool, hints *storage.S maxt = hints.End disableTrimming = hints.DisableTrimming } - p, err := index.PostingsForMatchers(ctx, sharded, ms...) + p, pendingMatchers, err := index.PostingsForMatchers(ctx, sharded, ms...) if err != nil { return storage.ErrChunkSeriesSet(err) } @@ -186,18 +186,21 @@ func selectChunkSeriesSet(ctx context.Context, sortSeries bool, hints *storage.S if sortSeries { p = index.SortedPostings(p) } - return NewBlockChunkSeriesSet(blockID, index, chunks, tombstones, p, mint, maxt, disableTrimming) + + return NewBlockChunkSeriesSet(blockID, index, chunks, tombstones, p, mint, maxt, disableTrimming, pendingMatchers) } +var TryOptimizing = true + // PostingsForMatchers assembles a single postings iterator against the index reader // based on the given matchers. The resulting postings are not ordered by series. 
-func PostingsForMatchers(ctx context.Context, ix IndexPostingsReader, ms ...*labels.Matcher) (index.Postings, error) { +// The returned pendingMatchers are matchers that have not been applied to the returned postings yet. +func PostingsForMatchers(ctx context.Context, ix IndexPostingsReader, ms ...*labels.Matcher) (index.Postings, []*labels.Matcher, error) { if len(ms) == 1 && ms[0].Name == "" && ms[0].Value == "" { k, v := index.AllPostingsKey() - return ix.Postings(ctx, k, v) + p, err := ix.Postings(ctx, k, v) + return p, nil, err } - - var its, notIts []index.Postings // See which label must be non-empty. // Optimization for case like {l=~".", l!="1"}. labelMustBeSet := make(map[string]bool, len(ms)) @@ -212,6 +215,22 @@ func PostingsForMatchers(ctx context.Context, ix IndexPostingsReader, ms ...*lab } return (m.Type == labels.MatchNotEqual || m.Type == labels.MatchNotRegexp) && m.Matches("") } + var pendingMatchers []*labels.Matcher + if ctxAskedToDisable, _ := ctx.Value("disable_optimized_index_lookup").(bool); TryOptimizing && !ctxAskedToDisable { + p, err := planIndexLookup(ctx, ms, ix, isSubtractingMatcher) + if err == nil { + pendingMatchers = p.pendingMatchers() + ms = p.indexMatchers() + } + + if len(ms) == 0 { + k, v := index.AllPostingsKey() + p, err := ix.Postings(ctx, k, v) + return p, pendingMatchers, err + } + } + var its, notIts []index.Postings + hasSubtractingMatchers, hasIntersectingMatchers := false, false for _, m := range ms { if isSubtractingMatcher(m) { @@ -228,7 +247,7 @@ func PostingsForMatchers(ctx context.Context, ix IndexPostingsReader, ms ...*lab k, v := index.AllPostingsKey() allPostings, err := ix.Postings(ctx, k, v) if err != nil { - return nil, err + return nil, nil, err } its = append(its, allPostings) } @@ -248,24 +267,24 @@ func PostingsForMatchers(ctx context.Context, ix IndexPostingsReader, ms ...*lab for _, m := range ms { if ctx.Err() != nil { - return nil, ctx.Err() + return nil, nil, ctx.Err() } switch { case m.Name == "" && m.Value == "": // We already handled the case at the top of the function, // and it is unexpected to get all postings again here. - return nil, errors.New("unexpected all postings") + return nil, nil, errors.New("unexpected all postings") case m.Type == labels.MatchRegexp && m.Value == ".*": // .* regexp matches any string: do nothing. case m.Type == labels.MatchNotRegexp && m.Value == ".*": - return index.EmptyPostings(), nil + return index.EmptyPostings(), nil, nil case m.Type == labels.MatchRegexp && m.Value == ".+": // .+ regexp matches any non-empty string: get postings for all label values. it := ix.PostingsForAllLabelValues(ctx, m.Name) if index.IsEmptyPostingsType(it) { - return index.EmptyPostings(), nil + return index.EmptyPostings(), nil, nil } its = append(its, it) case m.Type == labels.MatchNotRegexp && m.Value == ".+": @@ -282,12 +301,12 @@ func PostingsForMatchers(ctx context.Context, ix IndexPostingsReader, ms ...*lab // doesn't match empty, then subtract it out at the end. inverse, err := m.Inverse() if err != nil { - return nil, err + return nil, nil, err } it, err := postingsForMatcher(ctx, ix, inverse) if err != nil { - return nil, err + return nil, nil, err } notIts = append(notIts, it) case isNot && !matchesEmpty: // l!="" @@ -295,25 +314,25 @@ func PostingsForMatchers(ctx context.Context, ix IndexPostingsReader, ms ...*lab // be empty we need to use inversePostingsForMatcher. 
inverse, err := m.Inverse() if err != nil { - return nil, err + return nil, nil, err } it, err := inversePostingsForMatcher(ctx, ix, inverse) if err != nil { - return nil, err + return nil, nil, err } if index.IsEmptyPostingsType(it) { - return index.EmptyPostings(), nil + return index.EmptyPostings(), nil, nil } its = append(its, it) default: // l="a", l=~"a|b", l=~"a.b", etc. // Non-Not matcher, use normal postingsForMatcher. it, err := postingsForMatcher(ctx, ix, m) if err != nil { - return nil, err + return nil, nil, err } if index.IsEmptyPostingsType(it) { - return index.EmptyPostings(), nil + return index.EmptyPostings(), nil, nil } its = append(its, it) } @@ -324,7 +343,7 @@ func PostingsForMatchers(ctx context.Context, ix IndexPostingsReader, ms ...*lab // https://github.com/prometheus/prometheus/pull/3578#issuecomment-351653555 it, err := inversePostingsForMatcher(ctx, ix, m) if err != nil { - return nil, err + return nil, nil, err } notIts = append(notIts, it) } @@ -336,7 +355,7 @@ func PostingsForMatchers(ctx context.Context, ix IndexPostingsReader, ms ...*lab it = index.Without(it, n) } - return it, nil + return it, pendingMatchers, nil } func postingsForMatcher(ctx context.Context, ix IndexPostingsReader, m *labels.Matcher) (index.Postings, error) { @@ -432,11 +451,14 @@ func labelValuesWithMatchers(ctx context.Context, r IndexReader, name string, ma return allValues, nil } - p, err := r.PostingsForMatchers(ctx, false, matchers...) + p, pendingMatchers, err := r.PostingsForMatchers(ctx, false, matchers...) if err != nil { return nil, fmt.Errorf("fetching postings for matchers: %w", err) } + // TODO dimitarvdimitrov handle pending matchers + _ = pendingMatchers + // Let's see if expanded postings for matchers have smaller cardinality than label values. // Since computing label values from series is expensive, we apply a limit on number of expanded // postings (and series). @@ -567,10 +589,14 @@ func (p *prependPostings) Err() error { } func labelNamesWithMatchers(ctx context.Context, r IndexReader, matchers ...*labels.Matcher) ([]string, error) { - p, err := r.PostingsForMatchers(ctx, false, matchers...) + p, pendingMatchers, err := r.PostingsForMatchers(ctx, false, matchers...) if err != nil { return nil, err } + + // TODO dimitarvdimitrov handle pending matchers + _ = pendingMatchers + return r.LabelNamesFor(ctx, p) } @@ -593,6 +619,10 @@ type blockBaseSeriesSet struct { index IndexReader chunks ChunkReader tombstones tombstones.Reader + pendingMatchers []*labels.Matcher + + closed bool + mint, maxt int64 disableTrimming bool @@ -604,6 +634,7 @@ type blockBaseSeriesSet struct { } func (b *blockBaseSeriesSet) Next() bool { + tempLabels := labels.Labels{} for b.p.Next() { if err := b.index.Series(b.p.At(), &b.builder, &b.bufChks); err != nil { // Postings may be stale. Skip if no underlying series exists. @@ -614,6 +645,25 @@ func (b *blockBaseSeriesSet) Next() bool { return false } + // Check pending matchers before loading chunks + if len(b.pendingMatchers) > 0 { + b.builder.Overwrite(&tempLabels) + if tempLabels.Len() == 0 { + // There's a potential bug with stringlabels where Overwrite doesn't work as expected. 
+ tempLabels = b.builder.Labels() + } + allMatch := true + for _, m := range b.pendingMatchers { + if !m.Matches(tempLabels.Get(m.Name)) { + allMatch = false + break + } + } + if !allMatch { + continue + } + } + if len(b.bufChks) == 0 { continue } @@ -1178,7 +1228,7 @@ type blockSeriesSet struct { blockBaseSeriesSet } -func newBlockSeriesSet(i IndexReader, c ChunkReader, t tombstones.Reader, p index.Postings, mint, maxt int64, disableTrimming bool) storage.SeriesSet { +func newBlockSeriesSet(i IndexReader, c ChunkReader, t tombstones.Reader, p index.Postings, mint, maxt int64, disableTrimming bool, pendingMatchers []*labels.Matcher) *blockSeriesSet { return &blockSeriesSet{ blockBaseSeriesSet{ index: i, @@ -1188,6 +1238,7 @@ func newBlockSeriesSet(i IndexReader, c ChunkReader, t tombstones.Reader, p inde mint: mint, maxt: maxt, disableTrimming: disableTrimming, + pendingMatchers: pendingMatchers, }, } } @@ -1208,7 +1259,7 @@ type blockChunkSeriesSet struct { blockBaseSeriesSet } -func NewBlockChunkSeriesSet(id ulid.ULID, i IndexReader, c ChunkReader, t tombstones.Reader, p index.Postings, mint, maxt int64, disableTrimming bool) storage.ChunkSeriesSet { +func NewBlockChunkSeriesSet(id ulid.ULID, i IndexReader, c ChunkReader, t tombstones.Reader, p index.Postings, mint, maxt int64, disableTrimming bool, pendingMatchers []*labels.Matcher) storage.ChunkSeriesSet { return &blockChunkSeriesSet{ blockBaseSeriesSet{ blockID: id, @@ -1219,6 +1270,7 @@ func NewBlockChunkSeriesSet(id ulid.ULID, i IndexReader, c ChunkReader, t tombst mint: mint, maxt: maxt, disableTrimming: disableTrimming, + pendingMatchers: pendingMatchers, }, } } diff --git a/vendor/github.com/tylertreat/BoomFilters/.gitignore b/vendor/github.com/tylertreat/BoomFilters/.gitignore new file mode 100644 index 00000000000..daf913b1b34 --- /dev/null +++ b/vendor/github.com/tylertreat/BoomFilters/.gitignore @@ -0,0 +1,24 @@ +# Compiled Object files, Static and Dynamic libs (Shared Objects) +*.o +*.a +*.so + +# Folders +_obj +_test + +# Architecture specific extensions/prefixes +*.[568vq] +[568vq].out + +*.cgo1.go +*.cgo2.c +_cgo_defun.c +_cgo_gotypes.go +_cgo_export.* + +_testmain.go + +*.exe +*.test +*.prof diff --git a/vendor/github.com/tylertreat/BoomFilters/.travis.yml b/vendor/github.com/tylertreat/BoomFilters/.travis.yml new file mode 100644 index 00000000000..8c180735f49 --- /dev/null +++ b/vendor/github.com/tylertreat/BoomFilters/.travis.yml @@ -0,0 +1,12 @@ +language: go + +go: + - 1.10.x + - 1.11.x + - tip + +before_install: go get golang.org/x/tools/cmd/cover +script: go test -cover ./... + +notifications: + email: false diff --git a/vendor/github.com/tylertreat/BoomFilters/LICENSE b/vendor/github.com/tylertreat/BoomFilters/LICENSE new file mode 100644 index 00000000000..e06d2081865 --- /dev/null +++ b/vendor/github.com/tylertreat/BoomFilters/LICENSE @@ -0,0 +1,202 @@ +Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
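An aside on the planner introduced earlier in this diff: plan.intersectionSize multiplies per-predicate selectivities under an independence assumption. The standalone sketch below only illustrates that arithmetic with invented numbers (totalSeries and the cardinalities are made up); it is not code from this change.

```go
package main

import "fmt"

func main() {
	const totalSeries = 1_000_000.0

	// Invented per-matcher cardinalities: how many series each matcher selects
	// on its own, e.g. {statefulset="prometheus"} and {pod=~"prometheus.*"}.
	cardinalities := []float64{50_000, 200_000}

	// Under the planner's independence assumption, the combined selectivity is
	// the product of the individual selectivities (cardinality / totalSeries).
	selectivity := 1.0
	for _, c := range cardinalities {
		selectivity *= c / totalSeries
	}

	// 0.05 * 0.2 * 1,000,000 = 10,000 estimated series after intersection.
	fmt.Printf("estimated intersection size: %.0f series\n", selectivity*totalSeries)
}
```

This estimated intersection size is what the planner weighs against index scan and per-series filter costs when deciding which matchers to defer as pending matchers.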
+ diff --git a/vendor/github.com/tylertreat/BoomFilters/README.md b/vendor/github.com/tylertreat/BoomFilters/README.md new file mode 100644 index 00000000000..88f4455d978 --- /dev/null +++ b/vendor/github.com/tylertreat/BoomFilters/README.md @@ -0,0 +1,429 @@ +# Boom Filters +[![Build Status](https://travis-ci.org/tylertreat/BoomFilters.svg?branch=master)](https://travis-ci.org/tylertreat/BoomFilters) [![GoDoc](https://godoc.org/github.com/tylertreat/BoomFilters?status.png)](https://godoc.org/github.com/tylertreat/BoomFilters) + +**Boom Filters** are probabilistic data structures for [processing continuous, unbounded streams](http://www.bravenewgeek.com/stream-processing-and-probabilistic-methods/). This includes **Stable Bloom Filters**, **Scalable Bloom Filters**, **Counting Bloom Filters**, **Inverse Bloom Filters**, **Cuckoo Filters**, several variants of **traditional Bloom filters**, **HyperLogLog**, **Count-Min Sketch**, and **MinHash**. + +Classic Bloom filters generally require a priori knowledge of the data set in order to allocate an appropriately sized bit array. This works well for offline processing, but online processing typically involves unbounded data streams. With enough data, a traditional Bloom filter "fills up", after which it has a false-positive probability of 1. + +Boom Filters are useful for situations where the size of the data set isn't known ahead of time. For example, a Stable Bloom Filter can be used to deduplicate events from an unbounded event stream with a specified upper bound on false positives and minimal false negatives. Alternatively, an Inverse Bloom Filter is ideal for deduplicating a stream where duplicate events are relatively close together. This results in no false positives and, depending on how close together duplicates are, a small probability of false negatives. Scalable Bloom Filters place a tight upper bound on false positives while avoiding false negatives but require allocating memory proportional to the size of the data set. Counting Bloom Filters and Cuckoo Filters are useful for cases which require adding and removing elements to and from a set. + +For large or unbounded data sets, calculating the exact cardinality is impractical. HyperLogLog uses a fraction of the memory while providing an accurate approximation. Similarly, Count-Min Sketch provides an efficient way to estimate event frequency for data streams, while Top-K tracks the top-k most frequent elements. + +MinHash is a probabilistic algorithm to approximate the similarity between two sets. This can be used to cluster or compare documents by splitting the corpus into a bag of words. + +## Installation + +``` +$ go get github.com/tylertreat/BoomFilters +``` + +## Stable Bloom Filter + +This is an implementation of Stable Bloom Filters as described by Deng and Rafiei in [Approximately Detecting Duplicates for Streaming Data using Stable Bloom Filters](http://webdocs.cs.ualberta.ca/~drafiei/papers/DupDet06Sigmod.pdf). + +A Stable Bloom Filter (SBF) continuously evicts stale information so that it has room for more recent elements. Like traditional Bloom filters, an SBF has a non-zero probability of false positives, which is controlled by several parameters. Unlike the classic Bloom filter, an SBF has a tight upper bound on the rate of false positives while introducing a non-zero rate of false negatives. The false-positive rate of a classic Bloom filter eventually reaches 1, after which all queries result in a false positive. 
The stable-point property of an SBF means the false-positive rate asymptotically approaches a configurable fixed constant. A classic Bloom filter is actually a special case of SBF where the eviction rate is zero and the cell size is one, so this provides support for them as well (in addition to bitset-based Bloom filters). + +Stable Bloom Filters are useful for cases where the size of the data set isn't known a priori and memory is bounded. For example, an SBF can be used to deduplicate events from an unbounded event stream with a specified upper bound on false positives and minimal false negatives. + +### Usage + +```go +package main + +import ( + "fmt" + "github.com/tylertreat/BoomFilters" +) + +func main() { + sbf := boom.NewDefaultStableBloomFilter(10000, 0.01) + fmt.Println("stable point", sbf.StablePoint()) + + sbf.Add([]byte(`a`)) + if sbf.Test([]byte(`a`)) { + fmt.Println("contains a") + } + + if !sbf.TestAndAdd([]byte(`b`)) { + fmt.Println("doesn't contain b") + } + + if sbf.Test([]byte(`b`)) { + fmt.Println("now it contains b!") + } + + // Restore to initial state. + sbf.Reset() +} +``` + +## Scalable Bloom Filter + +This is an implementation of a Scalable Bloom Filter as described by Almeida, Baquero, Preguica, and Hutchison in [Scalable Bloom Filters](http://gsd.di.uminho.pt/members/cbm/ps/dbloom.pdf). + +A Scalable Bloom Filter (SBF) dynamically adapts to the size of the data set while enforcing a tight upper bound on the rate of false positives and a false-negative probability of zero. This works by adding Bloom filters with geometrically decreasing false-positive rates as filters become full. A tightening ratio, r, controls the filter growth. The compounded probability over the whole series converges to a target value, even accounting for an infinite series. + +Scalable Bloom Filters are useful for cases where the size of the data set isn't known a priori and memory constraints aren't of particular concern. For situations where memory is bounded, consider using Inverse or Stable Bloom Filters. + +The core parts of this implementation were originally written by Jian Zhen as discussed in [Benchmarking Bloom Filters and Hash Functions in Go](http://zhen.org/blog/benchmarking-bloom-filters-and-hash-functions-in-go/). + +### Usage + +```go +package main + +import ( + "fmt" + "github.com/tylertreat/BoomFilters" +) + +func main() { + sbf := boom.NewDefaultScalableBloomFilter(0.01) + + sbf.Add([]byte(`a`)) + if sbf.Test([]byte(`a`)) { + fmt.Println("contains a") + } + + if !sbf.TestAndAdd([]byte(`b`)) { + fmt.Println("doesn't contain b") + } + + if sbf.Test([]byte(`b`)) { + fmt.Println("now it contains b!") + } + + // Restore to initial state. + sbf.Reset() +} +``` + +## Inverse Bloom Filter + +An Inverse Bloom Filter, or "the opposite of a Bloom filter", is a concurrent, probabilistic data structure used to test whether an item has been observed or not. This implementation, [originally described and written by Jeff Hodges](http://www.somethingsimilar.com/2012/05/21/the-opposite-of-a-bloom-filter/), replaces the use of MD5 hashing with a non-cryptographic FNV-1 function. + +The Inverse Bloom Filter may report a false negative but can never report a false positive. That is, it may report that an item has not been seen when it actually has, but it will never report an item as seen which it hasn't come across. This behaves in a similar manner to a fixed-size hashmap which does not handle conflicts. 
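As a rough illustration of the single-array, overwrite-on-collision idea described above, here is a minimal sketch. It is not the library's implementation (which uses FNV-1 hashing and a compare-and-swap loop); the type and function names are invented for illustration.

```go
package main

import (
	"fmt"
	"hash/fnv"
	"sync/atomic"
)

// naiveInverseFilter is a deliberately simplified illustration: a fixed-size
// array of slots where every observed item overwrites whatever previously
// hashed to the same slot. A positive answer can never be wrong (no false
// positives), but a colliding item can evict an earlier one (false negatives).
type naiveInverseFilter struct {
	slots []atomic.Pointer[[]byte]
}

func newNaiveInverseFilter(size int) *naiveInverseFilter {
	return &naiveInverseFilter{slots: make([]atomic.Pointer[[]byte], size)}
}

func (f *naiveInverseFilter) slot(data []byte) *atomic.Pointer[[]byte] {
	h := fnv.New64()
	h.Write(data)
	return &f.slots[h.Sum64()%uint64(len(f.slots))]
}

// testAndAdd reports whether data was already stored in its slot, then stores
// it there. The real filter does this with a compare-and-swap loop to stay
// race-free under concurrency; this sketch keeps it single-threaded.
func (f *naiveInverseFilter) testAndAdd(data []byte) bool {
	s := f.slot(data)
	prev := s.Load()
	cp := append([]byte(nil), data...)
	s.Store(&cp)
	return prev != nil && string(*prev) == string(data)
}

func main() {
	f := newNaiveInverseFilter(1024)
	fmt.Println(f.testAndAdd([]byte("a"))) // false: not seen before
	fmt.Println(f.testAndAdd([]byte("a"))) // true: seen before
}
```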
+ +This structure is particularly well-suited to streams in which duplicates are relatively close together. It uses a CAS-style approach, which makes it thread-safe. + +### Usage + +```go +package main + +import ( + "fmt" + "github.com/tylertreat/BoomFilters" +) + +func main() { + ibf := boom.NewInverseBloomFilter(10000) + + ibf.Add([]byte(`a`)) + if ibf.Test([]byte(`a`)) { + fmt.Println("contains a") + } + + if !ibf.TestAndAdd([]byte(`b`)) { + fmt.Println("doesn't contain b") + } + + if ibf.Test([]byte(`b`)) { + fmt.Println("now it contains b!") + } +} +``` + +## Counting Bloom Filter + +This is an implementation of a Counting Bloom Filter as described by Fan, Cao, Almeida, and Broder in [Summary Cache: A Scalable Wide-Area Web Cache Sharing Protocol](http://pages.cs.wisc.edu/~jussara/papers/00ton.pdf). + +A Counting Bloom Filter (CBF) provides a way to remove elements by using an array of n-bit buckets. When an element is added, the respective buckets are incremented. To remove an element, the respective buckets are decremented. A query checks that each of the respective buckets are non-zero. Because CBFs allow elements to be removed, they introduce a non-zero probability of false negatives in addition to the possibility of false positives. + +Counting Bloom Filters are useful for cases where elements are both added and removed from the data set. Since they use n-bit buckets, CBFs use roughly n-times more memory than traditional Bloom filters. + +See Deletable Bloom Filter for an alternative which avoids false negatives. + +### Usage + +```go +package main + +import ( + "fmt" + "github.com/tylertreat/BoomFilters" +) + +func main() { + bf := boom.NewDefaultCountingBloomFilter(1000, 0.01) + + bf.Add([]byte(`a`)) + if bf.Test([]byte(`a`)) { + fmt.Println("contains a") + } + + if !bf.TestAndAdd([]byte(`b`)) { + fmt.Println("doesn't contain b") + } + + if bf.TestAndRemove([]byte(`b`)) { + fmt.Println("removed b") + } + + // Restore to initial state. + bf.Reset() +} +``` + +## Cuckoo Filter + +This is an implementation of a Cuckoo Filter as described by Andersen, Kaminsky, and Mitzenmacher in [Cuckoo Filter: Practically Better Than Bloom](http://www.pdl.cmu.edu/PDL-FTP/FS/cuckoo-conext2014.pdf). The Cuckoo Filter is similar to the Counting Bloom Filter in that it supports adding and removing elements, but it does so in a way that doesn't significantly degrade space and performance. + +It works by using a cuckoo hashing scheme for inserting items. Instead of storing the elements themselves, it stores their fingerprints which also allows for item removal without false negatives (if you don't attempt to remove an item not contained in the filter). + +For applications that store many items and target moderately low false-positive rates, cuckoo filters have lower space overhead than space-optimized Bloom filters. + +### Usage + +```go +package main + +import ( + "fmt" + "github.com/tylertreat/BoomFilters" +) + +func main() { + cf := boom.NewCuckooFilter(1000, 0.01) + + cf.Add([]byte(`a`)) + if cf.Test([]byte(`a`)) { + fmt.Println("contains a") + } + + if contains, _ := cf.TestAndAdd([]byte(`b`)); !contains { + fmt.Println("doesn't contain b") + } + + if cf.TestAndRemove([]byte(`b`)) { + fmt.Println("removed b") + } + + // Restore to initial state. + cf.Reset() +} +``` + +## Classic Bloom Filter + +A classic Bloom filter is a special case of a Stable Bloom Filter whose eviction rate is zero and cell size is one. We call this special case an Unstable Bloom Filter. 
Because cells require more memory overhead, this package also provides two bitset-based Bloom filter variations. The first variation is the traditional implementation consisting of a single bit array. The second implementation is a partitioned approach which uniformly distributes the probability of false positives across all elements. + +Bloom filters have a limited capacity, depending on the configured size. Once all bits are set, the probability of a false positive is 1. However, traditional Bloom filters cannot return a false negative. + +A Bloom filter is ideal for cases where the data set is known a priori because the false-positive rate can be configured by the size and number of hash functions. + +### Usage + +```go +package main + +import ( + "fmt" + "github.com/tylertreat/BoomFilters" +) + +func main() { + // We could also use boom.NewUnstableBloomFilter or boom.NewPartitionedBloomFilter. + bf := boom.NewBloomFilter(1000, 0.01) + + bf.Add([]byte(`a`)) + if bf.Test([]byte(`a`)) { + fmt.Println("contains a") + } + + if !bf.TestAndAdd([]byte(`b`)) { + fmt.Println("doesn't contain b") + } + + if bf.Test([]byte(`b`)) { + fmt.Println("now it contains b!") + } + + // Restore to initial state. + bf.Reset() +} +``` + +## Count-Min Sketch + +This is an implementation of a Count-Min Sketch as described by Cormode and Muthukrishnan in [An Improved Data Stream Summary: The Count-Min Sketch and its Applications](http://dimacs.rutgers.edu/~graham/pubs/papers/cm-full.pdf). + +A Count-Min Sketch (CMS) is a probabilistic data structure which approximates the frequency of events in a data stream. Unlike a hash map, a CMS uses sub-linear space at the expense of a configurable error factor. Similar to Counting Bloom filters, items are hashed to a series of buckets, which increment a counter. The frequency of an item is estimated by taking the minimum of each of the item's respective counter values. + +Count-Min Sketches are useful for counting the frequency of events in massive data sets or unbounded streams online. In these situations, storing the entire data set or allocating counters for every event in memory is impractical. It may be possible for offline processing, but real-time processing requires fast, space-efficient solutions like the CMS. For approximating set cardinality, refer to the HyperLogLog. + +### Usage + +```go +package main + +import ( + "fmt" + "github.com/tylertreat/BoomFilters" +) + +func main() { + cms := boom.NewCountMinSketch(0.001, 0.99) + + cms.Add([]byte(`alice`)).Add([]byte(`bob`)).Add([]byte(`bob`)).Add([]byte(`frank`)) + fmt.Println("frequency of alice", cms.Count([]byte(`alice`))) + fmt.Println("frequency of bob", cms.Count([]byte(`bob`))) + fmt.Println("frequency of frank", cms.Count([]byte(`frank`))) + + + // Serialization example + buf := new(bytes.Buffer) + n, err := cms.WriteDataTo(buf) + if err != nil { + fmt.Println(err, n) + } + + // Restore to initial state. + cms.Reset() + + newCMS := boom.NewCountMinSketch(0.001, 0.99) + n, err = newCMS.ReadDataFrom(buf) + if err != nil { + fmt.Println(err, n) + } + + fmt.Println("frequency of frank", newCMS.Count([]byte(`frank`))) + + +} +``` + +## Top-K + +Top-K uses a Count-Min Sketch and min-heap to track the top-k most frequent elements in a stream. 
+ +### Usage + +```go +package main + +import ( + "fmt" + "github.com/tylertreat/BoomFilters" +) + +func main() { + topk := boom.NewTopK(0.001, 0.99, 5) + + topk.Add([]byte(`bob`)).Add([]byte(`bob`)).Add([]byte(`bob`)) + topk.Add([]byte(`tyler`)).Add([]byte(`tyler`)).Add([]byte(`tyler`)).Add([]byte(`tyler`)) + topk.Add([]byte(`fred`)) + topk.Add([]byte(`alice`)).Add([]byte(`alice`)).Add([]byte(`alice`)).Add([]byte(`alice`)) + topk.Add([]byte(`james`)) + topk.Add([]byte(`fred`)) + topk.Add([]byte(`sara`)).Add([]byte(`sara`)) + topk.Add([]byte(`bill`)) + + for i, element := range topk.Elements() { + fmt.Println(i, string(element.Data), element.Freq) + } + + // Restore to initial state. + topk.Reset() +} +``` + +## HyperLogLog + +This is an implementation of HyperLogLog as described by Flajolet, Fusy, Gandouet, and Meunier in [HyperLogLog: the analysis of a near-optimal cardinality estimation algorithm](http://algo.inria.fr/flajolet/Publications/FlFuGaMe07.pdf). + +HyperLogLog is a probabilistic algorithm which approximates the number of distinct elements in a multiset. It works by hashing values and calculating the maximum number of leading zeros in the binary representation of each hash. If the maximum number of leading zeros is n, the estimated number of distinct elements in the set is 2^n. To minimize variance, the multiset is split into a configurable number of registers, the maximum number of leading zeros is calculated in the numbers in each register, and a harmonic mean is used to combine the estimates. + +For large or unbounded data sets, calculating the exact cardinality is impractical. HyperLogLog uses a fraction of the memory while providing an accurate approximation. + +This implementation was [originally written by Eric Lesh](https://github.com/eclesh/hyperloglog). Some small changes and additions have been made, including a way to construct a HyperLogLog optimized for a particular relative accuracy and adding FNV hashing. For counting element frequency, refer to the Count-Min Sketch. + +### Usage + +```go +package main + +import ( + "fmt" + "github.com/tylertreat/BoomFilters" +) + +func main() { + hll, err := boom.NewDefaultHyperLogLog(0.1) + if err != nil { + panic(err) + } + + hll.Add([]byte(`alice`)).Add([]byte(`bob`)).Add([]byte(`bob`)).Add([]byte(`frank`)) + fmt.Println("count", hll.Count()) + + // Serialization example + buf := new(bytes.Buffer) + _, err = hll.WriteDataTo(buf) + if err != nil { + fmt.Println(err) + } + + // Restore to initial state. + hll.Reset() + + newHll, err := boom.NewDefaultHyperLogLog(0.1) + if err != nil { + fmt.Println(err) + } + + _, err = newHll.ReadDataFrom(buf) + if err != nil { + fmt.Println(err) + } + fmt.Println("count", newHll.Count()) + +} +``` + +## MinHash + +This is a variation of the technique for estimating similarity between two sets as presented by Broder in [On the resemblance and containment of documents](http://gatekeeper.dec.com/ftp/pub/dec/SRC/publications/broder/positano-final-wpnums.pdf). + +MinHash is a probabilistic algorithm which can be used to cluster or compare documents by splitting the corpus into a bag of words. MinHash returns the approximated similarity ratio of the two bags. The similarity is less accurate for very small bags of words. 
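To make the signature idea concrete, here is a small, self-contained sketch of the classic MinHash estimate: k simulated hash functions, similarity approximated as the fraction of matching minima. It is illustrative only and not how this package implements boom.MinHash; the helper names are invented.

```go
package main

import (
	"fmt"
	"hash/fnv"
)

// signature computes, for each of k simulated hash functions, the minimum
// hash value over all words in the bag. Mixing the function index into the
// hashed input stands in for k independent hash functions; real
// implementations choose these more carefully.
func signature(words []string, k int) []uint64 {
	sig := make([]uint64, k)
	for i := range sig {
		sig[i] = ^uint64(0)
		for _, w := range words {
			h := fnv.New64a()
			fmt.Fprintf(h, "%d:%s", i, w)
			if v := h.Sum64(); v < sig[i] {
				sig[i] = v
			}
		}
	}
	return sig
}

// similarity approximates Jaccard similarity as the fraction of signature
// positions where the two minima agree.
func similarity(a, b []string, k int) float64 {
	sa, sb := signature(a, k), signature(b, k)
	matches := 0
	for i := range sa {
		if sa[i] == sb[i] {
			matches++
		}
	}
	return float64(matches) / float64(k)
}

func main() {
	bag1 := []string{"bill", "alice", "frank", "bob", "sara", "tyler", "james"}
	bag2 := []string{"bill", "alice", "frank", "bob", "sara"}
	fmt.Println("approximate similarity:", similarity(bag1, bag2, 128))
}
```

With k = 128 the estimate should land near the true Jaccard similarity of the two bags above, 5/7.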
+ +### Usage + +```go +package main + +import ( + "fmt" + "github.com/tylertreat/BoomFilters" +) + +func main() { + bag1 := []string{"bill", "alice", "frank", "bob", "sara", "tyler", "james"} + bag2 := []string{"bill", "alice", "frank", "bob", "sara"} + + fmt.Println("similarity", boom.MinHash(bag1, bag2)) +} +``` + +## References + +- [Approximately Detecting Duplicates for Streaming Data using Stable Bloom Filters](http://webdocs.cs.ualberta.ca/~drafiei/papers/DupDet06Sigmod.pdf) +- [Scalable Bloom Filters](http://gsd.di.uminho.pt/members/cbm/ps/dbloom.pdf) +- [The Opposite of a Bloom Filter](http://www.somethingsimilar.com/2012/05/21/the-opposite-of-a-bloom-filter/) +- [Benchmarking Bloom Filters and Hash Functions in Go](http://zhen.org/blog/benchmarking-bloom-filters-and-hash-functions-in-go/) +- [Summary Cache: A Scalable Wide-Area Web Cache Sharing Protocol](http://pages.cs.wisc.edu/~jussara/papers/00ton.pdf) +- [An Improved Data Stream Summary: The Count-Min Sketch and its Applications](http://dimacs.rutgers.edu/~graham/pubs/papers/cm-full.pdf) +- [HyperLogLog: the analysis of a near-optimal cardinality estimation algorithm](http://algo.inria.fr/flajolet/Publications/FlFuGaMe07.pdf) +- [Package hyperloglog](https://github.com/eclesh/hyperloglog) +- [On the resemblance and containment of documents](http://gatekeeper.dec.com/ftp/pub/dec/SRC/publications/broder/positano-final-wpnums.pdf) +- [Cuckoo Filter: Practically Better Than Bloom](http://www.pdl.cmu.edu/PDL-FTP/FS/cuckoo-conext2014.pdf) diff --git a/vendor/github.com/tylertreat/BoomFilters/boom.go b/vendor/github.com/tylertreat/BoomFilters/boom.go new file mode 100644 index 00000000000..d14900f9e46 --- /dev/null +++ b/vendor/github.com/tylertreat/BoomFilters/boom.go @@ -0,0 +1,84 @@ +/* +Package boom implements probabilistic data structures for processing +continuous, unbounded data streams. This includes Stable Bloom Filters, +Scalable Bloom Filters, Counting Bloom Filters, Inverse Bloom Filters, several +variants of traditional Bloom filters, HyperLogLog, Count-Min Sketch, and +MinHash. + +Classic Bloom filters generally require a priori knowledge of the data set +in order to allocate an appropriately sized bit array. This works well for +offline processing, but online processing typically involves unbounded data +streams. With enough data, a traditional Bloom filter "fills up", after +which it has a false-positive probability of 1. + +Boom Filters are useful for situations where the size of the data set isn't +known ahead of time. For example, a Stable Bloom Filter can be used to +deduplicate events from an unbounded event stream with a specified upper +bound on false positives and minimal false negatives. Alternatively, an +Inverse Bloom Filter is ideal for deduplicating a stream where duplicate +events are relatively close together. This results in no false positives +and, depending on how close together duplicates are, a small probability of +false negatives. Scalable Bloom Filters place a tight upper bound on false +positives while avoiding false negatives but require allocating memory +proportional to the size of the data set. Counting Bloom Filters and Cuckoo +Filters are useful for cases which require adding and removing elements to and +from a set. + +For large or unbounded data sets, calculating the exact cardinality is +impractical. HyperLogLog uses a fraction of the memory while providing an +accurate approximation. Similarly, Count-Min Sketch provides an efficient way +to estimate event frequency for data streams. 
TopK tracks the top-k most +frequent elements. + +MinHash is a probabilistic algorithm to approximate the similarity between two +sets. This can be used to cluster or compare documents by splitting the corpus +into a bag of words. +*/ +package boom + +import ( + "hash" + "math" +) + +const fillRatio = 0.5 + +// Filter is a probabilistic data structure which is used to test the +// membership of an element in a set. +type Filter interface { + // Test will test for membership of the data and returns true if it is a + // member, false if not. + Test([]byte) bool + + // Add will add the data to the Bloom filter. It returns the filter to + // allow for chaining. + Add([]byte) Filter + + // TestAndAdd is equivalent to calling Test followed by Add. It returns + // true if the data is a member, false if not. + TestAndAdd([]byte) bool +} + +// OptimalM calculates the optimal Bloom filter size, m, based on the number of +// items and the desired rate of false positives. +func OptimalM(n uint, fpRate float64) uint { + return uint(math.Ceil(float64(n) / ((math.Log(fillRatio) * + math.Log(1-fillRatio)) / math.Abs(math.Log(fpRate))))) +} + +// OptimalK calculates the optimal number of hash functions to use for a Bloom +// filter based on the desired rate of false positives. +func OptimalK(fpRate float64) uint { + return uint(math.Ceil(math.Log2(1 / fpRate))) +} + +// hashKernel returns the upper and lower base hash values from which the k +// hashes are derived. +func hashKernel(data []byte, hash hash.Hash64) (uint32, uint32) { + hash.Write(data) + sum := hash.Sum64() + hash.Reset() + upper := uint32(sum & 0xffffffff) + lower := uint32((sum >> 32) & 0xffffffff) + return upper, lower +} diff --git a/vendor/github.com/tylertreat/BoomFilters/buckets.go b/vendor/github.com/tylertreat/BoomFilters/buckets.go new file mode 100644 index 00000000000..1e9bac02c76 --- /dev/null +++ b/vendor/github.com/tylertreat/BoomFilters/buckets.go @@ -0,0 +1,182 @@ +package boom + +import ( + "bytes" + "encoding/binary" + "io" +) + +// Buckets is a fast, space-efficient array of buckets where each bucket can +// store up to a configured maximum value. +type Buckets struct { + data []byte + bucketSize uint8 + max uint8 + count uint +} + +// NewBuckets creates a new Buckets with the provided number of buckets where +// each bucket is the specified number of bits. +func NewBuckets(count uint, bucketSize uint8) *Buckets { + return &Buckets{ + count: count, + data: make([]byte, (count*uint(bucketSize)+7)/8), + bucketSize: bucketSize, + max: (1 << bucketSize) - 1, + } +} + +// MaxBucketValue returns the maximum value that can be stored in a bucket. +func (b *Buckets) MaxBucketValue() uint8 { + return b.max +} + +// Count returns the number of buckets. +func (b *Buckets) Count() uint { + return b.count +} + +// Increment will increment the value in the specified bucket by the provided +// delta. A bucket can be decremented by providing a negative delta. The value +// is clamped to zero and the maximum bucket value. Returns itself to allow for +// chaining. +func (b *Buckets) Increment(bucket uint, delta int32) *Buckets { + val := int32(b.getBits(bucket*uint(b.bucketSize), uint(b.bucketSize))) + delta + if val > int32(b.max) { + val = int32(b.max) + } else if val < 0 { + val = 0 + } + + b.setBits(uint32(bucket)*uint32(b.bucketSize), uint32(b.bucketSize), uint32(val)) + return b +} + +// Set will set the bucket value. The value is clamped to zero and the maximum +// bucket value. Returns itself to allow for chaining. 
+func (b *Buckets) Set(bucket uint, value uint8) *Buckets { + if value > b.max { + value = b.max + } + + b.setBits(uint32(bucket)*uint32(b.bucketSize), uint32(b.bucketSize), uint32(value)) + return b +} + +// Get returns the value in the specified bucket. +func (b *Buckets) Get(bucket uint) uint32 { + return b.getBits(bucket*uint(b.bucketSize), uint(b.bucketSize)) +} + +// Reset restores the Buckets to the original state. Returns itself to allow +// for chaining. +func (b *Buckets) Reset() *Buckets { + b.data = make([]byte, (b.count*uint(b.bucketSize)+7)/8) + return b +} + +// getBits returns the bits at the specified offset and length. +func (b *Buckets) getBits(offset, length uint) uint32 { + byteIndex := offset / 8 + byteOffset := offset % 8 + if byteOffset+length > 8 { + rem := 8 - byteOffset + return b.getBits(offset, rem) | (b.getBits(offset+rem, length-rem) << rem) + } + bitMask := uint32((1 << length) - 1) + return (uint32(b.data[byteIndex]) & (bitMask << byteOffset)) >> byteOffset +} + +// setBits sets bits at the specified offset and length. +func (b *Buckets) setBits(offset, length, bits uint32) { + byteIndex := offset / 8 + byteOffset := offset % 8 + if byteOffset+length > 8 { + rem := 8 - byteOffset + b.setBits(offset, rem, bits) + b.setBits(offset+rem, length-rem, bits>>rem) + return + } + bitMask := uint32((1 << length) - 1) + b.data[byteIndex] = byte(uint32(b.data[byteIndex]) & ^(bitMask << byteOffset)) + b.data[byteIndex] = byte(uint32(b.data[byteIndex]) | ((bits & bitMask) << byteOffset)) +} + +// WriteTo writes a binary representation of Buckets to an i/o stream. +// It returns the number of bytes written. +func (b *Buckets) WriteTo(stream io.Writer) (int64, error) { + err := binary.Write(stream, binary.BigEndian, b.bucketSize) + if err != nil { + return 0, err + } + err = binary.Write(stream, binary.BigEndian, b.max) + if err != nil { + return 0, err + } + err = binary.Write(stream, binary.BigEndian, uint64(b.count)) + if err != nil { + return 0, err + } + err = binary.Write(stream, binary.BigEndian, uint64(len(b.data))) + if err != nil { + return 0, err + } + err = binary.Write(stream, binary.BigEndian, b.data) + if err != nil { + return 0, err + } + return int64(len(b.data) + 2*binary.Size(uint8(0)) + 2*binary.Size(uint64(0))), err +} + +// ReadFrom reads a binary representation of Buckets (such as might +// have been written by WriteTo()) from an i/o stream. It returns the number +// of bytes read. +func (b *Buckets) ReadFrom(stream io.Reader) (int64, error) { + var bucketSize, max uint8 + var count, len uint64 + err := binary.Read(stream, binary.BigEndian, &bucketSize) + if err != nil { + return 0, err + } + err = binary.Read(stream, binary.BigEndian, &max) + if err != nil { + return 0, err + } + err = binary.Read(stream, binary.BigEndian, &count) + if err != nil { + return 0, err + } + err = binary.Read(stream, binary.BigEndian, &len) + if err != nil { + return 0, err + } + data := make([]byte, len) + err = binary.Read(stream, binary.BigEndian, &data) + if err != nil { + return 0, err + } + b.bucketSize = bucketSize + b.max = max + b.count = uint(count) + b.data = data + return int64(int(len) + 2*binary.Size(uint8(0)) + 2*binary.Size(uint64(0))), nil +} + +// GobEncode implements gob.GobEncoder interface. +func (b *Buckets) GobEncode() ([]byte, error) { + var buf bytes.Buffer + _, err := b.WriteTo(&buf) + if err != nil { + return nil, err + } + + return buf.Bytes(), nil +} + +// GobDecode implements gob.GobDecoder interface. 
+func (b *Buckets) GobDecode(data []byte) error { + buf := bytes.NewBuffer(data) + _, err := b.ReadFrom(buf) + + return err +} diff --git a/vendor/github.com/tylertreat/BoomFilters/classic.go b/vendor/github.com/tylertreat/BoomFilters/classic.go new file mode 100644 index 00000000000..2ef7808829e --- /dev/null +++ b/vendor/github.com/tylertreat/BoomFilters/classic.go @@ -0,0 +1,202 @@ +package boom + +import ( + "bytes" + "encoding/binary" + "hash" + "hash/fnv" + "io" + "math" +) + +// BloomFilter implements a classic Bloom filter. A Bloom filter has a non-zero +// probability of false positives and a zero probability of false negatives. +type BloomFilter struct { + buckets *Buckets // filter data + hash hash.Hash64 // hash function (kernel for all k functions) + m uint // filter size + k uint // number of hash functions + count uint // number of items added +} + +// NewBloomFilter creates a new Bloom filter optimized to store n items with a +// specified target false-positive rate. +func NewBloomFilter(n uint, fpRate float64) *BloomFilter { + m := OptimalM(n, fpRate) + return &BloomFilter{ + buckets: NewBuckets(m, 1), + hash: fnv.New64(), + m: m, + k: OptimalK(fpRate), + } +} + +// Capacity returns the Bloom filter capacity, m. +func (b *BloomFilter) Capacity() uint { + return b.m +} + +// K returns the number of hash functions. +func (b *BloomFilter) K() uint { + return b.k +} + +// Count returns the number of items added to the filter. +func (b *BloomFilter) Count() uint { + return b.count +} + +// EstimatedFillRatio returns the current estimated ratio of set bits. +func (b *BloomFilter) EstimatedFillRatio() float64 { + return 1 - math.Exp((-float64(b.count)*float64(b.k))/float64(b.m)) +} + +// FillRatio returns the ratio of set bits. +func (b *BloomFilter) FillRatio() float64 { + sum := uint32(0) + for i := uint(0); i < b.buckets.Count(); i++ { + sum += b.buckets.Get(i) + } + return float64(sum) / float64(b.m) +} + +// Test will test for membership of the data and returns true if it is a +// member, false if not. This is a probabilistic test, meaning there is a +// non-zero probability of false positives but a zero probability of false +// negatives. +func (b *BloomFilter) Test(data []byte) bool { + lower, upper := hashKernel(data, b.hash) + + // If any of the K bits are not set, then it's not a member. + for i := uint(0); i < b.k; i++ { + if b.buckets.Get((uint(lower)+uint(upper)*i)%b.m) == 0 { + return false + } + } + + return true +} + +// Add will add the data to the Bloom filter. It returns the filter to allow +// for chaining. +func (b *BloomFilter) Add(data []byte) Filter { + lower, upper := hashKernel(data, b.hash) + + // Set the K bits. + for i := uint(0); i < b.k; i++ { + b.buckets.Set((uint(lower)+uint(upper)*i)%b.m, 1) + } + + b.count++ + return b +} + +// TestAndAdd is equivalent to calling Test followed by Add. It returns true if +// the data is a member, false if not. +func (b *BloomFilter) TestAndAdd(data []byte) bool { + lower, upper := hashKernel(data, b.hash) + member := true + + // If any of the K bits are not set, then it's not a member. + for i := uint(0); i < b.k; i++ { + idx := (uint(lower) + uint(upper)*i) % b.m + if b.buckets.Get(idx) == 0 { + member = false + } + b.buckets.Set(idx, 1) + } + + b.count++ + return member +} + +// Reset restores the Bloom filter to its original state. It returns the filter +// to allow for chaining. 
+func (b *BloomFilter) Reset() *BloomFilter { + b.buckets.Reset() + return b +} + +// SetHash sets the hashing function used in the filter. +// For the effect on false positive rates see: https://github.com/tylertreat/BoomFilters/pull/1 +func (b *BloomFilter) SetHash(h hash.Hash64) { + b.hash = h +} + +// WriteTo writes a binary representation of the BloomFilter to an i/o stream. +// It returns the number of bytes written. +func (b *BloomFilter) WriteTo(stream io.Writer) (int64, error) { + err := binary.Write(stream, binary.BigEndian, uint64(b.count)) + if err != nil { + return 0, err + } + err = binary.Write(stream, binary.BigEndian, uint64(b.m)) + if err != nil { + return 0, err + } + err = binary.Write(stream, binary.BigEndian, uint64(b.k)) + if err != nil { + return 0, err + } + + writtenSize, err := b.buckets.WriteTo(stream) + if err != nil { + return 0, err + } + + return writtenSize + int64(3*binary.Size(uint64(0))), err +} + +// ReadFrom reads a binary representation of BloomFilter (such as might +// have been written by WriteTo()) from an i/o stream. It returns the number +// of bytes read. +func (b *BloomFilter) ReadFrom(stream io.Reader) (int64, error) { + var count, m, k uint64 + var buckets Buckets + + err := binary.Read(stream, binary.BigEndian, &count) + if err != nil { + return 0, err + } + err = binary.Read(stream, binary.BigEndian, &m) + if err != nil { + return 0, err + } + err = binary.Read(stream, binary.BigEndian, &k) + if err != nil { + return 0, err + } + + readSize, err := buckets.ReadFrom(stream) + if err != nil { + return 0, err + } + + b.count = uint(count) + b.m = uint(m) + b.k = uint(k) + b.buckets = &buckets + return readSize + int64(3*binary.Size(uint64(0))), nil +} + +// GobEncode implements gob.GobEncoder interface. +func (b *BloomFilter) GobEncode() ([]byte, error) { + var buf bytes.Buffer + _, err := b.WriteTo(&buf) + if err != nil { + return nil, err + } + + return buf.Bytes(), nil +} + +// GobDecode implements gob.GobDecoder interface. +func (b *BloomFilter) GobDecode(data []byte) error { + buf := bytes.NewBuffer(data) + _, err := b.ReadFrom(buf) + if b.hash == nil { + b.hash = fnv.New64() + } + + return err +} diff --git a/vendor/github.com/tylertreat/BoomFilters/counting.go b/vendor/github.com/tylertreat/BoomFilters/counting.go new file mode 100644 index 00000000000..e27a4c483a2 --- /dev/null +++ b/vendor/github.com/tylertreat/BoomFilters/counting.go @@ -0,0 +1,158 @@ +package boom + +import ( + "hash" + "hash/fnv" +) + +// CountingBloomFilter implements a Counting Bloom Filter as described by Fan, +// Cao, Almeida, and Broder in Summary Cache: A Scalable Wide-Area Web Cache +// Sharing Protocol: +// +// http://pages.cs.wisc.edu/~jussara/papers/00ton.pdf +// +// A Counting Bloom Filter (CBF) provides a way to remove elements by using an +// array of n-bit buckets. When an element is added, the respective buckets are +// incremented. To remove an element, the respective buckets are decremented. A +// query checks that each of the respective buckets are non-zero. Because CBFs +// allow elements to be removed, they introduce a non-zero probability of false +// negatives in addition to the possibility of false positives. +// +// Counting Bloom Filters are useful for cases where elements are both added +// and removed from the data set. Since they use n-bit buckets, CBFs use +// roughly n-times more memory than traditional Bloom filters. 
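A short sketch of how the counting variant described above supports removal, under the same import assumption; NewDefaultCountingBloomFilter allocates four-bit buckets:

package main

import (
    "fmt"

    boom "github.com/tylertreat/BoomFilters"
)

func main() {
    cbf := boom.NewDefaultCountingBloomFilter(1000, 0.01) // 4-bit buckets

    cbf.Add([]byte("tenant-1"))
    fmt.Println(cbf.Test([]byte("tenant-1")))          // true
    fmt.Println(cbf.TestAndRemove([]byte("tenant-1"))) // true, and the item's buckets are decremented
    fmt.Println(cbf.Test([]byte("tenant-1")))          // false again for this lone item
}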
+type CountingBloomFilter struct { + buckets *Buckets // filter data + hash hash.Hash64 // hash function (kernel for all k functions) + m uint // number of buckets + k uint // number of hash functions + count uint // number of items in the filter + indexBuffer []uint // buffer used to cache indices +} + +// NewCountingBloomFilter creates a new Counting Bloom Filter optimized to +// store n items with a specified target false-positive rate and bucket size. +// If you don't know how many bits to use for buckets, use +// NewDefaultCountingBloomFilter for a sensible default. +func NewCountingBloomFilter(n uint, b uint8, fpRate float64) *CountingBloomFilter { + var ( + m = OptimalM(n, fpRate) + k = OptimalK(fpRate) + ) + return &CountingBloomFilter{ + buckets: NewBuckets(m, b), + hash: fnv.New64(), + m: m, + k: k, + indexBuffer: make([]uint, k), + } +} + +// NewDefaultCountingBloomFilter creates a new Counting Bloom Filter optimized +// to store n items with a specified target false-positive rate. Buckets are +// allocated four bits. +func NewDefaultCountingBloomFilter(n uint, fpRate float64) *CountingBloomFilter { + return NewCountingBloomFilter(n, 4, fpRate) +} + +// Capacity returns the Bloom filter capacity, m. +func (c *CountingBloomFilter) Capacity() uint { + return c.m +} + +// K returns the number of hash functions. +func (c *CountingBloomFilter) K() uint { + return c.k +} + +// Count returns the number of items in the filter. +func (c *CountingBloomFilter) Count() uint { + return c.count +} + +// Test will test for membership of the data and returns true if it is a +// member, false if not. This is a probabilistic test, meaning there is a +// non-zero probability of false positives and false negatives. +func (c *CountingBloomFilter) Test(data []byte) bool { + lower, upper := hashKernel(data, c.hash) + + // If any of the K bits are not set, then it's not a member. + for i := uint(0); i < c.k; i++ { + if c.buckets.Get((uint(lower)+uint(upper)*i)%c.m) == 0 { + return false + } + } + + return true +} + +// Add will add the data to the Bloom filter. It returns the filter to allow +// for chaining. +func (c *CountingBloomFilter) Add(data []byte) Filter { + lower, upper := hashKernel(data, c.hash) + + // Set the K bits. + for i := uint(0); i < c.k; i++ { + c.buckets.Increment((uint(lower)+uint(upper)*i)%c.m, 1) + } + + c.count++ + return c +} + +// TestAndAdd is equivalent to calling Test followed by Add. It returns true if +// the data is a member, false if not. +func (c *CountingBloomFilter) TestAndAdd(data []byte) bool { + lower, upper := hashKernel(data, c.hash) + member := true + + // If any of the K bits are not set, then it's not a member. + for i := uint(0); i < c.k; i++ { + idx := (uint(lower) + uint(upper)*i) % c.m + if c.buckets.Get(idx) == 0 { + member = false + } + c.buckets.Increment(idx, 1) + } + + c.count++ + return member +} + +// TestAndRemove will test for membership of the data and remove it from the +// filter if it exists. Returns true if the data was a member, false if not. +func (c *CountingBloomFilter) TestAndRemove(data []byte) bool { + lower, upper := hashKernel(data, c.hash) + member := true + + // Set the K bits. + for i := uint(0); i < c.k; i++ { + c.indexBuffer[i] = (uint(lower) + uint(upper)*i) % c.m + if c.buckets.Get(c.indexBuffer[i]) == 0 { + member = false + } + } + + if member { + for _, idx := range c.indexBuffer { + c.buckets.Increment(idx, -1) + } + c.count-- + } + + return member +} + +// Reset restores the Bloom filter to its original state. 
It returns the filter +// to allow for chaining. +func (c *CountingBloomFilter) Reset() *CountingBloomFilter { + c.buckets.Reset() + c.count = 0 + return c +} + +// SetHash sets the hashing function used in the filter. +// For the effect on false positive rates see: https://github.com/tylertreat/BoomFilters/pull/1 +func (c *CountingBloomFilter) SetHash(h hash.Hash64) { + c.hash = h +} diff --git a/vendor/github.com/tylertreat/BoomFilters/countmin.go b/vendor/github.com/tylertreat/BoomFilters/countmin.go new file mode 100644 index 00000000000..2af125ab1b6 --- /dev/null +++ b/vendor/github.com/tylertreat/BoomFilters/countmin.go @@ -0,0 +1,266 @@ +package boom + +import ( + "bytes" + "encoding/binary" + "errors" + "fmt" + "hash" + "hash/fnv" + "io" + "math" +) + +// CountMinSketch implements a Count-Min Sketch as described by Cormode and +// Muthukrishnan in An Improved Data Stream Summary: The Count-Min Sketch and +// its Applications: +// +// http://dimacs.rutgers.edu/~graham/pubs/papers/cm-full.pdf +// +// A Count-Min Sketch (CMS) is a probabilistic data structure which +// approximates the frequency of events in a data stream. Unlike a hash map, a +// CMS uses sub-linear space at the expense of a configurable error factor. +// Similar to Counting Bloom filters, items are hashed to a series of buckets, +// which increment a counter. The frequency of an item is estimated by taking +// the minimum of each of the item's respective counter values. +// +// Count-Min Sketches are useful for counting the frequency of events in +// massive data sets or unbounded streams online. In these situations, storing +// the entire data set or allocating counters for every event in memory is +// impractical. It may be possible for offline processing, but real-time +// processing requires fast, space-efficient solutions like the CMS. For +// approximating set cardinality, refer to the HyperLogLog. +type CountMinSketch struct { + matrix [][]uint64 // count matrix + width uint // matrix width + depth uint // matrix depth + count uint64 // number of items added + epsilon float64 // relative-accuracy factor + delta float64 // relative-accuracy probability + hash hash.Hash64 // hash function (kernel for all depth functions) +} + +// NewCountMinSketch creates a new Count-Min Sketch whose relative accuracy is +// within a factor of epsilon with probability delta. Both of these parameters +// affect the space and time complexity. +func NewCountMinSketch(epsilon, delta float64) *CountMinSketch { + var ( + width = uint(math.Ceil(math.E / epsilon)) + depth = uint(math.Ceil(math.Log(1 / delta))) + matrix = make([][]uint64, depth) + ) + + for i := uint(0); i < depth; i++ { + matrix[i] = make([]uint64, width) + } + + return &CountMinSketch{ + matrix: matrix, + width: width, + depth: depth, + epsilon: epsilon, + delta: delta, + hash: fnv.New64(), + } +} + +// Epsilon returns the relative-accuracy factor, epsilon. +func (c *CountMinSketch) Epsilon() float64 { + return c.epsilon +} + +// Delta returns the relative-accuracy probability, delta. +func (c *CountMinSketch) Delta() float64 { + return c.delta +} + +// TotalCount returns the number of items added to the sketch. +func (c *CountMinSketch) TotalCount() uint64 { + return c.count +} + +// Add will add the data to the set. Returns the CountMinSketch to allow for +// chaining. +func (c *CountMinSketch) Add(data []byte) *CountMinSketch { + lower, upper := hashKernel(data, c.hash) + + // Increment count in each row. 
+ for i := uint(0); i < c.depth; i++ { + c.matrix[i][(uint(lower)+uint(upper)*i)%c.width]++ + } + + c.count++ + return c +} + +// Count returns the approximate count for the specified item, correct within +// epsilon * total count with a probability of delta. +func (c *CountMinSketch) Count(data []byte) uint64 { + var ( + lower, upper = hashKernel(data, c.hash) + count = uint64(math.MaxUint64) + ) + + for i := uint(0); i < c.depth; i++ { + count = uint64(math.Min(float64(count), + float64(c.matrix[i][(uint(lower)+uint(upper)*i)%c.width]))) + } + + return count +} + +// Merge combines this CountMinSketch with another. Returns an error if the +// matrix width and depth are not equal. +func (c *CountMinSketch) Merge(other *CountMinSketch) error { + if c.depth != other.depth { + return errors.New("matrix depth must match") + } + + if c.width != other.width { + return errors.New("matrix width must match") + } + + for i := uint(0); i < c.depth; i++ { + for j := uint(0); j < c.width; j++ { + c.matrix[i][j] += other.matrix[i][j] + } + } + + c.count += other.count + return nil +} + +// Reset restores the CountMinSketch to its original state. It returns itself +// to allow for chaining. +func (c *CountMinSketch) Reset() *CountMinSketch { + for i := 0; i < len(c.matrix); i++ { + for j := 0; j < len(c.matrix[i]); j++ { + c.matrix[i][j] = 0 + } + } + + c.count = 0 + return c +} + +// SetHash sets the hashing function used. +func (c *CountMinSketch) SetHash(h hash.Hash64) { + c.hash = h +} + +// WriteDataTo writes a binary representation of the CMS data to +// an io stream. It returns the number of bytes written and error +func (c *CountMinSketch) WriteDataTo(stream io.Writer) (int, error) { + buf := new(bytes.Buffer) + // serialize epsilon and delta as cms configuration check + err := binary.Write(buf, binary.LittleEndian, c.epsilon) + if err != nil { + return 0, err + } + err = binary.Write(buf, binary.LittleEndian, c.delta) + if err != nil { + return 0, err + } + err = binary.Write(buf, binary.LittleEndian, c.count) + if err != nil { + return 0, err + } + // encode matrix + for i := range c.matrix { + err = binary.Write(buf, binary.LittleEndian, c.matrix[i]) + if err != nil { + return 0, err + } + } + + return stream.Write(buf.Bytes()) +} + +// ReadDataFrom reads a binary representation of the CMS data written +// by WriteDataTo() from io stream. It returns the number of bytes read +// and error +// If serialized CMS configuration is different it returns error with expected params +func (c *CountMinSketch) ReadDataFrom(stream io.Reader) (int, error) { + var ( + count uint64 + epsilon, delta float64 + ) + + err := binary.Read(stream, binary.LittleEndian, &epsilon) + if err != nil { + return 0, err + } + err = binary.Read(stream, binary.LittleEndian, &delta) + if err != nil { + return 0, err + } + + // check if serialized and target cms configurations are same + if c.epsilon != epsilon || c.delta != delta { + return 0, fmt.Errorf("expected cms values for epsilon %f and delta %f", epsilon, delta) + } + + err = binary.Read(stream, binary.LittleEndian, &count) + if err != nil { + return 0, err + } + + for i := uint(0); i < c.depth; i++ { + err = binary.Read(stream, binary.LittleEndian, c.matrix[i]) + } + // count size of matrix and count + size := int(c.depth*c.width)*binary.Size(uint64(0)) + binary.Size(count) + 2*binary.Size(float64(0)) + + c.count = count + + return size, err +} + +// TestAndRemove attemps to remove n counts of data from the CMS. 
If +// n is greater than the data count, TestAndRemove is a no-op and +// returns false. Else, return true and decrement count by n. +func (c *CountMinSketch) TestAndRemove(data []byte, n uint64) bool { + h, count := c.traverseDepth(data) + + if n > count { + return false + } + + for i := uint(0); i < c.depth; i++ { + *h[i] -= n + } + + return true +} + +// TestAndRemoveAll counts data frequency, performs TestAndRemove(data, count), +// and returns true if count is positive. If count is 0, TestAndRemoveAll is a +// no-op and returns false. +func (c *CountMinSketch) TestAndRemoveAll(data []byte) bool { + h, count := c.traverseDepth(data) + + if count == 0 { + return false + } + + for i := uint(0); i < c.depth; i++ { + *h[i] -= count + } + + return true +} + +func (c *CountMinSketch) traverseDepth(data []byte) ([]*uint64, uint64) { + var ( + lower, upper = hashKernel(data, c.hash) + count = uint64(math.MaxUint64) + h = make([]*uint64, c.depth) + ) + + for i := uint(0); i < c.depth; i++ { + h[i] = &c.matrix[i][(uint(lower)+uint(upper)*i)%c.width] + count = uint64(math.Min(float64(count), float64(*h[i]))) + } + + return h, count +} diff --git a/vendor/github.com/tylertreat/BoomFilters/cuckoo.go b/vendor/github.com/tylertreat/BoomFilters/cuckoo.go new file mode 100644 index 00000000000..4cf31264f8c --- /dev/null +++ b/vendor/github.com/tylertreat/BoomFilters/cuckoo.go @@ -0,0 +1,269 @@ +package boom + +import ( + "bytes" + "encoding/binary" + "errors" + "hash" + "hash/fnv" + "math" + "math/rand" +) + +// maxNumKicks is the maximum number of relocations to attempt when inserting +// an element before considering the filter full. +const maxNumKicks = 500 + +// bucket consists of a set of []byte entries. +type bucket [][]byte + +// contains indicates if the given fingerprint is contained in one of the +// bucket's entries. +func (b bucket) contains(f []byte) bool { + return b.indexOf(f) != -1 +} + +// indexOf returns the entry index of the given fingerprint or -1 if it's not +// in the bucket. +func (b bucket) indexOf(f []byte) int { + for i, fingerprint := range b { + if bytes.Equal(f, fingerprint) { + return i + } + } + return -1 +} + +// getEmptyEntry returns the index of the next available entry in the bucket or +// an error if it's full. +func (b bucket) getEmptyEntry() (int, error) { + for i, fingerprint := range b { + if fingerprint == nil { + return i, nil + } + } + return -1, errors.New("full") +} + +// CuckooFilter implements a Cuckoo Bloom filter as described by Andersen, +// Kaminsky, and Mitzenmacher in Cuckoo Filter: Practically Better Than Bloom: +// +// http://www.pdl.cmu.edu/PDL-FTP/FS/cuckoo-conext2014.pdf +// +// A Cuckoo Filter is a Bloom filter variation which provides support for +// removing elements without significantly degrading space and performance. It +// works by using a cuckoo hashing scheme for inserting items. Instead of +// storing the elements themselves, it stores their fingerprints which also +// allows for item removal without false negatives (if you don't attempt to +// remove an item not contained in the filter). +// +// For applications that store many items and target moderately low +// false-positive rates, cuckoo filters have lower space overhead than +// space-optimized Bloom filters. 
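To make the fingerprint-based removal described above concrete, a small sketch under the same import assumption; Add only reports an error once the filter is effectively full:

package main

import (
    "fmt"

    boom "github.com/tylertreat/BoomFilters"
)

func main() {
    cf := boom.NewCuckooFilter(1000, 0.01)

    if err := cf.Add([]byte("block-01")); err != nil {
        fmt.Println("filter full:", err) // not expected at this load
    }
    fmt.Println(cf.Test([]byte("block-01")))          // true
    fmt.Println(cf.TestAndRemove([]byte("block-01"))) // true, fingerprint cleared
    fmt.Println(cf.Test([]byte("block-01")))          // false
}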
+type CuckooFilter struct { + buckets []bucket + hash hash.Hash32 // hash function (used for fingerprint and hash) + m uint // number of buckets + b uint // number of entries per bucket + f uint // length of fingerprints (in bytes) + count uint // number of items in the filter + n uint // filter capacity +} + +// NewCuckooFilter creates a new Cuckoo Bloom filter optimized to store n items +// with a specified target false-positive rate. +func NewCuckooFilter(n uint, fpRate float64) *CuckooFilter { + var ( + b = uint(4) + f = calculateF(b, fpRate) + m = power2(n / f * 8) + buckets = make([]bucket, m) + ) + + for i := uint(0); i < m; i++ { + buckets[i] = make(bucket, b) + } + + return &CuckooFilter{ + buckets: buckets, + hash: fnv.New32(), + m: m, + b: b, + f: f, + n: n, + } +} + +// Buckets returns the number of buckets. +func (c *CuckooFilter) Buckets() uint { + return c.m +} + +// Capacity returns the number of items the filter can store. +func (c *CuckooFilter) Capacity() uint { + return c.n +} + +// Count returns the number of items in the filter. +func (c *CuckooFilter) Count() uint { + return c.count +} + +// Test will test for membership of the data and returns true if it is a +// member, false if not. This is a probabilistic test, meaning there is a +// non-zero probability of false positives. +func (c *CuckooFilter) Test(data []byte) bool { + i1, i2, f := c.components(data) + + // If either bucket contains f, it's a member. + return c.buckets[i1%c.m].contains(f) || c.buckets[i2%c.m].contains(f) +} + +// Add will add the data to the Cuckoo Filter. It returns an error if the +// filter is full. If the filter is full, an item is removed to make room for +// the new item. This introduces a possibility for false negatives. To avoid +// this, use Count and Capacity to check if the filter is full before adding an +// item. +func (c *CuckooFilter) Add(data []byte) error { + return c.add(c.components(data)) +} + +// TestAndAdd is equivalent to calling Test followed by Add. It returns true if +// the data is a member, false if not. An error is returned if the filter is +// full. If the filter is full, an item is removed to make room for the new +// item. This introduces a possibility for false negatives. To avoid this, use +// Count and Capacity to check if the filter is full before adding an item. +func (c *CuckooFilter) TestAndAdd(data []byte) (bool, error) { + i1, i2, f := c.components(data) + + // If either bucket contains f, it's a member. + if c.buckets[i1%c.m].contains(f) || c.buckets[i2%c.m].contains(f) { + return true, nil + } + + return false, c.add(i1, i2, f) +} + +// TestAndRemove will test for membership of the data and remove it from the +// filter if it exists. Returns true if the data was a member, false if not. +func (c *CuckooFilter) TestAndRemove(data []byte) bool { + i1, i2, f := c.components(data) + + // Try to remove from bucket[i1]. + b1 := c.buckets[i1%c.m] + if idx := b1.indexOf(f); idx != -1 { + b1[idx] = nil + c.count-- + return true + } + + // Try to remove from bucket[i2]. + b2 := c.buckets[i2%c.m] + if idx := b2.indexOf(f); idx != -1 { + b2[idx] = nil + c.count-- + return true + } + + return false +} + +// Reset restores the Bloom filter to its original state. It returns the filter +// to allow for chaining. 
+func (c *CuckooFilter) Reset() *CuckooFilter { + buckets := make([]bucket, c.m) + for i := uint(0); i < c.m; i++ { + buckets[i] = make(bucket, c.b) + } + c.buckets = buckets + c.count = 0 + return c +} + +// add will insert the fingerprint into the filter returning an error if the +// filter is full. +func (c *CuckooFilter) add(i1, i2 uint, f []byte) error { + // Try to insert into bucket[i1]. + b1 := c.buckets[i1%c.m] + if idx, err := b1.getEmptyEntry(); err == nil { + b1[idx] = f + c.count++ + return nil + } + + // Try to insert into bucket[i2]. + b2 := c.buckets[i2%c.m] + if idx, err := b2.getEmptyEntry(); err == nil { + b2[idx] = f + c.count++ + return nil + } + + // Must relocate existing items. + i := i1 + for n := 0; n < maxNumKicks; n++ { + bucketIdx := i % c.m + entryIdx := rand.Intn(int(c.b)) + f, c.buckets[bucketIdx][entryIdx] = c.buckets[bucketIdx][entryIdx], f + i = i ^ uint(binary.BigEndian.Uint32(c.computeHash(f))) + b := c.buckets[i%c.m] + if idx, err := b.getEmptyEntry(); err == nil { + b[idx] = f + c.count++ + return nil + } + } + + return errors.New("full") +} + +// components returns the two hash values used to index into the buckets and +// the fingerprint for the given element. +func (c *CuckooFilter) components(data []byte) (uint, uint, []byte) { + var ( + hash = c.computeHash(data) + f = hash[0:c.f] + i1 = uint(binary.BigEndian.Uint32(hash)) + i2 = i1 ^ uint(binary.BigEndian.Uint32(c.computeHash(f))) + ) + + return i1, i2, f +} + +// computeHash returns a 32-bit hash value for the given data. +func (c *CuckooFilter) computeHash(data []byte) []byte { + c.hash.Write(data) + hash := c.hash.Sum(nil) + c.hash.Reset() + return hash +} + +// SetHash sets the hashing function used in the filter. +// For the effect on false positive rates see: https://github.com/tylertreat/BoomFilters/pull/1 +func (c *CuckooFilter) SetHash(h hash.Hash32) { + c.hash = h +} + +// calculateF returns the optimal fingerprint length in bytes for the given +// bucket size and false-positive rate epsilon. +func calculateF(b uint, epsilon float64) uint { + f := uint(math.Ceil(math.Log(2 * float64(b) / epsilon))) + f = f / 8 + if f <= 0 { + f = 1 + } + return f +} + +// power2 calculates the next power of two for the given value. +func power2(x uint) uint { + x-- + x |= x >> 1 + x |= x >> 2 + x |= x >> 4 + x |= x >> 8 + x |= x >> 16 + x |= x >> 32 + x++ + return x +} diff --git a/vendor/github.com/tylertreat/BoomFilters/deletable.go b/vendor/github.com/tylertreat/BoomFilters/deletable.go new file mode 100644 index 00000000000..c098d7df442 --- /dev/null +++ b/vendor/github.com/tylertreat/BoomFilters/deletable.go @@ -0,0 +1,168 @@ +package boom + +import ( + "hash" + "hash/fnv" +) + +// DeletableBloomFilter implements a Deletable Bloom Filter as described by +// Rothenberg, Macapuna, Verdi, Magalhaes in The Deletable Bloom filter - A new +// member of the Bloom family: +// +// http://arxiv.org/pdf/1005.0352.pdf +// +// A Deletable Bloom Filter compactly stores information on collisions when +// inserting elements. This information is used to determine if elements are +// deletable. This design enables false-negative-free deletions at a fraction +// of the cost in memory consumption. +// +// Deletable Bloom Filters are useful for cases which require removing elements +// but cannot allow false negatives. This means they can be safely swapped in +// place of traditional Bloom filters. 
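A brief sketch of the deletable variant described above, under the same import assumption; the r argument sets how many collision-region bits are kept, and the referenced paper should guide a real choice:

package main

import (
    "fmt"

    boom "github.com/tylertreat/BoomFilters"
)

func main() {
    // n=1000 expected items, r=100 collision-region bits, 1% target false-positive rate.
    dbf := boom.NewDeletableBloomFilter(1000, 100, 0.01)

    dbf.Add([]byte("rule-42"))
    fmt.Println(dbf.Test([]byte("rule-42"))) // true
    // Only bits in collision-free regions are cleared on removal, so deletions
    // never introduce false negatives.
    fmt.Println(dbf.TestAndRemove([]byte("rule-42"))) // true
}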
+type DeletableBloomFilter struct { + buckets *Buckets // filter data + collisions *Buckets // filter collision data + hash hash.Hash64 // hash function (kernel for all k functions) + m uint // filter size + regionSize uint // number of bits in a region + k uint // number of hash functions + count uint // number of items added + indexBuffer []uint // buffer used to cache indices +} + +// NewDeletableBloomFilter creates a new DeletableBloomFilter optimized to +// store n items with a specified target false-positive rate. The r value +// determines the number of bits to use to store collision information. This +// controls the deletability of an element. Refer to the paper for selecting an +// optimal value. +func NewDeletableBloomFilter(n, r uint, fpRate float64) *DeletableBloomFilter { + var ( + m = OptimalM(n, fpRate) + k = OptimalK(fpRate) + ) + return &DeletableBloomFilter{ + buckets: NewBuckets(m-r, 1), + collisions: NewBuckets(r+1, 1), + hash: fnv.New64(), + m: m - r, + regionSize: (m - r) / r, + k: k, + indexBuffer: make([]uint, k), + } +} + +// Capacity returns the Bloom filter capacity, m. +func (d *DeletableBloomFilter) Capacity() uint { + return d.m +} + +// K returns the number of hash functions. +func (d *DeletableBloomFilter) K() uint { + return d.k +} + +// Count returns the number of items added to the filter. +func (d *DeletableBloomFilter) Count() uint { + return d.count +} + +// Test will test for membership of the data and returns true if it is a +// member, false if not. This is a probabilistic test, meaning there is a +// non-zero probability of false positives but a zero probability of false +// negatives. +func (d *DeletableBloomFilter) Test(data []byte) bool { + lower, upper := hashKernel(data, d.hash) + + // If any of the K bits are not set, then it's not a member. + for i := uint(0); i < d.k; i++ { + if d.buckets.Get((uint(lower)+uint(upper)*i)%d.m) == 0 { + return false + } + } + + return true +} + +// Add will add the data to the Bloom filter. It returns the filter to allow +// for chaining. +func (d *DeletableBloomFilter) Add(data []byte) Filter { + lower, upper := hashKernel(data, d.hash) + + // Set the K bits. + for i := uint(0); i < d.k; i++ { + idx := (uint(lower) + uint(upper)*i) % d.m + if d.buckets.Get(idx) != 0 { + // Collision, set corresponding region bit. + d.collisions.Set(idx/d.regionSize, 1) + } else { + d.buckets.Set(idx, 1) + } + } + + d.count++ + return d +} + +// TestAndAdd is equivalent to calling Test followed by Add. It returns true if +// the data is a member, false if not. +func (d *DeletableBloomFilter) TestAndAdd(data []byte) bool { + lower, upper := hashKernel(data, d.hash) + member := true + + // If any of the K bits are not set, then it's not a member. + for i := uint(0); i < d.k; i++ { + idx := (uint(lower) + uint(upper)*i) % d.m + if d.buckets.Get(idx) == 0 { + member = false + } else { + // Collision, set corresponding region bit. + d.collisions.Set(idx/d.regionSize, 1) + } + d.buckets.Set(idx, 1) + } + + d.count++ + return member +} + +// TestAndRemove will test for membership of the data and remove it from the +// filter if it exists. Returns true if the data was a member, false if not. +func (d *DeletableBloomFilter) TestAndRemove(data []byte) bool { + lower, upper := hashKernel(data, d.hash) + member := true + + // Set the K bits. 
+ for i := uint(0); i < d.k; i++ { + d.indexBuffer[i] = (uint(lower) + uint(upper)*i) % d.m + if d.buckets.Get(d.indexBuffer[i]) == 0 { + member = false + } + } + + if member { + for _, idx := range d.indexBuffer { + if d.collisions.Get(idx/d.regionSize) == 0 { + // Clear only bits located in collision-free zones. + d.buckets.Set(idx, 0) + } + } + d.count-- + } + + return member +} + +// Reset restores the Bloom filter to its original state. It returns the filter +// to allow for chaining. +func (d *DeletableBloomFilter) Reset() *DeletableBloomFilter { + d.buckets.Reset() + d.collisions.Reset() + d.count = 0 + return d +} + +// SetHash sets the hashing function used in the filter. +// For the effect on false positive rates see: https://github.com/tylertreat/BoomFilters/pull/1 +func (d *DeletableBloomFilter) SetHash(h hash.Hash64) { + d.hash = h +} diff --git a/vendor/github.com/tylertreat/BoomFilters/hyperloglog.go b/vendor/github.com/tylertreat/BoomFilters/hyperloglog.go new file mode 100644 index 00000000000..a9a29d4c67d --- /dev/null +++ b/vendor/github.com/tylertreat/BoomFilters/hyperloglog.go @@ -0,0 +1,253 @@ +/* +Original work Copyright 2013 Eric Lesh +Modified work Copyright 2015 Tyler Treat + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. +*/ + +package boom + +import ( + "bytes" + "encoding/binary" + "errors" + "fmt" + "hash" + "hash/fnv" + "io" + "math" +) + +var exp32 = math.Pow(2, 32) + +// HyperLogLog implements the HyperLogLog cardinality estimation algorithm as +// described by Flajolet, Fusy, Gandouet, and Meunier in HyperLogLog: the +// analysis of a near-optimal cardinality estimation algorithm: +// +// http://algo.inria.fr/flajolet/Publications/FlFuGaMe07.pdf +// +// HyperLogLog is a probabilistic algorithm which approximates the number of +// distinct elements in a multiset. It works by hashing values and calculating +// the maximum number of leading zeros in the binary representation of each +// hash. If the maximum number of leading zeros is n, the estimated number of +// distinct elements in the set is 2^n. To minimize variance, the multiset is +// split into a configurable number of registers, the maximum number of leading +// zeros is calculated in the numbers in each register, and a harmonic mean is +// used to combine the estimates. +// +// For large or unbounded data sets, calculating the exact cardinality is +// impractical. HyperLogLog uses a fraction of the memory while providing an +// accurate approximation. For counting element frequency, refer to the +// Count-Min Sketch. +type HyperLogLog struct { + registers []uint8 // counter registers + m uint // number of registers + b uint32 // number of bits to calculate register + alpha float64 // bias-correction constant + hash hash.Hash32 // hash function +} + +// NewHyperLogLog creates a new HyperLogLog with m registers. Returns an error +// if m isn't a power of two. 
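For the cardinality estimation described above, a minimal sketch under the same import assumption, using the error-based constructor defined later in this file; the commented count is indicative only, since the estimate is approximate:

package main

import (
    "fmt"

    boom "github.com/tylertreat/BoomFilters"
)

func main() {
    hll, err := boom.NewDefaultHyperLogLog(0.01) // roughly 1% standard error
    if err != nil {
        panic(err)
    }

    // 10,000 additions, but only 1,000 distinct values.
    for i := 0; i < 10000; i++ {
        hll.Add([]byte(fmt.Sprintf("user-%d", i%1000)))
    }
    fmt.Println(hll.Count()) // approximately 1000
}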
+func NewHyperLogLog(m uint) (*HyperLogLog, error) { + if (m & (m - 1)) != 0 { + return nil, errors.New("m must be a power of two") + } + + return &HyperLogLog{ + registers: make([]uint8, m), + m: m, + b: uint32(math.Ceil(math.Log2(float64(m)))), + alpha: calculateAlpha(m), + hash: fnv.New32(), + }, nil +} + +// NewDefaultHyperLogLog creates a new HyperLogLog optimized for the specified +// standard error. Returns an error if the number of registers can't be +// calculated for the provided accuracy. +func NewDefaultHyperLogLog(e float64) (*HyperLogLog, error) { + m := math.Pow(1.04/e, 2) + return NewHyperLogLog(uint(math.Pow(2, math.Ceil(math.Log2(m))))) +} + +// Add will add the data to the set. Returns the HyperLogLog to allow for +// chaining. +func (h *HyperLogLog) Add(data []byte) *HyperLogLog { + var ( + hash = h.calculateHash(data) + k = 32 - h.b + r = calculateRho(hash<<h.b, k) + j = hash >> uint(k) + ) + + if r > h.registers[j] { + h.registers[j] = r + } + + return h +} + +// Count returns the approximated cardinality of the set. +func (h *HyperLogLog) Count() uint64 { + sum := 0.0 + m := float64(h.m) + for _, val := range h.registers { + sum += 1.0 / math.Pow(2.0, float64(val)) + } + estimate := h.alpha * m * m / sum + if estimate <= 5.0/2.0*m { + // Small range correction + v := 0 + for _, r := range h.registers { + if r == 0 { + v++ + } + } + if v > 0 { + estimate = m * math.Log(m/float64(v)) + } + } else if estimate > 1.0/30.0*exp32 { + // Large range correction + estimate = -exp32 * math.Log(1-estimate/exp32) + } + return uint64(estimate) +} + +// Merge combines this HyperLogLog with another. Returns an error if the number +// of registers in the two HyperLogLogs are not equal. +func (h *HyperLogLog) Merge(other *HyperLogLog) error { + if h.m != other.m { + return errors.New("number of registers must match") + } + + for j, r := range other.registers { + if r > h.registers[j] { + h.registers[j] = r + } + } + + return nil +} + +// Reset restores the HyperLogLog to its original state. It returns itself to +// allow for chaining. +func (h *HyperLogLog) Reset() *HyperLogLog { + h.registers = make([]uint8, h.m) + return h +} + +// calculateHash calculates the 32-bit hash value for the provided data. +func (h *HyperLogLog) calculateHash(data []byte) uint32 { + h.hash.Write(data) + sum := h.hash.Sum32() + h.hash.Reset() + return sum +} + +// SetHash sets the hashing function used. +func (h *HyperLogLog) SetHash(ha hash.Hash32) { + h.hash = ha +} + +// calculateAlpha calculates the bias-correction constant alpha based on the +// number of registers, m. +func calculateAlpha(m uint) (result float64) { + switch m { + case 16: + result = 0.673 + case 32: + result = 0.697 + case 64: + result = 0.709 + default: + result = 0.7213 / (1.0 + 1.079/float64(m)) + } + return result +} + +// calculateRho calculates the position of the leftmost 1-bit. +func calculateRho(val, max uint32) uint8 { + r := uint32(1) + for val&0x80000000 == 0 && r <= max { + r++ + val <<= 1 + } + return uint8(r) +} + +// WriteDataTo writes a binary representation of the Hll data to +// an io stream.
It returns the number of bytes written and error +func (h *HyperLogLog) WriteDataTo(stream io.Writer) (n int, err error) { + buf := new(bytes.Buffer) + // write register number first + err = binary.Write(buf, binary.LittleEndian, uint64(h.m)) + if err != nil { + return + } + + err = binary.Write(buf, binary.LittleEndian, h.b) + if err != nil { + return + } + + err = binary.Write(buf, binary.LittleEndian, h.alpha) + if err != nil { + return + } + + err = binary.Write(buf, binary.LittleEndian, h.registers) + if err != nil { + return + } + + n, err = stream.Write(buf.Bytes()) + return +} + +// ReadDataFrom reads a binary representation of the Hll data written +// by WriteDataTo() from io stream. It returns the number of bytes read +// and error. +// If serialized Hll configuration is different it returns error with expected params +func (h *HyperLogLog) ReadDataFrom(stream io.Reader) (int, error) { + var m uint64 + // read register number first + err := binary.Read(stream, binary.LittleEndian, &m) + if err != nil { + return 0, err + } + // check if register number is appropriate + // hll register number should be same with serialized hll + if uint64(h.m) != m { + return 0, fmt.Errorf("expected hll register number %d", m) + } + // set other values + err = binary.Read(stream, binary.LittleEndian, &h.b) + if err != nil { + return 0, err + } + + err = binary.Read(stream, binary.LittleEndian, &h.alpha) + if err != nil { + return 0, err + } + + err = binary.Read(stream, binary.LittleEndian, h.registers) + if err != nil { + return 0, err + } + + // count size of data in registers + m, b, alpha + size := int(h.m)*binary.Size(uint8(0)) + binary.Size(uint64(0)) + binary.Size(uint32(0)) + binary.Size(float64(0)) + + return size, err +} diff --git a/vendor/github.com/tylertreat/BoomFilters/inverse.go b/vendor/github.com/tylertreat/BoomFilters/inverse.go new file mode 100644 index 00000000000..23e8446c253 --- /dev/null +++ b/vendor/github.com/tylertreat/BoomFilters/inverse.go @@ -0,0 +1,269 @@ +/* +Original work Copyright (c) 2012 Jeff Hodges. All rights reserved. +Modified work Copyright (c) 2015 Tyler Treat. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Jeff Hodges nor the names of this project's +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +package boom + +import ( + "bytes" + "encoding/binary" + "encoding/gob" + "hash" + "hash/fnv" + "io" + "sync" + "sync/atomic" + "unsafe" +) + +// InverseBloomFilter is a concurrent "inverse" Bloom filter, which is +// effectively the opposite of a classic Bloom filter. This was originally +// described and written by Jeff Hodges: +// +// http://www.somethingsimilar.com/2012/05/21/the-opposite-of-a-bloom-filter/ +// +// The InverseBloomFilter may report a false negative but can never report a +// false positive. That is, it may report that an item has not been seen when +// it actually has, but it will never report an item as seen which it hasn't +// come across. This behaves in a similar manner to a fixed-size hashmap which +// does not handle conflicts. +// +// An example use case is deduplicating events while processing a stream of +// data. Ideally, duplicate events are relatively close together. +type InverseBloomFilter struct { + array []*[]byte + hashPool *sync.Pool + capacity uint +} + +// NewInverseBloomFilter creates and returns a new InverseBloomFilter with the +// specified capacity. +func NewInverseBloomFilter(capacity uint) *InverseBloomFilter { + return &InverseBloomFilter{ + array: make([]*[]byte, capacity), + hashPool: &sync.Pool{New: func() interface{} { return fnv.New32() }}, + capacity: capacity, + } +} + +// Test will test for membership of the data and returns true if it is a +// member, false if not. This is a probabilistic test, meaning there is a +// non-zero probability of false negatives but a zero probability of false +// positives. That is, it may return false even though the data was added, but +// it will never return true for data that hasn't been added. +func (i *InverseBloomFilter) Test(data []byte) bool { + index := i.index(data) + indexPtr := (*unsafe.Pointer)(unsafe.Pointer(&i.array[index])) + val := (*[]byte)(atomic.LoadPointer(indexPtr)) + if val == nil { + return false + } + return bytes.Equal(*val, data) +} + +// Add will add the data to the filter. It returns the filter to allow for +// chaining. +func (i *InverseBloomFilter) Add(data []byte) Filter { + index := i.index(data) + i.getAndSet(index, data) + return i +} + +// TestAndAdd is equivalent to calling Test followed by Add atomically. It +// returns true if the data is a member, false if not. +func (i *InverseBloomFilter) TestAndAdd(data []byte) bool { + oldID := i.getAndSet(i.index(data), data) + return bytes.Equal(oldID, data) +} + +// Capacity returns the filter capacity. +func (i *InverseBloomFilter) Capacity() uint { + return i.capacity +} + +// getAndSet returns the data that was in the slice at the given index after +// putting the new data in the slice at that index, atomically. 
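A sketch of the stream-deduplication use case mentioned above, under the same import assumption; the filter may occasionally let a duplicate through, but it never flags a first occurrence as already seen:

package main

import (
    "fmt"

    boom "github.com/tylertreat/BoomFilters"
)

func main() {
    ibf := boom.NewInverseBloomFilter(10000)

    for _, e := range []string{"evt-1", "evt-2", "evt-1", "evt-3", "evt-2"} {
        if ibf.TestAndAdd([]byte(e)) {
            fmt.Println("duplicate:", e)
            continue
        }
        fmt.Println("processing:", e)
    }
}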
+func (i *InverseBloomFilter) getAndSet(index uint32, data []byte) []byte { + indexPtr := (*unsafe.Pointer)(unsafe.Pointer(&i.array[index])) + keyUnsafe := unsafe.Pointer(&data) + var oldKey []byte + for { + oldKeyUnsafe := atomic.LoadPointer(indexPtr) + if atomic.CompareAndSwapPointer(indexPtr, oldKeyUnsafe, keyUnsafe) { + oldKeyPtr := (*[]byte)(oldKeyUnsafe) + if oldKeyPtr != nil { + oldKey = *oldKeyPtr + } + break + } + } + return oldKey +} + +// index returns the array index for the given data. +func (i *InverseBloomFilter) index(data []byte) uint32 { + hash := i.hashPool.Get().(hash.Hash32) + hash.Write(data) + index := hash.Sum32() % uint32(i.capacity) + hash.Reset() + i.hashPool.Put(hash) + return index +} + +// SetHashFactory sets the hashing function factory used in the filter. +func (i *InverseBloomFilter) SetHashFactory(h func() hash.Hash32) { + i.hashPool = &sync.Pool{New: func() interface{} { return h() }} +} + +// WriteTo writes a binary representation of the InverseBloomFilter to an i/o stream. +// It returns the number of bytes written. +func (i *InverseBloomFilter) WriteTo(stream io.Writer) (int64, error) { + err := binary.Write(stream, binary.BigEndian, uint64(i.capacity)) + if err != nil { + return 0, err + } + + // Dereference all pointers to []byte + array := make([][]byte, int(i.capacity)) + for b := range i.array { + if i.array[b] != nil { + array[b] = *i.array[b] + } else { + array[b] = nil + } + } + + // Encode array into a []byte + var buf bytes.Buffer + gob.NewEncoder(&buf).Encode(array) + serialized := buf.Bytes() + + // Write the length of encoded slice + err = binary.Write(stream, binary.BigEndian, int64(len(serialized))) + if err != nil { + return 0, err + } + + // Write the serialized bytes + written, err := stream.Write(serialized) + if err != nil { + return 0, err + } + + return int64(written) + int64(2*binary.Size(uint64(0))), err +} + +// ReadFrom reads a binary representation of InverseBloomFilter (such as might +// have been written by WriteTo()) from an i/o stream. ReadFrom replaces the +// array of its filter with the one read from disk. It returns the number +// of bytes read. +func (i *InverseBloomFilter) ReadFrom(stream io.Reader) (int64, error) { + decoded, capacity, size, err := i.decodeToArray(stream) + if err != nil { + return int64(0), err + } + + // Create []*[]byte and point to each item in decoded + decodedWithPointers := make([]*[]byte, capacity) + for p := range decodedWithPointers { + if len(decoded[p]) == 0 { + decodedWithPointers[p] = nil + } else { + decodedWithPointers[p] = &decoded[p] + } + } + + i.array = decodedWithPointers + i.capacity = uint(capacity) + return int64(size) + int64(2*binary.Size(uint64(0))), nil +} + +// ImportElementsFrom reads a binary representation of InverseBloomFilter (such as might +// have been written by WriteTo()) from an i/o stream into a new bloom filter using the +// Add() method (skipping empty elements, if any). It returns the number of +// elements decoded from disk. +func (i *InverseBloomFilter) ImportElementsFrom(stream io.Reader) (int, error) { + decoded, _, _, err := i.decodeToArray(stream) + if err != nil { + return 0, err + } + + // Create []*[]byte and point to each item in decoded + for p := range decoded { + if len(decoded[p]) > 0 { + i.Add(decoded[p]) + } + } + + return len(decoded), nil +} + +// decodeToArray decodes an inverse bloom filter from an i/o stream into a 2-d byte slice. 
+func (i *InverseBloomFilter) decodeToArray(stream io.Reader) ([][]byte, uint64, uint64, error) { + var capacity, size uint64 + + err := binary.Read(stream, binary.BigEndian, &capacity) + if err != nil { + return nil, 0, 0, err + } + + err = binary.Read(stream, binary.BigEndian, &size) + if err != nil { + return nil, 0, 0, err + } + + // Read the encoded slice and decode into [][]byte + encoded := make([]byte, size) + stream.Read(encoded) + buf := bytes.NewBuffer(encoded) + dec := gob.NewDecoder(buf) + decoded := make([][]byte, capacity) + dec.Decode(&decoded) + + return decoded, capacity, size, nil +} + +// GobEncode implements gob.GobEncoder interface. +func (i *InverseBloomFilter) GobEncode() ([]byte, error) { + var buf bytes.Buffer + _, err := i.WriteTo(&buf) + if err != nil { + return nil, err + } + + return buf.Bytes(), nil +} + +// GobDecode implements gob.GobDecoder interface. +func (i *InverseBloomFilter) GobDecode(data []byte) error { + buf := bytes.NewBuffer(data) + _, err := i.ReadFrom(buf) + return err +} diff --git a/vendor/github.com/tylertreat/BoomFilters/minhash.go b/vendor/github.com/tylertreat/BoomFilters/minhash.go new file mode 100644 index 00000000000..5c21c45151e --- /dev/null +++ b/vendor/github.com/tylertreat/BoomFilters/minhash.go @@ -0,0 +1,104 @@ +package boom + +import ( + "math" + "math/rand" +) + +// MinHash is a variation of the technique for estimating similarity between +// two sets as presented by Broder in On the resemblance and containment of +// documents: +// +// http://gatekeeper.dec.com/ftp/pub/dec/SRC/publications/broder/positano-final-wpnums.pdf +// +// This can be used to cluster or compare documents by splitting the corpus +// into a bag of words. MinHash returns the approximated similarity ratio of +// the two bags. The similarity is less accurate for very small bags of words. 
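A small sketch of the MinHash similarity estimate described above, under the same import assumption; the result is randomized and approximate, and less reliable for very small bags of words:

package main

import (
    "fmt"
    "strings"

    boom "github.com/tylertreat/BoomFilters"
)

func main() {
    doc1 := strings.Fields("the quick brown fox jumps over the lazy dog")
    doc2 := strings.Fields("the quick brown fox leaps over the sleepy dog")

    // Approximate resemblance of the two bags of words, in [0, 1].
    fmt.Printf("similarity ~ %.2f\n", boom.MinHash(doc1, doc2))
}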
+func MinHash(bag1, bag2 []string) float32 { + k := len(bag1) + len(bag2) + hashes := make([]int, k) + for i := 0; i < k; i++ { + a := uint(rand.Int()) + b := uint(rand.Int()) + c := uint(rand.Int()) + x := computeHash(a*b*c, a, b, c) + hashes[i] = int(x) + } + + bitMap := bitMap(bag1, bag2) + minHashValues := hashBuckets(2, k) + minHash(bag1, 0, minHashValues, bitMap, k, hashes) + minHash(bag2, 1, minHashValues, bitMap, k, hashes) + return similarity(minHashValues, k) +} + +func minHash(bag []string, bagIndex int, minHashValues [][]int, + bitArray map[string][]bool, k int, hashes []int) { + index := 0 + for element := range bitArray { + for i := 0; i < k; i++ { + if contains(bag, element) { + hindex := hashes[index] + if hindex < minHashValues[bagIndex][index] { + minHashValues[bagIndex][index] = hindex + } + } + } + index++ + } +} + +func contains(bag []string, element string) bool { + for _, e := range bag { + if e == element { + return true + } + } + return false +} + +func bitMap(bag1, bag2 []string) map[string][]bool { + bitArray := map[string][]bool{} + for _, element := range bag1 { + bitArray[element] = []bool{true, false} + } + + for _, element := range bag2 { + if _, ok := bitArray[element]; ok { + bitArray[element] = []bool{true, true} + } else if _, ok := bitArray[element]; !ok { + bitArray[element] = []bool{false, true} + } + } + + return bitArray +} + +func hashBuckets(numSets, k int) [][]int { + minHashValues := make([][]int, numSets) + for i := 0; i < numSets; i++ { + minHashValues[i] = make([]int, k) + } + + for i := 0; i < numSets; i++ { + for j := 0; j < k; j++ { + minHashValues[i][j] = math.MaxInt32 + } + } + return minHashValues +} + +func computeHash(x, a, b, u uint) uint { + return (a*x + b) >> (32 - u) +} + +func similarity(minHashValues [][]int, k int) float32 { + identicalMinHashes := 0 + for i := 0; i < k; i++ { + if minHashValues[0][i] == minHashValues[1][i] { + identicalMinHashes++ + } + } + + return (1.0 * float32(identicalMinHashes)) / float32(k) +} diff --git a/vendor/github.com/tylertreat/BoomFilters/partitioned.go b/vendor/github.com/tylertreat/BoomFilters/partitioned.go new file mode 100644 index 00000000000..f8fa6f561cf --- /dev/null +++ b/vendor/github.com/tylertreat/BoomFilters/partitioned.go @@ -0,0 +1,263 @@ +/* +Original work Copyright (c) 2013 zhenjl +Modified work Copyright (c) 2015 Tyler Treat + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +*/ + +package boom + +import ( + "bytes" + "encoding/binary" + "hash" + "hash/fnv" + "io" + "math" +) + +// PartitionedBloomFilter implements a variation of a classic Bloom filter as +// described by Almeida, Baquero, Preguica, and Hutchison in Scalable Bloom +// Filters: +// +// http://gsd.di.uminho.pt/members/cbm/ps/dbloom.pdf +// +// This filter works by partitioning the M-sized bit array into k slices of +// size m = M/k bits. Each hash function produces an index over m for its +// respective slice. 
Thus, each element is described by exactly k bits, meaning +// the distribution of false positives is uniform across all elements. +type PartitionedBloomFilter struct { + partitions []*Buckets // partitioned filter data + hash hash.Hash64 // hash function (kernel for all k functions) + m uint // filter size (divided into k partitions) + k uint // number of hash functions (and partitions) + s uint // partition size (m / k) + count uint // number of items added +} + +// NewPartitionedBloomFilter creates a new partitioned Bloom filter optimized +// to store n items with a specified target false-positive rate. +func NewPartitionedBloomFilter(n uint, fpRate float64) *PartitionedBloomFilter { + var ( + m = OptimalM(n, fpRate) + k = OptimalK(fpRate) + partitions = make([]*Buckets, k) + s = uint(math.Ceil(float64(m) / float64(k))) + ) + + for i := uint(0); i < k; i++ { + partitions[i] = NewBuckets(s, 1) + } + + return &PartitionedBloomFilter{ + partitions: partitions, + hash: fnv.New64(), + m: m, + k: k, + s: s, + } +} + +// Capacity returns the Bloom filter capacity, m. +func (p *PartitionedBloomFilter) Capacity() uint { + return p.m +} + +// K returns the number of hash functions. +func (p *PartitionedBloomFilter) K() uint { + return p.k +} + +// Count returns the number of items added to the filter. +func (p *PartitionedBloomFilter) Count() uint { + return p.count +} + +// EstimatedFillRatio returns the current estimated ratio of set bits. +func (p *PartitionedBloomFilter) EstimatedFillRatio() float64 { + return 1 - math.Exp(-float64(p.count)/float64(p.s)) +} + +// FillRatio returns the average ratio of set bits across all partitions. +func (p *PartitionedBloomFilter) FillRatio() float64 { + t := float64(0) + for i := uint(0); i < p.k; i++ { + sum := uint32(0) + for j := uint(0); j < p.partitions[i].Count(); j++ { + sum += p.partitions[i].Get(j) + } + t += (float64(sum) / float64(p.s)) + } + return t / float64(p.k) +} + +// Test will test for membership of the data and returns true if it is a +// member, false if not. This is a probabilistic test, meaning there is a +// non-zero probability of false positives but a zero probability of false +// negatives. Due to the way the filter is partitioned, the probability of +// false positives is uniformly distributed across all elements. +func (p *PartitionedBloomFilter) Test(data []byte) bool { + lower, upper := hashKernel(data, p.hash) + + // If any of the K partition bits are not set, then it's not a member. + for i := uint(0); i < p.k; i++ { + if p.partitions[i].Get((uint(lower)+uint(upper)*i)%p.s) == 0 { + return false + } + } + + return true +} + +// Add will add the data to the Bloom filter. It returns the filter to allow +// for chaining. +func (p *PartitionedBloomFilter) Add(data []byte) Filter { + lower, upper := hashKernel(data, p.hash) + + // Set the K partition bits. + for i := uint(0); i < p.k; i++ { + p.partitions[i].Set((uint(lower)+uint(upper)*i)%p.s, 1) + } + + p.count++ + return p +} + +// TestAndAdd is equivalent to calling Test followed by Add. It returns true if +// the data is a member, false if not. +func (p *PartitionedBloomFilter) TestAndAdd(data []byte) bool { + lower, upper := hashKernel(data, p.hash) + member := true + + // If any of the K partition bits are not set, then it's not a member. 
+ for i := uint(0); i < p.k; i++ { + idx := (uint(lower) + uint(upper)*i) % p.s + if p.partitions[i].Get(idx) == 0 { + member = false + } + p.partitions[i].Set(idx, 1) + } + + p.count++ + return member +} + +// Reset restores the Bloom filter to its original state. It returns the filter +// to allow for chaining. +func (p *PartitionedBloomFilter) Reset() *PartitionedBloomFilter { + for _, partition := range p.partitions { + partition.Reset() + } + return p +} + +// SetHash sets the hashing function used in the filter. +// For the effect on false positive rates see: https://github.com/tylertreat/BoomFilters/pull/1 +func (p *PartitionedBloomFilter) SetHash(h hash.Hash64) { + p.hash = h +} + +// WriteTo writes a binary representation of the PartitionedBloomFilter to an i/o stream. +// It returns the number of bytes written. +func (p *PartitionedBloomFilter) WriteTo(stream io.Writer) (int64, error) { + err := binary.Write(stream, binary.BigEndian, uint64(p.m)) + if err != nil { + return 0, err + } + err = binary.Write(stream, binary.BigEndian, uint64(p.k)) + if err != nil { + return 0, err + } + err = binary.Write(stream, binary.BigEndian, uint64(p.s)) + if err != nil { + return 0, err + } + err = binary.Write(stream, binary.BigEndian, uint64(p.count)) + if err != nil { + return 0, err + } + err = binary.Write(stream, binary.BigEndian, uint64(len(p.partitions))) + if err != nil { + return 0, err + } + var numBytes int64 + for _, partition := range p.partitions { + num, err := partition.WriteTo(stream) + if err != nil { + return 0, err + } + numBytes += num + } + return numBytes + int64(5*binary.Size(uint64(0))), err +} + +// ReadFrom reads a binary representation of PartitionedBloomFilter (such as might +// have been written by WriteTo()) from an i/o stream. It returns the number +// of bytes read. +func (p *PartitionedBloomFilter) ReadFrom(stream io.Reader) (int64, error) { + var m, k, s, count, len uint64 + err := binary.Read(stream, binary.BigEndian, &m) + if err != nil { + return 0, err + } + err = binary.Read(stream, binary.BigEndian, &k) + if err != nil { + return 0, err + } + err = binary.Read(stream, binary.BigEndian, &s) + if err != nil { + return 0, err + } + err = binary.Read(stream, binary.BigEndian, &count) + if err != nil { + return 0, err + } + err = binary.Read(stream, binary.BigEndian, &len) + if err != nil { + return 0, err + } + var numBytes int64 + partitions := make([]*Buckets, len) + for i := range partitions { + buckets := &Buckets{} + num, err := buckets.ReadFrom(stream) + if err != nil { + return 0, err + } + numBytes += num + partitions[i] = buckets + } + p.m = uint(m) + p.k = uint(k) + p.s = uint(s) + p.count = uint(count) + p.partitions = partitions + return numBytes + int64(5*binary.Size(uint64(0))), nil +} + +// GobEncode implements gob.GobEncoder interface. +func (p *PartitionedBloomFilter) GobEncode() ([]byte, error) { + var buf bytes.Buffer + _, err := p.WriteTo(&buf) + if err != nil { + return nil, err + } + + return buf.Bytes(), nil +} + +// GobDecode implements gob.GobDecoder interface. 
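Because the partitioned filter above also ships WriteTo and ReadFrom, a round-trip sketch may be useful, under the same import assumption. ReadFrom restores the partition data and counters but not the hash function, so the receiver is constructed with matching defaults first:

package main

import (
    "bytes"
    "fmt"

    boom "github.com/tylertreat/BoomFilters"
)

func main() {
    pbf := boom.NewPartitionedBloomFilter(1000, 0.01)
    pbf.Add([]byte("chunk-123"))

    // Serialize, then restore into a freshly constructed filter.
    var buf bytes.Buffer
    if _, err := pbf.WriteTo(&buf); err != nil {
        panic(err)
    }
    restored := boom.NewPartitionedBloomFilter(1000, 0.01)
    if _, err := restored.ReadFrom(&buf); err != nil {
        panic(err)
    }
    fmt.Println(restored.Test([]byte("chunk-123"))) // true
}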
+func (p *PartitionedBloomFilter) GobDecode(data []byte) error { + buf := bytes.NewBuffer(data) + _, err := p.ReadFrom(buf) + + return err +} diff --git a/vendor/github.com/tylertreat/BoomFilters/scalable.go b/vendor/github.com/tylertreat/BoomFilters/scalable.go new file mode 100644 index 00000000000..590fd88d517 --- /dev/null +++ b/vendor/github.com/tylertreat/BoomFilters/scalable.go @@ -0,0 +1,259 @@ +/* +Original work Copyright (c) 2013 zhenjl +Modified work Copyright (c) 2015 Tyler Treat + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +*/ + +package boom + +import ( + "bytes" + "encoding/binary" + "hash" + "io" + "math" +) + +// ScalableBloomFilter implements a Scalable Bloom Filter as described by +// Almeida, Baquero, Preguica, and Hutchison in Scalable Bloom Filters: +// +// http://gsd.di.uminho.pt/members/cbm/ps/dbloom.pdf +// +// A Scalable Bloom Filter dynamically adapts to the number of elements in the +// data set while enforcing a tight upper bound on the false-positive rate. +// This works by adding Bloom filters with geometrically decreasing +// false-positive rates as filters become full. The tightening ratio, r, +// controls the filter growth. The compounded probability over the whole series +// converges to a target value, even accounting for an infinite series. +// +// Scalable Bloom Filters are useful for cases where the size of the data set +// isn't known a priori and memory constraints aren't of particular concern. +// For situations where memory is bounded, consider using Inverse or Stable +// Bloom Filters. +type ScalableBloomFilter struct { + filters []*PartitionedBloomFilter // filters with geometrically decreasing error rates + r float64 // tightening ratio + fp float64 // target false-positive rate + p float64 // partition fill ratio + hint uint // filter size hint +} + +// NewScalableBloomFilter creates a new Scalable Bloom Filter with the +// specified target false-positive rate and tightening ratio. Use +// NewDefaultScalableBloomFilter if you don't want to calculate these +// parameters. +func NewScalableBloomFilter(hint uint, fpRate, r float64) *ScalableBloomFilter { + s := &ScalableBloomFilter{ + filters: make([]*PartitionedBloomFilter, 0, 1), + r: r, + fp: fpRate, + p: fillRatio, + hint: hint, + } + + s.addFilter() + return s +} + +// NewDefaultScalableBloomFilter creates a new Scalable Bloom Filter with the +// specified target false-positive rate and an optimal tightening ratio. +func NewDefaultScalableBloomFilter(fpRate float64) *ScalableBloomFilter { + return NewScalableBloomFilter(10000, fpRate, 0.8) +} + +// Capacity returns the current Scalable Bloom Filter capacity, which is the +// sum of the capacities for the contained series of Bloom filters. +func (s *ScalableBloomFilter) Capacity() uint { + capacity := uint(0) + for _, bf := range s.filters { + capacity += bf.Capacity() + } + return capacity +} + +// K returns the number of hash functions used in each Bloom filter. 
+func (s *ScalableBloomFilter) K() uint { + // K is the same across every filter. + return s.filters[0].K() +} + +// FillRatio returns the average ratio of set bits across every filter. +func (s *ScalableBloomFilter) FillRatio() float64 { + sum := 0.0 + for _, filter := range s.filters { + sum += filter.FillRatio() + } + return sum / float64(len(s.filters)) +} + +// Test will test for membership of the data and returns true if it is a +// member, false if not. This is a probabilistic test, meaning there is a +// non-zero probability of false positives but a zero probability of false +// negatives. +func (s *ScalableBloomFilter) Test(data []byte) bool { + // Querying is made by testing for the presence in each filter. + for _, bf := range s.filters { + if bf.Test(data) { + return true + } + } + + return false +} + +// Add will add the data to the Bloom filter. It returns the filter to allow +// for chaining. +func (s *ScalableBloomFilter) Add(data []byte) Filter { + idx := len(s.filters) - 1 + + // If the last filter has reached its fill ratio, add a new one. + if s.filters[idx].EstimatedFillRatio() >= s.p { + s.addFilter() + idx++ + } + + s.filters[idx].Add(data) + return s +} + +// TestAndAdd is equivalent to calling Test followed by Add. It returns true if +// the data is a member, false if not. +func (s *ScalableBloomFilter) TestAndAdd(data []byte) bool { + member := s.Test(data) + s.Add(data) + return member +} + +// Reset restores the Bloom filter to its original state. It returns the filter +// to allow for chaining. +func (s *ScalableBloomFilter) Reset() *ScalableBloomFilter { + s.filters = make([]*PartitionedBloomFilter, 0, 1) + s.addFilter() + return s +} + +// addFilter adds a new Bloom filter with a restricted false-positive rate to +// the Scalable Bloom Filter +func (s *ScalableBloomFilter) addFilter() { + fpRate := s.fp * math.Pow(s.r, float64(len(s.filters))) + p := NewPartitionedBloomFilter(s.hint, fpRate) + if len(s.filters) > 0 { + p.SetHash(s.filters[0].hash) + } + s.filters = append(s.filters, p) +} + +// SetHash sets the hashing function used in the filter. +// For the effect on false positive rates see: https://github.com/tylertreat/BoomFilters/pull/1 +func (s *ScalableBloomFilter) SetHash(h hash.Hash64) { + for _, bf := range s.filters { + bf.SetHash(h) + } +} + +// WriteTo writes a binary representation of the ScalableBloomFilter to an i/o stream. +// It returns the number of bytes written. +func (s *ScalableBloomFilter) WriteTo(stream io.Writer) (int64, error) { + err := binary.Write(stream, binary.BigEndian, s.r) + if err != nil { + return 0, err + } + err = binary.Write(stream, binary.BigEndian, s.fp) + if err != nil { + return 0, err + } + err = binary.Write(stream, binary.BigEndian, s.p) + if err != nil { + return 0, err + } + err = binary.Write(stream, binary.BigEndian, uint64(s.hint)) + if err != nil { + return 0, err + } + err = binary.Write(stream, binary.BigEndian, uint64(len(s.filters))) + if err != nil { + return 0, err + } + var numBytes int64 + for _, filter := range s.filters { + num, err := filter.WriteTo(stream) + if err != nil { + return 0, err + } + numBytes += num + } + return numBytes + int64(5*binary.Size(uint64(0))), err +} + +// ReadFrom reads a binary representation of ScalableBloomFilter (such as might +// have been written by WriteTo()) from an i/o stream. It returns the number +// of bytes read. 
+func (s *ScalableBloomFilter) ReadFrom(stream io.Reader) (int64, error) { + var r, fp, p float64 + var hint, len uint64 + err := binary.Read(stream, binary.BigEndian, &r) + if err != nil { + return 0, err + } + err = binary.Read(stream, binary.BigEndian, &fp) + if err != nil { + return 0, err + } + err = binary.Read(stream, binary.BigEndian, &p) + if err != nil { + return 0, err + } + err = binary.Read(stream, binary.BigEndian, &hint) + if err != nil { + return 0, err + } + err = binary.Read(stream, binary.BigEndian, &len) + if err != nil { + return 0, err + } + var numBytes int64 + filters := make([]*PartitionedBloomFilter, len) + for i := range filters { + filter := NewPartitionedBloomFilter(0, fp) + num, err := filter.ReadFrom(stream) + if err != nil { + return 0, err + } + numBytes += num + filters[i] = filter + } + s.r = r + s.fp = fp + s.p = p + s.hint = uint(hint) + s.filters = filters + return numBytes + int64(5*binary.Size(uint64(0))), nil +} + +// GobEncode implements gob.GobEncoder interface. +func (s *ScalableBloomFilter) GobEncode() ([]byte, error) { + var buf bytes.Buffer + _, err := s.WriteTo(&buf) + if err != nil { + return nil, err + } + + return buf.Bytes(), nil +} + +// GobDecode implements gob.GobDecoder interface. +func (s *ScalableBloomFilter) GobDecode(data []byte) error { + buf := bytes.NewBuffer(data) + _, err := s.ReadFrom(buf) + + return err +} diff --git a/vendor/github.com/tylertreat/BoomFilters/stable.go b/vendor/github.com/tylertreat/BoomFilters/stable.go new file mode 100644 index 00000000000..2b6f096c8d8 --- /dev/null +++ b/vendor/github.com/tylertreat/BoomFilters/stable.go @@ -0,0 +1,333 @@ +package boom + +import ( + "bytes" + "encoding/binary" + "hash" + "hash/fnv" + "io" + "math" + "math/rand" +) + +// StableBloomFilter implements a Stable Bloom Filter as described by Deng and +// Rafiei in Approximately Detecting Duplicates for Streaming Data using Stable +// Bloom Filters: +// +// http://webdocs.cs.ualberta.ca/~drafiei/papers/DupDet06Sigmod.pdf +// +// A Stable Bloom Filter (SBF) continuously evicts stale information so that it +// has room for more recent elements. Like traditional Bloom filters, an SBF +// has a non-zero probability of false positives, which is controlled by +// several parameters. Unlike the classic Bloom filter, an SBF has a tight +// upper bound on the rate of false positives while introducing a non-zero rate +// of false negatives. The false-positive rate of a classic Bloom filter +// eventually reaches 1, after which all queries result in a false positive. +// The stable-point property of an SBF means the false-positive rate +// asymptotically approaches a configurable fixed constant. A classic Bloom +// filter is actually a special case of SBF where the eviction rate is zero, so +// this package provides support for them as well. +// +// Stable Bloom Filters are useful for cases where the size of the data set +// isn't known a priori, which is a requirement for traditional Bloom filters, +// and memory is bounded. For example, an SBF can be used to deduplicate +// events from an unbounded event stream with a specified upper bound on false +// positives and minimal false negatives. 
+type StableBloomFilter struct { + cells *Buckets // filter data + hash hash.Hash64 // hash function (kernel for all k functions) + m uint // number of cells + p uint // number of cells to decrement + k uint // number of hash functions + max uint8 // cell max value + indexBuffer []uint // buffer used to cache indices +} + +// NewStableBloomFilter creates a new Stable Bloom Filter with m cells and d +// bits allocated per cell optimized for the target false-positive rate. Use +// NewDefaultStableFilter if you don't want to calculate d. +func NewStableBloomFilter(m uint, d uint8, fpRate float64) *StableBloomFilter { + k := OptimalK(fpRate) / 2 + if k > m { + k = m + } else if k <= 0 { + k = 1 + } + + cells := NewBuckets(m, d) + + return &StableBloomFilter{ + hash: fnv.New64(), + m: m, + k: k, + p: optimalStableP(m, k, d, fpRate), + max: cells.MaxBucketValue(), + cells: cells, + indexBuffer: make([]uint, k), + } +} + +// NewDefaultStableBloomFilter creates a new Stable Bloom Filter with m 1-bit +// cells and which is optimized for cases where there is no prior knowledge of +// the input data stream while maintaining an upper bound using the provided +// rate of false positives. +func NewDefaultStableBloomFilter(m uint, fpRate float64) *StableBloomFilter { + return NewStableBloomFilter(m, 1, fpRate) +} + +// NewUnstableBloomFilter creates a new special case of Stable Bloom Filter +// which is a traditional Bloom filter with m bits and an optimal number of +// hash functions for the target false-positive rate. Unlike the stable +// variant, data is not evicted and a cell contains a maximum of 1 hash value. +func NewUnstableBloomFilter(m uint, fpRate float64) *StableBloomFilter { + var ( + cells = NewBuckets(m, 1) + k = OptimalK(fpRate) + ) + + return &StableBloomFilter{ + hash: fnv.New64(), + m: m, + k: k, + p: 0, + max: cells.MaxBucketValue(), + cells: cells, + indexBuffer: make([]uint, k), + } +} + +// Cells returns the number of cells in the Stable Bloom Filter. +func (s *StableBloomFilter) Cells() uint { + return s.m +} + +// K returns the number of hash functions. +func (s *StableBloomFilter) K() uint { + return s.k +} + +// P returns the number of cells decremented on every add. +func (s *StableBloomFilter) P() uint { + return s.p +} + +// StablePoint returns the limit of the expected fraction of zeros in the +// Stable Bloom Filter when the number of iterations goes to infinity. When +// this limit is reached, the Stable Bloom Filter is considered stable. +func (s *StableBloomFilter) StablePoint() float64 { + var ( + subDenom = float64(s.p) * (1/float64(s.k) - 1/float64(s.m)) + denom = 1 + 1/subDenom + base = 1 / denom + ) + + return math.Pow(base, float64(s.max)) +} + +// FalsePositiveRate returns the upper bound on false positives when the filter +// has become stable. +func (s *StableBloomFilter) FalsePositiveRate() float64 { + return math.Pow(1-s.StablePoint(), float64(s.k)) +} + +// Test will test for membership of the data and returns true if it is a +// member, false if not. This is a probabilistic test, meaning there is a +// non-zero probability of false positives and false negatives. +func (s *StableBloomFilter) Test(data []byte) bool { + lower, upper := hashKernel(data, s.hash) + + // If any of the K cells are 0, then it's not a member. + for i := uint(0); i < s.k; i++ { + if s.cells.Get((uint(lower)+uint(upper)*i)%s.m) == 0 { + return false + } + } + + return true +} + +// Add will add the data to the Stable Bloom Filter. It returns the filter to +// allow for chaining. 
+func (s *StableBloomFilter) Add(data []byte) Filter { + // Randomly decrement p cells to make room for new elements. + s.decrement() + + lower, upper := hashKernel(data, s.hash) + + // Set the K cells to max. + for i := uint(0); i < s.k; i++ { + s.cells.Set((uint(lower)+uint(upper)*i)%s.m, s.max) + } + + return s +} + +// TestAndAdd is equivalent to calling Test followed by Add. It returns true if +// the data is a member, false if not. +func (s *StableBloomFilter) TestAndAdd(data []byte) bool { + lower, upper := hashKernel(data, s.hash) + member := true + + // If any of the K cells are 0, then it's not a member. + for i := uint(0); i < s.k; i++ { + s.indexBuffer[i] = (uint(lower) + uint(upper)*i) % s.m + if s.cells.Get(s.indexBuffer[i]) == 0 { + member = false + } + } + + // Randomly decrement p cells to make room for new elements. + s.decrement() + + // Set the K cells to max. + for _, idx := range s.indexBuffer { + s.cells.Set(idx, s.max) + } + + return member +} + +// Reset restores the Stable Bloom Filter to its original state. It returns the +// filter to allow for chaining. +func (s *StableBloomFilter) Reset() *StableBloomFilter { + s.cells.Reset() + return s +} + +// decrement will decrement a random cell and (p-1) adjacent cells by 1. This +// is faster than generating p random numbers. Although the processes of +// picking the p cells are not independent, each cell has a probability of p/m +// for being picked at each iteration, which means the properties still hold. +func (s *StableBloomFilter) decrement() { + r := rand.Intn(int(s.m)) + for i := uint(0); i < s.p; i++ { + idx := (r + int(i)) % int(s.m) + s.cells.Increment(uint(idx), -1) + } +} + +// SetHash sets the hashing function used in the filter. +// For the effect on false positive rates see: https://github.com/tylertreat/BoomFilters/pull/1 +func (s *StableBloomFilter) SetHash(h hash.Hash64) { + s.hash = h +} + +// WriteTo writes a binary representation of the StableBloomFilter to an i/o stream. +// It returns the number of bytes written. +func (s *StableBloomFilter) WriteTo(stream io.Writer) (int64, error) { + err := binary.Write(stream, binary.BigEndian, uint64(s.m)) + if err != nil { + return 0, err + } + err = binary.Write(stream, binary.BigEndian, uint64(s.p)) + if err != nil { + return 0, err + } + err = binary.Write(stream, binary.BigEndian, uint64(s.k)) + if err != nil { + return 0, err + } + err = binary.Write(stream, binary.BigEndian, s.max) + if err != nil { + return 0, err + } + err = binary.Write(stream, binary.BigEndian, int64(len(s.indexBuffer))) + if err != nil { + return 0, err + } + for _, index := range s.indexBuffer { + err = binary.Write(stream, binary.BigEndian, uint64(index)) + if err != nil { + return 0, err + } + } + n, err := s.cells.WriteTo(stream) + if err != nil { + return 0, err + } + return int64((3+len(s.indexBuffer))*binary.Size(uint64(0))) + + int64(1*binary.Size(uint8(0))) + int64(1*binary.Size(int64(0))) + n, err +} + +// ReadFrom reads a binary representation of StableBloomFilter (such as might +// have been written by WriteTo()) from an i/o stream. It returns the number +// of bytes read. 
+func (s *StableBloomFilter) ReadFrom(stream io.Reader) (int64, error) { + var m, p, k, bufferLen uint64 + var max uint8 + err := binary.Read(stream, binary.BigEndian, &m) + if err != nil { + return 0, err + } + err = binary.Read(stream, binary.BigEndian, &p) + if err != nil { + return 0, err + } + err = binary.Read(stream, binary.BigEndian, &k) + if err != nil { + return 0, err + } + err = binary.Read(stream, binary.BigEndian, &max) + if err != nil { + return 0, err + } + err = binary.Read(stream, binary.BigEndian, &bufferLen) + if err != nil { + return 0, err + } + indexBuffer := make([]uint, bufferLen) + var index uint64 + for i := range indexBuffer { + err = binary.Read(stream, binary.BigEndian, &index) + if err != nil { + return 0, err + } + indexBuffer[i] = uint(index) + } + s.m = uint(m) + s.p = uint(p) + s.k = uint(k) + s.max = max + s.indexBuffer = indexBuffer + + n, err := s.cells.ReadFrom(stream) + if err != nil { + return 0, err + } + return int64((3+len(s.indexBuffer))*binary.Size(uint64(0))) + + int64(1*binary.Size(uint8(0))) + int64(1*binary.Size(int64(0))) + n, nil +} + +// GobEncode implements gob.GobEncoder interface. +func (s *StableBloomFilter) GobEncode() ([]byte, error) { + var buf bytes.Buffer + _, err := s.WriteTo(&buf) + if err != nil { + return nil, err + } + return buf.Bytes(), nil +} + +// GobDecode implements gob.GobDecoder interface. +func (s *StableBloomFilter) GobDecode(data []byte) error { + buf := bytes.NewBuffer(data) + _, err := s.ReadFrom(buf) + return err +} + +// optimalStableP returns the optimal number of cells to decrement, p, per +// iteration for the provided parameters of an SBF. +func optimalStableP(m, k uint, d uint8, fpRate float64) uint { + var ( + max = math.Pow(2, float64(d)) - 1 + subDenom = math.Pow(1-math.Pow(fpRate, 1/float64(k)), 1/max) + denom = (1/subDenom - 1) * (1/float64(k) - 1/float64(m)) + ) + + p := uint(1 / denom) + if p <= 0 { + p = 1 + } + + return p +} diff --git a/vendor/github.com/tylertreat/BoomFilters/topk.go b/vendor/github.com/tylertreat/BoomFilters/topk.go new file mode 100644 index 00000000000..91e605aa7ca --- /dev/null +++ b/vendor/github.com/tylertreat/BoomFilters/topk.go @@ -0,0 +1,128 @@ +package boom + +import ( + "bytes" + "container/heap" +) + +// Element represents a data and it's frequency +type Element struct { + Data []byte + Freq uint64 +} + +// An elementHeap is a min-heap of elements. +type elementHeap []*Element + +func (e elementHeap) Len() int { return len(e) } +func (e elementHeap) Less(i, j int) bool { return e[i].Freq < e[j].Freq } +func (e elementHeap) Swap(i, j int) { e[i], e[j] = e[j], e[i] } + +func (e *elementHeap) Push(x interface{}) { + *e = append(*e, x.(*Element)) +} + +func (e *elementHeap) Pop() interface{} { + old := *e + n := len(old) + x := old[n-1] + *e = old[0 : n-1] + return x +} + +// TopK uses a Count-Min Sketch to calculate the top-K frequent elements in a +// stream. +type TopK struct { + cms *CountMinSketch + k uint + n uint + elements *elementHeap +} + +// NewTopK creates a new TopK backed by a Count-Min sketch whose relative +// accuracy is within a factor of epsilon with probability delta. It tracks the +// k-most frequent elements. +func NewTopK(epsilon, delta float64, k uint) *TopK { + elements := make(elementHeap, 0, k) + heap.Init(&elements) + return &TopK{ + cms: NewCountMinSketch(epsilon, delta), + k: k, + elements: &elements, + } +} + +// Add will add the data to the Count-Min Sketch and update the top-k heap if +// applicable. 
Returns the TopK to allow for chaining. +func (t *TopK) Add(data []byte) *TopK { + t.cms.Add(data) + t.n++ + + freq := t.cms.Count(data) + if t.isTop(freq) { + t.insert(data, freq) + } + + return t +} + +// Elements returns the top-k elements from lowest to highest frequency. +func (t *TopK) Elements() []*Element { + if t.elements.Len() == 0 { + return make([]*Element, 0) + } + + elements := make(elementHeap, t.elements.Len()) + copy(elements, *t.elements) + heap.Init(&elements) + topK := make([]*Element, 0, t.k) + + for elements.Len() > 0 { + topK = append(topK, heap.Pop(&elements).(*Element)) + } + + return topK +} + +// Reset restores the TopK to its original state. It returns itself to allow +// for chaining. +func (t *TopK) Reset() *TopK { + t.cms.Reset() + elements := make(elementHeap, 0, t.k) + heap.Init(&elements) + t.elements = &elements + t.n = 0 + return t +} + +// isTop indicates if the given frequency falls within the top-k heap. +func (t *TopK) isTop(freq uint64) bool { + if t.elements.Len() < int(t.k) { + return true + } + + return freq >= (*t.elements)[0].Freq +} + +// insert adds the data to the top-k heap. If the data is already an element, +// the frequency is updated. If the heap already has k elements, the element +// with the minimum frequency is removed. +func (t *TopK) insert(data []byte, freq uint64) { + for i, element := range *t.elements { + if bytes.Equal(data, element.Data) { + // Element already in top-k, replace it with new frequency. + heap.Remove(t.elements, i) + element.Freq = freq + heap.Push(t.elements, element) + return + } + } + + if t.elements.Len() == int(t.k) { + // Remove minimum-frequency element. + heap.Pop(t.elements) + } + + // Add element to top-k. + heap.Push(t.elements, &Element{Data: data, Freq: freq}) +} diff --git a/vendor/modules.txt b/vendor/modules.txt index 421b707564e..08c022b949e 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -1075,7 +1075,7 @@ github.com/prometheus/exporter-toolkit/web github.com/prometheus/procfs github.com/prometheus/procfs/internal/fs github.com/prometheus/procfs/internal/util -# github.com/prometheus/prometheus v1.99.0 => github.com/grafana/mimir-prometheus v0.0.0-20250302213708-bd234c29eed4 +# github.com/prometheus/prometheus v1.99.0 => github.com/grafana/mimir-prometheus v0.0.0-20250307115605-76de169c0ae4 ## explicit; go 1.22.7 github.com/prometheus/prometheus/config github.com/prometheus/prometheus/discovery @@ -1251,6 +1251,9 @@ github.com/twmb/franz-go/plugin/kotel # github.com/twmb/franz-go/plugin/kprom v1.1.0 ## explicit; go 1.18 github.com/twmb/franz-go/plugin/kprom +# github.com/tylertreat/BoomFilters v0.0.0-20210315201527-1a82519a3e43 +## explicit +github.com/tylertreat/BoomFilters # github.com/uber/jaeger-client-go v2.30.0+incompatible ## explicit github.com/uber/jaeger-client-go @@ -1759,7 +1762,7 @@ sigs.k8s.io/kustomize/kyaml/yaml/walk sigs.k8s.io/yaml sigs.k8s.io/yaml/goyaml.v2 sigs.k8s.io/yaml/goyaml.v3 -# github.com/prometheus/prometheus => github.com/grafana/mimir-prometheus v0.0.0-20250302213708-bd234c29eed4 +# github.com/prometheus/prometheus => github.com/grafana/mimir-prometheus v0.0.0-20250307115605-76de169c0ae4 # github.com/hashicorp/memberlist => github.com/grafana/memberlist v0.3.1-0.20220714140823-09ffed8adbbe # gopkg.in/yaml.v3 => github.com/colega/go-yaml-yaml v0.0.0-20220720105220-255a8d16d094 # github.com/grafana/regexp => github.com/grafana/regexp v0.0.0-20240531075221-3685f1377d7b
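Reviewer note (not part of the diff): the vendored types above are consumed as a library, so a minimal usage sketch of the public API this change introduces may help reviewers. It only uses constructors and methods present in the vendored files (NewDefaultScalableBloomFilter, NewDefaultStableBloomFilter, NewTopK, Add, Test, TestAndAdd, Elements); the payload strings and parameter values (0.01 false-positive rate, 10000 cells, top-3) are arbitrary illustration choices, not values used by Mimir.

package main

import (
	"fmt"

	boom "github.com/tylertreat/BoomFilters"
)

func main() {
	// Scalable Bloom filter: grows by appending partitioned filters with
	// geometrically decreasing false-positive rates; 1% overall target here.
	sbf := boom.NewDefaultScalableBloomFilter(0.01)
	sbf.Add([]byte(`series{job="api"}`))
	fmt.Println("scalable contains api series:", sbf.Test([]byte(`series{job="api"}`)))
	fmt.Println("scalable contains web series:", sbf.Test([]byte(`series{job="web"}`)))

	// Stable Bloom filter: fixed memory (10000 one-bit cells) that evicts stale
	// state, useful for deduplicating an unbounded stream with a bounded
	// false-positive rate (and a non-zero false-negative rate).
	stable := boom.NewDefaultStableBloomFilter(10000, 0.01)
	fmt.Println("first sighting was duplicate:", stable.TestAndAdd([]byte("event-1")))
	fmt.Println("second sighting was duplicate:", stable.TestAndAdd([]byte("event-1")))

	// TopK: Count-Min Sketch backed tracker of the 3 most frequent items,
	// returned by Elements() from lowest to highest estimated frequency.
	topk := boom.NewTopK(0.001, 0.99, 3)
	for _, v := range []string{"a", "b", "a", "c", "a", "b", "d"} {
		topk.Add([]byte(v))
	}
	for _, e := range topk.Elements() {
		fmt.Printf("%s seen ~%d times\n", e.Data, e.Freq)
	}
}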