From c814a0318e28fadcbf9d7098df2bba4be2303f0d Mon Sep 17 00:00:00 2001 From: Dustin Wilson Date: Fri, 14 Feb 2025 16:06:01 -0500 Subject: [PATCH 01/45] mv indexheader into utils --- pkg/compactor/bucket_compactor.go | 23 ++++++++++++++++++ pkg/storage/tsdb/config.go | 2 +- pkg/storegateway/bucket.go | 4 +-- pkg/storegateway/bucket_e2e_test.go | 2 +- pkg/storegateway/bucket_index_postings.go | 4 +-- .../bucket_index_postings_test.go | 2 +- pkg/storegateway/bucket_index_reader.go | 4 +-- pkg/storegateway/bucket_store_metrics.go | 2 +- pkg/storegateway/bucket_test.go | 4 +-- .../indexheader/binary_reader.go | 0 .../indexheader/encoding/encoding.go | 0 .../indexheader/encoding/encoding_test.go | 0 .../indexheader/encoding/factory.go | 0 .../indexheader/encoding/factory_test.go | 0 .../indexheader/encoding/reader.go | 0 .../indexheader/encoding/reader_test.go | 0 .../indexheader/header.go | 2 +- .../indexheader/header_test.go | 0 .../indexheader/index/postings.go | 4 +-- .../indexheader/index/postings_test.go | 0 .../indexheader/index/symbols.go | 4 +-- .../indexheader/index/symbols_test.go | 2 +- .../indexheader/indexheaderpb/sparse.pb.go | 0 .../indexheader/indexheaderpb/sparse.proto | 0 .../indexheader/lazy_binary_reader.go | 2 +- .../indexheader/lazy_binary_reader_test.go | 2 +- .../indexheader/reader_benchmarks_test.go | 0 .../indexheader/reader_pool.go | 0 .../indexheader/reader_pool_test.go | 0 .../indexheader/snapshotter.go | 0 .../indexheader/snapshotter_test.go | 0 .../indexheader/stream_binary_reader.go | 6 ++--- .../indexheader/stream_binary_reader_test.go | 2 +- .../testdata/index_format_v1/chunks/.gitkeep | 0 .../testdata/index_format_v1/index | Bin .../testdata/index_format_v1/meta.json | 0 .../testdata/index_format_v2/chunks/.gitkeep | 0 .../testdata/index_format_v2/index | Bin .../testdata/index_format_v2/meta.json | 0 39 files changed, 47 insertions(+), 24 deletions(-) rename pkg/{storegateway => util}/indexheader/binary_reader.go (100%) rename pkg/{storegateway => util}/indexheader/encoding/encoding.go (100%) rename pkg/{storegateway => util}/indexheader/encoding/encoding_test.go (100%) rename pkg/{storegateway => util}/indexheader/encoding/factory.go (100%) rename pkg/{storegateway => util}/indexheader/encoding/factory_test.go (100%) rename pkg/{storegateway => util}/indexheader/encoding/reader.go (100%) rename pkg/{storegateway => util}/indexheader/encoding/reader_test.go (100%) rename pkg/{storegateway => util}/indexheader/header.go (98%) rename pkg/{storegateway => util}/indexheader/header_test.go (100%) rename pkg/{storegateway => util}/indexheader/index/postings.go (99%) rename pkg/{storegateway => util}/indexheader/index/postings_test.go (100%) rename pkg/{storegateway => util}/indexheader/index/symbols.go (98%) rename pkg/{storegateway => util}/indexheader/index/symbols_test.go (98%) rename pkg/{storegateway => util}/indexheader/indexheaderpb/sparse.pb.go (100%) rename pkg/{storegateway => util}/indexheader/indexheaderpb/sparse.proto (100%) rename pkg/{storegateway => util}/indexheader/lazy_binary_reader.go (99%) rename pkg/{storegateway => util}/indexheader/lazy_binary_reader_test.go (99%) rename pkg/{storegateway => util}/indexheader/reader_benchmarks_test.go (100%) rename pkg/{storegateway => util}/indexheader/reader_pool.go (100%) rename pkg/{storegateway => util}/indexheader/reader_pool_test.go (100%) rename pkg/{storegateway => util}/indexheader/snapshotter.go (100%) rename pkg/{storegateway => util}/indexheader/snapshotter_test.go (100%) rename 
pkg/{storegateway => util}/indexheader/stream_binary_reader.go (98%) rename pkg/{storegateway => util}/indexheader/stream_binary_reader_test.go (98%) rename pkg/{storegateway => util}/indexheader/testdata/index_format_v1/chunks/.gitkeep (100%) rename pkg/{storegateway => util}/indexheader/testdata/index_format_v1/index (100%) rename pkg/{storegateway => util}/indexheader/testdata/index_format_v1/meta.json (100%) rename pkg/{storegateway => util}/indexheader/testdata/index_format_v2/chunks/.gitkeep (100%) rename pkg/{storegateway => util}/indexheader/testdata/index_format_v2/index (100%) rename pkg/{storegateway => util}/indexheader/testdata/index_format_v2/meta.json (100%) diff --git a/pkg/compactor/bucket_compactor.go b/pkg/compactor/bucket_compactor.go index 1978f8ea892..c298a936e4b 100644 --- a/pkg/compactor/bucket_compactor.go +++ b/pkg/compactor/bucket_compactor.go @@ -29,6 +29,8 @@ import ( "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/tsdb" "github.com/thanos-io/objstore" + "github.com/thanos-io/objstore/providers/filesystem" + "github.com/grafana/mimir/pkg/util/indexheader" "go.uber.org/atomic" "github.com/grafana/mimir/pkg/storage/sharding" @@ -393,6 +395,12 @@ func (c *BucketCompactor) runCompactionJob(ctx context.Context, job *Job) (shoul c.metrics.compactionBlocksVerificationFailed.Inc() } + fsbkt, err := filesystem.NewBucket(subDir) + if err != nil { + // TODO: Handle + // return errors.Wrapf(err, "upload of %s failed", blockToUpload.ulid) + } + blocksToUpload := convertCompactionResultToForEachJobs(compIDs, job.UseSplitting(), jobLogger) err = concurrency.ForEachJob(ctx, len(blocksToUpload), c.blockSyncConcurrency, func(ctx context.Context, idx int) error { blockToUpload := blocksToUpload[idx] @@ -431,6 +439,21 @@ func (c *BucketCompactor) runCompactionJob(ctx context.Context, job *Job) (shoul return errors.Wrapf(err, "upload of %s failed", blockToUpload.ulid) } + _, err = indexheader.NewStreamBinaryReader( + ctx, + jobLogger, + fsbkt, + bdir, + blockToUpload.ulid, + mimir_tsdb.DefaultPostingOffsetInMemorySampling, + indexheader.NewStreamBinaryReaderMetrics(nil), + indexheader.Config{MaxIdleFileHandles: 1}, + ) + if err != nil { + level.Warn(jobLogger).Log("msg", "failed to...", "block", "uhh", "err", err) + } + + elapsed := time.Since(begin) level.Info(jobLogger).Log("msg", "uploaded block", "result_block", blockToUpload.ulid, "duration", elapsed, "duration_ms", elapsed.Milliseconds(), "external_labels", labels.FromMap(newLabels)) return nil diff --git a/pkg/storage/tsdb/config.go b/pkg/storage/tsdb/config.go index 6c7dfd2d573..94da8c46cea 100644 --- a/pkg/storage/tsdb/config.go +++ b/pkg/storage/tsdb/config.go @@ -20,7 +20,7 @@ import ( "github.com/grafana/mimir/pkg/ingester/activeseries" "github.com/grafana/mimir/pkg/storage/bucket" - "github.com/grafana/mimir/pkg/storegateway/indexheader" + "github.com/grafana/mimir/pkg/util/indexheader" ) const ( diff --git a/pkg/storegateway/bucket.go b/pkg/storegateway/bucket.go index 57f6f067cc4..fbab5d59d7e 100644 --- a/pkg/storegateway/bucket.go +++ b/pkg/storegateway/bucket.go @@ -51,8 +51,8 @@ import ( "github.com/grafana/mimir/pkg/storage/tsdb/bucketcache" "github.com/grafana/mimir/pkg/storegateway/hintspb" "github.com/grafana/mimir/pkg/storegateway/indexcache" - "github.com/grafana/mimir/pkg/storegateway/indexheader" - streamindex "github.com/grafana/mimir/pkg/storegateway/indexheader/index" + "github.com/grafana/mimir/pkg/util/indexheader" + streamindex 
"github.com/grafana/mimir/pkg/util/indexheader/index" "github.com/grafana/mimir/pkg/storegateway/storegatewaypb" "github.com/grafana/mimir/pkg/storegateway/storepb" "github.com/grafana/mimir/pkg/util" diff --git a/pkg/storegateway/bucket_e2e_test.go b/pkg/storegateway/bucket_e2e_test.go index 88a1a106aa9..84b02b518c0 100644 --- a/pkg/storegateway/bucket_e2e_test.go +++ b/pkg/storegateway/bucket_e2e_test.go @@ -37,7 +37,7 @@ import ( mimir_tsdb "github.com/grafana/mimir/pkg/storage/tsdb" "github.com/grafana/mimir/pkg/storage/tsdb/block" "github.com/grafana/mimir/pkg/storegateway/indexcache" - "github.com/grafana/mimir/pkg/storegateway/indexheader" + "github.com/grafana/mimir/pkg/util/indexheader" "github.com/grafana/mimir/pkg/storegateway/storepb" "github.com/grafana/mimir/pkg/util/test" ) diff --git a/pkg/storegateway/bucket_index_postings.go b/pkg/storegateway/bucket_index_postings.go index 6111374c6cc..9f90cca78a3 100644 --- a/pkg/storegateway/bucket_index_postings.go +++ b/pkg/storegateway/bucket_index_postings.go @@ -19,8 +19,8 @@ import ( "github.com/grafana/mimir/pkg/storage/sharding" "github.com/grafana/mimir/pkg/storage/tsdb" - "github.com/grafana/mimir/pkg/storegateway/indexheader" - streamindex "github.com/grafana/mimir/pkg/storegateway/indexheader/index" + "github.com/grafana/mimir/pkg/util/indexheader" + streamindex "github.com/grafana/mimir/pkg/util/indexheader/index" ) // rawPostingGroup keeps posting keys for single matcher. It is raw because there is no guarantee diff --git a/pkg/storegateway/bucket_index_postings_test.go b/pkg/storegateway/bucket_index_postings_test.go index 3f0eccdb667..c8ee30d5b5e 100644 --- a/pkg/storegateway/bucket_index_postings_test.go +++ b/pkg/storegateway/bucket_index_postings_test.go @@ -15,7 +15,7 @@ import ( "github.com/prometheus/prometheus/tsdb/index" "github.com/stretchr/testify/assert" - streamindex "github.com/grafana/mimir/pkg/storegateway/indexheader/index" + streamindex "github.com/grafana/mimir/pkg/util/indexheader/index" ) func TestBigEndianPostingsCount(t *testing.T) { diff --git a/pkg/storegateway/bucket_index_reader.go b/pkg/storegateway/bucket_index_reader.go index f97d1507235..4560869ead9 100644 --- a/pkg/storegateway/bucket_index_reader.go +++ b/pkg/storegateway/bucket_index_reader.go @@ -33,8 +33,8 @@ import ( "github.com/grafana/mimir/pkg/storage/tsdb" "github.com/grafana/mimir/pkg/storegateway/indexcache" - "github.com/grafana/mimir/pkg/storegateway/indexheader" - streamindex "github.com/grafana/mimir/pkg/storegateway/indexheader/index" + "github.com/grafana/mimir/pkg/util/indexheader" + streamindex "github.com/grafana/mimir/pkg/util/indexheader/index" "github.com/grafana/mimir/pkg/util" "github.com/grafana/mimir/pkg/util/pool" "github.com/grafana/mimir/pkg/util/spanlogger" diff --git a/pkg/storegateway/bucket_store_metrics.go b/pkg/storegateway/bucket_store_metrics.go index 4a411be1dff..7ebc0a0d261 100644 --- a/pkg/storegateway/bucket_store_metrics.go +++ b/pkg/storegateway/bucket_store_metrics.go @@ -12,7 +12,7 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" - "github.com/grafana/mimir/pkg/storegateway/indexheader" + "github.com/grafana/mimir/pkg/util/indexheader" ) // BucketStoreMetrics holds all the metrics tracked by BucketStore. 
These metrics diff --git a/pkg/storegateway/bucket_test.go b/pkg/storegateway/bucket_test.go index 2ea825294db..e479a50b87e 100644 --- a/pkg/storegateway/bucket_test.go +++ b/pkg/storegateway/bucket_test.go @@ -61,8 +61,8 @@ import ( "github.com/grafana/mimir/pkg/storage/tsdb/block" "github.com/grafana/mimir/pkg/storegateway/hintspb" "github.com/grafana/mimir/pkg/storegateway/indexcache" - "github.com/grafana/mimir/pkg/storegateway/indexheader" - "github.com/grafana/mimir/pkg/storegateway/indexheader/index" + "github.com/grafana/mimir/pkg/util/indexheader" + "github.com/grafana/mimir/pkg/util/indexheader/index" "github.com/grafana/mimir/pkg/storegateway/storepb" "github.com/grafana/mimir/pkg/util/pool" "github.com/grafana/mimir/pkg/util/test" diff --git a/pkg/storegateway/indexheader/binary_reader.go b/pkg/util/indexheader/binary_reader.go similarity index 100% rename from pkg/storegateway/indexheader/binary_reader.go rename to pkg/util/indexheader/binary_reader.go diff --git a/pkg/storegateway/indexheader/encoding/encoding.go b/pkg/util/indexheader/encoding/encoding.go similarity index 100% rename from pkg/storegateway/indexheader/encoding/encoding.go rename to pkg/util/indexheader/encoding/encoding.go diff --git a/pkg/storegateway/indexheader/encoding/encoding_test.go b/pkg/util/indexheader/encoding/encoding_test.go similarity index 100% rename from pkg/storegateway/indexheader/encoding/encoding_test.go rename to pkg/util/indexheader/encoding/encoding_test.go diff --git a/pkg/storegateway/indexheader/encoding/factory.go b/pkg/util/indexheader/encoding/factory.go similarity index 100% rename from pkg/storegateway/indexheader/encoding/factory.go rename to pkg/util/indexheader/encoding/factory.go diff --git a/pkg/storegateway/indexheader/encoding/factory_test.go b/pkg/util/indexheader/encoding/factory_test.go similarity index 100% rename from pkg/storegateway/indexheader/encoding/factory_test.go rename to pkg/util/indexheader/encoding/factory_test.go diff --git a/pkg/storegateway/indexheader/encoding/reader.go b/pkg/util/indexheader/encoding/reader.go similarity index 100% rename from pkg/storegateway/indexheader/encoding/reader.go rename to pkg/util/indexheader/encoding/reader.go diff --git a/pkg/storegateway/indexheader/encoding/reader_test.go b/pkg/util/indexheader/encoding/reader_test.go similarity index 100% rename from pkg/storegateway/indexheader/encoding/reader_test.go rename to pkg/util/indexheader/encoding/reader_test.go diff --git a/pkg/storegateway/indexheader/header.go b/pkg/util/indexheader/header.go similarity index 98% rename from pkg/storegateway/indexheader/header.go rename to pkg/util/indexheader/header.go index c018351d4d5..41c835cab97 100644 --- a/pkg/storegateway/indexheader/header.go +++ b/pkg/util/indexheader/header.go @@ -13,7 +13,7 @@ import ( "github.com/pkg/errors" "github.com/prometheus/prometheus/tsdb/index" - streamindex "github.com/grafana/mimir/pkg/storegateway/indexheader/index" + streamindex "github.com/grafana/mimir/pkg/util/indexheader/index" ) const ( diff --git a/pkg/storegateway/indexheader/header_test.go b/pkg/util/indexheader/header_test.go similarity index 100% rename from pkg/storegateway/indexheader/header_test.go rename to pkg/util/indexheader/header_test.go diff --git a/pkg/storegateway/indexheader/index/postings.go b/pkg/util/indexheader/index/postings.go similarity index 99% rename from pkg/storegateway/indexheader/index/postings.go rename to pkg/util/indexheader/index/postings.go index 8b3d2de39f7..eca9caca80c 100644 --- 
a/pkg/storegateway/indexheader/index/postings.go +++ b/pkg/util/indexheader/index/postings.go @@ -17,8 +17,8 @@ import ( "github.com/pkg/errors" "github.com/prometheus/prometheus/tsdb/index" - streamencoding "github.com/grafana/mimir/pkg/storegateway/indexheader/encoding" - "github.com/grafana/mimir/pkg/storegateway/indexheader/indexheaderpb" + streamencoding "github.com/grafana/mimir/pkg/util/indexheader/encoding" + "github.com/grafana/mimir/pkg/util/indexheader/indexheaderpb" ) const ( diff --git a/pkg/storegateway/indexheader/index/postings_test.go b/pkg/util/indexheader/index/postings_test.go similarity index 100% rename from pkg/storegateway/indexheader/index/postings_test.go rename to pkg/util/indexheader/index/postings_test.go diff --git a/pkg/storegateway/indexheader/index/symbols.go b/pkg/util/indexheader/index/symbols.go similarity index 98% rename from pkg/storegateway/indexheader/index/symbols.go rename to pkg/util/indexheader/index/symbols.go index 649e2a43c33..fbc6340c80c 100644 --- a/pkg/storegateway/indexheader/index/symbols.go +++ b/pkg/util/indexheader/index/symbols.go @@ -16,8 +16,8 @@ import ( "github.com/grafana/dskit/runutil" "github.com/prometheus/prometheus/tsdb/index" - streamencoding "github.com/grafana/mimir/pkg/storegateway/indexheader/encoding" - "github.com/grafana/mimir/pkg/storegateway/indexheader/indexheaderpb" + streamencoding "github.com/grafana/mimir/pkg/util/indexheader/encoding" + "github.com/grafana/mimir/pkg/util/indexheader/indexheaderpb" ) // The table gets initialized with sync.Once but may still cause a race diff --git a/pkg/storegateway/indexheader/index/symbols_test.go b/pkg/util/indexheader/index/symbols_test.go similarity index 98% rename from pkg/storegateway/indexheader/index/symbols_test.go rename to pkg/util/indexheader/index/symbols_test.go index 81d1fc42a56..4a49afa2647 100644 --- a/pkg/storegateway/indexheader/index/symbols_test.go +++ b/pkg/util/indexheader/index/symbols_test.go @@ -18,7 +18,7 @@ import ( "github.com/prometheus/prometheus/tsdb/index" "github.com/stretchr/testify/require" - streamencoding "github.com/grafana/mimir/pkg/storegateway/indexheader/encoding" + streamencoding "github.com/grafana/mimir/pkg/util/indexheader/encoding" "github.com/grafana/mimir/pkg/util/test" ) diff --git a/pkg/storegateway/indexheader/indexheaderpb/sparse.pb.go b/pkg/util/indexheader/indexheaderpb/sparse.pb.go similarity index 100% rename from pkg/storegateway/indexheader/indexheaderpb/sparse.pb.go rename to pkg/util/indexheader/indexheaderpb/sparse.pb.go diff --git a/pkg/storegateway/indexheader/indexheaderpb/sparse.proto b/pkg/util/indexheader/indexheaderpb/sparse.proto similarity index 100% rename from pkg/storegateway/indexheader/indexheaderpb/sparse.proto rename to pkg/util/indexheader/indexheaderpb/sparse.proto diff --git a/pkg/storegateway/indexheader/lazy_binary_reader.go b/pkg/util/indexheader/lazy_binary_reader.go similarity index 99% rename from pkg/storegateway/indexheader/lazy_binary_reader.go rename to pkg/util/indexheader/lazy_binary_reader.go index 98c587ced42..d275b3bd713 100644 --- a/pkg/storegateway/indexheader/lazy_binary_reader.go +++ b/pkg/util/indexheader/lazy_binary_reader.go @@ -25,7 +25,7 @@ import ( "go.uber.org/atomic" "github.com/grafana/mimir/pkg/storage/tsdb/block" - streamindex "github.com/grafana/mimir/pkg/storegateway/indexheader/index" + streamindex "github.com/grafana/mimir/pkg/util/indexheader/index" ) var ( diff --git a/pkg/storegateway/indexheader/lazy_binary_reader_test.go 
b/pkg/util/indexheader/lazy_binary_reader_test.go similarity index 99% rename from pkg/storegateway/indexheader/lazy_binary_reader_test.go rename to pkg/util/indexheader/lazy_binary_reader_test.go index 13e79f92b5e..dcd924660c4 100644 --- a/pkg/storegateway/indexheader/lazy_binary_reader_test.go +++ b/pkg/util/indexheader/lazy_binary_reader_test.go @@ -29,7 +29,7 @@ import ( "go.uber.org/atomic" "github.com/grafana/mimir/pkg/storage/tsdb/block" - streamindex "github.com/grafana/mimir/pkg/storegateway/indexheader/index" + streamindex "github.com/grafana/mimir/pkg/util/indexheader/index" "github.com/grafana/mimir/pkg/util/test" ) diff --git a/pkg/storegateway/indexheader/reader_benchmarks_test.go b/pkg/util/indexheader/reader_benchmarks_test.go similarity index 100% rename from pkg/storegateway/indexheader/reader_benchmarks_test.go rename to pkg/util/indexheader/reader_benchmarks_test.go diff --git a/pkg/storegateway/indexheader/reader_pool.go b/pkg/util/indexheader/reader_pool.go similarity index 100% rename from pkg/storegateway/indexheader/reader_pool.go rename to pkg/util/indexheader/reader_pool.go diff --git a/pkg/storegateway/indexheader/reader_pool_test.go b/pkg/util/indexheader/reader_pool_test.go similarity index 100% rename from pkg/storegateway/indexheader/reader_pool_test.go rename to pkg/util/indexheader/reader_pool_test.go diff --git a/pkg/storegateway/indexheader/snapshotter.go b/pkg/util/indexheader/snapshotter.go similarity index 100% rename from pkg/storegateway/indexheader/snapshotter.go rename to pkg/util/indexheader/snapshotter.go diff --git a/pkg/storegateway/indexheader/snapshotter_test.go b/pkg/util/indexheader/snapshotter_test.go similarity index 100% rename from pkg/storegateway/indexheader/snapshotter_test.go rename to pkg/util/indexheader/snapshotter_test.go diff --git a/pkg/storegateway/indexheader/stream_binary_reader.go b/pkg/util/indexheader/stream_binary_reader.go similarity index 98% rename from pkg/storegateway/indexheader/stream_binary_reader.go rename to pkg/util/indexheader/stream_binary_reader.go index 88d9ab1657b..ccc9b2f9af1 100644 --- a/pkg/storegateway/indexheader/stream_binary_reader.go +++ b/pkg/util/indexheader/stream_binary_reader.go @@ -25,9 +25,9 @@ import ( "github.com/thanos-io/objstore" "github.com/grafana/mimir/pkg/storage/tsdb/block" - streamencoding "github.com/grafana/mimir/pkg/storegateway/indexheader/encoding" - streamindex "github.com/grafana/mimir/pkg/storegateway/indexheader/index" - "github.com/grafana/mimir/pkg/storegateway/indexheader/indexheaderpb" + streamencoding "github.com/grafana/mimir/pkg/util/indexheader/encoding" + streamindex "github.com/grafana/mimir/pkg/util/indexheader/index" + "github.com/grafana/mimir/pkg/util/indexheader/indexheaderpb" "github.com/grafana/mimir/pkg/util/atomicfs" "github.com/grafana/mimir/pkg/util/spanlogger" ) diff --git a/pkg/storegateway/indexheader/stream_binary_reader_test.go b/pkg/util/indexheader/stream_binary_reader_test.go similarity index 98% rename from pkg/storegateway/indexheader/stream_binary_reader_test.go rename to pkg/util/indexheader/stream_binary_reader_test.go index 219b9b0c109..63ab4e13969 100644 --- a/pkg/storegateway/indexheader/stream_binary_reader_test.go +++ b/pkg/util/indexheader/stream_binary_reader_test.go @@ -17,7 +17,7 @@ import ( "github.com/thanos-io/objstore/providers/filesystem" "github.com/grafana/mimir/pkg/storage/tsdb/block" - streamindex "github.com/grafana/mimir/pkg/storegateway/indexheader/index" + streamindex 
"github.com/grafana/mimir/pkg/util/indexheader/index" "github.com/grafana/mimir/pkg/util/spanlogger" ) diff --git a/pkg/storegateway/indexheader/testdata/index_format_v1/chunks/.gitkeep b/pkg/util/indexheader/testdata/index_format_v1/chunks/.gitkeep similarity index 100% rename from pkg/storegateway/indexheader/testdata/index_format_v1/chunks/.gitkeep rename to pkg/util/indexheader/testdata/index_format_v1/chunks/.gitkeep diff --git a/pkg/storegateway/indexheader/testdata/index_format_v1/index b/pkg/util/indexheader/testdata/index_format_v1/index similarity index 100% rename from pkg/storegateway/indexheader/testdata/index_format_v1/index rename to pkg/util/indexheader/testdata/index_format_v1/index diff --git a/pkg/storegateway/indexheader/testdata/index_format_v1/meta.json b/pkg/util/indexheader/testdata/index_format_v1/meta.json similarity index 100% rename from pkg/storegateway/indexheader/testdata/index_format_v1/meta.json rename to pkg/util/indexheader/testdata/index_format_v1/meta.json diff --git a/pkg/storegateway/indexheader/testdata/index_format_v2/chunks/.gitkeep b/pkg/util/indexheader/testdata/index_format_v2/chunks/.gitkeep similarity index 100% rename from pkg/storegateway/indexheader/testdata/index_format_v2/chunks/.gitkeep rename to pkg/util/indexheader/testdata/index_format_v2/chunks/.gitkeep diff --git a/pkg/storegateway/indexheader/testdata/index_format_v2/index b/pkg/util/indexheader/testdata/index_format_v2/index similarity index 100% rename from pkg/storegateway/indexheader/testdata/index_format_v2/index rename to pkg/util/indexheader/testdata/index_format_v2/index diff --git a/pkg/storegateway/indexheader/testdata/index_format_v2/meta.json b/pkg/util/indexheader/testdata/index_format_v2/meta.json similarity index 100% rename from pkg/storegateway/indexheader/testdata/index_format_v2/meta.json rename to pkg/util/indexheader/testdata/index_format_v2/meta.json From df725dce47a87f94863e479353a47948ec8fc3b3 Mon Sep 17 00:00:00 2001 From: Dustin Wilson Date: Tue, 18 Feb 2025 12:09:52 -0500 Subject: [PATCH 02/45] fmt + benchmarks for BinaryWrite --- pkg/compactor/bucket_compactor.go | 41 +++++++++++--------- pkg/storegateway/bucket.go | 4 +- pkg/storegateway/bucket_e2e_test.go | 2 +- pkg/storegateway/bucket_index_reader.go | 2 +- pkg/storegateway/bucket_test.go | 2 +- pkg/util/indexheader/binary_reader.go | 37 ++++++++++++++++-- pkg/util/indexheader/header_test.go | 32 +++++++++++++++ pkg/util/indexheader/stream_binary_reader.go | 2 +- 8 files changed, 95 insertions(+), 27 deletions(-) diff --git a/pkg/compactor/bucket_compactor.go b/pkg/compactor/bucket_compactor.go index c298a936e4b..f578a72567d 100644 --- a/pkg/compactor/bucket_compactor.go +++ b/pkg/compactor/bucket_compactor.go @@ -22,6 +22,7 @@ import ( "github.com/grafana/dskit/concurrency" "github.com/grafana/dskit/multierror" "github.com/grafana/dskit/runutil" + "github.com/grafana/mimir/pkg/util/indexheader" "github.com/oklog/ulid" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" @@ -30,7 +31,6 @@ import ( "github.com/prometheus/prometheus/tsdb" "github.com/thanos-io/objstore" "github.com/thanos-io/objstore/providers/filesystem" - "github.com/grafana/mimir/pkg/util/indexheader" "go.uber.org/atomic" "github.com/grafana/mimir/pkg/storage/sharding" @@ -395,10 +395,13 @@ func (c *BucketCompactor) runCompactionJob(ctx context.Context, job *Job) (shoul c.metrics.compactionBlocksVerificationFailed.Inc() } - fsbkt, err := filesystem.NewBucket(subDir) - if err != nil { - // TODO: Handle - // return 
errors.Wrapf(err, "upload of %s failed", blockToUpload.ulid) + var uploadSpaceHeaders = true // TODO: replace with flag... + if uploadSpaceHeaders { + fsbkt, err := filesystem.NewBucket(subDir) + if err != nil { + uploadSpaceHeaders = false + level.Warn(jobLogger).Log("msg", "failed to create local bucket, skipping sparse header upload", "err", err) + } } blocksToUpload := convertCompactionResultToForEachJobs(compIDs, job.UseSplitting(), jobLogger) @@ -439,21 +442,23 @@ func (c *BucketCompactor) runCompactionJob(ctx context.Context, job *Job) (shoul return errors.Wrapf(err, "upload of %s failed", blockToUpload.ulid) } - _, err = indexheader.NewStreamBinaryReader( - ctx, - jobLogger, - fsbkt, - bdir, - blockToUpload.ulid, - mimir_tsdb.DefaultPostingOffsetInMemorySampling, - indexheader.NewStreamBinaryReaderMetrics(nil), - indexheader.Config{MaxIdleFileHandles: 1}, - ) - if err != nil { - level.Warn(jobLogger).Log("msg", "failed to...", "block", "uhh", "err", err) + if uploadSpaceHeaders { + if _, err := indexheader.NewStreamBinaryReader( + ctx, + jobLogger, + fsbkt, + bdir, + blockToUpload.ulid, + mimir_tsdb.DefaultPostingOffsetInMemorySampling, + indexheader.NewStreamBinaryReaderMetrics(nil), + indexheader.Config{MaxIdleFileHandles: 1}, + ); err != nil { + // updating sparse index headers is lower priority than uploading the new block, continue job warning logged + // and increment partial failure metric. + level.Warn(jobLogger).Log("msg", "failed to create sparse index headers", "block", blockToUpload.ulid.String(), "err", err) + } } - elapsed := time.Since(begin) level.Info(jobLogger).Log("msg", "uploaded block", "result_block", blockToUpload.ulid, "duration", elapsed, "duration_ms", elapsed.Milliseconds(), "external_labels", labels.FromMap(newLabels)) return nil diff --git a/pkg/storegateway/bucket.go b/pkg/storegateway/bucket.go index fbab5d59d7e..711b6cec6c4 100644 --- a/pkg/storegateway/bucket.go +++ b/pkg/storegateway/bucket.go @@ -51,12 +51,12 @@ import ( "github.com/grafana/mimir/pkg/storage/tsdb/bucketcache" "github.com/grafana/mimir/pkg/storegateway/hintspb" "github.com/grafana/mimir/pkg/storegateway/indexcache" - "github.com/grafana/mimir/pkg/util/indexheader" - streamindex "github.com/grafana/mimir/pkg/util/indexheader/index" "github.com/grafana/mimir/pkg/storegateway/storegatewaypb" "github.com/grafana/mimir/pkg/storegateway/storepb" "github.com/grafana/mimir/pkg/util" "github.com/grafana/mimir/pkg/util/globalerror" + "github.com/grafana/mimir/pkg/util/indexheader" + streamindex "github.com/grafana/mimir/pkg/util/indexheader/index" "github.com/grafana/mimir/pkg/util/pool" "github.com/grafana/mimir/pkg/util/spanlogger" ) diff --git a/pkg/storegateway/bucket_e2e_test.go b/pkg/storegateway/bucket_e2e_test.go index 84b02b518c0..608f00e664b 100644 --- a/pkg/storegateway/bucket_e2e_test.go +++ b/pkg/storegateway/bucket_e2e_test.go @@ -37,8 +37,8 @@ import ( mimir_tsdb "github.com/grafana/mimir/pkg/storage/tsdb" "github.com/grafana/mimir/pkg/storage/tsdb/block" "github.com/grafana/mimir/pkg/storegateway/indexcache" - "github.com/grafana/mimir/pkg/util/indexheader" "github.com/grafana/mimir/pkg/storegateway/storepb" + "github.com/grafana/mimir/pkg/util/indexheader" "github.com/grafana/mimir/pkg/util/test" ) diff --git a/pkg/storegateway/bucket_index_reader.go b/pkg/storegateway/bucket_index_reader.go index 4560869ead9..9a73818fed2 100644 --- a/pkg/storegateway/bucket_index_reader.go +++ b/pkg/storegateway/bucket_index_reader.go @@ -33,9 +33,9 @@ import ( 
"github.com/grafana/mimir/pkg/storage/tsdb" "github.com/grafana/mimir/pkg/storegateway/indexcache" + "github.com/grafana/mimir/pkg/util" "github.com/grafana/mimir/pkg/util/indexheader" streamindex "github.com/grafana/mimir/pkg/util/indexheader/index" - "github.com/grafana/mimir/pkg/util" "github.com/grafana/mimir/pkg/util/pool" "github.com/grafana/mimir/pkg/util/spanlogger" ) diff --git a/pkg/storegateway/bucket_test.go b/pkg/storegateway/bucket_test.go index e479a50b87e..4f09e4f7b5c 100644 --- a/pkg/storegateway/bucket_test.go +++ b/pkg/storegateway/bucket_test.go @@ -61,9 +61,9 @@ import ( "github.com/grafana/mimir/pkg/storage/tsdb/block" "github.com/grafana/mimir/pkg/storegateway/hintspb" "github.com/grafana/mimir/pkg/storegateway/indexcache" + "github.com/grafana/mimir/pkg/storegateway/storepb" "github.com/grafana/mimir/pkg/util/indexheader" "github.com/grafana/mimir/pkg/util/indexheader/index" - "github.com/grafana/mimir/pkg/storegateway/storepb" "github.com/grafana/mimir/pkg/util/pool" "github.com/grafana/mimir/pkg/util/test" "github.com/grafana/mimir/pkg/util/validation" diff --git a/pkg/util/indexheader/binary_reader.go b/pkg/util/indexheader/binary_reader.go index 03483a5c5ad..50db58c6743 100644 --- a/pkg/util/indexheader/binary_reader.go +++ b/pkg/util/indexheader/binary_reader.go @@ -9,6 +9,7 @@ import ( "bufio" "context" "encoding/binary" + "golang.org/x/sync/errgroup" "hash" "hash/crc32" "io" @@ -93,16 +94,46 @@ func WriteBinary(ctx context.Context, bkt objstore.BucketReader, id ulid.ULID, f return errors.Wrap(err, "add index meta") } - if err := ir.CopySymbols(bw.SymbolsWriter(), buf); err != nil { + // Copying symbols and posting offsets into the encbuffer both require a range query against the provider. + // We make these calls in parallel and then syncronize before writing to buf + var g errgroup.Group + var sym, tbl io.ReadCloser + var symerr, tblerr error + g.Go(func() (err error) { + sym, symerr = ir.bkt.GetRange(ir.ctx, ir.path, int64(ir.toc.Symbols), int64(ir.toc.Series-ir.toc.Symbols)) + if symerr != nil { + return errors.Wrapf(err, "get symbols from object storage of %s", ir.path) + } + return + }) + + g.Go(func() (err error) { + tbl, tblerr = ir.bkt.GetRange(ir.ctx, ir.path, int64(ir.toc.PostingsTable), int64(ir.size-ir.toc.PostingsTable)) + if tblerr != nil { + return errors.Wrapf(err, "get posting offset table from object storage of %s", ir.path) + } + return + }) + + defer func() { + runutil.CloseWithErrCapture(&symerr, sym, "close symbol reader") + runutil.CloseWithErrCapture(&tblerr, tbl, "close posting offsets reader") + }() + + if err := g.Wait(); err != nil { return err } + if _, err := io.CopyBuffer(bw.SymbolsWriter(), sym, buf); err != nil { + return errors.Wrap(err, "copy posting offsets") + } + if err := bw.f.Flush(); err != nil { return errors.Wrap(err, "flush") } - if err := ir.CopyPostingsOffsets(bw.PostingOffsetsWriter(), buf); err != nil { - return err + if _, err := io.CopyBuffer(bw.PostingOffsetsWriter(), tbl, buf); err != nil { + return errors.Wrap(err, "copy posting offsets") } if err := bw.f.Flush(); err != nil { diff --git a/pkg/util/indexheader/header_test.go b/pkg/util/indexheader/header_test.go index 87ecde23d23..1aa60ad4612 100644 --- a/pkg/util/indexheader/header_test.go +++ b/pkg/util/indexheader/header_test.go @@ -11,6 +11,7 @@ import ( "fmt" "path/filepath" "testing" + "time" "github.com/go-kit/log" "github.com/grafana/dskit/gate" @@ -25,6 +26,7 @@ import ( "github.com/thanos-io/objstore" 
"github.com/thanos-io/objstore/providers/filesystem" + "github.com/grafana/mimir/pkg/storage/bucket" "github.com/grafana/mimir/pkg/storage/tsdb/block" "github.com/grafana/mimir/pkg/util/test" ) @@ -419,6 +421,36 @@ func BenchmarkBinaryWrite(t *testing.B) { } } +func BenchmarkBinaryWrite_DelayedBucket(t *testing.B) { + + tests := []struct { + low, high time.Duration + }{ + {0, 10}, + {0, 20}, + {0, 50}, + {10, 20}, + } + + for _, test := range tests { + t.Run(fmt.Sprintf("Between_%d_%d_ms_delay", test.low, test.high), func(t *testing.B) { + ctx := context.Background() + tmpDir := t.TempDir() + bkt, err := filesystem.NewBucket(filepath.Join(tmpDir, "bkt")) + require.NoError(t, err) + delaybkt := bucket.NewDelayedBucketClient(bkt, test.low, test.high) + defer func() { require.NoError(t, delaybkt.Close()) }() + + m := prepareIndexV2Block(t, tmpDir, delaybkt) + fn := filepath.Join(tmpDir, m.ULID.String(), block.IndexHeaderFilename) + t.ResetTimer() + for i := 0; i < t.N; i++ { + require.NoError(t, WriteBinary(ctx, delaybkt, m.ULID, fn)) + } + }) + } +} + func getSymbolTable(b index.ByteSlice) (map[uint32]string, error) { version := int(b.Range(4, 5)[0]) diff --git a/pkg/util/indexheader/stream_binary_reader.go b/pkg/util/indexheader/stream_binary_reader.go index ccc9b2f9af1..764fc51aa70 100644 --- a/pkg/util/indexheader/stream_binary_reader.go +++ b/pkg/util/indexheader/stream_binary_reader.go @@ -25,10 +25,10 @@ import ( "github.com/thanos-io/objstore" "github.com/grafana/mimir/pkg/storage/tsdb/block" + "github.com/grafana/mimir/pkg/util/atomicfs" streamencoding "github.com/grafana/mimir/pkg/util/indexheader/encoding" streamindex "github.com/grafana/mimir/pkg/util/indexheader/index" "github.com/grafana/mimir/pkg/util/indexheader/indexheaderpb" - "github.com/grafana/mimir/pkg/util/atomicfs" "github.com/grafana/mimir/pkg/util/spanlogger" ) From b3de32d3fc8a86e13f3c36fb84c6372997dd2385 Mon Sep 17 00:00:00 2001 From: Dustin Wilson Date: Tue, 18 Feb 2025 12:26:53 -0500 Subject: [PATCH 03/45] update benchmarks --- pkg/util/indexheader/header_test.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pkg/util/indexheader/header_test.go b/pkg/util/indexheader/header_test.go index 1aa60ad4612..18d576f7041 100644 --- a/pkg/util/indexheader/header_test.go +++ b/pkg/util/indexheader/header_test.go @@ -424,21 +424,21 @@ func BenchmarkBinaryWrite(t *testing.B) { func BenchmarkBinaryWrite_DelayedBucket(t *testing.B) { tests := []struct { - low, high time.Duration + delay time.Duration }{ - {0, 10}, - {0, 20}, - {0, 50}, - {10, 20}, + {1 * time.Millisecond}, + {5 * time.Millisecond}, + {10 * time.Millisecond}, + {20 * time.Millisecond}, } for _, test := range tests { - t.Run(fmt.Sprintf("Between_%d_%d_ms_delay", test.low, test.high), func(t *testing.B) { + t.Run(fmt.Sprintf("max_delay_%d_ns", test.delay), func(t *testing.B) { ctx := context.Background() tmpDir := t.TempDir() bkt, err := filesystem.NewBucket(filepath.Join(tmpDir, "bkt")) require.NoError(t, err) - delaybkt := bucket.NewDelayedBucketClient(bkt, test.low, test.high) + delaybkt := bucket.NewDelayedBucketClient(bkt, 0, test.delay) defer func() { require.NoError(t, delaybkt.Close()) }() m := prepareIndexV2Block(t, tmpDir, delaybkt) From 4c63eb974d2d8142bf47e3ab5df8d9f1843bd941 Mon Sep 17 00:00:00 2001 From: Dustin Wilson Date: Tue, 18 Feb 2025 14:43:55 -0500 Subject: [PATCH 04/45] update tests on compactor e2e --- pkg/compactor/bucket_compactor.go | 29 ++++++++++++++-------- pkg/compactor/bucket_compactor_e2e_test.go | 
15 +++++++++++ pkg/util/indexheader/header_test.go | 3 +-- 3 files changed, 35 insertions(+), 12 deletions(-) diff --git a/pkg/compactor/bucket_compactor.go b/pkg/compactor/bucket_compactor.go index f578a72567d..859ee38bc93 100644 --- a/pkg/compactor/bucket_compactor.go +++ b/pkg/compactor/bucket_compactor.go @@ -395,13 +395,10 @@ func (c *BucketCompactor) runCompactionJob(ctx context.Context, job *Job) (shoul c.metrics.compactionBlocksVerificationFailed.Inc() } - var uploadSpaceHeaders = true // TODO: replace with flag... - if uploadSpaceHeaders { - fsbkt, err := filesystem.NewBucket(subDir) - if err != nil { - uploadSpaceHeaders = false - level.Warn(jobLogger).Log("msg", "failed to create local bucket, skipping sparse header upload", "err", err) - } + fsbkt, err := filesystem.NewBucket(subDir) + if err != nil { + uploadSpaceHeaders = false + level.Warn(jobLogger).Log("msg", "failed to create local bucket, skipping sparse header upload", "err", err) } blocksToUpload := convertCompactionResultToForEachJobs(compIDs, job.UseSplitting(), jobLogger) @@ -443,19 +440,31 @@ func (c *BucketCompactor) runCompactionJob(ctx context.Context, job *Job) (shoul } if uploadSpaceHeaders { + // NewStreamBinaryReader reads a block's index writes the block's index-header and sparse-index-header files to disk if _, err := indexheader.NewStreamBinaryReader( ctx, jobLogger, fsbkt, - bdir, + subDir, blockToUpload.ulid, mimir_tsdb.DefaultPostingOffsetInMemorySampling, indexheader.NewStreamBinaryReaderMetrics(nil), indexheader.Config{MaxIdleFileHandles: 1}, ); err != nil { - // updating sparse index headers is lower priority than uploading the new block, continue job warning logged - // and increment partial failure metric. level.Warn(jobLogger).Log("msg", "failed to create sparse index headers", "block", blockToUpload.ulid.String(), "err", err) + return nil + } + + err := objstore.UploadFile( + ctx, + jobLogger, + c.bkt, + path.Join(bdir, block.SparseIndexHeaderFilename), + path.Join(blockToUpload.ulid.String(), block.SparseIndexHeaderFilename), + ) + if err != nil { + level.Warn(jobLogger).Log("msg", "failed to upload sparse index headers", "block", blockToUpload.ulid.String(), "err", err) + return nil } } diff --git a/pkg/compactor/bucket_compactor_e2e_test.go b/pkg/compactor/bucket_compactor_e2e_test.go index 1acc4b7814f..5260b06011c 100644 --- a/pkg/compactor/bucket_compactor_e2e_test.go +++ b/pkg/compactor/bucket_compactor_e2e_test.go @@ -374,6 +374,21 @@ func TestGroupCompactE2E(t *testing.T) { return nil })) + // expect the blocks that are compacted have sparse-index-headers in object storage + require.NoError(t, bkt.Iter(ctx, "", func(n string) error { + id, ok := block.IsBlockDir(n) + if !ok { + return nil + } + + if _, ok := others[id.String()]; ok { + p := path.Join(id.String(), block.SparseIndexHeaderFilename) + exists, _ := bkt.Exists(ctx, p) + assert.True(t, exists, "expected sparse index headers not found %s", p) + } + return nil + })) + for id, found := range nonCompactedExpected { assert.True(t, found, "not found expected block %s", id.String()) } diff --git a/pkg/util/indexheader/header_test.go b/pkg/util/indexheader/header_test.go index 18d576f7041..de544006d15 100644 --- a/pkg/util/indexheader/header_test.go +++ b/pkg/util/indexheader/header_test.go @@ -426,10 +426,9 @@ func BenchmarkBinaryWrite_DelayedBucket(t *testing.B) { tests := []struct { delay time.Duration }{ - {1 * time.Millisecond}, - {5 * time.Millisecond}, {10 * time.Millisecond}, {20 * time.Millisecond}, + {50 * time.Millisecond}, 
} for _, test := range tests { From 73292924840dc7a53456034ad31a114b257095fa Mon Sep 17 00:00:00 2001 From: Dustin Wilson Date: Tue, 18 Feb 2025 15:35:59 -0500 Subject: [PATCH 05/45] update tests on compactor e2e --- pkg/compactor/bucket_compactor.go | 33 ++++++++++++++------------- pkg/util/indexheader/binary_reader.go | 4 ++-- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/pkg/compactor/bucket_compactor.go b/pkg/compactor/bucket_compactor.go index 859ee38bc93..e88924ada9b 100644 --- a/pkg/compactor/bucket_compactor.go +++ b/pkg/compactor/bucket_compactor.go @@ -395,10 +395,12 @@ func (c *BucketCompactor) runCompactionJob(ctx context.Context, job *Job) (shoul c.metrics.compactionBlocksVerificationFailed.Inc() } + // We create a bucket backed by the local compaction directory so that we can + var uploadSparseHeaders = true fsbkt, err := filesystem.NewBucket(subDir) if err != nil { - uploadSpaceHeaders = false - level.Warn(jobLogger).Log("msg", "failed to create local bucket, skipping sparse header upload", "err", err) + uploadSparseHeaders = false + level.Warn(jobLogger).Log("msg", "failed to create filesystem bucket, skipping sparse header upload", "err", err) } blocksToUpload := convertCompactionResultToForEachJobs(compIDs, job.UseSplitting(), jobLogger) @@ -407,7 +409,8 @@ func (c *BucketCompactor) runCompactionJob(ctx context.Context, job *Job) (shoul uploadedBlocks.Inc() - bdir := filepath.Join(subDir, blockToUpload.ulid.String()) + blockID := blockToUpload.ulid.String() + bdir := filepath.Join(subDir, blockID) // When splitting is enabled, we need to inject the shard ID as an external label. newLabels := job.Labels().Map() @@ -439,8 +442,10 @@ func (c *BucketCompactor) runCompactionJob(ctx context.Context, job *Job) (shoul return errors.Wrapf(err, "upload of %s failed", blockToUpload.ulid) } - if uploadSpaceHeaders { - // NewStreamBinaryReader reads a block's index writes the block's index-header and sparse-index-header files to disk + if uploadSparseHeaders { + // NewStreamBinaryReader reads a block's index and writes index-header and sparse-index-header + // files to disk. Discard the reader, only needs to be initialized without error for + // these files to be written. 
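The BenchmarkBinaryWrite_DelayedBucket benchmark introduced in patches 02 and 03 is what makes the cost of those range requests measurable: it wraps a local filesystem bucket in a client that sleeps on every call, so serial versus concurrent GetRange calls show up directly in the reported time per WriteBinary. A condensed sketch of the setup is below, written as a test in the same indexheader package with the same imports as header_test.go above; bucket.NewDelayedBucketClient and prepareIndexV2Block are taken from the diff and assumed to behave as shown there, everything else is ordinary testing boilerplate.

func BenchmarkWriteBinaryWithSimulatedLatency(b *testing.B) {
	ctx := context.Background()
	tmpDir := b.TempDir()

	// A local filesystem bucket holding a freshly written TSDB block.
	fsbkt, err := filesystem.NewBucket(filepath.Join(tmpDir, "bkt"))
	require.NoError(b, err)

	// Every bucket operation sleeps up to 20ms, roughly simulating an
	// object-storage round trip.
	delayed := bucket.NewDelayedBucketClient(fsbkt, 0, 20*time.Millisecond)
	defer func() { require.NoError(b, delayed.Close()) }()

	m := prepareIndexV2Block(b, tmpDir, delayed) // test helper from the diff above
	out := filepath.Join(tmpDir, m.ULID.String(), block.IndexHeaderFilename)

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		require.NoError(b, WriteBinary(ctx, delayed, m.ULID, out))
	}
}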
if _, err := indexheader.NewStreamBinaryReader( ctx, jobLogger, @@ -449,21 +454,17 @@ func (c *BucketCompactor) runCompactionJob(ctx context.Context, job *Job) (shoul blockToUpload.ulid, mimir_tsdb.DefaultPostingOffsetInMemorySampling, indexheader.NewStreamBinaryReaderMetrics(nil), - indexheader.Config{MaxIdleFileHandles: 1}, + indexheader.Config{}, ); err != nil { - level.Warn(jobLogger).Log("msg", "failed to create sparse index headers", "block", blockToUpload.ulid.String(), "err", err) + level.Warn(jobLogger).Log("msg", "failed to create sparse index headers", "block", blockID, "err", err) return nil } - err := objstore.UploadFile( - ctx, - jobLogger, - c.bkt, - path.Join(bdir, block.SparseIndexHeaderFilename), - path.Join(blockToUpload.ulid.String(), block.SparseIndexHeaderFilename), - ) - if err != nil { - level.Warn(jobLogger).Log("msg", "failed to upload sparse index headers", "block", blockToUpload.ulid.String(), "err", err) + // upload local sparse-index-header to object storage + src := path.Join(bdir, block.SparseIndexHeaderFilename) + dst := path.Join(blockID, block.SparseIndexHeaderFilename) + if err := objstore.UploadFile(ctx, jobLogger, c.bkt, src, dst); err != nil { + level.Warn(jobLogger).Log("msg", "failed to upload sparse index headers", "block", blockID, "err", err) return nil } } diff --git a/pkg/util/indexheader/binary_reader.go b/pkg/util/indexheader/binary_reader.go index 50db58c6743..329f94ef59d 100644 --- a/pkg/util/indexheader/binary_reader.go +++ b/pkg/util/indexheader/binary_reader.go @@ -94,8 +94,8 @@ func WriteBinary(ctx context.Context, bkt objstore.BucketReader, id ulid.ULID, f return errors.Wrap(err, "add index meta") } - // Copying symbols and posting offsets into the encbuffer both require a range query against the provider. - // We make these calls in parallel and then syncronize before writing to buf + // Copying symbols and posting offsets into the encbuffer both require a range request against + // the objstore provider. We make these calls in parallel and then syncronize before writing to buf var g errgroup.Group var sym, tbl io.ReadCloser var symerr, tblerr error From 6ce1d79bd5279b6318b12b7ddbe461eed4f4dc0f Mon Sep 17 00:00:00 2001 From: Dustin Wilson Date: Tue, 18 Feb 2025 16:20:26 -0500 Subject: [PATCH 06/45] fix err handling in WriteBinary --- pkg/compactor/bucket_compactor.go | 3 +-- pkg/compactor/bucket_compactor_e2e_test.go | 2 +- pkg/util/indexheader/binary_reader.go | 18 ++++++++++-------- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/pkg/compactor/bucket_compactor.go b/pkg/compactor/bucket_compactor.go index e88924ada9b..dfc00c9f4cc 100644 --- a/pkg/compactor/bucket_compactor.go +++ b/pkg/compactor/bucket_compactor.go @@ -444,8 +444,7 @@ func (c *BucketCompactor) runCompactionJob(ctx context.Context, job *Job) (shoul if uploadSparseHeaders { // NewStreamBinaryReader reads a block's index and writes index-header and sparse-index-header - // files to disk. Discard the reader, only needs to be initialized without error for - // these files to be written. + // files to disk. Discard the reader, only needs to be initialized without error. 
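Taken together, the compactor changes in patches 01 through 06 add one best-effort step after each block upload: build the index-header and sparse-index-header locally, then copy only the sparse header next to the block in object storage, presumably so later readers of the block can fetch it instead of rebuilding it from the full index. Condensed into a single helper, with identifiers and call signatures taken from the diff, import paths as they stand at this point in the series, and error handling reduced to returning the error:

package compactorsketch // illustrative name, not part of the patch

import (
	"context"
	"path"
	"path/filepath"

	"github.com/go-kit/log"
	"github.com/oklog/ulid"
	"github.com/thanos-io/objstore"
	"github.com/thanos-io/objstore/providers/filesystem"

	mimir_tsdb "github.com/grafana/mimir/pkg/storage/tsdb"
	"github.com/grafana/mimir/pkg/storage/tsdb/block"
	"github.com/grafana/mimir/pkg/util/indexheader"
)

// buildAndUploadSparseHeader condenses the per-block step added by this series.
func buildAndUploadSparseHeader(ctx context.Context, logger log.Logger, dstBkt objstore.Bucket, subDir string, id ulid.ULID) error {
	// Expose the compaction scratch directory through the objstore interface so
	// the index-header code can read the freshly compacted block from local disk.
	fsbkt, err := filesystem.NewBucket(subDir)
	if err != nil {
		return err
	}

	// Constructing the reader writes both index-header and sparse-index-header
	// under subDir/<block ULID>/ as a side effect; the reader itself is unused.
	if _, err := indexheader.NewStreamBinaryReader(
		ctx, logger, fsbkt, subDir, id,
		mimir_tsdb.DefaultPostingOffsetInMemorySampling,
		indexheader.NewStreamBinaryReaderMetrics(nil),
		indexheader.Config{},
	); err != nil {
		return err
	}

	// Only the sparse header is copied next to the block in object storage.
	src := filepath.Join(subDir, id.String(), block.SparseIndexHeaderFilename)
	dst := path.Join(id.String(), block.SparseIndexHeaderFilename)
	return objstore.UploadFile(ctx, logger, dstBkt, src, dst)
}

In the actual patch this runs inside the per-block upload loop and failures only log a warning, since the block upload itself has already succeeded.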
if _, err := indexheader.NewStreamBinaryReader( ctx, jobLogger, diff --git a/pkg/compactor/bucket_compactor_e2e_test.go b/pkg/compactor/bucket_compactor_e2e_test.go index 5260b06011c..a1bc56ff06d 100644 --- a/pkg/compactor/bucket_compactor_e2e_test.go +++ b/pkg/compactor/bucket_compactor_e2e_test.go @@ -374,7 +374,7 @@ func TestGroupCompactE2E(t *testing.T) { return nil })) - // expect the blocks that are compacted have sparse-index-headers in object storage + // expect the blocks that are compacted to have sparse-index-headers in object storage. require.NoError(t, bkt.Iter(ctx, "", func(n string) error { id, ok := block.IsBlockDir(n) if !ok { diff --git a/pkg/util/indexheader/binary_reader.go b/pkg/util/indexheader/binary_reader.go index 329f94ef59d..bae9e72a0d0 100644 --- a/pkg/util/indexheader/binary_reader.go +++ b/pkg/util/indexheader/binary_reader.go @@ -17,6 +17,7 @@ import ( "os" "path/filepath" + "github.com/grafana/dskit/multierror" "github.com/grafana/dskit/runutil" "github.com/oklog/ulid" "github.com/pkg/errors" @@ -98,30 +99,31 @@ func WriteBinary(ctx context.Context, bkt objstore.BucketReader, id ulid.ULID, f // the objstore provider. We make these calls in parallel and then syncronize before writing to buf var g errgroup.Group var sym, tbl io.ReadCloser - var symerr, tblerr error g.Go(func() (err error) { - sym, symerr = ir.bkt.GetRange(ir.ctx, ir.path, int64(ir.toc.Symbols), int64(ir.toc.Series-ir.toc.Symbols)) - if symerr != nil { + sym, err = ir.bkt.GetRange(ir.ctx, ir.path, int64(ir.toc.Symbols), int64(ir.toc.Series-ir.toc.Symbols)) + if err != nil { return errors.Wrapf(err, "get symbols from object storage of %s", ir.path) } return }) g.Go(func() (err error) { - tbl, tblerr = ir.bkt.GetRange(ir.ctx, ir.path, int64(ir.toc.PostingsTable), int64(ir.size-ir.toc.PostingsTable)) - if tblerr != nil { + tbl, err = ir.bkt.GetRange(ir.ctx, ir.path, int64(ir.toc.PostingsTable), int64(ir.size-ir.toc.PostingsTable)) + if err != nil { return errors.Wrapf(err, "get posting offset table from object storage of %s", ir.path) } return }) + merr := multierror.MultiError{} defer func() { - runutil.CloseWithErrCapture(&symerr, sym, "close symbol reader") - runutil.CloseWithErrCapture(&tblerr, tbl, "close posting offsets reader") + merr.Add(errors.Wrapf(sym.Close(), "close symbol reader")) + merr.Add(errors.Wrapf(tbl.Close(), "close posting offsets reader")) }() if err := g.Wait(); err != nil { - return err + merr.Add(err) + return merr.Err() } if _, err := io.CopyBuffer(bw.SymbolsWriter(), sym, buf); err != nil { From cc8b313208a32af1165332611a5c4a8c827dc535 Mon Sep 17 00:00:00 2001 From: Dustin Wilson Date: Wed, 19 Feb 2025 12:16:55 -0500 Subject: [PATCH 07/45] mv indexheader to pkg/storage --- pkg/compactor/bucket_compactor.go | 2 +- pkg/{util => storage}/indexheader/binary_reader.go | 1 + .../indexheader/encoding/encoding.go | 0 .../indexheader/encoding/encoding_test.go | 0 .../indexheader/encoding/factory.go | 0 .../indexheader/encoding/factory_test.go | 0 .../indexheader/encoding/reader.go | 0 .../indexheader/encoding/reader_test.go | 0 pkg/{util => storage}/indexheader/header.go | 2 +- pkg/{util => storage}/indexheader/header_test.go | 0 pkg/{util => storage}/indexheader/index/postings.go | 4 ++-- .../indexheader/index/postings_test.go | 0 pkg/{util => storage}/indexheader/index/symbols.go | 4 ++-- .../indexheader/index/symbols_test.go | 2 +- .../indexheader/indexheaderpb/sparse.pb.go | 0 .../indexheader/indexheaderpb/sparse.proto | 0 .../indexheader/lazy_binary_reader.go | 2 +- 
.../indexheader/lazy_binary_reader_test.go | 2 +- .../indexheader/reader_benchmarks_test.go | 0 pkg/{util => storage}/indexheader/reader_pool.go | 0 .../indexheader/reader_pool_test.go | 0 pkg/{util => storage}/indexheader/snapshotter.go | 0 .../indexheader/snapshotter_test.go | 0 .../indexheader/stream_binary_reader.go | 8 +++++--- .../indexheader/stream_binary_reader_test.go | 2 +- .../testdata/index_format_v1/chunks/.gitkeep | 0 .../indexheader/testdata/index_format_v1/index | Bin .../indexheader/testdata/index_format_v1/meta.json | 0 .../testdata/index_format_v2/chunks/.gitkeep | 0 .../indexheader/testdata/index_format_v2/index | Bin .../indexheader/testdata/index_format_v2/meta.json | 0 pkg/storage/tsdb/config.go | 2 +- pkg/storegateway/bucket.go | 4 ++-- pkg/storegateway/bucket_e2e_test.go | 2 +- pkg/storegateway/bucket_index_postings.go | 4 ++-- pkg/storegateway/bucket_index_postings_test.go | 2 +- pkg/storegateway/bucket_index_reader.go | 4 ++-- pkg/storegateway/bucket_store_metrics.go | 2 +- pkg/storegateway/bucket_test.go | 4 ++-- 39 files changed, 28 insertions(+), 25 deletions(-) rename pkg/{util => storage}/indexheader/binary_reader.go (99%) rename pkg/{util => storage}/indexheader/encoding/encoding.go (100%) rename pkg/{util => storage}/indexheader/encoding/encoding_test.go (100%) rename pkg/{util => storage}/indexheader/encoding/factory.go (100%) rename pkg/{util => storage}/indexheader/encoding/factory_test.go (100%) rename pkg/{util => storage}/indexheader/encoding/reader.go (100%) rename pkg/{util => storage}/indexheader/encoding/reader_test.go (100%) rename pkg/{util => storage}/indexheader/header.go (98%) rename pkg/{util => storage}/indexheader/header_test.go (100%) rename pkg/{util => storage}/indexheader/index/postings.go (99%) rename pkg/{util => storage}/indexheader/index/postings_test.go (100%) rename pkg/{util => storage}/indexheader/index/symbols.go (98%) rename pkg/{util => storage}/indexheader/index/symbols_test.go (98%) rename pkg/{util => storage}/indexheader/indexheaderpb/sparse.pb.go (100%) rename pkg/{util => storage}/indexheader/indexheaderpb/sparse.proto (100%) rename pkg/{util => storage}/indexheader/lazy_binary_reader.go (99%) rename pkg/{util => storage}/indexheader/lazy_binary_reader_test.go (99%) rename pkg/{util => storage}/indexheader/reader_benchmarks_test.go (100%) rename pkg/{util => storage}/indexheader/reader_pool.go (100%) rename pkg/{util => storage}/indexheader/reader_pool_test.go (100%) rename pkg/{util => storage}/indexheader/snapshotter.go (100%) rename pkg/{util => storage}/indexheader/snapshotter_test.go (100%) rename pkg/{util => storage}/indexheader/stream_binary_reader.go (98%) rename pkg/{util => storage}/indexheader/stream_binary_reader_test.go (98%) rename pkg/{util => storage}/indexheader/testdata/index_format_v1/chunks/.gitkeep (100%) rename pkg/{util => storage}/indexheader/testdata/index_format_v1/index (100%) rename pkg/{util => storage}/indexheader/testdata/index_format_v1/meta.json (100%) rename pkg/{util => storage}/indexheader/testdata/index_format_v2/chunks/.gitkeep (100%) rename pkg/{util => storage}/indexheader/testdata/index_format_v2/index (100%) rename pkg/{util => storage}/indexheader/testdata/index_format_v2/meta.json (100%) diff --git a/pkg/compactor/bucket_compactor.go b/pkg/compactor/bucket_compactor.go index dfc00c9f4cc..8aacf7b127e 100644 --- a/pkg/compactor/bucket_compactor.go +++ b/pkg/compactor/bucket_compactor.go @@ -22,7 +22,7 @@ import ( "github.com/grafana/dskit/concurrency" 
"github.com/grafana/dskit/multierror" "github.com/grafana/dskit/runutil" - "github.com/grafana/mimir/pkg/util/indexheader" + "github.com/grafana/mimir/pkg/storage/indexheader" "github.com/oklog/ulid" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" diff --git a/pkg/util/indexheader/binary_reader.go b/pkg/storage/indexheader/binary_reader.go similarity index 99% rename from pkg/util/indexheader/binary_reader.go rename to pkg/storage/indexheader/binary_reader.go index bae9e72a0d0..f351cce7b03 100644 --- a/pkg/util/indexheader/binary_reader.go +++ b/pkg/storage/indexheader/binary_reader.go @@ -121,6 +121,7 @@ func WriteBinary(ctx context.Context, bkt objstore.BucketReader, id ulid.ULID, f merr.Add(errors.Wrapf(tbl.Close(), "close posting offsets reader")) }() + // wait until both GetRange requets have completed before writing bytes if err := g.Wait(); err != nil { merr.Add(err) return merr.Err() diff --git a/pkg/util/indexheader/encoding/encoding.go b/pkg/storage/indexheader/encoding/encoding.go similarity index 100% rename from pkg/util/indexheader/encoding/encoding.go rename to pkg/storage/indexheader/encoding/encoding.go diff --git a/pkg/util/indexheader/encoding/encoding_test.go b/pkg/storage/indexheader/encoding/encoding_test.go similarity index 100% rename from pkg/util/indexheader/encoding/encoding_test.go rename to pkg/storage/indexheader/encoding/encoding_test.go diff --git a/pkg/util/indexheader/encoding/factory.go b/pkg/storage/indexheader/encoding/factory.go similarity index 100% rename from pkg/util/indexheader/encoding/factory.go rename to pkg/storage/indexheader/encoding/factory.go diff --git a/pkg/util/indexheader/encoding/factory_test.go b/pkg/storage/indexheader/encoding/factory_test.go similarity index 100% rename from pkg/util/indexheader/encoding/factory_test.go rename to pkg/storage/indexheader/encoding/factory_test.go diff --git a/pkg/util/indexheader/encoding/reader.go b/pkg/storage/indexheader/encoding/reader.go similarity index 100% rename from pkg/util/indexheader/encoding/reader.go rename to pkg/storage/indexheader/encoding/reader.go diff --git a/pkg/util/indexheader/encoding/reader_test.go b/pkg/storage/indexheader/encoding/reader_test.go similarity index 100% rename from pkg/util/indexheader/encoding/reader_test.go rename to pkg/storage/indexheader/encoding/reader_test.go diff --git a/pkg/util/indexheader/header.go b/pkg/storage/indexheader/header.go similarity index 98% rename from pkg/util/indexheader/header.go rename to pkg/storage/indexheader/header.go index 41c835cab97..108311dc46e 100644 --- a/pkg/util/indexheader/header.go +++ b/pkg/storage/indexheader/header.go @@ -13,7 +13,7 @@ import ( "github.com/pkg/errors" "github.com/prometheus/prometheus/tsdb/index" - streamindex "github.com/grafana/mimir/pkg/util/indexheader/index" + streamindex "github.com/grafana/mimir/pkg/storage/indexheader/index" ) const ( diff --git a/pkg/util/indexheader/header_test.go b/pkg/storage/indexheader/header_test.go similarity index 100% rename from pkg/util/indexheader/header_test.go rename to pkg/storage/indexheader/header_test.go diff --git a/pkg/util/indexheader/index/postings.go b/pkg/storage/indexheader/index/postings.go similarity index 99% rename from pkg/util/indexheader/index/postings.go rename to pkg/storage/indexheader/index/postings.go index eca9caca80c..336c6dd28df 100644 --- a/pkg/util/indexheader/index/postings.go +++ b/pkg/storage/indexheader/index/postings.go @@ -17,8 +17,8 @@ import ( "github.com/pkg/errors" 
"github.com/prometheus/prometheus/tsdb/index" - streamencoding "github.com/grafana/mimir/pkg/util/indexheader/encoding" - "github.com/grafana/mimir/pkg/util/indexheader/indexheaderpb" + streamencoding "github.com/grafana/mimir/pkg/storage/indexheader/encoding" + "github.com/grafana/mimir/pkg/storage/indexheader/indexheaderpb" ) const ( diff --git a/pkg/util/indexheader/index/postings_test.go b/pkg/storage/indexheader/index/postings_test.go similarity index 100% rename from pkg/util/indexheader/index/postings_test.go rename to pkg/storage/indexheader/index/postings_test.go diff --git a/pkg/util/indexheader/index/symbols.go b/pkg/storage/indexheader/index/symbols.go similarity index 98% rename from pkg/util/indexheader/index/symbols.go rename to pkg/storage/indexheader/index/symbols.go index fbc6340c80c..e6268655501 100644 --- a/pkg/util/indexheader/index/symbols.go +++ b/pkg/storage/indexheader/index/symbols.go @@ -16,8 +16,8 @@ import ( "github.com/grafana/dskit/runutil" "github.com/prometheus/prometheus/tsdb/index" - streamencoding "github.com/grafana/mimir/pkg/util/indexheader/encoding" - "github.com/grafana/mimir/pkg/util/indexheader/indexheaderpb" + streamencoding "github.com/grafana/mimir/pkg/storage/indexheader/encoding" + "github.com/grafana/mimir/pkg/storage/indexheader/indexheaderpb" ) // The table gets initialized with sync.Once but may still cause a race diff --git a/pkg/util/indexheader/index/symbols_test.go b/pkg/storage/indexheader/index/symbols_test.go similarity index 98% rename from pkg/util/indexheader/index/symbols_test.go rename to pkg/storage/indexheader/index/symbols_test.go index 4a49afa2647..eb399f43330 100644 --- a/pkg/util/indexheader/index/symbols_test.go +++ b/pkg/storage/indexheader/index/symbols_test.go @@ -18,7 +18,7 @@ import ( "github.com/prometheus/prometheus/tsdb/index" "github.com/stretchr/testify/require" - streamencoding "github.com/grafana/mimir/pkg/util/indexheader/encoding" + streamencoding "github.com/grafana/mimir/pkg/storage/indexheader/encoding" "github.com/grafana/mimir/pkg/util/test" ) diff --git a/pkg/util/indexheader/indexheaderpb/sparse.pb.go b/pkg/storage/indexheader/indexheaderpb/sparse.pb.go similarity index 100% rename from pkg/util/indexheader/indexheaderpb/sparse.pb.go rename to pkg/storage/indexheader/indexheaderpb/sparse.pb.go diff --git a/pkg/util/indexheader/indexheaderpb/sparse.proto b/pkg/storage/indexheader/indexheaderpb/sparse.proto similarity index 100% rename from pkg/util/indexheader/indexheaderpb/sparse.proto rename to pkg/storage/indexheader/indexheaderpb/sparse.proto diff --git a/pkg/util/indexheader/lazy_binary_reader.go b/pkg/storage/indexheader/lazy_binary_reader.go similarity index 99% rename from pkg/util/indexheader/lazy_binary_reader.go rename to pkg/storage/indexheader/lazy_binary_reader.go index d275b3bd713..40cb377a38e 100644 --- a/pkg/util/indexheader/lazy_binary_reader.go +++ b/pkg/storage/indexheader/lazy_binary_reader.go @@ -24,8 +24,8 @@ import ( "github.com/thanos-io/objstore" "go.uber.org/atomic" + streamindex "github.com/grafana/mimir/pkg/storage/indexheader/index" "github.com/grafana/mimir/pkg/storage/tsdb/block" - streamindex "github.com/grafana/mimir/pkg/util/indexheader/index" ) var ( diff --git a/pkg/util/indexheader/lazy_binary_reader_test.go b/pkg/storage/indexheader/lazy_binary_reader_test.go similarity index 99% rename from pkg/util/indexheader/lazy_binary_reader_test.go rename to pkg/storage/indexheader/lazy_binary_reader_test.go index dcd924660c4..ab038957835 100644 --- 
a/pkg/util/indexheader/lazy_binary_reader_test.go +++ b/pkg/storage/indexheader/lazy_binary_reader_test.go @@ -28,8 +28,8 @@ import ( "github.com/thanos-io/objstore/providers/filesystem" "go.uber.org/atomic" + streamindex "github.com/grafana/mimir/pkg/storage/indexheader/index" "github.com/grafana/mimir/pkg/storage/tsdb/block" - streamindex "github.com/grafana/mimir/pkg/util/indexheader/index" "github.com/grafana/mimir/pkg/util/test" ) diff --git a/pkg/util/indexheader/reader_benchmarks_test.go b/pkg/storage/indexheader/reader_benchmarks_test.go similarity index 100% rename from pkg/util/indexheader/reader_benchmarks_test.go rename to pkg/storage/indexheader/reader_benchmarks_test.go diff --git a/pkg/util/indexheader/reader_pool.go b/pkg/storage/indexheader/reader_pool.go similarity index 100% rename from pkg/util/indexheader/reader_pool.go rename to pkg/storage/indexheader/reader_pool.go diff --git a/pkg/util/indexheader/reader_pool_test.go b/pkg/storage/indexheader/reader_pool_test.go similarity index 100% rename from pkg/util/indexheader/reader_pool_test.go rename to pkg/storage/indexheader/reader_pool_test.go diff --git a/pkg/util/indexheader/snapshotter.go b/pkg/storage/indexheader/snapshotter.go similarity index 100% rename from pkg/util/indexheader/snapshotter.go rename to pkg/storage/indexheader/snapshotter.go diff --git a/pkg/util/indexheader/snapshotter_test.go b/pkg/storage/indexheader/snapshotter_test.go similarity index 100% rename from pkg/util/indexheader/snapshotter_test.go rename to pkg/storage/indexheader/snapshotter_test.go diff --git a/pkg/util/indexheader/stream_binary_reader.go b/pkg/storage/indexheader/stream_binary_reader.go similarity index 98% rename from pkg/util/indexheader/stream_binary_reader.go rename to pkg/storage/indexheader/stream_binary_reader.go index 764fc51aa70..ab50682c334 100644 --- a/pkg/util/indexheader/stream_binary_reader.go +++ b/pkg/storage/indexheader/stream_binary_reader.go @@ -24,11 +24,11 @@ import ( "github.com/prometheus/prometheus/tsdb/index" "github.com/thanos-io/objstore" + streamencoding "github.com/grafana/mimir/pkg/storage/indexheader/encoding" + streamindex "github.com/grafana/mimir/pkg/storage/indexheader/index" + "github.com/grafana/mimir/pkg/storage/indexheader/indexheaderpb" "github.com/grafana/mimir/pkg/storage/tsdb/block" "github.com/grafana/mimir/pkg/util/atomicfs" - streamencoding "github.com/grafana/mimir/pkg/util/indexheader/encoding" - streamindex "github.com/grafana/mimir/pkg/util/indexheader/index" - "github.com/grafana/mimir/pkg/util/indexheader/indexheaderpb" "github.com/grafana/mimir/pkg/util/spanlogger" ) @@ -69,6 +69,8 @@ type StreamBinaryReader struct { indexVersion int } +// dmwilson - TODO: add config option for no check +// // NewStreamBinaryReader loads or builds new index-header if not present on disk. 
func NewStreamBinaryReader(ctx context.Context, logger log.Logger, bkt objstore.BucketReader, dir string, id ulid.ULID, postingOffsetsInMemSampling int, metrics *StreamBinaryReaderMetrics, cfg Config) (*StreamBinaryReader, error) { spanLog, ctx := spanlogger.NewWithLogger(ctx, logger, "indexheader.NewStreamBinaryReader") diff --git a/pkg/util/indexheader/stream_binary_reader_test.go b/pkg/storage/indexheader/stream_binary_reader_test.go similarity index 98% rename from pkg/util/indexheader/stream_binary_reader_test.go rename to pkg/storage/indexheader/stream_binary_reader_test.go index 63ab4e13969..1990949956e 100644 --- a/pkg/util/indexheader/stream_binary_reader_test.go +++ b/pkg/storage/indexheader/stream_binary_reader_test.go @@ -16,8 +16,8 @@ import ( "github.com/stretchr/testify/require" "github.com/thanos-io/objstore/providers/filesystem" + streamindex "github.com/grafana/mimir/pkg/storage/indexheader/index" "github.com/grafana/mimir/pkg/storage/tsdb/block" - streamindex "github.com/grafana/mimir/pkg/util/indexheader/index" "github.com/grafana/mimir/pkg/util/spanlogger" ) diff --git a/pkg/util/indexheader/testdata/index_format_v1/chunks/.gitkeep b/pkg/storage/indexheader/testdata/index_format_v1/chunks/.gitkeep similarity index 100% rename from pkg/util/indexheader/testdata/index_format_v1/chunks/.gitkeep rename to pkg/storage/indexheader/testdata/index_format_v1/chunks/.gitkeep diff --git a/pkg/util/indexheader/testdata/index_format_v1/index b/pkg/storage/indexheader/testdata/index_format_v1/index similarity index 100% rename from pkg/util/indexheader/testdata/index_format_v1/index rename to pkg/storage/indexheader/testdata/index_format_v1/index diff --git a/pkg/util/indexheader/testdata/index_format_v1/meta.json b/pkg/storage/indexheader/testdata/index_format_v1/meta.json similarity index 100% rename from pkg/util/indexheader/testdata/index_format_v1/meta.json rename to pkg/storage/indexheader/testdata/index_format_v1/meta.json diff --git a/pkg/util/indexheader/testdata/index_format_v2/chunks/.gitkeep b/pkg/storage/indexheader/testdata/index_format_v2/chunks/.gitkeep similarity index 100% rename from pkg/util/indexheader/testdata/index_format_v2/chunks/.gitkeep rename to pkg/storage/indexheader/testdata/index_format_v2/chunks/.gitkeep diff --git a/pkg/util/indexheader/testdata/index_format_v2/index b/pkg/storage/indexheader/testdata/index_format_v2/index similarity index 100% rename from pkg/util/indexheader/testdata/index_format_v2/index rename to pkg/storage/indexheader/testdata/index_format_v2/index diff --git a/pkg/util/indexheader/testdata/index_format_v2/meta.json b/pkg/storage/indexheader/testdata/index_format_v2/meta.json similarity index 100% rename from pkg/util/indexheader/testdata/index_format_v2/meta.json rename to pkg/storage/indexheader/testdata/index_format_v2/meta.json diff --git a/pkg/storage/tsdb/config.go b/pkg/storage/tsdb/config.go index 94da8c46cea..b7ea189b6e0 100644 --- a/pkg/storage/tsdb/config.go +++ b/pkg/storage/tsdb/config.go @@ -20,7 +20,7 @@ import ( "github.com/grafana/mimir/pkg/ingester/activeseries" "github.com/grafana/mimir/pkg/storage/bucket" - "github.com/grafana/mimir/pkg/util/indexheader" + "github.com/grafana/mimir/pkg/storage/indexheader" ) const ( diff --git a/pkg/storegateway/bucket.go b/pkg/storegateway/bucket.go index 711b6cec6c4..77e775173ef 100644 --- a/pkg/storegateway/bucket.go +++ b/pkg/storegateway/bucket.go @@ -55,8 +55,8 @@ import ( "github.com/grafana/mimir/pkg/storegateway/storepb" "github.com/grafana/mimir/pkg/util" 
"github.com/grafana/mimir/pkg/util/globalerror" - "github.com/grafana/mimir/pkg/util/indexheader" - streamindex "github.com/grafana/mimir/pkg/util/indexheader/index" + "github.com/grafana/mimir/pkg/storage/indexheader" + streamindex "github.com/grafana/mimir/pkg/storage/indexheader/index" "github.com/grafana/mimir/pkg/util/pool" "github.com/grafana/mimir/pkg/util/spanlogger" ) diff --git a/pkg/storegateway/bucket_e2e_test.go b/pkg/storegateway/bucket_e2e_test.go index 608f00e664b..78d33ddf917 100644 --- a/pkg/storegateway/bucket_e2e_test.go +++ b/pkg/storegateway/bucket_e2e_test.go @@ -38,7 +38,7 @@ import ( "github.com/grafana/mimir/pkg/storage/tsdb/block" "github.com/grafana/mimir/pkg/storegateway/indexcache" "github.com/grafana/mimir/pkg/storegateway/storepb" - "github.com/grafana/mimir/pkg/util/indexheader" + "github.com/grafana/mimir/pkg/storage/indexheader" "github.com/grafana/mimir/pkg/util/test" ) diff --git a/pkg/storegateway/bucket_index_postings.go b/pkg/storegateway/bucket_index_postings.go index 9f90cca78a3..202759875a8 100644 --- a/pkg/storegateway/bucket_index_postings.go +++ b/pkg/storegateway/bucket_index_postings.go @@ -19,8 +19,8 @@ import ( "github.com/grafana/mimir/pkg/storage/sharding" "github.com/grafana/mimir/pkg/storage/tsdb" - "github.com/grafana/mimir/pkg/util/indexheader" - streamindex "github.com/grafana/mimir/pkg/util/indexheader/index" + "github.com/grafana/mimir/pkg/storage/indexheader" + streamindex "github.com/grafana/mimir/pkg/storage/indexheader/index" ) // rawPostingGroup keeps posting keys for single matcher. It is raw because there is no guarantee diff --git a/pkg/storegateway/bucket_index_postings_test.go b/pkg/storegateway/bucket_index_postings_test.go index c8ee30d5b5e..12e1d566708 100644 --- a/pkg/storegateway/bucket_index_postings_test.go +++ b/pkg/storegateway/bucket_index_postings_test.go @@ -15,7 +15,7 @@ import ( "github.com/prometheus/prometheus/tsdb/index" "github.com/stretchr/testify/assert" - streamindex "github.com/grafana/mimir/pkg/util/indexheader/index" + streamindex "github.com/grafana/mimir/pkg/storage/indexheader/index" ) func TestBigEndianPostingsCount(t *testing.T) { diff --git a/pkg/storegateway/bucket_index_reader.go b/pkg/storegateway/bucket_index_reader.go index 9a73818fed2..115c02f8fcc 100644 --- a/pkg/storegateway/bucket_index_reader.go +++ b/pkg/storegateway/bucket_index_reader.go @@ -34,8 +34,8 @@ import ( "github.com/grafana/mimir/pkg/storage/tsdb" "github.com/grafana/mimir/pkg/storegateway/indexcache" "github.com/grafana/mimir/pkg/util" - "github.com/grafana/mimir/pkg/util/indexheader" - streamindex "github.com/grafana/mimir/pkg/util/indexheader/index" + "github.com/grafana/mimir/pkg/storage/indexheader" + streamindex "github.com/grafana/mimir/pkg/storage/indexheader/index" "github.com/grafana/mimir/pkg/util/pool" "github.com/grafana/mimir/pkg/util/spanlogger" ) diff --git a/pkg/storegateway/bucket_store_metrics.go b/pkg/storegateway/bucket_store_metrics.go index 7ebc0a0d261..e6b3255494f 100644 --- a/pkg/storegateway/bucket_store_metrics.go +++ b/pkg/storegateway/bucket_store_metrics.go @@ -12,7 +12,7 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" - "github.com/grafana/mimir/pkg/util/indexheader" + "github.com/grafana/mimir/pkg/storage/indexheader" ) // BucketStoreMetrics holds all the metrics tracked by BucketStore. 
These metrics diff --git a/pkg/storegateway/bucket_test.go b/pkg/storegateway/bucket_test.go index 4f09e4f7b5c..e41bdcfa6e0 100644 --- a/pkg/storegateway/bucket_test.go +++ b/pkg/storegateway/bucket_test.go @@ -62,8 +62,8 @@ import ( "github.com/grafana/mimir/pkg/storegateway/hintspb" "github.com/grafana/mimir/pkg/storegateway/indexcache" "github.com/grafana/mimir/pkg/storegateway/storepb" - "github.com/grafana/mimir/pkg/util/indexheader" - "github.com/grafana/mimir/pkg/util/indexheader/index" + "github.com/grafana/mimir/pkg/storage/indexheader" + "github.com/grafana/mimir/pkg/storage/indexheader/index" "github.com/grafana/mimir/pkg/util/pool" "github.com/grafana/mimir/pkg/util/test" "github.com/grafana/mimir/pkg/util/validation" From d771282bdfdc02604b06d8239b6aa9daa5ae6af0 Mon Sep 17 00:00:00 2001 From: Dustin Wilson Date: Wed, 19 Feb 2025 13:38:52 -0500 Subject: [PATCH 08/45] rm change to BinaryWrite --- pkg/storage/indexheader/binary_reader.go | 42 +++--------------------- pkg/storage/indexheader/header_test.go | 29 ---------------- 2 files changed, 4 insertions(+), 67 deletions(-) diff --git a/pkg/storage/indexheader/binary_reader.go b/pkg/storage/indexheader/binary_reader.go index f351cce7b03..03483a5c5ad 100644 --- a/pkg/storage/indexheader/binary_reader.go +++ b/pkg/storage/indexheader/binary_reader.go @@ -9,7 +9,6 @@ import ( "bufio" "context" "encoding/binary" - "golang.org/x/sync/errgroup" "hash" "hash/crc32" "io" @@ -17,7 +16,6 @@ import ( "os" "path/filepath" - "github.com/grafana/dskit/multierror" "github.com/grafana/dskit/runutil" "github.com/oklog/ulid" "github.com/pkg/errors" @@ -95,48 +93,16 @@ func WriteBinary(ctx context.Context, bkt objstore.BucketReader, id ulid.ULID, f return errors.Wrap(err, "add index meta") } - // Copying symbols and posting offsets into the encbuffer both require a range request against - // the objstore provider. 
We make these calls in parallel and then syncronize before writing to buf - var g errgroup.Group - var sym, tbl io.ReadCloser - g.Go(func() (err error) { - sym, err = ir.bkt.GetRange(ir.ctx, ir.path, int64(ir.toc.Symbols), int64(ir.toc.Series-ir.toc.Symbols)) - if err != nil { - return errors.Wrapf(err, "get symbols from object storage of %s", ir.path) - } - return - }) - - g.Go(func() (err error) { - tbl, err = ir.bkt.GetRange(ir.ctx, ir.path, int64(ir.toc.PostingsTable), int64(ir.size-ir.toc.PostingsTable)) - if err != nil { - return errors.Wrapf(err, "get posting offset table from object storage of %s", ir.path) - } - return - }) - - merr := multierror.MultiError{} - defer func() { - merr.Add(errors.Wrapf(sym.Close(), "close symbol reader")) - merr.Add(errors.Wrapf(tbl.Close(), "close posting offsets reader")) - }() - - // wait until both GetRange requets have completed before writing bytes - if err := g.Wait(); err != nil { - merr.Add(err) - return merr.Err() - } - - if _, err := io.CopyBuffer(bw.SymbolsWriter(), sym, buf); err != nil { - return errors.Wrap(err, "copy posting offsets") + if err := ir.CopySymbols(bw.SymbolsWriter(), buf); err != nil { + return err } if err := bw.f.Flush(); err != nil { return errors.Wrap(err, "flush") } - if _, err := io.CopyBuffer(bw.PostingOffsetsWriter(), tbl, buf); err != nil { - return errors.Wrap(err, "copy posting offsets") + if err := ir.CopyPostingsOffsets(bw.PostingOffsetsWriter(), buf); err != nil { + return err } if err := bw.f.Flush(); err != nil { diff --git a/pkg/storage/indexheader/header_test.go b/pkg/storage/indexheader/header_test.go index de544006d15..0941621a4b7 100644 --- a/pkg/storage/indexheader/header_test.go +++ b/pkg/storage/indexheader/header_test.go @@ -421,35 +421,6 @@ func BenchmarkBinaryWrite(t *testing.B) { } } -func BenchmarkBinaryWrite_DelayedBucket(t *testing.B) { - - tests := []struct { - delay time.Duration - }{ - {10 * time.Millisecond}, - {20 * time.Millisecond}, - {50 * time.Millisecond}, - } - - for _, test := range tests { - t.Run(fmt.Sprintf("max_delay_%d_ns", test.delay), func(t *testing.B) { - ctx := context.Background() - tmpDir := t.TempDir() - bkt, err := filesystem.NewBucket(filepath.Join(tmpDir, "bkt")) - require.NoError(t, err) - delaybkt := bucket.NewDelayedBucketClient(bkt, 0, test.delay) - defer func() { require.NoError(t, delaybkt.Close()) }() - - m := prepareIndexV2Block(t, tmpDir, delaybkt) - fn := filepath.Join(tmpDir, m.ULID.String(), block.IndexHeaderFilename) - t.ResetTimer() - for i := 0; i < t.N; i++ { - require.NoError(t, WriteBinary(ctx, delaybkt, m.ULID, fn)) - } - }) - } -} - func getSymbolTable(b index.ByteSlice) (map[uint32]string, error) { version := int(b.Range(4, 5)[0]) From 753572d8a9c9a91c6e0bd1b790c9b58ee9ead6e3 Mon Sep 17 00:00:00 2001 From: Dustin Wilson Date: Wed, 19 Feb 2025 16:31:42 -0500 Subject: [PATCH 09/45] rm unused import related to BinaryWrite --- pkg/storage/indexheader/header_test.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/pkg/storage/indexheader/header_test.go b/pkg/storage/indexheader/header_test.go index 0941621a4b7..87ecde23d23 100644 --- a/pkg/storage/indexheader/header_test.go +++ b/pkg/storage/indexheader/header_test.go @@ -11,7 +11,6 @@ import ( "fmt" "path/filepath" "testing" - "time" "github.com/go-kit/log" "github.com/grafana/dskit/gate" @@ -26,7 +25,6 @@ import ( "github.com/thanos-io/objstore" "github.com/thanos-io/objstore/providers/filesystem" - "github.com/grafana/mimir/pkg/storage/bucket" "github.com/grafana/mimir/pkg/storage/tsdb/block" 
"github.com/grafana/mimir/pkg/util/test" ) From ccb49127689eb86ca2aeb69f851c393d347674ff Mon Sep 17 00:00:00 2001 From: Dustin Wilson Date: Wed, 19 Feb 2025 19:29:33 -0500 Subject: [PATCH 10/45] pass uploadSparseIndexHeaders through Config + update docs --- pkg/compactor/bucket_compactor.go | 72 ++++++++++++---------- pkg/compactor/bucket_compactor_e2e_test.go | 2 +- pkg/compactor/bucket_compactor_test.go | 4 +- pkg/compactor/compactor.go | 5 ++ 4 files changed, 47 insertions(+), 36 deletions(-) diff --git a/pkg/compactor/bucket_compactor.go b/pkg/compactor/bucket_compactor.go index 8aacf7b127e..3fc970bc8bd 100644 --- a/pkg/compactor/bucket_compactor.go +++ b/pkg/compactor/bucket_compactor.go @@ -22,7 +22,6 @@ import ( "github.com/grafana/dskit/concurrency" "github.com/grafana/dskit/multierror" "github.com/grafana/dskit/runutil" - "github.com/grafana/mimir/pkg/storage/indexheader" "github.com/oklog/ulid" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" @@ -33,6 +32,7 @@ import ( "github.com/thanos-io/objstore/providers/filesystem" "go.uber.org/atomic" + "github.com/grafana/mimir/pkg/storage/indexheader" "github.com/grafana/mimir/pkg/storage/sharding" mimir_tsdb "github.com/grafana/mimir/pkg/storage/tsdb" "github.com/grafana/mimir/pkg/storage/tsdb/block" @@ -395,8 +395,10 @@ func (c *BucketCompactor) runCompactionJob(ctx context.Context, job *Job) (shoul c.metrics.compactionBlocksVerificationFailed.Inc() } - // We create a bucket backed by the local compaction directory so that we can - var uploadSparseHeaders = true + // Create a bucket backed by the local compaction directory. This allows calls to NewStreamBinaryReader to + // construct index-headers and sparse-index-headers without making requests to object storage. + var uploadSparseHeaders = c.uploadSparseIndexHeaders + level.Warn(jobLogger).Log("msg", "starting to compact", "err", err, "state", uploadSparseHeaders) fsbkt, err := filesystem.NewBucket(subDir) if err != nil { uploadSparseHeaders = false @@ -443,8 +445,9 @@ func (c *BucketCompactor) runCompactionJob(ctx context.Context, job *Job) (shoul } if uploadSparseHeaders { - // NewStreamBinaryReader reads a block's index and writes index-header and sparse-index-header - // files to disk. Discard the reader, only needs to be initialized without error. + // NewStreamBinaryReader reads a block's index and writes index-header and sparse-index-header files to disk. Discard + // the reader, only needs to be initialized without error. Because we don't need the StreamBinaryReader, use default + // indexheader.Config and do not register indexheader.StreamBinaryReaderMetrics. if _, err := indexheader.NewStreamBinaryReader( ctx, jobLogger, @@ -784,20 +787,21 @@ var ownAllJobs = func(*Job) (bool, error) { // BucketCompactor compacts blocks in a bucket. 
type BucketCompactor struct { - logger log.Logger - sy *metaSyncer - grouper Grouper - comp Compactor - planner Planner - compactDir string - bkt objstore.Bucket - concurrency int - skipUnhealthyBlocks bool - ownJob ownCompactionJobFunc - sortJobs JobsOrderFunc - waitPeriod time.Duration - blockSyncConcurrency int - metrics *BucketCompactorMetrics + logger log.Logger + sy *metaSyncer + grouper Grouper + comp Compactor + planner Planner + compactDir string + bkt objstore.Bucket + concurrency int + skipUnhealthyBlocks bool + uploadSparseIndexHeaders bool + ownJob ownCompactionJobFunc + sortJobs JobsOrderFunc + waitPeriod time.Duration + blockSyncConcurrency int + metrics *BucketCompactorMetrics } // NewBucketCompactor creates a new bucket compactor. @@ -816,25 +820,27 @@ func NewBucketCompactor( waitPeriod time.Duration, blockSyncConcurrency int, metrics *BucketCompactorMetrics, + uploadSparseIndexHeaders bool, ) (*BucketCompactor, error) { if concurrency <= 0 { return nil, errors.Errorf("invalid concurrency level (%d), concurrency level must be > 0", concurrency) } return &BucketCompactor{ - logger: logger, - sy: sy, - grouper: grouper, - planner: planner, - comp: comp, - compactDir: compactDir, - bkt: bkt, - concurrency: concurrency, - skipUnhealthyBlocks: skipUnhealthyBlocks, - ownJob: ownJob, - sortJobs: sortJobs, - waitPeriod: waitPeriod, - blockSyncConcurrency: blockSyncConcurrency, - metrics: metrics, + logger: logger, + sy: sy, + grouper: grouper, + planner: planner, + comp: comp, + compactDir: compactDir, + bkt: bkt, + concurrency: concurrency, + skipUnhealthyBlocks: skipUnhealthyBlocks, + ownJob: ownJob, + sortJobs: sortJobs, + waitPeriod: waitPeriod, + blockSyncConcurrency: blockSyncConcurrency, + metrics: metrics, + uploadSparseIndexHeaders: uploadSparseIndexHeaders, }, nil } diff --git a/pkg/compactor/bucket_compactor_e2e_test.go b/pkg/compactor/bucket_compactor_e2e_test.go index a1bc56ff06d..389818cc555 100644 --- a/pkg/compactor/bucket_compactor_e2e_test.go +++ b/pkg/compactor/bucket_compactor_e2e_test.go @@ -240,7 +240,7 @@ func TestGroupCompactE2E(t *testing.T) { planner := NewSplitAndMergePlanner([]int64{1000, 3000}) grouper := NewSplitAndMergeGrouper("user-1", []int64{1000, 3000}, 0, 0, logger) metrics := NewBucketCompactorMetrics(blocksMarkedForDeletion, prometheus.NewPedanticRegistry()) - bComp, err := NewBucketCompactor(logger, sy, grouper, planner, comp, dir, bkt, 2, true, ownAllJobs, sortJobsByNewestBlocksFirst, 0, 4, metrics) + bComp, err := NewBucketCompactor(logger, sy, grouper, planner, comp, dir, bkt, 2, true, ownAllJobs, sortJobsByNewestBlocksFirst, 0, 4, metrics, true) require.NoError(t, err) // Compaction on empty should not fail. 
diff --git a/pkg/compactor/bucket_compactor_test.go b/pkg/compactor/bucket_compactor_test.go index 7584db247ef..f2159533a86 100644 --- a/pkg/compactor/bucket_compactor_test.go +++ b/pkg/compactor/bucket_compactor_test.go @@ -120,7 +120,7 @@ func TestBucketCompactor_FilterOwnJobs(t *testing.T) { m := NewBucketCompactorMetrics(promauto.With(nil).NewCounter(prometheus.CounterOpts{}), nil) for testName, testCase := range tests { t.Run(testName, func(t *testing.T) { - bc, err := NewBucketCompactor(log.NewNopLogger(), nil, nil, nil, nil, "", nil, 2, false, testCase.ownJob, nil, 0, 4, m) + bc, err := NewBucketCompactor(log.NewNopLogger(), nil, nil, nil, nil, "", nil, 2, false, testCase.ownJob, nil, 0, 4, m, false) require.NoError(t, err) res, err := bc.filterOwnJobs(jobsFn()) @@ -156,7 +156,7 @@ func TestBlockMaxTimeDeltas(t *testing.T) { metrics := NewBucketCompactorMetrics(promauto.With(nil).NewCounter(prometheus.CounterOpts{}), nil) now := time.UnixMilli(1500002900159) - bc, err := NewBucketCompactor(log.NewNopLogger(), nil, nil, nil, nil, "", nil, 2, false, nil, nil, 0, 4, metrics) + bc, err := NewBucketCompactor(log.NewNopLogger(), nil, nil, nil, nil, "", nil, 2, false, nil, nil, 0, 4, metrics, true) require.NoError(t, err) deltas := bc.blockMaxTimeDeltas(now, []*Job{j1, j2}) diff --git a/pkg/compactor/compactor.go b/pkg/compactor/compactor.go index 83b751a6478..a2c6df63abd 100644 --- a/pkg/compactor/compactor.go +++ b/pkg/compactor/compactor.go @@ -130,6 +130,9 @@ type Config struct { // Allow downstream projects to customise the blocks compactor. BlocksGrouperFactory BlocksGrouperFactory `yaml:"-"` BlocksCompactorFactory BlocksCompactorFactory `yaml:"-"` + + // Allow compactor to upload sparse-index-header files + UploadSparseIndexHeaders bool `yaml:"upload_sparse_index_headers" category:"experimental"` } // RegisterFlags registers the MultitenantCompactor flags. @@ -158,6 +161,7 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet, logger log.Logger) { f.DurationVar(&cfg.TenantCleanupDelay, "compactor.tenant-cleanup-delay", 6*time.Hour, "For tenants marked for deletion, this is the time between deletion of the last block, and doing final cleanup (marker files, debug files) of the tenant.") f.BoolVar(&cfg.NoBlocksFileCleanupEnabled, "compactor.no-blocks-file-cleanup-enabled", false, "If enabled, will delete the bucket-index, markers and debug files in the tenant bucket when there are no blocks left in the index.") f.DurationVar(&cfg.MaxLookback, "compactor.max-lookback", 0*time.Second, "Blocks uploaded before the lookback aren't considered in compactor cycles. If set, this value should be larger than all values in `-blocks-storage.tsdb.block-ranges-period`. 
A value of 0s means that all blocks are considered regardless of their upload time.") + f.BoolVar(&cfg.UploadSparseIndexHeaders, "compactor.upload-sparse-index-headers", false, "If enabled, the compactor will construct and upload sparse index headers to object storage during each compaction cycle") // compactor concurrency options f.IntVar(&cfg.MaxOpeningBlocksConcurrency, "compactor.max-opening-blocks-concurrency", 1, "Number of goroutines opening blocks before compaction.") @@ -834,6 +838,7 @@ func (c *MultitenantCompactor) compactUser(ctx context.Context, userID string) e c.compactorCfg.CompactionWaitPeriod, c.compactorCfg.BlockSyncConcurrency, c.bucketCompactorMetrics, + c.compactorCfg.UploadSparseIndexHeaders, ) if err != nil { return errors.Wrap(err, "failed to create bucket compactor") From c432e3a8c59e987f034aa056a983ef158fd074a4 Mon Sep 17 00:00:00 2001 From: Dustin Wilson Date: Wed, 19 Feb 2025 19:30:10 -0500 Subject: [PATCH 11/45] update docs --- CHANGELOG.md | 1 + cmd/mimir/config-descriptor.json | 11 +++++++++++ cmd/mimir/help-all.txt.tmpl | 2 ++ docs/sources/mimir/configure/about-versioning.md | 2 ++ .../mimir/configure/configuration-parameters/index.md | 5 +++++ pkg/storegateway/bucket.go | 4 ++-- pkg/storegateway/bucket_e2e_test.go | 2 +- pkg/storegateway/bucket_index_postings.go | 4 ++-- pkg/storegateway/bucket_index_reader.go | 4 ++-- pkg/storegateway/bucket_test.go | 4 ++-- 10 files changed, 30 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8fa0230d760..56769e27a41 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ * [CHANGE] Ingester: Set `-ingester.ooo-native-histograms-ingestion-enabled` to true by default. #10483 * [CHANGE] Ruler: Add `user` and `reason` labels to `cortex_ruler_write_requests_failed_total` and `cortex_ruler_queries_failed_total`; add `user` to `cortex_ruler_write_requests_total` and `cortex_ruler_queries_total` metrics. #10536 +* [ENHANCEMENT] Compactor: Add experimental `-compactor.upload-sparse-index-headers` option. When enabled, the compactor will attempt to upload sparse index headers to object storage. This prevents latency spikes after adding store-gateway replicas. #10684 * [FEATURE] Distributor: Add experimental Influx handler. #10153 * [ENHANCEMENT] Compactor: Expose `cortex_bucket_index_last_successful_update_timestamp_seconds` for all tenants assigned to the compactor before starting the block cleanup job. #10569 * [ENHANCEMENT] Query Frontend: Return server-side `samples_processed` statistics.
#10103 diff --git a/cmd/mimir/config-descriptor.json b/cmd/mimir/config-descriptor.json index 9402bea73aa..07845f4ff76 100644 --- a/cmd/mimir/config-descriptor.json +++ b/cmd/mimir/config-descriptor.json @@ -11534,6 +11534,17 @@ "fieldFlag": "compactor.max-lookback", "fieldType": "duration", "fieldCategory": "experimental" + }, + { + "kind": "field", + "name": "upload_sparse_index_headers", + "required": false, + "desc": "If enabled, the compactor will construct and upload sparse index headers to object storage during each compaction cycle", + "fieldValue": null, + "fieldDefaultValue": false, + "fieldFlag": "compactor.upload-sparse-index-headers", + "fieldType": "boolean", + "fieldCategory": "experimental" } ], "fieldValue": null, diff --git a/cmd/mimir/help-all.txt.tmpl b/cmd/mimir/help-all.txt.tmpl index 14d46fa710c..f1695e3c974 100644 --- a/cmd/mimir/help-all.txt.tmpl +++ b/cmd/mimir/help-all.txt.tmpl @@ -1283,6 +1283,8 @@ Usage of ./cmd/mimir/mimir: Number of symbols flushers used when doing split compaction. (default 1) -compactor.tenant-cleanup-delay duration For tenants marked for deletion, this is the time between deletion of the last block, and doing final cleanup (marker files, debug files) of the tenant. (default 6h0m0s) + -compactor.upload-sparse-index-headers + [experimental] If enabled, the compactor will construct and upload sparse index headers to object storage during each compaction cycle -config.expand-env Expands ${var} or $var in config according to the values of the environment variables. -config.file value diff --git a/docs/sources/mimir/configure/about-versioning.md b/docs/sources/mimir/configure/about-versioning.md index 2eab3ba0241..57101e40075 100644 --- a/docs/sources/mimir/configure/about-versioning.md +++ b/docs/sources/mimir/configure/about-versioning.md @@ -71,6 +71,8 @@ The following features are currently experimental: - `-compactor.in-memory-tenant-meta-cache-size` - Limit blocks processed in each compaction cycle. Blocks uploaded prior to the maximum lookback aren't processed. - `-compactor.max-lookback` + - Enable the compactor to upload sparse index headers to object storage during compaction cycles. + - `-compactor.upload-sparse-index-headers` - Ruler - Aligning of evaluation timestamp on interval (`align_evaluation_time_on_interval`) - Allow defining limits on the maximum number of rules allowed in a rule group by namespace and the maximum number of rule groups by namespace. If set, this supersedes the `-ruler.max-rules-per-rule-group` and `-ruler.max-rule-groups-per-tenant` limits. diff --git a/docs/sources/mimir/configure/configuration-parameters/index.md b/docs/sources/mimir/configure/configuration-parameters/index.md index b079f2dfd4f..59120ff56c2 100644 --- a/docs/sources/mimir/configure/configuration-parameters/index.md +++ b/docs/sources/mimir/configure/configuration-parameters/index.md @@ -4958,6 +4958,11 @@ sharding_ring: # blocks are considered regardless of their upload time. 
# CLI flag: -compactor.max-lookback [max_lookback: | default = 0s] + +# (experimental) If enabled, the compactor will construct and upload sparse +# index headers to object storage during each compaction cycle +# CLI flag: -compactor.upload-sparse-index-headers +[upload_sparse_index_headers: | default = false] ``` ### store_gateway diff --git a/pkg/storegateway/bucket.go b/pkg/storegateway/bucket.go index 77e775173ef..7eb0a33e4d9 100644 --- a/pkg/storegateway/bucket.go +++ b/pkg/storegateway/bucket.go @@ -45,6 +45,8 @@ import ( "google.golang.org/grpc/status" "github.com/grafana/mimir/pkg/mimirpb" + "github.com/grafana/mimir/pkg/storage/indexheader" + streamindex "github.com/grafana/mimir/pkg/storage/indexheader/index" "github.com/grafana/mimir/pkg/storage/sharding" "github.com/grafana/mimir/pkg/storage/tsdb" "github.com/grafana/mimir/pkg/storage/tsdb/block" @@ -55,8 +57,6 @@ import ( "github.com/grafana/mimir/pkg/storegateway/storepb" "github.com/grafana/mimir/pkg/util" "github.com/grafana/mimir/pkg/util/globalerror" - "github.com/grafana/mimir/pkg/storage/indexheader" - streamindex "github.com/grafana/mimir/pkg/storage/indexheader/index" "github.com/grafana/mimir/pkg/util/pool" "github.com/grafana/mimir/pkg/util/spanlogger" ) diff --git a/pkg/storegateway/bucket_e2e_test.go b/pkg/storegateway/bucket_e2e_test.go index 78d33ddf917..510e2a6c733 100644 --- a/pkg/storegateway/bucket_e2e_test.go +++ b/pkg/storegateway/bucket_e2e_test.go @@ -34,11 +34,11 @@ import ( "google.golang.org/grpc/codes" "github.com/grafana/mimir/pkg/mimirpb" + "github.com/grafana/mimir/pkg/storage/indexheader" mimir_tsdb "github.com/grafana/mimir/pkg/storage/tsdb" "github.com/grafana/mimir/pkg/storage/tsdb/block" "github.com/grafana/mimir/pkg/storegateway/indexcache" "github.com/grafana/mimir/pkg/storegateway/storepb" - "github.com/grafana/mimir/pkg/storage/indexheader" "github.com/grafana/mimir/pkg/util/test" ) diff --git a/pkg/storegateway/bucket_index_postings.go b/pkg/storegateway/bucket_index_postings.go index 202759875a8..98b7022f4ba 100644 --- a/pkg/storegateway/bucket_index_postings.go +++ b/pkg/storegateway/bucket_index_postings.go @@ -17,10 +17,10 @@ import ( "github.com/prometheus/prometheus/tsdb/encoding" "github.com/prometheus/prometheus/tsdb/index" - "github.com/grafana/mimir/pkg/storage/sharding" - "github.com/grafana/mimir/pkg/storage/tsdb" "github.com/grafana/mimir/pkg/storage/indexheader" streamindex "github.com/grafana/mimir/pkg/storage/indexheader/index" + "github.com/grafana/mimir/pkg/storage/sharding" + "github.com/grafana/mimir/pkg/storage/tsdb" ) // rawPostingGroup keeps posting keys for single matcher. 
It is raw because there is no guarantee diff --git a/pkg/storegateway/bucket_index_reader.go b/pkg/storegateway/bucket_index_reader.go index 115c02f8fcc..8b744661a45 100644 --- a/pkg/storegateway/bucket_index_reader.go +++ b/pkg/storegateway/bucket_index_reader.go @@ -31,11 +31,11 @@ import ( "github.com/prometheus/prometheus/tsdb/index" "golang.org/x/sync/errgroup" + "github.com/grafana/mimir/pkg/storage/indexheader" + streamindex "github.com/grafana/mimir/pkg/storage/indexheader/index" "github.com/grafana/mimir/pkg/storage/tsdb" "github.com/grafana/mimir/pkg/storegateway/indexcache" "github.com/grafana/mimir/pkg/util" - "github.com/grafana/mimir/pkg/storage/indexheader" - streamindex "github.com/grafana/mimir/pkg/storage/indexheader/index" "github.com/grafana/mimir/pkg/util/pool" "github.com/grafana/mimir/pkg/util/spanlogger" ) diff --git a/pkg/storegateway/bucket_test.go b/pkg/storegateway/bucket_test.go index e41bdcfa6e0..1f0cab5c9e2 100644 --- a/pkg/storegateway/bucket_test.go +++ b/pkg/storegateway/bucket_test.go @@ -56,14 +56,14 @@ import ( "github.com/grafana/mimir/pkg/mimirpb" "github.com/grafana/mimir/pkg/storage/bucket" + "github.com/grafana/mimir/pkg/storage/indexheader" + "github.com/grafana/mimir/pkg/storage/indexheader/index" "github.com/grafana/mimir/pkg/storage/sharding" mimir_tsdb "github.com/grafana/mimir/pkg/storage/tsdb" "github.com/grafana/mimir/pkg/storage/tsdb/block" "github.com/grafana/mimir/pkg/storegateway/hintspb" "github.com/grafana/mimir/pkg/storegateway/indexcache" "github.com/grafana/mimir/pkg/storegateway/storepb" - "github.com/grafana/mimir/pkg/storage/indexheader" - "github.com/grafana/mimir/pkg/storage/indexheader/index" "github.com/grafana/mimir/pkg/util/pool" "github.com/grafana/mimir/pkg/util/test" "github.com/grafana/mimir/pkg/util/validation" From fa47f6219edc0f732b1fe4874cb87a7c0be4435a Mon Sep 17 00:00:00 2001 From: Dustin Wilson Date: Wed, 19 Feb 2025 21:58:45 -0500 Subject: [PATCH 12/45] docs --- docs/sources/mimir/configure/configuration-parameters/index.md | 2 +- pkg/compactor/compactor.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/sources/mimir/configure/configuration-parameters/index.md b/docs/sources/mimir/configure/configuration-parameters/index.md index 59120ff56c2..1ac74233173 100644 --- a/docs/sources/mimir/configure/configuration-parameters/index.md +++ b/docs/sources/mimir/configure/configuration-parameters/index.md @@ -4960,7 +4960,7 @@ sharding_ring: [max_lookback: | default = 0s] # (experimental) If enabled, the compactor will construct and upload sparse -# index headers to object storage during each compaction cycle +# index headers to object storage during each compaction cycle. 
# CLI flag: -compactor.upload-sparse-index-headers [upload_sparse_index_headers: | default = false] ``` diff --git a/pkg/compactor/compactor.go b/pkg/compactor/compactor.go index a2c6df63abd..5986e85100c 100644 --- a/pkg/compactor/compactor.go +++ b/pkg/compactor/compactor.go @@ -161,7 +161,7 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet, logger log.Logger) { f.DurationVar(&cfg.TenantCleanupDelay, "compactor.tenant-cleanup-delay", 6*time.Hour, "For tenants marked for deletion, this is the time between deletion of the last block, and doing final cleanup (marker files, debug files) of the tenant.") f.BoolVar(&cfg.NoBlocksFileCleanupEnabled, "compactor.no-blocks-file-cleanup-enabled", false, "If enabled, will delete the bucket-index, markers and debug files in the tenant bucket when there are no blocks left in the index.") f.DurationVar(&cfg.MaxLookback, "compactor.max-lookback", 0*time.Second, "Blocks uploaded before the lookback aren't considered in compactor cycles. If set, this value should be larger than all values in `-blocks-storage.tsdb.block-ranges-period`. A value of 0s means that all blocks are considered regardless of their upload time.") - f.BoolVar(&cfg.UploadSparseIndexHeaders, "compactor.upload-sparse-index-headers", false, "If enabled, the compactor will construct and upload sparse index headers to object storage during each compaction cycle") + f.BoolVar(&cfg.UploadSparseIndexHeaders, "compactor.upload-sparse-index-headers", false, "If enabled, the compactor will construct and upload sparse index headers to object storage during each compaction cycle.") // compactor concurrency options f.IntVar(&cfg.MaxOpeningBlocksConcurrency, "compactor.max-opening-blocks-concurrency", 1, "Number of goroutines opening blocks before compaction.") From d8de8fabd390a88b6da118e4778dc86ba884b5e1 Mon Sep 17 00:00:00 2001 From: Dustin Wilson Date: Thu, 20 Feb 2025 08:30:05 -0500 Subject: [PATCH 13/45] docs --- cmd/mimir/config-descriptor.json | 2 +- cmd/mimir/help-all.txt.tmpl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cmd/mimir/config-descriptor.json b/cmd/mimir/config-descriptor.json index 07845f4ff76..4f945dbaf90 100644 --- a/cmd/mimir/config-descriptor.json +++ b/cmd/mimir/config-descriptor.json @@ -11539,7 +11539,7 @@ "kind": "field", "name": "upload_sparse_index_headers", "required": false, - "desc": "If enabled, the compactor will construct and upload sparse index headers to object storage during each compaction cycle", + "desc": "If enabled, the compactor will construct and upload sparse index headers to object storage during each compaction cycle.", "fieldValue": null, "fieldDefaultValue": false, "fieldFlag": "compactor.upload-sparse-index-headers", diff --git a/cmd/mimir/help-all.txt.tmpl b/cmd/mimir/help-all.txt.tmpl index f1695e3c974..6f315cf8cd2 100644 --- a/cmd/mimir/help-all.txt.tmpl +++ b/cmd/mimir/help-all.txt.tmpl @@ -1284,7 +1284,7 @@ Usage of ./cmd/mimir/mimir: -compactor.tenant-cleanup-delay duration For tenants marked for deletion, this is the time between deletion of the last block, and doing final cleanup (marker files, debug files) of the tenant. (default 6h0m0s) -compactor.upload-sparse-index-headers - [experimental] If enabled, the compactor will construct and upload sparse index headers to object storage during each compaction cycle + [experimental] If enabled, the compactor will construct and upload sparse index headers to object storage during each compaction cycle. 
-config.expand-env Expands ${var} or $var in config according to the values of the environment variables. -config.file value From 00f9b415b89ed13abdc9e8c3146ed723856fce7d Mon Sep 17 00:00:00 2001 From: Dustin Wilson Date: Thu, 20 Feb 2025 09:37:59 -0500 Subject: [PATCH 14/45] handrail comments; rm TODO --- pkg/compactor/bucket_compactor.go | 1 + pkg/storage/indexheader/stream_binary_reader.go | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/pkg/compactor/bucket_compactor.go b/pkg/compactor/bucket_compactor.go index 3fc970bc8bd..b0d4277f230 100644 --- a/pkg/compactor/bucket_compactor.go +++ b/pkg/compactor/bucket_compactor.go @@ -458,6 +458,7 @@ func (c *BucketCompactor) runCompactionJob(ctx context.Context, job *Job) (shoul indexheader.NewStreamBinaryReaderMetrics(nil), indexheader.Config{}, ); err != nil { + // creating and uploading sparse-index-headers is best-effort, warn on failure level.Warn(jobLogger).Log("msg", "failed to create sparse index headers", "block", blockID, "err", err) return nil } diff --git a/pkg/storage/indexheader/stream_binary_reader.go b/pkg/storage/indexheader/stream_binary_reader.go index ab50682c334..ac0a66cec38 100644 --- a/pkg/storage/indexheader/stream_binary_reader.go +++ b/pkg/storage/indexheader/stream_binary_reader.go @@ -69,8 +69,6 @@ type StreamBinaryReader struct { indexVersion int } -// dmwilson - TODO: add config option for no check -// // NewStreamBinaryReader loads or builds new index-header if not present on disk. func NewStreamBinaryReader(ctx context.Context, logger log.Logger, bkt objstore.BucketReader, dir string, id ulid.ULID, postingOffsetsInMemSampling int, metrics *StreamBinaryReaderMetrics, cfg Config) (*StreamBinaryReader, error) { spanLog, ctx := spanlogger.NewWithLogger(ctx, logger, "indexheader.NewStreamBinaryReader") From ef039aa5ad347d7ffa10ea044597cca1f8ecc6ec Mon Sep 17 00:00:00 2001 From: Dustin Wilson Date: Thu, 20 Feb 2025 19:50:19 -0500 Subject: [PATCH 15/45] comments --- pkg/compactor/bucket_compactor.go | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/pkg/compactor/bucket_compactor.go b/pkg/compactor/bucket_compactor.go index b0d4277f230..87a74ea1751 100644 --- a/pkg/compactor/bucket_compactor.go +++ b/pkg/compactor/bucket_compactor.go @@ -395,14 +395,18 @@ func (c *BucketCompactor) runCompactionJob(ctx context.Context, job *Job) (shoul c.metrics.compactionBlocksVerificationFailed.Inc() } - // Create a bucket backed by the local compaction directory. This allows calls to NewStreamBinaryReader to - // construct index-headers and sparse-index-headers without making requests to object storage. - var uploadSparseHeaders = c.uploadSparseIndexHeaders - level.Warn(jobLogger).Log("msg", "starting to compact", "err", err, "state", uploadSparseHeaders) - fsbkt, err := filesystem.NewBucket(subDir) - if err != nil { - uploadSparseHeaders = false - level.Warn(jobLogger).Log("msg", "failed to create filesystem bucket, skipping sparse header upload", "err", err) + var uploadSparseIndexHeaders = c.uploadSparseIndexHeaders + var fsbkt objstore.Bucket + if uploadSparseIndexHeaders { + // Create a bucket backed by the local compaction directory. This allows calls to NewStreamBinaryReader to + // construct sparse-index-headers without making requests to object storage. Building and uploading + // sparse-index-headers is best effort, and we do not skip uploading a compacted block if there's an + // error affecting the sparse-index-header upload. 
+ fsbkt, err = filesystem.NewBucket(subDir) + if err != nil { + level.Warn(jobLogger).Log("msg", "failed to create filesystem bucket, skipping sparse header upload", "err", err) + uploadSparseIndexHeaders = false + } } blocksToUpload := convertCompactionResultToForEachJobs(compIDs, job.UseSplitting(), jobLogger) @@ -444,10 +448,9 @@ func (c *BucketCompactor) runCompactionJob(ctx context.Context, job *Job) (shoul return errors.Wrapf(err, "upload of %s failed", blockToUpload.ulid) } - if uploadSparseHeaders { - // NewStreamBinaryReader reads a block's index and writes index-header and sparse-index-header files to disk. Discard - // the reader, only needs to be initialized without error. Because we don't need the StreamBinaryReader, use default - // indexheader.Config and do not register indexheader.StreamBinaryReaderMetrics. + if uploadSparseIndexHeaders { + // Calling NewStreamBinaryReader reads a block's index and writes a sparse-index-header to disk. Because we + // don't use the writer, we pass a default indexheader.Config and don't register metrics. if _, err := indexheader.NewStreamBinaryReader( ctx, jobLogger, @@ -458,7 +461,6 @@ func (c *BucketCompactor) runCompactionJob(ctx context.Context, job *Job) (shoul indexheader.Config{}, ); err != nil { - // creating and uploading sparse-index-headers is best-effort, warn on failure level.Warn(jobLogger).Log("msg", "failed to create sparse index headers", "block", blockID, "err", err) return nil } From fe8a12e1076cd4e0faa04a1f3883591639568b1a Mon Sep 17 00:00:00 2001 From: Dustin Wilson Date: Fri, 21 Feb 2025 19:14:51 -0500 Subject: [PATCH 16/45] add handling for configured sampling rate != sparse-index-header sampling rate --- pkg/storage/indexheader/index/postings.go | 35 ++++++- .../indexheader/indexheaderpb/sparse.pb.go | 99 +++++++++++++------ .../indexheader/indexheaderpb/sparse.proto | 1 + .../indexheader/stream_binary_reader.go | 43 ++++++-- 4 files changed, 142 insertions(+), 36 deletions(-) diff --git a/pkg/storage/indexheader/index/postings.go b/pkg/storage/indexheader/index/postings.go index 336c6dd28df..f1e3c9d426b 100644 --- a/pkg/storage/indexheader/index/postings.go +++ b/pkg/storage/indexheader/index/postings.go @@ -43,6 +43,9 @@ type PostingOffsetTable interface { LabelNames() ([]string, error) NewSparsePostingOffsetTable() (table *indexheaderpb.PostingOffsetTable) + + // PostingOffsetInMemSampling returns the sampling rate of the posting offsets kept in memory, or 0 if not applicable. + PostingOffsetInMemSampling() int } // PostingListOffset contains the start and end offset of a posting list. @@ -330,6 +333,10 @@ func (t *PostingOffsetTableV1) LabelNames() ([]string, error) { return labelNames, nil } +func (t *PostingOffsetTableV1) PostingOffsetInMemSampling() int { + return 0 +} + func (t *PostingOffsetTableV1) NewSparsePostingOffsetTable() (table *indexheaderpb.PostingOffsetTable) { return &indexheaderpb.PostingOffsetTable{} } @@ -350,6 +357,25 @@ type postingValueOffsets struct { lastValOffset int64 } +func (t *PostingOffsetTableV2) Postings() map[string]*postingValueOffsets { + return t.postings +} + +func (e *postingValueOffsets) Downsample(a int, b int) { + pvolen := len(e.offsets) + if pvolen == 0 || a <= b || a <= 0 || b <= 0 { + return + } + + step := (a + b - 1) / b + j := 0 + for i := 0; i < pvolen; i += step { + e.offsets[j] = e.offsets[i] + j++ + } + e.offsets = e.offsets[:j] +} + // prefixOffsets returns the index of the first matching offset (start) and the index of the first non-matching (end).
// If all offsets match the prefix, then end will equal the length of offsets. // prefixOffsets returns false when no offsets match this prefix. @@ -608,10 +634,17 @@ func (t *PostingOffsetTableV2) LabelNames() ([]string, error) { return labelNames, nil } +// PostingOffsetInMemSampling returns the sampling rate used when keeping posting offsets in memory for this table. +func (t *PostingOffsetTableV2) PostingOffsetInMemSampling() int { + return t.postingOffsetsInMemSampling +} + + // NewSparsePostingOffsetTable loads all postings offset table data into a sparse index-header to be persisted to disk func (t *PostingOffsetTableV2) NewSparsePostingOffsetTable() (table *indexheaderpb.PostingOffsetTable) { sparseHeaders := &indexheaderpb.PostingOffsetTable{ - Postings: make(map[string]*indexheaderpb.PostingValueOffsets, len(t.postings)), + Postings: make(map[string]*indexheaderpb.PostingValueOffsets, len(t.postings)), + PostingOffsetInMemorySampling: int64(t.postingOffsetsInMemSampling), } for name, offsets := range t.postings { diff --git a/pkg/storage/indexheader/indexheaderpb/sparse.pb.go b/pkg/storage/indexheader/indexheaderpb/sparse.pb.go index 4e96f6aeba7..2408ca63c3e 100644 --- a/pkg/storage/indexheader/indexheaderpb/sparse.pb.go +++ b/pkg/storage/indexheader/indexheaderpb/sparse.pb.go @@ -130,7 +130,8 @@ func (m *Sparse) GetSymbolsCount() int64 { type PostingOffsetTable struct { // Postings is a map of label names -> PostingValueOffsets - Postings map[string]*PostingValueOffsets `protobuf:"bytes,1,rep,name=postings,proto3" json:"postings,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` + Postings map[string]*PostingValueOffsets `protobuf:"bytes,1,rep,name=postings,proto3" json:"postings,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` + PostingOffsetInMemorySampling int64 `protobuf:"varint,2,opt,name=postingOffsetInMemorySampling,proto3" json:"postingOffsetInMemorySampling,omitempty"` } func (m *PostingOffsetTable) Reset() { *m = PostingOffsetTable{} } @@ -172,6 +173,13 @@ func (m *PostingOffsetTable) GetPostings() map[string]*PostingValueOffsets { return nil } +func (m *PostingOffsetTable) GetPostingOffsetInMemorySampling() int64 { + if m != nil { + return m.PostingOffsetInMemorySampling + } + return 0 +} + // PostingValueOffsets stores a list of the first, last, and every 32nd (config default) PostingOffset for this label name.
type PostingValueOffsets struct { Offsets []*PostingOffset `protobuf:"bytes,1,rep,name=offsets,proto3" json:"offsets,omitempty"` @@ -289,33 +297,34 @@ func init() { func init() { proto.RegisterFile("sparse.proto", fileDescriptor_c442573753a956c7) } var fileDescriptor_c442573753a956c7 = []byte{ - // 402 bytes of a gzipped FileDescriptorProto - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x84, 0x52, 0x3f, 0x4f, 0xf2, 0x40, - 0x18, 0xef, 0xd1, 0xbc, 0xc0, 0x7b, 0x40, 0x42, 0x0e, 0x63, 0x08, 0x21, 0x17, 0x6d, 0x1c, 0x58, - 0x2c, 0x06, 0x13, 0x43, 0xdc, 0x84, 0x38, 0x31, 0x60, 0x8a, 0x61, 0x70, 0x31, 0xad, 0xb4, 0x85, - 0x58, 0x7a, 0x4d, 0xef, 0x6a, 0x64, 0xf3, 0x1b, 0xe8, 0xc7, 0xf0, 0x73, 0x38, 0x39, 0x32, 0x32, - 0x4a, 0x59, 0x1c, 0xf9, 0x08, 0xa6, 0x77, 0x2d, 0x5a, 0x25, 0xba, 0xdd, 0xf3, 0x3c, 0xbf, 0x3f, - 0xcf, 0xf3, 0xcb, 0xc1, 0x22, 0xf5, 0x74, 0x9f, 0x9a, 0xaa, 0xe7, 0x13, 0x46, 0x50, 0x69, 0xe2, - 0x8e, 0xcc, 0xfb, 0xb1, 0xa9, 0x8f, 0x4c, 0xdf, 0x33, 0x6a, 0x87, 0xf6, 0x84, 0x8d, 0x03, 0x43, - 0xbd, 0x21, 0xd3, 0xa6, 0x4d, 0x6c, 0xd2, 0xe4, 0x28, 0x23, 0xb0, 0x78, 0xc5, 0x0b, 0xfe, 0x12, - 0x6c, 0xe5, 0x11, 0xc0, 0xec, 0x80, 0xcb, 0xa1, 0x23, 0x98, 0xa3, 0xb3, 0xa9, 0x41, 0x1c, 0x5a, - 0x05, 0x7b, 0xa0, 0x51, 0x68, 0xed, 0xaa, 0x29, 0x69, 0x75, 0x20, 0xa6, 0x5a, 0x02, 0x43, 0x03, - 0x58, 0xf1, 0x08, 0x65, 0x13, 0xd7, 0xa6, 0x7d, 0xcb, 0xa2, 0x26, 0xbb, 0xd4, 0x0d, 0xc7, 0xac, - 0x66, 0x38, 0x7b, 0xff, 0x1b, 0xfb, 0x42, 0x20, 0xbf, 0x00, 0xb5, 0x6d, 0x6c, 0xa5, 0x07, 0x73, - 0xb1, 0x11, 0xaa, 0xc3, 0x1c, 0xe1, 0x93, 0x68, 0x23, 0xb9, 0x21, 0x77, 0x32, 0x65, 0xa0, 0x25, - 0x2d, 0xa4, 0xc0, 0x62, 0xbc, 0x48, 0x97, 0x04, 0x2e, 0xe3, 0xb6, 0xb2, 0x96, 0xea, 0x29, 0x2f, - 0x00, 0xa2, 0x9f, 0xc6, 0xa8, 0x07, 0xf3, 0x89, 0x35, 0x57, 0x2e, 0xb4, 0x9a, 0x7f, 0x6e, 0x9b, - 0xb4, 0xe8, 0xb9, 0xcb, 0xfc, 0x99, 0xb6, 0x11, 0xa8, 0x5d, 0xc3, 0x52, 0x6a, 0x84, 0xca, 0x50, - 0xbe, 0x35, 0x67, 0x3c, 0xc4, 0xff, 0x5a, 0xf4, 0x44, 0x6d, 0xf8, 0xef, 0x4e, 0x77, 0x82, 0x24, - 0x1a, 0x65, 0xbb, 0xd9, 0x30, 0x82, 0x08, 0x47, 0xaa, 0x09, 0xc2, 0x69, 0xa6, 0x0d, 0x14, 0x0a, - 0x2b, 0x5b, 0x10, 0xe8, 0x24, 0x9d, 0x4e, 0xa1, 0x55, 0xff, 0xed, 0x86, 0xcf, 0xdc, 0x0e, 0x60, - 0xc9, 0xd1, 0x29, 0x1b, 0xea, 0x8e, 0x98, 0xc4, 0xc1, 0xa5, 0x9b, 0xca, 0xd9, 0xe6, 0x2a, 0xd1, - 0x40, 0x3b, 0xc9, 0x0d, 0xe2, 0x2e, 0x51, 0xa0, 0x1a, 0xcc, 0xb3, 0x28, 0x9d, 0xbe, 0x65, 0xc5, - 0x3a, 0x9b, 0xba, 0xd3, 0x9d, 0x2f, 0xb1, 0xb4, 0x58, 0x62, 0x69, 0xbd, 0xc4, 0xe0, 0x21, 0xc4, - 0xe0, 0x39, 0xc4, 0xe0, 0x35, 0xc4, 0x60, 0x1e, 0x62, 0xf0, 0x16, 0x62, 0xf0, 0x1e, 0x62, 0x69, - 0x1d, 0x62, 0xf0, 0xb4, 0xc2, 0xd2, 0x7c, 0x85, 0xa5, 0xc5, 0x0a, 0x4b, 0x57, 0xe9, 0xff, 0x6c, - 0x64, 0xf9, 0x3f, 0x3d, 0xfe, 0x08, 0x00, 0x00, 0xff, 0xff, 0x81, 0x2f, 0x9e, 0xda, 0xf5, 0x02, - 0x00, 0x00, + // 432 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x84, 0x53, 0xb1, 0x8e, 0xd3, 0x40, + 0x10, 0xf5, 0xda, 0xe2, 0x72, 0x4c, 0x2e, 0xd2, 0x69, 0x0f, 0xa1, 0x53, 0x74, 0xac, 0xc0, 0xa2, + 0xb8, 0x06, 0x07, 0x05, 0x09, 0x9d, 0xe8, 0xb8, 0x40, 0x81, 0x22, 0x14, 0x64, 0xa3, 0x14, 0x34, + 0xc8, 0x26, 0x6b, 0xc7, 0xc2, 0xf6, 0x5a, 0xde, 0x35, 0xc2, 0x1d, 0x25, 0x1d, 0x7c, 0x06, 0x9f, + 0x42, 0x99, 0x32, 0x25, 0x71, 0x1a, 0xca, 0x7c, 0x02, 0xf2, 0xae, 0x1d, 0x62, 0x88, 0x72, 0xdd, + 0xce, 0xcc, 0x9b, 0xf7, 0xde, 0xbc, 0xc4, 0x70, 0xc2, 0x53, 0x37, 0xe3, 0xd4, 0x4a, 0x33, 0x26, + 0x18, 0xee, 0x85, 0xc9, 0x8c, 0x7e, 0x9e, 0x53, 0x77, 0x46, 0xb3, 0xd4, 0xeb, 0x3f, 0x0a, 0x42, + 0x31, 0xcf, 
0x3d, 0xeb, 0x03, 0x8b, 0x07, 0x01, 0x0b, 0xd8, 0x40, 0xa2, 0xbc, 0xdc, 0x97, 0x95, + 0x2c, 0xe4, 0x4b, 0x6d, 0x9b, 0xdf, 0x10, 0x1c, 0x39, 0x92, 0x0e, 0x3f, 0x86, 0x0e, 0x2f, 0x62, + 0x8f, 0x45, 0xfc, 0x1c, 0xdd, 0x47, 0x97, 0xdd, 0xe1, 0x5d, 0xab, 0x45, 0x6d, 0x39, 0x6a, 0x6a, + 0x37, 0x30, 0xec, 0xc0, 0x59, 0xca, 0xb8, 0x08, 0x93, 0x80, 0x4f, 0x7c, 0x9f, 0x53, 0xf1, 0xd6, + 0xf5, 0x22, 0x7a, 0xae, 0xcb, 0xed, 0x07, 0xff, 0x6c, 0xbf, 0x51, 0xc8, 0x1d, 0xa0, 0xbd, 0x6f, + 0xdb, 0x1c, 0x43, 0xa7, 0x16, 0xc2, 0x17, 0xd0, 0x61, 0x72, 0x52, 0x39, 0x32, 0x2e, 0x8d, 0x6b, + 0xfd, 0x14, 0xd9, 0x4d, 0x0b, 0x9b, 0x70, 0x52, 0x1b, 0x19, 0xb1, 0x3c, 0x11, 0x52, 0xd6, 0xb0, + 0x5b, 0x3d, 0xf3, 0xab, 0x0e, 0xf8, 0x7f, 0x61, 0x3c, 0x86, 0xe3, 0x46, 0x5a, 0x32, 0x77, 0x87, + 0x83, 0x1b, 0xdd, 0x36, 0x2d, 0xfe, 0x32, 0x11, 0x59, 0x61, 0x6f, 0x09, 0xf0, 0x0b, 0xb8, 0x97, + 0xee, 0xa2, 0x5f, 0x25, 0xaf, 0x69, 0xcc, 0xb2, 0xc2, 0x71, 0xe3, 0x34, 0x0a, 0x93, 0xa0, 0x36, + 0x76, 0x18, 0xd4, 0x7f, 0x0f, 0xbd, 0x96, 0x00, 0x3e, 0x05, 0xe3, 0x23, 0x2d, 0xe4, 0x4f, 0x71, + 0xdb, 0xae, 0x9e, 0xf8, 0x0a, 0x6e, 0x7d, 0x72, 0xa3, 0xbc, 0x09, 0xd8, 0xdc, 0x6f, 0x79, 0x5a, + 0x41, 0x94, 0x08, 0xb7, 0xd5, 0xc2, 0x33, 0xfd, 0x0a, 0x99, 0x1c, 0xce, 0xf6, 0x20, 0xf0, 0xd3, + 0x76, 0xc6, 0xdd, 0xe1, 0xc5, 0xa1, 0x24, 0xfe, 0xa6, 0xff, 0x10, 0x7a, 0x91, 0xcb, 0xc5, 0xd4, + 0x8d, 0xd4, 0xa4, 0xbe, 0xb2, 0xdd, 0x34, 0x9f, 0x6f, 0xaf, 0x52, 0x0d, 0x7c, 0xa7, 0xb9, 0x41, + 0xdd, 0xa5, 0x0a, 0xdc, 0x87, 0x63, 0x51, 0x65, 0x3c, 0xf1, 0xfd, 0x9a, 0x67, 0x5b, 0x5f, 0x8f, + 0x16, 0x2b, 0xa2, 0x2d, 0x57, 0x44, 0xdb, 0xac, 0x08, 0xfa, 0x52, 0x12, 0xf4, 0xa3, 0x24, 0xe8, + 0x67, 0x49, 0xd0, 0xa2, 0x24, 0xe8, 0x57, 0x49, 0xd0, 0xef, 0x92, 0x68, 0x9b, 0x92, 0xa0, 0xef, + 0x6b, 0xa2, 0x2d, 0xd6, 0x44, 0x5b, 0xae, 0x89, 0xf6, 0xae, 0xfd, 0x55, 0x78, 0x47, 0xf2, 0xdf, + 0xfe, 0xe4, 0x4f, 0x00, 0x00, 0x00, 0xff, 0xff, 0x4c, 0x91, 0x1d, 0x5b, 0x3b, 0x03, 0x00, 0x00, } func (this *Sparse) Equal(that interface{}) bool { @@ -404,6 +413,9 @@ func (this *PostingOffsetTable) Equal(that interface{}) bool { return false } } + if this.PostingOffsetInMemorySampling != that1.PostingOffsetInMemorySampling { + return false + } return true } func (this *PostingValueOffsets) Equal(that interface{}) bool { @@ -495,7 +507,7 @@ func (this *PostingOffsetTable) GoString() string { if this == nil { return "nil" } - s := make([]string, 0, 5) + s := make([]string, 0, 6) s = append(s, "&indexheaderpb.PostingOffsetTable{") keysForPostings := make([]string, 0, len(this.Postings)) for k, _ := range this.Postings { @@ -510,6 +522,7 @@ func (this *PostingOffsetTable) GoString() string { if this.Postings != nil { s = append(s, "Postings: "+mapStringForPostings+",\n") } + s = append(s, "PostingOffsetInMemorySampling: "+fmt.Sprintf("%#v", this.PostingOffsetInMemorySampling)+",\n") s = append(s, "}") return strings.Join(s, "") } @@ -659,6 +672,11 @@ func (m *PostingOffsetTable) MarshalToSizedBuffer(dAtA []byte) (int, error) { _ = i var l int _ = l + if m.PostingOffsetInMemorySampling != 0 { + i = encodeVarintSparse(dAtA, i, uint64(m.PostingOffsetInMemorySampling)) + i-- + dAtA[i] = 0x10 + } if len(m.Postings) > 0 { for k := range m.Postings { v := m.Postings[k] @@ -831,6 +849,9 @@ func (m *PostingOffsetTable) Size() (n int) { n += mapEntrySize + 1 + sovSparse(uint64(mapEntrySize)) } } + if m.PostingOffsetInMemorySampling != 0 { + n += 1 + sovSparse(uint64(m.PostingOffsetInMemorySampling)) + } return n } @@ -912,6 +933,7 @@ func (this *PostingOffsetTable) String() string { 
mapStringForPostings += "}" s := strings.Join([]string{`&PostingOffsetTable{`, `Postings:` + mapStringForPostings + `,`, + `PostingOffsetInMemorySampling:` + fmt.Sprintf("%v", this.PostingOffsetInMemorySampling) + `,`, `}`, }, "") return s @@ -1382,6 +1404,25 @@ func (m *PostingOffsetTable) Unmarshal(dAtA []byte) error { } m.Postings[mapkey] = mapvalue iNdEx = postIndex + case 2: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field PostingOffsetInMemorySampling", wireType) + } + m.PostingOffsetInMemorySampling = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowSparse + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.PostingOffsetInMemorySampling |= int64(b&0x7F) << shift + if b < 0x80 { + break + } + } default: iNdEx = preIndex skippy, err := skipSparse(dAtA[iNdEx:]) diff --git a/pkg/storage/indexheader/indexheaderpb/sparse.proto b/pkg/storage/indexheader/indexheaderpb/sparse.proto index c20c45f1ebd..6027e938f88 100644 --- a/pkg/storage/indexheader/indexheaderpb/sparse.proto +++ b/pkg/storage/indexheader/indexheaderpb/sparse.proto @@ -26,6 +26,7 @@ message Symbols { message PostingOffsetTable { // Postings is a map of label names -> PostingValueOffsets map postings = 1; + int64 postingOffsetInMemorySampling = 2; } // PostingValueOffsets stores a list of the first, last, and every 32nd (config default) PostingOffset for this label name. diff --git a/pkg/storage/indexheader/stream_binary_reader.go b/pkg/storage/indexheader/stream_binary_reader.go index ac0a66cec38..6c09f90a9f2 100644 --- a/pkg/storage/indexheader/stream_binary_reader.go +++ b/pkg/storage/indexheader/stream_binary_reader.go @@ -141,13 +141,36 @@ func newFileStreamBinaryReader(binPath string, id ulid.ULID, sparseHeadersPath s // Load in sparse symbols and postings offset table; from disk if this is a v2 index. if r.indexVersion == index.FormatV2 { + var reconstruct bool sparseData, err := os.ReadFile(sparseHeadersPath) if err != nil && !os.IsNotExist(err) { level.Warn(logger).Log("msg", "failed to read sparse index-headers from disk; recreating", "id", id, "err", err) } + // read persisted sparseHeaders from disk to memory. + if err == nil { + if err = r.loadFromSparseIndexHeader(logger, id, sparseHeadersPath, sparseData, postingOffsetsInMemSampling); err != nil { + return nil, fmt.Errorf("cannot load sparse index-header from disk: %w", err) + } + + sampleRate := r.postingsOffsetTable.PostingOffsetInMemSampling() + if (sampleRate > 0) && (sampleRate < postingOffsetsInMemSampling) { + // if the downloaded sparse-index-header sampling is set lower (more frequent) than + // postingOffsetsInMemSampling, we can downsample the PostingOffsetTable to the + // configured rate. + if err := downsampleSparseIndexHeader(r, sampleRate, postingOffsetsInMemSampling); err != nil { + level.Warn(logger).Log("msg", "failed to downsample sparse index-headers; recreating", "id", id, "err", err) + reconstruct = true + } + } else if (sampleRate > 0) && (sampleRate > postingOffsetsInMemSampling) { + // if the downloaded sparse-header-index sampling is set higher, then reconstruct + // from index-header using the desired rate. + reconstruct = true + } + } + // If sparseHeaders are not on disk, construct sparseHeaders and write to disk. 
- if err != nil { + if err != nil || reconstruct { if err = r.loadFromIndexHeader(logger, id, cfg, indexLastPostingListEndBound, postingOffsetsInMemSampling); err != nil { return nil, fmt.Errorf("cannot load sparse index-header: %w", err) } @@ -156,11 +179,6 @@ func newFileStreamBinaryReader(binPath string, id ulid.ULID, sparseHeadersPath s } level.Debug(logger).Log("msg", "built sparse index-header file", "id", id, "path", sparseHeadersPath) - } else { - // Otherwise, read persisted sparseHeaders from disk to memory. - if err = r.loadFromSparseIndexHeader(logger, id, sparseHeadersPath, sparseData, postingOffsetsInMemSampling); err != nil { - return nil, fmt.Errorf("cannot load sparse index-header from disk: %w", err) - } } } else { if err = r.loadFromIndexHeader(logger, id, cfg, indexLastPostingListEndBound, postingOffsetsInMemSampling); err != nil { @@ -168,6 +186,7 @@ func newFileStreamBinaryReader(binPath string, id ulid.ULID, sparseHeadersPath s } } + level.Debug(logger).Log("msg", "built sparse index-header file", "id", id, "path", sparseHeadersPath) labelNames, err := r.postingsOffsetTable.LabelNames() if err != nil { return nil, fmt.Errorf("cannot load label names from postings offset table: %w", err) @@ -184,6 +203,18 @@ func newFileStreamBinaryReader(binPath string, id ulid.ULID, sparseHeadersPath s return r, err } +func downsampleSparseIndexHeader(r *StreamBinaryReader, a, b int) error { + tbl, ok := r.postingsOffsetTable.(*streamindex.PostingOffsetTableV2) + if !ok { + return fmt.Errorf("failed to downsample sparse-index-header") + } + // TODO: need to track num offsets removed to decrement lastValOffset... + for _, pvo := range tbl.Postings() { + pvo.Downsample(a, b) + } + return nil +} + // loadFromSparseIndexHeader load from sparse index-header on disk. func (r *StreamBinaryReader) loadFromSparseIndexHeader(logger *spanlogger.SpanLogger, id ulid.ULID, sparseHeadersPath string, sparseData []byte, postingOffsetsInMemSampling int) (err error) { start := time.Now() From fd2d9fd2845ce9fd1cad4a7b49ce2de3ef261220 Mon Sep 17 00:00:00 2001 From: Dustin Wilson Date: Fri, 21 Feb 2025 20:31:07 -0500 Subject: [PATCH 17/45] add comments on DownsamplePostings --- pkg/storage/indexheader/index/postings.go | 13 ++++++++----- pkg/storage/indexheader/stream_binary_reader.go | 6 ++---- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/pkg/storage/indexheader/index/postings.go b/pkg/storage/indexheader/index/postings.go index f1e3c9d426b..445cc6ade4b 100644 --- a/pkg/storage/indexheader/index/postings.go +++ b/pkg/storage/indexheader/index/postings.go @@ -357,14 +357,17 @@ type postingValueOffsets struct { lastValOffset int64 } -func (t *PostingOffsetTableV2) Postings() map[string]*postingValueOffsets { - return t.postings +func (t *PostingOffsetTableV2) DownsamplePostings(a, b int) { + // todo: handle decrementing lastOffsetVal + for _, pvo := range t.postings { + _ = pvo.downsample(a, b) + } } -func (e *postingValueOffsets) Downsample(a int, b int) { +func (e *postingValueOffsets) downsample(a, b int) int { pvolen := len(e.offsets) if pvolen == 0 || a <= b || a <= 0 || b <= 0 { - return + return 0 } step := (a + b - 1) / b @@ -374,6 +377,7 @@ func (e *postingValueOffsets) Downsample(a int, b int) { j++ } e.offsets = e.offsets[:j] + return pvolen - j } // prefixOffsets returns the index of the first matching offset (start) and the index of the first non-matching (end). 
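The downsample method above thins the offsets slice in place: it keeps the first entry and every step-th entry after it, then truncates the tail. A standalone sketch of that pattern on a plain int slice (names are illustrative):

package main

import "fmt"

// keepEveryNth keeps offsets[0], offsets[step], offsets[2*step], ... in place
// and truncates the slice, mirroring the downsample loop in the patch.
func keepEveryNth(offsets []int, step int) []int {
	if step <= 1 {
		return offsets
	}
	j := 0
	for i := 0; i < len(offsets); i += step {
		offsets[j] = offsets[i]
		j++
	}
	return offsets[:j]
}

func main() {
	vals := []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}
	fmt.Println(keepEveryNth(vals, 4)) // [0 4 8] — ceil(10/4) = 3 entries survive
}
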
@@ -639,7 +643,6 @@ func (t *PostingOffsetTableV2) PostingOffsetInMemSampling() int { return t.postingOffsetsInMemSampling } - // NewSparsePostingOffsetTable loads all postings offset table data into a sparse index-header to be persisted to disk func (t *PostingOffsetTableV2) NewSparsePostingOffsetTable() (table *indexheaderpb.PostingOffsetTable) { sparseHeaders := &indexheaderpb.PostingOffsetTable{ diff --git a/pkg/storage/indexheader/stream_binary_reader.go b/pkg/storage/indexheader/stream_binary_reader.go index 6c09f90a9f2..46a04a70814 100644 --- a/pkg/storage/indexheader/stream_binary_reader.go +++ b/pkg/storage/indexheader/stream_binary_reader.go @@ -208,10 +208,8 @@ func downsampleSparseIndexHeader(r *StreamBinaryReader, a, b int) error { if !ok { return fmt.Errorf("failed to downsample sparse-index-header") } - // TODO: need to track num offsets removed to decrement lastValOffset... - for _, pvo := range tbl.Postings() { - pvo.Downsample(a, b) - } + // todo: set tbl back onto StreamBinaryReader + tbl.DownsamplePostings(a, b) return nil } From 1ea4e57136d9939bc6053e8f119546885aaa1fe2 Mon Sep 17 00:00:00 2001 From: Dustin Wilson Date: Mon, 24 Feb 2025 17:04:30 -0500 Subject: [PATCH 18/45] updates to downsampling --- pkg/storage/indexheader/header_test.go | 54 +++++++++++++++++++ pkg/storage/indexheader/index/postings.go | 28 +++++----- .../indexheader/index/postings_test.go | 30 +++++++++++ .../indexheader/stream_binary_reader.go | 23 ++++---- 4 files changed, 112 insertions(+), 23 deletions(-) diff --git a/pkg/storage/indexheader/header_test.go b/pkg/storage/indexheader/header_test.go index 87ecde23d23..f0217bddb2c 100644 --- a/pkg/storage/indexheader/header_test.go +++ b/pkg/storage/indexheader/header_test.go @@ -129,6 +129,60 @@ func TestReadersComparedToIndexHeader(t *testing.T) { } +func Test_DownsampleSparseIndexHeader(t *testing.T) { + + filterFunc := func(string) bool { return true } + name := "__name__" + prefix := "continuous_app_metric36" + + m, err := block.ReadMetaFromDir("./testdata/index_format_v2") + require.NoError(t, err) + + tmpDir := t.TempDir() + test.Copy(t, "./testdata/index_format_v2", filepath.Join(tmpDir, m.ULID.String())) + + bkt, err := filesystem.NewBucket(tmpDir) + require.NoError(t, err) + + ctx := context.Background() + noopMetrics := NewStreamBinaryReaderMetrics(nil) + + // call NewStreamBinaryReader to write a sparse-index-header file to disk + br1, err := NewStreamBinaryReader(ctx, log.NewNopLogger(), bkt, tmpDir, m.ULID, 32, noopMetrics, Config{}) + require.NoError(t, err) + require.Equal(t, 32, br1.postingsOffsetTable.PostingOffsetInMemSampling()) + + origLabelNames, err := br1.postingsOffsetTable.LabelNames() + require.NoError(t, err) + + origLabelValuesOffsets, err := br1.LabelValuesOffsets(context.Background(), name, prefix, filterFunc) + require.NoError(t, err) + + origIdxpbTbl := br1.postingsOffsetTable.NewSparsePostingOffsetTable() + + // a second call to NewStreamBinaryReader loads the previously written sparse-index-header and downsamples + // the original from 1/32 entries to 1/64 entries for each posting + br2, err := NewStreamBinaryReader(ctx, log.NewNopLogger(), bkt, tmpDir, m.ULID, 64, noopMetrics, Config{}) + require.NoError(t, err) + require.Equal(t, 64, br2.postingsOffsetTable.PostingOffsetInMemSampling()) + + downsampleLabelNames, err := br2.postingsOffsetTable.LabelNames() + require.NoError(t, err) + + downsampleLabelValuesOffsets, err := br2.LabelValuesOffsets(context.Background(), name, prefix, filterFunc) + require.NoError(t, 
err) + + // label names and label values offsets are equal between orginal and downsampled sparse-index-headers + require.ElementsMatch(t, downsampleLabelNames, origLabelNames) + require.ElementsMatch(t, downsampleLabelValuesOffsets, origLabelValuesOffsets) + + // each downsampled set of postings is a subset of the original sparse-index-headers' + downsampleIdxpbTbl := br2.postingsOffsetTable.NewSparsePostingOffsetTable() + for name, vals := range origIdxpbTbl.Postings { + require.Subset(t, vals.Offsets, downsampleIdxpbTbl.Postings[name].Offsets) + } +} + func compareIndexToHeader(t *testing.T, indexByteSlice index.ByteSlice, headerReader Reader) { ctx := context.Background() diff --git a/pkg/storage/indexheader/index/postings.go b/pkg/storage/indexheader/index/postings.go index 445cc6ade4b..de2c7d551ec 100644 --- a/pkg/storage/indexheader/index/postings.go +++ b/pkg/storage/indexheader/index/postings.go @@ -249,6 +249,10 @@ func NewPostingOffsetTableFromSparseHeader(factory *streamencoding.DecbufFactory t.postings[sName].lastValOffset = sOffsets.LastValOffset } + // replace sampling if postingOffsetsInMemSampling set in proto + if sampling := postingsOffsetTable.GetPostingOffsetInMemorySampling(); sampling > 0 { + t.postingOffsetsInMemSampling = int(sampling) + } return &t, err } @@ -357,27 +361,27 @@ type postingValueOffsets struct { lastValOffset int64 } -func (t *PostingOffsetTableV2) DownsamplePostings(a, b int) { - // todo: handle decrementing lastOffsetVal - for _, pvo := range t.postings { - _ = pvo.downsample(a, b) +func (t *PostingOffsetTableV2) DownsamplePostings(cur, tgt int) error { + if cur >= tgt || cur <= 0 || tgt <= 0 || cur%tgt != 0 { + return fmt.Errorf("invalid sampling rates, cannot downsample 1/%d to 1/%d", cur, tgt) } -} -func (e *postingValueOffsets) downsample(a, b int) int { - pvolen := len(e.offsets) - if pvolen == 0 || a <= b || a <= 0 || b <= 0 { - return 0 + step := cur / tgt + for _, pvo := range t.postings { + pvo.downsample(step) } + t.postingOffsetsInMemSampling = tgt + return nil +} - step := (a + b - 1) / b +func (e *postingValueOffsets) downsample(step int) { j := 0 - for i := 0; i < pvolen; i += step { + for i := 0; i < len(e.offsets); i += step { e.offsets[j] = e.offsets[i] j++ } e.offsets = e.offsets[:j] - return pvolen - j + return } // prefixOffsets returns the index of the first matching offset (start) and the index of the first non-matching (end). 
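The invariant behind DownsamplePostings is that a header stored at a 1/cur sampling rate can only be thinned to a configured 1/tgt rate when tgt is a positive multiple of cur; the distance between kept offsets is then tgt/cur. A small sketch of that check (an illustrative helper, not the exact code in the patch):

package main

import "fmt"

// stepFor returns how many stored offsets to skip between kept entries when
// thinning a header sampled at 1/cur down to 1/tgt. It reports ok=false when
// tgt is not a positive multiple of cur, in which case the sparse header must
// be rebuilt from the full index-header instead.
func stepFor(cur, tgt int) (step int, ok bool) {
	if cur <= 0 || tgt <= 0 || tgt < cur || tgt%cur != 0 {
		return 0, false
	}
	return tgt / cur, true
}

func main() {
	fmt.Println(stepFor(8, 32))  // 4 true  — keep every 4th offset
	fmt.Println(stepFor(8, 20))  // 0 false — 20 is not a multiple of 8, rebuild
	fmt.Println(stepFor(32, 32)) // 1 true  — no-op
}
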
diff --git a/pkg/storage/indexheader/index/postings_test.go b/pkg/storage/indexheader/index/postings_test.go index 2a7cd943265..a92e90e6fc7 100644 --- a/pkg/storage/indexheader/index/postings_test.go +++ b/pkg/storage/indexheader/index/postings_test.go @@ -3,6 +3,7 @@ package index import ( + "fmt" "testing" "github.com/stretchr/testify/assert" @@ -100,3 +101,32 @@ func TestPostingValueOffsets(t *testing.T) { }) } } + +func Test_PostingValueOffsets_Downsample(t *testing.T) { + + pvos := make([]postingOffset, 20) + for i := range pvos { + pvos[i] = postingOffset{value: fmt.Sprintf("%d", i)} + } + + tests := []struct { + name string + pvo []postingOffset + step int + expectedLen int + }{ + {"len_eq_when_step_size=1", pvos, 1, 20}, + {"len_approx_halved_when_step_size=2", pvos, 2, 10}, + {"len_approx_quartered_when_step_size=4", pvos, 4, 5}, + {"len_one_when_step_size_is_len_offsets", pvos, 20, 1}, + {"len_one_when_step_size_exceeds_len_offsets", pvos, 200, 1}, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + pvo := &postingValueOffsets{offsets: append([]postingOffset{}, tc.pvo...)} + pvo.downsample(tc.step) + assert.Equal(t, tc.expectedLen, len(pvo.offsets), "unexpected length for step size %d", tc.step) + }) + } +} diff --git a/pkg/storage/indexheader/stream_binary_reader.go b/pkg/storage/indexheader/stream_binary_reader.go index 46a04a70814..5549895867e 100644 --- a/pkg/storage/indexheader/stream_binary_reader.go +++ b/pkg/storage/indexheader/stream_binary_reader.go @@ -153,16 +153,15 @@ func newFileStreamBinaryReader(binPath string, id ulid.ULID, sparseHeadersPath s return nil, fmt.Errorf("cannot load sparse index-header from disk: %w", err) } - sampleRate := r.postingsOffsetTable.PostingOffsetInMemSampling() - if (sampleRate > 0) && (sampleRate < postingOffsetsInMemSampling) { - // if the downloaded sparse-index-header sampling is set lower (more frequent) than - // postingOffsetsInMemSampling, we can downsample the PostingOffsetTable to the - // configured rate. - if err := downsampleSparseIndexHeader(r, sampleRate, postingOffsetsInMemSampling); err != nil { + sampling := r.postingsOffsetTable.PostingOffsetInMemSampling() + if (sampling > 0) && (sampling < postingOffsetsInMemSampling) { + // if the sampling rate stored in the sparse-index-header is set lower (more frequent) than + // the configured postingOffsetsInMemSampling, downsample to the configured rate + if err := downsampleSparseIndexHeader(r, postingOffsetsInMemSampling, sampling); err != nil { level.Warn(logger).Log("msg", "failed to downsample sparse index-headers; recreating", "id", id, "err", err) reconstruct = true } - } else if (sampleRate > 0) && (sampleRate > postingOffsetsInMemSampling) { + } else if (sampling > 0) && (sampling > postingOffsetsInMemSampling) { // if the downloaded sparse-header-index sampling is set higher, then reconstruct // from index-header using the desired rate. 
reconstruct = true @@ -203,13 +202,15 @@ func newFileStreamBinaryReader(binPath string, id ulid.ULID, sparseHeadersPath s return r, err } -func downsampleSparseIndexHeader(r *StreamBinaryReader, a, b int) error { +func downsampleSparseIndexHeader(r *StreamBinaryReader, curSampling, newSampling int) error { tbl, ok := r.postingsOffsetTable.(*streamindex.PostingOffsetTableV2) if !ok { - return fmt.Errorf("failed to downsample sparse-index-header") + return fmt.Errorf("cannot downsample sparse-index-header") } - // todo: set tbl back onto StreamBinaryReader - tbl.DownsamplePostings(a, b) + if err := tbl.DownsamplePostings(curSampling, newSampling); err != nil { + return err + } + r.postingsOffsetTable = tbl return nil } From c3625894f75a4ad1bf12316a7242c958b91a124a Mon Sep 17 00:00:00 2001 From: Dustin Wilson Date: Mon, 24 Feb 2025 17:11:50 -0500 Subject: [PATCH 19/45] golangci-lint --- pkg/storage/indexheader/header_test.go | 2 +- pkg/storage/indexheader/index/postings.go | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/pkg/storage/indexheader/header_test.go b/pkg/storage/indexheader/header_test.go index f0217bddb2c..d0694075a61 100644 --- a/pkg/storage/indexheader/header_test.go +++ b/pkg/storage/indexheader/header_test.go @@ -172,7 +172,7 @@ func Test_DownsampleSparseIndexHeader(t *testing.T) { downsampleLabelValuesOffsets, err := br2.LabelValuesOffsets(context.Background(), name, prefix, filterFunc) require.NoError(t, err) - // label names and label values offsets are equal between orginal and downsampled sparse-index-headers + // label names and label values offsets are equal between original and downsampled sparse-index-headers require.ElementsMatch(t, downsampleLabelNames, origLabelNames) require.ElementsMatch(t, downsampleLabelValuesOffsets, origLabelValuesOffsets) diff --git a/pkg/storage/indexheader/index/postings.go b/pkg/storage/indexheader/index/postings.go index de2c7d551ec..51705fb1b04 100644 --- a/pkg/storage/indexheader/index/postings.go +++ b/pkg/storage/indexheader/index/postings.go @@ -381,7 +381,6 @@ func (e *postingValueOffsets) downsample(step int) { j++ } e.offsets = e.offsets[:j] - return } // prefixOffsets returns the index of the first matching offset (start) and the index of the first non-matching (end). From d7660410c380fa6ec1b0bb5e530eabbf977b7372 Mon Sep 17 00:00:00 2001 From: Dustin Wilson Date: Mon, 24 Feb 2025 18:20:10 -0500 Subject: [PATCH 20/45] add todo comment on test, can pass unexpectedly --- pkg/storage/indexheader/header_test.go | 9 +++++++-- pkg/storage/indexheader/index/postings.go | 10 ++++++---- pkg/storage/indexheader/stream_binary_reader.go | 8 +++++--- 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/pkg/storage/indexheader/header_test.go b/pkg/storage/indexheader/header_test.go index d0694075a61..52b984e487c 100644 --- a/pkg/storage/indexheader/header_test.go +++ b/pkg/storage/indexheader/header_test.go @@ -160,9 +160,12 @@ func Test_DownsampleSparseIndexHeader(t *testing.T) { origIdxpbTbl := br1.postingsOffsetTable.NewSparsePostingOffsetTable() + // TODO: delete index-header before a second call to NewStreamBinaryReader, this forces the reader to downsample + // w.o. 
falling back to reconstructing from the header if there's an error + // a second call to NewStreamBinaryReader loads the previously written sparse-index-header and downsamples // the original from 1/32 entries to 1/64 entries for each posting - br2, err := NewStreamBinaryReader(ctx, log.NewNopLogger(), bkt, tmpDir, m.ULID, 64, noopMetrics, Config{}) + br2, err := NewStreamBinaryReader(ctx, test.NewTestingLogger(t), bkt, tmpDir, m.ULID, 64, noopMetrics, Config{}) require.NoError(t, err) require.Equal(t, 64, br2.postingsOffsetTable.PostingOffsetInMemSampling()) @@ -176,9 +179,11 @@ func Test_DownsampleSparseIndexHeader(t *testing.T) { require.ElementsMatch(t, downsampleLabelNames, origLabelNames) require.ElementsMatch(t, downsampleLabelValuesOffsets, origLabelValuesOffsets) - // each downsampled set of postings is a subset of the original sparse-index-headers' + // each downsampled set of postings is a subset of the original sparse-index-headers with half + // as many entries as the original downsampleIdxpbTbl := br2.postingsOffsetTable.NewSparsePostingOffsetTable() for name, vals := range origIdxpbTbl.Postings { + require.Equal(t, (len(vals.Offsets)+1)/2, len(downsampleIdxpbTbl.Postings[name].Offsets)) require.Subset(t, vals.Offsets, downsampleIdxpbTbl.Postings[name].Offsets) } } diff --git a/pkg/storage/indexheader/index/postings.go b/pkg/storage/indexheader/index/postings.go index 51705fb1b04..e77586d5990 100644 --- a/pkg/storage/indexheader/index/postings.go +++ b/pkg/storage/indexheader/index/postings.go @@ -362,11 +362,11 @@ type postingValueOffsets struct { } func (t *PostingOffsetTableV2) DownsamplePostings(cur, tgt int) error { - if cur >= tgt || cur <= 0 || tgt <= 0 || cur%tgt != 0 { + if cur >= tgt || cur <= 0 || tgt <= 0 || tgt%cur != 0 { return fmt.Errorf("invalid sampling rates, cannot downsample 1/%d to 1/%d", cur, tgt) } - step := cur / tgt + step := tgt / cur for _, pvo := range t.postings { pvo.downsample(step) } @@ -641,9 +641,11 @@ func (t *PostingOffsetTableV2) LabelNames() ([]string, error) { return labelNames, nil } -// PostingOffsetInMemSampling ... 
func (t *PostingOffsetTableV2) PostingOffsetInMemSampling() int { - return t.postingOffsetsInMemSampling + if t != nil { + return t.postingOffsetsInMemSampling + } + return 0 } // NewSparsePostingOffsetTable loads all postings offset table data into a sparse index-header to be persisted to disk diff --git a/pkg/storage/indexheader/stream_binary_reader.go b/pkg/storage/indexheader/stream_binary_reader.go index 5549895867e..6f6ea7204c6 100644 --- a/pkg/storage/indexheader/stream_binary_reader.go +++ b/pkg/storage/indexheader/stream_binary_reader.go @@ -157,14 +157,16 @@ func newFileStreamBinaryReader(binPath string, id ulid.ULID, sparseHeadersPath s if (sampling > 0) && (sampling < postingOffsetsInMemSampling) { // if the sampling rate stored in the sparse-index-header is set lower (more frequent) than // the configured postingOffsetsInMemSampling, downsample to the configured rate - if err := downsampleSparseIndexHeader(r, postingOffsetsInMemSampling, sampling); err != nil { + if err := r.downsampleSparseIndexHeader(sampling, postingOffsetsInMemSampling); err != nil { level.Warn(logger).Log("msg", "failed to downsample sparse index-headers; recreating", "id", id, "err", err) reconstruct = true } } else if (sampling > 0) && (sampling > postingOffsetsInMemSampling) { + level.Warn(logger).Log("msg", "mismatch...", "id", id, "err", err) // if the downloaded sparse-header-index sampling is set higher, then reconstruct // from index-header using the desired rate. reconstruct = true + } } @@ -202,12 +204,12 @@ func newFileStreamBinaryReader(binPath string, id ulid.ULID, sparseHeadersPath s return r, err } -func downsampleSparseIndexHeader(r *StreamBinaryReader, curSampling, newSampling int) error { +func (r *StreamBinaryReader) downsampleSparseIndexHeader(curSampling, tgtSampling int) error { tbl, ok := r.postingsOffsetTable.(*streamindex.PostingOffsetTableV2) if !ok { return fmt.Errorf("cannot downsample sparse-index-header") } - if err := tbl.DownsamplePostings(curSampling, newSampling); err != nil { + if err := tbl.DownsamplePostings(curSampling, tgtSampling); err != nil { return err } r.postingsOffsetTable = tbl From 13753ffd8c057c2e9ed35da6c49445c18c1f1821 Mon Sep 17 00:00:00 2001 From: Dustin Wilson Date: Mon, 24 Feb 2025 21:25:43 -0500 Subject: [PATCH 21/45] update to tests --- pkg/storage/indexheader/header_test.go | 91 ++++++++++--------- pkg/storage/indexheader/index/postings.go | 2 +- .../indexheader/stream_binary_reader.go | 18 ++-- 3 files changed, 59 insertions(+), 52 deletions(-) diff --git a/pkg/storage/indexheader/header_test.go b/pkg/storage/indexheader/header_test.go index 52b984e487c..eb1f91b26a0 100644 --- a/pkg/storage/indexheader/header_test.go +++ b/pkg/storage/indexheader/header_test.go @@ -130,61 +130,64 @@ func TestReadersComparedToIndexHeader(t *testing.T) { } func Test_DownsampleSparseIndexHeader(t *testing.T) { + tests := map[string]struct { + protoRate int + inMemSamplingRate int + }{ + "downsample_1_to_32": {1, 32}, + "downsample_2_to_32": {2, 32}, + "downsample_8_to_32": {8, 32}, + "downsample_4_to_16": {4, 16}, + "downsample_4_to_48": {4, 48}, + "downsample_4_to_4096": {4, 4096}, + "downsample_noop_32_to_32": {32, 32}, + } - filterFunc := func(string) bool { return true } - name := "__name__" - prefix := "continuous_app_metric36" - - m, err := block.ReadMetaFromDir("./testdata/index_format_v2") - require.NoError(t, err) - - tmpDir := t.TempDir() - test.Copy(t, "./testdata/index_format_v2", filepath.Join(tmpDir, m.ULID.String())) - - bkt, err := 
filesystem.NewBucket(tmpDir) - require.NoError(t, err) + for name, tt := range tests { + t.Run(name, func(t *testing.T) { + m, err := block.ReadMetaFromDir("./testdata/index_format_v2") + require.NoError(t, err) - ctx := context.Background() - noopMetrics := NewStreamBinaryReaderMetrics(nil) + tmpDir := t.TempDir() + test.Copy(t, "./testdata/index_format_v2", filepath.Join(tmpDir, m.ULID.String())) - // call NewStreamBinaryReader to write a sparse-index-header file to disk - br1, err := NewStreamBinaryReader(ctx, log.NewNopLogger(), bkt, tmpDir, m.ULID, 32, noopMetrics, Config{}) - require.NoError(t, err) - require.Equal(t, 32, br1.postingsOffsetTable.PostingOffsetInMemSampling()) - - origLabelNames, err := br1.postingsOffsetTable.LabelNames() - require.NoError(t, err) + bkt, err := filesystem.NewBucket(tmpDir) + require.NoError(t, err) - origLabelValuesOffsets, err := br1.LabelValuesOffsets(context.Background(), name, prefix, filterFunc) - require.NoError(t, err) + ctx := context.Background() + noopMetrics := NewStreamBinaryReaderMetrics(nil) - origIdxpbTbl := br1.postingsOffsetTable.NewSparsePostingOffsetTable() + // write a sparse index-header file to disk + br1, err := NewStreamBinaryReader(ctx, log.NewNopLogger(), bkt, tmpDir, m.ULID, tt.protoRate, noopMetrics, Config{}) + require.NoError(t, err) + require.Equal(t, tt.protoRate, br1.postingsOffsetTable.PostingOffsetInMemSampling()) - // TODO: delete index-header before a second call to NewStreamBinaryReader, this forces the reader to downsample - // w.o. falling back to reconstructing from the header if there's an error + origLabelNames, err := br1.postingsOffsetTable.LabelNames() + require.NoError(t, err) - // a second call to NewStreamBinaryReader loads the previously written sparse-index-header and downsamples - // the original from 1/32 entries to 1/64 entries for each posting - br2, err := NewStreamBinaryReader(ctx, test.NewTestingLogger(t), bkt, tmpDir, m.ULID, 64, noopMetrics, Config{}) - require.NoError(t, err) - require.Equal(t, 64, br2.postingsOffsetTable.PostingOffsetInMemSampling()) + // a second call to NewStreamBinaryReader loads the previously written sparse index-header and downsamples + // the header from tt.protoRate to tt.inMemSamplingRate entries for each posting + br2, err := NewStreamBinaryReader(ctx, log.NewNopLogger(), bkt, tmpDir, m.ULID, tt.inMemSamplingRate, noopMetrics, Config{}) + require.NoError(t, err) + require.Equal(t, tt.inMemSamplingRate, br2.postingsOffsetTable.PostingOffsetInMemSampling()) - downsampleLabelNames, err := br2.postingsOffsetTable.LabelNames() - require.NoError(t, err) + downsampleLabelNames, err := br2.postingsOffsetTable.LabelNames() + require.NoError(t, err) - downsampleLabelValuesOffsets, err := br2.LabelValuesOffsets(context.Background(), name, prefix, filterFunc) - require.NoError(t, err) + // label names are equal between original and downsampled sparse index-headers + require.ElementsMatch(t, downsampleLabelNames, origLabelNames) - // label names and label values offsets are equal between original and downsampled sparse-index-headers - require.ElementsMatch(t, downsampleLabelNames, origLabelNames) - require.ElementsMatch(t, downsampleLabelValuesOffsets, origLabelValuesOffsets) + // downsampled postings are a subset of the original sparse index-header postings + origIdxpbTbl := br1.postingsOffsetTable.NewSparsePostingOffsetTable() + downsampleIdxpbTbl := br2.postingsOffsetTable.NewSparsePostingOffsetTable() - // each downsampled set of postings is a subset of the original 
sparse-index-headers with half - // as many entries as the original - downsampleIdxpbTbl := br2.postingsOffsetTable.NewSparsePostingOffsetTable() - for name, vals := range origIdxpbTbl.Postings { - require.Equal(t, (len(vals.Offsets)+1)/2, len(downsampleIdxpbTbl.Postings[name].Offsets)) - require.Subset(t, vals.Offsets, downsampleIdxpbTbl.Postings[name].Offsets) + rr := tt.inMemSamplingRate / tt.protoRate + for name, vals := range origIdxpbTbl.Postings { + downsampledOffsets := downsampleIdxpbTbl.Postings[name].Offsets + require.Equal(t, (len(vals.Offsets)+rr-1)/rr, len(downsampledOffsets)) + require.Subset(t, vals.Offsets, downsampledOffsets) + } + }) } } diff --git a/pkg/storage/indexheader/index/postings.go b/pkg/storage/indexheader/index/postings.go index e77586d5990..838e000e0cc 100644 --- a/pkg/storage/indexheader/index/postings.go +++ b/pkg/storage/indexheader/index/postings.go @@ -363,7 +363,7 @@ type postingValueOffsets struct { func (t *PostingOffsetTableV2) DownsamplePostings(cur, tgt int) error { if cur >= tgt || cur <= 0 || tgt <= 0 || tgt%cur != 0 { - return fmt.Errorf("invalid sampling rates, cannot downsample 1/%d to 1/%d", cur, tgt) + return fmt.Errorf("invalid in-mem-sampling rates") } step := tgt / cur diff --git a/pkg/storage/indexheader/stream_binary_reader.go b/pkg/storage/indexheader/stream_binary_reader.go index 6f6ea7204c6..e243e8fb1be 100644 --- a/pkg/storage/indexheader/stream_binary_reader.go +++ b/pkg/storage/indexheader/stream_binary_reader.go @@ -155,18 +155,22 @@ func newFileStreamBinaryReader(binPath string, id ulid.ULID, sparseHeadersPath s sampling := r.postingsOffsetTable.PostingOffsetInMemSampling() if (sampling > 0) && (sampling < postingOffsetsInMemSampling) { - // if the sampling rate stored in the sparse-index-header is set lower (more frequent) than + // if the sampling rate in the sparse index-header is set lower (more frequent) than // the configured postingOffsetsInMemSampling, downsample to the configured rate if err := r.downsampleSparseIndexHeader(sampling, postingOffsetsInMemSampling); err != nil { - level.Warn(logger).Log("msg", "failed to downsample sparse index-headers; recreating", "id", id, "err", err) + level.Warn(logger).Log( + "msg", "failed to downsample sparse index-header; recreating", "id", id, "err", err, + "header_rate", sampling, "in-mem-sampling rate", postingOffsetsInMemSampling, + ) reconstruct = true } } else if (sampling > 0) && (sampling > postingOffsetsInMemSampling) { - level.Warn(logger).Log("msg", "mismatch...", "id", id, "err", err) - // if the downloaded sparse-header-index sampling is set higher, then reconstruct - // from index-header using the desired rate. 
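The assertion above relies on ceiling division to predict how many offsets survive downsampling: with step = inMemSamplingRate/protoRate, a list of n offsets shrinks to ceil(n/step), because the first offset is always kept. A tiny worked check of that arithmetic (illustrative numbers):

package main

import "fmt"

// ceilDiv is the integer form of ceil(n/step) used in the test assertion.
func ceilDiv(n, step int) int {
	return (n + step - 1) / step
}

func main() {
	// a 1/4 header downsampled to 1/32: step = 8
	fmt.Println(ceilDiv(100, 8)) // 13 offsets remain (indices 0, 8, ..., 96)
	// a 1/32 header kept at 1/32: step = 1, nothing is dropped
	fmt.Println(ceilDiv(100, 1)) // 100
}
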
+ // if the sparse index-header sampling rate is set higher, reconstruct from index-header + level.Warn(logger).Log( + "msg", "sparse index-header sampling rate doesn't match in-mem-sampling rate; recreating", + "id", id, "err", err, "header_rate", sampling, "in-mem-sampling rate", postingOffsetsInMemSampling, + ) reconstruct = true - } } @@ -207,7 +211,7 @@ func newFileStreamBinaryReader(binPath string, id ulid.ULID, sparseHeadersPath s func (r *StreamBinaryReader) downsampleSparseIndexHeader(curSampling, tgtSampling int) error { tbl, ok := r.postingsOffsetTable.(*streamindex.PostingOffsetTableV2) if !ok { - return fmt.Errorf("cannot downsample sparse-index-header") + return fmt.Errorf("postings offset table has incompatible version v%d", r.indexVersion) } if err := tbl.DownsamplePostings(curSampling, tgtSampling); err != nil { return err From 41d561b7fde8bb1387b6d6c89640665e6ed185fd Mon Sep 17 00:00:00 2001 From: Dustin Wilson Date: Wed, 26 Feb 2025 11:54:58 -0500 Subject: [PATCH 22/45] review comments --- cmd/mimir/config-descriptor.json | 2 +- cmd/mimir/help-all.txt.tmpl | 2 +- .../configuration-parameters/index.md | 4 +- pkg/compactor/compactor.go | 2 +- pkg/storage/indexheader/header_test.go | 4 +- pkg/storage/indexheader/index/postings.go | 2 +- .../indexheader/stream_binary_reader.go | 44 +++++++++---------- .../indexheader/stream_binary_reader_test.go | 2 +- 8 files changed, 32 insertions(+), 30 deletions(-) diff --git a/cmd/mimir/config-descriptor.json b/cmd/mimir/config-descriptor.json index dba56bc956a..e8a153f1732 100644 --- a/cmd/mimir/config-descriptor.json +++ b/cmd/mimir/config-descriptor.json @@ -11517,7 +11517,7 @@ "kind": "field", "name": "upload_sparse_index_headers", "required": false, - "desc": "If enabled, the compactor will construct and upload sparse index headers to object storage during each compaction cycle.", + "desc": "If enabled, the compactor will construct and upload sparse index headers to object storage during each compaction cycle. This allows store-gateway instances to use the sparse headers from object storage instead of recreating them locally.", "fieldValue": null, "fieldDefaultValue": false, "fieldFlag": "compactor.upload-sparse-index-headers", diff --git a/cmd/mimir/help-all.txt.tmpl b/cmd/mimir/help-all.txt.tmpl index f718b39e207..118cd41bd91 100644 --- a/cmd/mimir/help-all.txt.tmpl +++ b/cmd/mimir/help-all.txt.tmpl @@ -1284,7 +1284,7 @@ Usage of ./cmd/mimir/mimir: -compactor.tenant-cleanup-delay duration For tenants marked for deletion, this is the time between deletion of the last block, and doing final cleanup (marker files, debug files) of the tenant. (default 6h0m0s) -compactor.upload-sparse-index-headers - [experimental] If enabled, the compactor will construct and upload sparse index headers to object storage during each compaction cycle. + [experimental] If enabled, the compactor will construct and upload sparse index headers to object storage during each compaction cycle. This allows store-gateway instances to use the sparse headers from object storage instead of recreating them locally. -config.expand-env Expands ${var} or $var in config according to the values of the environment variables. 
-config.file value diff --git a/docs/sources/mimir/configure/configuration-parameters/index.md b/docs/sources/mimir/configure/configuration-parameters/index.md index dd9aecea63c..59e0f7d95e7 100644 --- a/docs/sources/mimir/configure/configuration-parameters/index.md +++ b/docs/sources/mimir/configure/configuration-parameters/index.md @@ -4950,7 +4950,9 @@ sharding_ring: [max_lookback: | default = 0s] # (experimental) If enabled, the compactor will construct and upload sparse -# index headers to object storage during each compaction cycle. +# index headers to object storage during each compaction cycle. This allows +# store-gateway instances to use the sparse headers from object storage instead +# of recreating them locally. # CLI flag: -compactor.upload-sparse-index-headers [upload_sparse_index_headers: | default = false] ``` diff --git a/pkg/compactor/compactor.go b/pkg/compactor/compactor.go index 5986e85100c..c3f5bd64581 100644 --- a/pkg/compactor/compactor.go +++ b/pkg/compactor/compactor.go @@ -161,7 +161,7 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet, logger log.Logger) { f.DurationVar(&cfg.TenantCleanupDelay, "compactor.tenant-cleanup-delay", 6*time.Hour, "For tenants marked for deletion, this is the time between deletion of the last block, and doing final cleanup (marker files, debug files) of the tenant.") f.BoolVar(&cfg.NoBlocksFileCleanupEnabled, "compactor.no-blocks-file-cleanup-enabled", false, "If enabled, will delete the bucket-index, markers and debug files in the tenant bucket when there are no blocks left in the index.") f.DurationVar(&cfg.MaxLookback, "compactor.max-lookback", 0*time.Second, "Blocks uploaded before the lookback aren't considered in compactor cycles. If set, this value should be larger than all values in `-blocks-storage.tsdb.block-ranges-period`. A value of 0s means that all blocks are considered regardless of their upload time.") - f.BoolVar(&cfg.UploadSparseIndexHeaders, "compactor.upload-sparse-index-headers", false, "If enabled, the compactor will construct and upload sparse index headers to object storage during each compaction cycle.") + f.BoolVar(&cfg.UploadSparseIndexHeaders, "compactor.upload-sparse-index-headers", false, "If enabled, the compactor will construct and upload sparse index headers to object storage during each compaction cycle. 
This allows store-gateway instances to use the sparse headers from object storage instead of recreating them locally.") // compactor concurrency options f.IntVar(&cfg.MaxOpeningBlocksConcurrency, "compactor.max-opening-blocks-concurrency", 1, "Number of goroutines opening blocks before compaction.") diff --git a/pkg/storage/indexheader/header_test.go b/pkg/storage/indexheader/header_test.go index eb1f91b26a0..dc18aae916f 100644 --- a/pkg/storage/indexheader/header_test.go +++ b/pkg/storage/indexheader/header_test.go @@ -181,10 +181,10 @@ func Test_DownsampleSparseIndexHeader(t *testing.T) { origIdxpbTbl := br1.postingsOffsetTable.NewSparsePostingOffsetTable() downsampleIdxpbTbl := br2.postingsOffsetTable.NewSparsePostingOffsetTable() - rr := tt.inMemSamplingRate / tt.protoRate + step := tt.inMemSamplingRate / tt.protoRate for name, vals := range origIdxpbTbl.Postings { downsampledOffsets := downsampleIdxpbTbl.Postings[name].Offsets - require.Equal(t, (len(vals.Offsets)+rr-1)/rr, len(downsampledOffsets)) + require.Equal(t, (len(vals.Offsets)+step-1)/step, len(downsampledOffsets)) require.Subset(t, vals.Offsets, downsampledOffsets) } }) diff --git a/pkg/storage/indexheader/index/postings.go b/pkg/storage/indexheader/index/postings.go index 838e000e0cc..61f976bb113 100644 --- a/pkg/storage/indexheader/index/postings.go +++ b/pkg/storage/indexheader/index/postings.go @@ -44,7 +44,7 @@ type PostingOffsetTable interface { NewSparsePostingOffsetTable() (table *indexheaderpb.PostingOffsetTable) - // PostingOffsetInMemSampling ... + // PostingOffsetInMemSampling returns the ratio of postings offsets that the store PostingOffsetInMemSampling() int } diff --git a/pkg/storage/indexheader/stream_binary_reader.go b/pkg/storage/indexheader/stream_binary_reader.go index e243e8fb1be..440b7eff549 100644 --- a/pkg/storage/indexheader/stream_binary_reader.go +++ b/pkg/storage/indexheader/stream_binary_reader.go @@ -93,7 +93,9 @@ func NewStreamBinaryReader(ctx context.Context, logger log.Logger, bkt objstore. } // newFileStreamBinaryReader loads sparse index-headers from disk or constructs it from the index-header if not available. -func newFileStreamBinaryReader(binPath string, id ulid.ULID, sparseHeadersPath string, postingOffsetsInMemSampling int, logger *spanlogger.SpanLogger, metrics *StreamBinaryReaderMetrics, cfg Config) (bw *StreamBinaryReader, err error) { +func newFileStreamBinaryReader(binPath string, id ulid.ULID, sparseHeadersPath string, postingOffsetsInMemSampling int, logger log.Logger, metrics *StreamBinaryReaderMetrics, cfg Config) (bw *StreamBinaryReader, err error) { + logger = log.With(logger, "id", id, "path", sparseHeadersPath, "inmem_sampling_rate", postingOffsetsInMemSampling) + r := &StreamBinaryReader{ factory: streamencoding.NewDecbufFactory(binPath, cfg.MaxIdleFileHandles, metrics.decbufFactory), } @@ -144,12 +146,12 @@ func newFileStreamBinaryReader(binPath string, id ulid.ULID, sparseHeadersPath s var reconstruct bool sparseData, err := os.ReadFile(sparseHeadersPath) if err != nil && !os.IsNotExist(err) { - level.Warn(logger).Log("msg", "failed to read sparse index-headers from disk; recreating", "id", id, "err", err) + level.Warn(logger).Log("msg", "failed to read sparse index-headers from disk; recreating", "err", err) } // read persisted sparseHeaders from disk to memory. 
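The refactor above switches newFileStreamBinaryReader to a plain go-kit log.Logger and attaches the block ID, path, and sampling rate once via log.With, so later log calls don't repeat those fields. A minimal sketch of that pattern with a stdout logger and illustrative field values:

package main

import (
	"os"

	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
)

func main() {
	// Base logger; in Mimir this would be the component logger passed in.
	logger := log.NewLogfmtLogger(os.Stdout)

	// Attach fields shared by every message emitted while handling this block.
	logger = log.With(logger, "id", "01EXAMPLEULID0000000000000", "inmem_sampling_rate", 32)

	// Subsequent calls only carry the message-specific fields.
	level.Info(logger).Log("msg", "loading sparse index-header from disk")
	level.Warn(logger).Log("msg", "failed to read sparse index-headers from disk; recreating", "err", "file corrupt")
}
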
if err == nil { - if err = r.loadFromSparseIndexHeader(logger, id, sparseHeadersPath, sparseData, postingOffsetsInMemSampling); err != nil { + if err = r.loadFromSparseIndexHeader(logger, sparseData, postingOffsetsInMemSampling); err != nil { return nil, fmt.Errorf("cannot load sparse index-header from disk: %w", err) } @@ -159,7 +161,7 @@ func newFileStreamBinaryReader(binPath string, id ulid.ULID, sparseHeadersPath s // the configured postingOffsetsInMemSampling, downsample to the configured rate if err := r.downsampleSparseIndexHeader(sampling, postingOffsetsInMemSampling); err != nil { level.Warn(logger).Log( - "msg", "failed to downsample sparse index-header; recreating", "id", id, "err", err, + "msg", "failed to downsample sparse index-header; recreating", "err", err, "header_rate", sampling, "in-mem-sampling rate", postingOffsetsInMemSampling, ) reconstruct = true @@ -168,30 +170,29 @@ func newFileStreamBinaryReader(binPath string, id ulid.ULID, sparseHeadersPath s // if the sparse index-header sampling rate is set higher, reconstruct from index-header level.Warn(logger).Log( "msg", "sparse index-header sampling rate doesn't match in-mem-sampling rate; recreating", - "id", id, "err", err, "header_rate", sampling, "in-mem-sampling rate", postingOffsetsInMemSampling, + "err", err, "header_sampling_rate", sampling, ) reconstruct = true } } - // If sparseHeaders are not on disk, construct sparseHeaders and write to disk. + // construct sparseHeaders and write to disk if sparseHeaders are not on disk or the sparseHeaders on disk couldn't be + // downsampled to the configured rate if err != nil || reconstruct { - if err = r.loadFromIndexHeader(logger, id, cfg, indexLastPostingListEndBound, postingOffsetsInMemSampling); err != nil { + if err = r.loadFromIndexHeader(logger, cfg, indexLastPostingListEndBound, postingOffsetsInMemSampling); err != nil { return nil, fmt.Errorf("cannot load sparse index-header: %w", err) } - if err := writeSparseHeadersToFile(logger, id, sparseHeadersPath, r); err != nil { + if err := writeSparseHeadersToFile(logger, sparseHeadersPath, r); err != nil { return nil, fmt.Errorf("cannot write sparse index-header to disk: %w", err) } - - level.Debug(logger).Log("msg", "built sparse index-header file", "id", id, "path", sparseHeadersPath) } } else { - if err = r.loadFromIndexHeader(logger, id, cfg, indexLastPostingListEndBound, postingOffsetsInMemSampling); err != nil { + if err = r.loadFromIndexHeader(logger, cfg, indexLastPostingListEndBound, postingOffsetsInMemSampling); err != nil { return nil, fmt.Errorf("cannot load sparse index-header: %w", err) } } - level.Debug(logger).Log("msg", "built sparse index-header file", "id", id, "path", sparseHeadersPath) + level.Debug(logger).Log("msg", "built sparse index-header file") labelNames, err := r.postingsOffsetTable.LabelNames() if err != nil { return nil, fmt.Errorf("cannot load label names from postings offset table: %w", err) @@ -216,18 +217,17 @@ func (r *StreamBinaryReader) downsampleSparseIndexHeader(curSampling, tgtSamplin if err := tbl.DownsamplePostings(curSampling, tgtSampling); err != nil { return err } - r.postingsOffsetTable = tbl return nil } // loadFromSparseIndexHeader load from sparse index-header on disk. 
-func (r *StreamBinaryReader) loadFromSparseIndexHeader(logger *spanlogger.SpanLogger, id ulid.ULID, sparseHeadersPath string, sparseData []byte, postingOffsetsInMemSampling int) (err error) { +func (r *StreamBinaryReader) loadFromSparseIndexHeader(logger log.Logger, sparseData []byte, postingOffsetsInMemSampling int) (err error) { start := time.Now() defer func() { - level.Info(logger).Log("msg", "loaded sparse index-header from disk", "id", id, "path", sparseHeadersPath, "elapsed", time.Since(start)) + level.Info(logger).Log("msg", "loaded sparse index-header from disk", "elapsed", time.Since(start)) }() - level.Info(logger).Log("msg", "loading sparse index-header from disk", "id", id, "path", sparseHeadersPath) + level.Info(logger).Log("msg", "loading sparse index-header from disk") sparseHeaders := &indexheaderpb.Sparse{} gzipped := bytes.NewReader(sparseData) @@ -260,13 +260,13 @@ func (r *StreamBinaryReader) loadFromSparseIndexHeader(logger *spanlogger.SpanLo } // loadFromIndexHeader loads in symbols and postings offset table from the index-header. -func (r *StreamBinaryReader) loadFromIndexHeader(logger *spanlogger.SpanLogger, id ulid.ULID, cfg Config, indexLastPostingListEndBound uint64, postingOffsetsInMemSampling int) (err error) { +func (r *StreamBinaryReader) loadFromIndexHeader(logger log.Logger, cfg Config, indexLastPostingListEndBound uint64, postingOffsetsInMemSampling int) (err error) { start := time.Now() defer func() { - level.Info(logger).Log("msg", "loaded sparse index-header from full index-header", "id", id, "elapsed", time.Since(start)) + level.Info(logger).Log("msg", "loaded sparse index-header from full index-header", "elapsed", time.Since(start)) }() - level.Info(logger).Log("msg", "loading sparse index-header from full index-header", "id", id) + level.Info(logger).Log("msg", "loading sparse index-header from full index-header") r.symbols, err = streamindex.NewSymbols(r.factory, r.indexVersion, int(r.toc.Symbols), cfg.VerifyOnLoad) if err != nil { @@ -282,13 +282,13 @@ func (r *StreamBinaryReader) loadFromIndexHeader(logger *spanlogger.SpanLogger, } // writeSparseHeadersToFile uses protocol buffer to write StreamBinaryReader to disk at sparseHeadersPath. 
-func writeSparseHeadersToFile(logger *spanlogger.SpanLogger, id ulid.ULID, sparseHeadersPath string, reader *StreamBinaryReader) error { +func writeSparseHeadersToFile(logger log.Logger, sparseHeadersPath string, reader *StreamBinaryReader) error { start := time.Now() defer func() { - level.Info(logger).Log("msg", "wrote sparse index-header to disk", "id", id, "path", sparseHeadersPath, "elapsed", time.Since(start)) + level.Info(logger).Log("msg", "wrote sparse index-header to disk", "elapsed", time.Since(start)) }() - level.Info(logger).Log("msg", "writing sparse index-header to disk", "id", id, "path", sparseHeadersPath) + level.Info(logger).Log("msg", "writing sparse index-header to disk") sparseHeaders := &indexheaderpb.Sparse{} sparseHeaders.Symbols = reader.symbols.NewSparseSymbol() diff --git a/pkg/storage/indexheader/stream_binary_reader_test.go b/pkg/storage/indexheader/stream_binary_reader_test.go index 1990949956e..1c3767eca0d 100644 --- a/pkg/storage/indexheader/stream_binary_reader_test.go +++ b/pkg/storage/indexheader/stream_binary_reader_test.go @@ -50,7 +50,7 @@ func TestStreamBinaryReader_ShouldBuildSparseHeadersFromFileSimple(t *testing.T) require.NoError(t, err) logger := spanlogger.FromContext(context.Background(), log.NewNopLogger()) - err = r.loadFromSparseIndexHeader(logger, blockID, sparseHeadersPath, sparseData, 3) + err = r.loadFromSparseIndexHeader(logger, sparseData, 3) require.NoError(t, err) } From fb28d6ec814bde882bc88e24809de655e263fbec Mon Sep 17 00:00:00 2001 From: Dustin Wilson Date: Wed, 26 Feb 2025 15:03:43 -0500 Subject: [PATCH 23/45] address review comments --- pkg/compactor/bucket_compactor.go | 21 ++---- pkg/storage/indexheader/header_test.go | 38 ++++++++--- pkg/storage/indexheader/index/postings.go | 67 +++++++++---------- .../indexheader/index/postings_test.go | 29 -------- .../indexheader/stream_binary_reader.go | 36 +--------- pkg/storage/tsdb/block/block.go | 8 +++ 6 files changed, 77 insertions(+), 122 deletions(-) diff --git a/pkg/compactor/bucket_compactor.go b/pkg/compactor/bucket_compactor.go index 87a74ea1751..61a37a6ef4a 100644 --- a/pkg/compactor/bucket_compactor.go +++ b/pkg/compactor/bucket_compactor.go @@ -443,15 +443,10 @@ func (c *BucketCompactor) runCompactionJob(ctx context.Context, job *Job) (shoul return errors.Wrapf(err, "invalid result block %s", bdir) } - begin := time.Now() - if err := block.Upload(ctx, jobLogger, c.bkt, bdir, nil); err != nil { - return errors.Wrapf(err, "upload of %s failed", blockToUpload.ulid) - } - if uploadSparseIndexHeaders { // Calling NewStreamBinaryReader reads a block's index and writes a sparse-index-header to disk. Because we // don't use the writer, we pass a default indexheader.Config and don't register metrics. 
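loadFromSparseIndexHeader and writeSparseHeadersToFile (above) appear to persist the sparse header as a gzip-compressed protobuf blob next to the block. A stdlib-only sketch of that write/read roundtrip, using an opaque []byte in place of the marshaled indexheaderpb.Sparse message:

package main

import (
	"bytes"
	"compress/gzip"
	"fmt"
	"io"
	"os"
	"path/filepath"
)

// writeGzipped writes payload (e.g. a marshaled sparse-index-header proto) to
// path, gzip-compressed.
func writeGzipped(path string, payload []byte) error {
	var buf bytes.Buffer
	zw := gzip.NewWriter(&buf)
	if _, err := zw.Write(payload); err != nil {
		return err
	}
	if err := zw.Close(); err != nil {
		return err
	}
	return os.WriteFile(path, buf.Bytes(), 0o600)
}

// readGzipped reads the file back and decompresses it, mirroring the
// bytes.NewReader + gzip reader path used when loading from disk.
func readGzipped(path string) ([]byte, error) {
	data, err := os.ReadFile(path)
	if err != nil {
		return nil, err
	}
	zr, err := gzip.NewReader(bytes.NewReader(data))
	if err != nil {
		return nil, err
	}
	defer zr.Close()
	return io.ReadAll(zr)
}

func main() {
	path := filepath.Join(os.TempDir(), "sparse-index-header.example")
	if err := writeGzipped(path, []byte("marshaled sparse header")); err != nil {
		panic(err)
	}
	out, err := readGzipped(path)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(out))
}
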
- if _, err := indexheader.NewStreamBinaryReader( + if br, err := indexheader.NewStreamBinaryReader( ctx, jobLogger, fsbkt, @@ -462,16 +457,14 @@ func (c *BucketCompactor) runCompactionJob(ctx context.Context, job *Job) (shoul indexheader.Config{}, ); err != nil { level.Warn(jobLogger).Log("msg", "failed to create sparse index headers", "block", blockID, "err", err) - return nil + } else { + br.Close() } + } - // upload local sparse-index-header to object storage - src := path.Join(bdir, block.SparseIndexHeaderFilename) - dst := path.Join(blockID, block.SparseIndexHeaderFilename) - if err := objstore.UploadFile(ctx, jobLogger, c.bkt, src, dst); err != nil { - level.Warn(jobLogger).Log("msg", "failed to upload sparse index headers", "block", blockID, "err", err) - return nil - } + begin := time.Now() + if err := block.Upload(ctx, jobLogger, c.bkt, bdir, nil); err != nil { + return errors.Wrapf(err, "upload of %s failed", blockToUpload.ulid) } elapsed := time.Since(begin) diff --git a/pkg/storage/indexheader/header_test.go b/pkg/storage/indexheader/header_test.go index dc18aae916f..de21a4e6fcd 100644 --- a/pkg/storage/indexheader/header_test.go +++ b/pkg/storage/indexheader/header_test.go @@ -134,13 +134,16 @@ func Test_DownsampleSparseIndexHeader(t *testing.T) { protoRate int inMemSamplingRate int }{ - "downsample_1_to_32": {1, 32}, - "downsample_2_to_32": {2, 32}, - "downsample_8_to_32": {8, 32}, - "downsample_4_to_16": {4, 16}, - "downsample_4_to_48": {4, 48}, - "downsample_4_to_4096": {4, 4096}, - "downsample_noop_32_to_32": {32, 32}, + "downsample_1_to_32": {1, 32}, + "downsample_2_to_32": {2, 32}, + "downsample_4_to_16": {4, 16}, + "downsample_4_to_48": {4, 48}, + "downsample_8_to_24": {8, 32}, + "proto_value_not_divisible_rebuild_8_to_20": {8, 20}, + "noop_32_to_32": {32, 32}, + "cannot_upsample_from_proto_rebuild_48_to_32": {48, 32}, + "cannot_upsample_from_proto_rebuild_64_to_32": {64, 32}, + "downsample_1024_to_4096": {1024, 4096}, } for name, tt := range tests { @@ -167,7 +170,7 @@ func Test_DownsampleSparseIndexHeader(t *testing.T) { // a second call to NewStreamBinaryReader loads the previously written sparse index-header and downsamples // the header from tt.protoRate to tt.inMemSamplingRate entries for each posting - br2, err := NewStreamBinaryReader(ctx, log.NewNopLogger(), bkt, tmpDir, m.ULID, tt.inMemSamplingRate, noopMetrics, Config{}) + br2, err := NewStreamBinaryReader(ctx, test.NewTestingLogger(t), bkt, tmpDir, m.ULID, tt.inMemSamplingRate, noopMetrics, Config{}) require.NoError(t, err) require.Equal(t, tt.inMemSamplingRate, br2.postingsOffsetTable.PostingOffsetInMemSampling()) @@ -177,15 +180,28 @@ func Test_DownsampleSparseIndexHeader(t *testing.T) { // label names are equal between original and downsampled sparse index-headers require.ElementsMatch(t, downsampleLabelNames, origLabelNames) - // downsampled postings are a subset of the original sparse index-header postings origIdxpbTbl := br1.postingsOffsetTable.NewSparsePostingOffsetTable() downsampleIdxpbTbl := br2.postingsOffsetTable.NewSparsePostingOffsetTable() step := tt.inMemSamplingRate / tt.protoRate for name, vals := range origIdxpbTbl.Postings { downsampledOffsets := downsampleIdxpbTbl.Postings[name].Offsets - require.Equal(t, (len(vals.Offsets)+step-1)/step, len(downsampledOffsets)) - require.Subset(t, vals.Offsets, downsampledOffsets) + + // downsampled postings are a subset of the original sparse index-header postings + if (tt.inMemSamplingRate > tt.protoRate) && (tt.inMemSamplingRate%tt.protoRate == 0) { 
+ require.Equal(t, (len(vals.Offsets)+step-1)/step, len(downsampledOffsets)) + require.Subset(t, vals.Offsets, downsampledOffsets) + } + + // cannot "upsample" a sparse index header. When the sampling value found in sparse header is larger + // than the configured rate, should rebuild sparse headers. The rebuilt headers should be at least + // as large as the original sparse index header postings. + if tt.inMemSamplingRate < tt.protoRate { + require.GreaterOrEqual(t, len(downsampledOffsets), len(vals.Offsets)) + if tt.inMemSamplingRate%tt.protoRate == 0 { + require.Subset(t, downsampledOffsets, vals.Offsets) + } + } } }) } diff --git a/pkg/storage/indexheader/index/postings.go b/pkg/storage/indexheader/index/postings.go index 61f976bb113..db613434d52 100644 --- a/pkg/storage/indexheader/index/postings.go +++ b/pkg/storage/indexheader/index/postings.go @@ -44,7 +44,7 @@ type PostingOffsetTable interface { NewSparsePostingOffsetTable() (table *indexheaderpb.PostingOffsetTable) - // PostingOffsetInMemSampling returns the ratio of postings offsets that the store + // PostingOffsetInMemSampling returns the ratio of postings kept in memory to those kept PostingOffsetInMemSampling() int } @@ -237,21 +237,35 @@ func NewPostingOffsetTableFromSparseHeader(factory *streamencoding.DecbufFactory postingOffsetsInMemSampling: postingOffsetsInMemSampling, } - for sName, sOffsets := range postingsOffsetTable.Postings { - t.postings[sName] = &postingValueOffsets{ - offsets: make([]postingOffset, len(sOffsets.Offsets)), + var step int + var ok bool + pbSampling := int(postingsOffsetTable.GetPostingOffsetInMemorySampling()) + if (pbSampling > 0) && (pbSampling <= postingOffsetsInMemSampling) { + // if the sampling rate in the sparse index-header is set lower (more frequent) than + // the configured postingOffsetsInMemSampling we downsample to the configured rate + step, ok = stepSize(pbSampling, postingOffsetsInMemSampling) + if !ok { + return nil, fmt.Errorf("sparse index-header sampling rate not compatible with in-mem-sampling rate") } + } else if (pbSampling > 0) && (pbSampling > postingOffsetsInMemSampling) { + // if the sparse index-header sampling rate is set higher must reconstruct from index-header + return nil, fmt.Errorf("sparse index-header sampling rate exceeds in-mem-sampling rate") + } + var last int64 + for sName, sOffsets := range postingsOffsetTable.Postings { + downsampledLen := len(sOffsets.Offsets) / step + if len(sOffsets.Offsets)%step != 0 { + downsampledLen += 1 + } + t.postings[sName] = &postingValueOffsets{offsets: make([]postingOffset, downsampledLen)} for i, sPostingOff := range sOffsets.Offsets { - t.postings[sName].offsets[i] = postingOffset{value: sPostingOff.Value, tableOff: int(sPostingOff.TableOff)} + if i%step == 0 { + t.postings[sName].offsets[i/step] = postingOffset{value: sPostingOff.Value, tableOff: int(sPostingOff.TableOff)} + last = sOffsets.LastValOffset + } } - - t.postings[sName].lastValOffset = sOffsets.LastValOffset - } - - // replace sampling if postingOffsetsInMemSampling set in proto - if sampling := postingsOffsetTable.GetPostingOffsetInMemorySampling(); sampling > 0 { - t.postingOffsetsInMemSampling = int(sampling) + t.postings[sName].lastValOffset = last } return &t, err } @@ -361,28 +375,6 @@ type postingValueOffsets struct { lastValOffset int64 } -func (t *PostingOffsetTableV2) DownsamplePostings(cur, tgt int) error { - if cur >= tgt || cur <= 0 || tgt <= 0 || tgt%cur != 0 { - return fmt.Errorf("invalid in-mem-sampling rates") - } - - step := tgt / cur - 
for _, pvo := range t.postings { - pvo.downsample(step) - } - t.postingOffsetsInMemSampling = tgt - return nil -} - -func (e *postingValueOffsets) downsample(step int) { - j := 0 - for i := 0; i < len(e.offsets); i += step { - e.offsets[j] = e.offsets[i] - j++ - } - e.offsets = e.offsets[:j] -} - // prefixOffsets returns the index of the first matching offset (start) and the index of the first non-matching (end). // If all offsets match the prefix, then end will equal the length of offsets. // prefixOffsets returns false when no offsets match this prefix. @@ -681,3 +673,10 @@ func skipNAndName(d *streamencoding.Decbuf, buf *int) { } d.Skip(*buf) } + +func stepSize(cur, tgt int) (int, bool) { + if cur > tgt || cur <= 0 || tgt <= 0 || tgt%cur != 0 { + return 0, false + } + return tgt / cur, true +} diff --git a/pkg/storage/indexheader/index/postings_test.go b/pkg/storage/indexheader/index/postings_test.go index a92e90e6fc7..e03e7e47ff2 100644 --- a/pkg/storage/indexheader/index/postings_test.go +++ b/pkg/storage/indexheader/index/postings_test.go @@ -101,32 +101,3 @@ func TestPostingValueOffsets(t *testing.T) { }) } } - -func Test_PostingValueOffsets_Downsample(t *testing.T) { - - pvos := make([]postingOffset, 20) - for i := range pvos { - pvos[i] = postingOffset{value: fmt.Sprintf("%d", i)} - } - - tests := []struct { - name string - pvo []postingOffset - step int - expectedLen int - }{ - {"len_eq_when_step_size=1", pvos, 1, 20}, - {"len_approx_halved_when_step_size=2", pvos, 2, 10}, - {"len_approx_quartered_when_step_size=4", pvos, 4, 5}, - {"len_one_when_step_size_is_len_offsets", pvos, 20, 1}, - {"len_one_when_step_size_exceeds_len_offsets", pvos, 200, 1}, - } - - for _, tc := range tests { - t.Run(tc.name, func(t *testing.T) { - pvo := &postingValueOffsets{offsets: append([]postingOffset{}, tc.pvo...)} - pvo.downsample(tc.step) - assert.Equal(t, tc.expectedLen, len(pvo.offsets), "unexpected length for step size %d", tc.step) - }) - } -} diff --git a/pkg/storage/indexheader/stream_binary_reader.go b/pkg/storage/indexheader/stream_binary_reader.go index 440b7eff549..dbb78b406e6 100644 --- a/pkg/storage/indexheader/stream_binary_reader.go +++ b/pkg/storage/indexheader/stream_binary_reader.go @@ -149,35 +149,12 @@ func newFileStreamBinaryReader(binPath string, id ulid.ULID, sparseHeadersPath s level.Warn(logger).Log("msg", "failed to read sparse index-headers from disk; recreating", "err", err) } - // read persisted sparseHeaders from disk to memory. 
if err == nil { if err = r.loadFromSparseIndexHeader(logger, sparseData, postingOffsetsInMemSampling); err != nil { - return nil, fmt.Errorf("cannot load sparse index-header from disk: %w", err) - } - - sampling := r.postingsOffsetTable.PostingOffsetInMemSampling() - if (sampling > 0) && (sampling < postingOffsetsInMemSampling) { - // if the sampling rate in the sparse index-header is set lower (more frequent) than - // the configured postingOffsetsInMemSampling, downsample to the configured rate - if err := r.downsampleSparseIndexHeader(sampling, postingOffsetsInMemSampling); err != nil { - level.Warn(logger).Log( - "msg", "failed to downsample sparse index-header; recreating", "err", err, - "header_rate", sampling, "in-mem-sampling rate", postingOffsetsInMemSampling, - ) - reconstruct = true - } - } else if (sampling > 0) && (sampling > postingOffsetsInMemSampling) { - // if the sparse index-header sampling rate is set higher, reconstruct from index-header - level.Warn(logger).Log( - "msg", "sparse index-header sampling rate doesn't match in-mem-sampling rate; recreating", - "err", err, "header_sampling_rate", sampling, - ) reconstruct = true } } - // construct sparseHeaders and write to disk if sparseHeaders are not on disk or the sparseHeaders on disk couldn't be - // downsampled to the configured rate if err != nil || reconstruct { if err = r.loadFromIndexHeader(logger, cfg, indexLastPostingListEndBound, postingOffsetsInMemSampling); err != nil { return nil, fmt.Errorf("cannot load sparse index-header: %w", err) @@ -185,7 +162,9 @@ func newFileStreamBinaryReader(binPath string, id ulid.ULID, sparseHeadersPath s if err := writeSparseHeadersToFile(logger, sparseHeadersPath, r); err != nil { return nil, fmt.Errorf("cannot write sparse index-header to disk: %w", err) } + level.Debug(logger).Log("msg", "built sparse index-header file") } + } else { if err = r.loadFromIndexHeader(logger, cfg, indexLastPostingListEndBound, postingOffsetsInMemSampling); err != nil { return nil, fmt.Errorf("cannot load sparse index-header: %w", err) @@ -209,17 +188,6 @@ func newFileStreamBinaryReader(binPath string, id ulid.ULID, sparseHeadersPath s return r, err } -func (r *StreamBinaryReader) downsampleSparseIndexHeader(curSampling, tgtSampling int) error { - tbl, ok := r.postingsOffsetTable.(*streamindex.PostingOffsetTableV2) - if !ok { - return fmt.Errorf("postings offset table has incompatible version v%d", r.indexVersion) - } - if err := tbl.DownsamplePostings(curSampling, tgtSampling); err != nil { - return err - } - return nil -} - // loadFromSparseIndexHeader load from sparse index-header on disk. func (r *StreamBinaryReader) loadFromSparseIndexHeader(logger log.Logger, sparseData []byte, postingOffsetsInMemSampling int) (err error) { start := time.Now() diff --git a/pkg/storage/tsdb/block/block.go b/pkg/storage/tsdb/block/block.go index be8d04ee3ca..8aaea37b39e 100644 --- a/pkg/storage/tsdb/block/block.go +++ b/pkg/storage/tsdb/block/block.go @@ -121,6 +121,14 @@ func Upload(ctx context.Context, logger log.Logger, bkt objstore.Bucket, blockDi return cleanUp(logger, bkt, id, errors.Wrap(err, "upload index")) } + if err := objstore.UploadFile(ctx, logger, bkt, + filepath.Join(blockDir, SparseIndexHeaderFilename), + filepath.Join(id.String(), SparseIndexHeaderFilename), + ); err != nil { + // Don't call cleanUp. Uploading sparse index headers is best effort. 
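The sparse index-header upload in block.Upload above is deliberately best effort: a failure is logged but not returned, so the critical path (uploading the index and meta.json) is unaffected. A small sketch of that pattern, with a function value standing in for objstore.UploadFile:

package main

import (
	"context"
	"errors"
	"log"
)

// uploadOptional runs a best-effort upload: a failure is logged as a warning
// rather than propagated, so the caller continues with its required work.
func uploadOptional(ctx context.Context, upload func(ctx context.Context) error, what string) {
	if err := upload(ctx); err != nil {
		log.Printf("msg=%q artifact=%s err=%v", "best-effort upload failed", what, err)
	}
}

func main() {
	uploadOptional(context.Background(), func(context.Context) error {
		return errors.New("object storage unavailable")
	}, "sparse-index-header")
}
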
+ level.Warn(logger).Log("msg", "failed to upload sparse index headers", "block", id.String(), "err", err) + } + // Meta.json always need to be uploaded as a last item. This will allow to assume block directories without meta file to be pending uploads. if err := bkt.Upload(ctx, path.Join(id.String(), MetaFilename), strings.NewReader(metaEncoded.String())); err != nil { // Don't call cleanUp here. Despite getting error, meta.json may have been uploaded in certain cases, From 35a4c1bb72d8f24379d24ca50723dba8e07994e3 Mon Sep 17 00:00:00 2001 From: Dustin Wilson Date: Wed, 26 Feb 2025 15:26:31 -0500 Subject: [PATCH 24/45] golint --- pkg/storage/indexheader/index/postings.go | 2 +- pkg/storage/indexheader/index/postings_test.go | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/pkg/storage/indexheader/index/postings.go b/pkg/storage/indexheader/index/postings.go index db613434d52..89f6c889a62 100644 --- a/pkg/storage/indexheader/index/postings.go +++ b/pkg/storage/indexheader/index/postings.go @@ -256,7 +256,7 @@ func NewPostingOffsetTableFromSparseHeader(factory *streamencoding.DecbufFactory for sName, sOffsets := range postingsOffsetTable.Postings { downsampledLen := len(sOffsets.Offsets) / step if len(sOffsets.Offsets)%step != 0 { - downsampledLen += 1 + downsampledLen++ } t.postings[sName] = &postingValueOffsets{offsets: make([]postingOffset, downsampledLen)} for i, sPostingOff := range sOffsets.Offsets { diff --git a/pkg/storage/indexheader/index/postings_test.go b/pkg/storage/indexheader/index/postings_test.go index e03e7e47ff2..2a7cd943265 100644 --- a/pkg/storage/indexheader/index/postings_test.go +++ b/pkg/storage/indexheader/index/postings_test.go @@ -3,7 +3,6 @@ package index import ( - "fmt" "testing" "github.com/stretchr/testify/assert" From 8ad719fc996acc3c0b0da04060d33efe6473289e Mon Sep 17 00:00:00 2001 From: Dustin Wilson Date: Wed, 26 Feb 2025 20:10:59 -0500 Subject: [PATCH 25/45] pass config through init functions --- pkg/compactor/bucket_compactor.go | 93 ++++++++++++---------- pkg/compactor/bucket_compactor_e2e_test.go | 6 +- pkg/compactor/bucket_compactor_test.go | 7 +- pkg/compactor/compactor.go | 7 +- 4 files changed, 65 insertions(+), 48 deletions(-) diff --git a/pkg/compactor/bucket_compactor.go b/pkg/compactor/bucket_compactor.go index 61a37a6ef4a..d987b841751 100644 --- a/pkg/compactor/bucket_compactor.go +++ b/pkg/compactor/bucket_compactor.go @@ -444,21 +444,10 @@ func (c *BucketCompactor) runCompactionJob(ctx context.Context, job *Job) (shoul } if uploadSparseIndexHeaders { - // Calling NewStreamBinaryReader reads a block's index and writes a sparse-index-header to disk. Because we - // don't use the writer, we pass a default indexheader.Config and don't register metrics. 
- if br, err := indexheader.NewStreamBinaryReader( - ctx, - jobLogger, - fsbkt, - subDir, - blockToUpload.ulid, - mimir_tsdb.DefaultPostingOffsetInMemorySampling, - indexheader.NewStreamBinaryReaderMetrics(nil), - indexheader.Config{}, + if err := prepareSparseIndexHeader( + ctx, jobLogger, fsbkt, subDir, blockToUpload.ulid, c.sparseIndexHeaderSamplingRate, c.sparseIndexHeaderconfig, ); err != nil { level.Warn(jobLogger).Log("msg", "failed to create sparse index headers", "block", blockID, "err", err) - } else { - br.Close() } } @@ -493,10 +482,20 @@ func (c *BucketCompactor) runCompactionJob(ctx context.Context, job *Job) (shoul return false, nil, errors.Wrapf(err, "mark old block for deletion from bucket") } } - return true, compIDs, nil } +func prepareSparseIndexHeader(ctx context.Context, logger log.Logger, bkt objstore.Bucket, dir string, id ulid.ULID, sampling int, cfg indexheader.Config) error { + // Calling NewStreamBinaryReader reads a block's index and writes a sparse-index-header to disk. + mets := indexheader.NewStreamBinaryReaderMetrics(nil) + br, err := indexheader.NewStreamBinaryReader(ctx, logger, bkt, dir, id, sampling, mets, cfg) + if err != nil { + return err + } + br.Close() + return nil +} + // verifyCompactedBlocksTimeRanges does a full run over the compacted blocks // and verifies that they satisfy the min/maxTime from the source blocks func verifyCompactedBlocksTimeRanges(compIDs []ulid.ULID, sourceBlocksMinTime, sourceBlocksMaxTime int64, subDir string) error { @@ -783,21 +782,23 @@ var ownAllJobs = func(*Job) (bool, error) { // BucketCompactor compacts blocks in a bucket. type BucketCompactor struct { - logger log.Logger - sy *metaSyncer - grouper Grouper - comp Compactor - planner Planner - compactDir string - bkt objstore.Bucket - concurrency int - skipUnhealthyBlocks bool - uploadSparseIndexHeaders bool - ownJob ownCompactionJobFunc - sortJobs JobsOrderFunc - waitPeriod time.Duration - blockSyncConcurrency int - metrics *BucketCompactorMetrics + logger log.Logger + sy *metaSyncer + grouper Grouper + comp Compactor + planner Planner + compactDir string + bkt objstore.Bucket + concurrency int + skipUnhealthyBlocks bool + uploadSparseIndexHeaders bool + sparseIndexHeaderSamplingRate int + sparseIndexHeaderconfig indexheader.Config + ownJob ownCompactionJobFunc + sortJobs JobsOrderFunc + waitPeriod time.Duration + blockSyncConcurrency int + metrics *BucketCompactorMetrics } // NewBucketCompactor creates a new bucket compactor. 
@@ -817,26 +818,30 @@ func NewBucketCompactor( blockSyncConcurrency int, metrics *BucketCompactorMetrics, uploadSparseIndexHeaders bool, + sparseIndexHeaderSamplingRate int, + sparseIndexHeaderconfig indexheader.Config, ) (*BucketCompactor, error) { if concurrency <= 0 { return nil, errors.Errorf("invalid concurrency level (%d), concurrency level must be > 0", concurrency) } return &BucketCompactor{ - logger: logger, - sy: sy, - grouper: grouper, - planner: planner, - comp: comp, - compactDir: compactDir, - bkt: bkt, - concurrency: concurrency, - skipUnhealthyBlocks: skipUnhealthyBlocks, - ownJob: ownJob, - sortJobs: sortJobs, - waitPeriod: waitPeriod, - blockSyncConcurrency: blockSyncConcurrency, - metrics: metrics, - uploadSparseIndexHeaders: uploadSparseIndexHeaders, + logger: logger, + sy: sy, + grouper: grouper, + planner: planner, + comp: comp, + compactDir: compactDir, + bkt: bkt, + concurrency: concurrency, + skipUnhealthyBlocks: skipUnhealthyBlocks, + ownJob: ownJob, + sortJobs: sortJobs, + waitPeriod: waitPeriod, + blockSyncConcurrency: blockSyncConcurrency, + metrics: metrics, + uploadSparseIndexHeaders: uploadSparseIndexHeaders, + sparseIndexHeaderSamplingRate: sparseIndexHeaderSamplingRate, + sparseIndexHeaderconfig: sparseIndexHeaderconfig, }, nil } diff --git a/pkg/compactor/bucket_compactor_e2e_test.go b/pkg/compactor/bucket_compactor_e2e_test.go index 389818cc555..3ff3984f7fa 100644 --- a/pkg/compactor/bucket_compactor_e2e_test.go +++ b/pkg/compactor/bucket_compactor_e2e_test.go @@ -37,6 +37,7 @@ import ( "github.com/thanos-io/objstore/providers/filesystem" "golang.org/x/sync/errgroup" + "github.com/grafana/mimir/pkg/storage/indexheader" "github.com/grafana/mimir/pkg/storage/tsdb/block" util_log "github.com/grafana/mimir/pkg/util/log" ) @@ -240,7 +241,10 @@ func TestGroupCompactE2E(t *testing.T) { planner := NewSplitAndMergePlanner([]int64{1000, 3000}) grouper := NewSplitAndMergeGrouper("user-1", []int64{1000, 3000}, 0, 0, logger) metrics := NewBucketCompactorMetrics(blocksMarkedForDeletion, prometheus.NewPedanticRegistry()) - bComp, err := NewBucketCompactor(logger, sy, grouper, planner, comp, dir, bkt, 2, true, ownAllJobs, sortJobsByNewestBlocksFirst, 0, 4, metrics, true) + cfg := indexheader.Config{VerifyOnLoad: true} + bComp, err := NewBucketCompactor( + logger, sy, grouper, planner, comp, dir, bkt, 2, true, ownAllJobs, sortJobsByNewestBlocksFirst, 0, 4, metrics, true, 32, cfg, + ) require.NoError(t, err) // Compaction on empty should not fail. 
diff --git a/pkg/compactor/bucket_compactor_test.go b/pkg/compactor/bucket_compactor_test.go index f2159533a86..fe6b2d86213 100644 --- a/pkg/compactor/bucket_compactor_test.go +++ b/pkg/compactor/bucket_compactor_test.go @@ -23,6 +23,7 @@ import ( "github.com/stretchr/testify/require" "github.com/thanos-io/objstore" + "github.com/grafana/mimir/pkg/storage/indexheader" "github.com/grafana/mimir/pkg/storage/tsdb/block" "github.com/grafana/mimir/pkg/util/extprom" ) @@ -118,9 +119,10 @@ func TestBucketCompactor_FilterOwnJobs(t *testing.T) { } m := NewBucketCompactorMetrics(promauto.With(nil).NewCounter(prometheus.CounterOpts{}), nil) + cfg := indexheader.Config{VerifyOnLoad: true} for testName, testCase := range tests { t.Run(testName, func(t *testing.T) { - bc, err := NewBucketCompactor(log.NewNopLogger(), nil, nil, nil, nil, "", nil, 2, false, testCase.ownJob, nil, 0, 4, m, false) + bc, err := NewBucketCompactor(log.NewNopLogger(), nil, nil, nil, nil, "", nil, 2, false, testCase.ownJob, nil, 0, 4, m, false, 32, cfg) require.NoError(t, err) res, err := bc.filterOwnJobs(jobsFn()) @@ -155,8 +157,9 @@ func TestBlockMaxTimeDeltas(t *testing.T) { })) metrics := NewBucketCompactorMetrics(promauto.With(nil).NewCounter(prometheus.CounterOpts{}), nil) + cfg := indexheader.Config{VerifyOnLoad: true} now := time.UnixMilli(1500002900159) - bc, err := NewBucketCompactor(log.NewNopLogger(), nil, nil, nil, nil, "", nil, 2, false, nil, nil, 0, 4, metrics, true) + bc, err := NewBucketCompactor(log.NewNopLogger(), nil, nil, nil, nil, "", nil, 2, false, nil, nil, 0, 4, metrics, true, 32, cfg) require.NoError(t, err) deltas := bc.blockMaxTimeDeltas(now, []*Job{j1, j2}) diff --git a/pkg/compactor/compactor.go b/pkg/compactor/compactor.go index c3f5bd64581..3d579b2c95e 100644 --- a/pkg/compactor/compactor.go +++ b/pkg/compactor/compactor.go @@ -33,6 +33,7 @@ import ( "go.uber.org/atomic" "github.com/grafana/mimir/pkg/storage/bucket" + "github.com/grafana/mimir/pkg/storage/indexheader" mimir_tsdb "github.com/grafana/mimir/pkg/storage/tsdb" "github.com/grafana/mimir/pkg/storage/tsdb/block" "github.com/grafana/mimir/pkg/util" @@ -132,7 +133,9 @@ type Config struct { BlocksCompactorFactory BlocksCompactorFactory `yaml:"-"` // Allow compactor to upload sparse-index-header files - UploadSparseIndexHeaders bool `yaml:"upload_sparse_index_headers" category:"experimental"` + UploadSparseIndexHeaders bool `yaml:"upload_sparse_index_headers" category:"experimental"` + SparseIndexHeadersSamplingRate int `yaml:"-"` + SparseIndexHeadersConfig indexheader.Config `yaml:"-"` } // RegisterFlags registers the MultitenantCompactor flags. 
@@ -839,6 +842,8 @@ func (c *MultitenantCompactor) compactUser(ctx context.Context, userID string) e c.compactorCfg.BlockSyncConcurrency, c.bucketCompactorMetrics, c.compactorCfg.UploadSparseIndexHeaders, + c.compactorCfg.SparseIndexHeadersSamplingRate, + c.compactorCfg.SparseIndexHeadersConfig, ) if err != nil { return errors.Wrap(err, "failed to create bucket compactor") From 1d7d4029610e205980a5836c7305cc14aac48085 Mon Sep 17 00:00:00 2001 From: Dustin Wilson Date: Wed, 26 Feb 2025 22:09:34 -0500 Subject: [PATCH 26/45] update downsampling in NewPostingOffsetTableFromSparseHeader to always take last --- pkg/mimir/modules.go | 2 ++ pkg/storage/indexheader/header_test.go | 24 ++++++++++--------- pkg/storage/indexheader/index/postings.go | 22 +++++++++++------ .../indexheader/stream_binary_reader.go | 2 ++ 4 files changed, 32 insertions(+), 18 deletions(-) diff --git a/pkg/mimir/modules.go b/pkg/mimir/modules.go index c911869cffb..8ae38203f0a 100644 --- a/pkg/mimir/modules.go +++ b/pkg/mimir/modules.go @@ -1024,6 +1024,8 @@ func (t *Mimir) initAlertManager() (serv services.Service, err error) { func (t *Mimir) initCompactor() (serv services.Service, err error) { t.Cfg.Compactor.ShardingRing.Common.ListenPort = t.Cfg.Server.GRPCListenPort + t.Cfg.Compactor.SparseIndexHeadersConfig = t.Cfg.BlocksStorage.BucketStore.IndexHeader + t.Cfg.Compactor.SparseIndexHeadersSamplingRate = t.Cfg.BlocksStorage.BucketStore.PostingOffsetsInMemSampling t.Compactor, err = compactor.NewMultitenantCompactor(t.Cfg.Compactor, t.Cfg.BlocksStorage, t.Overrides, util_log.Logger, t.Registerer) if err != nil { diff --git a/pkg/storage/indexheader/header_test.go b/pkg/storage/indexheader/header_test.go index de21a4e6fcd..67bcf14bcae 100644 --- a/pkg/storage/indexheader/header_test.go +++ b/pkg/storage/indexheader/header_test.go @@ -134,16 +134,15 @@ func Test_DownsampleSparseIndexHeader(t *testing.T) { protoRate int inMemSamplingRate int }{ - "downsample_1_to_32": {1, 32}, - "downsample_2_to_32": {2, 32}, - "downsample_4_to_16": {4, 16}, - "downsample_4_to_48": {4, 48}, - "downsample_8_to_24": {8, 32}, - "proto_value_not_divisible_rebuild_8_to_20": {8, 20}, - "noop_32_to_32": {32, 32}, - "cannot_upsample_from_proto_rebuild_48_to_32": {48, 32}, - "cannot_upsample_from_proto_rebuild_64_to_32": {64, 32}, - "downsample_1024_to_4096": {1024, 4096}, + "downsample_1_to_32": {1, 32}, + "downsample_4_to_16": {4, 16}, + "downsample_8_to_24": {8, 32}, + "downsample_17_to_51": {17, 51}, + "noop_on_same_sampling_rate": {32, 32}, + "rebuild_proto_sampling_rate_not_divisible": {8, 20}, + "rebuild_cannot_upsample_from_proto_48_to_32": {48, 32}, + "rebuild_cannot_upsample_from_proto_64_to_32": {64, 32}, + "downsample_to_low_frequency_keep_only_first_and_last": {4, 16384}, } for name, tt := range tests { @@ -170,7 +169,7 @@ func Test_DownsampleSparseIndexHeader(t *testing.T) { // a second call to NewStreamBinaryReader loads the previously written sparse index-header and downsamples // the header from tt.protoRate to tt.inMemSamplingRate entries for each posting - br2, err := NewStreamBinaryReader(ctx, test.NewTestingLogger(t), bkt, tmpDir, m.ULID, tt.inMemSamplingRate, noopMetrics, Config{}) + br2, err := NewStreamBinaryReader(ctx, log.NewNopLogger(), bkt, tmpDir, m.ULID, tt.inMemSamplingRate, noopMetrics, Config{}) require.NoError(t, err) require.Equal(t, tt.inMemSamplingRate, br2.postingsOffsetTable.PostingOffsetInMemSampling()) @@ -202,6 +201,9 @@ func Test_DownsampleSparseIndexHeader(t *testing.T) { require.Subset(t, 
downsampledOffsets, vals.Offsets)
 				}
 			}
+
+			// lastValOffset should be set for all series
+			require.NotZero(t, downsampleIdxpbTbl.Postings[name].LastValOffset)
 		}
 	})
 }
diff --git a/pkg/storage/indexheader/index/postings.go b/pkg/storage/indexheader/index/postings.go
index 89f6c889a62..58b065f5230 100644
--- a/pkg/storage/indexheader/index/postings.go
+++ b/pkg/storage/indexheader/index/postings.go
@@ -44,7 +44,8 @@ type PostingOffsetTable interface {
 
 	NewSparsePostingOffsetTable() (table *indexheaderpb.PostingOffsetTable)
 
-	// PostingOffsetInMemSampling returns the ratio of postings kept in memory to those kept
+	// PostingOffsetInMemSampling returns the inverse of the fraction of postings held in memory. A lower value indicates
+	// postings are sampled more frequently.
 	PostingOffsetInMemSampling() int
 }
 
@@ -252,20 +253,27 @@ func NewPostingOffsetTableFromSparseHeader(factory *streamencoding.DecbufFactory
 		return nil, fmt.Errorf("sparse index-header sampling rate exceeds in-mem-sampling rate")
 	}
 
-	var last int64
+	// The first and last values for each name are always present, we keep 1/postingOffsetsInMemSampling of the rest.
 	for sName, sOffsets := range postingsOffsetTable.Postings {
-		downsampledLen := len(sOffsets.Offsets) / step
-		if len(sOffsets.Offsets)%step != 0 {
-			downsampledLen++
+		var downsampledLen int
+
+		olen := len(sOffsets.Offsets)
+		if olen == 1 {
+			downsampledLen = 1
+		} else {
+			downsampledLen = 1 + olen/step
 		}
+
 		t.postings[sName] = &postingValueOffsets{offsets: make([]postingOffset, downsampledLen)}
 		for i, sPostingOff := range sOffsets.Offsets {
 			if i%step == 0 {
 				t.postings[sName].offsets[i/step] = postingOffset{value: sPostingOff.Value, tableOff: int(sPostingOff.TableOff)}
-				last = sOffsets.LastValOffset
+			} else if i == olen-1 {
+				t.postings[sName].offsets[downsampledLen-1] = postingOffset{value: sPostingOff.Value, tableOff: int(sPostingOff.TableOff)}
 			}
 		}
-		t.postings[sName].lastValOffset = last
+
+		t.postings[sName].lastValOffset = sOffsets.LastValOffset
 	}
 	return &t, err
 }
diff --git a/pkg/storage/indexheader/stream_binary_reader.go b/pkg/storage/indexheader/stream_binary_reader.go
index dbb78b406e6..4fa3cafd444 100644
--- a/pkg/storage/indexheader/stream_binary_reader.go
+++ b/pkg/storage/indexheader/stream_binary_reader.go
@@ -155,6 +155,8 @@ func newFileStreamBinaryReader(binPath string, id ulid.ULID, sparseHeadersPath s
 		}
 	}
 
+	// reconstruct from index if the sparse index-header file isn't on disk or if the sampling rate of the headers
+	// on disk can't be downsampled to the desired rate.
if err != nil || reconstruct { if err = r.loadFromIndexHeader(logger, cfg, indexLastPostingListEndBound, postingOffsetsInMemSampling); err != nil { return nil, fmt.Errorf("cannot load sparse index-header: %w", err) From 7905de57fd0d6d44f1c078daa79e112b23c0d18a Mon Sep 17 00:00:00 2001 From: Dustin Wilson Date: Wed, 26 Feb 2025 23:19:21 -0500 Subject: [PATCH 27/45] fix postings to pass TestStreamBinaryReader_CheckSparseHeadersCorrectnessExtive --- .../mimir-microservices-mode/compose-up | 34 +++++++++++++++++++ pkg/storage/indexheader/header_test.go | 18 +++++----- pkg/storage/indexheader/index/postings.go | 11 +----- 3 files changed, 44 insertions(+), 19 deletions(-) create mode 100755 development/mimir-microservices-mode/compose-up diff --git a/development/mimir-microservices-mode/compose-up b/development/mimir-microservices-mode/compose-up new file mode 100755 index 00000000000..63d184f6026 --- /dev/null +++ b/development/mimir-microservices-mode/compose-up @@ -0,0 +1,34 @@ +#!/bin/bash +# SPDX-License-Identifier: AGPL-3.0-only +# Provenance-includes-location: https://github.com/cortexproject/cortex/development/tsdb-blocks-storage-s3/compose-up.sh +# Provenance-includes-license: Apache-2.0 +# Provenance-includes-copyright: The Cortex Authors. + +set -e + +# newer compose is a subcommand of `docker`, not a hyphenated standalone command +docker_compose() { + if [ -x "$(command -v docker-compose)" ]; then + docker-compose "$@" + else + docker compose "$@" + fi +} + +SCRIPT_DIR=$(cd "$(dirname -- "$0")" && pwd) +BUILD_IMAGE=$(make -s -C "${SCRIPT_DIR}"/../.. print-build-image) +# Make sure docker-compose.yml is up-to-date. +cd "$SCRIPT_DIR" && make + +# -gcflags "all=-N -l" disables optimizations that allow for better run with combination with Delve debugger. +# GOARCH is not changed. +CGO_ENABLED=0 GOOS=linux go build -mod=vendor -tags=netgo,stringlabels -gcflags "all=-N -l" -o "${SCRIPT_DIR}"/mimir "${SCRIPT_DIR}"/../../cmd/mimir +docker_compose -f "${SCRIPT_DIR}"/docker-compose.yml build --build-arg BUILD_IMAGE="${BUILD_IMAGE}" distributor-1 + +if [ "$(yq '.services.query-tee' "${SCRIPT_DIR}"/docker-compose.yml)" != "null" ]; then + # If query-tee is enabled, build its binary and image as well. 
+ CGO_ENABLED=0 GOOS=linux go build -mod=vendor -tags=netgo,stringlabels -gcflags "all=-N -l" -o "${SCRIPT_DIR}"/../../cmd/query-tee "${SCRIPT_DIR}"/../../cmd/query-tee + docker_compose -f "${SCRIPT_DIR}"/docker-compose.yml build --build-arg BUILD_IMAGE="${BUILD_IMAGE}" query-tee +fi + +docker_compose -f "${SCRIPT_DIR}"/docker-compose.yml up "$@" diff --git a/pkg/storage/indexheader/header_test.go b/pkg/storage/indexheader/header_test.go index 67bcf14bcae..11f490eef50 100644 --- a/pkg/storage/indexheader/header_test.go +++ b/pkg/storage/indexheader/header_test.go @@ -134,15 +134,15 @@ func Test_DownsampleSparseIndexHeader(t *testing.T) { protoRate int inMemSamplingRate int }{ - "downsample_1_to_32": {1, 32}, - "downsample_4_to_16": {4, 16}, - "downsample_8_to_24": {8, 32}, - "downsample_17_to_51": {17, 51}, - "noop_on_same_sampling_rate": {32, 32}, - "rebuild_proto_sampling_rate_not_divisible": {8, 20}, - "rebuild_cannot_upsample_from_proto_48_to_32": {48, 32}, - "rebuild_cannot_upsample_from_proto_64_to_32": {64, 32}, - "downsample_to_low_frequency_keep_only_first_and_last": {4, 16384}, + "downsample_1_to_32": {1, 32}, + "downsample_4_to_16": {4, 16}, + "downsample_8_to_24": {8, 32}, + "downsample_17_to_51": {17, 51}, + "noop_on_same_sampling_rate": {32, 32}, + "rebuild_proto_sampling_rate_not_divisible": {8, 20}, + "rebuild_cannot_upsample_from_proto_48_to_32": {48, 32}, + "rebuild_cannot_upsample_from_proto_64_to_32": {64, 32}, + "downsample_to_low_frequency": {4, 16384}, } for name, tt := range tests { diff --git a/pkg/storage/indexheader/index/postings.go b/pkg/storage/indexheader/index/postings.go index 58b065f5230..0ccc2054c5a 100644 --- a/pkg/storage/indexheader/index/postings.go +++ b/pkg/storage/indexheader/index/postings.go @@ -253,26 +253,17 @@ func NewPostingOffsetTableFromSparseHeader(factory *streamencoding.DecbufFactory return nil, fmt.Errorf("sparse index-header sampling rate exceeds in-mem-sampling rate") } - // The first and last values for each name are always present, we keep 1/postingOffsetsInMemSampling of the rest. 
for sName, sOffsets := range postingsOffsetTable.Postings { - var downsampledLen int olen := len(sOffsets.Offsets) - if olen == 1 { - downsampledLen = 1 - } else { - downsampledLen = 1 + olen/step - } + downsampledLen := (olen + step - 1) / step t.postings[sName] = &postingValueOffsets{offsets: make([]postingOffset, downsampledLen)} for i, sPostingOff := range sOffsets.Offsets { if i%step == 0 { t.postings[sName].offsets[i/step] = postingOffset{value: sPostingOff.Value, tableOff: int(sPostingOff.TableOff)} - } else if i == olen-1 { - t.postings[sName].offsets[downsampledLen-1] = postingOffset{value: sPostingOff.Value, tableOff: int(sPostingOff.TableOff)} } } - t.postings[sName].lastValOffset = sOffsets.LastValOffset } return &t, err From cd77083a4db59e3f890f7dc78c7f4da944128d6f Mon Sep 17 00:00:00 2001 From: Dustin Wilson Date: Wed, 26 Feb 2025 23:19:41 -0500 Subject: [PATCH 28/45] fix postings to pass TestStreamBinaryReader_CheckSparseHeadersCorrectnessExtive --- .../mimir-microservices-mode/compose-up | 34 ------------------- 1 file changed, 34 deletions(-) delete mode 100755 development/mimir-microservices-mode/compose-up diff --git a/development/mimir-microservices-mode/compose-up b/development/mimir-microservices-mode/compose-up deleted file mode 100755 index 63d184f6026..00000000000 --- a/development/mimir-microservices-mode/compose-up +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: AGPL-3.0-only -# Provenance-includes-location: https://github.com/cortexproject/cortex/development/tsdb-blocks-storage-s3/compose-up.sh -# Provenance-includes-license: Apache-2.0 -# Provenance-includes-copyright: The Cortex Authors. - -set -e - -# newer compose is a subcommand of `docker`, not a hyphenated standalone command -docker_compose() { - if [ -x "$(command -v docker-compose)" ]; then - docker-compose "$@" - else - docker compose "$@" - fi -} - -SCRIPT_DIR=$(cd "$(dirname -- "$0")" && pwd) -BUILD_IMAGE=$(make -s -C "${SCRIPT_DIR}"/../.. print-build-image) -# Make sure docker-compose.yml is up-to-date. -cd "$SCRIPT_DIR" && make - -# -gcflags "all=-N -l" disables optimizations that allow for better run with combination with Delve debugger. -# GOARCH is not changed. -CGO_ENABLED=0 GOOS=linux go build -mod=vendor -tags=netgo,stringlabels -gcflags "all=-N -l" -o "${SCRIPT_DIR}"/mimir "${SCRIPT_DIR}"/../../cmd/mimir -docker_compose -f "${SCRIPT_DIR}"/docker-compose.yml build --build-arg BUILD_IMAGE="${BUILD_IMAGE}" distributor-1 - -if [ "$(yq '.services.query-tee' "${SCRIPT_DIR}"/docker-compose.yml)" != "null" ]; then - # If query-tee is enabled, build its binary and image as well. 
- CGO_ENABLED=0 GOOS=linux go build -mod=vendor -tags=netgo,stringlabels -gcflags "all=-N -l" -o "${SCRIPT_DIR}"/../../cmd/query-tee "${SCRIPT_DIR}"/../../cmd/query-tee - docker_compose -f "${SCRIPT_DIR}"/docker-compose.yml build --build-arg BUILD_IMAGE="${BUILD_IMAGE}" query-tee -fi - -docker_compose -f "${SCRIPT_DIR}"/docker-compose.yml up "$@" From 62b3c6fbe8f0aa103af3ebdfab0ddbf66c03414e Mon Sep 17 00:00:00 2001 From: Dustin Wilson Date: Thu, 27 Feb 2025 09:18:29 -0500 Subject: [PATCH 29/45] stat sparse index headers before block upload; no warning on failed upload if not on fs --- pkg/storage/tsdb/block/block.go | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/pkg/storage/tsdb/block/block.go b/pkg/storage/tsdb/block/block.go index 8aaea37b39e..e53f8820fbb 100644 --- a/pkg/storage/tsdb/block/block.go +++ b/pkg/storage/tsdb/block/block.go @@ -121,12 +121,13 @@ func Upload(ctx context.Context, logger log.Logger, bkt objstore.Bucket, blockDi return cleanUp(logger, bkt, id, errors.Wrap(err, "upload index")) } - if err := objstore.UploadFile(ctx, logger, bkt, - filepath.Join(blockDir, SparseIndexHeaderFilename), - filepath.Join(id.String(), SparseIndexHeaderFilename), - ); err != nil { - // Don't call cleanUp. Uploading sparse index headers is best effort. - level.Warn(logger).Log("msg", "failed to upload sparse index headers", "block", id.String(), "err", err) + src := filepath.Join(blockDir, SparseIndexHeaderFilename) + dst := filepath.Join(id.String(), SparseIndexHeaderFilename) + if _, err := os.Stat(src); err == nil { + if err := objstore.UploadFile(ctx, logger, bkt, src, dst); err != nil { + // Don't call cleanUp. Uploading sparse index headers is best effort. + level.Warn(logger).Log("msg", "failed to upload sparse index headers", "block", id.String(), "err", err) + } } // Meta.json always need to be uploaded as a last item. This will allow to assume block directories without meta file to be pending uploads. 
From 75d604b35cb95c06f339ad117061d6be00e6b7ea Mon Sep 17 00:00:00 2001
From: Dustin Wilson
Date: Thu, 27 Feb 2025 11:23:31 -0500
Subject: [PATCH 30/45] posting sampling tests

---
 pkg/storage/indexheader/index/postings.go     | 10 +++-
 .../indexheader/index/postings_test.go        | 58 +++++++++++++++++++
 2 files changed, 65 insertions(+), 3 deletions(-)

diff --git a/pkg/storage/indexheader/index/postings.go b/pkg/storage/indexheader/index/postings.go
index 0ccc2054c5a..16b0bc00fca 100644
--- a/pkg/storage/indexheader/index/postings.go
+++ b/pkg/storage/indexheader/index/postings.go
@@ -238,17 +238,21 @@ func NewPostingOffsetTableFromSparseHeader(factory *streamencoding.DecbufFactory
 		postingOffsetsInMemSampling: postingOffsetsInMemSampling,
 	}
 
+	pbSampling := int(postingsOffsetTable.GetPostingOffsetInMemorySampling())
+	if pbSampling == 0 {
+		return nil, fmt.Errorf("sparse index-header sampling rate not set")
+	}
+
 	var step int
 	var ok bool
-	pbSampling := int(postingsOffsetTable.GetPostingOffsetInMemorySampling())
-	if (pbSampling > 0) && (pbSampling <= postingOffsetsInMemSampling) {
+	if pbSampling <= postingOffsetsInMemSampling {
 		// if the sampling rate in the sparse index-header is set lower (more frequent) than
 		// the configured postingOffsetsInMemSampling we downsample to the configured rate
 		step, ok = stepSize(pbSampling, postingOffsetsInMemSampling)
 		if !ok {
 			return nil, fmt.Errorf("sparse index-header sampling rate not compatible with in-mem-sampling rate")
 		}
-	} else if (pbSampling > 0) && (pbSampling > postingOffsetsInMemSampling) {
+	} else {
 		// if the sparse index-header sampling rate is set higher must reconstruct from index-header
 		return nil, fmt.Errorf("sparse index-header sampling rate exceeds in-mem-sampling rate")
 	}
diff --git a/pkg/storage/indexheader/index/postings_test.go b/pkg/storage/indexheader/index/postings_test.go
index 2a7cd943265..aa5e8da8b56 100644
--- a/pkg/storage/indexheader/index/postings_test.go
+++ b/pkg/storage/indexheader/index/postings_test.go
@@ -3,9 +3,13 @@ package index
 
 import (
+	"fmt"
 	"testing"
 
 	"github.com/stretchr/testify/assert"
+
+	streamencoding "github.com/grafana/mimir/pkg/storage/indexheader/encoding"
+	"github.com/grafana/mimir/pkg/storage/indexheader/indexheaderpb"
 )
 
 func TestPostingValueOffsets(t *testing.T) {
@@ -100,3 +104,57 @@ func TestPostingValueOffsets(t *testing.T) {
 		})
 	}
 }
+
+func createPostingOffset(n int) []*indexheaderpb.PostingOffset {
+	offsets := make([]*indexheaderpb.PostingOffset, n)
+	for i := 0; i < n; i++ {
+		offsets[i] = &indexheaderpb.PostingOffset{Value: fmt.Sprintf("%d", i), TableOff: int64(i)}
+	}
+	return offsets
+}
+
+func Test_NewPostingOffsetTableFromSparseHeader(t *testing.T) {
+
+	testCases := map[string]struct {
+		existingOffsetsLen              int
+		postingOffsetsInMemSamplingRate int
+		protoSamplingRate               int64
+		expectedLen                     int
+		expectErr                       bool
+	}{
+		"downsample_noop_proto_has_equal_sampling_rate":            {100, 32, 32, 100, false},
+		"downsample_short_offsets":                                 {2, 32, 16, 1, false},
+		"downsample_noop_short_offsets":                            {1, 32, 16, 1, false},
+		"downsample_proto_has_divisible_sampling_rate":             {100, 32, 16, 50, false},
+		"cannot_downsample_proto_has_no_sampling_rate":             {100, 32, 0, 0, true},
+		"cannot_upsample_proto_has_less_frequent_sampling_rate":    {100, 32, 64, 0, true},
+		"cannot_downsample_proto_has_non_divisible_sampling_rate":  {100, 32, 10, 0, true},
+		"downsample_sampling_rates_ratio_does_not_divide_offsets":  {33, 32, 16, 17, false},
+		"downsample_sampling_rates_ratio_exceeds_offset_len":       {10, 1024, 8, 1, false},
+
"downsample_sampling_rates_ratio_equals_offset_len": {100, 100, 1, 1, false}, + } + + for testName, testCase := range testCases { + t.Run(testName, func(t *testing.T) { + factory := streamencoding.DecbufFactory{} + + postingsMap := make(map[string]*indexheaderpb.PostingValueOffsets) + postingsMap["__name__"] = &indexheaderpb.PostingValueOffsets{Offsets: createPostingOffset(testCase.existingOffsetsLen)} + + protoTbl := indexheaderpb.PostingOffsetTable{ + Postings: postingsMap, + PostingOffsetInMemorySampling: testCase.protoSamplingRate, + } + + tbl, err := NewPostingOffsetTableFromSparseHeader(&factory, &protoTbl, 0, testCase.postingOffsetsInMemSamplingRate) + if testCase.expectErr { + assert.Error(t, err) + } else { + assert.NoError(t, err) + assert.Equal(t, testCase.expectedLen, len(tbl.postings["__name__"].offsets)) + } + + }) + } + +} From 3663d67a7e43b3ca6f491b3ce8e2f35339eb83b4 Mon Sep 17 00:00:00 2001 From: Dustin Wilson Date: Thu, 27 Feb 2025 11:28:27 -0500 Subject: [PATCH 31/45] update header sampling tests --- pkg/storage/indexheader/header_test.go | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/pkg/storage/indexheader/header_test.go b/pkg/storage/indexheader/header_test.go index 11f490eef50..ebbe235c9b8 100644 --- a/pkg/storage/indexheader/header_test.go +++ b/pkg/storage/indexheader/header_test.go @@ -191,18 +191,6 @@ func Test_DownsampleSparseIndexHeader(t *testing.T) { require.Equal(t, (len(vals.Offsets)+step-1)/step, len(downsampledOffsets)) require.Subset(t, vals.Offsets, downsampledOffsets) } - - // cannot "upsample" a sparse index header. When the sampling value found in sparse header is larger - // than the configured rate, should rebuild sparse headers. The rebuilt headers should be at least - // as large as the original sparse index header postings. - if tt.inMemSamplingRate < tt.protoRate { - require.GreaterOrEqual(t, len(downsampledOffsets), len(vals.Offsets)) - if tt.inMemSamplingRate%tt.protoRate == 0 { - require.Subset(t, downsampledOffsets, vals.Offsets) - } - } - - // lastValOffset should be set for all series require.NotZero(t, downsampleIdxpbTbl.Postings[name].LastValOffset) } }) From be12809deaa52727313c9cc48d6b10ab3be6aedb Mon Sep 17 00:00:00 2001 From: Dustin Wilson Date: Thu, 27 Feb 2025 15:23:29 -0500 Subject: [PATCH 32/45] split runCompactionJob upload into multiple concurrency.ForEachJob --- pkg/compactor/bucket_compactor.go | 63 ++++++++++++++++++------------- 1 file changed, 36 insertions(+), 27 deletions(-) diff --git a/pkg/compactor/bucket_compactor.go b/pkg/compactor/bucket_compactor.go index d987b841751..6d9d61b811e 100644 --- a/pkg/compactor/bucket_compactor.go +++ b/pkg/compactor/bucket_compactor.go @@ -395,34 +395,19 @@ func (c *BucketCompactor) runCompactionJob(ctx context.Context, job *Job) (shoul c.metrics.compactionBlocksVerificationFailed.Inc() } - var uploadSparseIndexHeaders = c.uploadSparseIndexHeaders - var fsbkt objstore.Bucket - if uploadSparseIndexHeaders { - // Create a bucket backed by the local compaction directory. This allows calls to NewStreamBinaryReader to - // construct sparse-index-headers without making requests to object storage. Building and uploading - // sparse-index-headers is best effort, and we do not skip uploading a compacted block if there's an - // error affecting the sparse-index-header upload. 
-		fsbkt, err = filesystem.NewBucket(subDir)
-		if err != nil {
-			level.Warn(jobLogger).Log("msg", "failed to create filesystem bucket, skipping sparse header upload", "err", err)
-			uploadSparseIndexHeaders = false
-		}
-	}
-
 	blocksToUpload := convertCompactionResultToForEachJobs(compIDs, job.UseSplitting(), jobLogger)
+
+	// update labels and verify all blocks
 	err = concurrency.ForEachJob(ctx, len(blocksToUpload), c.blockSyncConcurrency, func(ctx context.Context, idx int) error {
 		blockToUpload := blocksToUpload[idx]
-
-		uploadedBlocks.Inc()
-
-		blockID := blockToUpload.ulid.String()
-		bdir := filepath.Join(subDir, blockID)
+		bdir := filepath.Join(subDir, blockToUpload.ulid.String())
 
 		// When splitting is enabled, we need to inject the shard ID as an external label.
 		newLabels := job.Labels().Map()
 		if job.UseSplitting() {
 			newLabels[mimir_tsdb.CompactorShardIDExternalLabel] = sharding.FormatShardIDLabelValue(uint64(blockToUpload.shardIndex), uint64(job.SplittingShards()))
 		}
+		blocksToUpload[idx].labels = newLabels
 
 		newMeta, err := block.InjectThanosMeta(jobLogger, bdir, block.ThanosMeta{
 			Labels:       newLabels,
 			Downsample:   block.ThanosDownsample{Resolution: job.Resolution()},
 			Source:       block.CompactorSource,
 			SegmentFiles: block.GetSegmentFiles(bdir),
 		}, nil)
+
 		if err != nil {
 			return errors.Wrapf(err, "failed to finalize the block %s", bdir)
 		}
@@ -438,26 +424,48 @@ func (c *BucketCompactor) runCompactionJob(ctx context.Context, job *Job) (shoul
 			return errors.Wrap(err, "remove tombstones")
 		}
 
-		// Ensure the compacted block is valid.
 		if err := block.VerifyBlock(ctx, jobLogger, bdir, newMeta.MinTime, newMeta.MaxTime, false); err != nil {
 			return errors.Wrapf(err, "invalid result block %s", bdir)
 		}
+		return nil
+	})
+	if err != nil {
+		return false, nil, err
+	}
 
-		if uploadSparseIndexHeaders {
-			if err := prepareSparseIndexHeader(
-				ctx, jobLogger, fsbkt, subDir, blockToUpload.ulid, c.sparseIndexHeaderSamplingRate, c.sparseIndexHeaderconfig,
-			); err != nil {
-				level.Warn(jobLogger).Log("msg", "failed to create sparse index headers", "block", blockID, "err", err)
-			}
+	// Optionally build sparse-index-headers. Building sparse-index-headers is best effort, we do not skip uploading a
+	// compacted block if there's an error affecting sparse-index-headers.
+	if c.uploadSparseIndexHeaders {
+		// Create a bucket backed by the local compaction directory, which allows calls to prepareSparseIndexHeader to
+		// construct sparse-index-headers without making requests to object storage.
+ fsbkt, err := filesystem.NewBucket(subDir) + if err != nil { + level.Warn(jobLogger).Log("msg", "failed to create filesystem bucket, skipping sparse header upload", "err", err) + return + } else { + _ = concurrency.ForEachJob(ctx, len(blocksToUpload), c.blockSyncConcurrency, func(ctx context.Context, idx int) error { + blockToUpload := blocksToUpload[idx] + err := prepareSparseIndexHeader(ctx, jobLogger, fsbkt, subDir, blockToUpload.ulid, c.sparseIndexHeaderSamplingRate, c.sparseIndexHeaderconfig) + if err != nil { + level.Warn(jobLogger).Log("msg", "failed to create sparse index headers", "block", blockToUpload.ulid.String(), "shard", blockToUpload.shardIndex, "err", err) + } + return nil + }) } + } + // upload all blocks + err = concurrency.ForEachJob(ctx, len(blocksToUpload), c.blockSyncConcurrency, func(ctx context.Context, idx int) error { + blockToUpload := blocksToUpload[idx] + uploadedBlocks.Inc() + bdir := filepath.Join(subDir, blockToUpload.ulid.String()) begin := time.Now() if err := block.Upload(ctx, jobLogger, c.bkt, bdir, nil); err != nil { return errors.Wrapf(err, "upload of %s failed", blockToUpload.ulid) } elapsed := time.Since(begin) - level.Info(jobLogger).Log("msg", "uploaded block", "result_block", blockToUpload.ulid, "duration", elapsed, "duration_ms", elapsed.Milliseconds(), "external_labels", labels.FromMap(newLabels)) + level.Info(jobLogger).Log("msg", "uploaded block", "result_block", blockToUpload.ulid, "duration", elapsed, "duration_ms", elapsed.Milliseconds(), "external_labels", labels.FromMap(blockToUpload.labels)) return nil }) if err != nil { @@ -565,6 +573,7 @@ func convertCompactionResultToForEachJobs(compactedBlocks []ulid.ULID, splitJob type ulidWithShardIndex struct { ulid ulid.ULID shardIndex int + labels map[string]string } // issue347Error is a type wrapper for errors that should invoke the repair process for broken block. From f4a80340ea9ca866e99fb55cf7e7bc92ea831208 Mon Sep 17 00:00:00 2001 From: Dustin Wilson Date: Thu, 27 Feb 2025 22:55:05 -0500 Subject: [PATCH 33/45] update changelog.md --- CHANGELOG.md | 1 + pkg/compactor/bucket_compactor.go | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cdbf16618e3..8f8d868d397 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ * [CHANGE] Ruler: Add `user` and `reason` labels to `cortex_ruler_write_requests_failed_total` and `cortex_ruler_queries_failed_total`; add `user` to `cortex_ruler_write_requests_total` and `cortex_ruler_queries_total` metrics. #10536 * [CHANGE] Querier / Query-frontend: Remove experimental `-querier.promql-experimental-functions-enabled` and `-query-frontend.block-promql-experimental-functions` CLI flags and respective YAML configuration options to enable experimental PromQL functions. Instead access to experimental PromQL functions is always blocked. You can enable them using the per-tenant setting `enabled_promql_experimental_functions`. #10660 +* [CHANGE] Include posting sampling rate in sparse index headers. When the sampling rate isn't set in a sparse index header, store gateway will rebuild the sparse header with the configured `blocks-storage.bucket-store.posting-offsets-in-mem-sampling` value. If the sparse header's sampling rate is set, but doesn't match the configured rate, store gateway will either rebuild the sparse header or downsample to the configured sampling rate. #10684 * [FEATURE] Distributor: Add experimental Influx handler. 
#10153 * [ENHANCEMENT] Compactor: Expose `cortex_bucket_index_last_successful_update_timestamp_seconds` for all tenants assigned to the compactor before starting the block cleanup job. #10569 * [ENHANCEMENT] Query Frontend: Return server-side `samples_processed` statistics. #10103 diff --git a/pkg/compactor/bucket_compactor.go b/pkg/compactor/bucket_compactor.go index 6d9d61b811e..ddce9f33b11 100644 --- a/pkg/compactor/bucket_compactor.go +++ b/pkg/compactor/bucket_compactor.go @@ -433,7 +433,7 @@ func (c *BucketCompactor) runCompactionJob(ctx context.Context, job *Job) (shoul return false, nil, err } - // Optionally build sparse-index-headers. Building sparse-index-headers is best effort, we do not skip uploading a + // Optionally build sparse-index-headers. Building sparse-index-headers is best effort, we do not skip uploading a // compacted block if there's an error affecting sparse-index-headers. if c.uploadSparseIndexHeaders { // Create a bucket backed by the local compaction directory, allows calls to prepareSparseIndexHeader to From 0977f1516a2bcae7e1395e81e726db1f55a9ca2b Mon Sep 17 00:00:00 2001 From: Dustin Wilson Date: Thu, 27 Feb 2025 23:33:46 -0500 Subject: [PATCH 34/45] golint --- pkg/compactor/bucket_compactor.go | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/pkg/compactor/bucket_compactor.go b/pkg/compactor/bucket_compactor.go index ddce9f33b11..b6691d62f60 100644 --- a/pkg/compactor/bucket_compactor.go +++ b/pkg/compactor/bucket_compactor.go @@ -442,16 +442,15 @@ func (c *BucketCompactor) runCompactionJob(ctx context.Context, job *Job) (shoul if err != nil { level.Warn(jobLogger).Log("msg", "failed to create filesystem bucket, skipping sparse header upload", "err", err) return - } else { - _ = concurrency.ForEachJob(ctx, len(blocksToUpload), c.blockSyncConcurrency, func(ctx context.Context, idx int) error { - blockToUpload := blocksToUpload[idx] - err := prepareSparseIndexHeader(ctx, jobLogger, fsbkt, subDir, blockToUpload.ulid, c.sparseIndexHeaderSamplingRate, c.sparseIndexHeaderconfig) - if err != nil { - level.Warn(jobLogger).Log("msg", "failed to create sparse index headers", "block", blockToUpload.ulid.String(), "shard", blockToUpload.shardIndex, "err", err) - } - return nil - }) } + _ = concurrency.ForEachJob(ctx, len(blocksToUpload), c.blockSyncConcurrency, func(ctx context.Context, idx int) error { + blockToUpload := blocksToUpload[idx] + err := prepareSparseIndexHeader(ctx, jobLogger, fsbkt, subDir, blockToUpload.ulid, c.sparseIndexHeaderSamplingRate, c.sparseIndexHeaderconfig) + if err != nil { + level.Warn(jobLogger).Log("msg", "failed to create sparse index headers", "block", blockToUpload.ulid.String(), "shard", blockToUpload.shardIndex, "err", err) + } + return nil + }) } // upload all blocks From 92b46101355f077cb035f75eff2906a1cdd4d8b8 Mon Sep 17 00:00:00 2001 From: Dimitar Dimitrov Date: Fri, 28 Feb 2025 10:57:14 +0100 Subject: [PATCH 35/45] Update CHANGELOG.md --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 541ef4f1563..4259831d438 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,6 @@ * [FEATURE] Ingester/Distributor: Add support for exporting cost attribution metrics (`cortex_ingester_attributed_active_series`, `cortex_distributor_received_attributed_samples_total`, and `cortex_discarded_attributed_samples_total`) with labels specified by customers to a custom Prometheus registry. This feature enables more flexible billing data tracking. 
#10269 #10702 * [FEATURE] Ruler: Added `/ruler/tenants` endpoints to list the discovered tenants with rule groups. #10738 -* [FEATURE] Distributor: Add experimental Influx handler. #10153 * [CHANGE] Querier: pass context to queryable `IsApplicable` hook. #10451 * [CHANGE] Distributor: OTLP and push handler replace all non-UTF8 characters with the unicode replacement character `\uFFFD` in error messages before propagating them. #10236 * [CHANGE] Querier: pass query matchers to queryable `IsApplicable` hook. #10256 From d2439ac4143b19421e84fd50dc9e67bfc90c47d6 Mon Sep 17 00:00:00 2001 From: Dimitar Dimitrov Date: Fri, 28 Feb 2025 10:57:31 +0100 Subject: [PATCH 36/45] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4259831d438..dfd069c2120 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,7 +16,7 @@ * [CHANGE] Ingester: Set `-ingester.ooo-native-histograms-ingestion-enabled` to true by default. #10483 * [CHANGE] Ruler: Add `user` and `reason` labels to `cortex_ruler_write_requests_failed_total` and `cortex_ruler_queries_failed_total`; add `user` to `cortex_ruler_write_requests_total` and `cortex_ruler_queries_total` metrics. #10536 -* [CHANGE] Querier / Query-frontend: Remove experimental `-querier.promql-experimental-functions-enabled` and `-query-frontend.block-promql-experimental-functions` CLI flags and respective YAML configuration options to enable experimental PromQL functions. Instead access to experimental PromQL functions is always blocked. You can enable them using the per-tenant setting `enabled_promql_experimental_functions`. #10660 +* [CHANGE] Querier / Query-frontend: Remove experimental `-querier.promql-experimental-functions-enabled` and `-query-frontend.block-promql-experimental-functions` CLI flags and respective YAML configuration options to enable experimental PromQL functions. Instead access to experimental PromQL functions is always blocked. You can enable them using the per-tenant setting `enabled_promql_experimental_functions`. #10660 #10712 * [CHANGE] Store-gateway: Include posting sampling rate in sparse index headers. When the sampling rate isn't set in a sparse index header, store gateway will rebuild the sparse header with the configured `blocks-storage.bucket-store.posting-offsets-in-mem-sampling` value. If the sparse header's sampling rate is set, but doesn't match the configured rate, store gateway will either rebuild the sparse header or downsample to the configured sampling rate. #10684 * [ENHANCEMENT] Compactor: Expose `cortex_bucket_index_last_successful_update_timestamp_seconds` for all tenants assigned to the compactor before starting the block cleanup job. #10569 * [ENHANCEMENT] Query Frontend: Return server-side `samples_processed` statistics. #10103 From 7949eafc9b63e018ca8d6be76119880b295455af Mon Sep 17 00:00:00 2001 From: Dimitar Dimitrov Date: Fri, 28 Feb 2025 10:59:45 +0100 Subject: [PATCH 37/45] Revert "Update CHANGELOG.md" This reverts commit 92b46101355f077cb035f75eff2906a1cdd4d8b8. 
--- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index dfd069c2120..9cdc87af802 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ * [FEATURE] Ingester/Distributor: Add support for exporting cost attribution metrics (`cortex_ingester_attributed_active_series`, `cortex_distributor_received_attributed_samples_total`, and `cortex_discarded_attributed_samples_total`) with labels specified by customers to a custom Prometheus registry. This feature enables more flexible billing data tracking. #10269 #10702 * [FEATURE] Ruler: Added `/ruler/tenants` endpoints to list the discovered tenants with rule groups. #10738 +* [FEATURE] Distributor: Add experimental Influx handler. #10153 * [CHANGE] Querier: pass context to queryable `IsApplicable` hook. #10451 * [CHANGE] Distributor: OTLP and push handler replace all non-UTF8 characters with the unicode replacement character `\uFFFD` in error messages before propagating them. #10236 * [CHANGE] Querier: pass query matchers to queryable `IsApplicable` hook. #10256 From b69e920e8c4cce55f888fe6855a2d94156909729 Mon Sep 17 00:00:00 2001 From: Dustin Wilson Date: Fri, 28 Feb 2025 09:07:53 -0500 Subject: [PATCH 38/45] Update pkg/compactor/bucket_compactor.go Co-authored-by: Dimitar Dimitrov --- pkg/compactor/bucket_compactor.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pkg/compactor/bucket_compactor.go b/pkg/compactor/bucket_compactor.go index b6691d62f60..626cbb5c538 100644 --- a/pkg/compactor/bucket_compactor.go +++ b/pkg/compactor/bucket_compactor.go @@ -499,8 +499,7 @@ func prepareSparseIndexHeader(ctx context.Context, logger log.Logger, bkt objsto if err != nil { return err } - br.Close() - return nil + return br.Close() } // verifyCompactedBlocksTimeRanges does a full run over the compacted blocks From ce90588eab7a893e741cc5339ff30cd59892777e Mon Sep 17 00:00:00 2001 From: Dustin Wilson Date: Fri, 28 Feb 2025 09:31:32 -0500 Subject: [PATCH 39/45] add struct fields on test --- .../indexheader/index/postings_test.go | 70 ++++++++++++++++--- 1 file changed, 60 insertions(+), 10 deletions(-) diff --git a/pkg/storage/indexheader/index/postings_test.go b/pkg/storage/indexheader/index/postings_test.go index aa5e8da8b56..7581200668b 100644 --- a/pkg/storage/indexheader/index/postings_test.go +++ b/pkg/storage/indexheader/index/postings_test.go @@ -122,16 +122,66 @@ func Test_NewPostingOffsetTableFromSparseHeader(t *testing.T) { expectedLen int expectErr bool }{ - "downsample_noop_proto_has_equal_sampling_rate": {100, 32, 32, 100, false}, - "downsample_short_offsets": {2, 32, 16, 1, false}, - "downsample_noop_short_offsets": {1, 32, 16, 1, false}, - "downsample_proto_has_divisible_sampling_rate": {100, 32, 16, 50, false}, - "cannot_downsample_proto_has_no_sampling_rate": {100, 32, 0, 0, true}, - "cannot_upsample_proto_has_less_frequent_sampling_rate": {100, 32, 64, 0, true}, - "cannot_downsample_proto_has_non_divisible_sampling_rate": {100, 32, 10, 0, true}, - "downsample_sampling_rates_ratio_does_not_divide_offsets": {33, 32, 16, 17, false}, - "downsample_sampling_rates_ratio_exceeds_offset_len": {10, 1024, 8, 1, false}, - "downsample_sampling_rates_ratio_equals_offset_len": {100, 100, 1, 1, false}, + "downsample_noop_proto_has_equal_sampling_rate": { + existingOffsetsLen: 100, + postingOffsetsInMemSamplingRate: 32, + protoSamplingRate: 32, + expectedLen: 100, + }, + "downsample_short_offsets": { + existingOffsetsLen: 2, + postingOffsetsInMemSamplingRate: 32, + protoSamplingRate: 16, 
+ expectedLen: 1, + }, + "downsample_noop_short_offsets": { + existingOffsetsLen: 1, + postingOffsetsInMemSamplingRate: 32, + protoSamplingRate: 16, + expectedLen: 1, + }, + "downsample_proto_has_divisible_sampling_rate": { + existingOffsetsLen: 100, + postingOffsetsInMemSamplingRate: 32, + protoSamplingRate: 16, + expectedLen: 50, + }, + "cannot_downsample_proto_has_no_sampling_rate": { + existingOffsetsLen: 100, + postingOffsetsInMemSamplingRate: 32, + protoSamplingRate: 0, + expectErr: true, + }, + "cannot_upsample_proto_has_less_frequent_sampling_rate": { + existingOffsetsLen: 100, + postingOffsetsInMemSamplingRate: 32, + protoSamplingRate: 64, + expectErr: true, + }, + "cannot_downsample_proto_has_non_divisible_sampling_rate": { + existingOffsetsLen: 100, + postingOffsetsInMemSamplingRate: 32, + protoSamplingRate: 10, + expectErr: true, + }, + "downsample_sampling_rates_ratio_does_not_divide_offsets": { + existingOffsetsLen: 33, + postingOffsetsInMemSamplingRate: 32, + protoSamplingRate: 16, + expectedLen: 17, + }, + "downsample_sampling_rates_ratio_exceeds_offset_len": { + existingOffsetsLen: 10, + postingOffsetsInMemSamplingRate: 1024, + protoSamplingRate: 8, + expectedLen: 1, + }, + "downsample_sampling_rates_ratio_equals_offset_len": { + existingOffsetsLen: 100, + postingOffsetsInMemSamplingRate: 100, + protoSamplingRate: 1, + expectedLen: 1, + }, } for testName, testCase := range testCases { From 19a1b4941024fd0391fb8541251c55693bd794b3 Mon Sep 17 00:00:00 2001 From: Dustin Wilson Date: Fri, 28 Feb 2025 17:51:45 -0500 Subject: [PATCH 40/45] rework downsampling tests; require first and last --- pkg/storage/indexheader/header_test.go | 82 ++++++++++++++++--- pkg/storage/indexheader/index/postings.go | 26 +++--- .../indexheader/index/postings_test.go | 24 +++++- 3 files changed, 105 insertions(+), 27 deletions(-) diff --git a/pkg/storage/indexheader/header_test.go b/pkg/storage/indexheader/header_test.go index ebbe235c9b8..fabe6665e54 100644 --- a/pkg/storage/indexheader/header_test.go +++ b/pkg/storage/indexheader/header_test.go @@ -133,16 +133,69 @@ func Test_DownsampleSparseIndexHeader(t *testing.T) { tests := map[string]struct { protoRate int inMemSamplingRate int + expected map[string]int }{ - "downsample_1_to_32": {1, 32}, - "downsample_4_to_16": {4, 16}, - "downsample_8_to_24": {8, 32}, - "downsample_17_to_51": {17, 51}, - "noop_on_same_sampling_rate": {32, 32}, - "rebuild_proto_sampling_rate_not_divisible": {8, 20}, - "rebuild_cannot_upsample_from_proto_48_to_32": {48, 32}, - "rebuild_cannot_upsample_from_proto_64_to_32": {64, 32}, - "downsample_to_low_frequency": {4, 16384}, + "downsample_1_to_32": { + protoRate: 1, + inMemSamplingRate: 32, + expected: map[string]int{ + "__name__": 4, + "": 1, + "__blockgen_target__": 4, + }, + }, + "downsample_4_to_16": { + protoRate: 4, + inMemSamplingRate: 16, + expected: map[string]int{ + "__name__": 7, + "": 1, + "__blockgen_target__": 7, + }, + }, + "downsample_8_to_24": { + protoRate: 8, + inMemSamplingRate: 24, + expected: map[string]int{ + "__name__": 5, + "": 1, + "__blockgen_target__": 5, + }, + }, + "downsample_17_to_51": { + protoRate: 17, + inMemSamplingRate: 51, + expected: map[string]int{ + "__name__": 3, + "": 1, + "__blockgen_target__": 3, + }, + }, + "noop_on_same_sampling_rate": { + protoRate: 32, + inMemSamplingRate: 32, + }, + "rebuild_proto_sampling_rate_not_divisible": { + protoRate: 8, + inMemSamplingRate: 20, + }, + "rebuild_cannot_upsample_from_proto_48_to_32": { + protoRate: 48, + inMemSamplingRate: 32, + }, + 
"rebuild_cannot_upsample_from_proto_64_to_32": { + protoRate: 64, + inMemSamplingRate: 32, + }, + "downsample_to_low_frequency": { + protoRate: 4, + inMemSamplingRate: 16384, + expected: map[string]int{ + "__name__": 2, + "": 1, + "__blockgen_target__": 2, + }, + }, } for name, tt := range tests { @@ -182,15 +235,18 @@ func Test_DownsampleSparseIndexHeader(t *testing.T) { origIdxpbTbl := br1.postingsOffsetTable.NewSparsePostingOffsetTable() downsampleIdxpbTbl := br2.postingsOffsetTable.NewSparsePostingOffsetTable() - step := tt.inMemSamplingRate / tt.protoRate for name, vals := range origIdxpbTbl.Postings { downsampledOffsets := downsampleIdxpbTbl.Postings[name].Offsets - // downsampled postings are a subset of the original sparse index-header postings if (tt.inMemSamplingRate > tt.protoRate) && (tt.inMemSamplingRate%tt.protoRate == 0) { - require.Equal(t, (len(vals.Offsets)+step-1)/step, len(downsampledOffsets)) - require.Subset(t, vals.Offsets, downsampledOffsets) + require.Equal(t, tt.expected[name], len(downsampledOffsets)) + require.Subset(t, vals.Offsets, downsampledOffsets, "downsampled offsets not a subset of original for name '%s'", name) + + require.Equal(t, downsampledOffsets[0], vals.Offsets[0], "downsampled offsets do not contain first value for name '%s'", name) + require.Equal(t, downsampledOffsets[len(downsampledOffsets)-1], vals.Offsets[len(vals.Offsets)-1], "downsampled offsets do not contain last value for name '%s'", name) } + + // check first and last entry from the original postings in downsampled set require.NotZero(t, downsampleIdxpbTbl.Postings[name].LastValOffset) } }) diff --git a/pkg/storage/indexheader/index/postings.go b/pkg/storage/indexheader/index/postings.go index 16b0bc00fca..28505dffbab 100644 --- a/pkg/storage/indexheader/index/postings.go +++ b/pkg/storage/indexheader/index/postings.go @@ -243,30 +243,34 @@ func NewPostingOffsetTableFromSparseHeader(factory *streamencoding.DecbufFactory return nil, fmt.Errorf("sparse index-header sampling rate not set") } - var step int - var ok bool - if pbSampling <= postingOffsetsInMemSampling { - // if the sampling rate in the sparse index-header is set lower (more frequent) than - // the configured postingOffsetsInMemSampling we downsample to the configured rate - step, ok = stepSize(pbSampling, postingOffsetsInMemSampling) - if !ok { - return nil, fmt.Errorf("sparse index-header sampling rate not compatible with in-mem-sampling rate") - } - } else { - // if the sparse index-header sampling rate is set higher must reconstruct from index-header + if pbSampling > postingOffsetsInMemSampling { return nil, fmt.Errorf("sparse index-header sampling rate exceeds in-mem-sampling rate") } + // if the sampling rate in the sparse index-header is set lower (more frequent) than + // the configured postingOffsetsInMemSampling we downsample to the configured rate + step, ok := stepSize(pbSampling, postingOffsetsInMemSampling) + if !ok { + return nil, fmt.Errorf("sparse index-header sampling rate not compatible with in-mem-sampling rate") + } + for sName, sOffsets := range postingsOffsetTable.Postings { olen := len(sOffsets.Offsets) downsampledLen := (olen + step - 1) / step + if (olen > 1) && (downsampledLen == 1) { + downsampledLen++ + } t.postings[sName] = &postingValueOffsets{offsets: make([]postingOffset, downsampledLen)} for i, sPostingOff := range sOffsets.Offsets { if i%step == 0 { t.postings[sName].offsets[i/step] = postingOffset{value: sPostingOff.Value, tableOff: int(sPostingOff.TableOff)} } + + if i == olen-1 { + 
t.postings[sName].offsets[downsampledLen-1] = postingOffset{value: sPostingOff.Value, tableOff: int(sPostingOff.TableOff)} + } } t.postings[sName].lastValOffset = sOffsets.LastValOffset } diff --git a/pkg/storage/indexheader/index/postings_test.go b/pkg/storage/indexheader/index/postings_test.go index 7581200668b..e3a76723391 100644 --- a/pkg/storage/indexheader/index/postings_test.go +++ b/pkg/storage/indexheader/index/postings_test.go @@ -128,11 +128,29 @@ func Test_NewPostingOffsetTableFromSparseHeader(t *testing.T) { protoSamplingRate: 32, expectedLen: 100, }, + "downsample_noop_preserve": { + existingOffsetsLen: 1, + postingOffsetsInMemSamplingRate: 32, + protoSamplingRate: 16, + expectedLen: 1, + }, + "downsample_noop_retain_first_and_last_posting": { + existingOffsetsLen: 2, + postingOffsetsInMemSamplingRate: 32, + protoSamplingRate: 16, + expectedLen: 2, + }, + "downsample_noop_retain_first_and_last_posting_larger_sampling_rates_ratio": { + existingOffsetsLen: 2, + postingOffsetsInMemSamplingRate: 32, + protoSamplingRate: 8, + expectedLen: 2, + }, "downsample_short_offsets": { existingOffsetsLen: 2, postingOffsetsInMemSamplingRate: 32, protoSamplingRate: 16, - expectedLen: 1, + expectedLen: 2, }, "downsample_noop_short_offsets": { existingOffsetsLen: 1, @@ -174,13 +192,13 @@ func Test_NewPostingOffsetTableFromSparseHeader(t *testing.T) { existingOffsetsLen: 10, postingOffsetsInMemSamplingRate: 1024, protoSamplingRate: 8, - expectedLen: 1, + expectedLen: 2, }, "downsample_sampling_rates_ratio_equals_offset_len": { existingOffsetsLen: 100, postingOffsetsInMemSamplingRate: 100, protoSamplingRate: 1, - expectedLen: 1, + expectedLen: 2, }, } From 3426a410b9b9c36846e1571cadea1c10da633beb Mon Sep 17 00:00:00 2001 From: Dustin Wilson Date: Mon, 3 Mar 2025 18:11:47 -0500 Subject: [PATCH 41/45] add check for first and last table offsets to CheckSparseHeadersCorrectnessExtensive --- pkg/storage/indexheader/header_test.go | 36 +++++++++++++++++++ .../indexheader/stream_binary_reader_test.go | 1 + 2 files changed, 37 insertions(+) diff --git a/pkg/storage/indexheader/header_test.go b/pkg/storage/indexheader/header_test.go index fabe6665e54..36d0cdae675 100644 --- a/pkg/storage/indexheader/header_test.go +++ b/pkg/storage/indexheader/header_test.go @@ -253,6 +253,42 @@ func Test_DownsampleSparseIndexHeader(t *testing.T) { } } +func compareIndexToHeaderPostings(t *testing.T, indexByteSlice index.ByteSlice, sbr *StreamBinaryReader) { + + ir, err := index.NewReader(indexByteSlice, index.DecodePostingsRaw) + require.NoError(t, err) + defer func() { + _ = ir.Close() + }() + + toc, err := index.NewTOCFromByteSlice(indexByteSlice) + require.NoError(t, err) + + tblOffsetBounds := make(map[string][2]int64) + err = index.ReadPostingsOffsetTable(indexByteSlice, toc.PostingsTable, func(label []byte, _ []byte, _ uint64, offset int) error { + name := string(label) + off := int64(offset + 4) // 4B offset - store count on TOC + if v, ok := tblOffsetBounds[name]; ok { + v[1] = off + tblOffsetBounds[name] = v + } else { + tblOffsetBounds[name] = [2]int64{off, off} + } + return nil + }) + require.NoError(t, err) + + tbl := sbr.postingsOffsetTable.NewSparsePostingOffsetTable() + + expLabelNames, err := ir.LabelNames(context.Background()) + require.NoError(t, err) + for _, lname := range expLabelNames { + offsets := tbl.Postings[lname].Offsets + assert.Equal(t, offsets[0].TableOff, tblOffsetBounds[lname][0]) + assert.Equal(t, offsets[len(offsets)-1].TableOff, tblOffsetBounds[lname][1]) + } +} + func 
compareIndexToHeader(t *testing.T, indexByteSlice index.ByteSlice, headerReader Reader) { ctx := context.Background() diff --git a/pkg/storage/indexheader/stream_binary_reader_test.go b/pkg/storage/indexheader/stream_binary_reader_test.go index 1c3767eca0d..1335c3c4fb2 100644 --- a/pkg/storage/indexheader/stream_binary_reader_test.go +++ b/pkg/storage/indexheader/stream_binary_reader_test.go @@ -91,6 +91,7 @@ func TestStreamBinaryReader_CheckSparseHeadersCorrectnessExtensive(t *testing.T) // Check correctness of sparse index headers. compareIndexToHeader(t, b, r2) + compareIndexToHeaderPostings(t, b, r2) }) } } From f2e8cbdc99b86c181c42d484ded5ca31e08bbae3 Mon Sep 17 00:00:00 2001 From: Dustin Wilson Date: Tue, 4 Mar 2025 09:41:29 -0500 Subject: [PATCH 42/45] check all ranges in index are in header --- pkg/storage/indexheader/header_test.go | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/pkg/storage/indexheader/header_test.go b/pkg/storage/indexheader/header_test.go index 36d0cdae675..53a4c99cc91 100644 --- a/pkg/storage/indexheader/header_test.go +++ b/pkg/storage/indexheader/header_test.go @@ -352,15 +352,22 @@ func compareIndexToHeader(t *testing.T, indexByteSlice index.ByteSlice, headerRe for _, v := range valOffsets { ptr, err := headerReader.PostingsOffset(ctx, lname, v.LabelValue) require.NoError(t, err) - assert.Equal(t, expRanges[labels.Label{Name: lname, Value: v.LabelValue}], ptr) - assert.Equal(t, expRanges[labels.Label{Name: lname, Value: v.LabelValue}], v.Off) + label := labels.Label{Name: lname, Value: v.LabelValue} + assert.Equal(t, expRanges[label], ptr) + assert.Equal(t, expRanges[label], v.Off) + delete(expRanges, label) } } + allPName, allPValue := index.AllPostingsKey() ptr, err := headerReader.PostingsOffset(ctx, allPName, allPValue) require.NoError(t, err) - require.Equal(t, expRanges[labels.Label{Name: "", Value: ""}].Start, ptr.Start) - require.Equal(t, expRanges[labels.Label{Name: "", Value: ""}].End, ptr.End) + + emptyLabel := labels.Label{Name: "", Value: ""} + require.Equal(t, expRanges[emptyLabel].Start, ptr.Start) + require.Equal(t, expRanges[emptyLabel].End, ptr.End) + delete(expRanges, emptyLabel) + require.Empty(t, expRanges) } func prepareIndexV2Block(t testing.TB, tmpDir string, bkt objstore.Bucket) *block.Meta { From c9ead14edaa77594f1751d61169275376d414aee Mon Sep 17 00:00:00 2001 From: Dustin Wilson Date: Tue, 4 Mar 2025 14:58:33 -0500 Subject: [PATCH 43/45] comment on offset adjust --- pkg/storage/indexheader/header_test.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pkg/storage/indexheader/header_test.go b/pkg/storage/indexheader/header_test.go index 53a4c99cc91..bb12e36698a 100644 --- a/pkg/storage/indexheader/header_test.go +++ b/pkg/storage/indexheader/header_test.go @@ -265,9 +265,12 @@ func compareIndexToHeaderPostings(t *testing.T, indexByteSlice index.ByteSlice, require.NoError(t, err) tblOffsetBounds := make(map[string][2]int64) + + // Read the postings offset table and record first and last offset for each label. Adjust offsets in ReadPostingsOffsetTable + // by 4B (int32 count of postings in table) to align with postings in index headers. 
err = index.ReadPostingsOffsetTable(indexByteSlice, toc.PostingsTable, func(label []byte, _ []byte, _ uint64, offset int) error { name := string(label) - off := int64(offset + 4) // 4B offset - store count on TOC + off := int64(offset + 4) if v, ok := tblOffsetBounds[name]; ok { v[1] = off tblOffsetBounds[name] = v From 0f9892630a0547b99ff9bccc2655c8576653f004 Mon Sep 17 00:00:00 2001 From: Dustin Wilson Date: Wed, 5 Mar 2025 15:47:43 -0500 Subject: [PATCH 44/45] Update docs/sources/mimir/configure/configuration-parameters/index.md Co-authored-by: Taylor C <41653732+tacole02@users.noreply.github.com> --- docs/sources/mimir/configure/configuration-parameters/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sources/mimir/configure/configuration-parameters/index.md b/docs/sources/mimir/configure/configuration-parameters/index.md index 819d79fd7dd..2bc4137ab91 100644 --- a/docs/sources/mimir/configure/configuration-parameters/index.md +++ b/docs/sources/mimir/configure/configuration-parameters/index.md @@ -4977,7 +4977,7 @@ sharding_ring: # CLI flag: -compactor.max-lookback [max_lookback: | default = 0s] -# (experimental) If enabled, the compactor will construct and upload sparse +# (experimental) If enabled, the compactor constructs and uploads sparse # index headers to object storage during each compaction cycle. This allows # store-gateway instances to use the sparse headers from object storage instead # of recreating them locally. From a6043992bb42e1101ce11ed553fa4cf7ae910dae Mon Sep 17 00:00:00 2001 From: Dustin Wilson Date: Wed, 5 Mar 2025 16:32:44 -0500 Subject: [PATCH 45/45] update docs --- cmd/mimir/config-descriptor.json | 2 +- cmd/mimir/help-all.txt.tmpl | 2 +- .../sources/mimir/configure/configuration-parameters/index.md | 4 ++-- pkg/compactor/compactor.go | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cmd/mimir/config-descriptor.json b/cmd/mimir/config-descriptor.json index 4e65d35be8a..c79678c5e25 100644 --- a/cmd/mimir/config-descriptor.json +++ b/cmd/mimir/config-descriptor.json @@ -11634,7 +11634,7 @@ "kind": "field", "name": "upload_sparse_index_headers", "required": false, - "desc": "If enabled, the compactor will construct and upload sparse index headers to object storage during each compaction cycle. This allows store-gateway instances to use the sparse headers from object storage instead of recreating them locally.", + "desc": "If enabled, the compactor constructs and uploads sparse index headers to object storage during each compaction cycle. This allows store-gateway instances to use the sparse headers from object storage instead of recreating them locally.", "fieldValue": null, "fieldDefaultValue": false, "fieldFlag": "compactor.upload-sparse-index-headers", diff --git a/cmd/mimir/help-all.txt.tmpl b/cmd/mimir/help-all.txt.tmpl index b8b1b2a688e..84993fe1f37 100644 --- a/cmd/mimir/help-all.txt.tmpl +++ b/cmd/mimir/help-all.txt.tmpl @@ -1298,7 +1298,7 @@ Usage of ./cmd/mimir/mimir: -compactor.tenant-cleanup-delay duration For tenants marked for deletion, this is the time between deletion of the last block, and doing final cleanup (marker files, debug files) of the tenant. (default 6h0m0s) -compactor.upload-sparse-index-headers - [experimental] If enabled, the compactor will construct and upload sparse index headers to object storage during each compaction cycle. This allows store-gateway instances to use the sparse headers from object storage instead of recreating them locally. 
+ [experimental] If enabled, the compactor constructs and uploads sparse index headers to object storage during each compaction cycle. This allows store-gateway instances to use the sparse headers from object storage instead of recreating them locally. -config.expand-env Expands ${var} or $var in config according to the values of the environment variables. -config.file value diff --git a/docs/sources/mimir/configure/configuration-parameters/index.md b/docs/sources/mimir/configure/configuration-parameters/index.md index 2bc4137ab91..2f5babf55fd 100644 --- a/docs/sources/mimir/configure/configuration-parameters/index.md +++ b/docs/sources/mimir/configure/configuration-parameters/index.md @@ -4977,8 +4977,8 @@ sharding_ring: # CLI flag: -compactor.max-lookback [max_lookback: | default = 0s] -# (experimental) If enabled, the compactor constructs and uploads sparse -# index headers to object storage during each compaction cycle. This allows +# (experimental) If enabled, the compactor constructs and uploads sparse index +# headers to object storage during each compaction cycle. This allows # store-gateway instances to use the sparse headers from object storage instead # of recreating them locally. # CLI flag: -compactor.upload-sparse-index-headers diff --git a/pkg/compactor/compactor.go b/pkg/compactor/compactor.go index 3d579b2c95e..44d074e3e63 100644 --- a/pkg/compactor/compactor.go +++ b/pkg/compactor/compactor.go @@ -164,7 +164,7 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet, logger log.Logger) { f.DurationVar(&cfg.TenantCleanupDelay, "compactor.tenant-cleanup-delay", 6*time.Hour, "For tenants marked for deletion, this is the time between deletion of the last block, and doing final cleanup (marker files, debug files) of the tenant.") f.BoolVar(&cfg.NoBlocksFileCleanupEnabled, "compactor.no-blocks-file-cleanup-enabled", false, "If enabled, will delete the bucket-index, markers and debug files in the tenant bucket when there are no blocks left in the index.") f.DurationVar(&cfg.MaxLookback, "compactor.max-lookback", 0*time.Second, "Blocks uploaded before the lookback aren't considered in compactor cycles. If set, this value should be larger than all values in `-blocks-storage.tsdb.block-ranges-period`. A value of 0s means that all blocks are considered regardless of their upload time.") - f.BoolVar(&cfg.UploadSparseIndexHeaders, "compactor.upload-sparse-index-headers", false, "If enabled, the compactor will construct and upload sparse index headers to object storage during each compaction cycle. This allows store-gateway instances to use the sparse headers from object storage instead of recreating them locally.") + f.BoolVar(&cfg.UploadSparseIndexHeaders, "compactor.upload-sparse-index-headers", false, "If enabled, the compactor constructs and uploads sparse index headers to object storage during each compaction cycle. This allows store-gateway instances to use the sparse headers from object storage instead of recreating them locally.") // compactor concurrency options f.IntVar(&cfg.MaxOpeningBlocksConcurrency, "compactor.max-opening-blocks-concurrency", 1, "Number of goroutines opening blocks before compaction.")
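
The retention rule that NewPostingOffsetTableFromSparseHeader applies in the patches above when downsampling a sparse index-header to a coarser in-memory sampling rate (keep every step-th offset, and always keep the last offset so a label's final posting range still resolves) can be illustrated standalone. The following is a minimal sketch, not code from the patches: it assumes a simplified offset struct in place of the real postingOffset/postingValueOffsets types and a step value already validated by stepSize.

// downsample_sketch.go: illustrative only, not part of the patch series.
package main

import "fmt"

type offset struct {
	value    string
	tableOff int
}

// downsample keeps offsets[0], every step-th offset after it, and always offsets[len-1],
// mirroring the first/last retention that the header_test.go assertions above check.
func downsample(offsets []offset, step int) []offset {
	n := len(offsets)
	outLen := (n + step - 1) / step
	if n > 1 && outLen == 1 {
		outLen++ // grow the result so both the first and the last offset survive
	}
	out := make([]offset, outLen)
	for i, off := range offsets {
		if i%step == 0 {
			out[i/step] = off
		}
		if i == n-1 {
			out[outLen-1] = off // the last original offset always fills the final slot
		}
	}
	return out
}

func main() {
	in := []offset{{"a", 4}, {"b", 12}, {"c", 20}, {"d", 28}, {"e", 36}}
	// step = postingOffsetsInMemSampling / sparse-header sampling rate, e.g. 32 / 16 = 2.
	fmt.Println(downsample(in, 2))      // [{a 4} {c 20} {e 36}]
	fmt.Println(downsample(in[:2], 16)) // [{a 4} {b 12}]: short lists keep both entries
}

With a step greater than one and only two offsets, this still returns both entries, which is why the "downsample_short_offsets" expectation in postings_test.go changes from 1 to 2 in the diff above.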