From f6170c317f14ecd6fe32e3d65105ce757deec07b Mon Sep 17 00:00:00 2001 From: CascadingRadium Date: Wed, 6 Nov 2024 15:52:49 +0530 Subject: [PATCH 01/35] real first draft --- index/scorch/snapshot_index.go | 91 +++++++++++++++++++++++++++++- index/scorch/snapshot_index_str.go | 82 +++++++++++++++++++++++++++ mapping/document.go | 11 ++++ mapping/field.go | 2 + mapping/index.go | 37 ++++++++++++ mapping/synonym.go | 56 ++++++++++++++++++ pre_search.go | 73 ++++++++++++++++++++++-- 7 files changed, 345 insertions(+), 7 deletions(-) create mode 100644 index/scorch/snapshot_index_str.go create mode 100644 mapping/synonym.go diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 79840a41f..685f1c921 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -60,6 +60,7 @@ var reflectStaticSizeIndexSnapshot int // exported variable, or at the index level by setting the FieldTFRCacheThreshold // in the kvConfig. var DefaultFieldTFRCacheThreshold uint64 = 10 +var DefaultSynonymTermReaderCacheThreshold uint64 = 10 func init() { var is interface{} = IndexSnapshot{} @@ -87,8 +88,9 @@ type IndexSnapshot struct { m sync.Mutex // Protects the fields that follow. refs int64 - m2 sync.Mutex // Protects the fields that follow. - fieldTFRs map[string][]*IndexSnapshotTermFieldReader // keyed by field, recycled TFR's + m2 sync.Mutex // Protects the fields that follow. + fieldTFRs map[string][]*IndexSnapshotTermFieldReader // keyed by field, recycled TFR's + synonymTermReaders map[string][]*IndexSnapshotSynonymTermReader // keyed by thesaurus name, recycled thesaurus readers } func (i *IndexSnapshot) Segments() []*SegmentSnapshot { @@ -649,6 +651,15 @@ func (is *IndexSnapshot) getFieldTFRCacheThreshold() uint64 { return DefaultFieldTFRCacheThreshold } +func (is *IndexSnapshot) getSynonymTermReaderCacheThreshold() uint64 { + if is.parent.config != nil { + if _, ok := is.parent.config["SynonymTermReaderCacheThreshold"]; ok { + return is.parent.config["SynonymTermReaderCacheThreshold"].(uint64) + } + } + return DefaultSynonymTermReaderCacheThreshold +} + func (is *IndexSnapshot) recycleTermFieldReader(tfr *IndexSnapshotTermFieldReader) { if !tfr.recycle { // Do not recycle an optimized unadorned term field reader (used for @@ -677,6 +688,25 @@ func (is *IndexSnapshot) recycleTermFieldReader(tfr *IndexSnapshotTermFieldReade is.m2.Unlock() } +func (is *IndexSnapshot) recycleSynonymTermReader(str *IndexSnapshotSynonymTermReader) { + is.parent.rootLock.RLock() + obsolete := is.parent.root != is + is.parent.rootLock.RUnlock() + if obsolete { + // if we're not the current root (mutations happened), don't bother recycling + return + } + + is.m2.Lock() + if is.synonymTermReaders == nil { + is.synonymTermReaders = map[string][]*IndexSnapshotSynonymTermReader{} + } + if uint64(len(is.synonymTermReaders[str.name])) < is.getSynonymTermReaderCacheThreshold() { + is.synonymTermReaders[str.name] = append(is.synonymTermReaders[str.name], str) + } + is.m2.Unlock() +} + func docNumberToBytes(buf []byte, in uint64) []byte { if len(buf) != 8 { if cap(buf) >= 8 { @@ -956,3 +986,60 @@ func (is *IndexSnapshot) CloseCopyReader() error { // close the index snapshot normally return is.Close() } + +func (is *IndexSnapshot) allocSynonymTermReader(name string) (str *IndexSnapshotSynonymTermReader) { + is.m2.Lock() + if is.synonymTermReaders != nil { + strs := is.synonymTermReaders[name] + last := len(strs) - 1 + if last >= 0 { + str = strs[last] + strs[last] = nil + 
is.synonymTermReaders[name] = strs[:last] + is.m2.Unlock() + return + } + } + is.m2.Unlock() + return &IndexSnapshotSynonymTermReader{} +} + +func (is *IndexSnapshot) SynonymTermReader(ctx context.Context, thesaurusName string, term []byte) (index.SynonymTermReader, error) { + rv := is.allocSynonymTermReader(thesaurusName) + + rv.name = thesaurusName + rv.snapshot = is + if rv.postings == nil { + rv.postings = make([]segment.SynonymsList, len(is.segment)) + } + if rv.iterators == nil { + rv.iterators = make([]segment.SynonymsIterator, len(is.segment)) + } + rv.segmentOffset = 0 + + if rv.thesauri == nil { + rv.thesauri = make([]segment.Thesaurus, len(is.segment)) + for i, s := range is.segment { + if synSeg, ok := s.segment.(segment.SynonymSegment); ok { + thes, err := synSeg.Thesaurus(thesaurusName) + if err != nil { + return nil, err + } + rv.thesauri[i] = thes + } + } + } + + for i, s := range is.segment { + if _, ok := s.segment.(segment.SynonymSegment); ok { + pl, err := rv.thesauri[i].SynonymsList(term, s.deleted, rv.postings[i]) + if err != nil { + return nil, err + } + rv.postings[i] = pl + + rv.iterators[i] = pl.Iterator(rv.iterators[i]) + } + } + return rv, nil +} diff --git a/index/scorch/snapshot_index_str.go b/index/scorch/snapshot_index_str.go new file mode 100644 index 000000000..5dee83770 --- /dev/null +++ b/index/scorch/snapshot_index_str.go @@ -0,0 +1,82 @@ +// Copyright (c) 2024 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package scorch + +import ( + "reflect" + + "github.com/blevesearch/bleve/v2/size" + segment "github.com/blevesearch/scorch_segment_api/v2" +) + +var reflectStaticSizeIndexSnapshotSynonymTermReader int + +func init() { + var istr IndexSnapshotSynonymTermReader + reflectStaticSizeIndexSnapshotSynonymTermReader = int(reflect.TypeOf(istr).Size()) +} + +type IndexSnapshotSynonymTermReader struct { + name string + snapshot *IndexSnapshot + thesauri []segment.Thesaurus + postings []segment.SynonymsList + iterators []segment.SynonymsIterator + segmentOffset int +} + +func (i *IndexSnapshotSynonymTermReader) Size() int { + sizeInBytes := reflectStaticSizeIndexSnapshotSynonymTermReader + size.SizeOfPtr + + len(i.name) + + for _, thesaurus := range i.thesauri { + sizeInBytes += thesaurus.Size() + } + + for _, postings := range i.postings { + sizeInBytes += postings.Size() + } + + for _, iterator := range i.iterators { + sizeInBytes += iterator.Size() + } + + return sizeInBytes +} + +func (i *IndexSnapshotSynonymTermReader) Next() (string, error) { + // find the next hit + for i.segmentOffset < len(i.iterators) { + if i.iterators[i.segmentOffset] != nil { + next, err := i.iterators[i.segmentOffset].Next() + if err != nil { + return "", err + } + if next != nil { + synTerm := next.Term() + return synTerm, nil + } + i.segmentOffset++ + } + } + return "", nil +} + +func (i *IndexSnapshotSynonymTermReader) Close() error { + if i.snapshot != nil { + i.snapshot.recycleSynonymTermReader(i) + } + return nil +} diff --git a/mapping/document.go b/mapping/document.go index 847326e41..5d70af912 100644 --- a/mapping/document.go +++ b/mapping/document.go @@ -112,6 +112,17 @@ func (dm *DocumentMapping) analyzerNameForPath(path string) string { return "" } +// synonymSourceForPath attempts to first find the field +// described by this path, then returns the analyzer +// configured for that field +func (dm *DocumentMapping) synonymSourceForPath(path string) string { + field := dm.fieldDescribedByPath(path) + if field != nil { + return field.SynonymSource + } + return "" +} + func (dm *DocumentMapping) fieldDescribedByPath(path string) *FieldMapping { pathElements := decodePath(path) if len(pathElements) > 1 { diff --git a/mapping/field.go b/mapping/field.go index 5c064fddd..ad5b4f424 100644 --- a/mapping/field.go +++ b/mapping/field.go @@ -80,6 +80,8 @@ type FieldMapping struct { // Applicable to vector fields only - optimization string VectorIndexOptimizedFor string `json:"vector_index_optimized_for,omitempty"` + + SynonymSource string `json:"synonym_source,omitempty"` } // NewTextFieldMapping returns a default field mapping for text diff --git a/mapping/index.go b/mapping/index.go index fe8c96713..94b2cdfa7 100644 --- a/mapping/index.go +++ b/mapping/index.go @@ -54,6 +54,7 @@ type IndexMappingImpl struct { IndexDynamic bool `json:"index_dynamic"` DocValuesDynamic bool `json:"docvalues_dynamic"` CustomAnalysis *customAnalysis `json:"analysis,omitempty"` + SynonymSources map[string]*SynonymSource `json:"synonym_sources,omitempty"` cache *registry.Cache } @@ -186,6 +187,12 @@ func (im *IndexMappingImpl) Validate() error { return err } } + for _, synSource := range im.SynonymSources { + err = synSource.Validate(im.cache) + if err != nil { + return err + } + } return nil } @@ -283,6 +290,14 @@ func (im *IndexMappingImpl) UnmarshalJSON(data []byte) error { if err != nil { return err } + case "synonym_sources": + if im.SynonymSources == nil { + im.SynonymSources = make(map[string]*SynonymSource) + } + err := 
util.UnmarshalJSON(v, &im.SynonymSources) + if err != nil { + return err + } default: invalidKeys = append(invalidKeys, k) } @@ -457,3 +472,25 @@ func (im *IndexMappingImpl) FieldMappingForPath(path string) FieldMapping { func (im *IndexMappingImpl) DefaultSearchField() string { return im.DefaultField } + +func (im *IndexMappingImpl) SynonymSourceForPath(path string) string { + // first we look for explicit mapping on the field + for _, docMapping := range im.TypeMapping { + synonymSource := docMapping.synonymSourceForPath(path) + if synonymSource != "" { + return synonymSource + } + } + + // now try the default mapping + pathMapping, _ := im.DefaultMapping.documentMappingForPath(path) + if pathMapping != nil { + if len(pathMapping.Fields) > 0 { + if pathMapping.Fields[0].SynonymSource != "" { + return pathMapping.Fields[0].SynonymSource + } + } + } + + return "" +} diff --git a/mapping/synonym.go b/mapping/synonym.go new file mode 100644 index 000000000..5065b3b2a --- /dev/null +++ b/mapping/synonym.go @@ -0,0 +1,56 @@ +// Copyright (c) 2024 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package mapping + +import ( + "fmt" + + "github.com/blevesearch/bleve/v2/registry" +) + +type SynonymSource struct { + CollectionName string `json:"collection"` + AnalyzerName string `json:"analyzer"` +} + +func (s *SynonymSource) Collection() string { + return s.CollectionName +} + +func (s *SynonymSource) Analyzer() string { + return s.AnalyzerName +} + +func (s *SynonymSource) SetCollection(c string) { + s.CollectionName = c +} + +func (s *SynonymSource) SetAnalyzer(a string) { + s.AnalyzerName = a +} + +func (s *SynonymSource) Validate(c *registry.Cache) error { + if s.CollectionName == "" { + return fmt.Errorf("collection name is required") + } + if s.AnalyzerName == "" { + return fmt.Errorf("analyzer name is required") + } + _, err := c.AnalyzerNamed(s.AnalyzerName) + if err != nil { + return fmt.Errorf("analyzer named '%s' not found", s.AnalyzerName) + } + return nil +} diff --git a/pre_search.go b/pre_search.go index c8c55bfbc..646d25199 100644 --- a/pre_search.go +++ b/pre_search.go @@ -26,6 +26,8 @@ type preSearchResultProcessor interface { finalize(*SearchResult) } +// ----------------------------------------------------------------------------- +// KNN preSearchResultProcessor for handling KNN presearch results type knnPreSearchResultProcessor struct { addFn func(sr *SearchResult, indexName string) finalizeFn func(sr *SearchResult) @@ -44,16 +46,77 @@ func (k *knnPreSearchResultProcessor) finalize(sr *SearchResult) { } // ----------------------------------------------------------------------------- +// Synonym preSearchResultProcessor for handling Synonym presearch results +type synonymPreSearchResultProcessor struct { + addFn func(sr *SearchResult, indexName string) + finalizeFn func(sr *SearchResult) +} -func finalizePreSearchResult(req *SearchRequest, preSearchResult *SearchResult) { - if requestHasKNN(req) { - preSearchResult.Hits = finalizeKNNResults(req, 
preSearchResult.Hits) +func (s *synonymPreSearchResultProcessor) add(sr *SearchResult, indexName string) { + if s.addFn != nil { + s.addFn(sr, indexName) } } +func (s *synonymPreSearchResultProcessor) finalize(sr *SearchResult) { + if s.finalizeFn != nil { + s.finalizeFn(sr) + } +} + +// ----------------------------------------------------------------------------- +// Master struct that can hold any number of presearch result processors +type compositePreSearchResultProcessor struct { + presearchResultProcessors []preSearchResultProcessor +} + +// Implements the add method, which forwards to all the internal processors +func (m *compositePreSearchResultProcessor) add(sr *SearchResult, indexName string) { + for _, p := range m.presearchResultProcessors { + p.add(sr, indexName) + } +} + +// Implements the finalize method, which forwards to all the internal processors +func (m *compositePreSearchResultProcessor) finalize(sr *SearchResult) { + for _, p := range m.presearchResultProcessors { + p.finalize(sr) + } +} + +// ----------------------------------------------------------------------------- +// Function to create the appropriate preSearchResultProcessor(s) func createPreSearchResultProcessor(req *SearchRequest) preSearchResultProcessor { + var processors []preSearchResultProcessor + // Add KNN processor if the request has KNN + if requestHasKNN(req) { + if knnProcessor := newKnnPreSearchResultProcessor(req); knnProcessor != nil { + processors = append(processors, knnProcessor) + } + } + // Add Synonym processor if the request has Synonym + if requestHasSynonym(req) { + if synonymProcessor := newSynonymPreSearchResultProcessor(req); synonymProcessor != nil { + processors = append(processors, synonymProcessor) + } + } + // Return based on the number of processors, optimizing for the common case of 1 processor + // If there are no processors, return nil + switch len(processors) { + case 0: + return nil + case 1: + return processors[0] + default: + return &compositePreSearchResultProcessor{ + presearchResultProcessors: processors, + } + } +} + +// ----------------------------------------------------------------------------- +func finalizePreSearchResult(req *SearchRequest, preSearchResult *SearchResult) { if requestHasKNN(req) { - return newKnnPreSearchResultProcessor(req) + preSearchResult.Hits = finalizeKNNResults(req, preSearchResult.Hits) } - return &knnPreSearchResultProcessor{} // equivalent to nil } From 263e9904c05626267ea6f7f3f6f8cf5c54d2af1a Mon Sep 17 00:00:00 2001 From: CascadingRadium Date: Wed, 6 Nov 2024 16:19:29 +0530 Subject: [PATCH 02/35] fix bug --- index/scorch/snapshot_index_str.go | 6 +----- pre_search.go | 6 ------ 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/index/scorch/snapshot_index_str.go b/index/scorch/snapshot_index_str.go index 5dee83770..e1ba60272 100644 --- a/index/scorch/snapshot_index_str.go +++ b/index/scorch/snapshot_index_str.go @@ -39,11 +39,7 @@ type IndexSnapshotSynonymTermReader struct { func (i *IndexSnapshotSynonymTermReader) Size() int { sizeInBytes := reflectStaticSizeIndexSnapshotSynonymTermReader + size.SizeOfPtr + - len(i.name) - - for _, thesaurus := range i.thesauri { - sizeInBytes += thesaurus.Size() - } + len(i.name) + size.SizeOfString for _, postings := range i.postings { sizeInBytes += postings.Size() diff --git a/pre_search.go b/pre_search.go index 646d25199..a539afc35 100644 --- a/pre_search.go +++ b/pre_search.go @@ -94,12 +94,6 @@ func createPreSearchResultProcessor(req *SearchRequest) preSearchResultProcessor 
processors = append(processors, knnProcessor) } } - // Add Synonym processor if the request has Synonym - if requestHasSynonym(req) { - if synonymProcessor := newSynonymPreSearchResultProcessor(req); synonymProcessor != nil { - processors = append(processors, synonymProcessor) - } - } // Return based on the number of processors, optimizing for the common case of 1 processor // If there are no processors, return nil switch len(processors) { From c5fc54851ac77b2cdf741bb0106712b2e2eeede5 Mon Sep 17 00:00:00 2001 From: CascadingRadium Date: Wed, 6 Nov 2024 19:19:33 +0530 Subject: [PATCH 03/35] small fix the first draft --- mapping/field.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mapping/field.go b/mapping/field.go index ad5b4f424..08af2dfa7 100644 --- a/mapping/field.go +++ b/mapping/field.go @@ -476,6 +476,11 @@ func (fm *FieldMapping) UnmarshalJSON(data []byte) error { if err != nil { return err } + case "synonym_source": + err := json.Unmarshal(v, &fm.SynonymSource) + if err != nil { + return err + } default: invalidKeys = append(invalidKeys, k) } From 542d34a9dcd89f314ff0954feb934fb3cbd1a461 Mon Sep 17 00:00:00 2001 From: CascadingRadium Date: Fri, 15 Nov 2024 15:59:08 +0530 Subject: [PATCH 04/35] glue code for indexing path --- document/document.go | 15 ++++ document/field_synonym.go | 143 ++++++++++++++++++++++++++++++++++++++ error.go | 2 + index.go | 55 +++++++++++++++ mapping/index.go | 17 +++++ mapping/mapping.go | 5 ++ 6 files changed, 237 insertions(+) create mode 100644 document/field_synonym.go diff --git a/document/document.go b/document/document.go index 54fd6d442..0f9591c85 100644 --- a/document/document.go +++ b/document/document.go @@ -48,6 +48,13 @@ func NewDocument(id string) *Document { } } +func NewSynonymDocument(id string) *Document { + return &Document{ + id: id, + Fields: make([]Field, 0), + } +} + func (d *Document) Size() int { sizeInBytes := reflectStaticSizeDocument + size.SizeOfPtr + len(d.id) @@ -133,3 +140,11 @@ func (d *Document) VisitComposite(visitor index.CompositeFieldVisitor) { func (d *Document) HasComposite() bool { return len(d.CompositeFields) > 0 } + +func (d *Document) VisitSynonymFields(visitor index.SynonymFieldVisitor) { + for _, f := range d.Fields { + if sf, ok := f.(index.SynonymField); ok { + visitor(sf) + } + } +} diff --git a/document/field_synonym.go b/document/field_synonym.go new file mode 100644 index 000000000..0e4812690 --- /dev/null +++ b/document/field_synonym.go @@ -0,0 +1,143 @@ +// Copyright (c) 2024 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package document + +import ( + "reflect" + + "github.com/blevesearch/bleve/v2/analysis" + "github.com/blevesearch/bleve/v2/size" + index "github.com/blevesearch/bleve_index_api" +) + +var reflectStaticSizeSynonymField int + +func init() { + var f SynonymField + reflectStaticSizeSynonymField = int(reflect.TypeOf(f).Size()) +} + +const DefaultSynonymIndexingOptions = index.IndexField + +type SynonymField struct { + name string + analyzer analysis.Analyzer + options index.FieldIndexingOptions + input []string + synonyms []string + numPlainTextBytes uint64 + + // populated during analysis + synonymMap map[string][]string +} + +func (s *SynonymField) Size() int { + return reflectStaticSizeSynonymField + size.SizeOfPtr + + len(s.name) +} + +func (s *SynonymField) Name() string { + return s.name +} + +func (s *SynonymField) ArrayPositions() []uint64 { + return nil +} + +func (s *SynonymField) Options() index.FieldIndexingOptions { + return s.options +} + +func (s *SynonymField) NumPlainTextBytes() uint64 { + return s.numPlainTextBytes +} + +func (s *SynonymField) AnalyzedLength() int { + return 0 +} + +func (s *SynonymField) EncodedFieldType() byte { + return 'y' +} + +func (s *SynonymField) AnalyzedTokenFrequencies() index.TokenFrequencies { + return nil +} + +func (s *SynonymField) Analyze() { + var analyzedInput []string + if len(s.input) > 0 { + analyzedInput = make([]string, 0, len(s.input)) + for _, term := range s.input { + analyzedInput = append(analyzedInput, analyzeSynonymTerm(term, s.analyzer)) + } + } + analyzedSynonyms := make([]string, 0, len(s.synonyms)) + for _, syn := range s.synonyms { + analyzedSynonyms = append(analyzedSynonyms, analyzeSynonymTerm(syn, s.analyzer)) + } + s.synonymMap = processSynonymData(analyzedInput, analyzedSynonyms) +} + +func (s *SynonymField) Value() []byte { + return nil +} + +func (s *SynonymField) IterateSynonyms(visitor func(term string, synonyms []string)) { + for term, synonyms := range s.synonymMap { + visitor(term, synonyms) + } +} + +func NewSynonymField(name string, analyzer analysis.Analyzer, input []string, synonyms []string) *SynonymField { + return &SynonymField{ + name: name, + analyzer: analyzer, + options: DefaultSynonymIndexingOptions, + input: input, + synonyms: synonyms, + } +} + +func processSynonymData(input []string, synonyms []string) map[string][]string { + var synonymMap map[string][]string + if len(input) > 0 { + // Map each term to the same list of synonyms. + synonymMap = make(map[string][]string, len(input)) + for _, term := range input { + synonymMap[term] = append([]string(nil), synonyms...) // Avoid sharing slices. + } + } else { + synonymMap = make(map[string][]string, len(synonyms)) + // Precompute a map where each synonym points to all other synonyms. 
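+		// For example, synonyms = ["quick", "fast", "speedy"] yields
+		// {"quick": ["fast", "speedy"], "fast": ["quick", "speedy"],
+		// "speedy": ["quick", "fast"]}.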
+ for i, elem := range synonyms { + synonymMap[elem] = make([]string, 0, len(synonyms)-1) + for j, otherElem := range synonyms { + if i != j { + synonymMap[elem] = append(synonymMap[elem], otherElem) + } + } + } + } + return synonymMap +} + +func analyzeSynonymTerm(term string, analyzer analysis.Analyzer) string { + tokenStream := analyzer.Analyze([]byte(term)) + if len(tokenStream) == 0 { + return term + } + return string(tokenStream[0].Term) +} diff --git a/error.go b/error.go index 2d2751cd4..b57a61543 100644 --- a/error.go +++ b/error.go @@ -27,6 +27,7 @@ const ( ErrorEmptyID ErrorIndexReadInconsistency ErrorTwoPhaseSearchInconsistency + ErrorSynonymSearchNotSupported ) // Error represents a more strongly typed bleve error for detecting @@ -49,4 +50,5 @@ var errorMessages = map[Error]string{ ErrorEmptyID: "document ID cannot be empty", ErrorIndexReadInconsistency: "index read inconsistency detected", ErrorTwoPhaseSearchInconsistency: "2-phase search failed, likely due to an overlapping topology change", + ErrorSynonymSearchNotSupported: "synonym search not supported", } diff --git a/index.go b/index.go index acbefc695..d98f28558 100644 --- a/index.go +++ b/index.go @@ -16,6 +16,7 @@ package bleve import ( "context" + "fmt" "github.com/blevesearch/bleve/v2/index/upsidedown" @@ -63,6 +64,36 @@ func (b *Batch) Index(id string, data interface{}) error { return nil } +func (b *Batch) IndexSynonym(id string, collection string, definition *SynonymDefinition) error { + if id == "" { + return ErrorEmptyID + } + if eventIndex, ok := b.index.(index.EventIndex); ok { + eventIndex.FireIndexEvent() + } + synMap, ok := b.index.Mapping().(mapping.SynonymMapping) + if !ok { + return ErrorSynonymSearchNotSupported + } + + if err := definition.Validate(); err != nil { + return err + } + + doc := document.NewSynonymDocument(id) + err := synMap.MapSynonymDocument(doc, collection, definition.Input, definition.Synonyms) + if err != nil { + return err + } + b.internal.Update(doc) + + b.lastDocSize = uint64(doc.Size() + + len(id) + size.SizeOfString) // overhead from internal + b.totalSize += b.lastDocSize + + return nil +} + func (b *Batch) LastDocSize() uint64 { return b.lastDocSize } @@ -323,3 +354,27 @@ type IndexCopyable interface { // FileSystemDirectory is the default implementation for the // index.Directory interface. type FileSystemDirectory string + +// SynonymDefinition represents a synonym mapping in Bleve. +// Each instance associates one or more input terms with a list of synonyms, +// defining how terms are treated as equivalent in searches. +type SynonymDefinition struct { + // Input is an optional list of terms for unidirectional synonym mapping. + // When terms are specified in Input, they will map to the terms in Synonyms, + // making the relationship unidirectional (each Input maps to all Synonyms). + // If Input is omitted, the relationship is bidirectional among all Synonyms. + Input []string `json:"input"` + + // Synonyms is a list of terms that are considered equivalent. + // If Input is specified, each term in Input will map to each term in Synonyms. + // If Input is not specified, the Synonyms list will be treated bidirectionally, + // meaning each term in Synonyms is treated as synonymous with all others. 
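+	// For example, Input: ["color"] with Synonyms: ["red", "green"] maps
+	// "color" to "red" and "green" one way only, whereas Synonyms:
+	// ["quick", "fast"] with no Input makes the two interchangeable.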
+ Synonyms []string `json:"synonyms"` +} + +func (sd *SynonymDefinition) Validate() error { + if len(sd.Synonyms) == 0 { + return fmt.Errorf("synonym definition must have at least one synonym") + } + return nil +} diff --git a/mapping/index.go b/mapping/index.go index 94b2cdfa7..d6d355b11 100644 --- a/mapping/index.go +++ b/mapping/index.go @@ -354,6 +354,23 @@ func (im *IndexMappingImpl) MapDocument(doc *document.Document, data interface{} return nil } +func (im *IndexMappingImpl) MapSynonymDocument(doc *document.Document, collection string, input []string, synonyms []string) error { + // determine all the synonym sources with the given collection + // and create a synonym field for each + for name, synSource := range im.SynonymSources { + if synSource.Collection() == collection { + // create a new field with the name of the synonym source + analyzer := im.AnalyzerNamed(synSource.Analyzer()) + if analyzer == nil { + return fmt.Errorf("unknown analyzer named: %s", synSource.Analyzer()) + } + field := document.NewSynonymField(name, analyzer, input, synonyms) + doc.AddField(field) + } + } + return nil +} + type walkContext struct { doc *document.Document im *IndexMappingImpl diff --git a/mapping/mapping.go b/mapping/mapping.go index cbfc98faa..6100d6d09 100644 --- a/mapping/mapping.go +++ b/mapping/mapping.go @@ -58,3 +58,8 @@ type IndexMapping interface { FieldMappingForPath(path string) FieldMapping } + +type SynonymMapping interface { + IndexMapping + MapSynonymDocument(doc *document.Document, collection string, input []string, synonyms []string) error +} From 6627dcb2a5f5c7fd132d5a35b94576166788ea35 Mon Sep 17 00:00:00 2001 From: CascadingRadium Date: Fri, 15 Nov 2024 18:50:54 +0530 Subject: [PATCH 05/35] unit test --- mapping/index.go | 8 ++ mapping/synonym.go | 7 ++ search_test.go | 182 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 197 insertions(+) diff --git a/mapping/index.go b/mapping/index.go index d6d355b11..ae17a3da5 100644 --- a/mapping/index.go +++ b/mapping/index.go @@ -146,6 +146,14 @@ func (im *IndexMappingImpl) AddCustomDateTimeParser(name string, config map[stri return nil } +func (im *IndexMappingImpl) AddSynonymSource(name, collection, analyzer string) error { + if im.SynonymSources == nil { + im.SynonymSources = make(map[string]*SynonymSource) + } + im.SynonymSources[name] = NewSynonymSource(collection, analyzer) + return nil +} + // NewIndexMapping creates a new IndexMapping that will use all the default indexing rules func NewIndexMapping() *IndexMappingImpl { return &IndexMappingImpl{ diff --git a/mapping/synonym.go b/mapping/synonym.go index 5065b3b2a..597539bf0 100644 --- a/mapping/synonym.go +++ b/mapping/synonym.go @@ -25,6 +25,13 @@ type SynonymSource struct { AnalyzerName string `json:"analyzer"` } +func NewSynonymSource(collection, analyzer string) *SynonymSource { + return &SynonymSource{ + CollectionName: collection, + AnalyzerName: analyzer, + } +} + func (s *SynonymSource) Collection() string { return s.CollectionName } diff --git a/search_test.go b/search_test.go index c39a58558..2a30a4502 100644 --- a/search_test.go +++ b/search_test.go @@ -15,10 +15,12 @@ package bleve import ( + "context" "encoding/json" "fmt" "math" "reflect" + "sort" "strconv" "strings" "testing" @@ -3746,3 +3748,183 @@ func TestAutoFuzzy(t *testing.T) { } } } + +func TestSynonymSearch(t *testing.T) { + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) + + synonymCollection := "collection1" + + synonymSourceName := "english" + + 
synonymAnalyzer := "simple" + + imap := mapping.NewIndexMapping() + textField := mapping.NewTextFieldMapping() + textField.Analyzer = simple.Name + imap.DefaultMapping.AddFieldMappingsAt("text", textField) + imap.AddSynonymSource(synonymSourceName, synonymCollection, synonymAnalyzer) + + err := imap.Validate() + if err != nil { + t.Fatal(err) + } + + idx, err := New(tmpIndexPath, imap) + if err != nil { + t.Fatal(err) + } + defer func() { + err = idx.Close() + if err != nil { + t.Fatal(err) + } + }() + + documents := map[string]map[string]interface{}{ + "doc1": { + "text": "quick brown fox eats", + }, + "doc2": { + "text": "fast red wolf jumps", + }, + "doc3": { + "text": "quick red cat runs", + }, + "doc4": { + "text": "speedy brown dog barks", + }, + "doc5": { + "text": "fast green rabbit hops", + }, + } + + batch := idx.NewBatch() + for docID, doc := range documents { + err := batch.Index(docID, doc) + if err != nil { + t.Fatal(err) + } + } + + synonymDocuments := map[string]*SynonymDefinition{ + "synDoc1": { + Synonyms: []string{"quick", "fast", "speedy"}, + }, + "synDoc2": { + Input: []string{"color", "colour"}, + Synonyms: []string{"red", "green", "blue", "yellow", "brown"}, + }, + "synDoc3": { + Input: []string{"animal", "creature"}, + Synonyms: []string{"fox", "wolf", "cat", "dog", "rabbit"}, + }, + "synDoc4": { + Synonyms: []string{"eats", "jumps", "runs", "barks", "hops"}, + }, + } + + for synName, synDef := range synonymDocuments { + err := batch.IndexSynonym(synName, "collection1", synDef) + if err != nil { + t.Fatal(err) + } + } + err = idx.Batch(batch) + if err != nil { + t.Fatal(err) + } + + sco, err := idx.Advanced() + if err != nil { + t.Fatal(err) + } + + reader, err := sco.Reader() + if err != nil { + t.Fatal(err) + } + defer func() { + err = reader.Close() + if err != nil { + t.Fatal(err) + } + }() + + synReader, ok := reader.(index.ThesaurusReader) + if !ok { + t.Fatal("expected thesaurus reader") + } + + type testStruct struct { + queryTerm string + expectedSynonyms []string + } + + testQueries := []testStruct{ + { + queryTerm: "quick", + expectedSynonyms: []string{"fast", "speedy"}, + }, + { + queryTerm: "red", + expectedSynonyms: []string{}, + }, + { + queryTerm: "color", + expectedSynonyms: []string{"red", "green", "blue", "yellow", "brown"}, + }, + { + queryTerm: "colour", + expectedSynonyms: []string{"red", "green", "blue", "yellow", "brown"}, + }, + { + queryTerm: "animal", + expectedSynonyms: []string{"fox", "wolf", "cat", "dog", "rabbit"}, + }, + { + queryTerm: "creature", + expectedSynonyms: []string{"fox", "wolf", "cat", "dog", "rabbit"}, + }, + { + queryTerm: "fox", + expectedSynonyms: []string{}, + }, + { + queryTerm: "eats", + expectedSynonyms: []string{"jumps", "runs", "barks", "hops"}, + }, + { + queryTerm: "jumps", + expectedSynonyms: []string{"eats", "runs", "barks", "hops"}, + }, + } + + for _, test := range testQueries { + str, err := synReader.SynonymTermReader(context.Background(), synonymSourceName, []byte(test.queryTerm)) + if err != nil { + t.Fatal(err) + } + var gotSynonyms []string + for { + synonym, err := str.Next() + if err != nil { + t.Fatal(err) + } + if synonym == "" { + break + } + gotSynonyms = append(gotSynonyms, string(synonym)) + } + if len(gotSynonyms) != len(test.expectedSynonyms) { + t.Fatalf("expected %d synonyms, got %d", len(test.expectedSynonyms), len(gotSynonyms)) + } + sort.Strings(gotSynonyms) + sort.Strings(test.expectedSynonyms) + for i, syn := range gotSynonyms { + if syn != test.expectedSynonyms[i] { + 
t.Fatalf("expected synonym %s, got %s", test.expectedSynonyms[i], syn) + } + } + } +} From 32c67af39313f4517f70e8f9bec6d7e743e0073d Mon Sep 17 00:00:00 2001 From: CascadingRadium Date: Mon, 18 Nov 2024 19:13:31 +0530 Subject: [PATCH 06/35] query path first draft --- index_impl.go | 28 +++- mapping/mapping.go | 5 + search/query/query.go | 296 ++++++++++++++++++++++++++++++++++++++++++ search/util.go | 6 + search_test.go | 4 +- 5 files changed, 334 insertions(+), 5 deletions(-) diff --git a/index_impl.go b/index_impl.go index e6debf17a..3cce0a12c 100644 --- a/index_impl.go +++ b/index_impl.go @@ -38,6 +38,7 @@ import ( "github.com/blevesearch/bleve/v2/search/collector" "github.com/blevesearch/bleve/v2/search/facet" "github.com/blevesearch/bleve/v2/search/highlight" + "github.com/blevesearch/bleve/v2/search/query" "github.com/blevesearch/bleve/v2/util" index "github.com/blevesearch/bleve_index_api" "github.com/blevesearch/geo/s2" @@ -505,8 +506,8 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr } var knnHits []*search.DocumentMatch + var fts search.FieldTermSynonymMap var ok bool - var skipKnnCollector bool if req.PreSearchData != nil { for k, v := range req.PreSearchData { switch k { @@ -517,19 +518,40 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr return nil, fmt.Errorf("knn preSearchData must be of type []*search.DocumentMatch") } } - skipKnnCollector = true + case search.SynonymPreSearchDataKey: + if v != nil { + fts, ok = v.(search.FieldTermSynonymMap) + if !ok { + return nil, fmt.Errorf("synonym preSearchData must be of type search.FieldTermSynonymMap") + } + } } } } - if !skipKnnCollector && requestHasKNN(req) { + if knnHits == nil && requestHasKNN(req) { knnHits, err = i.runKnnCollector(ctx, req, indexReader, false) if err != nil { return nil, err } } + if fts == nil { + if synMap, ok := i.m.(mapping.SynonymMapping); ok && synMap.SynonymCount() > 0 { + if synReader, ok := indexReader.(index.SynonymReader); ok { + fts, err = query.ExtractSynonyms(ctx, synMap, synReader, req.Query, fts) + if err != nil { + return nil, err + } + } + } + } + setKnnHitsInCollector(knnHits, req, coll) + if fts != nil { + ctx = context.WithValue(ctx, search.FieldTermSynonymMapKey, fts) + } + // This callback and variable handles the tracking of bytes read // 1. as part of creation of tfr and its Next() calls which is // accounted by invoking this callback when the TFR is closed. 
diff --git a/mapping/mapping.go b/mapping/mapping.go index 6100d6d09..6714c55aa 100644 --- a/mapping/mapping.go +++ b/mapping/mapping.go @@ -61,5 +61,10 @@ type IndexMapping interface { type SynonymMapping interface { IndexMapping + MapSynonymDocument(doc *document.Document, collection string, input []string, synonyms []string) error + + SynonymSourceForPath(path string) string + + SynonymCount() int } diff --git a/search/query/query.go b/search/query/query.go index d263a0e54..22f1293ee 100644 --- a/search/query/query.go +++ b/search/query/query.go @@ -21,8 +21,10 @@ import ( "io" "log" + "github.com/blevesearch/bleve/v2/analysis" "github.com/blevesearch/bleve/v2/mapping" "github.com/blevesearch/bleve/v2/search" + "github.com/blevesearch/bleve/v2/search/searcher" "github.com/blevesearch/bleve/v2/util" index "github.com/blevesearch/bleve_index_api" ) @@ -423,3 +425,297 @@ func DumpQuery(m mapping.IndexMapping, query Query) (string, error) { data, err := json.MarshalIndent(q, "", " ") return string(data), err } + +// ExtractSynonyms extracts synonyms from the query tree and returns a map of +// field-term pairs to their synonyms. The input query tree is traversed and +// for each term query, the synonyms are extracted from the synonym source +// associated with the field. The synonyms are then added to the provided map. +// The map is returned and may be nil if no synonyms were found. +func ExtractSynonyms(ctx context.Context, m mapping.SynonymMapping, r index.SynonymReader, + query Query, rv search.FieldTermSynonymMap) (search.FieldTermSynonymMap, error) { + + if r == nil || m == nil || query == nil { + return rv, nil + } + resolveFieldAndSource := func(field string) (string, string) { + if field == "" { + field = m.DefaultSearchField() + } + return field, m.SynonymSourceForPath(field) + } + handleAnalyzer := func(analyzerName, field string) (analysis.Analyzer, error) { + if analyzerName == "" { + analyzerName = m.AnalyzerNameForPath(field) + } + analyzer := m.AnalyzerNamed(analyzerName) + if analyzer == nil { + return nil, fmt.Errorf("no analyzer named '%s' registered", analyzerName) + } + return analyzer, nil + } + switch q := query.(type) { + case *BooleanQuery: + var err error + rv, err = ExtractSynonyms(ctx, m, r, q.Must, rv) + if err != nil { + return nil, err + } + rv, err = ExtractSynonyms(ctx, m, r, q.Should, rv) + if err != nil { + return nil, err + } + rv, err = ExtractSynonyms(ctx, m, r, q.MustNot, rv) + if err != nil { + return nil, err + } + return rv, nil + case *ConjunctionQuery: + for _, child := range q.Conjuncts { + var err error + rv, err = ExtractSynonyms(ctx, m, r, child, rv) + if err != nil { + return nil, err + } + } + case *DisjunctionQuery: + for _, child := range q.Disjuncts { + var err error + rv, err = ExtractSynonyms(ctx, m, r, child, rv) + if err != nil { + return nil, err + } + } + case *FuzzyQuery: + field, source := resolveFieldAndSource(q.FieldVal) + if source != "" { + return addFuzzySynonymsForTerm(ctx, source, field, q.Term, q.Fuzziness, q.Prefix, r, rv) + } + case *MatchQuery, *MatchPhraseQuery: + var analyzerName, matchString, fieldVal string + var fuzziness, prefix int + if mq, ok := q.(*MatchQuery); ok { + analyzerName, fieldVal, matchString, fuzziness, prefix = mq.Analyzer, mq.FieldVal, mq.Match, mq.Fuzziness, mq.Prefix + } else if mpq, ok := q.(*MatchPhraseQuery); ok { + analyzerName, fieldVal, matchString, fuzziness = mpq.Analyzer, mpq.FieldVal, mpq.MatchPhrase, mpq.Fuzziness + } + field, source := resolveFieldAndSource(fieldVal) + if source != 
"" { + analyzer, err := handleAnalyzer(analyzerName, field) + if err != nil { + return nil, err + } + tokens := analyzer.Analyze([]byte(matchString)) + for _, token := range tokens { + rv, err = addFuzzySynonymsForTerm(ctx, source, field, string(token.Term), fuzziness, prefix, r, rv) + if err != nil { + return nil, err + } + } + } + case *MultiPhraseQuery, *PhraseQuery: + var fieldVal string + if mpq, ok := q.(*MultiPhraseQuery); ok { + fieldVal = mpq.FieldVal + } else if pq, ok := q.(*PhraseQuery); ok { + fieldVal = pq.FieldVal + } + field, source := resolveFieldAndSource(fieldVal) + if source != "" { + var terms []string + if mpq, ok := q.(*MultiPhraseQuery); ok { + for _, termGroup := range mpq.Terms { + terms = append(terms, termGroup...) + } + } else if pq, ok := q.(*PhraseQuery); ok { + terms = pq.Terms + } + for _, term := range terms { + var err error + rv, err = addSynonymsForTerm(ctx, source, term, field, r, rv) + if err != nil { + return nil, err + } + } + return rv, nil + } + case *PrefixQuery: + field, source := resolveFieldAndSource(q.FieldVal) + if source != "" { + return addPrefixSynonymsForTerm(ctx, source, field, q.Prefix, r, rv) + } + case *QueryStringQuery: + expanded, err := expandQuery(m, q) + if err != nil { + return nil, err + } + return ExtractSynonyms(ctx, m, r, expanded, rv) + case *RegexpQuery: + field, source := resolveFieldAndSource(q.FieldVal) + if source != "" { + return addRegexpSynonymsForTerm(ctx, source, field, q.Regexp, r, rv) + } + case *TermQuery: + field, source := resolveFieldAndSource(q.FieldVal) + if source != "" { + return addSynonymsForTerm(ctx, source, q.Term, field, r, rv) + } + case *WildcardQuery: + field, source := resolveFieldAndSource(q.FieldVal) + if source != "" { + regexpString := wildcardRegexpReplacer.Replace(q.Wildcard) + return addRegexpSynonymsForTerm(ctx, source, field, regexpString, r, rv) + } + } + return rv, nil +} + +// addRegexpSynonymsForTerm finds all terms that match the given regexp and +// adds their synonyms to the provided map. +func addRegexpSynonymsForTerm(ctx context.Context, src, field, term string, + r index.SynonymReader, rv search.FieldTermSynonymMap) (search.FieldTermSynonymMap, error) { + + if ir, ok := r.(index.IndexReaderRegexp); ok { + fieldDict, err := ir.FieldDictRegexp(field, term) + if err != nil { + return nil, err + } + defer func() { + if cerr := fieldDict.Close(); cerr != nil && err == nil { + err = cerr + } + }() + regexpTerms := []string{term} + tfd, err := fieldDict.Next() + for err == nil && tfd != nil { + regexpTerms = append(regexpTerms, tfd.Term) + tfd, err = fieldDict.Next() + } + if err != nil { + return nil, err + } + for _, term := range regexpTerms { + rv, err = addSynonymsForTerm(ctx, src, term, field, r, rv) + if err != nil { + return nil, err + } + } + return rv, nil + } + return nil, nil +} + +// addPrefixSynonymsForTerm finds all terms that match the given prefix and +// adds their synonyms to the provided map. 
+func addPrefixSynonymsForTerm(ctx context.Context, src, field, term string, + r index.SynonymReader, rv search.FieldTermSynonymMap) (search.FieldTermSynonymMap, error) { + // find the terms with this prefix + fieldDict, err := r.FieldDictPrefix(field, []byte(term)) + if err != nil { + return nil, err + } + defer func() { + if cerr := fieldDict.Close(); cerr != nil && err == nil { + err = cerr + } + }() + prefixTerms := []string{term} + tfd, err := fieldDict.Next() + for err == nil && tfd != nil { + prefixTerms = append(prefixTerms, tfd.Term) + tfd, err = fieldDict.Next() + } + if err != nil { + return nil, err + } + for _, term := range prefixTerms { + rv, err = addSynonymsForTerm(ctx, src, term, field, r, rv) + if err != nil { + return nil, err + } + } + return rv, nil +} + +// addFuzzySynonymsForTerm finds all terms that match the given term with the +// given fuzziness and adds their synonyms to the provided map. +func addFuzzySynonymsForTerm(ctx context.Context, src, field, term string, fuzziness, prefix int, + r index.SynonymReader, rv search.FieldTermSynonymMap) (search.FieldTermSynonymMap, error) { + if fuzziness == 0 { + return addSynonymsForTerm(ctx, src, term, field, r, rv) + } + if ir, ok := r.(index.IndexReaderFuzzy); ok { + if fuzziness > searcher.MaxFuzziness { + return nil, fmt.Errorf("fuzziness exceeds max (%d)", searcher.MaxFuzziness) + } + if fuzziness < 0 { + return nil, fmt.Errorf("invalid fuzziness, negative") + } + prefixTerm := "" + for i, r := range term { + if i < prefix { + prefixTerm += string(r) + } else { + break + } + } + fieldDict, err := ir.FieldDictFuzzy(field, term, fuzziness, prefixTerm) + if err != nil { + return nil, err + } + defer func() { + if cerr := fieldDict.Close(); cerr != nil && err == nil { + err = cerr + } + }() + fuzzyTerms := []string{term} + tfd, err := fieldDict.Next() + for err == nil && tfd != nil { + fuzzyTerms = append(fuzzyTerms, tfd.Term) + tfd, err = fieldDict.Next() + } + if err != nil { + return nil, err + } + for _, term := range fuzzyTerms { + rv, err = addSynonymsForTerm(ctx, src, term, field, r, rv) + if err != nil { + return nil, err + } + } + return rv, nil + } + return nil, nil +} + +// addSynonymsForTerm finds synonyms for the given term and adds them to the +// provided map. 
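+// A nil map may be passed in; it is allocated lazily on the first synonym
+// found and returned unchanged when the term has none.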
+func addSynonymsForTerm(ctx context.Context, src, term, field string, r index.SynonymReader, + rv search.FieldTermSynonymMap) (search.FieldTermSynonymMap, error) { + + termBytes := []byte(term) + termReader, err := r.SynonymTermReader(ctx, src, termBytes) + if err != nil { + return nil, err + } + defer func() { + if cerr := termReader.Close(); cerr != nil && err == nil { + err = cerr + } + }() + var synonyms []string + synonym, err := termReader.Next() + for err == nil && synonym != "" { + synonyms = append(synonyms, synonym) + synonym, err = termReader.Next() + } + if len(synonyms) > 0 { + if rv == nil { + rv = make(search.FieldTermSynonymMap) + } + if _, exists := rv[field]; !exists { + rv[field] = make(map[string][]string) + } + rv[field][term] = synonyms + } + return rv, err +} diff --git a/search/util.go b/search/util.go index 6472803d1..0f16db19a 100644 --- a/search/util.go +++ b/search/util.go @@ -136,6 +136,7 @@ const MinGeoBufPoolSize = 24 type GeoBufferPoolCallbackFunc func() *s2.GeoBufferPool const KnnPreSearchDataKey = "_knn_pre_search_data_key" +const SynonymPreSearchDataKey = "_synonym_pre_search_data_key" const PreSearchKey = "_presearch_key" @@ -144,5 +145,10 @@ type ScoreExplCorrectionCallbackFunc func(queryMatch *DocumentMatch, knnMatch *D type SearcherStartCallbackFn func(size uint64) error type SearcherEndCallbackFn func(size uint64) error +// field -> term -> synonyms +type FieldTermSynonymMap map[string]map[string][]string + +const FieldTermSynonymMapKey = "_field_term_synonym_map_key" + const SearcherStartCallbackKey = "_searcher_start_callback_key" const SearcherEndCallbackKey = "_searcher_end_callback_key" diff --git a/search_test.go b/search_test.go index 2a30a4502..b929fa989 100644 --- a/search_test.go +++ b/search_test.go @@ -3851,9 +3851,9 @@ func TestSynonymSearch(t *testing.T) { } }() - synReader, ok := reader.(index.ThesaurusReader) + synReader, ok := reader.(index.SynonymReader) if !ok { - t.Fatal("expected thesaurus reader") + t.Fatal("expected synonym reader") } type testStruct struct { From 784f45bdfae5300a5b9f0813189068f831fa4b85 Mon Sep 17 00:00:00 2001 From: CascadingRadium Date: Fri, 22 Nov 2024 17:45:55 +0530 Subject: [PATCH 07/35] minor fixes --- mapping/index.go | 4 ++++ search_test.go | 7 ++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/mapping/index.go b/mapping/index.go index ae17a3da5..36b423108 100644 --- a/mapping/index.go +++ b/mapping/index.go @@ -519,3 +519,7 @@ func (im *IndexMappingImpl) SynonymSourceForPath(path string) string { return "" } + +func (im *IndexMappingImpl) SynonymCount() int { + return len(im.SynonymSources) +} diff --git a/search_test.go b/search_test.go index b929fa989..bdfb2fd42 100644 --- a/search_test.go +++ b/search_test.go @@ -3749,7 +3749,7 @@ func TestAutoFuzzy(t *testing.T) { } } -func TestSynonymSearch(t *testing.T) { +func TestSynonymTermReader(t *testing.T) { tmpIndexPath := createTmpIndexPath(t) defer cleanupTmpIndexPath(t, tmpIndexPath) @@ -3759,12 +3759,13 @@ func TestSynonymSearch(t *testing.T) { synonymAnalyzer := "simple" - imap := mapping.NewIndexMapping() textField := mapping.NewTextFieldMapping() textField.Analyzer = simple.Name + textField.SynonymSource = synonymSourceName + + imap := mapping.NewIndexMapping() imap.DefaultMapping.AddFieldMappingsAt("text", textField) imap.AddSynonymSource(synonymSourceName, synonymCollection, synonymAnalyzer) - err := imap.Validate() if err != nil { t.Fatal(err) From f19cedc6f703c4255938e71e7c9bbdb87b031213 Mon Sep 17 00:00:00 2001 From: 
CascadingRadium Date: Fri, 29 Nov 2024 12:53:33 +0530 Subject: [PATCH 08/35] bug fixes and unit tests for single index implementation --- search/query/query.go | 42 ++- search/searcher/search_fuzzy.go | 22 +- search/searcher/search_phrase.go | 37 +++ search/searcher/search_regexp.go | 4 + search/searcher/search_term.go | 57 +++- search/searcher/search_term_prefix.go | 4 + search_test.go | 372 ++++++++++++++++++++++++++ 7 files changed, 517 insertions(+), 21 deletions(-) diff --git a/search/query/query.go b/search/query/query.go index 22f1293ee..c908bbc54 100644 --- a/search/query/query.go +++ b/search/query/query.go @@ -20,6 +20,7 @@ import ( "fmt" "io" "log" + "strings" "github.com/blevesearch/bleve/v2/analysis" "github.com/blevesearch/bleve/v2/mapping" @@ -487,16 +488,21 @@ func ExtractSynonyms(ctx context.Context, m mapping.SynonymMapping, r index.Syno } case *FuzzyQuery: field, source := resolveFieldAndSource(q.FieldVal) + fuzziness := q.Fuzziness + if q.autoFuzzy { + fuzziness = searcher.GetAutoFuzziness(q.Term) + } if source != "" { - return addFuzzySynonymsForTerm(ctx, source, field, q.Term, q.Fuzziness, q.Prefix, r, rv) + return addFuzzySynonymsForTerm(ctx, source, field, q.Term, fuzziness, q.Prefix, r, rv) } case *MatchQuery, *MatchPhraseQuery: var analyzerName, matchString, fieldVal string var fuzziness, prefix int + var autoFuzzy bool if mq, ok := q.(*MatchQuery); ok { - analyzerName, fieldVal, matchString, fuzziness, prefix = mq.Analyzer, mq.FieldVal, mq.Match, mq.Fuzziness, mq.Prefix + analyzerName, fieldVal, matchString, fuzziness, prefix, autoFuzzy = mq.Analyzer, mq.FieldVal, mq.Match, mq.Fuzziness, mq.Prefix, mq.autoFuzzy } else if mpq, ok := q.(*MatchPhraseQuery); ok { - analyzerName, fieldVal, matchString, fuzziness = mpq.Analyzer, mpq.FieldVal, mpq.MatchPhrase, mpq.Fuzziness + analyzerName, fieldVal, matchString, fuzziness, autoFuzzy = mpq.Analyzer, mpq.FieldVal, mpq.MatchPhrase, mpq.Fuzziness, mpq.autoFuzzy } field, source := resolveFieldAndSource(fieldVal) if source != "" { @@ -506,6 +512,9 @@ func ExtractSynonyms(ctx context.Context, m mapping.SynonymMapping, r index.Syno } tokens := analyzer.Analyze([]byte(matchString)) for _, token := range tokens { + if autoFuzzy { + fuzziness = searcher.GetAutoFuzziness(string(token.Term)) + } rv, err = addFuzzySynonymsForTerm(ctx, source, field, string(token.Term), fuzziness, prefix, r, rv) if err != nil { return nil, err @@ -514,10 +523,12 @@ func ExtractSynonyms(ctx context.Context, m mapping.SynonymMapping, r index.Syno } case *MultiPhraseQuery, *PhraseQuery: var fieldVal string + var fuzziness int + var autoFuzzy bool if mpq, ok := q.(*MultiPhraseQuery); ok { - fieldVal = mpq.FieldVal + fieldVal, fuzziness, autoFuzzy = mpq.FieldVal, mpq.Fuzziness, mpq.autoFuzzy } else if pq, ok := q.(*PhraseQuery); ok { - fieldVal = pq.FieldVal + fieldVal, fuzziness, autoFuzzy = pq.FieldVal, pq.Fuzziness, pq.autoFuzzy } field, source := resolveFieldAndSource(fieldVal) if source != "" { @@ -531,7 +542,10 @@ func ExtractSynonyms(ctx context.Context, m mapping.SynonymMapping, r index.Syno } for _, term := range terms { var err error - rv, err = addSynonymsForTerm(ctx, source, term, field, r, rv) + if autoFuzzy { + fuzziness = searcher.GetAutoFuzziness(term) + } + rv, err = addFuzzySynonymsForTerm(ctx, source, field, term, fuzziness, 0, r, rv) if err != nil { return nil, err } @@ -552,12 +566,12 @@ func ExtractSynonyms(ctx context.Context, m mapping.SynonymMapping, r index.Syno case *RegexpQuery: field, source := resolveFieldAndSource(q.FieldVal) 
if source != "" { - return addRegexpSynonymsForTerm(ctx, source, field, q.Regexp, r, rv) + return addRegexpSynonymsForTerm(ctx, source, field, strings.TrimPrefix(q.Regexp, "^"), r, rv) } case *TermQuery: field, source := resolveFieldAndSource(q.FieldVal) if source != "" { - return addSynonymsForTerm(ctx, source, q.Term, field, r, rv) + return addSynonymsForTerm(ctx, source, field, q.Term, r, rv) } case *WildcardQuery: field, source := resolveFieldAndSource(q.FieldVal) @@ -594,7 +608,7 @@ func addRegexpSynonymsForTerm(ctx context.Context, src, field, term string, return nil, err } for _, term := range regexpTerms { - rv, err = addSynonymsForTerm(ctx, src, term, field, r, rv) + rv, err = addSynonymsForTerm(ctx, src, field, term, r, rv) if err != nil { return nil, err } @@ -628,7 +642,7 @@ func addPrefixSynonymsForTerm(ctx context.Context, src, field, term string, return nil, err } for _, term := range prefixTerms { - rv, err = addSynonymsForTerm(ctx, src, term, field, r, rv) + rv, err = addSynonymsForTerm(ctx, src, field, term, r, rv) if err != nil { return nil, err } @@ -641,7 +655,7 @@ func addPrefixSynonymsForTerm(ctx context.Context, src, field, term string, func addFuzzySynonymsForTerm(ctx context.Context, src, field, term string, fuzziness, prefix int, r index.SynonymReader, rv search.FieldTermSynonymMap) (search.FieldTermSynonymMap, error) { if fuzziness == 0 { - return addSynonymsForTerm(ctx, src, term, field, r, rv) + return addSynonymsForTerm(ctx, src, field, term, r, rv) } if ir, ok := r.(index.IndexReaderFuzzy); ok { if fuzziness > searcher.MaxFuzziness { @@ -677,7 +691,7 @@ func addFuzzySynonymsForTerm(ctx context.Context, src, field, term string, fuzzi return nil, err } for _, term := range fuzzyTerms { - rv, err = addSynonymsForTerm(ctx, src, term, field, r, rv) + rv, err = addSynonymsForTerm(ctx, src, field, term, r, rv) if err != nil { return nil, err } @@ -689,8 +703,8 @@ func addFuzzySynonymsForTerm(ctx context.Context, src, field, term string, fuzzi // addSynonymsForTerm finds synonyms for the given term and adds them to the // provided map. 
-func addSynonymsForTerm(ctx context.Context, src, term, field string, r index.SynonymReader, - rv search.FieldTermSynonymMap) (search.FieldTermSynonymMap, error) { +func addSynonymsForTerm(ctx context.Context, src, field, term string, + r index.SynonymReader, rv search.FieldTermSynonymMap) (search.FieldTermSynonymMap, error) { termBytes := []byte(term) termReader, err := r.SynonymTermReader(ctx, src, termBytes) diff --git a/search/searcher/search_fuzzy.go b/search/searcher/search_fuzzy.go index 6c29f845d..d0e5c1ec5 100644 --- a/search/searcher/search_fuzzy.go +++ b/search/searcher/search_fuzzy.go @@ -55,9 +55,11 @@ func NewFuzzySearcher(ctx context.Context, indexReader index.IndexReader, term s // since the fuzzy candidate terms are not collected // for a term search, and the only candidate term is // the term itself - fuzzyTermMatches := ctx.Value(search.FuzzyMatchPhraseKey) - if fuzzyTermMatches != nil { - fuzzyTermMatches.(map[string][]string)[term] = []string{term} + if ctx != nil { + fuzzyTermMatches := ctx.Value(search.FuzzyMatchPhraseKey) + if fuzzyTermMatches != nil { + fuzzyTermMatches.(map[string][]string)[term] = []string{term} + } } return NewTermSearcher(ctx, indexReader, term, field, boost, options) } @@ -94,12 +96,22 @@ func NewFuzzySearcher(ctx context.Context, indexReader index.IndexReader, term s fuzzyTermMatches.(map[string][]string)[term] = candidates } } + // check if the candidates are empty or have one term which is the term itself + if len(candidates) == 0 || (len(candidates) == 1 && candidates[0] == term) { + if ctx != nil { + fuzzyTermMatches := ctx.Value(search.FuzzyMatchPhraseKey) + if fuzzyTermMatches != nil { + fuzzyTermMatches.(map[string][]string)[term] = []string{term} + } + } + return NewTermSearcher(ctx, indexReader, term, field, boost, options) + } return NewMultiTermSearcherBoosted(ctx, indexReader, candidates, field, boost, editDistances, options, true) } -func getAutoFuzziness(term string) int { +func GetAutoFuzziness(term string) int { termLength := len(term) if termLength > AutoFuzzinessHighThreshold { return MaxFuzziness @@ -111,7 +123,7 @@ func getAutoFuzziness(term string) int { func NewAutoFuzzySearcher(ctx context.Context, indexReader index.IndexReader, term string, prefix int, field string, boost float64, options search.SearcherOptions) (search.Searcher, error) { - return NewFuzzySearcher(ctx, indexReader, term, prefix, getAutoFuzziness(term), field, boost, options) + return NewFuzzySearcher(ctx, indexReader, term, prefix, GetAutoFuzziness(term), field, boost, options) } type fuzzyCandidates struct { diff --git a/search/searcher/search_phrase.go b/search/searcher/search_phrase.go index bf24b465a..9c2ff7d5f 100644 --- a/search/searcher/search_phrase.go +++ b/search/searcher/search_phrase.go @@ -164,6 +164,40 @@ func NewMultiPhraseSearcher(ctx context.Context, indexReader index.IndexReader, } } + if fts, ok := ctx.Value(search.FieldTermSynonymMapKey).(search.FieldTermSynonymMap); ok { + if ts, exists := fts[field]; exists { + if fuzzinessEnabled { + for term, fuzzyTerms := range fuzzyTermMatches { + fuzzySynonymTerms := make([]string, 0, len(fuzzyTerms)) + if s, found := ts[term]; found { + fuzzySynonymTerms = append(fuzzySynonymTerms, s...) + } + for _, fuzzyTerm := range fuzzyTerms { + if fuzzyTerm == term { + continue + } + if s, found := ts[fuzzyTerm]; found { + fuzzySynonymTerms = append(fuzzySynonymTerms, s...) + } + } + if len(fuzzySynonymTerms) > 0 { + fuzzyTermMatches[term] = append(fuzzyTermMatches[term], fuzzySynonymTerms...) 
+ } + } + } else { + for _, termPos := range terms { + for _, term := range termPos { + if s, found := ts[term]; found { + if fuzzyTermMatches == nil { + fuzzyTermMatches = make(map[string][]string) + } + fuzzyTermMatches[term] = s + } + } + } + } + } + } mustSearcher, err := NewConjunctionSearcher(ctx, indexReader, termPositionSearchers, options) if err != nil { // close any searchers already opened @@ -337,6 +371,9 @@ func (s *PhraseSearcher) expandFuzzyMatches(tlm search.TermLocationMap, expanded for term, fuzzyMatches := range s.fuzzyTermMatches { locations := tlm[term] for _, fuzzyMatch := range fuzzyMatches { + if fuzzyMatch == term { + continue + } locations = append(locations, tlm[fuzzyMatch]...) } expandedTlm[term] = locations diff --git a/search/searcher/search_regexp.go b/search/searcher/search_regexp.go index b88133e31..74caf0703 100644 --- a/search/searcher/search_regexp.go +++ b/search/searcher/search_regexp.go @@ -68,6 +68,10 @@ func NewRegexpStringSearcher(ctx context.Context, indexReader index.IndexReader, if err != nil { return nil, err } + // check if the candidateTerms are empty or have one term which is the term itself + if len(candidateTerms) == 0 || (len(candidateTerms) == 1 && candidateTerms[0] == pattern) { + return NewTermSearcher(ctx, indexReader, pattern, field, boost, options) + } return NewMultiTermSearcher(ctx, indexReader, candidateTerms, field, boost, options, true) diff --git a/search/searcher/search_term.go b/search/searcher/search_term.go index cd794ea32..c519d8d51 100644 --- a/search/searcher/search_term.go +++ b/search/searcher/search_term.go @@ -38,14 +38,23 @@ type TermSearcher struct { tfd index.TermFieldDoc } -func NewTermSearcher(ctx context.Context, indexReader index.IndexReader, term string, field string, boost float64, options search.SearcherOptions) (*TermSearcher, error) { +func NewTermSearcher(ctx context.Context, indexReader index.IndexReader, term string, field string, boost float64, options search.SearcherOptions) (search.Searcher, error) { if isTermQuery(ctx) { ctx = context.WithValue(ctx, search.QueryTypeKey, search.Term) } return NewTermSearcherBytes(ctx, indexReader, []byte(term), field, boost, options) } -func NewTermSearcherBytes(ctx context.Context, indexReader index.IndexReader, term []byte, field string, boost float64, options search.SearcherOptions) (*TermSearcher, error) { +func NewTermSearcherBytes(ctx context.Context, indexReader index.IndexReader, term []byte, field string, boost float64, options search.SearcherOptions) (search.Searcher, error) { + if ctx != nil { + if fts, ok := ctx.Value(search.FieldTermSynonymMapKey).(search.FieldTermSynonymMap); ok { + if ts, exists := fts[field]; exists { + if s, found := ts[string(term)]; found { + return NewSynonymSearcher(ctx, indexReader, term, s, field, boost, options) + } + } + } + } needFreqNorm := options.Score != "none" reader, err := indexReader.TermFieldReader(ctx, term, field, needFreqNorm, needFreqNorm, options.IncludeTermVectors) if err != nil { @@ -69,6 +78,50 @@ func newTermSearcherFromReader(indexReader index.IndexReader, reader index.TermF }, nil } +func NewSynonymSearcher(ctx context.Context, indexReader index.IndexReader, term []byte, synonyms []string, field string, boost float64, options search.SearcherOptions) (search.Searcher, error) { + createTermSearcher := func(term []byte, boostVal float64) (search.Searcher, error) { + needFreqNorm := options.Score != "none" + reader, err := indexReader.TermFieldReader(ctx, term, field, needFreqNorm, needFreqNorm, 
options.IncludeTermVectors) + if err != nil { + return nil, err + } + return newTermSearcherFromReader(indexReader, reader, term, field, boostVal, options) + } + // create a searcher for the term itself + termSearcher, err := createTermSearcher(term, boost) + if err != nil { + return nil, err + } + // constituent searchers of the disjunction + qsearchers := make([]search.Searcher, 0, len(synonyms)+1) + // helper method to close all the searchers we've created + // in case of an error + qsearchersClose := func() { + for _, searcher := range qsearchers { + if searcher != nil { + _ = searcher.Close() + } + } + } + qsearchers = append(qsearchers, termSearcher) + // create a searcher for each synonym + for _, synonym := range synonyms { + synonymSearcher, err := createTermSearcher([]byte(synonym), boost/2.0) + if err != nil { + qsearchersClose() + return nil, err + } + qsearchers = append(qsearchers, synonymSearcher) + } + // create a disjunction searcher + rv, err := NewDisjunctionSearcher(ctx, indexReader, qsearchers, 0, options) + if err != nil { + qsearchersClose() + return nil, err + } + return rv, nil +} + func (s *TermSearcher) Size() int { return reflectStaticSizeTermSearcher + size.SizeOfPtr + s.reader.Size() + diff --git a/search/searcher/search_term_prefix.go b/search/searcher/search_term_prefix.go index dc16e4864..3b05e5a8d 100644 --- a/search/searcher/search_term_prefix.go +++ b/search/searcher/search_term_prefix.go @@ -52,6 +52,10 @@ func NewTermPrefixSearcher(ctx context.Context, indexReader index.IndexReader, p reportIOStats(ctx, fieldDict.BytesRead()) search.RecordSearchCost(ctx, search.AddM, fieldDict.BytesRead()) } + // check if the terms are empty or have one term which is the prefix itself + if len(terms) == 0 || (len(terms) == 1 && terms[0] == prefix) { + return NewTermSearcher(ctx, indexReader, prefix, field, boost, options) + } return NewMultiTermSearcher(ctx, indexReader, terms, field, boost, options, true) } diff --git a/search_test.go b/search_test.go index bdfb2fd42..cd1346936 100644 --- a/search_test.go +++ b/search_test.go @@ -19,6 +19,7 @@ import ( "encoding/json" "fmt" "math" + "math/rand" "reflect" "sort" "strconv" @@ -41,6 +42,7 @@ import ( "github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/milliseconds" "github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/nanoseconds" "github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/seconds" + "github.com/blevesearch/bleve/v2/analysis/lang/en" "github.com/blevesearch/bleve/v2/analysis/token/length" "github.com/blevesearch/bleve/v2/analysis/token/lowercase" "github.com/blevesearch/bleve/v2/analysis/token/shingle" @@ -3929,3 +3931,373 @@ func TestSynonymTermReader(t *testing.T) { } } } + +func TestSynonymSearchQueries(t *testing.T) { + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) + + synonymCollection := "collection1" + + synonymSourceName := "english" + + analyzer := en.AnalyzerName + + textField := mapping.NewTextFieldMapping() + textField.Analyzer = analyzer + textField.SynonymSource = synonymSourceName + + imap := mapping.NewIndexMapping() + imap.DefaultMapping.AddFieldMappingsAt("text", textField) + imap.AddSynonymSource(synonymSourceName, synonymCollection, analyzer) + err := imap.Validate() + if err != nil { + t.Fatal(err) + } + + idx, err := New(tmpIndexPath, imap) + if err != nil { + t.Fatal(err) + } + defer func() { + err = idx.Close() + if err != nil { + t.Fatal(err) + } + }() + + documents := map[string]map[string]interface{}{ + "doc1": { + "text": 
`The hardworking employee consistently strives to exceed expectations. + His industrious nature makes him a valuable asset to any team. + His conscientious attention to detail ensures that projects are completed efficiently and accurately. + He remains persistent even in the face of challenges.`, + }, + "doc2": { + "text": `The tranquil surroundings of the retreat provide a perfect escape from the hustle and bustle of city life. + Guests enjoy the peaceful atmosphere, which is perfect for relaxation and rejuvenation. + The calm environment offers the ideal place to meditate and connect with nature. + Even the most stressed individuals find themselves feeling relaxed and at ease.`, + }, + "doc3": { + "text": `The house was burned down, leaving only a charred shell behind. + The intense heat of the flames caused the walls to warp and the roof to cave in. + The seared remains of the furniture told the story of the blaze. + The incinerated remains left little more than ashes to remember what once was.`, + }, + "doc4": { + "text": `The faithful dog followed its owner everywhere, always loyal and steadfast. + It was devoted to protecting its family, and its reliable nature meant it could always be trusted. + In the face of danger, the dog remained calm, knowing its role was to stay vigilant. + Its trustworthy companionship provided comfort and security.`, + }, + "doc5": { + "text": `The lively market is bustling with activity from morning to night. + The dynamic energy of the crowd fills the air as vendors sell their wares. + Shoppers wander from stall to stall, captivated by the vibrant colors and energetic atmosphere. + This place is alive with movement and life.`, + }, + "doc6": { + "text": `In moments of crisis, bravery shines through. + It takes valor to step forward when others are afraid to act. + Heroes are defined by their guts and nerve, taking risks to protect others. + Boldness in the face of danger is what sets them apart.`, + }, + "doc7": { + "text": `Innovation is the driving force behind progress in every industry. + The company fosters an environment of invention, encouraging creativity at every level. + The focus on novelty and improvement means that ideas are always evolving. + The development of new solutions is at the core of the company's mission.`, + }, + "doc8": { + "text": `The blazing sunset cast a radiant glow over the horizon, painting the sky with hues of red and orange. + The intense heat of the day gave way to a fiery display of color. + As the sun set, the glowing light illuminated the landscape, creating a breathtaking scene. + The fiery sky was a sight to behold.`, + }, + "doc9": { + "text": `The fertile soil of the valley makes it perfect for farming. + The productive land yields abundant crops year after year. + Farmers rely on the rich, fruitful ground to sustain their livelihoods. + The area is known for its plentiful harvests, supporting both local communities and export markets.`, + }, + "doc10": { + "text": `The arid desert is a vast, dry expanse with little water or vegetation. + The barren landscape stretches as far as the eye can see, offering little respite from the scorching sun. + The desolate environment is unforgiving to those who venture too far without preparation. + The parched earth cracks under the heat, creating a harsh, unyielding terrain.`, + }, + "doc11": { + "text": `The fox is known for its cunning and intelligence. + As a predator, it relies on its sharp instincts to outwit its prey. 
+ Its vulpine nature makes it both mysterious and fascinating. + The fox's ability to hunt with precision and stealth is what makes it such a formidable hunter.`, + }, + "doc12": { + "text": `The dog is often considered man's best friend due to its loyal nature. + As a companion, the hound provides both protection and affection. + The puppy quickly becomes a member of the family, always by your side. + Its playful energy and unshakable loyalty make it a beloved pet.`, + }, + "doc13": { + "text": `He worked tirelessly through the night, always persistent in his efforts. + His industrious approach to problem-solving kept the project moving forward. + No matter how difficult the task, he remained focused, always giving his best. + His dedication paid off when the project was completed ahead of schedule.`, + }, + "doc14": { + "text": `The river flowed calmly through the valley, its peaceful current offering a sense of tranquility. + Fishermen relaxed by the banks, enjoying the calm waters that reflected the sky above. + The tranquil nature of the river made it a perfect spot for meditation. + As the day ended, the river's quiet flow brought a sense of peace.`, + }, + "doc15": { + "text": `After the fire, all that was left was the charred remains of what once was. + The seared walls of the house told a tragic story. + The intensity of the blaze had burned everything in its path, leaving only the smoldering wreckage behind. + The incinerated objects could not be salvaged, and the damage was beyond repair.`, + }, + "doc16": { + "text": `The devoted employee always went above and beyond to complete his tasks. + His steadfast commitment to the company made him a valuable team member. + He was reliable, never failing to meet deadlines. + His trustworthiness earned him the respect of his colleagues, and was considered an + ingenious expert in his field.`, + }, + "doc17": { + "text": `The city is vibrant, full of life and energy. + The dynamic pace of the streets reflects the diverse culture of its inhabitants. + People from all walks of life contribute to the energetic atmosphere. + The city's lively spirit can be felt in every corner, from the bustling markets to the lively festivals.`, + }, + "doc18": { + "text": `In a moment of uncertainty, he made a bold decision that would change his life forever. + It took courage and nerve to take the leap, but his bravery paid off. + The guts to face the unknown allowed him to achieve something remarkable. + Being an bright scholar, the skill he demonstrated inspired those around him.`, + }, + "doc19": { + "text": `Innovation is often born from necessity, and the lightbulb is a prime example. + Thomas Edison's invention changed the world, offering a new way to see the night. + The creativity involved in developing such a groundbreaking product sparked a wave of + novelty in the scientific community. This improvement in technology continues to shape the modern world. + He was a clever academic and a smart researcher.`, + }, + "doc20": { + "text": `The fiery volcano erupted with a force that shook the earth. Its radiant lava flowed down the sides, + illuminating the night sky. The intense heat from the eruption could be felt miles away, as the + glowing lava burned everything in its path. 
The fiery display was both terrifying and mesmerizing.`, + }, + } + + synonymDocuments := map[string]*SynonymDefinition{ + "synDoc1": { + Synonyms: []string{"hardworking", "industrious", "conscientious", "persistent", "focused", "devoted"}, + }, + "synDoc2": { + Synonyms: []string{"tranquil", "peaceful", "calm", "relaxed", "unruffled"}, + }, + "synDoc3": { + Synonyms: []string{"burned", "charred", "seared", "incinerated", "singed"}, + }, + "synDoc4": { + Synonyms: []string{"faithful", "steadfast", "devoted", "reliable", "trustworthy"}, + }, + "synDoc5": { + Synonyms: []string{"lively", "dynamic", "energetic", "vivid", "vibrating"}, + }, + "synDoc6": { + Synonyms: []string{"bravery", "valor", "guts", "nerve", "boldness"}, + }, + "synDoc7": { + Input: []string{"innovation"}, + Synonyms: []string{"invention", "creativity", "novelty", "improvement", "development"}, + }, + "synDoc8": { + Input: []string{"blazing"}, + Synonyms: []string{"intense", "radiant", "burning", "fiery", "glowing"}, + }, + "synDoc9": { + Input: []string{"fertile"}, + Synonyms: []string{"productive", "fruitful", "rich", "abundant", "plentiful"}, + }, + "synDoc10": { + Input: []string{"arid"}, + Synonyms: []string{"dry", "barren", "desolate", "parched", "unfertile"}, + }, + "synDoc11": { + Input: []string{"fox"}, + Synonyms: []string{"vulpine", "canine", "predator", "hunter", "pursuer"}, + }, + "synDoc12": { + Input: []string{"dog"}, + Synonyms: []string{"canine", "hound", "puppy", "pup", "companion"}, + }, + "synDoc13": { + Synonyms: []string{"researcher", "scientist", "scholar", "academic", "expert"}, + }, + "synDoc14": { + Synonyms: []string{"bright", "clever", "ingenious", "sharp", "astute", "smart"}, + }, + } + + // Combine both maps into a slice of map entries (as they both have similar structure) + var combinedDocIDs []string + for id := range synonymDocuments { + combinedDocIDs = append(combinedDocIDs, id) + } + for id := range documents { + combinedDocIDs = append(combinedDocIDs, id) + } + rand.Shuffle(len(combinedDocIDs), func(i, j int) { + combinedDocIDs[i], combinedDocIDs[j] = combinedDocIDs[j], combinedDocIDs[i] + }) + + // Function to create batches of 5 + createDocBatches := func(docs []string, batchSize int) [][]string { + var batches [][]string + for i := 0; i < len(docs); i += batchSize { + end := i + batchSize + if end > len(docs) { + end = len(docs) + } + batches = append(batches, docs[i:end]) + } + return batches + } + // Create batches of 5 documents + var batchSize = 5 + docBatches := createDocBatches(combinedDocIDs, batchSize) + if len(docBatches) == 0 { + t.Fatal("expected batches") + } + totalDocs := 0 + for _, batch := range docBatches { + totalDocs += len(batch) + } + if totalDocs != len(combinedDocIDs) { + t.Fatalf("expected %d documents, got %d", len(combinedDocIDs), totalDocs) + } + + var batches []*Batch + for _, docBatch := range docBatches { + batch := idx.NewBatch() + for _, docID := range docBatch { + if synDef, ok := synonymDocuments[docID]; ok { + err := batch.IndexSynonym(docID, synonymCollection, synDef) + if err != nil { + t.Fatal(err) + } + } else { + err := batch.Index(docID, documents[docID]) + if err != nil { + t.Fatal(err) + } + } + } + batches = append(batches, batch) + } + for _, batch := range batches { + err = idx.Batch(batch) + if err != nil { + t.Fatal(err) + } + } + + type testStruct struct { + query string + expectHits []string + } + + testQueries := []testStruct{ + { + query: `{ + "match": "hardworking employee", + "field": "text" + }`, + expectHits: []string{"doc1", 
"doc13", "doc16", "doc4", "doc7"}, + }, + { + query: `{ + "match": "Hardwork and industrius efforts bring lovely and tranqual moments, with a glazing blow of valour.", + "field": "text", + "fuzziness": "auto" + }`, + expectHits: []string{ + "doc1", "doc13", "doc14", "doc15", "doc16", + "doc17", "doc18", "doc2", "doc20", "doc3", + "doc4", "doc5", "doc6", "doc7", "doc8", "doc9", + }, + }, + { + query: `{ + "prefix": "in", + "field": "text" + }`, + expectHits: []string{ + "doc1", "doc11", "doc13", "doc15", "doc16", + "doc17", "doc18", "doc19", "doc2", "doc20", + "doc3", "doc4", "doc7", "doc8", + }, + }, + { + query: `{ + "prefix": "vivid", + "field": "text" + }`, + expectHits: []string{ + "doc17", "doc5", + }, + }, + { + query: `{ + "match_phrase": "smart academic", + "field": "text" + }`, + expectHits: []string{"doc16", "doc18", "doc19"}, + }, + { + query: `{ + "match_phrase": "smrat acedemic", + "field": "text", + "fuzziness": "auto" + }`, + expectHits: []string{"doc16", "doc18", "doc19"}, + }, + { + query: `{ + "wildcard": "br*", + "field": "text" + }`, + expectHits: []string{"doc11", "doc14", "doc16", "doc18", "doc19", "doc6", "doc8"}, + }, + } + + for _, dtq := range testQueries { + q, err := query.ParseQuery([]byte(dtq.query)) + if err != nil { + t.Fatal(err) + } + sr := NewSearchRequest(q) + sr.Highlight = NewHighlightWithStyle(ansi.Name) + sr.SortBy([]string{"_id"}) + sr.Fields = []string{"*"} + sr.Size = 30 + sr.Explain = true + + res, err := idx.Search(sr) + if err != nil { + t.Fatal(err) + } + if len(res.Hits) != len(dtq.expectHits) { + t.Fatalf("expected %d hits, got %d", len(dtq.expectHits), len(res.Hits)) + } + // sort the expected hits to match the order of the search results + sort.Strings(dtq.expectHits) + for i, hit := range res.Hits { + if hit.ID != dtq.expectHits[i] { + t.Fatalf("expected docID %s, got %s", dtq.expectHits[i], hit.ID) + } + } + } + +} From 91be4720983269c6ca81955a65403c711c8df90c Mon Sep 17 00:00:00 2001 From: CascadingRadium Date: Fri, 29 Nov 2024 15:33:34 +0530 Subject: [PATCH 09/35] remove regex optimization --- search/searcher/search_regexp.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/search/searcher/search_regexp.go b/search/searcher/search_regexp.go index 74caf0703..b88133e31 100644 --- a/search/searcher/search_regexp.go +++ b/search/searcher/search_regexp.go @@ -68,10 +68,6 @@ func NewRegexpStringSearcher(ctx context.Context, indexReader index.IndexReader, if err != nil { return nil, err } - // check if the candidateTerms are empty or have one term which is the term itself - if len(candidateTerms) == 0 || (len(candidateTerms) == 1 && candidateTerms[0] == pattern) { - return NewTermSearcher(ctx, indexReader, pattern, field, boost, options) - } return NewMultiTermSearcher(ctx, indexReader, candidateTerms, field, boost, options, true) From 9baf914f1b8fd6f4f530b9df779f7f07a3bdcddd Mon Sep 17 00:00:00 2001 From: CascadingRadium Date: Sat, 30 Nov 2024 23:25:37 +0530 Subject: [PATCH 10/35] add default synonym sources --- index_alias_impl.go | 5 +++++ mapping/document.go | 32 +++++++++++++++++++++++++++----- mapping/index.go | 26 +++++++++++++++++++++++++- 3 files changed, 57 insertions(+), 6 deletions(-) diff --git a/index_alias_impl.go b/index_alias_impl.go index 3c7cdcd32..73cc637f7 100644 --- a/index_alias_impl.go +++ b/index_alias_impl.go @@ -30,6 +30,7 @@ type indexAliasImpl struct { name string indexes []Index mutex sync.RWMutex + mapping mapping.IndexMapping open bool } @@ -360,6 +361,10 @@ func (i *indexAliasImpl) Mapping() 
mapping.IndexMapping {
 		return nil
 	}
 
+	if i.mapping != nil {
+		return i.mapping
+	}
+
 	err := i.isAliasToSingleIndex()
 	if err != nil {
 		return nil
diff --git a/mapping/document.go b/mapping/document.go
index 5d70af912..b470afaa4 100644
--- a/mapping/document.go
+++ b/mapping/document.go
@@ -40,11 +40,12 @@ import (
 // are used. To disable this automatic handling, set
 // Dynamic to false.
 type DocumentMapping struct {
-	Enabled         bool                        `json:"enabled"`
-	Dynamic         bool                        `json:"dynamic"`
-	Properties      map[string]*DocumentMapping `json:"properties,omitempty"`
-	Fields          []*FieldMapping             `json:"fields,omitempty"`
-	DefaultAnalyzer string                      `json:"default_analyzer,omitempty"`
+	Enabled              bool                        `json:"enabled"`
+	Dynamic              bool                        `json:"dynamic"`
+	Properties           map[string]*DocumentMapping `json:"properties,omitempty"`
+	Fields               []*FieldMapping             `json:"fields,omitempty"`
+	DefaultAnalyzer      string                      `json:"default_analyzer,omitempty"`
+	DefaultSynonymSource string                      `json:"default_synonym_source,omitempty"`
 
 	// StructTagKey overrides "json" when looking for field names in struct tags
 	StructTagKey string `json:"struct_tag_key,omitempty"`
@@ -306,6 +307,11 @@ func (dm *DocumentMapping) UnmarshalJSON(data []byte) error {
 			if err != nil {
 				return err
 			}
+		case "default_synonym_source":
+			err := util.UnmarshalJSON(v, &dm.DefaultSynonymSource)
+			if err != nil {
+				return err
+			}
 		case "properties":
 			err := util.UnmarshalJSON(v, &dm.Properties)
 			if err != nil {
@@ -349,6 +355,22 @@ func (dm *DocumentMapping) defaultAnalyzerName(path []string) string {
 	return rv
 }
 
+func (dm *DocumentMapping) defaultSynonymSource(path []string) string {
+	current := dm
+	rv := current.DefaultSynonymSource
+	for _, pathElement := range path {
+		var ok bool
+		current, ok = current.Properties[pathElement]
+		if !ok {
+			break
+		}
+		if current.DefaultSynonymSource != "" {
+			rv = current.DefaultSynonymSource
+		}
+	}
+	return rv
+}
+
 func (dm *DocumentMapping) walkDocument(data interface{}, path []string, indexes []uint64, context *walkContext) {
 	// allow default "json" tag to be overridden
 	structTagKey := dm.StructTagKey
diff --git a/mapping/index.go b/mapping/index.go
index 36b423108..feb7634a9 100644
--- a/mapping/index.go
+++ b/mapping/index.go
@@ -49,6 +49,7 @@ type IndexMappingImpl struct {
 	DefaultType           string `json:"default_type"`
 	DefaultAnalyzer       string `json:"default_analyzer"`
 	DefaultDateTimeParser string `json:"default_datetime_parser"`
+	DefaultSynonymSource  string `json:"default_synonym_source"`
 	DefaultField          string `json:"default_field"`
 	StoreDynamic          bool   `json:"store_dynamic"`
 	IndexDynamic          bool   `json:"index_dynamic"`
@@ -268,6 +269,11 @@ func (im *IndexMappingImpl) UnmarshalJSON(data []byte) error {
 			if err != nil {
 				return err
 			}
+		case "default_synonym_source":
+			err := util.UnmarshalJSON(v, &im.DefaultSynonymSource)
+			if err != nil {
+				return err
+			}
 		case "default_field":
 			err := util.UnmarshalJSON(v, &im.DefaultField)
 			if err != nil {
@@ -517,7 +523,25 @@ func (im *IndexMappingImpl) SynonymSourceForPath(path string) string {
 		}
 	}
 
-	return ""
+	// next we will try default synonym sources for the path
+	pathDecoded := decodePath(path)
+	for _, docMapping := range im.TypeMapping {
+		if docMapping.Enabled {
+			rv := docMapping.defaultSynonymSource(pathDecoded)
+			if rv != "" {
+				return rv
+			}
+		}
+	}
+	// now the default synonym source for the default mapping
+	if im.DefaultMapping.Enabled {
+		rv := im.DefaultMapping.defaultSynonymSource(pathDecoded)
+		if rv != "" {
+			return rv
+		}
+	}
+
+	return im.DefaultSynonymSource
 }
 
 func (im *IndexMappingImpl) SynonymCount() int { 
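
[Note, not part of the patch: a minimal sketch of the synonym source resolution order introduced above, using only APIs visible in this series (NewIndexMapping, AddSynonymSource, SynonymSourceForPath, and the new DefaultSynonymSource fields); the "english" source and "collection1" collection names are illustrative.]

package main

import (
	"fmt"

	"github.com/blevesearch/bleve/v2/analysis/lang/en"
	"github.com/blevesearch/bleve/v2/mapping"
)

func main() {
	imap := mapping.NewIndexMapping()

	// register a synonym source named "english", backed by the
	// "collection1" synonym collection and the en analyzer
	imap.AddSynonymSource("english", "collection1", en.AnalyzerName)

	// a text field with no explicit SynonymSource of its own
	textField := mapping.NewTextFieldMapping()
	imap.DefaultMapping.AddFieldMappingsAt("text", textField)

	// resolution order for the "text" path after this patch:
	//   1. the field-level SynonymSource (unset here)
	//   2. the enclosing DocumentMapping's DefaultSynonymSource (unset here)
	//   3. the index-level DefaultSynonymSource (set below)
	imap.DefaultSynonymSource = "english"

	// expected to print "english" given the fallback chain above
	fmt.Println(imap.SynonymSourceForPath("text"))
}
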
From dd692bfc116d4ccd84742a8dd12f2ed8bd231405 Mon Sep 17 00:00:00 2001 From: CascadingRadium Date: Tue, 3 Dec 2024 14:22:37 +0530 Subject: [PATCH 11/35] refactor code --- search.go | 8 +++ search/query/query.go | 156 ++++++++++++++++++++++++------------------ search/util.go | 13 ++++ search_knn.go | 4 +- 4 files changed, 111 insertions(+), 70 deletions(-) diff --git a/search.go b/search.go index 7861d24b8..8734867e5 100644 --- a/search.go +++ b/search.go @@ -444,6 +444,9 @@ type SearchResult struct { MaxScore float64 `json:"max_score"` Took time.Duration `json:"took"` Facets search.FacetResults `json:"facets"` + // special fields that are applicable only for search + // results that are obtained from a presearch + PreSearchResults map[string]interface{} `json:"presearch_results,omitempty"` } func (sr *SearchResult) Size() int { @@ -589,3 +592,8 @@ func (r *SearchRequest) SortFunc() func(data sort.Interface) { return sort.Sort } + +func isMatchNoneQuery(q query.Query) bool { + _, ok := q.(*query.MatchNoneQuery) + return ok +} diff --git a/search/query/query.go b/search/query/query.go index c908bbc54..5e3259c11 100644 --- a/search/query/query.go +++ b/search/query/query.go @@ -93,6 +93,18 @@ func ParsePreSearchData(input []byte) (map[string]interface{}, error) { rv = make(map[string]interface{}) } rv[search.KnnPreSearchDataKey] = value + case search.SynonymPreSearchDataKey: + var value search.FieldTermSynonymMap + if v != nil { + err := util.UnmarshalJSON(v, &value) + if err != nil { + return nil, err + } + } + if rv == nil { + rv = make(map[string]interface{}) + } + rv[search.SynonymPreSearchDataKey] = value } } return rv, nil @@ -488,11 +500,11 @@ func ExtractSynonyms(ctx context.Context, m mapping.SynonymMapping, r index.Syno } case *FuzzyQuery: field, source := resolveFieldAndSource(q.FieldVal) - fuzziness := q.Fuzziness - if q.autoFuzzy { - fuzziness = searcher.GetAutoFuzziness(q.Term) - } if source != "" { + fuzziness := q.Fuzziness + if q.autoFuzzy { + fuzziness = searcher.GetAutoFuzziness(q.Term) + } return addFuzzySynonymsForTerm(ctx, source, field, q.Term, fuzziness, q.Prefix, r, rv) } case *MatchQuery, *MatchPhraseQuery: @@ -587,43 +599,49 @@ func ExtractSynonyms(ctx context.Context, m mapping.SynonymMapping, r index.Syno // adds their synonyms to the provided map. 
func addRegexpSynonymsForTerm(ctx context.Context, src, field, term string, r index.SynonymReader, rv search.FieldTermSynonymMap) (search.FieldTermSynonymMap, error) { - - if ir, ok := r.(index.IndexReaderRegexp); ok { - fieldDict, err := ir.FieldDictRegexp(field, term) - if err != nil { - return nil, err - } - defer func() { - if cerr := fieldDict.Close(); cerr != nil && err == nil { - err = cerr - } - }() - regexpTerms := []string{term} - tfd, err := fieldDict.Next() - for err == nil && tfd != nil { - regexpTerms = append(regexpTerms, tfd.Term) - tfd, err = fieldDict.Next() + // find the terms with this regexp + var ok bool + var ir index.IndexReaderRegexp + if ir, ok = r.(index.IndexReaderRegexp); !ok { + return addSynonymsForTerm(ctx, src, field, term, r, rv) + } + fieldDict, err := ir.FieldDictRegexp(field, term) + if err != nil { + return nil, err + } + defer func() { + if cerr := fieldDict.Close(); cerr != nil && err == nil { + err = cerr } + }() + regexpTerms := []string{term} + tfd, err := fieldDict.Next() + for err == nil && tfd != nil { + regexpTerms = append(regexpTerms, tfd.Term) + tfd, err = fieldDict.Next() + } + if err != nil { + return nil, err + } + for _, term := range regexpTerms { + rv, err = addSynonymsForTerm(ctx, src, field, term, r, rv) if err != nil { return nil, err } - for _, term := range regexpTerms { - rv, err = addSynonymsForTerm(ctx, src, field, term, r, rv) - if err != nil { - return nil, err - } - } - return rv, nil } - return nil, nil + return rv, nil } // addPrefixSynonymsForTerm finds all terms that match the given prefix and // adds their synonyms to the provided map. func addPrefixSynonymsForTerm(ctx context.Context, src, field, term string, r index.SynonymReader, rv search.FieldTermSynonymMap) (search.FieldTermSynonymMap, error) { - // find the terms with this prefix - fieldDict, err := r.FieldDictPrefix(field, []byte(term)) + var ok bool + var ir index.IndexReaderPrefix + if ir, ok = r.(index.IndexReaderPrefix); !ok { + return addSynonymsForTerm(ctx, src, field, term, r, rv) + } + fieldDict, err := ir.FieldDictPrefix(field, []byte(term)) if err != nil { return nil, err } @@ -654,51 +672,50 @@ func addPrefixSynonymsForTerm(ctx context.Context, src, field, term string, // given fuzziness and adds their synonyms to the provided map. 
func addFuzzySynonymsForTerm(ctx context.Context, src, field, term string, fuzziness, prefix int, r index.SynonymReader, rv search.FieldTermSynonymMap) (search.FieldTermSynonymMap, error) { - if fuzziness == 0 { + if fuzziness > searcher.MaxFuzziness { + return nil, fmt.Errorf("fuzziness exceeds max (%d)", searcher.MaxFuzziness) + } + if fuzziness < 0 { + return nil, fmt.Errorf("invalid fuzziness, negative") + } + var ok bool + var ir index.IndexReaderFuzzy + if ir, ok = r.(index.IndexReaderFuzzy); !ok || fuzziness == 0 { return addSynonymsForTerm(ctx, src, field, term, r, rv) } - if ir, ok := r.(index.IndexReaderFuzzy); ok { - if fuzziness > searcher.MaxFuzziness { - return nil, fmt.Errorf("fuzziness exceeds max (%d)", searcher.MaxFuzziness) + prefixTerm := "" + for i, r := range term { + if i < prefix { + prefixTerm += string(r) + } else { + break } - if fuzziness < 0 { - return nil, fmt.Errorf("invalid fuzziness, negative") - } - prefixTerm := "" - for i, r := range term { - if i < prefix { - prefixTerm += string(r) - } else { - break - } - } - fieldDict, err := ir.FieldDictFuzzy(field, term, fuzziness, prefixTerm) - if err != nil { - return nil, err - } - defer func() { - if cerr := fieldDict.Close(); cerr != nil && err == nil { - err = cerr - } - }() - fuzzyTerms := []string{term} - tfd, err := fieldDict.Next() - for err == nil && tfd != nil { - fuzzyTerms = append(fuzzyTerms, tfd.Term) - tfd, err = fieldDict.Next() + } + fieldDict, err := ir.FieldDictFuzzy(field, term, fuzziness, prefixTerm) + if err != nil { + return nil, err + } + defer func() { + if cerr := fieldDict.Close(); cerr != nil && err == nil { + err = cerr } + }() + fuzzyTerms := []string{term} + tfd, err := fieldDict.Next() + for err == nil && tfd != nil { + fuzzyTerms = append(fuzzyTerms, tfd.Term) + tfd, err = fieldDict.Next() + } + if err != nil { + return nil, err + } + for _, term := range fuzzyTerms { + rv, err = addSynonymsForTerm(ctx, src, field, term, r, rv) if err != nil { return nil, err } - for _, term := range fuzzyTerms { - rv, err = addSynonymsForTerm(ctx, src, field, term, r, rv) - if err != nil { - return nil, err - } - } - return rv, nil } - return nil, nil + return rv, nil } // addSynonymsForTerm finds synonyms for the given term and adds them to the @@ -722,6 +739,9 @@ func addSynonymsForTerm(ctx context.Context, src, field, term string, synonyms = append(synonyms, synonym) synonym, err = termReader.Next() } + if err != nil { + return nil, err + } if len(synonyms) > 0 { if rv == nil { rv = make(search.FieldTermSynonymMap) @@ -731,5 +751,5 @@ func addSynonymsForTerm(ctx context.Context, src, field, term string, } rv[field][term] = synonyms } - return rv, err + return rv, nil } diff --git a/search/util.go b/search/util.go index 0f16db19a..9f5a15cac 100644 --- a/search/util.go +++ b/search/util.go @@ -148,6 +148,19 @@ type SearcherEndCallbackFn func(size uint64) error // field -> term -> synonyms type FieldTermSynonymMap map[string]map[string][]string +func (f *FieldTermSynonymMap) MergeWith(fts FieldTermSynonymMap) { + for field, termSynonymMap := range fts { + // Ensure the field exists in the receiver + if _, exists := (*f)[field]; !exists { + (*f)[field] = make(map[string][]string) + } + for term, synonyms := range termSynonymMap { + // Append synonyms + (*f)[field][term] = append((*f)[field][term], synonyms...) 
+ } + } +} + const FieldTermSynonymMapKey = "_field_term_synonym_map_key" const SearcherStartCallbackKey = "_searcher_start_callback_key" diff --git a/search_knn.go b/search_knn.go index 309b36593..498770a3a 100644 --- a/search_knn.go +++ b/search_knn.go @@ -381,7 +381,7 @@ func (i *indexImpl) runKnnCollector(ctx context.Context, req *SearchRequest, rea continue } - if _, ok := filterQ.(*query.MatchNoneQuery); ok { + if isMatchNoneQuery(filterQ) { // Filtering required since no hits are eligible. requiresFiltering[idx] = true // a match none query just means none the documents are eligible @@ -559,7 +559,7 @@ func requestHasKNN(req *SearchRequest) bool { func isKNNrequestSatisfiedByPreSearch(req *SearchRequest) bool { // if req.Query is not match_none => then we need to go to phase 2 // to perform the actual query. - if _, ok := req.Query.(*query.MatchNoneQuery); !ok { + if !isMatchNoneQuery(req.Query) { return false } // req.Query is a match_none query From 4d4440eb9fafb0132256ac7efca7b3e75e3cac41 Mon Sep 17 00:00:00 2001 From: CascadingRadium Date: Tue, 3 Dec 2024 17:43:41 +0530 Subject: [PATCH 12/35] alias path code --- index_alias_impl.go | 134 +++++++++++++++++++++++++++++++++++--------- index_impl.go | 15 ++++- pre_search.go | 44 +++++++++++---- search.go | 2 +- search_knn.go | 35 ------------ search_no_knn.go | 2 +- 6 files changed, 158 insertions(+), 74 deletions(-) diff --git a/index_alias_impl.go b/index_alias_impl.go index 73cc637f7..e4aad1eb7 100644 --- a/index_alias_impl.go +++ b/index_alias_impl.go @@ -169,7 +169,11 @@ func (i *indexAliasImpl) SearchInContext(ctx context.Context, req *SearchRequest // indicates that this index alias is set as an Index // in another alias, so we need to do a preSearch search // and NOT a real search - return preSearchDataSearch(ctx, req, i.indexes...) + flags := &preSearchFlags{ + knn: requestHasKNN(req), + synonyms: !isMatchNoneQuery(req.Query), + } + return preSearchDataSearch(ctx, req, flags, i.indexes...) } // at this point we know we are doing a real search @@ -183,12 +187,10 @@ func (i *indexAliasImpl) SearchInContext(ctx context.Context, req *SearchRequest // if necessary var preSearchData map[string]map[string]interface{} if req.PreSearchData != nil { - if requestHasKNN(req) { - var err error - preSearchData, err = redistributeKNNPreSearchData(req, i.indexes) - if err != nil { - return nil, err - } + var err error + preSearchData, err = redistributePreSearchData(req, i.indexes) + if err != nil { + return nil, err } } @@ -209,9 +211,10 @@ func (i *indexAliasImpl) SearchInContext(ctx context.Context, req *SearchRequest // - the request requires preSearch var preSearchDuration time.Duration var sr *SearchResult - if req.PreSearchData == nil && preSearchRequired(req) { + flags := preSearchRequired(req, i.mapping) + if req.PreSearchData == nil && flags != nil { searchStart := time.Now() - preSearchResult, err := preSearch(ctx, req, i.indexes...) + preSearchResult, err := preSearch(ctx, req, flags, i.indexes...) 
if err != nil {
 			return nil, err
 		}
@@ -222,17 +225,17 @@ func (i *indexAliasImpl) SearchInContext(ctx context.Context, req *SearchRequest
 			return preSearchResult, nil
 		}
 		// finalize the preSearch result now
-		finalizePreSearchResult(req, preSearchResult)
+		finalizePreSearchResult(req, flags, preSearchResult)
 
 		// if there are no errors, then merge the data in the preSearch result
 		// and construct the preSearchData to be used in the actual search
 		// if the request is satisfied by the preSearch result, then we can
 		// directly return the preSearch result as the final result
-		if requestSatisfiedByPreSearch(req) {
+		if requestSatisfiedByPreSearch(req, flags) {
 			sr = finalizeSearchResult(req, preSearchResult)
 			// no need to run the 2nd phase MultiSearch(..)
 		} else {
-			preSearchData, err = constructPreSearchData(req, preSearchResult, i.indexes)
+			preSearchData, err = constructPreSearchData(req, flags, preSearchResult, i.indexes)
 			if err != nil {
 				return nil, err
 			}
@@ -507,6 +510,13 @@ func (i *indexAliasImpl) Swap(in, out []Index) {
 	}
 }
 
+func (i *indexAliasImpl) SetIndexMapping(m mapping.IndexMapping) {
+	i.mutex.Lock()
+	defer i.mutex.Unlock()
+
+	i.mapping = m
+}
+
 // createChildSearchRequest creates a separate
 // request from the original
 // For now, avoid data race on req structure.
@@ -525,21 +535,48 @@ type asyncSearchResult struct {
 	Err error
 }
 
-func preSearchRequired(req *SearchRequest) bool {
-	return requestHasKNN(req)
+// preSearchFlags is a struct to hold flags indicating why preSearch is required
+type preSearchFlags struct {
+	knn      bool
+	synonyms bool
+}
+
+// preSearchRequired checks if preSearch is required, returning a *preSearchFlags
+// that records why (nil if not required); the struct is only allocated if necessary
+func preSearchRequired(req *SearchRequest, m mapping.IndexMapping) *preSearchFlags {
+	// Check for KNN query
+	knn := requestHasKNN(req)
+	var synonyms bool
+	if !isMatchNoneQuery(req.Query) {
+		// Check if synonyms are defined in the mapping
+		if sm, ok := m.(mapping.SynonymMapping); ok && sm.SynonymCount() > 0 {
+			synonyms = true
+		}
+	}
+	if knn || synonyms {
+		return &preSearchFlags{
+			knn:      knn,
+			synonyms: synonyms,
+		}
+	}
+	return nil
 }
 
-func preSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*SearchResult, error) {
-	// create a dummy request with a match none query
-	// since we only care about the preSearchData in PreSearch
+func preSearch(ctx context.Context, req *SearchRequest, flags *preSearchFlags, indexes ...Index) (*SearchResult, error) {
+	var dummyQuery = req.Query
+	if !flags.synonyms {
+		// create a dummy request with a match none query
+		// since we only care about the preSearchData in PreSearch
+		dummyQuery = query.NewMatchNoneQuery()
+	}
 	dummyRequest := &SearchRequest{
-		Query: query.NewMatchNoneQuery(),
+		Query: dummyQuery,
 	}
 	newCtx := context.WithValue(ctx, search.PreSearchKey, true)
-	if requestHasKNN(req) {
+	if flags.knn {
 		addKnnToDummyRequest(dummyRequest, req)
 	}
-	return preSearchDataSearch(newCtx, dummyRequest, indexes...)
+	return preSearchDataSearch(newCtx, dummyRequest, flags, indexes...)
}
 
 // if the request is satisfied by just the preSearch result,
@@ -590,29 +627,74 @@ func finalizeSearchResult(req *SearchRequest, preSearchResult *SearchResult) *Se
 	return preSearchResult
 }
 
-func requestSatisfiedByPreSearch(req *SearchRequest) bool {
-	if requestHasKNN(req) && isKNNrequestSatisfiedByPreSearch(req) {
+func requestSatisfiedByPreSearch(req *SearchRequest, flags *preSearchFlags) bool {
+	// if the synonyms preSearch flag is set, the request can never be satisfied by
+	// the preSearch result alone: its synonym data only feeds the second search phase
+	if flags.synonyms {
+		return false
+	}
+	if flags.knn && isKNNrequestSatisfiedByPreSearch(req) {
 		return true
 	}
 	return false
 }
 
-func constructPreSearchData(req *SearchRequest, preSearchResult *SearchResult, indexes []Index) (map[string]map[string]interface{}, error) {
+func constructSynonymPreSearchData(rv map[string]map[string]interface{}, sr *SearchResult, indexes []Index) map[string]map[string]interface{} {
+	for _, index := range indexes {
+		rv[index.Name()][search.SynonymPreSearchDataKey] = sr.SynonymResult
+	}
+	return rv
+}
+
+func constructPreSearchData(req *SearchRequest, flags *preSearchFlags,
+	preSearchResult *SearchResult, indexes []Index) (map[string]map[string]interface{}, error) {
 	mergedOut := make(map[string]map[string]interface{}, len(indexes))
 	for _, index := range indexes {
 		mergedOut[index.Name()] = make(map[string]interface{})
 	}
 	var err error
-	if requestHasKNN(req) {
+	if flags.knn {
 		mergedOut, err = constructKnnPreSearchData(mergedOut, preSearchResult, indexes)
 		if err != nil {
 			return nil, err
 		}
 	}
+	if flags.synonyms {
+		mergedOut = constructSynonymPreSearchData(mergedOut, preSearchResult, indexes)
+	}
 	return mergedOut, nil
 }
 
-func preSearchDataSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*SearchResult, error) {
+func redistributePreSearchData(req *SearchRequest, indexes []Index) (map[string]map[string]interface{}, error) {
+	rv := make(map[string]map[string]interface{})
+	for _, index := range indexes {
+		rv[index.Name()] = make(map[string]interface{})
+	}
+	if knnHits, ok := req.PreSearchData[search.KnnPreSearchDataKey].([]*search.DocumentMatch); ok {
+		// the preSearchData for KNN is a list of DocumentMatch objects
+		// that need to be redistributed to the right index.
+		// This is used only in the case of an alias tree, where the indexes
+		// are at the leaves of the tree, and the master alias is at the root.
+		// At each level of the tree, the preSearchData needs to be redistributed
+		// to the indexes/aliases at that level. Because the preSearchData is
+		// specific to each final index at the leaf. 
+		segregatedKnnHits, err := validateAndDistributeKNNHits(knnHits, indexes)
+		if err != nil {
+			return nil, err
+		}
+		for _, index := range indexes {
+			rv[index.Name()][search.KnnPreSearchDataKey] = segregatedKnnHits[index.Name()]
+		}
+	}
+	if fts, ok := req.PreSearchData[search.SynonymPreSearchDataKey].(search.FieldTermSynonymMap); ok {
+		for _, index := range indexes {
+			rv[index.Name()][search.SynonymPreSearchDataKey] = fts
+		}
+	}
+	return rv, nil
+}
+
+func preSearchDataSearch(ctx context.Context, req *SearchRequest, flags *preSearchFlags, indexes ...Index) (*SearchResult, error) {
 	asyncResults := make(chan *asyncSearchResult, len(indexes))
 	// run search on each index in separate go routine
 	var waitGroup sync.WaitGroup
@@ -643,7 +725,7 @@ func preSearchDataSearch(ctx context.Context, req *SearchRequest, indexes ...Ind
 		if prp == nil {
 			// first valid preSearch result
 			// create a new preSearch result processor
-			prp = createPreSearchResultProcessor(req)
+			prp = createPreSearchResultProcessor(req, flags)
 		}
 		prp.add(asr.Result, asr.Name)
 		if sr == nil {
diff --git a/index_impl.go b/index_impl.go
index 3cce0a12c..c085b796a 100644
--- a/index_impl.go
+++ b/index_impl.go
@@ -450,12 +450,25 @@ func (i *indexImpl) preSearch(ctx context.Context, req *SearchRequest, reader in
 		}
 	}
 
+	var fts search.FieldTermSynonymMap
+	if !isMatchNoneQuery(req.Query) {
+		if synMap, ok := i.m.(mapping.SynonymMapping); ok {
+			if synReader, ok := reader.(index.SynonymReader); ok {
+				fts, err = query.ExtractSynonyms(ctx, synMap, synReader, req.Query, fts)
+				if err != nil {
+					return nil, err
+				}
+			}
+		}
+	}
+
 	return &SearchResult{
 		Status: &SearchStatus{
 			Total:      1,
 			Successful: 1,
 		},
-		Hits: knnHits,
+		Hits:          knnHits,
+		SynonymResult: fts,
 	}, nil
 }
 
diff --git a/pre_search.go b/pre_search.go
index a539afc35..59a9dc2e3 100644
--- a/pre_search.go
+++ b/pre_search.go
@@ -14,6 +14,10 @@
 
 package bleve
 
+import (
+	"github.com/blevesearch/bleve/v2/search"
+)
+
 // A preSearchResultProcessor processes the data in
 // the preSearch result from multiple
 // indexes in an alias and merges them together to
@@ -48,19 +52,33 @@ func (k *knnPreSearchResultProcessor) finalize(sr *SearchResult) {
 // -----------------------------------------------------------------------------
 // Synonym preSearchResultProcessor for handling Synonym presearch results
 type synonymPreSearchResultProcessor struct {
-	addFn      func(sr *SearchResult, indexName string)
-	finalizeFn func(sr *SearchResult)
+	finalizedFts search.FieldTermSynonymMap
+}
+
+func newSynonymPreSearchResultProcessor() *synonymPreSearchResultProcessor {
+	return &synonymPreSearchResultProcessor{}
 }
 
 func (s *synonymPreSearchResultProcessor) add(sr *SearchResult, indexName string) {
-	if s.addFn != nil {
-		s.addFn(sr, indexName)
+	// Nothing to merge if this result carries no synonym data
+	if sr.SynonymResult == nil {
+		return
+	}
+
+	// sr.SynonymResult is the FieldTermSynonymMap built by that index's preSearch
+
+	// Merge with finalizedFts or initialize it if nil
+	if s.finalizedFts == nil {
+		s.finalizedFts = sr.SynonymResult
+	} else {
+		s.finalizedFts.MergeWith(sr.SynonymResult)
 	}
 }
 
 func (s *synonymPreSearchResultProcessor) finalize(sr *SearchResult) {
-	if s.finalizeFn != nil {
-		s.finalizeFn(sr)
+	// Set the merged synonym data on the final search result
+	if s.finalizedFts != nil {
+		sr.SynonymResult = s.finalizedFts
 	}
 }
 
@@ -86,14 +104,20 @@ func (m *compositePreSearchResultProcessor) finalize(sr *SearchResult) {
 // -----------------------------------------------------------------------------
 // Function to 
create the appropriate preSearchResultProcessor(s) -func createPreSearchResultProcessor(req *SearchRequest) preSearchResultProcessor { +func createPreSearchResultProcessor(req *SearchRequest, flags *preSearchFlags) preSearchResultProcessor { var processors []preSearchResultProcessor // Add KNN processor if the request has KNN - if requestHasKNN(req) { + if flags.knn { if knnProcessor := newKnnPreSearchResultProcessor(req); knnProcessor != nil { processors = append(processors, knnProcessor) } } + // Add Synonym processor if the request has Synonym + if flags.synonyms { + if synonymProcessor := newSynonymPreSearchResultProcessor(); synonymProcessor != nil { + processors = append(processors, synonymProcessor) + } + } // Return based on the number of processors, optimizing for the common case of 1 processor // If there are no processors, return nil switch len(processors) { @@ -109,8 +133,8 @@ func createPreSearchResultProcessor(req *SearchRequest) preSearchResultProcessor } // ----------------------------------------------------------------------------- -func finalizePreSearchResult(req *SearchRequest, preSearchResult *SearchResult) { - if requestHasKNN(req) { +func finalizePreSearchResult(req *SearchRequest, flags *preSearchFlags, preSearchResult *SearchResult) { + if flags.knn { preSearchResult.Hits = finalizeKNNResults(req, preSearchResult.Hits) } } diff --git a/search.go b/search.go index 8734867e5..72bfca5e2 100644 --- a/search.go +++ b/search.go @@ -446,7 +446,7 @@ type SearchResult struct { Facets search.FacetResults `json:"facets"` // special fields that are applicable only for search // results that are obtained from a presearch - PreSearchResults map[string]interface{} `json:"presearch_results,omitempty"` + SynonymResult search.FieldTermSynonymMap `json:"synonym_result,omitempty"` } func (sr *SearchResult) Size() int { diff --git a/search_knn.go b/search_knn.go index 498770a3a..e5fd595d4 100644 --- a/search_knn.go +++ b/search_knn.go @@ -598,41 +598,6 @@ func addKnnToDummyRequest(dummyReq *SearchRequest, realReq *SearchRequest) { dummyReq.Sort = realReq.Sort } -// the preSearchData for KNN is a list of DocumentMatch objects -// that need to be redistributed to the right index. -// This is used only in the case of an alias tree, where the indexes -// are at the leaves of the tree, and the master alias is at the root. -// At each level of the tree, the preSearchData needs to be redistributed -// to the indexes/aliases at that level. Because the preSearchData is -// specific to each final index at the leaf. 
-func redistributeKNNPreSearchData(req *SearchRequest, indexes []Index) (map[string]map[string]interface{}, error) {
-	knnHits, ok := req.PreSearchData[search.KnnPreSearchDataKey].([]*search.DocumentMatch)
-	if !ok {
-		return nil, fmt.Errorf("request does not have knn preSearchData for redistribution")
-	}
-	segregatedKnnHits, err := validateAndDistributeKNNHits(knnHits, indexes)
-	if err != nil {
-		return nil, err
-	}
-
-	rv := make(map[string]map[string]interface{})
-	for _, index := range indexes {
-		rv[index.Name()] = make(map[string]interface{})
-	}
-
-	for _, index := range indexes {
-		for k, v := range req.PreSearchData {
-			switch k {
-			case search.KnnPreSearchDataKey:
-				rv[index.Name()][k] = segregatedKnnHits[index.Name()]
-			default:
-				rv[index.Name()][k] = v
-			}
-		}
-	}
-	return rv, nil
-}
-
 func newKnnPreSearchResultProcessor(req *SearchRequest) *knnPreSearchResultProcessor {
 	kArray := make([]int64, len(req.KNN))
 	for i, knnReq := range req.KNN {
diff --git a/search_no_knn.go b/search_no_knn.go
index bb72e15a9..c91980589 100644
--- a/search_no_knn.go
+++ b/search_no_knn.go
@@ -187,7 +187,7 @@ func requestHasKNN(req *SearchRequest) bool {
 func addKnnToDummyRequest(dummyReq *SearchRequest, realReq *SearchRequest) {
 }
 
-func redistributeKNNPreSearchData(req *SearchRequest, indexes []Index) (map[string]map[string]interface{}, error) {
+func validateAndDistributeKNNHits(knnHits []*search.DocumentMatch, indexes []Index) (map[string][]*search.DocumentMatch, error) {
 	return nil, nil
 }
 
From 7b865338225d482415590ad6ad9cdd6f2638cb8d Mon Sep 17 00:00:00 2001
From: CascadingRadium
Date: Fri, 6 Dec 2024 11:55:55 +0530
Subject: [PATCH 13/35] Presearch Code Refactor

- Refactor the presearch code path to make it more generic and extensible.
---
 index_alias_impl.go | 114 +++++++++++++++++++++++++++++++++++-------
 pre_search.go       |  45 ++++++++++++++---
 search.go           |   5 ++
 search_knn.go       |  39 +--------------
 search_no_knn.go    |   2 +-
 5 files changed, 142 insertions(+), 63 deletions(-)

diff --git a/index_alias_impl.go b/index_alias_impl.go
index 3c7cdcd32..1daae819b 100644
--- a/index_alias_impl.go
+++ b/index_alias_impl.go
@@ -31,6 +31,10 @@ type indexAliasImpl struct {
 	indexes []Index
 	mutex   sync.RWMutex
 	open    bool
+	// if all the indexes in the alias have the same mapping
+	// then the user can set the mapping here to avoid
+	// checking the mapping of each index in the alias
+	mapping mapping.IndexMapping
 }
 
 // NewIndexAlias creates a new IndexAlias over the provided
@@ -168,7 +172,10 @@ func (i *indexAliasImpl) SearchInContext(ctx context.Context, req *SearchRequest
 	// indicates that this index alias is set as an Index
 	// in another alias, so we need to do a preSearch search
 	// and NOT a real search
-	return preSearchDataSearch(ctx, req, i.indexes...)
+	flags := &preSearchFlags{
+		knn: requestHasKNN(req), // set knn flag if the request has KNN
+	}
+	return preSearchDataSearch(ctx, req, flags, i.indexes...) 
} // at this point we know we are doing a real search @@ -184,7 +191,7 @@ func (i *indexAliasImpl) SearchInContext(ctx context.Context, req *SearchRequest if req.PreSearchData != nil { if requestHasKNN(req) { var err error - preSearchData, err = redistributeKNNPreSearchData(req, i.indexes) + preSearchData, err = redistributePreSearchData(req, i.indexes) if err != nil { return nil, err } @@ -208,9 +215,10 @@ func (i *indexAliasImpl) SearchInContext(ctx context.Context, req *SearchRequest // - the request requires preSearch var preSearchDuration time.Duration var sr *SearchResult - if req.PreSearchData == nil && preSearchRequired(req) { + flags := preSearchRequired(req, i.mapping) + if req.PreSearchData == nil && flags != nil { searchStart := time.Now() - preSearchResult, err := preSearch(ctx, req, i.indexes...) + preSearchResult, err := preSearch(ctx, req, flags, i.indexes...) if err != nil { return nil, err } @@ -221,17 +229,17 @@ func (i *indexAliasImpl) SearchInContext(ctx context.Context, req *SearchRequest return preSearchResult, nil } // finalize the preSearch result now - finalizePreSearchResult(req, preSearchResult) + finalizePreSearchResult(req, flags, preSearchResult) // if there are no errors, then merge the data in the preSearch result // and construct the preSearchData to be used in the actual search // if the request is satisfied by the preSearch result, then we can // directly return the preSearch result as the final result - if requestSatisfiedByPreSearch(req) { + if requestSatisfiedByPreSearch(req, flags) { sr = finalizeSearchResult(req, preSearchResult) // no need to run the 2nd phase MultiSearch(..) } else { - preSearchData, err = constructPreSearchData(req, preSearchResult, i.indexes) + preSearchData, err = constructPreSearchData(req, flags, preSearchResult, i.indexes) if err != nil { return nil, err } @@ -352,6 +360,20 @@ func (i *indexAliasImpl) Close() error { return nil } +// SetIndexMapping sets the mapping for the alias and must be used +// ONLY when all the indexes in the alias have the same mapping. +// This is to avoid checking the mapping of each index in the alias +// when executing a search request. 
+func (i *indexAliasImpl) SetIndexMapping(m mapping.IndexMapping) error { + i.mutex.Lock() + defer i.mutex.Unlock() + if !i.open { + return ErrorIndexClosed + } + i.mapping = m + return nil +} + func (i *indexAliasImpl) Mapping() mapping.IndexMapping { i.mutex.RLock() defer i.mutex.RUnlock() @@ -360,6 +382,11 @@ func (i *indexAliasImpl) Mapping() mapping.IndexMapping { return nil } + // if the mapping is already set, return it + if i.mapping != nil { + return i.mapping + } + err := i.isAliasToSingleIndex() if err != nil { return nil @@ -520,21 +547,35 @@ type asyncSearchResult struct { Err error } -func preSearchRequired(req *SearchRequest) bool { - return requestHasKNN(req) +// preSearchFlags is a struct to hold flags indicating why preSearch is required +type preSearchFlags struct { + knn bool +} + +// preSearchRequired checks if preSearch is required and returns the presearch flags struct +// indicating which preSearch is required +func preSearchRequired(req *SearchRequest, m mapping.IndexMapping) *preSearchFlags { + // Check for KNN query + knn := requestHasKNN(req) + if knn { + return &preSearchFlags{ + knn: knn, + } + } + return nil } -func preSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*SearchResult, error) { +func preSearch(ctx context.Context, req *SearchRequest, flags *preSearchFlags, indexes ...Index) (*SearchResult, error) { // create a dummy request with a match none query // since we only care about the preSearchData in PreSearch dummyRequest := &SearchRequest{ Query: query.NewMatchNoneQuery(), } newCtx := context.WithValue(ctx, search.PreSearchKey, true) - if requestHasKNN(req) { + if flags.knn { addKnnToDummyRequest(dummyRequest, req) } - return preSearchDataSearch(newCtx, dummyRequest, indexes...) + return preSearchDataSearch(newCtx, dummyRequest, flags, indexes...) 
} // if the request is satisfied by just the preSearch result, @@ -585,20 +626,20 @@ func finalizeSearchResult(req *SearchRequest, preSearchResult *SearchResult) *Se return preSearchResult } -func requestSatisfiedByPreSearch(req *SearchRequest) bool { - if requestHasKNN(req) && isKNNrequestSatisfiedByPreSearch(req) { +func requestSatisfiedByPreSearch(req *SearchRequest, flags *preSearchFlags) bool { + if flags.knn && isKNNrequestSatisfiedByPreSearch(req) { return true } return false } -func constructPreSearchData(req *SearchRequest, preSearchResult *SearchResult, indexes []Index) (map[string]map[string]interface{}, error) { +func constructPreSearchData(req *SearchRequest, flags *preSearchFlags, preSearchResult *SearchResult, indexes []Index) (map[string]map[string]interface{}, error) { mergedOut := make(map[string]map[string]interface{}, len(indexes)) for _, index := range indexes { mergedOut[index.Name()] = make(map[string]interface{}) } var err error - if requestHasKNN(req) { + if flags.knn { mergedOut, err = constructKnnPreSearchData(mergedOut, preSearchResult, indexes) if err != nil { return nil, err @@ -607,7 +648,7 @@ func constructPreSearchData(req *SearchRequest, preSearchResult *SearchResult, i return mergedOut, nil } -func preSearchDataSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*SearchResult, error) { +func preSearchDataSearch(ctx context.Context, req *SearchRequest, flags *preSearchFlags, indexes ...Index) (*SearchResult, error) { asyncResults := make(chan *asyncSearchResult, len(indexes)) // run search on each index in separate go routine var waitGroup sync.WaitGroup @@ -638,7 +679,7 @@ func preSearchDataSearch(ctx context.Context, req *SearchRequest, indexes ...Ind if prp == nil { // first valid preSearch result // create a new preSearch result processor - prp = createPreSearchResultProcessor(req) + prp = createPreSearchResultProcessor(req, flags) } prp.add(asr.Result, asr.Name) if sr == nil { @@ -684,6 +725,43 @@ func preSearchDataSearch(ctx context.Context, req *SearchRequest, indexes ...Ind return sr, nil } +// redistributePreSearchData redistributes the preSearchData coming from the individual constituent index aliases +// of an alias to the individual indexes in the alias. This is necessary when the preSearchData is specific to each +// index in the alias. This is used only in the case of an alias tree, where the indexes are at the leaves of the tree, +// and the master alias is at the root. At each level of the tree, the preSearchData needs to be redistributed to the +// indexes/aliases at that level. Because the preSearchData is specific to each final index at the leaf. +func redistributePreSearchData(req *SearchRequest, indexes []Index) (map[string]map[string]interface{}, error) { + rv := make(map[string]map[string]interface{}) + for _, index := range indexes { + rv[index.Name()] = make(map[string]interface{}) + } + if knnHits, ok := req.PreSearchData[search.KnnPreSearchDataKey].([]*search.DocumentMatch); ok { + // the preSearchData for KNN is a list of DocumentMatch objects + // that need to be redistributed to the right index. + // This is used only in the case of an alias tree, where the indexes + // are at the leaves of the tree, and the master alias is at the root. + // At each level of the tree, the preSearchData needs to be redistributed + // to the indexes/aliases at that level. Because the preSearchData is + // specific to each final index at the leaf. 
+ segregatedKnnHits, err := validateAndDistributeKNNHits(knnHits, indexes) + if err != nil { + return nil, err + } + for _, index := range indexes { + rv[index.Name()][search.KnnPreSearchDataKey] = segregatedKnnHits[index.Name()] + } + } + return rv, nil +} + +// finalizePreSearchResult finalizes the preSearch result by applying the finalization steps +// specific to the preSearch flags +func finalizePreSearchResult(req *SearchRequest, flags *preSearchFlags, preSearchResult *SearchResult) { + if flags.knn { + preSearchResult.Hits = finalizeKNNResults(req, preSearchResult.Hits) + } +} + // hitsInCurrentPage returns the hits in the current page // using the From and Size parameters in the request func hitsInCurrentPage(req *SearchRequest, hits []*search.DocumentMatch) []*search.DocumentMatch { diff --git a/pre_search.go b/pre_search.go index c8c55bfbc..0992124d9 100644 --- a/pre_search.go +++ b/pre_search.go @@ -26,6 +26,8 @@ type preSearchResultProcessor interface { finalize(*SearchResult) } +// ----------------------------------------------------------------------------- +// KNN preSearchResultProcessor for handling KNN presearch results type knnPreSearchResultProcessor struct { addFn func(sr *SearchResult, indexName string) finalizeFn func(sr *SearchResult) @@ -44,16 +46,45 @@ func (k *knnPreSearchResultProcessor) finalize(sr *SearchResult) { } // ----------------------------------------------------------------------------- +// Master struct that can hold any number of presearch result processors +type compositePreSearchResultProcessor struct { + presearchResultProcessors []preSearchResultProcessor +} -func finalizePreSearchResult(req *SearchRequest, preSearchResult *SearchResult) { - if requestHasKNN(req) { - preSearchResult.Hits = finalizeKNNResults(req, preSearchResult.Hits) +// Implements the add method, which forwards to all the internal processors +func (m *compositePreSearchResultProcessor) add(sr *SearchResult, indexName string) { + for _, p := range m.presearchResultProcessors { + p.add(sr, indexName) } } -func createPreSearchResultProcessor(req *SearchRequest) preSearchResultProcessor { - if requestHasKNN(req) { - return newKnnPreSearchResultProcessor(req) +// Implements the finalize method, which forwards to all the internal processors +func (m *compositePreSearchResultProcessor) finalize(sr *SearchResult) { + for _, p := range m.presearchResultProcessors { + p.finalize(sr) + } +} + +// ----------------------------------------------------------------------------- +// Function to create the appropriate preSearchResultProcessor(s) +func createPreSearchResultProcessor(req *SearchRequest, flags *preSearchFlags) preSearchResultProcessor { + var processors []preSearchResultProcessor + // Add KNN processor if the request has KNN + if flags.knn { + if knnProcessor := newKnnPreSearchResultProcessor(req); knnProcessor != nil { + processors = append(processors, knnProcessor) + } + } + // Return based on the number of processors, optimizing for the common case of 1 processor + // If there are no processors, return nil + switch len(processors) { + case 0: + return nil + case 1: + return processors[0] + default: + return &compositePreSearchResultProcessor{ + presearchResultProcessors: processors, + } } - return &knnPreSearchResultProcessor{} // equivalent to nil } diff --git a/search.go b/search.go index 7861d24b8..402109e05 100644 --- a/search.go +++ b/search.go @@ -589,3 +589,8 @@ func (r *SearchRequest) SortFunc() func(data sort.Interface) { return sort.Sort } + +func isMatchNoneQuery(q 
query.Query) bool { + _, ok := q.(*query.MatchNoneQuery) + return ok +} diff --git a/search_knn.go b/search_knn.go index 309b36593..e5fd595d4 100644 --- a/search_knn.go +++ b/search_knn.go @@ -381,7 +381,7 @@ func (i *indexImpl) runKnnCollector(ctx context.Context, req *SearchRequest, rea continue } - if _, ok := filterQ.(*query.MatchNoneQuery); ok { + if isMatchNoneQuery(filterQ) { // Filtering required since no hits are eligible. requiresFiltering[idx] = true // a match none query just means none the documents are eligible @@ -559,7 +559,7 @@ func requestHasKNN(req *SearchRequest) bool { func isKNNrequestSatisfiedByPreSearch(req *SearchRequest) bool { // if req.Query is not match_none => then we need to go to phase 2 // to perform the actual query. - if _, ok := req.Query.(*query.MatchNoneQuery); !ok { + if !isMatchNoneQuery(req.Query) { return false } // req.Query is a match_none query @@ -598,41 +598,6 @@ func addKnnToDummyRequest(dummyReq *SearchRequest, realReq *SearchRequest) { dummyReq.Sort = realReq.Sort } -// the preSearchData for KNN is a list of DocumentMatch objects -// that need to be redistributed to the right index. -// This is used only in the case of an alias tree, where the indexes -// are at the leaves of the tree, and the master alias is at the root. -// At each level of the tree, the preSearchData needs to be redistributed -// to the indexes/aliases at that level. Because the preSearchData is -// specific to each final index at the leaf. -func redistributeKNNPreSearchData(req *SearchRequest, indexes []Index) (map[string]map[string]interface{}, error) { - knnHits, ok := req.PreSearchData[search.KnnPreSearchDataKey].([]*search.DocumentMatch) - if !ok { - return nil, fmt.Errorf("request does not have knn preSearchData for redistribution") - } - segregatedKnnHits, err := validateAndDistributeKNNHits(knnHits, indexes) - if err != nil { - return nil, err - } - - rv := make(map[string]map[string]interface{}) - for _, index := range indexes { - rv[index.Name()] = make(map[string]interface{}) - } - - for _, index := range indexes { - for k, v := range req.PreSearchData { - switch k { - case search.KnnPreSearchDataKey: - rv[index.Name()][k] = segregatedKnnHits[index.Name()] - default: - rv[index.Name()][k] = v - } - } - } - return rv, nil -} - func newKnnPreSearchResultProcessor(req *SearchRequest) *knnPreSearchResultProcessor { kArray := make([]int64, len(req.KNN)) for i, knnReq := range req.KNN { diff --git a/search_no_knn.go b/search_no_knn.go index bb72e15a9..c91980589 100644 --- a/search_no_knn.go +++ b/search_no_knn.go @@ -187,7 +187,7 @@ func requestHasKNN(req *SearchRequest) bool { func addKnnToDummyRequest(dummyReq *SearchRequest, realReq *SearchRequest) { } -func redistributeKNNPreSearchData(req *SearchRequest, indexes []Index) (map[string]map[string]interface{}, error) { +func validateAndDistributeKNNHits(knnHits []*search.DocumentMatch, indexes []Index) (map[string][]*search.DocumentMatch, error) { return nil, nil } From d58474f7f95c6f83cd312bd2b2c9c43baa263974 Mon Sep 17 00:00:00 2001 From: CascadingRadium Date: Fri, 6 Dec 2024 12:01:44 +0530 Subject: [PATCH 14/35] fix comment --- index_alias_impl.go | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/index_alias_impl.go b/index_alias_impl.go index 1daae819b..e2948ff53 100644 --- a/index_alias_impl.go +++ b/index_alias_impl.go @@ -725,11 +725,9 @@ func preSearchDataSearch(ctx context.Context, req *SearchRequest, flags *preSear return sr, nil } -// redistributePreSearchData redistributes the 
preSearchData coming from the individual constituent index aliases
-// of an alias to the individual indexes in the alias. This is necessary when the preSearchData is specific to each
-// index in the alias. This is used only in the case of an alias tree, where the indexes are at the leaves of the tree,
-// and the master alias is at the root. At each level of the tree, the preSearchData needs to be redistributed to the
-// indexes/aliases at that level. Because the preSearchData is specific to each final index at the leaf.
+// redistributePreSearchData redistributes the preSearchData sent in a search request to an index alias.
+// This arises in an alias tree: at each level of the tree, the alias splits the preSearchData it
+// receives among the indexes/aliases at that level, since the data is specific to each leaf index.
 func redistributePreSearchData(req *SearchRequest, indexes []Index) (map[string]map[string]interface{}, error) {
 	rv := make(map[string]map[string]interface{})
 	for _, index := range indexes {

From 3908df35e385f3d60ea10f3e8fbae3cd8d382872 Mon Sep 17 00:00:00 2001
From: CascadingRadium
Date: Fri, 6 Dec 2024 13:52:09 +0530
Subject: [PATCH 15/35] Add ExtractFields API with unit test

---
 search/query/query.go      |  38 ++++++
 search/query/query_test.go | 234 +++++++++++++++++++++++++++++++++++++
 2 files changed, 272 insertions(+)

diff --git a/search/query/query.go b/search/query/query.go
index d263a0e54..1f1131f30 100644
--- a/search/query/query.go
+++ b/search/query/query.go
@@ -423,3 +423,41 @@ func DumpQuery(m mapping.IndexMapping, query Query) (string, error) {
 	data, err := json.MarshalIndent(q, "", " ")
 	return string(data), err
 }
+
+// FieldSet represents a set of queried fields.
+type FieldSet map[string]struct{}
+
+// ExtractFields returns a set of fields referenced by the query.
+// The returned set may be nil if the query does not explicitly reference any field
+// and the DefaultSearchField is unset in the index mapping. 
+func ExtractFields(q Query, m mapping.IndexMapping, fs FieldSet) FieldSet { + if q == nil { + return fs + } + switch q := q.(type) { + case FieldableQuery: + f := q.Field() + if f == "" { + f = m.DefaultSearchField() + } + if f != "" { + if fs == nil { + fs = make(FieldSet) + } + fs[f] = struct{}{} + } + case *BooleanQuery: + for _, subq := range []Query{q.Must, q.Should, q.MustNot} { + fs = ExtractFields(subq, m, fs) + } + case *ConjunctionQuery: + for _, subq := range q.Conjuncts { + fs = ExtractFields(subq, m, fs) + } + case *DisjunctionQuery: + for _, subq := range q.Disjuncts { + fs = ExtractFields(subq, m, fs) + } + } + return fs +} diff --git a/search/query/query_test.go b/search/query/query_test.go index 0028c956b..870510de2 100644 --- a/search/query/query_test.go +++ b/search/query/query_test.go @@ -16,6 +16,7 @@ package query import ( "reflect" + "sort" "strings" "testing" "time" @@ -785,3 +786,236 @@ func TestParseEmptyQuery(t *testing.T) { t.Errorf("[2] Expected %#v, got %#v", expect, rv) } } + +func TestExtractFields(t *testing.T) { + testQueries := []struct { + query string + expFields []string + }{ + { + query: `{"term":"water","field":"desc"}`, + expFields: []string{"desc"}, + }, + { + query: `{ + "must": { + "conjuncts": [ + { + "match": "water", + "prefix_length": 0, + "fuzziness": 0 + } + ] + }, + "should": { + "disjuncts": [ + { + "match": "beer", + "prefix_length": 0, + "fuzziness": 0 + } + ], + "min": 0 + }, + "must_not": { + "disjuncts": [ + { + "match": "light", + "prefix_length": 0, + "fuzziness": 0 + } + ], + "min": 0 + } + }`, + expFields: []string{"_all"}, + }, + { + query: `{ + "must": { + "conjuncts": [ + { + "match": "water", + "prefix_length": 0, + "field": "desc", + "fuzziness": 0 + } + ] + }, + "should": { + "disjuncts": [ + { + "match": "beer", + "prefix_length": 0, + "field": "desc", + "fuzziness": 0 + } + ], + "min": 0 + }, + "must_not": { + "disjuncts": [ + { + "match": "light", + "prefix_length": 0, + "field": "genre", + "fuzziness": 0 + } + ], + "min": 0 + } + }`, + expFields: []string{"desc", "genre"}, + }, + { + query: ` + { + "conjuncts": [ + { + "conjuncts": [ + { + "conjuncts": [ + { + "conjuncts": [ + { + "field": "date", + "start": "2002-09-05T08:09:00Z", + "end": "2007-03-01T03:52:00Z", + "inclusive_start": true, + "inclusive_end": true + }, + { + "field": "number", + "min": 1260295, + "max": 3917314, + "inclusive_min": true, + "inclusive_max": true + } + ] + }, + { + "conjuncts": [ + { + "field": "date2", + "start": "2004-08-21T18:30:00Z", + "end": "2006-03-24T08:08:00Z", + "inclusive_start": true, + "inclusive_end": true + }, + { + "field": "number", + "min": 165449, + "max": 3847517, + "inclusive_min": true, + "inclusive_max": true + } + ] + } + ] + }, + { + "conjuncts": [ + { + "conjuncts": [ + { + "field": "date", + "start": "2004-09-02T22:15:00Z", + "end": "2008-06-22T15:06:00Z", + "inclusive_start": true, + "inclusive_end": true + }, + { + "field": "number2", + "min": 876843, + "max": 3363351, + "inclusive_min": true, + "inclusive_max": true + } + ] + }, + { + "conjuncts": [ + { + "field": "date", + "start": "2000-12-03T21:35:00Z", + "end": "2008-02-07T05:00:00Z", + "inclusive_start": true, + "inclusive_end": true + }, + { + "field": "number", + "min": 2021479, + "max": 4763404, + "inclusive_min": true, + "inclusive_max": true + } + ] + } + ] + } + ] + }, + { + "conjuncts": [ + { + "conjuncts": [ + { + "field": "date3", + "start": "2000-03-13T07:13:00Z", + "end": "2005-09-19T09:33:00Z", + "inclusive_start": true, + "inclusive_end": 
true + }, + { + "field": "number", + "min": 883125, + "max": 4817433, + "inclusive_min": true, + "inclusive_max": true + } + ] + }, + { + "conjuncts": [ + { + "field": "date", + "start": "2002-08-10T22:42:00Z", + "end": "2008-02-10T23:19:00Z", + "inclusive_start": true, + "inclusive_end": true + }, + { + "field": "number", + "min": 896115, + "max": 3897074, + "inclusive_min": true, + "inclusive_max": true + } + ] + } + ] + } + ] + }`, + expFields: []string{"date", "number", "date2", "number2", "date3"}, + }, + } + + m := mapping.NewIndexMapping() + for i, test := range testQueries { + q, err := ParseQuery([]byte(test.query)) + if err != nil { + t.Fatal(err) + } + fields := ExtractFields(q, m, nil) + var fieldsSlice []string + for k := range fields { + fieldsSlice = append(fieldsSlice, k) + } + sort.Strings(test.expFields) + sort.Strings(fieldsSlice) + if !reflect.DeepEqual(fieldsSlice, test.expFields) { + t.Errorf("Test %d: expected %v, got %v", i, test.expFields, fieldsSlice) + } + } +} From 31973e08f0f28b456f0249164b6dbb968e4d41ea Mon Sep 17 00:00:00 2001 From: CascadingRadium Date: Fri, 6 Dec 2024 13:59:39 +0530 Subject: [PATCH 16/35] bug fix --- index_alias_impl.go | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/index_alias_impl.go b/index_alias_impl.go index e2948ff53..04270e89d 100644 --- a/index_alias_impl.go +++ b/index_alias_impl.go @@ -189,12 +189,10 @@ func (i *indexAliasImpl) SearchInContext(ctx context.Context, req *SearchRequest // if necessary var preSearchData map[string]map[string]interface{} if req.PreSearchData != nil { - if requestHasKNN(req) { - var err error - preSearchData, err = redistributePreSearchData(req, i.indexes) - if err != nil { - return nil, err - } + var err error + preSearchData, err = redistributePreSearchData(req, i.indexes) + if err != nil { + return nil, err } } From 2130f3c96107079ac2f5f279f9dbce91c4778caa Mon Sep 17 00:00:00 2001 From: CascadingRadium Date: Mon, 9 Dec 2024 12:51:48 +0530 Subject: [PATCH 17/35] final fixes to alias query path --- index/scorch/snapshot_index.go | 145 ++++++++++++++++-- index/scorch/snapshot_index_thes.go | 107 +++++++++++++ index_impl.go | 4 +- search/levenshtein.go | 4 + search/levenshtein_test.go | 13 +- search/query/query.go | 206 +++++++++++--------------- search/searcher/search_fuzzy.go | 53 +++++-- search/searcher/search_regexp.go | 26 +++- search/searcher/search_term_prefix.go | 33 ++++- search_test.go | 98 +++++++++--- 10 files changed, 517 insertions(+), 172 deletions(-) create mode 100644 index/scorch/snapshot_index_thes.go diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 685f1c921..2c3ea5167 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -45,7 +45,7 @@ type asynchSegmentResult struct { index int docs *roaring.Bitmap - postings segment.PostingsList + thesItr segment.ThesaurusIterator err error } @@ -271,17 +271,32 @@ func (is *IndexSnapshot) FieldDictPrefix(field string, func (is *IndexSnapshot) FieldDictRegexp(field string, termRegex string) (index.FieldDict, error) { + fd, _, err := is.FieldDictRegexpAutomaton(field, termRegex) + return fd, err +} + +func (is *IndexSnapshot) FieldDictRegexpAutomaton(field string, + termRegex string) (index.FieldDict, index.RegexAutomaton, error) { + return is.fieldDictRegexp(field, termRegex) +} + +func (is *IndexSnapshot) fieldDictRegexp(field string, + termRegex string) (index.FieldDict, index.RegexAutomaton, error) { // TODO: potential optimization where the literal 
prefix represents the, // entire regexp, allowing us to use PrefixIterator(prefixTerm)? a, prefixBeg, prefixEnd, err := parseRegexp(termRegex) if err != nil { - return nil, err + return nil, nil, err } - return is.newIndexSnapshotFieldDict(field, func(is segment.TermDictionary) segment.DictionaryIterator { + fd, err := is.newIndexSnapshotFieldDict(field, func(is segment.TermDictionary) segment.DictionaryIterator { return is.AutomatonIterator(a, prefixBeg, prefixEnd) }, false) + if err != nil { + return nil, nil, err + } + return fd, a, nil } func (is *IndexSnapshot) getLevAutomaton(term string, @@ -296,20 +311,37 @@ func (is *IndexSnapshot) getLevAutomaton(term string, func (is *IndexSnapshot) FieldDictFuzzy(field string, term string, fuzziness int, prefix string) (index.FieldDict, error) { + fd, _, err := is.FieldDictFuzzyAutomaton(field, term, fuzziness, prefix) + return fd, err +} + +func (is *IndexSnapshot) FieldDictFuzzyAutomaton(field string, + term string, fuzziness int, prefix string) (index.FieldDict, index.FuzzyAutomaton, error) { + return is.fieldDictFuzzy(field, term, fuzziness, prefix) +} + +func (is *IndexSnapshot) fieldDictFuzzy(field string, + term string, fuzziness int, prefix string) (index.FieldDict, index.FuzzyAutomaton, error) { a, err := is.getLevAutomaton(term, uint8(fuzziness)) if err != nil { - return nil, err + return nil, nil, err + } + var fa index.FuzzyAutomaton + if vfa, ok := a.(vellum.FuzzyAutomaton); ok { + fa = vfa } - var prefixBeg, prefixEnd []byte if prefix != "" { prefixBeg = []byte(prefix) prefixEnd = calculateExclusiveEndFromPrefix(prefixBeg) } - - return is.newIndexSnapshotFieldDict(field, func(is segment.TermDictionary) segment.DictionaryIterator { + fd, err := is.newIndexSnapshotFieldDict(field, func(is segment.TermDictionary) segment.DictionaryIterator { return is.AutomatonIterator(a, prefixBeg, prefixEnd) }, false) + if err != nil { + return nil, nil, err + } + return fd, fa, nil } func (is *IndexSnapshot) FieldDictContains(field string) (index.FieldDictContains, error) { @@ -1020,7 +1052,7 @@ func (is *IndexSnapshot) SynonymTermReader(ctx context.Context, thesaurusName st if rv.thesauri == nil { rv.thesauri = make([]segment.Thesaurus, len(is.segment)) for i, s := range is.segment { - if synSeg, ok := s.segment.(segment.SynonymSegment); ok { + if synSeg, ok := s.segment.(segment.ThesaurusSegment); ok { thes, err := synSeg.Thesaurus(thesaurusName) if err != nil { return nil, err @@ -1031,7 +1063,7 @@ func (is *IndexSnapshot) SynonymTermReader(ctx context.Context, thesaurusName st } for i, s := range is.segment { - if _, ok := s.segment.(segment.SynonymSegment); ok { + if _, ok := s.segment.(segment.ThesaurusSegment); ok { pl, err := rv.thesauri[i].SynonymsList(term, s.deleted, rv.postings[i]) if err != nil { return nil, err @@ -1043,3 +1075,98 @@ func (is *IndexSnapshot) SynonymTermReader(ctx context.Context, thesaurusName st } return rv, nil } + +func (is *IndexSnapshot) newIndexSnapshotThesaurusKeys(name string, + makeItr func(i segment.Thesaurus) segment.ThesaurusIterator) (*IndexSnapshotThesaurusKeys, error) { + + results := make(chan *asynchSegmentResult, len(is.segment)) + var wg sync.WaitGroup + wg.Add(len(is.segment)) + for _, s := range is.segment { + go func(s *SegmentSnapshot) { + defer wg.Done() + if synSeg, ok := s.segment.(segment.ThesaurusSegment); ok { + thes, err := synSeg.Thesaurus(name) + if err != nil { + results <- &asynchSegmentResult{err: err} + } else { + results <- &asynchSegmentResult{thesItr: makeItr(thes)} + } + } + 
}(s) + } + // Close the channel after all goroutines complete + go func() { + wg.Wait() + close(results) + }() + + var err error + rv := &IndexSnapshotThesaurusKeys{ + snapshot: is, + cursors: make([]*segmentThesCursor, 0, len(is.segment)), + } + for asr := range results { + if asr.err != nil && err == nil { + err = asr.err + } else { + next, err2 := asr.thesItr.Next() + if err2 != nil && err == nil { + err = err2 + } + if next != nil { + rv.cursors = append(rv.cursors, &segmentThesCursor{ + itr: asr.thesItr, + curr: *next, + }) + } + } + } + // after ensuring we've read all items on channel + if err != nil { + return nil, err + } + + return rv, nil +} + +func (is *IndexSnapshot) ThesaurusKeys(name string) (index.ThesaurusKeys, error) { + return is.newIndexSnapshotThesaurusKeys(name, func(is segment.Thesaurus) segment.ThesaurusIterator { + return is.AutomatonIterator(nil, nil, nil) + }) +} + +func (is *IndexSnapshot) ThesaurusKeysFuzzy(name string, + term string, fuzziness int, prefix string) (index.ThesaurusKeys, error) { + a, err := is.getLevAutomaton(term, uint8(fuzziness)) + if err != nil { + return nil, err + } + var prefixBeg, prefixEnd []byte + if prefix != "" { + prefixBeg = []byte(prefix) + prefixEnd = calculateExclusiveEndFromPrefix(prefixBeg) + } + return is.newIndexSnapshotThesaurusKeys(name, func(is segment.Thesaurus) segment.ThesaurusIterator { + return is.AutomatonIterator(a, prefixBeg, prefixEnd) + }) +} + +func (is *IndexSnapshot) ThesaurusKeysPrefix(name string, + termPrefix []byte) (index.ThesaurusKeys, error) { + termPrefixEnd := calculateExclusiveEndFromPrefix(termPrefix) + return is.newIndexSnapshotThesaurusKeys(name, func(is segment.Thesaurus) segment.ThesaurusIterator { + return is.AutomatonIterator(nil, termPrefix, termPrefixEnd) + }) +} + +func (is *IndexSnapshot) ThesaurusKeysRegexp(name string, + termRegex string) (index.ThesaurusKeys, error) { + a, prefixBeg, prefixEnd, err := parseRegexp(termRegex) + if err != nil { + return nil, err + } + return is.newIndexSnapshotThesaurusKeys(name, func(is segment.Thesaurus) segment.ThesaurusIterator { + return is.AutomatonIterator(a, prefixBeg, prefixEnd) + }) +} diff --git a/index/scorch/snapshot_index_thes.go b/index/scorch/snapshot_index_thes.go new file mode 100644 index 000000000..6f3aae818 --- /dev/null +++ b/index/scorch/snapshot_index_thes.go @@ -0,0 +1,107 @@ +// Copyright (c) 2024 Couchbase, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
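The `ThesaurusKeys*` methods above deliberately mirror the existing `FieldDict*` family: each builds a merged iterator over the per-segment thesauri using an automaton (nil for full enumeration, a Levenshtein automaton for fuzzy, a compiled regex for regexp, or a bounded byte range for prefix). A hedged sketch of how a caller might walk keys with the prefix variant; the thesaurus name "en-thesaurus" is an assumption and error handling is abbreviated:

```go
package main

import (
	"fmt"

	"github.com/blevesearch/bleve/v2/index/scorch"
)

// dumpThesaurusKeys prints every thesaurus key starting with the given
// prefix. The iteration contract matches the implementation that follows:
// Next returns a nil entry (with a nil error) once the merged iterator
// across all segments is exhausted.
func dumpThesaurusKeys(snap *scorch.IndexSnapshot, prefix []byte) error {
	keys, err := snap.ThesaurusKeysPrefix("en-thesaurus", prefix)
	if err != nil {
		return err
	}
	defer keys.Close()
	for {
		entry, err := keys.Next()
		if err != nil {
			return err
		}
		if entry == nil {
			return nil // iterator exhausted
		}
		fmt.Println(entry.Term)
	}
}
```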
+ +package scorch + +import ( + "container/heap" + + index "github.com/blevesearch/bleve_index_api" + segment "github.com/blevesearch/scorch_segment_api/v2" +) + +type segmentThesCursor struct { + thes segment.Thesaurus + itr segment.ThesaurusIterator + curr index.ThesaurusEntry +} + +type IndexSnapshotThesaurusKeys struct { + snapshot *IndexSnapshot + cursors []*segmentThesCursor + entry index.ThesaurusEntry +} + +func (i *IndexSnapshotThesaurusKeys) Len() int { return len(i.cursors) } +func (i *IndexSnapshotThesaurusKeys) Less(a, b int) bool { + return i.cursors[a].curr.Term < i.cursors[b].curr.Term +} +func (i *IndexSnapshotThesaurusKeys) Swap(a, b int) { + i.cursors[a], i.cursors[b] = i.cursors[b], i.cursors[a] +} + +func (i *IndexSnapshotThesaurusKeys) Push(x interface{}) { + i.cursors = append(i.cursors, x.(*segmentThesCursor)) +} + +func (i *IndexSnapshotThesaurusKeys) Pop() interface{} { + n := len(i.cursors) + x := i.cursors[n-1] + i.cursors = i.cursors[0 : n-1] + return x +} + +func (i *IndexSnapshotThesaurusKeys) Next() (*index.ThesaurusEntry, error) { + if len(i.cursors) == 0 { + return nil, nil + } + i.entry = i.cursors[0].curr + next, err := i.cursors[0].itr.Next() + if err != nil { + return nil, err + } + if next == nil { + // at end of this cursor, remove it + heap.Pop(i) + } else { + // modified heap, fix it + i.cursors[0].curr = *next + heap.Fix(i, 0) + } + // look for any other entries with the exact same term + for len(i.cursors) > 0 && i.cursors[0].curr.Term == i.entry.Term { + next, err := i.cursors[0].itr.Next() + if err != nil { + return nil, err + } + if next == nil { + // at end of this cursor, remove it + heap.Pop(i) + } else { + // modified heap, fix it + i.cursors[0].curr = *next + heap.Fix(i, 0) + } + } + + return &i.entry, nil +} + +func (i *IndexSnapshotThesaurusKeys) Close() error { + return nil +} + +func (i *IndexSnapshotThesaurusKeys) Contains(key []byte) (bool, error) { + if len(i.cursors) == 0 { + return false, nil + } + + for _, cursor := range i.cursors { + if found, _ := cursor.thes.Contains(key); found { + return true, nil + } + } + + return false, nil +} diff --git a/index_impl.go b/index_impl.go index c085b796a..650f1d68c 100644 --- a/index_impl.go +++ b/index_impl.go @@ -453,7 +453,7 @@ func (i *indexImpl) preSearch(ctx context.Context, req *SearchRequest, reader in var fts search.FieldTermSynonymMap if !isMatchNoneQuery(req.Query) { if synMap, ok := i.m.(mapping.SynonymMapping); ok { - if synReader, ok := reader.(index.SynonymReader); ok { + if synReader, ok := reader.(index.ThesaurusReader); ok { fts, err = query.ExtractSynonyms(ctx, synMap, synReader, req.Query, fts) if err != nil { return nil, err @@ -550,7 +550,7 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr if fts == nil { if synMap, ok := i.m.(mapping.SynonymMapping); ok && synMap.SynonymCount() > 0 { - if synReader, ok := indexReader.(index.SynonymReader); ok { + if synReader, ok := indexReader.(index.ThesaurusReader); ok { fts, err = query.ExtractSynonyms(ctx, synMap, synReader, req.Query, fts) if err != nil { return nil, err diff --git a/search/levenshtein.go b/search/levenshtein.go index 687608d3f..dadab2521 100644 --- a/search/levenshtein.go +++ b/search/levenshtein.go @@ -68,6 +68,10 @@ func LevenshteinDistanceMaxReuseSlice(a, b string, max int, d []int) (int, bool, ld := int(math.Abs(float64(la - lb))) if ld > max { return max, true, d + } else if la == 0 || lb == 0 { + // if one string of the two strings is empty, then ld is + // the length of 
the other string and as such is <= max + return ld, false, d } if cap(d) < la+1 { diff --git a/search/levenshtein_test.go b/search/levenshtein_test.go index 651f7803c..ef23980ef 100644 --- a/search/levenshtein_test.go +++ b/search/levenshtein_test.go @@ -69,12 +69,19 @@ func TestLevenshteinDistanceMax(t *testing.T) { exceeded: true, }, { - a: "water", + a: "", b: "water", - max: 1, - dist: 0, + max: 10, + dist: 5, exceeded: false, }, + { + a: "water", + b: "", + max: 3, + dist: 3, + exceeded: true, + }, } for _, test := range tests { diff --git a/search/query/query.go b/search/query/query.go index 5e3259c11..dcb0e1583 100644 --- a/search/query/query.go +++ b/search/query/query.go @@ -439,17 +439,24 @@ func DumpQuery(m mapping.IndexMapping, query Query) (string, error) { return string(data), err } +const ( + FuzzyMatchType = iota + RegexpMatchType + PrefixMatchType +) + // ExtractSynonyms extracts synonyms from the query tree and returns a map of // field-term pairs to their synonyms. The input query tree is traversed and // for each term query, the synonyms are extracted from the synonym source // associated with the field. The synonyms are then added to the provided map. // The map is returned and may be nil if no synonyms were found. -func ExtractSynonyms(ctx context.Context, m mapping.SynonymMapping, r index.SynonymReader, +func ExtractSynonyms(ctx context.Context, m mapping.SynonymMapping, r index.ThesaurusReader, query Query, rv search.FieldTermSynonymMap) (search.FieldTermSynonymMap, error) { if r == nil || m == nil || query == nil { return rv, nil } + var err error resolveFieldAndSource := func(field string) (string, string) { if field == "" { field = m.DefaultSearchField() @@ -468,7 +475,6 @@ func ExtractSynonyms(ctx context.Context, m mapping.SynonymMapping, r index.Syno } switch q := query.(type) { case *BooleanQuery: - var err error rv, err = ExtractSynonyms(ctx, m, r, q.Must, rv) if err != nil { return nil, err @@ -481,10 +487,8 @@ func ExtractSynonyms(ctx context.Context, m mapping.SynonymMapping, r index.Syno if err != nil { return nil, err } - return rv, nil case *ConjunctionQuery: for _, child := range q.Conjuncts { - var err error rv, err = ExtractSynonyms(ctx, m, r, child, rv) if err != nil { return nil, err @@ -492,7 +496,6 @@ func ExtractSynonyms(ctx context.Context, m mapping.SynonymMapping, r index.Syno } case *DisjunctionQuery: for _, child := range q.Disjuncts { - var err error rv, err = ExtractSynonyms(ctx, m, r, child, rv) if err != nil { return nil, err @@ -505,7 +508,10 @@ func ExtractSynonyms(ctx context.Context, m mapping.SynonymMapping, r index.Syno if q.autoFuzzy { fuzziness = searcher.GetAutoFuzziness(q.Term) } - return addFuzzySynonymsForTerm(ctx, source, field, q.Term, fuzziness, q.Prefix, r, rv) + rv, err = addSynonymsForTermWithMatchType(ctx, FuzzyMatchType, source, field, q.Term, fuzziness, q.Prefix, r, rv) + if err != nil { + return nil, err + } } case *MatchQuery, *MatchPhraseQuery: var analyzerName, matchString, fieldVal string @@ -527,7 +533,7 @@ func ExtractSynonyms(ctx context.Context, m mapping.SynonymMapping, r index.Syno if autoFuzzy { fuzziness = searcher.GetAutoFuzziness(string(token.Term)) } - rv, err = addFuzzySynonymsForTerm(ctx, source, field, string(token.Term), fuzziness, prefix, r, rv) + rv, err = addSynonymsForTermWithMatchType(ctx, FuzzyMatchType, source, field, string(token.Term), fuzziness, prefix, r, rv) if err != nil { return nil, err } @@ -553,164 +559,123 @@ func ExtractSynonyms(ctx context.Context, m mapping.SynonymMapping, r 
index.Syno terms = pq.Terms } for _, term := range terms { - var err error if autoFuzzy { fuzziness = searcher.GetAutoFuzziness(term) } - rv, err = addFuzzySynonymsForTerm(ctx, source, field, term, fuzziness, 0, r, rv) + rv, err = addSynonymsForTermWithMatchType(ctx, FuzzyMatchType, source, field, term, fuzziness, 0, r, rv) if err != nil { return nil, err } } - return rv, nil } case *PrefixQuery: field, source := resolveFieldAndSource(q.FieldVal) if source != "" { - return addPrefixSynonymsForTerm(ctx, source, field, q.Prefix, r, rv) + rv, err = addSynonymsForTermWithMatchType(ctx, PrefixMatchType, source, field, q.Prefix, 0, 0, r, rv) + if err != nil { + return nil, err + } } case *QueryStringQuery: expanded, err := expandQuery(m, q) if err != nil { return nil, err } - return ExtractSynonyms(ctx, m, r, expanded, rv) - case *RegexpQuery: + rv, err = ExtractSynonyms(ctx, m, r, expanded, rv) + if err != nil { + return nil, err + } + case *TermQuery: field, source := resolveFieldAndSource(q.FieldVal) if source != "" { - return addRegexpSynonymsForTerm(ctx, source, field, strings.TrimPrefix(q.Regexp, "^"), r, rv) + rv, err = addSynonymsForTerm(ctx, source, field, q.Term, r, rv) + if err != nil { + return nil, err + } } - case *TermQuery: + case *RegexpQuery: field, source := resolveFieldAndSource(q.FieldVal) if source != "" { - return addSynonymsForTerm(ctx, source, field, q.Term, r, rv) + rv, err = addSynonymsForTermWithMatchType(ctx, RegexpMatchType, source, field, strings.TrimPrefix(q.Regexp, "^"), 0, 0, r, rv) + if err != nil { + return nil, err + } } case *WildcardQuery: field, source := resolveFieldAndSource(q.FieldVal) if source != "" { - regexpString := wildcardRegexpReplacer.Replace(q.Wildcard) - return addRegexpSynonymsForTerm(ctx, source, field, regexpString, r, rv) + rv, err = addSynonymsForTermWithMatchType(ctx, RegexpMatchType, source, field, wildcardRegexpReplacer.Replace(q.Wildcard), 0, 0, r, rv) + if err != nil { + return nil, err + } } } return rv, nil } -// addRegexpSynonymsForTerm finds all terms that match the given regexp and -// adds their synonyms to the provided map. -func addRegexpSynonymsForTerm(ctx context.Context, src, field, term string, - r index.SynonymReader, rv search.FieldTermSynonymMap) (search.FieldTermSynonymMap, error) { - // find the terms with this regexp - var ok bool - var ir index.IndexReaderRegexp - if ir, ok = r.(index.IndexReaderRegexp); !ok { - return addSynonymsForTerm(ctx, src, field, term, r, rv) - } - fieldDict, err := ir.FieldDictRegexp(field, term) - if err != nil { - return nil, err - } - defer func() { - if cerr := fieldDict.Close(); cerr != nil && err == nil { - err = cerr - } - }() - regexpTerms := []string{term} - tfd, err := fieldDict.Next() - for err == nil && tfd != nil { - regexpTerms = append(regexpTerms, tfd.Term) - tfd, err = fieldDict.Next() - } - if err != nil { - return nil, err - } - for _, term := range regexpTerms { - rv, err = addSynonymsForTerm(ctx, src, field, term, r, rv) - if err != nil { - return nil, err +// addFuzzySynonymsForTerm finds all terms that match the given term with the +// given fuzziness and adds their synonyms to the provided map. 
+func addSynonymsForTermWithMatchType(ctx context.Context, matchType int, src, field, term string, fuzziness, prefix int, + r index.ThesaurusReader, rv search.FieldTermSynonymMap) (search.FieldTermSynonymMap, error) { + // Determine the terms based on the match type (fuzzy, prefix, or regexp) + var thesKeys index.ThesaurusKeys + var err error + var terms []string + switch matchType { + case FuzzyMatchType: + // Ensure valid fuzziness + if fuzziness == 0 { + rv, err = addSynonymsForTerm(ctx, src, field, term, r, rv) + if err != nil { + return nil, err + } + return rv, nil } - } - return rv, nil -} - -// addPrefixSynonymsForTerm finds all terms that match the given prefix and -// adds their synonyms to the provided map. -func addPrefixSynonymsForTerm(ctx context.Context, src, field, term string, - r index.SynonymReader, rv search.FieldTermSynonymMap) (search.FieldTermSynonymMap, error) { - var ok bool - var ir index.IndexReaderPrefix - if ir, ok = r.(index.IndexReaderPrefix); !ok { - return addSynonymsForTerm(ctx, src, field, term, r, rv) - } - fieldDict, err := ir.FieldDictPrefix(field, []byte(term)) - if err != nil { - return nil, err - } - defer func() { - if cerr := fieldDict.Close(); cerr != nil && err == nil { - err = cerr + if fuzziness > searcher.MaxFuzziness { + return nil, fmt.Errorf("fuzziness exceeds max (%d)", searcher.MaxFuzziness) } - }() - prefixTerms := []string{term} - tfd, err := fieldDict.Next() - for err == nil && tfd != nil { - prefixTerms = append(prefixTerms, tfd.Term) - tfd, err = fieldDict.Next() - } - if err != nil { - return nil, err - } - for _, term := range prefixTerms { - rv, err = addSynonymsForTerm(ctx, src, field, term, r, rv) - if err != nil { - return nil, err + if fuzziness < 0 { + return nil, fmt.Errorf("invalid fuzziness, negative") } - } - return rv, nil -} - -// addFuzzySynonymsForTerm finds all terms that match the given term with the -// given fuzziness and adds their synonyms to the provided map. 
-func addFuzzySynonymsForTerm(ctx context.Context, src, field, term string, fuzziness, prefix int, - r index.SynonymReader, rv search.FieldTermSynonymMap) (search.FieldTermSynonymMap, error) { - if fuzziness > searcher.MaxFuzziness { - return nil, fmt.Errorf("fuzziness exceeds max (%d)", searcher.MaxFuzziness) - } - if fuzziness < 0 { - return nil, fmt.Errorf("invalid fuzziness, negative") - } - var ok bool - var ir index.IndexReaderFuzzy - if ir, ok = r.(index.IndexReaderFuzzy); !ok || fuzziness == 0 { - return addSynonymsForTerm(ctx, src, field, term, r, rv) - } - prefixTerm := "" - for i, r := range term { - if i < prefix { - prefixTerm += string(r) - } else { - break + // Handle fuzzy match + prefixTerm := "" + for i, r := range term { + if i < prefix { + prefixTerm += string(r) + } else { + break + } } + thesKeys, err = r.ThesaurusKeysFuzzy(src, term, fuzziness, prefixTerm) + case RegexpMatchType: + // Handle regexp match + thesKeys, err = r.ThesaurusKeysRegexp(src, term) + case PrefixMatchType: + // Handle prefix match + thesKeys, err = r.ThesaurusKeysPrefix(src, []byte(term)) + default: + return nil, fmt.Errorf("invalid match type: %d", matchType) } - fieldDict, err := ir.FieldDictFuzzy(field, term, fuzziness, prefixTerm) if err != nil { return nil, err } defer func() { - if cerr := fieldDict.Close(); cerr != nil && err == nil { + if cerr := thesKeys.Close(); cerr != nil && err == nil { err = cerr } }() - fuzzyTerms := []string{term} - tfd, err := fieldDict.Next() + // Collect the matching terms + terms = []string{} + tfd, err := thesKeys.Next() for err == nil && tfd != nil { - fuzzyTerms = append(fuzzyTerms, tfd.Term) - tfd, err = fieldDict.Next() + terms = append(terms, tfd.Term) + tfd, err = thesKeys.Next() } if err != nil { return nil, err } - for _, term := range fuzzyTerms { - rv, err = addSynonymsForTerm(ctx, src, field, term, r, rv) + for _, synTerm := range terms { + rv, err = addSynonymsForTerm(ctx, src, field, synTerm, r, rv) if err != nil { return nil, err } @@ -718,13 +683,10 @@ func addFuzzySynonymsForTerm(ctx context.Context, src, field, term string, fuzzi return rv, nil } -// addSynonymsForTerm finds synonyms for the given term and adds them to the -// provided map. 
func addSynonymsForTerm(ctx context.Context, src, field, term string, - r index.SynonymReader, rv search.FieldTermSynonymMap) (search.FieldTermSynonymMap, error) { + r index.ThesaurusReader, rv search.FieldTermSynonymMap) (search.FieldTermSynonymMap, error) { - termBytes := []byte(term) - termReader, err := r.SynonymTermReader(ctx, src, termBytes) + termReader, err := r.SynonymTermReader(ctx, src, []byte(term)) if err != nil { return nil, err } diff --git a/search/searcher/search_fuzzy.go b/search/searcher/search_fuzzy.go index d0e5c1ec5..187486efc 100644 --- a/search/searcher/search_fuzzy.go +++ b/search/searcher/search_fuzzy.go @@ -17,6 +17,7 @@ package searcher import ( "context" "fmt" + "strings" "github.com/blevesearch/bleve/v2/search" index "github.com/blevesearch/bleve_index_api" @@ -73,7 +74,7 @@ func NewFuzzySearcher(ctx context.Context, indexReader index.IndexReader, term s break } } - fuzzyCandidates, err := findFuzzyCandidateTerms(indexReader, term, fuzziness, + fuzzyCandidates, err := findFuzzyCandidateTerms(ctx, indexReader, term, fuzziness, field, prefixTerm) if err != nil { return nil, err @@ -144,7 +145,7 @@ func reportIOStats(ctx context.Context, bytesRead uint64) { } } -func findFuzzyCandidateTerms(indexReader index.IndexReader, term string, +func findFuzzyCandidateTerms(ctx context.Context, indexReader index.IndexReader, term string, fuzziness int, field, prefixTerm string) (rv *fuzzyCandidates, err error) { rv = &fuzzyCandidates{ candidates: make([]string, 0), @@ -155,7 +156,19 @@ func findFuzzyCandidateTerms(indexReader index.IndexReader, term string, // the levenshtein automaton based iterator to collect the // candidate terms if ir, ok := indexReader.(index.IndexReaderFuzzy); ok { - fieldDict, err := ir.FieldDictFuzzy(field, term, fuzziness, prefixTerm) + termSet := make(map[string]struct{}) + addCandidateTerm := func(term string, editDistance uint8) error { + if _, exists := termSet[term]; !exists { + termSet[term] = struct{}{} + rv.candidates = append(rv.candidates, term) + rv.editDistances = append(rv.editDistances, editDistance) + if tooManyClauses(len(rv.candidates)) { + return tooManyClausesErr(field, len(rv.candidates)) + } + } + return nil + } + fieldDict, a, err := ir.FieldDictFuzzyAutomaton(field, term, fuzziness, prefixTerm) if err != nil { return nil, err } @@ -166,16 +179,38 @@ func findFuzzyCandidateTerms(indexReader index.IndexReader, term string, }() tfd, err := fieldDict.Next() for err == nil && tfd != nil { - rv.candidates = append(rv.candidates, tfd.Term) - rv.editDistances = append(rv.editDistances, tfd.EditDistance) - if tooManyClauses(len(rv.candidates)) { - return nil, tooManyClausesErr(field, len(rv.candidates)) + err = addCandidateTerm(tfd.Term, tfd.EditDistance) + if err != nil { + return nil, err } tfd, err = fieldDict.Next() } - + if err != nil { + return nil, err + } + if ctx != nil { + if fts, ok := ctx.Value(search.FieldTermSynonymMapKey).(search.FieldTermSynonymMap); ok { + if ts, exists := fts[field]; exists { + for term := range ts { + if _, exists := termSet[term]; exists { + continue + } + if !strings.HasPrefix(term, prefixTerm) { + continue + } + match, editDistance := a.MatchAndDistance(term) + if match { + err = addCandidateTerm(term, editDistance) + if err != nil { + return nil, err + } + } + } + } + } + } rv.bytesRead = fieldDict.BytesRead() - return rv, err + return rv, nil } var fieldDict index.FieldDict diff --git a/search/searcher/search_regexp.go b/search/searcher/search_regexp.go index b88133e31..1afdaee02 100644 --- 
a/search/searcher/search_regexp.go
+++ b/search/searcher/search_regexp.go
@@ -48,7 +48,7 @@ func NewRegexpStringSearcher(ctx context.Context, indexReader index.IndexReader,
 		return NewRegexpSearcher(ctx, indexReader, r, field, boost, options)
 	}
 
-	fieldDict, err := ir.FieldDictRegexp(field, pattern)
+	fieldDict, a, err := ir.FieldDictRegexpAutomaton(field, pattern)
 	if err != nil {
 		return nil, err
 	}
@@ -58,17 +58,37 @@ func NewRegexpStringSearcher(ctx context.Context, indexReader index.IndexReader,
 		}
 	}()
 
+	var termSet = make(map[string]struct{})
 	var candidateTerms []string
 	tfd, err := fieldDict.Next()
 	for err == nil && tfd != nil {
-		candidateTerms = append(candidateTerms, tfd.Term)
+		if _, exists := termSet[tfd.Term]; !exists {
+			termSet[tfd.Term] = struct{}{}
+			candidateTerms = append(candidateTerms, tfd.Term)
+		}
 		tfd, err = fieldDict.Next()
 	}
 	if err != nil {
 		return nil, err
 	}
 
+	if ctx != nil {
+		if fts, ok := ctx.Value(search.FieldTermSynonymMapKey).(search.FieldTermSynonymMap); ok {
+			if ts, exists := fts[field]; exists {
+				for term := range ts {
+					if _, exists := termSet[term]; exists {
+						continue
+					}
+					if a.MatchesRegex(term) {
+						termSet[term] = struct{}{}
+						candidateTerms = append(candidateTerms, term)
+					}
+				}
+			}
+		}
+	}
+
 	return NewMultiTermSearcher(ctx, indexReader, candidateTerms, field, boost,
 		options, true)
 }
diff --git a/search/searcher/search_term_prefix.go b/search/searcher/search_term_prefix.go
index 3b05e5a8d..3d98cd28e 100644
--- a/search/searcher/search_term_prefix.go
+++ b/search/searcher/search_term_prefix.go
@@ -16,6 +16,7 @@ package searcher
 
 import (
 	"context"
+	"strings"
 
 	"github.com/blevesearch/bleve/v2/search"
 	index "github.com/blevesearch/bleve_index_api"
@@ -36,13 +37,17 @@ func NewTermPrefixSearcher(ctx context.Context, indexReader index.IndexReader, p
 	}()
 
 	var terms []string
+	var termSet = make(map[string]struct{})
 	tfd, err := fieldDict.Next()
 	for err == nil && tfd != nil {
-		terms = append(terms, tfd.Term)
-		if tooManyClauses(len(terms)) {
-			return nil, tooManyClausesErr(field, len(terms))
+		if _, exists := termSet[tfd.Term]; !exists {
+			termSet[tfd.Term] = struct{}{}
+			terms = append(terms, tfd.Term)
+			if tooManyClauses(len(terms)) {
+				return nil, tooManyClausesErr(field, len(terms))
+			}
 		}
 		tfd, err = fieldDict.Next()
 	}
 	if err != nil {
 		return nil, err
@@ -52,6 +57,26 @@ func NewTermPrefixSearcher(ctx context.Context, indexReader index.IndexReader, p
 		reportIOStats(ctx, fieldDict.BytesRead())
 		search.RecordSearchCost(ctx, search.AddM, fieldDict.BytesRead())
 	}
+
+	if ctx != nil {
+		if fts, ok := ctx.Value(search.FieldTermSynonymMapKey).(search.FieldTermSynonymMap); ok {
+			if ts, exists := fts[field]; exists {
+				for term := range ts {
+					if _, exists := termSet[term]; exists {
+						continue
+					}
+					if strings.HasPrefix(term, prefix) {
+						termSet[term] = struct{}{}
+						terms = append(terms, term)
+						if tooManyClauses(len(terms)) {
+							return nil, tooManyClausesErr(field, len(terms))
+						}
+					}
+				}
+			}
+		}
+	}
+
 	// check if the terms are empty or have one term which is the prefix itself
 	if len(terms) == 0 || (len(terms) == 1 && terms[0] == prefix) {
 		return NewTermSearcher(ctx, indexReader, prefix, field, boost, options)
diff --git a/search_test.go b/search_test.go
index cd1346936..9c5f0d7bb 100644
--- a/search_test.go
+++ b/search_test.go
@@ -3854,7 +3854,7 @@ func TestSynonymTermReader(t *testing.T) {
 		}
 	}()
 
-	synReader, ok := reader.(index.SynonymReader)
+	synReader, ok := reader.(index.ThesaurusReader)
 	if !ok { 
t.Fatal("expected synonym reader") } @@ -4272,32 +4272,90 @@ func TestSynonymSearchQueries(t *testing.T) { }, } - for _, dtq := range testQueries { - q, err := query.ParseQuery([]byte(dtq.query)) + runTestQueries := func(idx Index) error { + for _, dtq := range testQueries { + q, err := query.ParseQuery([]byte(dtq.query)) + if err != nil { + return err + } + sr := NewSearchRequest(q) + sr.Highlight = NewHighlightWithStyle(ansi.Name) + sr.SortBy([]string{"_id"}) + sr.Fields = []string{"*"} + sr.Size = 30 + sr.Explain = true + res, err := idx.Search(sr) + if err != nil { + return err + } + if len(res.Hits) != len(dtq.expectHits) { + return fmt.Errorf("expected %d hits, got %d", len(dtq.expectHits), len(res.Hits)) + } + // sort the expected hits to match the order of the search results + sort.Strings(dtq.expectHits) + for i, hit := range res.Hits { + if hit.ID != dtq.expectHits[i] { + return fmt.Errorf("expected docID %s, got %s", dtq.expectHits[i], hit.ID) + } + } + } + return nil + } + err = runTestQueries(idx) + if err != nil { + t.Fatal(err) + } + // test with index alias - with 1 batch per index + numIndexes := len(batches) + indexes := make([]Index, numIndexes) + indexesPath := make([]string, numIndexes) + for i := 0; i < numIndexes; i++ { + tmpIndexPath := createTmpIndexPath(t) + idx, err := New(tmpIndexPath, imap) if err != nil { t.Fatal(err) } - sr := NewSearchRequest(q) - sr.Highlight = NewHighlightWithStyle(ansi.Name) - sr.SortBy([]string{"_id"}) - sr.Fields = []string{"*"} - sr.Size = 30 - sr.Explain = true - - res, err := idx.Search(sr) + err = idx.Batch(batches[i]) if err != nil { t.Fatal(err) } - if len(res.Hits) != len(dtq.expectHits) { - t.Fatalf("expected %d hits, got %d", len(dtq.expectHits), len(res.Hits)) - } - // sort the expected hits to match the order of the search results - sort.Strings(dtq.expectHits) - for i, hit := range res.Hits { - if hit.ID != dtq.expectHits[i] { - t.Fatalf("expected docID %s, got %s", dtq.expectHits[i], hit.ID) + indexes[i] = idx + indexesPath[i] = tmpIndexPath + } + defer func() { + for i := 0; i < numIndexes; i++ { + err = indexes[i].Close() + if err != nil { + t.Fatal(err) } + cleanupTmpIndexPath(t, indexesPath[i]) } + }() + alias := NewIndexAlias(indexes...) 
+ alias.SetIndexMapping(imap) + err = runTestQueries(alias) + if err != nil { + t.Fatal(err) + } + // test with multi-level alias now with two index per alias + // and having any extra index being in the final alias + numAliases := numIndexes / 2 + extraIndex := numIndexes % 2 + aliases := make([]IndexAlias, numAliases) + for i := 0; i < numAliases; i++ { + alias := NewIndexAlias(indexes[i*2], indexes[i*2+1]) + aliases[i] = alias + } + if extraIndex > 0 { + aliases[numAliases-1].Add(indexes[numIndexes-1]) + } + alias = NewIndexAlias() + alias.SetIndexMapping(imap) + for i := 0; i < numAliases; i++ { + alias.Add(aliases[i]) + } + err = runTestQueries(alias) + if err != nil { + t.Fatal(err) } - } From 59c3193377138975562d5690f35c7e277ef070f6 Mon Sep 17 00:00:00 2001 From: CascadingRadium Date: Mon, 9 Dec 2024 14:20:00 +0530 Subject: [PATCH 18/35] rebase --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index b5b3a5507..31f8a34bc 100644 --- a/go.mod +++ b/go.mod @@ -24,7 +24,7 @@ require ( github.com/blevesearch/zapx/v13 v13.3.10 github.com/blevesearch/zapx/v14 v14.3.10 github.com/blevesearch/zapx/v15 v15.3.16 - github.com/blevesearch/zapx/v16 v16.1.9-0.20241120170816-85db80035af2 + github.com/blevesearch/zapx/v16 v16.1.9 github.com/couchbase/moss v0.2.0 github.com/golang/protobuf v1.3.2 github.com/spf13/cobra v1.7.0 diff --git a/go.sum b/go.sum index 565d9a56c..088c6aafb 100644 --- a/go.sum +++ b/go.sum @@ -43,8 +43,8 @@ github.com/blevesearch/zapx/v14 v14.3.10 h1:SG6xlsL+W6YjhX5N3aEiL/2tcWh3DO75Bnz7 github.com/blevesearch/zapx/v14 v14.3.10/go.mod h1:qqyuR0u230jN1yMmE4FIAuCxmahRQEOehF78m6oTgns= github.com/blevesearch/zapx/v15 v15.3.16 h1:Ct3rv7FUJPfPk99TI/OofdC+Kpb4IdyfdMH48sb+FmE= github.com/blevesearch/zapx/v15 v15.3.16/go.mod h1:Turk/TNRKj9es7ZpKK95PS7f6D44Y7fAFy8F4LXQtGg= -github.com/blevesearch/zapx/v16 v16.1.9-0.20241120170816-85db80035af2 h1:+RX9SM7KO7q91E7rFj4NARSsAhKj2EbvdWfzX+ihg/w= -github.com/blevesearch/zapx/v16 v16.1.9-0.20241120170816-85db80035af2/go.mod h1:zuxVgVaLZ0g4lZvrv06xDc24N6nLCOzXYHVkXI7LMHM= +github.com/blevesearch/zapx/v16 v16.1.9 h1:br5EgGntCF723cCecSpOACE0LnGAP4+HYrkcEfQkcBY= +github.com/blevesearch/zapx/v16 v16.1.9/go.mod h1:zuxVgVaLZ0g4lZvrv06xDc24N6nLCOzXYHVkXI7LMHM= github.com/couchbase/ghistogram v0.1.0 h1:b95QcQTCzjTUocDXp/uMgSNQi8oj1tGwnJ4bODWZnps= github.com/couchbase/ghistogram v0.1.0/go.mod h1:s1Jhy76zqfEecpNWJfWUiKZookAFaiGOEoyzgHt9i7k= github.com/couchbase/moss v0.2.0 h1:VCYrMzFwEryyhRSeI+/b3tRBSeTpi/8gn5Kf6dxqn+o= From cad9e7950c4c2b4c36e111ba432ad5f48aa5da03 Mon Sep 17 00:00:00 2001 From: CascadingRadium Date: Mon, 9 Dec 2024 16:21:19 +0530 Subject: [PATCH 19/35] fix bug --- index_impl.go | 10 +++++-- search/searcher/search_phrase.go | 50 +++++++++++++++++--------------- 2 files changed, 34 insertions(+), 26 deletions(-) diff --git a/index_impl.go b/index_impl.go index 650f1d68c..d98463b73 100644 --- a/index_impl.go +++ b/index_impl.go @@ -519,7 +519,11 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr } var knnHits []*search.DocumentMatch + var skipKNNCollector bool + var fts search.FieldTermSynonymMap + var skipSynonymCollector bool + var ok bool if req.PreSearchData != nil { for k, v := range req.PreSearchData { @@ -530,6 +534,7 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr if !ok { return nil, fmt.Errorf("knn preSearchData must be of type []*search.DocumentMatch") } + skipKNNCollector = true } case 
search.SynonymPreSearchDataKey: if v != nil { @@ -537,18 +542,19 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr if !ok { return nil, fmt.Errorf("synonym preSearchData must be of type search.FieldTermSynonymMap") } + skipSynonymCollector = true } } } } - if knnHits == nil && requestHasKNN(req) { + if !skipKNNCollector && requestHasKNN(req) { knnHits, err = i.runKnnCollector(ctx, req, indexReader, false) if err != nil { return nil, err } } - if fts == nil { + if !skipSynonymCollector { if synMap, ok := i.m.(mapping.SynonymMapping); ok && synMap.SynonymCount() > 0 { if synReader, ok := indexReader.(index.ThesaurusReader); ok { fts, err = query.ExtractSynonyms(ctx, synMap, synReader, req.Query, fts) diff --git a/search/searcher/search_phrase.go b/search/searcher/search_phrase.go index 9c2ff7d5f..07675cfad 100644 --- a/search/searcher/search_phrase.go +++ b/search/searcher/search_phrase.go @@ -164,34 +164,36 @@ func NewMultiPhraseSearcher(ctx context.Context, indexReader index.IndexReader, } } - if fts, ok := ctx.Value(search.FieldTermSynonymMapKey).(search.FieldTermSynonymMap); ok { - if ts, exists := fts[field]; exists { - if fuzzinessEnabled { - for term, fuzzyTerms := range fuzzyTermMatches { - fuzzySynonymTerms := make([]string, 0, len(fuzzyTerms)) - if s, found := ts[term]; found { - fuzzySynonymTerms = append(fuzzySynonymTerms, s...) - } - for _, fuzzyTerm := range fuzzyTerms { - if fuzzyTerm == term { - continue - } - if s, found := ts[fuzzyTerm]; found { + if ctx != nil { + if fts, ok := ctx.Value(search.FieldTermSynonymMapKey).(search.FieldTermSynonymMap); ok { + if ts, exists := fts[field]; exists { + if fuzzinessEnabled { + for term, fuzzyTerms := range fuzzyTermMatches { + fuzzySynonymTerms := make([]string, 0, len(fuzzyTerms)) + if s, found := ts[term]; found { fuzzySynonymTerms = append(fuzzySynonymTerms, s...) } + for _, fuzzyTerm := range fuzzyTerms { + if fuzzyTerm == term { + continue + } + if s, found := ts[fuzzyTerm]; found { + fuzzySynonymTerms = append(fuzzySynonymTerms, s...) + } + } + if len(fuzzySynonymTerms) > 0 { + fuzzyTermMatches[term] = append(fuzzyTermMatches[term], fuzzySynonymTerms...) + } } - if len(fuzzySynonymTerms) > 0 { - fuzzyTermMatches[term] = append(fuzzyTermMatches[term], fuzzySynonymTerms...) 
- } - } - } else { - for _, termPos := range terms { - for _, term := range termPos { - if s, found := ts[term]; found { - if fuzzyTermMatches == nil { - fuzzyTermMatches = make(map[string][]string) + } else { + for _, termPos := range terms { + for _, term := range termPos { + if s, found := ts[term]; found { + if fuzzyTermMatches == nil { + fuzzyTermMatches = make(map[string][]string) + } + fuzzyTermMatches[term] = s } - fuzzyTermMatches[term] = s } } } From f3d0ac57c608b7abc9404fa662c7898a35f16abf Mon Sep 17 00:00:00 2001 From: CascadingRadium Date: Mon, 9 Dec 2024 16:27:22 +0530 Subject: [PATCH 20/35] optimization --- index_alias_impl.go | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/index_alias_impl.go b/index_alias_impl.go index dddb359bb..e9ed3a481 100644 --- a/index_alias_impl.go +++ b/index_alias_impl.go @@ -561,7 +561,14 @@ func preSearchRequired(req *SearchRequest, m mapping.IndexMapping) *preSearchFla if !isMatchNoneQuery(req.Query) { // Check if synonyms are defined in the mapping if sm, ok := m.(mapping.SynonymMapping); ok && sm.SynonymCount() > 0 { - synonyms = true + // check if any of the fields queried have a synonym source + // in the index mapping, to prevent unnecessary preSearch + for field := range query.ExtractFields(req.Query, m, nil) { + if sm.SynonymSourceForPath(field) != "" { + synonyms = true + break + } + } } } if knn || synonyms { From e3b1d5b0536d487578a61db5c8d735df21d03bc9 Mon Sep 17 00:00:00 2001 From: CascadingRadium Date: Mon, 9 Dec 2024 18:28:50 +0530 Subject: [PATCH 21/35] bleve APIs --- document/field_synonym.go | 2 +- index.go | 8 ++++++++ index_alias_impl.go | 19 +++++++++++++++++++ index_impl.go | 34 ++++++++++++++++++++++++++++++++++ 4 files changed, 62 insertions(+), 1 deletion(-) diff --git a/document/field_synonym.go b/document/field_synonym.go index 0e4812690..4aa603fc4 100644 --- a/document/field_synonym.go +++ b/document/field_synonym.go @@ -117,7 +117,7 @@ func processSynonymData(input []string, synonyms []string) map[string][]string { // Map each term to the same list of synonyms. synonymMap = make(map[string][]string, len(input)) for _, term := range input { - synonymMap[term] = append([]string(nil), synonyms...) // Avoid sharing slices. + synonymMap[term] = synonyms } } else { synonymMap = make(map[string][]string, len(synonyms)) diff --git a/index.go b/index.go index d98f28558..5066c06af 100644 --- a/index.go +++ b/index.go @@ -378,3 +378,11 @@ func (sd *SynonymDefinition) Validate() error { } return nil } + +// SynonymIndex supports indexing synonym definitions alongside regular documents. +// Synonyms, grouped by collection name, define term relationships for query expansion in searches. +type SynonymIndex interface { + Index + // IndexSynonym indexes a synonym definition, with the specified id and belonging to the specified collection. 
+ IndexSynonym(id string, collection string, definition *SynonymDefinition) error +} diff --git a/index_alias_impl.go b/index_alias_impl.go index e9ed3a481..853665df4 100644 --- a/index_alias_impl.go +++ b/index_alias_impl.go @@ -82,6 +82,25 @@ func (i *indexAliasImpl) Index(id string, data interface{}) error { return i.indexes[0].Index(id, data) } +func (i *indexAliasImpl) IndexSynonym(id string, collection string, definition *SynonymDefinition) error { + i.mutex.RLock() + defer i.mutex.RUnlock() + + if !i.open { + return ErrorIndexClosed + } + + err := i.isAliasToSingleIndex() + if err != nil { + return err + } + + if si, ok := i.indexes[0].(SynonymIndex); ok { + return si.IndexSynonym(id, collection, definition) + } + return ErrorSynonymSearchNotSupported +} + func (i *indexAliasImpl) Delete(id string) error { i.mutex.RLock() defer i.mutex.RUnlock() diff --git a/index_impl.go b/index_impl.go index d98463b73..289014f6c 100644 --- a/index_impl.go +++ b/index_impl.go @@ -268,6 +268,40 @@ func (i *indexImpl) Index(id string, data interface{}) (err error) { return } +// IndexSynonym indexes a synonym definition, with the specified id and belonging to the specified collection. +// Synonym definition defines term relationships for query expansion in searches. +func (i *indexImpl) IndexSynonym(id string, collection string, definition *SynonymDefinition) error { + if id == "" { + return ErrorEmptyID + } + + i.mutex.RLock() + defer i.mutex.RUnlock() + + if !i.open { + return ErrorIndexClosed + } + + i.FireIndexEvent() + + synMap, ok := i.m.(mapping.SynonymMapping) + if !ok { + return ErrorSynonymSearchNotSupported + } + + if err := definition.Validate(); err != nil { + return err + } + + doc := document.NewSynonymDocument(id) + err := synMap.MapSynonymDocument(doc, collection, definition.Input, definition.Synonyms) + if err != nil { + return err + } + err = i.i.Update(doc) + return err +} + // IndexAdvanced takes a document.Document object // skips the mapping and indexes it. func (i *indexImpl) IndexAdvanced(doc *document.Document) (err error) { From b469373e8c1742bfa6903f4c9ba070bcb0321ea2 Mon Sep 17 00:00:00 2001 From: CascadingRadium Date: Mon, 9 Dec 2024 20:10:16 +0530 Subject: [PATCH 22/35] minor fix --- index.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/index.go b/index.go index 5066c06af..3d2389884 100644 --- a/index.go +++ b/index.go @@ -363,7 +363,7 @@ type SynonymDefinition struct { // When terms are specified in Input, they will map to the terms in Synonyms, // making the relationship unidirectional (each Input maps to all Synonyms). // If Input is omitted, the relationship is bidirectional among all Synonyms. - Input []string `json:"input"` + Input []string `json:"input,omitempty"` // Synonyms is a list of terms that are considered equivalent. // If Input is specified, each term in Input will map to each term in Synonyms. 
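Putting the pieces of this patch together, the new API can be exercised roughly as below. A hedged usage sketch: the collection name is an assumption, and the index is expected to have been created with a mapping whose synonym source references that collection.

```go
package main

import (
	bleve "github.com/blevesearch/bleve/v2"
)

// addSynonyms indexes one bidirectional synonym definition. With Input left
// empty, every term in Synonyms expands to all of the others at query time.
func addSynonyms(idx bleve.Index) error {
	si, ok := idx.(bleve.SynonymIndex)
	if !ok {
		return bleve.ErrorSynonymSearchNotSupported
	}
	def := &bleve.SynonymDefinition{
		Synonyms: []string{"quick", "fast", "speedy"},
	}
	// "en-thesaurus" must match a collection referenced by a synonym
	// source in the index mapping (assumed name, for illustration).
	return si.IndexSynonym("syn-doc-1", "en-thesaurus", def)
}
```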
From 67815ad5451a94f2523819177c9f991f989edeba Mon Sep 17 00:00:00 2001 From: CascadingRadium Date: Mon, 9 Dec 2024 20:48:58 +0530 Subject: [PATCH 23/35] bug fix --- index_alias_impl.go | 17 ++++++++++++----- search/query/query.go | 30 +++++++++++++++++++++++------- search/query/query_test.go | 23 ++++++++++++++++++++++- 3 files changed, 57 insertions(+), 13 deletions(-) diff --git a/index_alias_impl.go b/index_alias_impl.go index 853665df4..eee8243fb 100644 --- a/index_alias_impl.go +++ b/index_alias_impl.go @@ -233,7 +233,10 @@ func (i *indexAliasImpl) SearchInContext(ctx context.Context, req *SearchRequest // - the request requires preSearch var preSearchDuration time.Duration var sr *SearchResult - flags := preSearchRequired(req, i.mapping) + flags, err := preSearchRequired(req, i.mapping) + if err != nil { + return nil, err + } if req.PreSearchData == nil && flags != nil { searchStart := time.Now() preSearchResult, err := preSearch(ctx, req, flags, i.indexes...) @@ -573,7 +576,7 @@ type preSearchFlags struct { // preSearchRequired checks if preSearch is required and returns a boolean flag // It only allocates the preSearchFlags struct if necessary -func preSearchRequired(req *SearchRequest, m mapping.IndexMapping) *preSearchFlags { +func preSearchRequired(req *SearchRequest, m mapping.IndexMapping) (*preSearchFlags, error) { // Check for KNN query knn := requestHasKNN(req) var synonyms bool @@ -582,7 +585,11 @@ func preSearchRequired(req *SearchRequest, m mapping.IndexMapping) *preSearchFla if sm, ok := m.(mapping.SynonymMapping); ok && sm.SynonymCount() > 0 { // check if any of the fields queried have a synonym source // in the index mapping, to prevent unnecessary preSearch - for field := range query.ExtractFields(req.Query, m, nil) { + fs, err := query.ExtractFields(req.Query, m, nil) + if err != nil { + return nil, err + } + for field := range fs { if sm.SynonymSourceForPath(field) != "" { synonyms = true break @@ -594,9 +601,9 @@ func preSearchRequired(req *SearchRequest, m mapping.IndexMapping) *preSearchFla return &preSearchFlags{ knn: knn, synonyms: synonyms, - } + }, nil } - return nil + return nil, nil } func preSearch(ctx context.Context, req *SearchRequest, flags *preSearchFlags, indexes ...Index) (*SearchResult, error) { diff --git a/search/query/query.go b/search/query/query.go index defdb04eb..f44a248c7 100644 --- a/search/query/query.go +++ b/search/query/query.go @@ -445,10 +445,11 @@ type FieldSet map[string]struct{} // ExtractFields returns a set of fields referenced by the query. // The returned set may be nil if the query does not explicitly reference any field // and the DefaultSearchField is unset in the index mapping. 
-func ExtractFields(q Query, m mapping.IndexMapping, fs FieldSet) FieldSet {
-    if q == nil {
-        return fs
+func ExtractFields(q Query, m mapping.IndexMapping, fs FieldSet) (FieldSet, error) {
+    if q == nil || m == nil {
+        return fs, nil
     }
+    var err error
     switch q := q.(type) {
     case FieldableQuery:
         f := q.Field()
@@ -461,20 +462,35 @@ func ExtractFields(q Query, m mapping.IndexMapping, fs FieldSet) FieldSet {
             }
             fs[f] = struct{}{}
         }
+    case *QueryStringQuery:
+        var expandedQuery Query
+        expandedQuery, err = expandQuery(m, q)
+        if err == nil {
+            fs, err = ExtractFields(expandedQuery, m, fs)
+        }
     case *BooleanQuery:
         for _, subq := range []Query{q.Must, q.Should, q.MustNot} {
-            fs = ExtractFields(subq, m, fs)
+            fs, err = ExtractFields(subq, m, fs)
+            if err != nil {
+                break
+            }
         }
     case *ConjunctionQuery:
         for _, subq := range q.Conjuncts {
-            fs = ExtractFields(subq, m, fs)
+            fs, err = ExtractFields(subq, m, fs)
+            if err != nil {
+                break
+            }
         }
     case *DisjunctionQuery:
         for _, subq := range q.Disjuncts {
-            fs = ExtractFields(subq, m, fs)
+            fs, err = ExtractFields(subq, m, fs)
+            if err != nil {
+                break
+            }
         }
     }
-    return fs
+    return fs, err
 }
 
 const (
diff --git a/search/query/query_test.go b/search/query/query_test.go
index 870510de2..60c1fa374 100644
--- a/search/query/query_test.go
+++ b/search/query/query_test.go
@@ -999,6 +999,24 @@ func TestExtractFields(t *testing.T) {
             }`,
             expFields: []string{"date", "number", "date2", "number2", "date3"},
         },
+        {
+            query: `{
+                "query" : "hardworking people"
+            }`,
+            expFields: []string{"_all"},
+        },
+        {
+            query: `{
+                "query" : "text:hardworking people"
+            }`,
+            expFields: []string{"text", "_all"},
+        },
+        {
+            query: `{
+                "query" : "text:\"hardworking people\""
+            }`,
+            expFields: []string{"text"},
+        },
     }
 
     m := mapping.NewIndexMapping()
@@ -1007,7 +1025,10 @@ func TestExtractFields(t *testing.T) {
         if err != nil {
             t.Fatal(err)
         }
-        fields := ExtractFields(q, m, nil)
+        fields, err := ExtractFields(q, m, nil)
+        if err != nil {
+            t.Fatal(err)
+        }
         var fieldsSlice []string
         for k := range fields {
             fieldsSlice = append(fieldsSlice, k)

From 55f6d4c182d28fde92d8fec408b6cece12d74634 Mon Sep 17 00:00:00 2001
From: CascadingRadium
Date: Tue, 10 Dec 2024 16:28:54 +0530
Subject: [PATCH 24/35] make default_synonym_source omitempty

---
 mapping/index.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mapping/index.go b/mapping/index.go
index feb7634a9..78485cfad 100644
--- a/mapping/index.go
+++ b/mapping/index.go
@@ -49,7 +49,7 @@ type IndexMappingImpl struct {
     DefaultType           string `json:"default_type"`
     DefaultAnalyzer       string `json:"default_analyzer"`
     DefaultDateTimeParser string `json:"default_datetime_parser"`
-    DefaultSynonymSource  string `json:"default_synonym_source"`
+    DefaultSynonymSource  string `json:"default_synonym_source,omitempty"`
     DefaultField          string `json:"default_field"`
     StoreDynamic          bool   `json:"store_dynamic"`
     IndexDynamic          bool   `json:"index_dynamic"`

From 1a6dd1e43ff01294c1e31c9b0fdc4b696ed7eb33 Mon Sep 17 00:00:00 2001
From: CascadingRadium
Date: Tue, 10 Dec 2024 16:35:46 +0530
Subject: [PATCH 25/35] fix bugs

---
 mapping/field.go | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/mapping/field.go b/mapping/field.go
index 08af2dfa7..ce2878b18 100644
--- a/mapping/field.go
+++ b/mapping/field.go
@@ -462,22 +462,22 @@ func (fm *FieldMapping) UnmarshalJSON(data []byte) error {
             return err
         }
     case "dims":
-        err := json.Unmarshal(v, &fm.Dims)
+        err := util.UnmarshalJSON(v, &fm.Dims)
         if err != nil {
             return err
         }
     case "similarity":
-        err := json.Unmarshal(v, &fm.Similarity)
+        err := util.UnmarshalJSON(v, &fm.Similarity)
         if err != nil {
             return err
         }
     case "vector_index_optimized_for":
-        err := json.Unmarshal(v, &fm.VectorIndexOptimizedFor)
+        err := util.UnmarshalJSON(v, &fm.VectorIndexOptimizedFor)
         if err != nil {
             return err
         }
     case "synonym_source":
-        err := json.Unmarshal(v, &fm.SynonymSource)
+        err := util.UnmarshalJSON(v, &fm.SynonymSource)
         if err != nil {
             return err
         }

From ee71211399dac62677b1970ac1453b076adc1b5c Mon Sep 17 00:00:00 2001
From: CascadingRadium
Date: Tue, 10 Dec 2024 19:36:34 +0530
Subject: [PATCH 26/35] refactor bleve APIs

---
 analysis/type.go           |  9 ++++
 index_test.go              | 12 +++---
 mapping/analysis.go        |  8 ++++
 mapping/document.go        | 13 +++++-
 mapping/index.go           | 65 +++++++++++++++++------------
 mapping/synonym.go         | 25 ++++++-----
 registry/registry.go       | 11 +++++
 registry/synonym_source.go | 85 ++++++++++++++++++++++++++++++++++++++
 search_test.go             | 28 ++++++++++---
 9 files changed, 206 insertions(+), 50 deletions(-)
 create mode 100644 registry/synonym_source.go

diff --git a/analysis/type.go b/analysis/type.go
index e3a7f201b..f819984b5 100644
--- a/analysis/type.go
+++ b/analysis/type.go
@@ -106,6 +106,15 @@ type DateTimeParser interface {
     ParseDateTime(string) (time.Time, string, error)
 }
 
+const SynonymSourceType = "synonym"
+
+type SynonymSourceVisitor func(name string, item SynonymSource) error
+
+type SynonymSource interface {
+    Analyzer() string
+    Collection() string
+}
+
 type ByteArrayConverter interface {
     Convert([]byte) (interface{}, error)
 }
diff --git a/index_test.go b/index_test.go
index e75c5fd87..5d801b4e0 100644
--- a/index_test.go
+++ b/index_test.go
@@ -402,9 +402,9 @@ func TestBytesRead(t *testing.T) {
     stats, _ := idx.StatsMap()["index"].(map[string]interface{})
     prevBytesRead, _ := stats["num_bytes_read_at_query_time"].(uint64)
 
-    expectedBytesRead := uint64(21639)
+    expectedBytesRead := uint64(22049)
     if supportForVectorSearch {
-        expectedBytesRead = 22049
+        expectedBytesRead = 22459
     }
 
     if prevBytesRead != expectedBytesRead && res.Cost == prevBytesRead {
@@ -560,9 +560,9 @@ func TestBytesReadStored(t *testing.T) {
     stats, _ := idx.StatsMap()["index"].(map[string]interface{})
     bytesRead, _ := stats["num_bytes_read_at_query_time"].(uint64)
 
-    expectedBytesRead := uint64(11501)
+    expectedBytesRead := uint64(11911)
     if supportForVectorSearch {
-        expectedBytesRead = 11911
+        expectedBytesRead = 12321
     }
 
     if bytesRead != expectedBytesRead && bytesRead == res.Cost {
@@ -637,9 +637,9 @@ func TestBytesReadStored(t *testing.T) {
     stats, _ = idx1.StatsMap()["index"].(map[string]interface{})
     bytesRead, _ = stats["num_bytes_read_at_query_time"].(uint64)
 
-    expectedBytesRead = uint64(3687)
+    expectedBytesRead = uint64(4097)
     if supportForVectorSearch {
-        expectedBytesRead = 4097
+        expectedBytesRead = 4507
     }
 
     if bytesRead != expectedBytesRead && bytesRead == res.Cost {
diff --git a/mapping/analysis.go b/mapping/analysis.go
index 03e3cd01b..311e97232 100644
--- a/mapping/analysis.go
+++ b/mapping/analysis.go
@@ -21,6 +21,7 @@ type customAnalysis struct {
     TokenFilters    map[string]map[string]interface{} `json:"token_filters,omitempty"`
     Analyzers       map[string]map[string]interface{} `json:"analyzers,omitempty"`
     DateTimeParsers map[string]map[string]interface{} `json:"date_time_parsers,omitempty"`
+    SynonymSources  map[string]map[string]interface{} `json:"synonym_sources,omitempty"`
 }
 
 func (c *customAnalysis) registerAll(i *IndexMappingImpl) error {
@@ -83,6 +84,12 @@ func (c *customAnalysis) registerAll(i *IndexMappingImpl) error {
             return err
         }
     }
+    for name, config := range c.SynonymSources {
+        _, err := i.cache.DefineSynonymSource(name, config)
+        if err != nil {
+            return err
+        }
+    }
     return nil
 }
 
@@ -94,6 +101,7 @@ func newCustomAnalysis() *customAnalysis {
         TokenFilters:    make(map[string]map[string]interface{}),
         Analyzers:       make(map[string]map[string]interface{}),
         DateTimeParsers: make(map[string]map[string]interface{}),
+        SynonymSources:  make(map[string]map[string]interface{}),
     }
     return &rv
 }
diff --git a/mapping/document.go b/mapping/document.go
index b470afaa4..e89e66979 100644
--- a/mapping/document.go
+++ b/mapping/document.go
@@ -60,6 +60,12 @@ func (dm *DocumentMapping) Validate(cache *registry.Cache,
             return err
         }
     }
+    if dm.DefaultSynonymSource != "" {
+        _, err := cache.SynonymSourceNamed(dm.DefaultSynonymSource)
+        if err != nil {
+            return err
+        }
+    }
     for propertyName, property := range dm.Properties {
         newParent := propertyName
         if parentName != "" {
@@ -83,7 +89,13 @@ func (dm *DocumentMapping) Validate(cache *registry.Cache,
                 return err
             }
         }
-
+        if field.SynonymSource != "" {
+            _, err = cache.SynonymSourceNamed(field.SynonymSource)
+            if err != nil {
+                return err
+            }
+        }
         err := validateFieldMapping(field, parentName, fieldAliasCtx)
         if err != nil {
             return err
diff --git a/mapping/index.go b/mapping/index.go
index 78485cfad..8a0d5e34a 100644
--- a/mapping/index.go
+++ b/mapping/index.go
@@ -55,7 +55,6 @@ type IndexMappingImpl struct {
     IndexDynamic     bool                      `json:"index_dynamic"`
     DocValuesDynamic bool                      `json:"docvalues_dynamic"`
     CustomAnalysis   *customAnalysis           `json:"analysis,omitempty"`
-    SynonymSources   map[string]*SynonymSource `json:"synonym_sources,omitempty"`
     cache            *registry.Cache
 }
@@ -147,11 +146,12 @@ func (im *IndexMappingImpl) AddCustomDateTimeParser(name string, config map[stri
     return nil
 }
 
-func (im *IndexMappingImpl) AddSynonymSource(name, collection, analyzer string) error {
-    if im.SynonymSources == nil {
-        im.SynonymSources = make(map[string]*SynonymSource)
+func (im *IndexMappingImpl) AddSynonymSource(name string, config map[string]interface{}) error {
+    _, err := im.cache.DefineSynonymSource(name, config)
+    if err != nil {
+        return err
     }
-    im.SynonymSources[name] = NewSynonymSource(collection, analyzer)
+    im.CustomAnalysis.SynonymSources[name] = config
     return nil
 }
@@ -184,7 +184,12 @@ func (im *IndexMappingImpl) Validate() error {
     if err != nil {
         return err
     }
-
+    if im.DefaultSynonymSource != "" {
+        _, err = im.cache.SynonymSourceNamed(im.DefaultSynonymSource)
+        if err != nil {
+            return err
+        }
+    }
     fieldAliasCtx := make(map[string]*FieldMapping)
     err = im.DefaultMapping.Validate(im.cache, "", fieldAliasCtx)
     if err != nil {
@@ -196,12 +201,6 @@ func (im *IndexMappingImpl) Validate() error {
             return err
         }
     }
-    for _, synSource := range im.SynonymSources {
-        err = synSource.Validate(im.cache)
-        if err != nil {
-            return err
-        }
-    }
     return nil
 }
@@ -304,14 +303,6 @@ func (im *IndexMappingImpl) UnmarshalJSON(data []byte) error {
             if err != nil {
                 return err
             }
-        case "synonym_sources":
-            if im.SynonymSources == nil {
-                im.SynonymSources = make(map[string]*SynonymSource)
-            }
-            err := util.UnmarshalJSON(v, &im.SynonymSources)
-            if err != nil {
-                return err
-            }
         default:
             invalidKeys = append(invalidKeys, k)
         }
@@ -371,18 +362,19 @@ func (im *IndexMappingImpl) MapDocument(doc *document.Document, data interface{}
 func (im *IndexMappingImpl) MapSynonymDocument(doc *document.Document, collection string, input []string, synonyms []string) error {
     // determine all the synonym sources with the given collection
     // and create a synonym field for each
-    for name, synSource := range im.SynonymSources {
-        if synSource.Collection() == collection {
+    err := im.SynonymSourceVisitor(func(name string, item analysis.SynonymSource) error {
+        if item.Collection() == collection {
             // create a new field with the name of the synonym source
-            analyzer := im.AnalyzerNamed(synSource.Analyzer())
+            analyzer := im.AnalyzerNamed(item.Analyzer())
             if analyzer == nil {
-                return fmt.Errorf("unknown analyzer named: %s", synSource.Analyzer())
+                return fmt.Errorf("unknown analyzer named: %s", item.Analyzer())
             }
             field := document.NewSynonymField(name, analyzer, input, synonyms)
             doc.AddField(field)
         }
-    }
-    return nil
+        return nil
+    })
+    return err
 }
 
 type walkContext struct {
@@ -504,6 +496,15 @@ func (im *IndexMappingImpl) DefaultSearchField() string {
     return im.DefaultField
 }
 
+func (im *IndexMappingImpl) SynonymSourceNamed(name string) analysis.SynonymSource {
+    syn, err := im.cache.SynonymSourceNamed(name)
+    if err != nil {
+        logger.Printf("error using synonym source named: %s", name)
+        return nil
+    }
+    return syn
+}
+
 func (im *IndexMappingImpl) SynonymSourceForPath(path string) string {
     // first we look for explicit mapping on the field
     for _, docMapping := range im.TypeMapping {
@@ -544,6 +545,16 @@ func (im *IndexMappingImpl) SynonymSourceForPath(path string) string {
     return im.DefaultSynonymSource
 }
 
+// SynonymCount returns the number of synonym sources defined in the mapping
 func (im *IndexMappingImpl) SynonymCount() int {
-    return len(im.SynonymSources)
+    return len(im.CustomAnalysis.SynonymSources)
+}
+
+// SynonymSourceVisitor allows a visitor to iterate over all synonym sources
+func (im *IndexMappingImpl) SynonymSourceVisitor(visitor analysis.SynonymSourceVisitor) error {
+    err := im.cache.SynonymSources.VisitSynonymSources(visitor)
+    if err != nil {
+        return err
+    }
+    return nil
 }
diff --git a/mapping/synonym.go b/mapping/synonym.go
index 597539bf0..06dee754a 100644
--- a/mapping/synonym.go
+++ b/mapping/synonym.go
@@ -17,6 +17,7 @@ package mapping
 import (
     "fmt"
 
+    "github.com/blevesearch/bleve/v2/analysis"
     "github.com/blevesearch/bleve/v2/registry"
 )
@@ -47,17 +48,21 @@ func (s *SynonymSource) SetCollection(c string) {
 func (s *SynonymSource) SetAnalyzer(a string) {
     s.AnalyzerName = a
 }
-
-func (s *SynonymSource) Validate(c *registry.Cache) error {
-    if s.CollectionName == "" {
-        return fmt.Errorf("collection name is required")
+func SynonymSourceConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.SynonymSource, error) {
+    collection, ok := config["collection"].(string)
+    if !ok {
+        return nil, fmt.Errorf("must specify collection")
     }
-    if s.AnalyzerName == "" {
-        return fmt.Errorf("analyzer name is required")
+    analyzer, ok := config["analyzer"].(string)
+    if !ok {
+        return nil, fmt.Errorf("must specify analyzer")
     }
-    _, err := c.AnalyzerNamed(s.AnalyzerName)
-    if err != nil {
-        return fmt.Errorf("analyzer named '%s' not found", s.AnalyzerName)
+    if _, err := cache.AnalyzerNamed(analyzer); err != nil {
+        return nil, fmt.Errorf("analyzer named '%s' not found", analyzer)
     }
-    return nil
+    return NewSynonymSource(collection, analyzer), nil
+}
+
+func init() {
+    registry.RegisterSynonymSource(analysis.SynonymSourceType, SynonymSourceConstructor)
 }
diff --git a/registry/registry.go b/registry/registry.go
index 1954d0896..69ee8dd86 100644
--- a/registry/registry.go
+++ b/registry/registry.go
@@ -36,6 +36,7 @@ var tokenMaps = make(TokenMapRegistry, 0)
 var tokenFilters = make(TokenFilterRegistry, 0)
 var analyzers = make(AnalyzerRegistry, 0)
 var dateTimeParsers = make(DateTimeParserRegistry, 0)
+var synonymSources = make(SynonymSourceRegistry, 0)
 
 type Cache struct {
     CharFilters *CharFilterCache
@@ -47,6 +48,7 @@ type Cache struct {
     FragmentFormatters *FragmentFormatterCache
     Fragmenters        *FragmenterCache
     Highlighters       *HighlighterCache
+    SynonymSources     *SynonymSourceCache
 }
 
 func NewCache() *Cache {
@@ -60,6 +62,7 @@ func NewCache() *Cache {
         FragmentFormatters: NewFragmentFormatterCache(),
         Fragmenters:        NewFragmenterCache(),
         Highlighters:       NewHighlighterCache(),
+        SynonymSources:     NewSynonymSourceCache(),
     }
 }
@@ -147,6 +150,14 @@ func (c *Cache) DefineDateTimeParser(name string, config map[string]interface{})
     return c.DateTimeParsers.DefineDateTimeParser(name, typ, config, c)
 }
 
+func (c *Cache) SynonymSourceNamed(name string) (analysis.SynonymSource, error) {
+    return c.SynonymSources.SynonymSourceNamed(name, c)
+}
+
+func (c *Cache) DefineSynonymSource(name string, config map[string]interface{}) (analysis.SynonymSource, error) {
+    return c.SynonymSources.DefineSynonymSource(name, analysis.SynonymSourceType, config, c)
+}
+
 func (c *Cache) FragmentFormatterNamed(name string) (highlight.FragmentFormatter, error) {
     return c.FragmentFormatters.FragmentFormatterNamed(name, c)
 }
diff --git a/registry/synonym_source.go b/registry/synonym_source.go
new file mode 100644
index 000000000..cd26d8f01
--- /dev/null
+++ b/registry/synonym_source.go
@@ -0,0 +1,85 @@
+// Copyright (c) 2024 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package registry
+
+import (
+    "fmt"
+
+    "github.com/blevesearch/bleve/v2/analysis"
+)
+
+func RegisterSynonymSource(typ string, constructor SynonymSourceConstructor) {
+    _, exists := synonymSources[typ]
+    if exists {
+        panic(fmt.Errorf("attempted to register duplicate synonym source with type '%s'", typ))
+    }
+    synonymSources[typ] = constructor
+}
+
+type SynonymSourceCache struct {
+    *ConcurrentCache
+}
+
+func NewSynonymSourceCache() *SynonymSourceCache {
+    return &SynonymSourceCache{
+        NewConcurrentCache(),
+    }
+}
+
+type SynonymSourceConstructor func(config map[string]interface{}, cache *Cache) (analysis.SynonymSource, error)
+type SynonymSourceRegistry map[string]SynonymSourceConstructor
+
+func SynonymSourceBuild(name string, config map[string]interface{}, cache *Cache) (interface{}, error) {
+    cons, registered := synonymSources[name]
+    if !registered {
+        return nil, fmt.Errorf("no synonym source with name '%s' registered", name)
+    }
+    synonymSource, err := cons(config, cache)
+    if err != nil {
+        return nil, fmt.Errorf("error building synonym source: %v", err)
+    }
+    return synonymSource, nil
+}
+
+func (c *SynonymSourceCache) SynonymSourceNamed(name string, cache *Cache) (analysis.SynonymSource, error) {
+    item, err := c.ItemNamed(name, cache, SynonymSourceBuild)
+    if err != nil {
+        return nil, err
+    }
+    return item.(analysis.SynonymSource), nil
+}
+
+func (c *SynonymSourceCache) DefineSynonymSource(name string, typ string, config map[string]interface{}, cache *Cache) (analysis.SynonymSource, error) {
+    item, err := c.DefineItem(name, typ, config, cache, SynonymSourceBuild)
+    if err != nil {
+        if err == ErrAlreadyDefined {
+            return nil, fmt.Errorf("synonym source named '%s' already defined", name)
+        }
+        return nil, err
+    }
+    return item.(analysis.SynonymSource), nil
+}
+
+func (c *SynonymSourceCache) VisitSynonymSources(visitor analysis.SynonymSourceVisitor) error {
+    c.mutex.RLock()
+    defer c.mutex.RUnlock()
+    for k, v := range c.data {
+        err := visitor(k, v.(analysis.SynonymSource))
+        if err != nil {
+            return err
+        }
+    }
+    return nil
+}
diff --git a/search_test.go b/search_test.go
index 9c5f0d7bb..885595971 100644
--- a/search_test.go
+++ b/search_test.go
@@ -3759,16 +3759,24 @@ func TestSynonymTermReader(t *testing.T) {
 
     synonymSourceName := "english"
 
-    synonymAnalyzer := "simple"
+    analyzer := simple.Name
+
+    synonymSourceConfig := map[string]interface{}{
+        "collection": synonymCollection,
+        "analyzer":   analyzer,
+    }
 
     textField := mapping.NewTextFieldMapping()
-    textField.Analyzer = simple.Name
+    textField.Analyzer = analyzer
     textField.SynonymSource = synonymSourceName
 
     imap := mapping.NewIndexMapping()
     imap.DefaultMapping.AddFieldMappingsAt("text", textField)
-    imap.AddSynonymSource(synonymSourceName, synonymCollection, synonymAnalyzer)
-    err := imap.Validate()
+    err := imap.AddSynonymSource(synonymSourceName, synonymSourceConfig)
+    if err != nil {
+        t.Fatal(err)
+    }
+    err = imap.Validate()
     if err != nil {
         t.Fatal(err)
     }
@@ -3942,14 +3950,22 @@ func TestSynonymSearchQueries(t *testing.T) {
 
     analyzer := en.AnalyzerName
 
+    synonymSourceConfig := map[string]interface{}{
+        "collection": synonymCollection,
+        "analyzer":   analyzer,
+    }
+
     textField := mapping.NewTextFieldMapping()
     textField.Analyzer = analyzer
     textField.SynonymSource = synonymSourceName
 
     imap := mapping.NewIndexMapping()
     imap.DefaultMapping.AddFieldMappingsAt("text", textField)
-    imap.AddSynonymSource(synonymSourceName, synonymCollection, analyzer)
-    err := imap.Validate()
+    err := imap.AddSynonymSource(synonymSourceName, synonymSourceConfig)
+    if err != nil {
+        t.Fatal(err)
+    }
+    err = imap.Validate()
     if err != nil {
         t.Fatal(err)
     }

From a4d83ac3ff697c3f0bcb31207e4350e4c406abb6 Mon Sep 17 00:00:00 2001
From: CascadingRadium
Date: Tue, 10 Dec 2024 19:53:32 +0530
Subject: [PATCH 27/35] add additional methods to interface

---
 mapping/mapping.go | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/mapping/mapping.go b/mapping/mapping.go
index 6714c55aa..a6c1591b8 100644
--- a/mapping/mapping.go
+++ b/mapping/mapping.go
@@ -59,6 +59,8 @@ type IndexMapping interface {
     FieldMappingForPath(path string) FieldMapping
 }
 
+// A SynonymMapping extends the IndexMapping interface to provide
+// additional methods for working with synonyms.
 type SynonymMapping interface {
     IndexMapping
 
@@ -66,5 +68,9 @@ type SynonymMapping interface {
 
     SynonymSourceForPath(path string) string
 
+    SynonymSourceNamed(name string) analysis.SynonymSource
+
     SynonymCount() int
+
+    SynonymSourceVisitor(visitor analysis.SynonymSourceVisitor) error
 }

From 1cf2bfd4fe6b37f9f2e9bfccf26fda6eced9697f Mon Sep 17 00:00:00 2001
From: CascadingRadium
Date: Wed, 11 Dec 2024 15:37:18 +0530
Subject: [PATCH 28/35] update interface name

---
 index/scorch/snapshot_index.go     | 40 +++++++++++++++---------------
 index/scorch/snapshot_index_str.go | 18 +++++++-------
 search/query/query.go              |  2 +-
 search_test.go                     |  8 +++---
 4 files changed, 34 insertions(+), 34 deletions(-)

diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go
index 2c3ea5167..efb85b0f8 100644
--- a/index/scorch/snapshot_index.go
+++ b/index/scorch/snapshot_index.go
@@ -60,7 +60,7 @@ var reflectStaticSizeIndexSnapshot int
 // exported variable, or at the index level by setting the FieldTFRCacheThreshold
 // in the kvConfig.
 var DefaultFieldTFRCacheThreshold uint64 = 10
-var DefaultSynonymTermReaderCacheThreshold uint64 = 10
+var DefaultThesaurusTermReaderCacheThreshold uint64 = 10
 
 func init() {
     var is interface{} = IndexSnapshot{}
@@ -88,9 +88,9 @@ type IndexSnapshot struct {
     m    sync.Mutex // Protects the fields that follow.
     refs int64
 
-    m2                 sync.Mutex                                   // Protects the fields that follow.
-    fieldTFRs          map[string][]*IndexSnapshotTermFieldReader   // keyed by field, recycled TFR's
-    synonymTermReaders map[string][]*IndexSnapshotSynonymTermReader // keyed by thesaurus name, recycled thesaurus readers
+    m2                   sync.Mutex // Protects the fields that follow.
+    fieldTFRs            map[string][]*IndexSnapshotTermFieldReader     // keyed by field, recycled TFR's
+    thesaurusTermReaders map[string][]*IndexSnapshotThesaurusTermReader // keyed by thesaurus name, recycled thesaurus readers
 }
 
 func (i *IndexSnapshot) Segments() []*SegmentSnapshot {
@@ -683,13 +683,13 @@ func (is *IndexSnapshot) getFieldTFRCacheThreshold() uint64 {
     return DefaultFieldTFRCacheThreshold
 }
 
-func (is *IndexSnapshot) getSynonymTermReaderCacheThreshold() uint64 {
+func (is *IndexSnapshot) getThesaurusTermReaderCacheThreshold() uint64 {
     if is.parent.config != nil {
-        if _, ok := is.parent.config["SynonymTermReaderCacheThreshold"]; ok {
-            return is.parent.config["SynonymTermReaderCacheThreshold"].(uint64)
+        if _, ok := is.parent.config["ThesaurusTermReaderCacheThreshold"]; ok {
+            return is.parent.config["ThesaurusTermReaderCacheThreshold"].(uint64)
         }
     }
-    return DefaultSynonymTermReaderCacheThreshold
+    return DefaultThesaurusTermReaderCacheThreshold
 }
 
 func (is *IndexSnapshot) recycleTermFieldReader(tfr *IndexSnapshotTermFieldReader) {
@@ -720,7 +720,7 @@ func (is *IndexSnapshot) recycleTermFieldReader(tfr *IndexSnapshotTermFieldReade
     is.m2.Unlock()
 }
 
-func (is *IndexSnapshot) recycleSynonymTermReader(str *IndexSnapshotSynonymTermReader) {
+func (is *IndexSnapshot) recycleThesaurusTermReader(str *IndexSnapshotThesaurusTermReader) {
     is.parent.rootLock.RLock()
     obsolete := is.parent.root != is
     is.parent.rootLock.RUnlock()
@@ -730,11 +730,11 @@ func (is *IndexSnapshot) recycleSynonymTermReader(str *IndexSnapshotSynonymTermR
     }
 
     is.m2.Lock()
-    if is.synonymTermReaders == nil {
-        is.synonymTermReaders = map[string][]*IndexSnapshotSynonymTermReader{}
+    if is.thesaurusTermReaders == nil {
+        is.thesaurusTermReaders = map[string][]*IndexSnapshotThesaurusTermReader{}
     }
-    if uint64(len(is.synonymTermReaders[str.name])) < is.getSynonymTermReaderCacheThreshold() {
-        is.synonymTermReaders[str.name] = append(is.synonymTermReaders[str.name], str)
+    if uint64(len(is.thesaurusTermReaders[str.name])) < is.getThesaurusTermReaderCacheThreshold() {
+        is.thesaurusTermReaders[str.name] = append(is.thesaurusTermReaders[str.name], str)
     }
     is.m2.Unlock()
 }
@@ -1019,25 +1019,25 @@ func (is *IndexSnapshot) CloseCopyReader() error {
     return is.Close()
 }
 
-func (is *IndexSnapshot) allocSynonymTermReader(name string) (str *IndexSnapshotSynonymTermReader) {
+func (is *IndexSnapshot) allocThesaurusTermReader(name string) (str *IndexSnapshotThesaurusTermReader) {
     is.m2.Lock()
-    if is.synonymTermReaders != nil {
-        strs := is.synonymTermReaders[name]
+    if is.thesaurusTermReaders != nil {
+        strs := is.thesaurusTermReaders[name]
         last := len(strs) - 1
         if last >= 0 {
             str = strs[last]
             strs[last] = nil
-            is.synonymTermReaders[name] = strs[:last]
+            is.thesaurusTermReaders[name] = strs[:last]
             is.m2.Unlock()
             return
         }
     }
     is.m2.Unlock()
-    return &IndexSnapshotSynonymTermReader{}
+    return &IndexSnapshotThesaurusTermReader{}
 }
 
-func (is *IndexSnapshot) SynonymTermReader(ctx context.Context, thesaurusName string, term []byte) (index.SynonymTermReader, error) {
-    rv := is.allocSynonymTermReader(thesaurusName)
+func (is *IndexSnapshot) ThesaurusTermReader(ctx context.Context, thesaurusName string, term []byte) (index.ThesaurusTermReader, error) {
+    rv := is.allocThesaurusTermReader(thesaurusName)
 
     rv.name = thesaurusName
     rv.snapshot = is
diff --git a/index/scorch/snapshot_index_str.go b/index/scorch/snapshot_index_str.go
index e1ba60272..c66fbeec5 100644
--- a/index/scorch/snapshot_index_str.go
+++ b/index/scorch/snapshot_index_str.go
@@ -21,14 +21,14 @@
 import (
     segment "github.com/blevesearch/scorch_segment_api/v2"
 )
 
-var reflectStaticSizeIndexSnapshotSynonymTermReader int
+var reflectStaticSizeIndexSnapshotThesaurusTermReader int
 
 func init() {
-    var istr IndexSnapshotSynonymTermReader
-    reflectStaticSizeIndexSnapshotSynonymTermReader = int(reflect.TypeOf(istr).Size())
+    var istr IndexSnapshotThesaurusTermReader
+    reflectStaticSizeIndexSnapshotThesaurusTermReader = int(reflect.TypeOf(istr).Size())
 }
 
-type IndexSnapshotSynonymTermReader struct {
+type IndexSnapshotThesaurusTermReader struct {
     name     string
     snapshot *IndexSnapshot
     thesauri []segment.Thesaurus
@@ -37,8 +37,8 @@ type IndexSnapshotSynonymTermReader struct {
     segmentOffset int
 }
 
-func (i *IndexSnapshotSynonymTermReader) Size() int {
-    sizeInBytes := reflectStaticSizeIndexSnapshotSynonymTermReader + size.SizeOfPtr +
+func (i *IndexSnapshotThesaurusTermReader) Size() int {
+    sizeInBytes := reflectStaticSizeIndexSnapshotThesaurusTermReader + size.SizeOfPtr +
         len(i.name) + size.SizeOfString
 
     for _, postings := range i.postings {
@@ -52,7 +52,7 @@ func (i *IndexSnapshotSynonymTermReader) Size() int {
     return sizeInBytes
 }
 
-func (i *IndexSnapshotSynonymTermReader) Next() (string, error) {
+func (i *IndexSnapshotThesaurusTermReader) Next() (string, error) {
     // find the next hit
     for i.segmentOffset < len(i.iterators) {
         if i.iterators[i.segmentOffset] != nil {
@@ -70,9 +70,9 @@ func (i *IndexSnapshotSynonymTermReader) Next() (string, error) {
     return "", nil
 }
 
-func (i *IndexSnapshotSynonymTermReader) Close() error {
+func (i *IndexSnapshotThesaurusTermReader) Close() error {
     if i.snapshot != nil {
-        i.snapshot.recycleSynonymTermReader(i)
+        i.snapshot.recycleThesaurusTermReader(i)
     }
     return nil
 }
diff --git a/search/query/query.go b/search/query/query.go
index f44a248c7..86859ae5b 100644
--- a/search/query/query.go
+++ b/search/query/query.go
@@ -740,7 +740,7 @@ func addSynonymsForTermWithMatchType(ctx context.Context, matchType int, src, fi
 
 func addSynonymsForTerm(ctx context.Context, src, field, term string,
     r index.ThesaurusReader, rv search.FieldTermSynonymMap) (search.FieldTermSynonymMap, error) {
-    termReader, err := r.SynonymTermReader(ctx, src, []byte(term))
+    termReader, err := r.ThesaurusTermReader(ctx, src, []byte(term))
     if err != nil {
         return nil, err
     }
diff --git a/search_test.go b/search_test.go
index 885595971..043be4073 100644
--- a/search_test.go
+++ b/search_test.go
@@ -3751,7 +3751,7 @@ func TestAutoFuzzy(t *testing.T) {
     }
 }
 
-func TestSynonymTermReader(t *testing.T) {
+func TestThesaurusTermReader(t *testing.T) {
     tmpIndexPath := createTmpIndexPath(t)
     defer cleanupTmpIndexPath(t, tmpIndexPath)
 
@@ -3862,9 +3862,9 @@ func TestSynonymTermReader(t *testing.T) {
         }
     }()
 
-    synReader, ok := reader.(index.ThesaurusReader)
+    thesReader, ok := reader.(index.ThesaurusReader)
     if !ok {
-        t.Fatal("expected synonym reader")
+        t.Fatal("expected thesaurus reader")
     }
 
     type testStruct struct {
@@ -3912,7 +3912,7 @@ func TestSynonymTermReader(t *testing.T) {
     }
 
     for _, test := range testQueries {
-        str, err := synReader.SynonymTermReader(context.Background(), synonymSourceName, []byte(test.queryTerm))
+        str, err := thesReader.ThesaurusTermReader(context.Background(), synonymSourceName, []byte(test.queryTerm))
         if err != nil {
             t.Fatal(err)
         }

From 302147b21701f1f5615376ade489e765c77dc9c5 Mon Sep 17 00:00:00 2001
From: CascadingRadium
Date: Thu, 12 Dec 2024 13:21:51 +0530
Subject: [PATCH 29/35] go.mod update

---
 go.mod |  6 +++---
 go.sum | 12 ++++++------
 2 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/go.mod b/go.mod
index 31f8a34bc..9743d6bf0 100644
--- a/go.mod
+++ b/go.mod
@@ -5,20 +5,20 @@ go 1.21
 require (
     github.com/RoaringBitmap/roaring v1.9.3
     github.com/bits-and-blooms/bitset v1.12.0
-    github.com/blevesearch/bleve_index_api v1.1.13
+    github.com/blevesearch/bleve_index_api v1.2.0
     github.com/blevesearch/geo v0.1.20
     github.com/blevesearch/go-faiss v1.0.23
     github.com/blevesearch/go-metrics v0.0.0-20201227073835-cf1acfcdf475
     github.com/blevesearch/go-porterstemmer v1.0.3
     github.com/blevesearch/goleveldb v1.0.1
     github.com/blevesearch/gtreap v0.1.1
-    github.com/blevesearch/scorch_segment_api/v2 v2.2.16
+    github.com/blevesearch/scorch_segment_api/v2 v2.3.0
     github.com/blevesearch/segment v0.9.1
     github.com/blevesearch/snowball v0.6.1
     github.com/blevesearch/snowballstem v0.9.0
     github.com/blevesearch/stempel v0.2.0
     github.com/blevesearch/upsidedown_store_api v1.0.2
-    github.com/blevesearch/vellum v1.0.11
+    github.com/blevesearch/vellum v1.1.0
     github.com/blevesearch/zapx/v11 v11.3.10
     github.com/blevesearch/zapx/v12 v12.3.10
     github.com/blevesearch/zapx/v13 v13.3.10
diff --git a/go.sum b/go.sum
index 088c6aafb..28bc2b8d0 100644
--- a/go.sum
+++ b/go.sum
@@ -2,8 +2,8 @@ github.com/RoaringBitmap/roaring v1.9.3 h1:t4EbC5qQwnisr5PrP9nt0IRhRTb9gMUgQF4t4
 github.com/RoaringBitmap/roaring v1.9.3/go.mod h1:6AXUsoIEzDTFFQCe1RbGA6uFONMhvejWj5rqITANK90=
 github.com/bits-and-blooms/bitset v1.12.0 h1:U/q1fAF7xXRhFCrhROzIfffYnu+dlS38vCZtmFVPHmA=
 github.com/bits-and-blooms/bitset v1.12.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8=
-github.com/blevesearch/bleve_index_api v1.1.13 h1:+nrA6oRJr85aCPyqaeZtsruObwKojutfonHJin/BP48=
-github.com/blevesearch/bleve_index_api v1.1.13/go.mod h1:PbcwjIcRmjhGbkS/lJCpfgVSMROV6TRubGGAODaK1W8=
+github.com/blevesearch/bleve_index_api v1.2.0 h1:/DXMMWBwx/UmGKM1xDhTwDoJI5yQrG6rqRWPFcOgUVo=
+github.com/blevesearch/bleve_index_api v1.2.0/go.mod h1:PbcwjIcRmjhGbkS/lJCpfgVSMROV6TRubGGAODaK1W8=
 github.com/blevesearch/geo v0.1.20 h1:paaSpu2Ewh/tn5DKn/FB5SzvH0EWupxHEIwbCk/QPqM=
 github.com/blevesearch/geo v0.1.20/go.mod h1:DVG2QjwHNMFmjo+ZgzrIq2sfCh6rIHzy9d9d0B59I6w=
 github.com/blevesearch/go-faiss v1.0.23 h1:Wmc5AFwDLKGl2L6mjLX1Da3vCL0EKa2uHHSorcIS1Uc=
@@ -19,8 +19,8 @@ github.com/blevesearch/gtreap v0.1.1/go.mod h1:QaQyDRAT51sotthUWAH4Sj08awFSSWzgY
 github.com/blevesearch/mmap-go v1.0.2/go.mod h1:ol2qBqYaOUsGdm7aRMRrYGgPvnwLe6Y+7LMvAB5IbSA=
 github.com/blevesearch/mmap-go v1.0.4 h1:OVhDhT5B/M1HNPpYPBKIEJaD0F3Si+CrEKULGCDPWmc=
 github.com/blevesearch/mmap-go v1.0.4/go.mod h1:EWmEAOmdAS9z/pi/+Toxu99DnsbhG1TIxUoRmJw/pSs=
-github.com/blevesearch/scorch_segment_api/v2 v2.2.16 h1:uGvKVvG7zvSxCwcm4/ehBa9cCEuZVE+/zvrSl57QUVY=
-github.com/blevesearch/scorch_segment_api/v2 v2.2.16/go.mod h1:VF5oHVbIFTu+znY1v30GjSpT5+9YFs9dV2hjvuh34F0=
+github.com/blevesearch/scorch_segment_api/v2 v2.3.0 h1:vxCjbXAkkEBSb4AB3Iqgr/EJcPyYRsiGxpcvsS8E1Dw=
+github.com/blevesearch/scorch_segment_api/v2 v2.3.0/go.mod h1:5y+TgXYSx+xJGaCwSlvy9G/UJBIY5wzvIkhvhBm2ATc=
 github.com/blevesearch/segment v0.9.1 h1:+dThDy+Lvgj5JMxhmOVlgFfkUtZV2kw49xax4+jTfSU=
 github.com/blevesearch/segment v0.9.1/go.mod h1:zN21iLm7+GnBHWTao9I+Au/7MBiL8pPFtJBJTsk6kQw=
 github.com/blevesearch/snowball v0.6.1 h1:cDYjn/NCH+wwt2UdehaLpr2e4BwLIjN4V/TdLsL+B5A=
@@ -31,8 +31,8 @@ github.com/blevesearch/stempel v0.2.0 h1:CYzVPaScODMvgE9o+kf6D4RJ/VRomyi9uHF+PtB
 github.com/blevesearch/stempel v0.2.0/go.mod h1:wjeTHqQv+nQdbPuJ/YcvOjTInA2EIc6Ks1FoSUzSLvc=
 github.com/blevesearch/upsidedown_store_api v1.0.2 h1:U53Q6YoWEARVLd1OYNc9kvhBMGZzVrdmaozG2MfoB+A=
 github.com/blevesearch/upsidedown_store_api v1.0.2/go.mod h1:M01mh3Gpfy56Ps/UXHjEO/knbqyQ1Oamg8If49gRwrQ=
-github.com/blevesearch/vellum v1.0.11 h1:SJI97toEFTtA9WsDZxkyGTaBWFdWl1n2LEDCXLCq/AU=
-github.com/blevesearch/vellum v1.0.11/go.mod h1:QgwWryE8ThtNPxtgWJof5ndPfx0/YMBh+W2weHKPw8Y=
+github.com/blevesearch/vellum v1.1.0 h1:CinkGyIsgVlYf8Y2LUQHvdelgXr6PYuvoDIajq6yR9w=
+github.com/blevesearch/vellum v1.1.0/go.mod h1:QgwWryE8ThtNPxtgWJof5ndPfx0/YMBh+W2weHKPw8Y=
 github.com/blevesearch/zapx/v11 v11.3.10 h1:hvjgj9tZ9DeIqBCxKhi70TtSZYMdcFn7gDb71Xo/fvk=
 github.com/blevesearch/zapx/v11 v11.3.10/go.mod h1:0+gW+FaE48fNxoVtMY5ugtNHHof/PxCqh7CnhYdnMzQ=
 github.com/blevesearch/zapx/v12 v12.3.10 h1:yHfj3vXLSYmmsBleJFROXuO08mS3L1qDCdDK81jDl8s=

From 2971072eff2e1c650fd1096d5893f8aaaa8bd6ce Mon Sep 17 00:00:00 2001
From: CascadingRadium
Date: Tue, 17 Dec 2024 14:50:50 +0530
Subject: [PATCH 30/35] merge master

---
 go.mod              |  6 +++---
 go.sum              | 12 ++++++------
 index_alias_impl.go | 11 +++++++++++
 pre_search.go       |  4 ++++
 4 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/go.mod b/go.mod
index 9743d6bf0..623904722 100644
--- a/go.mod
+++ b/go.mod
@@ -23,8 +23,8 @@ require (
     github.com/blevesearch/zapx/v12 v12.3.10
     github.com/blevesearch/zapx/v13 v13.3.10
     github.com/blevesearch/zapx/v14 v14.3.10
-    github.com/blevesearch/zapx/v15 v15.3.16
-    github.com/blevesearch/zapx/v16 v16.1.9
+    github.com/blevesearch/zapx/v15 v15.3.17
+    github.com/blevesearch/zapx/v16 v16.1.10
     github.com/couchbase/moss v0.2.0
     github.com/golang/protobuf v1.3.2
     github.com/spf13/cobra v1.7.0
@@ -36,7 +36,7 @@ require (
     github.com/blevesearch/mmap-go v1.0.4 // indirect
     github.com/couchbase/ghistogram v0.1.0 // indirect
     github.com/golang/geo v0.0.0-20210211234256-740aa86cb551 // indirect
-    github.com/golang/snappy v0.0.1 // indirect
+    github.com/golang/snappy v0.0.4 // indirect
     github.com/inconshreveable/mousetrap v1.1.0 // indirect
     github.com/json-iterator/go v0.0.0-20171115153421-f7279a603ede // indirect
     github.com/mschoch/smat v0.2.0 // indirect
diff --git a/go.sum b/go.sum
index 28bc2b8d0..0ac89af30 100644
--- a/go.sum
+++ b/go.sum
@@ -41,10 +41,10 @@ github.com/blevesearch/zapx/v13 v13.3.10 h1:0KY9tuxg06rXxOZHg3DwPJBjniSlqEgVpxIq
 github.com/blevesearch/zapx/v13 v13.3.10/go.mod h1:w2wjSDQ/WBVeEIvP0fvMJZAzDwqwIEzVPnCPrz93yAk=
 github.com/blevesearch/zapx/v14 v14.3.10 h1:SG6xlsL+W6YjhX5N3aEiL/2tcWh3DO75Bnz77pSwwKU=
 github.com/blevesearch/zapx/v14 v14.3.10/go.mod h1:qqyuR0u230jN1yMmE4FIAuCxmahRQEOehF78m6oTgns=
-github.com/blevesearch/zapx/v15 v15.3.16 h1:Ct3rv7FUJPfPk99TI/OofdC+Kpb4IdyfdMH48sb+FmE=
-github.com/blevesearch/zapx/v15 v15.3.16/go.mod h1:Turk/TNRKj9es7ZpKK95PS7f6D44Y7fAFy8F4LXQtGg=
+github.com/blevesearch/zapx/v15 v15.3.17 h1:NkkMI98pYLq/uHnB6YWcITrrLpCVyvZ9iP+AyfpW1Ys=
+github.com/blevesearch/zapx/v15 v15.3.17/go.mod h1:vXRQzJJvlGVCdmOD5hg7t7JdjUT5DmDPhsAfjvtzIq8=
-github.com/blevesearch/zapx/v16 v16.1.9 h1:br5EgGntCF723cCecSpOACE0LnGAP4+HYrkcEfQkcBY=
-github.com/blevesearch/zapx/v16 v16.1.9/go.mod h1:zuxVgVaLZ0g4lZvrv06xDc24N6nLCOzXYHVkXI7LMHM=
+github.com/blevesearch/zapx/v16 v16.1.10 h1:moxlODQYuwqsXWK7Yyj40Wr1G8A4QKB32+fz3OLlEDU=
+github.com/blevesearch/zapx/v16 v16.1.10/go.mod h1:Xtloe2uqSXH2j3yrQr9yGiHwz3Hz/fpHn5szTqpyizs=
 github.com/couchbase/ghistogram v0.1.0 h1:b95QcQTCzjTUocDXp/uMgSNQi8oj1tGwnJ4bODWZnps=
 github.com/couchbase/ghistogram v0.1.0/go.mod h1:s1Jhy76zqfEecpNWJfWUiKZookAFaiGOEoyzgHt9i7k=
 github.com/couchbase/moss v0.2.0 h1:VCYrMzFwEryyhRSeI+/b3tRBSeTpi/8gn5Kf6dxqn+o=
@@ -60,8 +60,8 @@ github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5y
 github.com/golang/protobuf v1.3.2 h1:6nsPYzhq5kReh6QImI3k5qWzO4PEbvbIW2cwSfR/6xs=
 github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
 github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
-github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4=
-github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
+github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
+github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
 github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0=
 github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
 github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI=
diff --git a/index_alias_impl.go b/index_alias_impl.go
index eee8243fb..21f2daa64 100644
--- a/index_alias_impl.go
+++ b/index_alias_impl.go
@@ -16,6 +16,7 @@ package bleve
 
 import (
     "context"
+    "fmt"
     "sync"
     "time"
 
@@ -672,6 +673,9 @@ func finalizeSearchResult(req *SearchRequest, preSearchResult *SearchResult) *Se
 }
 
 func requestSatisfiedByPreSearch(req *SearchRequest, flags *preSearchFlags) bool {
+    if flags == nil {
+        return false
+    }
     // if the synonyms presearch flag is set the request can never be satisfied by
     // the preSearch result as synonyms are not part of the preSearch result
     if flags.synonyms {
@@ -692,6 +696,9 @@ func constructSynonymPreSearchData(rv map[string]map[string]interface{}, sr *Sea
 
 func constructPreSearchData(req *SearchRequest, flags *preSearchFlags,
     preSearchResult *SearchResult, indexes []Index) (map[string]map[string]interface{}, error) {
+    if flags == nil || preSearchResult == nil {
+        return nil, fmt.Errorf("invalid input, flags: %v, preSearchResult: %v", flags, preSearchResult)
+    }
     mergedOut := make(map[string]map[string]interface{}, len(indexes))
     for _, index := range indexes {
         mergedOut[index.Name()] = make(map[string]interface{})
@@ -821,6 +828,10 @@ func preSearchDataSearch(ctx context.Context, req *SearchRequest, flags *preSear
 // finalizePreSearchResult finalizes the preSearch result by applying the finalization steps
 // specific to the preSearch flags
 func finalizePreSearchResult(req *SearchRequest, flags *preSearchFlags, preSearchResult *SearchResult) {
+    // if flags is nil then return
+    if flags == nil {
+        return
+    }
     if flags.knn {
         preSearchResult.Hits = finalizeKNNResults(req, preSearchResult.Hits)
     }
diff --git a/pre_search.go b/pre_search.go
index 90d1293bf..5fd710d68 100644
--- a/pre_search.go
+++ b/pre_search.go
@@ -105,6 +105,10 @@ func (m *compositePreSearchResultProcessor) finalize(sr *SearchResult) {
 // -----------------------------------------------------------------------------
 // Function to create the appropriate preSearchResultProcessor(s)
 func createPreSearchResultProcessor(req *SearchRequest, flags *preSearchFlags) preSearchResultProcessor {
+    // return nil for invalid input
+    if flags == nil || req == nil {
+        return nil
+    }
     var processors []preSearchResultProcessor
     // Add KNN processor if the request has KNN
     if flags.knn {

From e062cd79cf66056e6a4f58473e8003f4bb6393f2 Mon Sep 17 00:00:00 2001
From: CascadingRadium
Date: Tue, 17 Dec 2024 15:00:00 +0530
Subject: [PATCH 31/35] reposition

---
 index_alias_impl.go | 64 ++++++++++++++++++++++++++++++++++++++++++++--------------------------------
 1 file changed, 32 insertions(+), 32 deletions(-)

diff --git a/index_alias_impl.go b/index_alias_impl.go
index 21f2daa64..766240b4a 100644
--- a/index_alias_impl.go
+++ b/index_alias_impl.go
@@ -716,38 +716,6 @@ func constructPreSearchData(req *SearchRequest, flags *preSearchFlags,
     return mergedOut, nil
 }
 
-// redistributePreSearchData redistributes the preSearchData sent in the search request to an index alias
-// which would happen in the case of an alias tree and depending on the level of the tree, the preSearchData
-// needs to be redistributed to the indexes at that level
-func redistributePreSearchData(req *SearchRequest, indexes []Index) (map[string]map[string]interface{}, error) {
-    rv := make(map[string]map[string]interface{})
-    for _, index := range indexes {
-        rv[index.Name()] = make(map[string]interface{})
-    }
-    if knnHits, ok := req.PreSearchData[search.KnnPreSearchDataKey].([]*search.DocumentMatch); ok {
-        // the preSearchData for KNN is a list of DocumentMatch objects
-        // that need to be redistributed to the right index.
-        // This is used only in the case of an alias tree, where the indexes
-        // are at the leaves of the tree, and the master alias is at the root.
-        // At each level of the tree, the preSearchData needs to be redistributed
-        // to the indexes/aliases at that level. Because the preSearchData is
-        // specific to each final index at the leaf.
-        segregatedKnnHits, err := validateAndDistributeKNNHits(knnHits, indexes)
-        if err != nil {
-            return nil, err
-        }
-        for _, index := range indexes {
-            rv[index.Name()][search.KnnPreSearchDataKey] = segregatedKnnHits[index.Name()]
-        }
-    }
-    if fts, ok := req.PreSearchData[search.SynonymPreSearchDataKey].(search.FieldTermSynonymMap); ok {
-        for _, index := range indexes {
-            rv[index.Name()][search.SynonymPreSearchDataKey] = fts
-        }
-    }
-    return rv, nil
-}
-
 func preSearchDataSearch(ctx context.Context, req *SearchRequest, flags *preSearchFlags, indexes ...Index) (*SearchResult, error) {
     asyncResults := make(chan *asyncSearchResult, len(indexes))
     // run search on each index in separate go routine
@@ -825,6 +793,38 @@ func preSearchDataSearch(ctx context.Context, req *SearchRequest, flags *preSear
     return sr, nil
 }
 
+// redistributePreSearchData redistributes the preSearchData sent in the search request to an index alias;
+// this happens in the case of an alias tree, where, depending on the level of the tree, the preSearchData
+// needs to be redistributed to the indexes at that level
+func redistributePreSearchData(req *SearchRequest, indexes []Index) (map[string]map[string]interface{}, error) {
+    rv := make(map[string]map[string]interface{})
+    for _, index := range indexes {
+        rv[index.Name()] = make(map[string]interface{})
+    }
+    if knnHits, ok := req.PreSearchData[search.KnnPreSearchDataKey].([]*search.DocumentMatch); ok {
+        // the preSearchData for KNN is a list of DocumentMatch objects
+        // that need to be redistributed to the right index.
+        // This is used only in the case of an alias tree, where the indexes
+        // are at the leaves of the tree, and the master alias is at the root.
+        // At each level of the tree, the preSearchData needs to be redistributed
+        // to the indexes/aliases at that level, because the preSearchData is
+        // specific to each final index at the leaf.
+        segregatedKnnHits, err := validateAndDistributeKNNHits(knnHits, indexes)
+        if err != nil {
+            return nil, err
+        }
+        for _, index := range indexes {
+            rv[index.Name()][search.KnnPreSearchDataKey] = segregatedKnnHits[index.Name()]
+        }
+    }
+    if fts, ok := req.PreSearchData[search.SynonymPreSearchDataKey].(search.FieldTermSynonymMap); ok {
+        for _, index := range indexes {
+            rv[index.Name()][search.SynonymPreSearchDataKey] = fts
+        }
+    }
+    return rv, nil
+}
+
 // finalizePreSearchResult finalizes the preSearch result by applying the finalization steps
 // specific to the preSearch flags
 func finalizePreSearchResult(req *SearchRequest, flags *preSearchFlags, preSearchResult *SearchResult) {

From 41fc99e8d6e73c906553921790d41f4a43d8341a Mon Sep 17 00:00:00 2001
From: CascadingRadium
Date: Thu, 19 Dec 2024 15:34:21 +0530
Subject: [PATCH 32/35] refactor

---
 search/util.go | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/search/util.go b/search/util.go
index 9f5a15cac..2e95f1180 100644
--- a/search/util.go
+++ b/search/util.go
@@ -148,15 +148,15 @@ type SearcherEndCallbackFn func(size uint64) error
 // field -> term -> synonyms
 type FieldTermSynonymMap map[string]map[string][]string
 
-func (f *FieldTermSynonymMap) MergeWith(fts FieldTermSynonymMap) {
+func (f FieldTermSynonymMap) MergeWith(fts FieldTermSynonymMap) {
     for field, termSynonymMap := range fts {
         // Ensure the field exists in the receiver
-        if _, exists := (*f)[field]; !exists {
-            (*f)[field] = make(map[string][]string)
+        if _, exists := f[field]; !exists {
+            f[field] = make(map[string][]string)
         }
         for term, synonyms := range termSynonymMap {
             // Append synonyms
-            (*f)[field][term] = append((*f)[field][term], synonyms...)
+            f[field][term] = append(f[field][term], synonyms...)
         }
     }
 }

From 0f11d73c10d7719a3eb01f7cd6dfb39937423af8 Mon Sep 17 00:00:00 2001
From: CascadingRadium
Date: Thu, 19 Dec 2024 20:53:54 +0530
Subject: [PATCH 33/35] minor fix

---
 document/field_synonym.go | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/document/field_synonym.go b/document/field_synonym.go
index 4aa603fc4..c34b481dd 100644
--- a/document/field_synonym.go
+++ b/document/field_synonym.go
@@ -81,12 +81,18 @@ func (s *SynonymField) Analyze() {
     if len(s.input) > 0 {
         analyzedInput = make([]string, 0, len(s.input))
         for _, term := range s.input {
-            analyzedInput = append(analyzedInput, analyzeSynonymTerm(term, s.analyzer))
+            analyzedTerm := analyzeSynonymTerm(term, s.analyzer)
+            if analyzedTerm != "" {
+                analyzedInput = append(analyzedInput, analyzedTerm)
+            }
         }
     }
     analyzedSynonyms := make([]string, 0, len(s.synonyms))
     for _, syn := range s.synonyms {
-        analyzedSynonyms = append(analyzedSynonyms, analyzeSynonymTerm(syn, s.analyzer))
+        analyzedTerm := analyzeSynonymTerm(syn, s.analyzer)
+        if analyzedTerm != "" {
+            analyzedSynonyms = append(analyzedSynonyms, analyzedTerm)
+        }
     }
     s.synonymMap = processSynonymData(analyzedInput, analyzedSynonyms)
 }
@@ -136,8 +142,8 @@ func processSynonymData(input []string, synonyms []string) map[string][]string {
 
 func analyzeSynonymTerm(term string, analyzer analysis.Analyzer) string {
     tokenStream := analyzer.Analyze([]byte(term))
-    if len(tokenStream) == 0 {
-        return term
+    if len(tokenStream) == 1 {
+        return string(tokenStream[0].Term)
     }
-    return string(tokenStream[0].Term)
+    return ""
 }

From c73184451c47509d9a943f8a6dc71a8637192854 Mon Sep 17 00:00:00 2001
From: CascadingRadium
Date: Thu, 19 Dec 2024 20:59:22 +0530
Subject: [PATCH 34/35] test fix

---
 search_test.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/search_test.go b/search_test.go
index 043be4073..fdfaa3efb 100644
--- a/search_test.go
+++ b/search_test.go
@@ -3836,7 +3836,7 @@ func TestThesaurusTermReader(t *testing.T) {
     }
 
     for synName, synDef := range synonymDocuments {
-        err := batch.IndexSynonym(synName, "collection1", synDef)
+        err := batch.IndexSynonym(synName, synonymCollection, synDef)
         if err != nil {
             t.Fatal(err)
         }

From 148d32aa6b71cffbe9d6a8c4f87173f3b690bf33 Mon Sep 17 00:00:00 2001
From: Abhinav Dangeti
Date: Thu, 19 Dec 2024 09:04:55 -0700
Subject: [PATCH 35/35] Bump up zap/v16

* 82553cd Rahul Rampure | Add Thesaurus API and Synonym Index Handling in Search

---
 go.mod | 2 +-
 go.sum | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/go.mod b/go.mod
index 7ea27dcde..cfee95607 100644
--- a/go.mod
+++ b/go.mod
@@ -24,7 +24,7 @@ require (
     github.com/blevesearch/zapx/v13 v13.3.10
     github.com/blevesearch/zapx/v14 v14.3.10
     github.com/blevesearch/zapx/v15 v15.3.17
-    github.com/blevesearch/zapx/v16 v16.1.11-0.20241217210710-e1dde3e9876d
+    github.com/blevesearch/zapx/v16 v16.1.11-0.20241219160422-82553cdd4b38
     github.com/couchbase/moss v0.2.0
     github.com/golang/protobuf v1.3.2
     github.com/spf13/cobra v1.7.0
diff --git a/go.sum b/go.sum
index 1aa0281d5..f21c89611 100644
--- a/go.sum
+++ b/go.sum
@@ -43,8 +43,8 @@ github.com/blevesearch/zapx/v14 v14.3.10 h1:SG6xlsL+W6YjhX5N3aEiL/2tcWh3DO75Bnz7
 github.com/blevesearch/zapx/v14 v14.3.10/go.mod h1:qqyuR0u230jN1yMmE4FIAuCxmahRQEOehF78m6oTgns=
 github.com/blevesearch/zapx/v15 v15.3.17 h1:NkkMI98pYLq/uHnB6YWcITrrLpCVyvZ9iP+AyfpW1Ys=
 github.com/blevesearch/zapx/v15 v15.3.17/go.mod h1:vXRQzJJvlGVCdmOD5hg7t7JdjUT5DmDPhsAfjvtzIq8=
-github.com/blevesearch/zapx/v16 v16.1.11-0.20241217210710-e1dde3e9876d h1:XUZzJwWrRqRJwigYWE7iB2nYBP6rjcU3x+InZtvQOGo=
-github.com/blevesearch/zapx/v16 v16.1.11-0.20241217210710-e1dde3e9876d/go.mod h1:wZc3SFjKlrqxkiUkT+HVBBBBTX8oqXxUb2gjE+CMgIE=
+github.com/blevesearch/zapx/v16 v16.1.11-0.20241219160422-82553cdd4b38 h1:iJ3Q3sbyo2d0bjfb720RmGjj7cqzh/EdP3528ggDIMY=
+github.com/blevesearch/zapx/v16 v16.1.11-0.20241219160422-82553cdd4b38/go.mod h1:JTZseJiEpogtkepKSubIKAmfgbQiOReJXfmjxB1qta4=
 github.com/couchbase/ghistogram v0.1.0 h1:b95QcQTCzjTUocDXp/uMgSNQi8oj1tGwnJ4bODWZnps=
 github.com/couchbase/ghistogram v0.1.0/go.mod h1:s1Jhy76zqfEecpNWJfWUiKZookAFaiGOEoyzgHt9i7k=
 github.com/couchbase/moss v0.2.0 h1:VCYrMzFwEryyhRSeI+/b3tRBSeTpi/8gn5Kf6dxqn+o=
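Taken together, the series enables the following end-to-end flow — a rough sketch distilled from the tests in this series (TestThesaurusTermReader and TestSynonymSearchQueries); the index path, source name, and terms are hypothetical:

    imap := mapping.NewIndexMapping()
    textField := mapping.NewTextFieldMapping()
    textField.Analyzer = simple.Name
    textField.SynonymSource = "english"
    imap.DefaultMapping.AddFieldMappingsAt("text", textField)
    // A synonym source pairs a collection of synonym documents with an analyzer.
    err := imap.AddSynonymSource("english", map[string]interface{}{
        "collection": "collection1",
        "analyzer":   simple.Name,
    })
    if err != nil {
        log.Fatal(err)
    }
    if err = imap.Validate(); err != nil {
        log.Fatal(err)
    }
    idx, err := bleve.New("example.bleve", imap)
    if err != nil {
        log.Fatal(err)
    }
    if si, ok := idx.(bleve.SynonymIndex); ok {
        _ = si.IndexSynonym("syn1", "collection1", &bleve.SynonymDefinition{
            Synonyms: []string{"quick", "fast", "speedy"},
        })
    }
    // A match query for "fast" on the "text" field can now also surface
    // documents containing "quick" or "speedy", via the synonym preSearch phase.
    q := bleve.NewMatchQuery("fast")
    q.SetField("text")
    res, err := idx.Search(bleve.NewSearchRequest(q))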