Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

score: introduce query.Boost to scale score #728

Merged
merged 1 commit into from
Jan 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion api_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ func TestMatchSize(t *testing.T) {
size: 112,
}, {
v: candidateMatch{},
size: 72,
size: 80,
}, {
v: candidateChunk{},
size: 40,
Expand Down
5 changes: 5 additions & 0 deletions bits.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ package zoekt

import (
"encoding/binary"
"math"
"sort"
"unicode"
"unicode/utf8"
Expand Down Expand Up @@ -391,3 +392,7 @@ func (m runeOffsetMap) lookup(runeOffset uint32) (uint32, uint32) {
func (m runeOffsetMap) sizeBytes() int {
	// Each entry of m is accounted for as 8 bytes.
	const bytesPerEntry = 8
	return len(m) * bytesPerEntry
}

// epsilonEqualsOne reports whether scoreWeight is equal to 1 within an
// absolute tolerance of 1e-9. An exact 1 has a distance of 0 and therefore
// always satisfies the check.
func epsilonEqualsOne(scoreWeight float64) bool {
	const epsilon = 1e-9
	distance := math.Abs(scoreWeight - 1.0)
	return distance < epsilon
}
8 changes: 8 additions & 0 deletions contentprovider.go
Original file line number Diff line number Diff line change
Expand Up @@ -660,6 +660,14 @@ func (p *contentProvider) candidateMatchScore(ms []*candidateMatch, language str
}
}

// scoreWeight != 1 means it affects score
if !epsilonEqualsOne(m.scoreWeight) {
score.score = score.score * m.scoreWeight
if debug {
score.what += fmt.Sprintf("boost:%.2f, ", m.scoreWeight)
}
}

if score.score > maxScore.score {
maxScore.score = score.score
maxScore.what = score.what
Expand Down
37 changes: 30 additions & 7 deletions eval.go
Original file line number Diff line number Diff line change
Expand Up @@ -420,7 +420,7 @@ nextFileMatch:
// whether there's an exact match on a symbol, the number of query clauses that matched, etc.
func (d *indexData) scoreFile(fileMatch *FileMatch, doc uint32, mt matchTree, known map[matchTree]bool, opts *SearchOptions) {
atomMatchCount := 0
visitMatches(mt, known, func(mt matchTree) {
visitMatchAtoms(mt, known, func(mt matchTree) {
atomMatchCount++
})

Expand Down Expand Up @@ -544,6 +544,15 @@ func (m sortByOffsetSlice) Less(i, j int) bool {
return m[i].byteOffset < m[j].byteOffset
}

// setScoreWeight stamps scoreWeight onto every candidate in cm and hands the
// same slice back, so gatherMatches can use the call inline as an argument to
// append. The candidates are updated in place through their pointers.
func setScoreWeight(scoreWeight float64, cm []*candidateMatch) []*candidateMatch {
	for i := range cm {
		cm[i].scoreWeight = scoreWeight
	}
	return cm
}

// Gather matches from this document. This never returns a mixture of
// filename/content matches: if there are content matches, all
// filename matches are trimmed from the result. The matches are
Expand All @@ -554,18 +563,18 @@ func (m sortByOffsetSlice) Less(i, j int) bool {
// but adjacent matches will remain.
func gatherMatches(mt matchTree, known map[matchTree]bool, merge bool) []*candidateMatch {
var cands []*candidateMatch
visitMatches(mt, known, func(mt matchTree) {
visitMatches(mt, known, 1, func(mt matchTree, scoreWeight float64) {
if smt, ok := mt.(*substrMatchTree); ok {
cands = append(cands, smt.current...)
cands = append(cands, setScoreWeight(scoreWeight, smt.current)...)
}
if rmt, ok := mt.(*regexpMatchTree); ok {
cands = append(cands, rmt.found...)
cands = append(cands, setScoreWeight(scoreWeight, rmt.found)...)
}
if rmt, ok := mt.(*wordMatchTree); ok {
cands = append(cands, rmt.found...)
cands = append(cands, setScoreWeight(scoreWeight, rmt.found)...)
}
if smt, ok := mt.(*symbolRegexpMatchTree); ok {
cands = append(cands, smt.found...)
cands = append(cands, setScoreWeight(scoreWeight, smt.found)...)
}
})

Expand All @@ -590,6 +599,7 @@ func gatherMatches(mt matchTree, known map[matchTree]bool, merge bool) []*candid
// are non-overlapping.
sort.Sort((sortByOffsetSlice)(cands))
res = cands[:0]
mergeRun := 1
for i, c := range cands {
if i == 0 {
res = append(res, c)
Expand All @@ -599,10 +609,23 @@ func gatherMatches(mt matchTree, known map[matchTree]bool, merge bool) []*candid
lastEnd := last.byteOffset + last.byteMatchSz
end := c.byteOffset + c.byteMatchSz
if lastEnd >= c.byteOffset {
mergeRun++

// Average out the score across the merged candidates. Only do it if
// we are boosting to avoid floating point funkiness in the normal
// case.
if !(epsilonEqualsOne(last.scoreWeight) && epsilonEqualsOne(c.scoreWeight)) {
last.scoreWeight = ((last.scoreWeight * float64(mergeRun-1)) + c.scoreWeight) / float64(mergeRun)
}

// latest candidate goes further, update our end
if end > lastEnd {
last.byteMatchSz = end - last.byteOffset
}

continue
} else {
mergeRun = 1
}

res = append(res, c)
Expand Down Expand Up @@ -649,7 +672,7 @@ func (d *indexData) branchIndex(docID uint32) int {
// returns all branches containing docID.
func (d *indexData) gatherBranches(docID uint32, mt matchTree, known map[matchTree]bool) []string {
var mask uint64
visitMatches(mt, known, func(mt matchTree) {
visitMatchAtoms(mt, known, func(mt matchTree) {
bq, ok := mt.(*branchQueryMatchTree)
if !ok {
return
Expand Down
6 changes: 5 additions & 1 deletion internal/e2e/e2e_rank_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,10 @@ func TestRanking(t *testing.T) {
t.Fatal(err)
}

// q is marshalled as part of the test, so avoid our rewrites for
// ranking.
qSearch := query.ExpirementalPhraseBoost(q, rq.Query, query.ExperimentalPhraseBoostOptions{})

sOpts := zoekt.SearchOptions{
// Use the same options sourcegraph has by default
ChunkMatches: true,
Expand All @@ -128,7 +132,7 @@ func TestRanking(t *testing.T) {

DebugScore: *debugScore,
}
result, err := ss.Search(context.Background(), q, &sOpts)
result, err := ss.Search(context.Background(), qSearch, &sOpts)
if err != nil {
t.Fatal(err)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ query: (and substr:"assets" substr:"are" substr:"not" substr:"configured" substr
targetRank: 1

**github.com/sourcegraph/sourcegraph/ui/assets/assets.go**
30: return nil, errors.New("assets are not configured for this binary, please see ui/assets")
34: panic("assets are not configured for this binary, please see ui/assets")
33:func (p FailingAssetsProvider) Assets() http.FileSystem {
14: Assets() http.FileSystem
1:package assets
hidden 12 more line matches

github.com/sourcegraph/sourcegraph/schema/schema.go
Expand Down
50 changes: 25 additions & 25 deletions internal/e2e/testdata/generate_unit_test.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,30 @@
queryString: generate unit test
query: (and substr:"generate" substr:"unit" substr:"test")
targetRank: 11
targetRank: 1

**github.com/sourcegraph/cody/lib/shared/src/chat/recipes/generate-test.ts**
16: public title = 'Generate Unit Test'
14:export class GenerateTest implements Recipe {
15: public id: RecipeID = 'generate-unit-test'
hidden 3 more line matches

github.com/sourcegraph/sourcegraph/client/jetbrains/README.md
40:- Generate unit test
41:- Generate docstring
61:Cody is powered by Sourcegraph’s code graph and uses context of your codebase to extend its capabilities. By using context from entire repositories, Cody is able to give more accurate answers and generate idiomatic code.
hidden 7 more line matches

github.com/sourcegraph/cody/vscode/CHANGELOG.md
298:- The `/test` (Generate Unit Test) command was updated to use file dependencies and test examples when fetching context, in order to produce better results. To use this command, select code in your editor and run the `/test` command. It is recommended to set up test files before running the command to get optimal results. [pull/683](https://github.com/sourcegraph/cody/pull/683) [pull/602](https://github.com/sourcegraph/cody/pull/602)
218:- The `Generate Unit Tests` command has been improved with an enhanced context fetching process that produces test results with better quality. [pull/907](https://github.com/sourcegraph/cody/pull/907)
264:- The `Generate Unit Tests` command has been improved with an enhanced context fetching process that produces test results with better quality. [pull/907](https://github.com/sourcegraph/cody/pull/907)
hidden 17 more line matches

github.com/sourcegraph/sourcegraph/doc/cody/overview/install-jetbrains.md
158:- Generate unit test
138:Log in to your Sourcegraph instance and go to `settings` / `access token` (`https://<your-instance>.sourcegraph.com/users/<your-instance>/settings/tokens`). From here, generate a new access token.
159:- Generate docstring
hidden 3 more line matches

github.com/sourcegraph/sourcegraph/cmd/frontend/internal/insights/resolvers/insight_series_resolver.go
300:func (j *seriesResolverGenerator) Generate(ctx context.Context, series types.InsightViewSeries, baseResolver baseInsightResolver, filters types.InsightViewFilters, options types.SeriesDisplayOptions) ([]graphqlbackend.InsightSeriesResolver, error) {
Expand All @@ -14,28 +38,4 @@ github.com/golang/go/src/cmd/vendor/github.com/google/pprof/internal/report/repo
75: SampleUnit string // Unit for the sample data from the profile.
hidden 48 more line matches

github.com/sourcegraph/sourcegraph/internal/codeintel/autoindexing/internal/inference/lua/test.lua
9: generate = function(_, paths)
6: patterns = { pattern.new_path_basename "sg-test" },
8: -- Invoked as part of unit tests for the autoindexing service
hidden 1 more line matches

github.com/golang/go/src/cmd/internal/testdir/testdir_test.go
273:type test struct {
74:func Test(t *testing.T) {
263:type testCommon struct {
hidden 120 more line matches

github.com/golang/go/src/cmd/vendor/github.com/google/pprof/profile/profile.go
65: Unit string // seconds, nanoseconds, bytes, etc
77: NumUnit map[string][]string
68: unitX int64
hidden 44 more line matches

github.com/golang/go/src/cmd/link/internal/loader/loader.go
79: unit *sym.CompilationUnit
1544:func (l *Loader) SymUnit(i Sym) *sym.CompilationUnit {
228: generatedSyms Bitmap // symbols that generate their content, indexed by ext sym idx
hidden 50 more line matches

hidden 245 more file matches
6 changes: 3 additions & 3 deletions internal/e2e/testdata/rank_stats.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
queries: 14
recall@1: 7 (50%)
recall@5: 9 (64%)
mrr: 0.579471
recall@1: 9 (64%)
recall@5: 11 (79%)
mrr: 0.710733
13 changes: 6 additions & 7 deletions internal/e2e/testdata/sourcegraphserver_docker_image_build.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
queryString: sourcegraph/server docker image build
query: (and substr:"sourcegraph/server" substr:"docker" substr:"image" substr:"build")
targetRank: 14
targetRank: 1

**github.com/sourcegraph/sourcegraph/dev/tools.go**
7: // zoekt-* used in sourcegraph/server docker image build
1://go:build tools
2:// +build tools

github.com/sourcegraph/sourcegraph/dev/sg/internal/images/images.go
458: Build int
Expand Down Expand Up @@ -32,10 +37,4 @@ github.com/sourcegraph/sourcegraph/internal/updatecheck/handler.go
50: latestReleaseDockerComposeOrPureDocker = newPingResponse("5.1.8")
hidden 19 more line matches

github.com/sourcegraph/sourcegraph/doc/admin/deploy/docker-single-container/index.md
1:# Docker Single Container Deployment
294:### Insiders build
238:### File system performance on Docker for Mac
hidden 52 more line matches

hidden 15 more file matches
2 changes: 2 additions & 0 deletions matchiter.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ type candidateMatch struct {
substrBytes []byte
substrLowered []byte

scoreWeight float64

file uint32
symbolIdx uint32

Expand Down
Loading
Loading