1
1
mirror of https://github.com/go-gitea/gitea synced 2025-02-26 23:04:18 +00:00

Use MatchPhraseQuery for bleve code search (#33628)

Fix regression from #32210, which unintentionally changed the search mode
for bleve from MatchPhraseQuery to MatchQuery.

Meanwhile, on the main branch, #33590 implemented a "literal code search"
mode (triggered by using quotes) as a workaround for this unexpected code
search behavior. That feature may need some redesign, since the behavior it
works around turns out to have been caused by a regression.

But this PR at least already fixes the regression for 1.23.x.
This commit is contained in:
Jimmy Praet 2025-02-25 21:20:54 +01:00 committed by GitHub
parent 81126daf53
commit e3021fae79
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 42 additions and 30 deletions

View File

@ -266,7 +266,7 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int
pathQuery.FieldVal = "Filename" pathQuery.FieldVal = "Filename"
pathQuery.SetBoost(10) pathQuery.SetBoost(10)
contentQuery := bleve.NewMatchQuery(opts.Keyword) contentQuery := bleve.NewMatchPhraseQuery(opts.Keyword)
contentQuery.FieldVal = "Content" contentQuery.FieldVal = "Content"
if opts.IsKeywordFuzzy { if opts.IsKeywordFuzzy {

View File

@ -165,35 +165,6 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) {
}, },
}, },
}, },
// Search for matches on the contents of files within the repo '62'.
// This scenario yields two results (both are based on contents, the first one is an exact match whereas the second is a 'fuzzy' one)
{
RepoIDs: []int64{62},
Keyword: "This is not cheese",
Langs: 1,
Results: []codeSearchResult{
{
Filename: "potato/ham.md",
Content: "This is not cheese",
},
{
Filename: "ham.md",
Content: "This is also not cheese",
},
},
},
// Search for matches on the contents of files regardless of case.
{
RepoIDs: nil,
Keyword: "dESCRIPTION",
Langs: 1,
Results: []codeSearchResult{
{
Filename: "README.md",
Content: "# repo1\n\nDescription for repo1",
},
},
},
// Search for an exact match on the filename within the repo '62' (case insensitive). // Search for an exact match on the filename within the repo '62' (case insensitive).
// This scenario yields a single result (the file avocado.md on the repo '62') // This scenario yields a single result (the file avocado.md on the repo '62')
{ {
@ -233,6 +204,47 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) {
}, },
} }
if name == "elastic_search" {
// Additional scenarios for elastic_search only
additional := []struct {
RepoIDs []int64
Keyword string
Langs int
Results []codeSearchResult
}{
// Search for matches on the contents of files within the repo '62'.
// This scenario yields two results (both are based on contents, the first one is an exact match whereas the second is a 'fuzzy' one)
{
RepoIDs: []int64{62},
Keyword: "This is not cheese",
Langs: 1,
Results: []codeSearchResult{
{
Filename: "potato/ham.md",
Content: "This is not cheese",
},
{
Filename: "ham.md",
Content: "This is also not cheese",
},
},
},
// Search for matches on the contents of files regardless of case.
{
RepoIDs: nil,
Keyword: "dESCRIPTION",
Langs: 1,
Results: []codeSearchResult{
{
Filename: "README.md",
Content: "# repo1\n\nDescription for repo1",
},
},
},
}
keywords = append(keywords, additional...)
}
for _, kw := range keywords { for _, kw := range keywords {
t.Run(kw.Keyword, func(t *testing.T) { t.Run(kw.Keyword, func(t *testing.T) {
total, res, langs, err := indexer.Search(context.TODO(), &internal.SearchOptions{ total, res, langs, err := indexer.Search(context.TODO(), &internal.SearchOptions{