mirror of
				https://github.com/go-gitea/gitea
				synced 2025-11-04 05:18:25 +00:00 
			
		
		
		
	Determine fuzziness of bleve indexer by keyword length (#29706)
Also, the bleve indexer performed an exact phrase match when a fuzzy search was requested, and the other way around. This commit fixes that bug as well.
This commit is contained in:
		@@ -39,6 +39,8 @@ import (
 | 
				
			|||||||
const (
 | 
					const (
 | 
				
			||||||
	unicodeNormalizeName = "unicodeNormalize"
 | 
						unicodeNormalizeName = "unicodeNormalize"
 | 
				
			||||||
	maxBatchSize         = 16
 | 
						maxBatchSize         = 16
 | 
				
			||||||
 | 
						// fuzzyDenominator determines the levenshtein distance per each character of a keyword
 | 
				
			||||||
 | 
						fuzzyDenominator = 4
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error {
 | 
					func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error {
 | 
				
			||||||
@@ -239,15 +241,12 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int
 | 
				
			|||||||
		keywordQuery query.Query
 | 
							keywordQuery query.Query
 | 
				
			||||||
	)
 | 
						)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						phraseQuery := bleve.NewMatchPhraseQuery(opts.Keyword)
 | 
				
			||||||
 | 
						phraseQuery.FieldVal = "Content"
 | 
				
			||||||
 | 
						phraseQuery.Analyzer = repoIndexerAnalyzer
 | 
				
			||||||
 | 
						keywordQuery = phraseQuery
 | 
				
			||||||
	if opts.IsKeywordFuzzy {
 | 
						if opts.IsKeywordFuzzy {
 | 
				
			||||||
		phraseQuery := bleve.NewMatchPhraseQuery(opts.Keyword)
 | 
							phraseQuery.Fuzziness = len(opts.Keyword) / fuzzyDenominator
 | 
				
			||||||
		phraseQuery.FieldVal = "Content"
 | 
					 | 
				
			||||||
		phraseQuery.Analyzer = repoIndexerAnalyzer
 | 
					 | 
				
			||||||
		keywordQuery = phraseQuery
 | 
					 | 
				
			||||||
	} else {
 | 
					 | 
				
			||||||
		prefixQuery := bleve.NewPrefixQuery(opts.Keyword)
 | 
					 | 
				
			||||||
		prefixQuery.FieldVal = "Content"
 | 
					 | 
				
			||||||
		keywordQuery = prefixQuery
 | 
					 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if len(opts.RepoIDs) > 0 {
 | 
						if len(opts.RepoIDs) > 0 {
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -20,17 +20,11 @@ func NumericEqualityQuery(value int64, field string) *query.NumericRangeQuery {
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// MatchPhraseQuery generates a match phrase query for the given phrase, field and analyzer
 | 
					// MatchPhraseQuery generates a match phrase query for the given phrase, field and analyzer
 | 
				
			||||||
func MatchPhraseQuery(matchPhrase, field, analyzer string) *query.MatchPhraseQuery {
 | 
					func MatchPhraseQuery(matchPhrase, field, analyzer string, fuzziness int) *query.MatchPhraseQuery {
 | 
				
			||||||
	q := bleve.NewMatchPhraseQuery(matchPhrase)
 | 
						q := bleve.NewMatchPhraseQuery(matchPhrase)
 | 
				
			||||||
	q.FieldVal = field
 | 
						q.FieldVal = field
 | 
				
			||||||
	q.Analyzer = analyzer
 | 
						q.Analyzer = analyzer
 | 
				
			||||||
	return q
 | 
						q.Fuzziness = fuzziness
 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// PrefixQuery generates a match prefix query for the given prefix and field
 | 
					 | 
				
			||||||
func PrefixQuery(matchPrefix, field string) *query.PrefixQuery {
 | 
					 | 
				
			||||||
	q := bleve.NewPrefixQuery(matchPrefix)
 | 
					 | 
				
			||||||
	q.FieldVal = field
 | 
					 | 
				
			||||||
	return q
 | 
						return q
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -35,7 +35,11 @@ func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error {
 | 
				
			|||||||
	})
 | 
						})
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
const maxBatchSize = 16
 | 
					const (
 | 
				
			||||||
 | 
						maxBatchSize = 16
 | 
				
			||||||
 | 
						// fuzzyDenominator determines the levenshtein distance per each character of a keyword
 | 
				
			||||||
 | 
						fuzzyDenominator = 4
 | 
				
			||||||
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// IndexerData an update to the issue indexer
 | 
					// IndexerData an update to the issue indexer
 | 
				
			||||||
type IndexerData internal.IndexerData
 | 
					type IndexerData internal.IndexerData
 | 
				
			||||||
@@ -156,19 +160,16 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (
 | 
				
			|||||||
	var queries []query.Query
 | 
						var queries []query.Query
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if options.Keyword != "" {
 | 
						if options.Keyword != "" {
 | 
				
			||||||
 | 
							fuzziness := 0
 | 
				
			||||||
		if options.IsFuzzyKeyword {
 | 
							if options.IsFuzzyKeyword {
 | 
				
			||||||
			queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
 | 
								fuzziness = len(options.Keyword) / fuzzyDenominator
 | 
				
			||||||
				inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer),
 | 
					 | 
				
			||||||
				inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer),
 | 
					 | 
				
			||||||
				inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer),
 | 
					 | 
				
			||||||
			}...))
 | 
					 | 
				
			||||||
		} else {
 | 
					 | 
				
			||||||
			queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
 | 
					 | 
				
			||||||
				inner_bleve.PrefixQuery(options.Keyword, "title"),
 | 
					 | 
				
			||||||
				inner_bleve.PrefixQuery(options.Keyword, "content"),
 | 
					 | 
				
			||||||
				inner_bleve.PrefixQuery(options.Keyword, "comments"),
 | 
					 | 
				
			||||||
			}...))
 | 
					 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
 | 
				
			||||||
 | 
								inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer, fuzziness),
 | 
				
			||||||
 | 
								inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer, fuzziness),
 | 
				
			||||||
 | 
								inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer, fuzziness),
 | 
				
			||||||
 | 
							}...))
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if len(options.RepoIDs) > 0 || options.AllPublic {
 | 
						if len(options.RepoIDs) > 0 || options.AllPublic {
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -32,7 +32,7 @@ func TestSearchRepo(t *testing.T) {
 | 
				
			|||||||
	repo, err := repo_model.GetRepositoryByOwnerAndName(db.DefaultContext, "user2", "repo1")
 | 
						repo, err := repo_model.GetRepositoryByOwnerAndName(db.DefaultContext, "user2", "repo1")
 | 
				
			||||||
	assert.NoError(t, err)
 | 
						assert.NoError(t, err)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	executeIndexer(t, repo, code_indexer.UpdateRepoIndexer)
 | 
						code_indexer.UpdateRepoIndexer(repo)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	testSearch(t, "/user2/repo1/search?q=Description&page=1", []string{"README.md"})
 | 
						testSearch(t, "/user2/repo1/search?q=Description&page=1", []string{"README.md"})
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -42,12 +42,14 @@ func TestSearchRepo(t *testing.T) {
 | 
				
			|||||||
	repo, err = repo_model.GetRepositoryByOwnerAndName(db.DefaultContext, "user2", "glob")
 | 
						repo, err = repo_model.GetRepositoryByOwnerAndName(db.DefaultContext, "user2", "glob")
 | 
				
			||||||
	assert.NoError(t, err)
 | 
						assert.NoError(t, err)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	executeIndexer(t, repo, code_indexer.UpdateRepoIndexer)
 | 
						code_indexer.UpdateRepoIndexer(repo)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	testSearch(t, "/user2/glob/search?q=loren&page=1", []string{"a.txt"})
 | 
						testSearch(t, "/user2/glob/search?q=loren&page=1", []string{"a.txt"})
 | 
				
			||||||
	testSearch(t, "/user2/glob/search?q=file3&page=1", []string{"x/b.txt"})
 | 
						testSearch(t, "/user2/glob/search?q=loren&page=1&t=match", []string{"a.txt"})
 | 
				
			||||||
	testSearch(t, "/user2/glob/search?q=file4&page=1", []string{})
 | 
						testSearch(t, "/user2/glob/search?q=file3&page=1", []string{"x/b.txt", "a.txt"})
 | 
				
			||||||
	testSearch(t, "/user2/glob/search?q=file5&page=1", []string{})
 | 
						testSearch(t, "/user2/glob/search?q=file3&page=1&t=match", []string{"x/b.txt", "a.txt"})
 | 
				
			||||||
 | 
						testSearch(t, "/user2/glob/search?q=file4&page=1&t=match", []string{"x/b.txt", "a.txt"})
 | 
				
			||||||
 | 
						testSearch(t, "/user2/glob/search?q=file5&page=1&t=match", []string{"x/b.txt", "a.txt"})
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
func testSearch(t *testing.T, url string, expected []string) {
 | 
					func testSearch(t *testing.T, url string, expected []string) {
 | 
				
			||||||
@@ -57,7 +59,3 @@ func testSearch(t *testing.T, url string, expected []string) {
 | 
				
			|||||||
	filenames := resultFilenames(t, NewHTMLParser(t, resp.Body))
 | 
						filenames := resultFilenames(t, NewHTMLParser(t, resp.Body))
 | 
				
			||||||
	assert.EqualValues(t, expected, filenames)
 | 
						assert.EqualValues(t, expected, filenames)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					 | 
				
			||||||
func executeIndexer(t *testing.T, repo *repo_model.Repository, op func(*repo_model.Repository)) {
 | 
					 | 
				
			||||||
	op(repo)
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user