mirror of
				https://github.com/go-gitea/gitea
				synced 2025-09-28 03:28:13 +00:00 
			
		
		
		
	enable literal string for code search (#33590)
Close: #33588 --------- Co-authored-by: wxiaoguang <wxiaoguang@gmail.com> Co-authored-by: Giteabot <teabot@gitea.io>
This commit is contained in:
		| @@ -260,17 +260,28 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int | |||||||
| 	var ( | 	var ( | ||||||
| 		indexerQuery query.Query | 		indexerQuery query.Query | ||||||
| 		keywordQuery query.Query | 		keywordQuery query.Query | ||||||
|  | 		contentQuery query.Query | ||||||
| 	) | 	) | ||||||
|  |  | ||||||
| 	pathQuery := bleve.NewPrefixQuery(strings.ToLower(opts.Keyword)) | 	pathQuery := bleve.NewPrefixQuery(strings.ToLower(opts.Keyword)) | ||||||
| 	pathQuery.FieldVal = "Filename" | 	pathQuery.FieldVal = "Filename" | ||||||
| 	pathQuery.SetBoost(10) | 	pathQuery.SetBoost(10) | ||||||
|  |  | ||||||
| 	contentQuery := bleve.NewMatchQuery(opts.Keyword) | 	keywordAsPhrase, isPhrase := internal.ParseKeywordAsPhrase(opts.Keyword) | ||||||
| 	contentQuery.FieldVal = "Content" | 	if isPhrase { | ||||||
|  | 		q := bleve.NewMatchPhraseQuery(keywordAsPhrase) | ||||||
|  | 		q.FieldVal = "Content" | ||||||
| 		if opts.IsKeywordFuzzy { | 		if opts.IsKeywordFuzzy { | ||||||
| 		contentQuery.Fuzziness = inner_bleve.GuessFuzzinessByKeyword(opts.Keyword) | 			q.Fuzziness = inner_bleve.GuessFuzzinessByKeyword(keywordAsPhrase) | ||||||
|  | 		} | ||||||
|  | 		contentQuery = q | ||||||
|  | 	} else { | ||||||
|  | 		q := bleve.NewMatchQuery(opts.Keyword) | ||||||
|  | 		q.FieldVal = "Content" | ||||||
|  | 		if opts.IsKeywordFuzzy { | ||||||
|  | 			q.Fuzziness = inner_bleve.GuessFuzzinessByKeyword(opts.Keyword) | ||||||
|  | 		} | ||||||
|  | 		contentQuery = q | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	keywordQuery = bleve.NewDisjunctionQuery(contentQuery, pathQuery) | 	keywordQuery = bleve.NewDisjunctionQuery(contentQuery, pathQuery) | ||||||
|   | |||||||
| @@ -24,6 +24,7 @@ import ( | |||||||
| 	"code.gitea.io/gitea/modules/setting" | 	"code.gitea.io/gitea/modules/setting" | ||||||
| 	"code.gitea.io/gitea/modules/timeutil" | 	"code.gitea.io/gitea/modules/timeutil" | ||||||
| 	"code.gitea.io/gitea/modules/typesniffer" | 	"code.gitea.io/gitea/modules/typesniffer" | ||||||
|  | 	"code.gitea.io/gitea/modules/util" | ||||||
|  |  | ||||||
| 	"github.com/go-enry/go-enry/v2" | 	"github.com/go-enry/go-enry/v2" | ||||||
| 	"github.com/olivere/elastic/v7" | 	"github.com/olivere/elastic/v7" | ||||||
| @@ -359,13 +360,19 @@ func extractAggs(searchResult *elastic.SearchResult) []*internal.SearchResultLan | |||||||
|  |  | ||||||
| // Search searches for codes and language stats by given conditions. | // Search searches for codes and language stats by given conditions. | ||||||
| func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) { | func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) { | ||||||
| 	searchType := esMultiMatchTypePhrasePrefix | 	var contentQuery elastic.Query | ||||||
| 	if opts.IsKeywordFuzzy { | 	keywordAsPhrase, isPhrase := internal.ParseKeywordAsPhrase(opts.Keyword) | ||||||
| 		searchType = esMultiMatchTypeBestFields | 	if isPhrase { | ||||||
|  | 		contentQuery = elastic.NewMatchPhraseQuery("content", keywordAsPhrase) | ||||||
|  | 	} else { | ||||||
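|  | 		// TODO: this keeps the old logic, which does not really apply any "fuzziness"; it only switches the multi-match type: | ||||||
|  | 		// * IsKeywordFuzzy=true: "best_fields" | ||||||
|  | 		// * IsKeywordFuzzy=false: "phrase_prefix" | ||||||
|  | 		// NOTE(review): elastic.NewMultiMatchQuery takes (text, fields...); the call below passes "content" first, unlike the filename query further down - confirm the argument order. | ||||||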
|  | 		contentQuery = elastic.NewMultiMatchQuery("content", opts.Keyword). | ||||||
|  | 			Type(util.Iif(opts.IsKeywordFuzzy, esMultiMatchTypeBestFields, esMultiMatchTypePhrasePrefix)) | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	kwQuery := elastic.NewBoolQuery().Should( | 	kwQuery := elastic.NewBoolQuery().Should( | ||||||
| 		elastic.NewMultiMatchQuery(opts.Keyword, "content").Type(searchType), | 		contentQuery, | ||||||
| 		elastic.NewMultiMatchQuery(opts.Keyword, "filename^10").Type(esMultiMatchTypePhrasePrefix), | 		elastic.NewMultiMatchQuery(opts.Keyword, "filename^10").Type(esMultiMatchTypePhrasePrefix), | ||||||
| 	) | 	) | ||||||
| 	query := elastic.NewBoolQuery() | 	query := elastic.NewBoolQuery() | ||||||
|   | |||||||
							
								
								
									
										59
									
								
								modules/indexer/code/gitgrep/gitgrep.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										59
									
								
								modules/indexer/code/gitgrep/gitgrep.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,59 @@ | |||||||
|  | // Copyright 2025 The Gitea Authors. All rights reserved. | ||||||
|  | // SPDX-License-Identifier: MIT | ||||||
|  |  | ||||||
|  | package gitgrep | ||||||
|  |  | ||||||
|  | import ( | ||||||
|  | 	"context" | ||||||
|  | 	"fmt" | ||||||
|  | 	"strings" | ||||||
|  |  | ||||||
|  | 	"code.gitea.io/gitea/modules/git" | ||||||
|  | 	code_indexer "code.gitea.io/gitea/modules/indexer/code" | ||||||
|  | 	"code.gitea.io/gitea/modules/setting" | ||||||
|  | ) | ||||||
|  |  | ||||||
|  | func indexSettingToGitGrepPathspecList() (list []string) { | ||||||
|  | 	for _, expr := range setting.Indexer.IncludePatterns { | ||||||
|  | 		list = append(list, ":(glob)"+expr.PatternString()) | ||||||
|  | 	} | ||||||
|  | 	for _, expr := range setting.Indexer.ExcludePatterns { | ||||||
|  | 		list = append(list, ":(glob,exclude)"+expr.PatternString()) | ||||||
|  | 	} | ||||||
|  | 	return list | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func PerformSearch(ctx context.Context, page int, repoID int64, gitRepo *git.Repository, ref git.RefName, keyword string, isFuzzy bool) (searchResults []*code_indexer.Result, total int, err error) { | ||||||
|  | 	// TODO: it should also respect ParseKeywordAsPhrase and clarify the "fuzzy" behavior | ||||||
|  | 	res, err := git.GrepSearch(ctx, gitRepo, keyword, git.GrepOptions{ | ||||||
|  | 		ContextLineNumber: 1, | ||||||
|  | 		IsFuzzy:           isFuzzy, | ||||||
|  | 		RefName:           ref.String(), | ||||||
|  | 		PathspecList:      indexSettingToGitGrepPathspecList(), | ||||||
|  | 	}) | ||||||
|  | 	if err != nil { | ||||||
|  | 		// TODO: if no branch exists, it reports: exit status 128, fatal: this operation must be run in a work tree. | ||||||
|  | 		return nil, 0, fmt.Errorf("git.GrepSearch: %w", err) | ||||||
|  | 	} | ||||||
|  | 	commitID, err := gitRepo.GetRefCommitID(ref.String()) | ||||||
|  | 	if err != nil { | ||||||
|  | 		return nil, 0, fmt.Errorf("gitRepo.GetRefCommitID: %w", err) | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	total = len(res) | ||||||
|  | 	pageStart := min((page-1)*setting.UI.RepoSearchPagingNum, len(res)) | ||||||
|  | 	pageEnd := min(page*setting.UI.RepoSearchPagingNum, len(res)) | ||||||
|  | 	res = res[pageStart:pageEnd] | ||||||
|  | 	for _, r := range res { | ||||||
|  | 		searchResults = append(searchResults, &code_indexer.Result{ | ||||||
|  | 			RepoID:   repoID, | ||||||
|  | 			Filename: r.Filename, | ||||||
|  | 			CommitID: commitID, | ||||||
|  | 			// UpdatedUnix: not supported yet | ||||||
|  | 			// Language:    not supported yet | ||||||
|  | 			// Color:       not supported yet | ||||||
|  | 			Lines: code_indexer.HighlightSearchResultCode(r.Filename, "", r.LineNumbers, strings.Join(r.LineCodes, "\n")), | ||||||
|  | 		}) | ||||||
|  | 	} | ||||||
|  | 	return searchResults, total, nil | ||||||
|  | } | ||||||
| @@ -1,7 +1,7 @@ | |||||||
| // Copyright 2024 The Gitea Authors. All rights reserved. | // Copyright 2024 The Gitea Authors. All rights reserved. | ||||||
| // SPDX-License-Identifier: MIT | // SPDX-License-Identifier: MIT | ||||||
| 
 | 
 | ||||||
| package repo | package gitgrep | ||||||
| 
 | 
 | ||||||
| import ( | import ( | ||||||
| 	"testing" | 	"testing" | ||||||
| @@ -29,13 +29,11 @@ var ( | |||||||
| 	// When the real indexer is not ready, it will be a dummy indexer which will return error to explain it's not ready. | 	// When the real indexer is not ready, it will be a dummy indexer which will return error to explain it's not ready. | ||||||
| 	// So it's always safe use it as *globalIndexer.Load() and call its methods. | 	// So it's always safe use it as *globalIndexer.Load() and call its methods. | ||||||
| 	globalIndexer atomic.Pointer[internal.Indexer] | 	globalIndexer atomic.Pointer[internal.Indexer] | ||||||
| 	dummyIndexer  *internal.Indexer |  | ||||||
| ) | ) | ||||||
|  |  | ||||||
| func init() { | func init() { | ||||||
| 	i := internal.NewDummyIndexer() | 	dummyIndexer := internal.NewDummyIndexer() | ||||||
| 	dummyIndexer = &i | 	globalIndexer.Store(&dummyIndexer) | ||||||
| 	globalIndexer.Store(dummyIndexer) |  | ||||||
| } | } | ||||||
|  |  | ||||||
| func index(ctx context.Context, indexer internal.Indexer, repoID int64) error { | func index(ctx context.Context, indexer internal.Indexer, repoID int64) error { | ||||||
|   | |||||||
| @@ -35,7 +35,7 @@ func FilenameOfIndexerID(indexerID string) string { | |||||||
| 	return indexerID[index+1:] | 	return indexerID[index+1:] | ||||||
| } | } | ||||||
|  |  | ||||||
| // Given the contents of file, returns the boundaries of its first seven lines. | // FilenameMatchIndexPos returns the boundaries of the first seven lines of the given file content. | ||||||
| func FilenameMatchIndexPos(content string) (int, int) { | func FilenameMatchIndexPos(content string) (int, int) { | ||||||
| 	count := 1 | 	count := 1 | ||||||
| 	for i, c := range content { | 	for i, c := range content { | ||||||
| @@ -48,3 +48,11 @@ func FilenameMatchIndexPos(content string) (int, int) { | |||||||
| 	} | 	} | ||||||
| 	return 0, len(content) | 	return 0, len(content) | ||||||
| } | } | ||||||
|  |  | ||||||
// ParseKeywordAsPhrase reports whether keyword is a phrase, i.e. whether it is
// wrapped in a pair of double quotes. For a phrase it returns the text between
// the outermost quotes and true; the inner text is returned verbatim, without
// any unescaping. Otherwise it returns "" and false. A lone `"` is not a phrase.
func ParseKeywordAsPhrase(keyword string) (string, bool) {
	// Two bytes are the minimum: one opening and one closing quote.
	if len(keyword) < 2 {
		return "", false
	}
	inner, quoted := strings.CutPrefix(keyword, `"`)
	if !quoted {
		return "", false
	}
	inner, quoted = strings.CutSuffix(inner, `"`)
	if !quoted {
		return "", false
	}
	return inner, true
}
|   | |||||||
							
								
								
									
										30
									
								
								modules/indexer/code/internal/util_test.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										30
									
								
								modules/indexer/code/internal/util_test.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,30 @@ | |||||||
|  | // Copyright 2025 The Gitea Authors. All rights reserved. | ||||||
|  | // SPDX-License-Identifier: MIT | ||||||
|  |  | ||||||
|  | package internal | ||||||
|  |  | ||||||
|  | import ( | ||||||
|  | 	"testing" | ||||||
|  |  | ||||||
|  | 	"github.com/stretchr/testify/assert" | ||||||
|  | ) | ||||||
|  |  | ||||||
|  | func TestParseKeywordAsPhrase(t *testing.T) { | ||||||
|  | 	cases := []struct { | ||||||
|  | 		keyword  string | ||||||
|  | 		phrase   string | ||||||
|  | 		isPhrase bool | ||||||
|  | 	}{ | ||||||
|  | 		{``, "", false}, | ||||||
|  | 		{`a`, "", false}, | ||||||
|  | 		{`"`, "", false}, | ||||||
|  | 		{`"a`, "", false}, | ||||||
|  | 		{`"a"`, "a", true}, | ||||||
|  | 		{`""\"""`, `"\""`, true}, | ||||||
|  | 	} | ||||||
|  | 	for _, c := range cases { | ||||||
|  | 		phrase, isPhrase := ParseKeywordAsPhrase(c.keyword) | ||||||
|  | 		assert.Equal(t, c.phrase, phrase, "keyword=%q", c.keyword) | ||||||
|  | 		assert.Equal(t, c.isPhrase, isPhrase, "keyword=%q", c.keyword) | ||||||
|  | 	} | ||||||
|  | } | ||||||
| @@ -5,11 +5,11 @@ package repo | |||||||
|  |  | ||||||
| import ( | import ( | ||||||
| 	"net/http" | 	"net/http" | ||||||
| 	"strings" |  | ||||||
|  |  | ||||||
| 	"code.gitea.io/gitea/models/db" | 	"code.gitea.io/gitea/models/db" | ||||||
| 	"code.gitea.io/gitea/modules/git" | 	"code.gitea.io/gitea/modules/git" | ||||||
| 	code_indexer "code.gitea.io/gitea/modules/indexer/code" | 	code_indexer "code.gitea.io/gitea/modules/indexer/code" | ||||||
|  | 	"code.gitea.io/gitea/modules/indexer/code/gitgrep" | ||||||
| 	"code.gitea.io/gitea/modules/setting" | 	"code.gitea.io/gitea/modules/setting" | ||||||
| 	"code.gitea.io/gitea/modules/templates" | 	"code.gitea.io/gitea/modules/templates" | ||||||
| 	"code.gitea.io/gitea/routers/common" | 	"code.gitea.io/gitea/routers/common" | ||||||
| @@ -18,16 +18,6 @@ import ( | |||||||
|  |  | ||||||
| const tplSearch templates.TplName = "repo/search" | const tplSearch templates.TplName = "repo/search" | ||||||
|  |  | ||||||
| func indexSettingToGitGrepPathspecList() (list []string) { |  | ||||||
| 	for _, expr := range setting.Indexer.IncludePatterns { |  | ||||||
| 		list = append(list, ":(glob)"+expr.PatternString()) |  | ||||||
| 	} |  | ||||||
| 	for _, expr := range setting.Indexer.ExcludePatterns { |  | ||||||
| 		list = append(list, ":(glob,exclude)"+expr.PatternString()) |  | ||||||
| 	} |  | ||||||
| 	return list |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Search render repository search page | // Search render repository search page | ||||||
| func Search(ctx *context.Context) { | func Search(ctx *context.Context) { | ||||||
| 	ctx.Data["PageIsViewCode"] = true | 	ctx.Data["PageIsViewCode"] = true | ||||||
| @@ -67,38 +57,14 @@ func Search(ctx *context.Context) { | |||||||
| 			ctx.Data["CodeIndexerUnavailable"] = !code_indexer.IsAvailable(ctx) | 			ctx.Data["CodeIndexerUnavailable"] = !code_indexer.IsAvailable(ctx) | ||||||
| 		} | 		} | ||||||
| 	} else { | 	} else { | ||||||
| 		searchRefName := git.RefNameFromBranch(ctx.Repo.Repository.DefaultBranch) // BranchName should be default branch or the first existing branch | 		var err error | ||||||
| 		res, err := git.GrepSearch(ctx, ctx.Repo.GitRepo, prepareSearch.Keyword, git.GrepOptions{ | 		// ref should be default branch or the first existing branch | ||||||
| 			ContextLineNumber: 1, | 		searchRef := git.RefNameFromBranch(ctx.Repo.Repository.DefaultBranch) | ||||||
| 			IsFuzzy:           prepareSearch.IsFuzzy, | 		searchResults, total, err = gitgrep.PerformSearch(ctx, page, ctx.Repo.Repository.ID, ctx.Repo.GitRepo, searchRef, prepareSearch.Keyword, prepareSearch.IsFuzzy) | ||||||
| 			RefName:           searchRefName.String(), |  | ||||||
| 			PathspecList:      indexSettingToGitGrepPathspecList(), |  | ||||||
| 		}) |  | ||||||
| 		if err != nil { | 		if err != nil { | ||||||
| 			// TODO: if no branch exists, it reports: exit status 128, fatal: this operation must be run in a work tree. | 			ctx.ServerError("gitgrep.PerformSearch", err) | ||||||
| 			ctx.ServerError("GrepSearch", err) |  | ||||||
| 			return | 			return | ||||||
| 		} | 		} | ||||||
| 		commitID, err := ctx.Repo.GitRepo.GetRefCommitID(searchRefName.String()) |  | ||||||
| 		if err != nil { |  | ||||||
| 			ctx.ServerError("GetRefCommitID", err) |  | ||||||
| 			return |  | ||||||
| 		} |  | ||||||
| 		total = len(res) |  | ||||||
| 		pageStart := min((page-1)*setting.UI.RepoSearchPagingNum, len(res)) |  | ||||||
| 		pageEnd := min(page*setting.UI.RepoSearchPagingNum, len(res)) |  | ||||||
| 		res = res[pageStart:pageEnd] |  | ||||||
| 		for _, r := range res { |  | ||||||
| 			searchResults = append(searchResults, &code_indexer.Result{ |  | ||||||
| 				RepoID:   ctx.Repo.Repository.ID, |  | ||||||
| 				Filename: r.Filename, |  | ||||||
| 				CommitID: commitID, |  | ||||||
| 				// UpdatedUnix: not supported yet |  | ||||||
| 				// Language:    not supported yet |  | ||||||
| 				// Color:       not supported yet |  | ||||||
| 				Lines: code_indexer.HighlightSearchResultCode(r.Filename, "", r.LineNumbers, strings.Join(r.LineCodes, "\n")), |  | ||||||
| 			}) |  | ||||||
| 		} |  | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	ctx.Data["Repo"] = ctx.Repo.Repository | 	ctx.Data["Repo"] = ctx.Repo.Repository | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user