mirror of
https://github.com/go-gitea/gitea
synced 2025-09-28 03:28:13 +00:00
enable literal string for code search (#33590)
Close: #33588 --------- Co-authored-by: wxiaoguang <wxiaoguang@gmail.com> Co-authored-by: Giteabot <teabot@gitea.io>
This commit is contained in:
@@ -260,17 +260,28 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int
|
|||||||
var (
|
var (
|
||||||
indexerQuery query.Query
|
indexerQuery query.Query
|
||||||
keywordQuery query.Query
|
keywordQuery query.Query
|
||||||
|
contentQuery query.Query
|
||||||
)
|
)
|
||||||
|
|
||||||
pathQuery := bleve.NewPrefixQuery(strings.ToLower(opts.Keyword))
|
pathQuery := bleve.NewPrefixQuery(strings.ToLower(opts.Keyword))
|
||||||
pathQuery.FieldVal = "Filename"
|
pathQuery.FieldVal = "Filename"
|
||||||
pathQuery.SetBoost(10)
|
pathQuery.SetBoost(10)
|
||||||
|
|
||||||
contentQuery := bleve.NewMatchQuery(opts.Keyword)
|
keywordAsPhrase, isPhrase := internal.ParseKeywordAsPhrase(opts.Keyword)
|
||||||
contentQuery.FieldVal = "Content"
|
if isPhrase {
|
||||||
|
q := bleve.NewMatchPhraseQuery(keywordAsPhrase)
|
||||||
|
q.FieldVal = "Content"
|
||||||
if opts.IsKeywordFuzzy {
|
if opts.IsKeywordFuzzy {
|
||||||
contentQuery.Fuzziness = inner_bleve.GuessFuzzinessByKeyword(opts.Keyword)
|
q.Fuzziness = inner_bleve.GuessFuzzinessByKeyword(keywordAsPhrase)
|
||||||
|
}
|
||||||
|
contentQuery = q
|
||||||
|
} else {
|
||||||
|
q := bleve.NewMatchQuery(opts.Keyword)
|
||||||
|
q.FieldVal = "Content"
|
||||||
|
if opts.IsKeywordFuzzy {
|
||||||
|
q.Fuzziness = inner_bleve.GuessFuzzinessByKeyword(opts.Keyword)
|
||||||
|
}
|
||||||
|
contentQuery = q
|
||||||
}
|
}
|
||||||
|
|
||||||
keywordQuery = bleve.NewDisjunctionQuery(contentQuery, pathQuery)
|
keywordQuery = bleve.NewDisjunctionQuery(contentQuery, pathQuery)
|
||||||
|
@@ -24,6 +24,7 @@ import (
|
|||||||
"code.gitea.io/gitea/modules/setting"
|
"code.gitea.io/gitea/modules/setting"
|
||||||
"code.gitea.io/gitea/modules/timeutil"
|
"code.gitea.io/gitea/modules/timeutil"
|
||||||
"code.gitea.io/gitea/modules/typesniffer"
|
"code.gitea.io/gitea/modules/typesniffer"
|
||||||
|
"code.gitea.io/gitea/modules/util"
|
||||||
|
|
||||||
"github.com/go-enry/go-enry/v2"
|
"github.com/go-enry/go-enry/v2"
|
||||||
"github.com/olivere/elastic/v7"
|
"github.com/olivere/elastic/v7"
|
||||||
@@ -359,13 +360,19 @@ func extractAggs(searchResult *elastic.SearchResult) []*internal.SearchResultLan
|
|||||||
|
|
||||||
// Search searches for codes and language stats by given conditions.
|
// Search searches for codes and language stats by given conditions.
|
||||||
func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
|
func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
|
||||||
searchType := esMultiMatchTypePhrasePrefix
|
var contentQuery elastic.Query
|
||||||
if opts.IsKeywordFuzzy {
|
keywordAsPhrase, isPhrase := internal.ParseKeywordAsPhrase(opts.Keyword)
|
||||||
searchType = esMultiMatchTypeBestFields
|
if isPhrase {
|
||||||
|
contentQuery = elastic.NewMatchPhraseQuery("content", keywordAsPhrase)
|
||||||
|
} else {
|
||||||
|
// TODO: this is the old logic, but not really using "fuzziness"
|
||||||
|
// * IsKeywordFuzzy=true: "best_fields"
|
||||||
|
// * IsKeywordFuzzy=false: "phrase_prefix"
|
||||||
|
contentQuery = elastic.NewMultiMatchQuery("content", opts.Keyword).
|
||||||
|
Type(util.Iif(opts.IsKeywordFuzzy, esMultiMatchTypeBestFields, esMultiMatchTypePhrasePrefix))
|
||||||
}
|
}
|
||||||
|
|
||||||
kwQuery := elastic.NewBoolQuery().Should(
|
kwQuery := elastic.NewBoolQuery().Should(
|
||||||
elastic.NewMultiMatchQuery(opts.Keyword, "content").Type(searchType),
|
contentQuery,
|
||||||
elastic.NewMultiMatchQuery(opts.Keyword, "filename^10").Type(esMultiMatchTypePhrasePrefix),
|
elastic.NewMultiMatchQuery(opts.Keyword, "filename^10").Type(esMultiMatchTypePhrasePrefix),
|
||||||
)
|
)
|
||||||
query := elastic.NewBoolQuery()
|
query := elastic.NewBoolQuery()
|
||||||
|
59
modules/indexer/code/gitgrep/gitgrep.go
Normal file
59
modules/indexer/code/gitgrep/gitgrep.go
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
// Copyright 2025 The Gitea Authors. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
|
package gitgrep
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"code.gitea.io/gitea/modules/git"
|
||||||
|
code_indexer "code.gitea.io/gitea/modules/indexer/code"
|
||||||
|
"code.gitea.io/gitea/modules/setting"
|
||||||
|
)
|
||||||
|
|
||||||
|
func indexSettingToGitGrepPathspecList() (list []string) {
|
||||||
|
for _, expr := range setting.Indexer.IncludePatterns {
|
||||||
|
list = append(list, ":(glob)"+expr.PatternString())
|
||||||
|
}
|
||||||
|
for _, expr := range setting.Indexer.ExcludePatterns {
|
||||||
|
list = append(list, ":(glob,exclude)"+expr.PatternString())
|
||||||
|
}
|
||||||
|
return list
|
||||||
|
}
|
||||||
|
|
||||||
|
func PerformSearch(ctx context.Context, page int, repoID int64, gitRepo *git.Repository, ref git.RefName, keyword string, isFuzzy bool) (searchResults []*code_indexer.Result, total int, err error) {
|
||||||
|
// TODO: it should also respect ParseKeywordAsPhrase and clarify the "fuzzy" behavior
|
||||||
|
res, err := git.GrepSearch(ctx, gitRepo, keyword, git.GrepOptions{
|
||||||
|
ContextLineNumber: 1,
|
||||||
|
IsFuzzy: isFuzzy,
|
||||||
|
RefName: ref.String(),
|
||||||
|
PathspecList: indexSettingToGitGrepPathspecList(),
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
// TODO: if no branch exists, it reports: exit status 128, fatal: this operation must be run in a work tree.
|
||||||
|
return nil, 0, fmt.Errorf("git.GrepSearch: %w", err)
|
||||||
|
}
|
||||||
|
commitID, err := gitRepo.GetRefCommitID(ref.String())
|
||||||
|
if err != nil {
|
||||||
|
return nil, 0, fmt.Errorf("gitRepo.GetRefCommitID: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
total = len(res)
|
||||||
|
pageStart := min((page-1)*setting.UI.RepoSearchPagingNum, len(res))
|
||||||
|
pageEnd := min(page*setting.UI.RepoSearchPagingNum, len(res))
|
||||||
|
res = res[pageStart:pageEnd]
|
||||||
|
for _, r := range res {
|
||||||
|
searchResults = append(searchResults, &code_indexer.Result{
|
||||||
|
RepoID: repoID,
|
||||||
|
Filename: r.Filename,
|
||||||
|
CommitID: commitID,
|
||||||
|
// UpdatedUnix: not supported yet
|
||||||
|
// Language: not supported yet
|
||||||
|
// Color: not supported yet
|
||||||
|
Lines: code_indexer.HighlightSearchResultCode(r.Filename, "", r.LineNumbers, strings.Join(r.LineCodes, "\n")),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
return searchResults, total, nil
|
||||||
|
}
|
@@ -1,7 +1,7 @@
|
|||||||
// Copyright 2024 The Gitea Authors. All rights reserved.
|
// Copyright 2024 The Gitea Authors. All rights reserved.
|
||||||
// SPDX-License-Identifier: MIT
|
// SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
package repo
|
package gitgrep
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"testing"
|
"testing"
|
@@ -29,13 +29,11 @@ var (
|
|||||||
// When the real indexer is not ready, it will be a dummy indexer which will return error to explain it's not ready.
|
// When the real indexer is not ready, it will be a dummy indexer which will return error to explain it's not ready.
|
||||||
// So it's always safe to use it as *globalIndexer.Load() and call its methods.
|
// So it's always safe to use it as *globalIndexer.Load() and call its methods.
|
||||||
globalIndexer atomic.Pointer[internal.Indexer]
|
globalIndexer atomic.Pointer[internal.Indexer]
|
||||||
dummyIndexer *internal.Indexer
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
i := internal.NewDummyIndexer()
|
dummyIndexer := internal.NewDummyIndexer()
|
||||||
dummyIndexer = &i
|
globalIndexer.Store(&dummyIndexer)
|
||||||
globalIndexer.Store(dummyIndexer)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func index(ctx context.Context, indexer internal.Indexer, repoID int64) error {
|
func index(ctx context.Context, indexer internal.Indexer, repoID int64) error {
|
||||||
|
@@ -35,7 +35,7 @@ func FilenameOfIndexerID(indexerID string) string {
|
|||||||
return indexerID[index+1:]
|
return indexerID[index+1:]
|
||||||
}
|
}
|
||||||
|
|
||||||
// Given the contents of file, returns the boundaries of its first seven lines.
|
// FilenameMatchIndexPos returns the boundaries of the first seven lines of the given file content.
|
||||||
func FilenameMatchIndexPos(content string) (int, int) {
|
func FilenameMatchIndexPos(content string) (int, int) {
|
||||||
count := 1
|
count := 1
|
||||||
for i, c := range content {
|
for i, c := range content {
|
||||||
@@ -48,3 +48,11 @@ func FilenameMatchIndexPos(content string) (int, int) {
|
|||||||
}
|
}
|
||||||
return 0, len(content)
|
return 0, len(content)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ParseKeywordAsPhrase reports whether keyword is wrapped in a pair of double
// quotes. If it is, the text between the outer quotes is returned together
// with true; otherwise it returns an empty string and false.
func ParseKeywordAsPhrase(keyword string) (string, bool) {
	const quote = `"`
	// A lone quote is both prefix and suffix, so at least two bytes are required.
	if len(keyword) < 2 || !strings.HasPrefix(keyword, quote) || !strings.HasSuffix(keyword, quote) {
		return "", false
	}
	// Only the outer quotes are stripped; the inner content is not decoded at the moment.
	inner := keyword[1 : len(keyword)-1]
	return inner, true
}
|
||||||
|
30
modules/indexer/code/internal/util_test.go
Normal file
30
modules/indexer/code/internal/util_test.go
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
// Copyright 2025 The Gitea Authors. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
|
package internal
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestParseKeywordAsPhrase(t *testing.T) {
|
||||||
|
cases := []struct {
|
||||||
|
keyword string
|
||||||
|
phrase string
|
||||||
|
isPhrase bool
|
||||||
|
}{
|
||||||
|
{``, "", false},
|
||||||
|
{`a`, "", false},
|
||||||
|
{`"`, "", false},
|
||||||
|
{`"a`, "", false},
|
||||||
|
{`"a"`, "a", true},
|
||||||
|
{`""\"""`, `"\""`, true},
|
||||||
|
}
|
||||||
|
for _, c := range cases {
|
||||||
|
phrase, isPhrase := ParseKeywordAsPhrase(c.keyword)
|
||||||
|
assert.Equal(t, c.phrase, phrase, "keyword=%q", c.keyword)
|
||||||
|
assert.Equal(t, c.isPhrase, isPhrase, "keyword=%q", c.keyword)
|
||||||
|
}
|
||||||
|
}
|
@@ -5,11 +5,11 @@ package repo
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"net/http"
|
"net/http"
|
||||||
"strings"
|
|
||||||
|
|
||||||
"code.gitea.io/gitea/models/db"
|
"code.gitea.io/gitea/models/db"
|
||||||
"code.gitea.io/gitea/modules/git"
|
"code.gitea.io/gitea/modules/git"
|
||||||
code_indexer "code.gitea.io/gitea/modules/indexer/code"
|
code_indexer "code.gitea.io/gitea/modules/indexer/code"
|
||||||
|
"code.gitea.io/gitea/modules/indexer/code/gitgrep"
|
||||||
"code.gitea.io/gitea/modules/setting"
|
"code.gitea.io/gitea/modules/setting"
|
||||||
"code.gitea.io/gitea/modules/templates"
|
"code.gitea.io/gitea/modules/templates"
|
||||||
"code.gitea.io/gitea/routers/common"
|
"code.gitea.io/gitea/routers/common"
|
||||||
@@ -18,16 +18,6 @@ import (
|
|||||||
|
|
||||||
const tplSearch templates.TplName = "repo/search"
|
const tplSearch templates.TplName = "repo/search"
|
||||||
|
|
||||||
func indexSettingToGitGrepPathspecList() (list []string) {
|
|
||||||
for _, expr := range setting.Indexer.IncludePatterns {
|
|
||||||
list = append(list, ":(glob)"+expr.PatternString())
|
|
||||||
}
|
|
||||||
for _, expr := range setting.Indexer.ExcludePatterns {
|
|
||||||
list = append(list, ":(glob,exclude)"+expr.PatternString())
|
|
||||||
}
|
|
||||||
return list
|
|
||||||
}
|
|
||||||
|
|
||||||
// Search render repository search page
|
// Search render repository search page
|
||||||
func Search(ctx *context.Context) {
|
func Search(ctx *context.Context) {
|
||||||
ctx.Data["PageIsViewCode"] = true
|
ctx.Data["PageIsViewCode"] = true
|
||||||
@@ -67,38 +57,14 @@ func Search(ctx *context.Context) {
|
|||||||
ctx.Data["CodeIndexerUnavailable"] = !code_indexer.IsAvailable(ctx)
|
ctx.Data["CodeIndexerUnavailable"] = !code_indexer.IsAvailable(ctx)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
searchRefName := git.RefNameFromBranch(ctx.Repo.Repository.DefaultBranch) // BranchName should be default branch or the first existing branch
|
var err error
|
||||||
res, err := git.GrepSearch(ctx, ctx.Repo.GitRepo, prepareSearch.Keyword, git.GrepOptions{
|
// ref should be default branch or the first existing branch
|
||||||
ContextLineNumber: 1,
|
searchRef := git.RefNameFromBranch(ctx.Repo.Repository.DefaultBranch)
|
||||||
IsFuzzy: prepareSearch.IsFuzzy,
|
searchResults, total, err = gitgrep.PerformSearch(ctx, page, ctx.Repo.Repository.ID, ctx.Repo.GitRepo, searchRef, prepareSearch.Keyword, prepareSearch.IsFuzzy)
|
||||||
RefName: searchRefName.String(),
|
|
||||||
PathspecList: indexSettingToGitGrepPathspecList(),
|
|
||||||
})
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// TODO: if no branch exists, it reports: exit status 128, fatal: this operation must be run in a work tree.
|
ctx.ServerError("gitgrep.PerformSearch", err)
|
||||||
ctx.ServerError("GrepSearch", err)
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
commitID, err := ctx.Repo.GitRepo.GetRefCommitID(searchRefName.String())
|
|
||||||
if err != nil {
|
|
||||||
ctx.ServerError("GetRefCommitID", err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
total = len(res)
|
|
||||||
pageStart := min((page-1)*setting.UI.RepoSearchPagingNum, len(res))
|
|
||||||
pageEnd := min(page*setting.UI.RepoSearchPagingNum, len(res))
|
|
||||||
res = res[pageStart:pageEnd]
|
|
||||||
for _, r := range res {
|
|
||||||
searchResults = append(searchResults, &code_indexer.Result{
|
|
||||||
RepoID: ctx.Repo.Repository.ID,
|
|
||||||
Filename: r.Filename,
|
|
||||||
CommitID: commitID,
|
|
||||||
// UpdatedUnix: not supported yet
|
|
||||||
// Language: not supported yet
|
|
||||||
// Color: not supported yet
|
|
||||||
Lines: code_indexer.HighlightSearchResultCode(r.Filename, "", r.LineNumbers, strings.Join(r.LineCodes, "\n")),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx.Data["Repo"] = ctx.Repo.Repository
|
ctx.Data["Repo"] = ctx.Repo.Repository
|
||||||
|
Reference in New Issue
Block a user