mirror of
https://github.com/go-gitea/gitea
synced 2025-07-29 21:58:36 +00:00
Improve issue & code search (#33860)
Each "indexer" should provide the "search modes" they support by themselves. And we need to remove the "fuzzy" search for code.
This commit is contained in:
@@ -6,6 +6,7 @@ package bleve
|
||||
import (
|
||||
"context"
|
||||
|
||||
"code.gitea.io/gitea/modules/indexer"
|
||||
indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
|
||||
inner_bleve "code.gitea.io/gitea/modules/indexer/internal/bleve"
|
||||
"code.gitea.io/gitea/modules/indexer/issues/internal"
|
||||
@@ -120,6 +121,10 @@ type Indexer struct {
|
||||
indexer_internal.Indexer // do not composite inner_bleve.Indexer directly to avoid exposing too much
|
||||
}
|
||||
|
||||
func (b *Indexer) SupportedSearchModes() []indexer.SearchMode {
|
||||
return indexer.SearchModesExactWordsFuzzy()
|
||||
}
|
||||
|
||||
// NewIndexer creates a new bleve local indexer
|
||||
func NewIndexer(indexDir string) *Indexer {
|
||||
inner := inner_bleve.NewIndexer(indexDir, issueIndexerLatestVersion, generateIssueIndexMapping)
|
||||
@@ -157,16 +162,23 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (
|
||||
var queries []query.Query
|
||||
|
||||
if options.Keyword != "" {
|
||||
fuzziness := 0
|
||||
if options.IsFuzzyKeyword {
|
||||
fuzziness = inner_bleve.GuessFuzzinessByKeyword(options.Keyword)
|
||||
if options.SearchMode == indexer.SearchModeWords || options.SearchMode == indexer.SearchModeFuzzy {
|
||||
fuzziness := 0
|
||||
if options.SearchMode == indexer.SearchModeFuzzy {
|
||||
fuzziness = inner_bleve.GuessFuzzinessByKeyword(options.Keyword)
|
||||
}
|
||||
queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
|
||||
inner_bleve.MatchAndQuery(options.Keyword, "title", issueIndexerAnalyzer, fuzziness),
|
||||
inner_bleve.MatchAndQuery(options.Keyword, "content", issueIndexerAnalyzer, fuzziness),
|
||||
inner_bleve.MatchAndQuery(options.Keyword, "comments", issueIndexerAnalyzer, fuzziness),
|
||||
}...))
|
||||
} else /* exact */ {
|
||||
queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
|
||||
inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer, 0),
|
||||
inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer, 0),
|
||||
inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer, 0),
|
||||
}...))
|
||||
}
|
||||
|
||||
queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
|
||||
inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer, fuzziness),
|
||||
inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer, fuzziness),
|
||||
inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer, fuzziness),
|
||||
}...))
|
||||
}
|
||||
|
||||
if len(options.RepoIDs) > 0 || options.AllPublic {
|
||||
|
@@ -5,9 +5,11 @@ package db
|
||||
|
||||
import (
|
||||
"context"
|
||||
"strings"
|
||||
|
||||
"code.gitea.io/gitea/models/db"
|
||||
issue_model "code.gitea.io/gitea/models/issues"
|
||||
"code.gitea.io/gitea/modules/indexer"
|
||||
indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
|
||||
inner_db "code.gitea.io/gitea/modules/indexer/internal/db"
|
||||
"code.gitea.io/gitea/modules/indexer/issues/internal"
|
||||
@@ -22,6 +24,10 @@ type Indexer struct {
|
||||
indexer_internal.Indexer
|
||||
}
|
||||
|
||||
func (i *Indexer) SupportedSearchModes() []indexer.SearchMode {
|
||||
return indexer.SearchModesExactWords()
|
||||
}
|
||||
|
||||
func NewIndexer() *Indexer {
|
||||
return &Indexer{
|
||||
Indexer: &inner_db.Indexer{},
|
||||
@@ -38,6 +44,26 @@ func (i *Indexer) Delete(_ context.Context, _ ...int64) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func buildMatchQuery(mode indexer.SearchModeType, colName, keyword string) builder.Cond {
|
||||
if mode == indexer.SearchModeExact {
|
||||
return db.BuildCaseInsensitiveLike("issue.name", keyword)
|
||||
}
|
||||
|
||||
// match words
|
||||
cond := builder.NewCond()
|
||||
fields := strings.Fields(keyword)
|
||||
if len(fields) == 0 {
|
||||
return builder.Expr("1=1")
|
||||
}
|
||||
for _, field := range fields {
|
||||
if field == "" {
|
||||
continue
|
||||
}
|
||||
cond = cond.And(db.BuildCaseInsensitiveLike(colName, field))
|
||||
}
|
||||
return cond
|
||||
}
|
||||
|
||||
// Search searches for issues
|
||||
func (i *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (*internal.SearchResult, error) {
|
||||
// FIXME: I tried to avoid importing models here, but it seems to be impossible.
|
||||
@@ -60,14 +86,14 @@ func (i *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (
|
||||
subQuery := builder.Select("id").From("issue").Where(repoCond)
|
||||
|
||||
cond = builder.Or(
|
||||
db.BuildCaseInsensitiveLike("issue.name", options.Keyword),
|
||||
db.BuildCaseInsensitiveLike("issue.content", options.Keyword),
|
||||
buildMatchQuery(options.SearchMode, "issue.name", options.Keyword),
|
||||
buildMatchQuery(options.SearchMode, "issue.content", options.Keyword),
|
||||
builder.In("issue.id", builder.Select("issue_id").
|
||||
From("comment").
|
||||
Where(builder.And(
|
||||
builder.Eq{"type": issue_model.CommentTypeComment},
|
||||
builder.In("issue_id", subQuery),
|
||||
db.BuildCaseInsensitiveLike("content", options.Keyword),
|
||||
buildMatchQuery(options.SearchMode, "content", options.Keyword),
|
||||
)),
|
||||
),
|
||||
)
|
||||
|
@@ -10,6 +10,7 @@ import (
|
||||
"strings"
|
||||
|
||||
"code.gitea.io/gitea/modules/graceful"
|
||||
"code.gitea.io/gitea/modules/indexer"
|
||||
indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
|
||||
inner_elasticsearch "code.gitea.io/gitea/modules/indexer/internal/elasticsearch"
|
||||
"code.gitea.io/gitea/modules/indexer/issues/internal"
|
||||
@@ -33,6 +34,11 @@ type Indexer struct {
|
||||
indexer_internal.Indexer // do not composite inner_elasticsearch.Indexer directly to avoid exposing too much
|
||||
}
|
||||
|
||||
func (b *Indexer) SupportedSearchModes() []indexer.SearchMode {
|
||||
// TODO: es supports fuzzy search, but our code doesn't at the moment, and actually the default fuzziness is already "AUTO"
|
||||
return indexer.SearchModesExactWords()
|
||||
}
|
||||
|
||||
// NewIndexer creates a new elasticsearch indexer
|
||||
func NewIndexer(url, indexerName string) *Indexer {
|
||||
inner := inner_elasticsearch.NewIndexer(url, indexerName, issueIndexerLatestVersion, defaultMapping)
|
||||
@@ -146,12 +152,11 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (
|
||||
query := elastic.NewBoolQuery()
|
||||
|
||||
if options.Keyword != "" {
|
||||
searchType := esMultiMatchTypePhrasePrefix
|
||||
if options.IsFuzzyKeyword {
|
||||
searchType = esMultiMatchTypeBestFields
|
||||
if options.SearchMode == indexer.SearchModeExact {
|
||||
query.Must(elastic.NewMultiMatchQuery(options.Keyword, "title", "content", "comments").Type(esMultiMatchTypePhrasePrefix))
|
||||
} else /* words */ {
|
||||
query.Must(elastic.NewMultiMatchQuery(options.Keyword, "title", "content", "comments").Type(esMultiMatchTypeBestFields).Operator("and"))
|
||||
}
|
||||
|
||||
query.Must(elastic.NewMultiMatchQuery(options.Keyword, "title", "content", "comments").Type(searchType))
|
||||
}
|
||||
|
||||
if len(options.RepoIDs) > 0 {
|
||||
|
@@ -14,6 +14,7 @@ import (
|
||||
db_model "code.gitea.io/gitea/models/db"
|
||||
repo_model "code.gitea.io/gitea/models/repo"
|
||||
"code.gitea.io/gitea/modules/graceful"
|
||||
"code.gitea.io/gitea/modules/indexer"
|
||||
"code.gitea.io/gitea/modules/indexer/issues/bleve"
|
||||
"code.gitea.io/gitea/modules/indexer/issues/db"
|
||||
"code.gitea.io/gitea/modules/indexer/issues/elasticsearch"
|
||||
@@ -313,3 +314,11 @@ func CountIssues(ctx context.Context, opts *SearchOptions) (int64, error) {
|
||||
_, total, err := SearchIssues(ctx, opts)
|
||||
return total, err
|
||||
}
|
||||
|
||||
func SupportedSearchModes() []indexer.SearchMode {
|
||||
gi := globalIndexer.Load()
|
||||
if gi == nil {
|
||||
return nil
|
||||
}
|
||||
return (*gi).SupportedSearchModes()
|
||||
}
|
||||
|
@@ -7,6 +7,7 @@ import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"code.gitea.io/gitea/modules/indexer"
|
||||
"code.gitea.io/gitea/modules/indexer/internal"
|
||||
)
|
||||
|
||||
@@ -16,6 +17,7 @@ type Indexer interface {
|
||||
Index(ctx context.Context, issue ...*IndexerData) error
|
||||
Delete(ctx context.Context, ids ...int64) error
|
||||
Search(ctx context.Context, options *SearchOptions) (*SearchResult, error)
|
||||
SupportedSearchModes() []indexer.SearchMode
|
||||
}
|
||||
|
||||
// NewDummyIndexer returns a dummy indexer
|
||||
@@ -29,6 +31,10 @@ type dummyIndexer struct {
|
||||
internal.Indexer
|
||||
}
|
||||
|
||||
func (d *dummyIndexer) SupportedSearchModes() []indexer.SearchMode {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *dummyIndexer) Index(_ context.Context, _ ...*IndexerData) error {
|
||||
return fmt.Errorf("indexer is not ready")
|
||||
}
|
||||
|
@@ -7,6 +7,7 @@ import (
|
||||
"strconv"
|
||||
|
||||
"code.gitea.io/gitea/models/db"
|
||||
"code.gitea.io/gitea/modules/indexer"
|
||||
"code.gitea.io/gitea/modules/optional"
|
||||
"code.gitea.io/gitea/modules/timeutil"
|
||||
)
|
||||
@@ -77,7 +78,7 @@ type SearchResult struct {
|
||||
type SearchOptions struct {
|
||||
Keyword string // keyword to search
|
||||
|
||||
IsFuzzyKeyword bool // if false the levenshtein distance is 0
|
||||
SearchMode indexer.SearchModeType
|
||||
|
||||
RepoIDs []int64 // repository IDs which the issues belong to
|
||||
AllPublic bool // if include all public repositories
|
||||
|
@@ -10,6 +10,7 @@ import (
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"code.gitea.io/gitea/modules/indexer"
|
||||
indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
|
||||
inner_meilisearch "code.gitea.io/gitea/modules/indexer/internal/meilisearch"
|
||||
"code.gitea.io/gitea/modules/indexer/issues/internal"
|
||||
@@ -35,6 +36,10 @@ type Indexer struct {
|
||||
indexer_internal.Indexer // do not composite inner_meilisearch.Indexer directly to avoid exposing too much
|
||||
}
|
||||
|
||||
func (b *Indexer) SupportedSearchModes() []indexer.SearchMode {
|
||||
return indexer.SearchModesExactWords()
|
||||
}
|
||||
|
||||
// NewIndexer creates a new meilisearch indexer
|
||||
func NewIndexer(url, apiKey, indexerName string) *Indexer {
|
||||
settings := &meilisearch.Settings{
|
||||
@@ -230,9 +235,8 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (
|
||||
limit = 1
|
||||
}
|
||||
|
||||
keyword := options.Keyword
|
||||
if !options.IsFuzzyKeyword {
|
||||
// to make it non fuzzy ("typo tolerance" in meilisearch terms), we have to quote the keyword(s)
|
||||
keyword := options.Keyword // default to match "words"
|
||||
if options.SearchMode == indexer.SearchModeExact {
|
||||
// https://www.meilisearch.com/docs/reference/api/search#phrase-search
|
||||
keyword = doubleQuoteKeyword(keyword)
|
||||
}
|
||||
|
Reference in New Issue
Block a user