1
1
mirror of https://github.com/go-gitea/gitea synced 2024-12-22 16:44:26 +00:00

Meilisearch double quote on "match" query (#29740)

make `nonFuzzyWorkaround` unnecessary

cc @Kerollmops
This commit is contained in:
6543 2024-03-16 14:19:41 +01:00 committed by GitHub
parent 3cd64949ae
commit c6e5ec51bd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 37 additions and 70 deletions

View File

@ -6,6 +6,7 @@ package meilisearch
import ( import (
"context" "context"
"errors" "errors"
"fmt"
"strconv" "strconv"
"strings" "strings"
@ -217,7 +218,14 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (
skip, limit := indexer_internal.ParsePaginator(options.Paginator, maxTotalHits) skip, limit := indexer_internal.ParsePaginator(options.Paginator, maxTotalHits)
searchRes, err := b.inner.Client.Index(b.inner.VersionedIndexName()).Search(options.Keyword, &meilisearch.SearchRequest{ keyword := options.Keyword
if !options.IsFuzzyKeyword {
// to make it non fuzzy ("typo tolerance" in meilisearch terms), we have to quote the keyword(s)
// https://www.meilisearch.com/docs/reference/api/search#phrase-search
keyword = doubleQuoteKeyword(keyword)
}
searchRes, err := b.inner.Client.Index(b.inner.VersionedIndexName()).Search(keyword, &meilisearch.SearchRequest{
Filter: query.Statement(), Filter: query.Statement(),
Limit: int64(limit), Limit: int64(limit),
Offset: int64(skip), Offset: int64(skip),
@ -228,7 +236,7 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (
return nil, err return nil, err
} }
hits, err := nonFuzzyWorkaround(searchRes, options.Keyword, options.IsFuzzyKeyword) hits, err := convertHits(searchRes)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -247,11 +255,20 @@ func parseSortBy(sortBy internal.SortBy) string {
return field + ":asc" return field + ":asc"
} }
// nonFuzzyWorkaround is needed as meilisearch does not have an exact search func doubleQuoteKeyword(k string) string {
// and you can only change "typo tolerance" per index. So we have to post-filter the results kp := strings.Split(k, " ")
// https://www.meilisearch.com/docs/learn/configuration/typo_tolerance#configuring-typo-tolerance parts := 0
// TODO: remove once https://github.com/orgs/meilisearch/discussions/377 is addressed for i := range kp {
func nonFuzzyWorkaround(searchRes *meilisearch.SearchResponse, keyword string, isFuzzy bool) ([]internal.Match, error) { part := strings.Trim(kp[i], "\"")
if part != "" {
kp[parts] = fmt.Sprintf(`"%s"`, part)
parts++
}
}
return strings.Join(kp[:parts], " ")
}
func convertHits(searchRes *meilisearch.SearchResponse) ([]internal.Match, error) {
hits := make([]internal.Match, 0, len(searchRes.Hits)) hits := make([]internal.Match, 0, len(searchRes.Hits))
for _, hit := range searchRes.Hits { for _, hit := range searchRes.Hits {
hit, ok := hit.(map[string]any) hit, ok := hit.(map[string]any)
@ -259,61 +276,11 @@ func nonFuzzyWorkaround(searchRes *meilisearch.SearchResponse, keyword string, i
return nil, ErrMalformedResponse return nil, ErrMalformedResponse
} }
if !isFuzzy {
keyword = strings.ToLower(keyword)
// declare a anon func to check if the title, content or at least one comment contains the keyword
found, err := func() (bool, error) {
// check if title match first
title, ok := hit["title"].(string)
if !ok {
return false, ErrMalformedResponse
} else if strings.Contains(strings.ToLower(title), keyword) {
return true, nil
}
// check if content has a match
content, ok := hit["content"].(string)
if !ok {
return false, ErrMalformedResponse
} else if strings.Contains(strings.ToLower(content), keyword) {
return true, nil
}
// now check for each comment if one has a match
// so we first try to cast and skip if there are no comments
comments, ok := hit["comments"].([]any)
if !ok {
return false, ErrMalformedResponse
} else if len(comments) == 0 {
return false, nil
}
// now we iterate over all and report as soon as we detect one match
for i := range comments {
comment, ok := comments[i].(string)
if !ok {
return false, ErrMalformedResponse
}
if strings.Contains(strings.ToLower(comment), keyword) {
return true, nil
}
}
// we got no match
return false, nil
}()
if err != nil {
return nil, err
} else if !found {
continue
}
}
issueID, ok := hit["id"].(float64) issueID, ok := hit["id"].(float64)
if !ok { if !ok {
return nil, ErrMalformedResponse return nil, ErrMalformedResponse
} }
hits = append(hits, internal.Match{ hits = append(hits, internal.Match{
ID: int64(issueID), ID: int64(issueID),
}) })

View File

@ -53,11 +53,10 @@ func TestMeilisearchIndexer(t *testing.T) {
tests.TestIndexer(t, indexer) tests.TestIndexer(t, indexer)
} }
func TestNonFuzzyWorkaround(t *testing.T) { func TestConvertHits(t *testing.T) {
// get unexpected return _, err := convertHits(&meilisearch.SearchResponse{
_, err := nonFuzzyWorkaround(&meilisearch.SearchResponse{
Hits: []any{"aa", "bb", "cc", "dd"}, Hits: []any{"aa", "bb", "cc", "dd"},
}, "bowling", false) })
assert.ErrorIs(t, err, ErrMalformedResponse) assert.ErrorIs(t, err, ErrMalformedResponse)
validResponse := &meilisearch.SearchResponse{ validResponse := &meilisearch.SearchResponse{
@ -82,14 +81,15 @@ func TestNonFuzzyWorkaround(t *testing.T) {
}, },
}, },
} }
hits, err := convertHits(validResponse)
// nonFuzzy
hits, err := nonFuzzyWorkaround(validResponse, "bowling", false)
assert.NoError(t, err)
assert.EqualValues(t, []internal.Match{{ID: 11}, {ID: 22}}, hits)
// fuzzy
hits, err = nonFuzzyWorkaround(validResponse, "bowling", true)
assert.NoError(t, err) assert.NoError(t, err)
assert.EqualValues(t, []internal.Match{{ID: 11}, {ID: 22}, {ID: 33}}, hits) assert.EqualValues(t, []internal.Match{{ID: 11}, {ID: 22}, {ID: 33}}, hits)
} }
func TestDoubleQuoteKeyword(t *testing.T) {
assert.EqualValues(t, "", doubleQuoteKeyword(""))
assert.EqualValues(t, `"a" "b" "c"`, doubleQuoteKeyword("a b c"))
assert.EqualValues(t, `"a" "d" "g"`, doubleQuoteKeyword("a d g"))
assert.EqualValues(t, `"a" "d" "g"`, doubleQuoteKeyword("a d g"))
assert.EqualValues(t, `"a" "d" "g"`, doubleQuoteKeyword(`a "" "d" """g`))
}