1
1
mirror of https://github.com/go-gitea/gitea synced 2025-07-22 18:28:37 +00:00

Fix bleve fuzziness search (#33078) (#33087)

This commit is contained in:
wxiaoguang
2025-01-03 05:45:14 +08:00
committed by GitHub
parent 7a35f90b29
commit e10d222434
11 changed files with 86 additions and 55 deletions

View File

@@ -9,6 +9,7 @@ import (
"unicode"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/util"
"github.com/blevesearch/bleve/v2"
@@ -54,9 +55,9 @@ func openIndexer(path string, latestVersion int) (bleve.Index, int, error) {
return index, 0, nil
}
// This method test the GuessFuzzinessByKeyword method. The fuzziness is based on the levenshtein distance and determines how many chars
// may be different on two string and they still be considered equivalent.
// Given a phrasse, its shortest word determines its fuzziness. If a phrase uses CJK (eg: `갃갃갃` `啊啊啊`), the fuzziness is zero.
// GuessFuzzinessByKeyword guesses fuzziness based on the levenshtein distance and determines how many chars
// may be different on two string, and they still be considered equivalent.
// Given a phrase, its shortest word determines its fuzziness. If a phrase uses CJK (eg: `갃갃갃` `啊啊啊`), the fuzziness is zero.
func GuessFuzzinessByKeyword(s string) int {
tokenizer := unicode_tokenizer.NewUnicodeTokenizer()
tokens := tokenizer.Tokenize([]byte(s))
@@ -85,5 +86,5 @@ func guessFuzzinessByKeyword(s string) int {
return 0
}
}
return min(maxFuzziness, len(s)/4)
return min(min(setting.Indexer.TypeBleveMaxFuzzniess, maxFuzziness), len(s)/4)
}

View File

@@ -7,10 +7,15 @@ import (
"fmt"
"testing"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/test"
"github.com/stretchr/testify/assert"
)
func TestBleveGuessFuzzinessByKeyword(t *testing.T) {
defer test.MockVariableValue(&setting.Indexer.TypeBleveMaxFuzzniess, 2)()
scenarios := []struct {
Input string
Fuzziness int // See util.go for the definition of fuzziness in this particular context
@@ -46,7 +51,7 @@ func TestBleveGuessFuzzinessByKeyword(t *testing.T) {
}
for _, scenario := range scenarios {
t.Run(fmt.Sprintf("ensure fuzziness of '%s' is '%d'", scenario.Input, scenario.Fuzziness), func(t *testing.T) {
t.Run(fmt.Sprintf("Fuziniess:%s=%d", scenario.Input, scenario.Fuzziness), func(t *testing.T) {
assert.Equal(t, scenario.Fuzziness, GuessFuzzinessByKeyword(scenario.Input))
})
}