1
1
mirror of https://github.com/go-gitea/gitea synced 2025-07-22 18:28:37 +00:00

Fix bleve fuzziness search (#33078)

Close #31565
This commit is contained in:
wxiaoguang
2025-01-03 00:32:02 +08:00
committed by GitHub
parent 9882917bce
commit 45973a100b
11 changed files with 83 additions and 52 deletions

View File

@@ -9,6 +9,7 @@ import (
"unicode"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/util"
"github.com/blevesearch/bleve/v2"
@@ -54,9 +55,9 @@ func openIndexer(path string, latestVersion int) (bleve.Index, int, error) {
return index, 0, nil
}
// This method test the GuessFuzzinessByKeyword method. The fuzziness is based on the levenshtein distance and determines how many chars
// may be different on two string and they still be considered equivalent.
// Given a phrasse, its shortest word determines its fuzziness. If a phrase uses CJK (eg: `갃갃갃` `啊啊啊`), the fuzziness is zero.
// GuessFuzzinessByKeyword guesses the fuzziness for a keyword. The fuzziness is based on the Levenshtein distance and determines how many chars
// may differ between two strings while they are still considered equivalent.
// Given a phrase, its shortest word determines its fuzziness. If a phrase uses CJK (e.g. `갃갃갃` `啊啊啊`), the fuzziness is zero.
func GuessFuzzinessByKeyword(s string) int {
tokenizer := unicode_tokenizer.NewUnicodeTokenizer()
tokens := tokenizer.Tokenize([]byte(s))
@@ -85,5 +86,5 @@ func guessFuzzinessByKeyword(s string) int {
return 0
}
}
return min(maxFuzziness, len(s)/4)
return min(min(setting.Indexer.TypeBleveMaxFuzzniess, maxFuzziness), len(s)/4)
}