mirror of
				https://github.com/go-gitea/gitea
				synced 2025-10-31 19:38:23 +00:00 
			
		
		
		
	This PR improves the accuracy of Gitea's code search. 
Currently, Gitea does not consider statements such as
`console.log("hello")` as hits when the user searches for `log`. The
culprit is how both ES and Bleve tokenize the file contents (in
both cases, `console.log` is a whole token).
In ES' case, we changed the tokenizer to
[simple_pattern_split](https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-simplepatternsplit-tokenizer.html#:~:text=The%20simple_pattern_split%20tokenizer%20uses%20a,the%20tokenization%20is%20generally%20faster.).
In such a case, tokens are words formed by digits and letters. In
Bleve's case, it employs a
[letter](https://blevesearch.com/docs/Tokenizers/) tokenizer.
Resolves #32220
---------
Signed-off-by: Bruno Sofiato <bruno.sofiato@gmail.com>
		
	
		
			
				
	
	
		
			54 lines
		
	
	
		
			976 B
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			54 lines
		
	
	
		
			976 B
		
	
	
	
		
			Go
		
	
	
	
	
	
| // Copyright 2024 The Gitea Authors. All rights reserved.
 | |
| // SPDX-License-Identifier: MIT
 | |
| 
 | |
| package bleve
 | |
| 
 | |
| import (
 | |
| 	"fmt"
 | |
| 	"testing"
 | |
| 
 | |
| 	"github.com/stretchr/testify/assert"
 | |
| )
 | |
| 
 | |
| func TestBleveGuessFuzzinessByKeyword(t *testing.T) {
 | |
| 	scenarios := []struct {
 | |
| 		Input     string
 | |
| 		Fuzziness int // See util.go for the definition of fuzziness in this particular context
 | |
| 	}{
 | |
| 		{
 | |
| 			Input:     "",
 | |
| 			Fuzziness: 0,
 | |
| 		},
 | |
| 		{
 | |
| 			Input:     "Avocado",
 | |
| 			Fuzziness: 1,
 | |
| 		},
 | |
| 		{
 | |
| 			Input:     "Geschwindigkeit",
 | |
| 			Fuzziness: 2,
 | |
| 		},
 | |
| 		{
 | |
| 			Input:     "non-exist",
 | |
| 			Fuzziness: 0,
 | |
| 		},
 | |
| 		{
 | |
| 			Input:     "갃갃갃",
 | |
| 			Fuzziness: 0,
 | |
| 		},
 | |
| 		{
 | |
| 			Input:     "repo1",
 | |
| 			Fuzziness: 0,
 | |
| 		},
 | |
| 		{
 | |
| 			Input:     "avocado.md",
 | |
| 			Fuzziness: 0,
 | |
| 		},
 | |
| 	}
 | |
| 
 | |
| 	for _, scenario := range scenarios {
 | |
| 		t.Run(fmt.Sprintf("ensure fuzziness of '%s' is '%d'", scenario.Input, scenario.Fuzziness), func(t *testing.T) {
 | |
| 			assert.Equal(t, scenario.Fuzziness, GuessFuzzinessByKeyword(scenario.Input))
 | |
| 		})
 | |
| 	}
 | |
| }
 |