mirror of
				https://github.com/go-gitea/gitea
				synced 2025-10-26 08:58:24 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			126 lines
		
	
	
		
			2.6 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			126 lines
		
	
	
		
			2.6 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| //  Copyright (c) 2018 Couchbase, Inc.
 | |
| //
 | |
| // Licensed under the Apache License, Version 2.0 (the "License");
 | |
| // you may not use this file except in compliance with the License.
 | |
| // You may obtain a copy of the License at
 | |
| //
 | |
| // 		http://www.apache.org/licenses/LICENSE-2.0
 | |
| //
 | |
| // Unless required by applicable law or agreed to in writing, software
 | |
| // distributed under the License is distributed on an "AS IS" BASIS,
 | |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
| // See the License for the specific language governing permissions and
 | |
| // limitations under the License.
 | |
| 
 | |
| package levenshtein
 | |
| 
 | |
| import (
 | |
| 	"fmt"
 | |
| 	"sort"
 | |
| 	"unicode/utf8"
 | |
| )
 | |
| 
 | |
// FullCharacteristicVector is a bit vector stored as a sequence of 32-bit
// words: bit i of word w represents overall bit position w*32+i.
type FullCharacteristicVector []uint32

// shiftAndMask returns the 32-bit window of the vector that begins at bit
// position offset, ANDed with mask.
//
// When offset is not a multiple of 32 the window straddles two words, so the
// word following offset/32 must exist; otherwise the access panics with an
// index-out-of-range error.
func (fcv FullCharacteristicVector) shiftAndMask(offset, mask uint32) uint32 {
	word, shift := offset/32, offset%32
	window := fcv[word]
	if shift != 0 {
		// Low bits come from the top of the current word, high bits from the
		// bottom of the next one.
		window = window>>shift | fcv[word+1]<<(32-shift)
	}
	return window & mask
}
 | |
| 
 | |
| type tuple struct {
 | |
| 	char rune
 | |
| 	fcv  FullCharacteristicVector
 | |
| }
 | |
| 
 | |
// sortRunes adapts a rune slice to sort.Interface, ordering the runes by
// ascending code point.
type sortRunes []rune

// Len returns the number of runes in the slice.
func (s sortRunes) Len() int { return len(s) }

// Less reports whether the rune at index i is smaller than the one at index j.
func (s sortRunes) Less(i, j int) bool { return s[i] < s[j] }

// Swap exchanges the runes at indexes i and j.
func (s sortRunes) Swap(i, j int) {
	first, second := s[i], s[j]
	s[i], s[j] = second, first
}

// sortRune sorts r in place in ascending code-point order and returns the
// same slice for convenient chaining.
func sortRune(r []rune) []rune {
	var ordered sort.Interface = sortRunes(r)
	sort.Sort(ordered)
	return r
}
 | |
| 
 | |
| type Alphabet struct {
 | |
| 	charset []tuple
 | |
| 	index   uint32
 | |
| }
 | |
| 
 | |
| func (a *Alphabet) resetNext() {
 | |
| 	a.index = 0
 | |
| }
 | |
| 
 | |
| func (a *Alphabet) next() (rune, FullCharacteristicVector, error) {
 | |
| 	if int(a.index) >= len(a.charset) {
 | |
| 		return 0, nil, fmt.Errorf("eof")
 | |
| 	}
 | |
| 
 | |
| 	rv := a.charset[a.index]
 | |
| 	a.index++
 | |
| 	return rv.char, rv.fcv, nil
 | |
| }
 | |
| 
 | |
// dedupe returns the distinct runes of in, kept in order of first occurrence.
//
// The input is decoded as UTF-8; every invalid byte decodes to
// utf8.RuneError, so any number of invalid bytes collapse into a single
// U+FFFD in the result. An empty input yields an empty string.
func dedupe(in string) string {
	n := utf8.RuneCountInString(in)
	seen := make(map[rune]struct{}, n)
	// Collect into a rune slice and convert once at the end: appending to a
	// string inside the loop would re-copy the whole result for every new
	// rune.
	uniq := make([]rune, 0, n)
	for _, r := range in {
		if _, dup := seen[r]; dup {
			continue
		}
		seen[r] = struct{}{}
		uniq = append(uniq, r)
	}
	return string(uniq)
}
 | |
| 
 | |
| func queryChars(qChars string) Alphabet {
 | |
| 	chars := dedupe(qChars)
 | |
| 	inChars := sortRune([]rune(chars))
 | |
| 	charsets := make([]tuple, 0, len(inChars))
 | |
| 
 | |
| 	for _, c := range inChars {
 | |
| 		tempChars := qChars
 | |
| 		var bits []uint32
 | |
| 		for len(tempChars) > 0 {
 | |
| 			var chunk string
 | |
| 			if len(tempChars) > 32 {
 | |
| 				chunk = tempChars[0:32]
 | |
| 				tempChars = tempChars[32:]
 | |
| 			} else {
 | |
| 				chunk = tempChars
 | |
| 				tempChars = tempChars[:0]
 | |
| 			}
 | |
| 
 | |
| 			chunkBits := uint32(0)
 | |
| 			bit := uint32(1)
 | |
| 			for _, chr := range chunk {
 | |
| 				if chr == c {
 | |
| 					chunkBits |= bit
 | |
| 				}
 | |
| 				bit <<= 1
 | |
| 			}
 | |
| 			bits = append(bits, chunkBits)
 | |
| 		}
 | |
| 		bits = append(bits, 0)
 | |
| 		charsets = append(charsets, tuple{char: c, fcv: FullCharacteristicVector(bits)})
 | |
| 	}
 | |
| 	return Alphabet{charset: charsets}
 | |
| }
 |