mirror of
				https://github.com/go-gitea/gitea
				synced 2025-11-03 21:08:25 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			251 lines
		
	
	
		
			6.4 KiB
		
	
	
	
		
			Go
		
	
	
	
		
			Vendored
		
	
	
	
			
		
		
	
	
			251 lines
		
	
	
		
			6.4 KiB
		
	
	
	
		
			Go
		
	
	
	
		
			Vendored
		
	
	
	
//  Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// 		http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
 | 
						|
 | 
						|
package levenshtein
 | 
						|
 | 
						|
import (
 | 
						|
	"fmt"
 | 
						|
	"math"
 | 
						|
)
 | 
						|
 | 
						|
// SinkState is state ID 0. DFA.CanMatch reports false for this state.
// NOTE(review): presumably the dead state from which no match can be
// reached any more - confirm against the code that drives the builder.
const SinkState uint32 = 0
 | 
						|
 | 
						|
type DFA struct {
 | 
						|
	transitions [][256]uint32
 | 
						|
	distances   []Distance
 | 
						|
	initState   int
 | 
						|
	ed          uint8
 | 
						|
}
 | 
						|
 | 
						|
/// Returns the initial state
 | 
						|
func (d *DFA) initialState() int {
 | 
						|
	return d.initState
 | 
						|
}
 | 
						|
 | 
						|
/// Returns the Levenshtein distance associated to the
 | 
						|
/// current state.
 | 
						|
func (d *DFA) distance(stateId int) Distance {
 | 
						|
	return d.distances[stateId]
 | 
						|
}
 | 
						|
 | 
						|
/// Returns the number of states in the `DFA`.
 | 
						|
func (d *DFA) numStates() int {
 | 
						|
	return len(d.transitions)
 | 
						|
}
 | 
						|
 | 
						|
/// Returns the destination state reached after consuming a given byte.
 | 
						|
func (d *DFA) transition(fromState int, b uint8) int {
 | 
						|
	return int(d.transitions[fromState][b])
 | 
						|
}
 | 
						|
 | 
						|
func (d *DFA) eval(bytes []uint8) Distance {
 | 
						|
	state := d.initialState()
 | 
						|
 | 
						|
	for _, b := range bytes {
 | 
						|
		state = d.transition(state, b)
 | 
						|
	}
 | 
						|
 | 
						|
	return d.distance(state)
 | 
						|
}
 | 
						|
 | 
						|
func (d *DFA) Start() int {
 | 
						|
	return int(d.initialState())
 | 
						|
}
 | 
						|
 | 
						|
func (d *DFA) IsMatch(state int) bool {
 | 
						|
	if _, ok := d.distance(state).(Exact); ok {
 | 
						|
		return true
 | 
						|
	}
 | 
						|
	return false
 | 
						|
}
 | 
						|
 | 
						|
func (d *DFA) CanMatch(state int) bool {
 | 
						|
	return state > 0 && state < d.numStates()
 | 
						|
}
 | 
						|
 | 
						|
func (d *DFA) Accept(state int, b byte) int {
 | 
						|
	return int(d.transition(state, b))
 | 
						|
}
 | 
						|
 | 
						|
// WillAlwaysMatch returns if the specified state will always end in a
 | 
						|
// matching state.
 | 
						|
func (d *DFA) WillAlwaysMatch(state int) bool {
 | 
						|
	return false
 | 
						|
}
 | 
						|
 | 
						|
// fill overwrites every element of dest with val. A nil or empty dest is a
// no-op.
func fill(dest []uint32, val uint32) {
	for i := 0; i < len(dest); i++ {
		dest[i] = val
	}
}
 | 
						|
 | 
						|
// fillTransitions points all 256 entries of the transition row dest at the
// state val.
func fillTransitions(dest *[256]uint32, val uint32) {
	for b := 0; b < len(dest); b++ {
		dest[b] = val
	}
}
 | 
						|
 | 
						|
type Utf8DFAStateBuilder struct {
 | 
						|
	dfaBuilder       *Utf8DFABuilder
 | 
						|
	stateID          uint32
 | 
						|
	defaultSuccessor []uint32
 | 
						|
}
 | 
						|
 | 
						|
func (sb *Utf8DFAStateBuilder) addTransitionID(fromStateID uint32, b uint8,
 | 
						|
	toStateID uint32) {
 | 
						|
	sb.dfaBuilder.transitions[fromStateID][b] = toStateID
 | 
						|
}
 | 
						|
 | 
						|
func (sb *Utf8DFAStateBuilder) addTransition(in rune, toStateID uint32) {
 | 
						|
	fromStateID := sb.stateID
 | 
						|
	chars := []byte(string(in))
 | 
						|
	lastByte := chars[len(chars)-1]
 | 
						|
 | 
						|
	for i, ch := range chars[:len(chars)-1] {
 | 
						|
		remNumBytes := len(chars) - i - 1
 | 
						|
		defaultSuccessor := sb.defaultSuccessor[remNumBytes]
 | 
						|
		intermediateStateID := sb.dfaBuilder.transitions[fromStateID][ch]
 | 
						|
 | 
						|
		if intermediateStateID == defaultSuccessor {
 | 
						|
			intermediateStateID = sb.dfaBuilder.allocate()
 | 
						|
			fillTransitions(&sb.dfaBuilder.transitions[intermediateStateID],
 | 
						|
				sb.defaultSuccessor[remNumBytes-1])
 | 
						|
		}
 | 
						|
 | 
						|
		sb.addTransitionID(fromStateID, ch, intermediateStateID)
 | 
						|
		fromStateID = intermediateStateID
 | 
						|
	}
 | 
						|
 | 
						|
	toStateIDDecoded := sb.dfaBuilder.getOrAllocate(original(toStateID))
 | 
						|
	sb.addTransitionID(fromStateID, lastByte, toStateIDDecoded)
 | 
						|
}
 | 
						|
 | 
						|
// Utf8StateId identifies a state of the byte-level automaton. Each
// character-level state ID s owns four consecutive values, 4*s through
// 4*s+3: 4*s is the state itself and 4*s+k is its predecessor k steps away.
type Utf8StateId uint32

// original returns the Utf8StateId of the character-level state stateId
// itself, i.e. its predecessor at zero steps.
func original(stateId uint32) Utf8StateId {
	return Utf8StateId(stateId << 2)
}

// predecessor returns the Utf8StateId of the state numSteps steps before the
// character-level state stateId, computed as stateId*4 + numSteps.
func predecessor(stateId uint32, numSteps uint8) Utf8StateId {
	base := stateId << 2
	return Utf8StateId(base + uint32(numSteps))
}
 | 
						|
 | 
						|
// Utf8DFABuilder makes it possible to define a DFA
 | 
						|
// that takes unicode character, and build a `DFA`
 | 
						|
// that operates on utf-8 encoded
 | 
						|
type Utf8DFABuilder struct {
 | 
						|
	index        []uint32
 | 
						|
	distances    []Distance
 | 
						|
	transitions  [][256]uint32
 | 
						|
	initialState uint32
 | 
						|
	numStates    uint32
 | 
						|
	maxNumStates uint32
 | 
						|
}
 | 
						|
 | 
						|
func withMaxStates(maxStates uint32) *Utf8DFABuilder {
 | 
						|
	rv := &Utf8DFABuilder{
 | 
						|
		index:        make([]uint32, maxStates*2+100),
 | 
						|
		distances:    make([]Distance, 0, maxStates),
 | 
						|
		transitions:  make([][256]uint32, 0, maxStates),
 | 
						|
		maxNumStates: maxStates,
 | 
						|
	}
 | 
						|
 | 
						|
	for i := range rv.index {
 | 
						|
		rv.index[i] = math.MaxUint32
 | 
						|
	}
 | 
						|
 | 
						|
	return rv
 | 
						|
}
 | 
						|
 | 
						|
func (dfab *Utf8DFABuilder) allocate() uint32 {
 | 
						|
	newState := dfab.numStates
 | 
						|
	dfab.numStates++
 | 
						|
 | 
						|
	dfab.distances = append(dfab.distances, Atleast{d: 255})
 | 
						|
	dfab.transitions = append(dfab.transitions, [256]uint32{})
 | 
						|
 | 
						|
	return newState
 | 
						|
}
 | 
						|
 | 
						|
func (dfab *Utf8DFABuilder) getOrAllocate(state Utf8StateId) uint32 {
 | 
						|
	if int(state) >= cap(dfab.index) {
 | 
						|
		cloneIndex := make([]uint32, int(state)*2)
 | 
						|
		copy(cloneIndex, dfab.index)
 | 
						|
		dfab.index = cloneIndex
 | 
						|
	}
 | 
						|
	if dfab.index[state] != math.MaxUint32 {
 | 
						|
		return dfab.index[state]
 | 
						|
	}
 | 
						|
 | 
						|
	nstate := dfab.allocate()
 | 
						|
	dfab.index[state] = nstate
 | 
						|
 | 
						|
	return nstate
 | 
						|
}
 | 
						|
 | 
						|
func (dfab *Utf8DFABuilder) setInitialState(iState uint32) {
 | 
						|
	decodedID := dfab.getOrAllocate(original(iState))
 | 
						|
	dfab.initialState = decodedID
 | 
						|
}
 | 
						|
 | 
						|
func (dfab *Utf8DFABuilder) build(ed uint8) *DFA {
 | 
						|
	return &DFA{
 | 
						|
		transitions: dfab.transitions,
 | 
						|
		distances:   dfab.distances,
 | 
						|
		initState:   int(dfab.initialState),
 | 
						|
		ed:          ed,
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
func (dfab *Utf8DFABuilder) addState(state, default_suc_orig uint32,
 | 
						|
	distance Distance) (*Utf8DFAStateBuilder, error) {
 | 
						|
	if state > dfab.maxNumStates {
 | 
						|
		return nil, fmt.Errorf("State id is larger than maxNumStates")
 | 
						|
	}
 | 
						|
 | 
						|
	stateID := dfab.getOrAllocate(original(state))
 | 
						|
	dfab.distances[stateID] = distance
 | 
						|
 | 
						|
	defaultSuccID := dfab.getOrAllocate(original(default_suc_orig))
 | 
						|
	// creates a chain of states of predecessors of `default_suc_orig`.
 | 
						|
	// Accepting k-bytes (whatever the bytes are) from `predecessor_states[k-1]`
 | 
						|
	// leads to the `default_suc_orig` state.
 | 
						|
	predecessorStates := []uint32{defaultSuccID,
 | 
						|
		defaultSuccID,
 | 
						|
		defaultSuccID,
 | 
						|
		defaultSuccID}
 | 
						|
 | 
						|
	for numBytes := uint8(1); numBytes < 4; numBytes++ {
 | 
						|
		predecessorState := predecessor(default_suc_orig, numBytes)
 | 
						|
		predecessorStateID := dfab.getOrAllocate(predecessorState)
 | 
						|
		predecessorStates[numBytes] = predecessorStateID
 | 
						|
		succ := predecessorStates[numBytes-1]
 | 
						|
		fillTransitions(&dfab.transitions[predecessorStateID], succ)
 | 
						|
	}
 | 
						|
 | 
						|
	// 1-byte encoded chars.
 | 
						|
	fill(dfab.transitions[stateID][0:192], predecessorStates[0])
 | 
						|
	// 2-bytes encoded chars.
 | 
						|
	fill(dfab.transitions[stateID][192:224], predecessorStates[1])
 | 
						|
	// 3-bytes encoded chars.
 | 
						|
	fill(dfab.transitions[stateID][224:240], predecessorStates[2])
 | 
						|
	// 4-bytes encoded chars.
 | 
						|
	fill(dfab.transitions[stateID][240:256], predecessorStates[3])
 | 
						|
 | 
						|
	return &Utf8DFAStateBuilder{
 | 
						|
		dfaBuilder:       dfab,
 | 
						|
		stateID:          stateID,
 | 
						|
		defaultSuccessor: predecessorStates}, nil
 | 
						|
}
 |