mirror of
				https://github.com/go-gitea/gitea
				synced 2025-10-31 11:28:24 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			251 lines
		
	
	
		
			6.4 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			251 lines
		
	
	
		
			6.4 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
//  Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// 		http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
 | |
| 
 | |
| package levenshtein
 | |
| 
 | |
| import (
 | |
| 	"fmt"
 | |
| 	"math"
 | |
| )
 | |
| 
 | |
// SinkState is state id 0. CanMatch reports false for this id, so a
// traversal that lands on it cannot lead to a match.
const SinkState = uint32(0)
 | |
| 
 | |
| type DFA struct {
 | |
| 	transitions [][256]uint32
 | |
| 	distances   []Distance
 | |
| 	initState   int
 | |
| 	ed          uint8
 | |
| }
 | |
| 
 | |
| /// Returns the initial state
 | |
| func (d *DFA) initialState() int {
 | |
| 	return d.initState
 | |
| }
 | |
| 
 | |
| /// Returns the Levenshtein distance associated to the
 | |
| /// current state.
 | |
| func (d *DFA) distance(stateId int) Distance {
 | |
| 	return d.distances[stateId]
 | |
| }
 | |
| 
 | |
| /// Returns the number of states in the `DFA`.
 | |
| func (d *DFA) numStates() int {
 | |
| 	return len(d.transitions)
 | |
| }
 | |
| 
 | |
| /// Returns the destination state reached after consuming a given byte.
 | |
| func (d *DFA) transition(fromState int, b uint8) int {
 | |
| 	return int(d.transitions[fromState][b])
 | |
| }
 | |
| 
 | |
| func (d *DFA) eval(bytes []uint8) Distance {
 | |
| 	state := d.initialState()
 | |
| 
 | |
| 	for _, b := range bytes {
 | |
| 		state = d.transition(state, b)
 | |
| 	}
 | |
| 
 | |
| 	return d.distance(state)
 | |
| }
 | |
| 
 | |
| func (d *DFA) Start() int {
 | |
| 	return int(d.initialState())
 | |
| }
 | |
| 
 | |
| func (d *DFA) IsMatch(state int) bool {
 | |
| 	if _, ok := d.distance(state).(Exact); ok {
 | |
| 		return true
 | |
| 	}
 | |
| 	return false
 | |
| }
 | |
| 
 | |
| func (d *DFA) CanMatch(state int) bool {
 | |
| 	return state > 0 && state < d.numStates()
 | |
| }
 | |
| 
 | |
| func (d *DFA) Accept(state int, b byte) int {
 | |
| 	return int(d.transition(state, b))
 | |
| }
 | |
| 
 | |
| // WillAlwaysMatch returns if the specified state will always end in a
 | |
| // matching state.
 | |
| func (d *DFA) WillAlwaysMatch(state int) bool {
 | |
| 	return false
 | |
| }
 | |
| 
 | |
// fill sets every element of dest to val. A nil or empty dest is a no-op.
func fill(dest []uint32, val uint32) {
	for i := range dest {
		dest[i] = val
	}
}
 | |
| 
 | |
// fillTransitions sets all 256 entries of the transition row dest to val,
// i.e. makes every input byte lead to state val.
func fillTransitions(dest *[256]uint32, val uint32) {
	for i := range dest {
		dest[i] = val
	}
}
 | |
| 
 | |
| type Utf8DFAStateBuilder struct {
 | |
| 	dfaBuilder       *Utf8DFABuilder
 | |
| 	stateID          uint32
 | |
| 	defaultSuccessor []uint32
 | |
| }
 | |
| 
 | |
| func (sb *Utf8DFAStateBuilder) addTransitionID(fromStateID uint32, b uint8,
 | |
| 	toStateID uint32) {
 | |
| 	sb.dfaBuilder.transitions[fromStateID][b] = toStateID
 | |
| }
 | |
| 
 | |
| func (sb *Utf8DFAStateBuilder) addTransition(in rune, toStateID uint32) {
 | |
| 	fromStateID := sb.stateID
 | |
| 	chars := []byte(string(in))
 | |
| 	lastByte := chars[len(chars)-1]
 | |
| 
 | |
| 	for i, ch := range chars[:len(chars)-1] {
 | |
| 		remNumBytes := len(chars) - i - 1
 | |
| 		defaultSuccessor := sb.defaultSuccessor[remNumBytes]
 | |
| 		intermediateStateID := sb.dfaBuilder.transitions[fromStateID][ch]
 | |
| 
 | |
| 		if intermediateStateID == defaultSuccessor {
 | |
| 			intermediateStateID = sb.dfaBuilder.allocate()
 | |
| 			fillTransitions(&sb.dfaBuilder.transitions[intermediateStateID],
 | |
| 				sb.defaultSuccessor[remNumBytes-1])
 | |
| 		}
 | |
| 
 | |
| 		sb.addTransitionID(fromStateID, ch, intermediateStateID)
 | |
| 		fromStateID = intermediateStateID
 | |
| 	}
 | |
| 
 | |
| 	toStateIDDecoded := sb.dfaBuilder.getOrAllocate(original(toStateID))
 | |
| 	sb.addTransitionID(fromStateID, lastByte, toStateIDDecoded)
 | |
| }
 | |
| 
 | |
// Utf8StateId is the key under which Utf8DFABuilder indexes a state. It is
// produced by original and predecessor, which encode a character-level
// state id together with a step count.
type Utf8StateId uint32
 | |
| 
 | |
| func original(stateId uint32) Utf8StateId {
 | |
| 	return predecessor(stateId, 0)
 | |
| }
 | |
| 
 | |
| func predecessor(stateId uint32, numSteps uint8) Utf8StateId {
 | |
| 	return Utf8StateId(stateId*4 + uint32(numSteps))
 | |
| }
 | |
| 
 | |
| // Utf8DFABuilder makes it possible to define a DFA
 | |
| // that takes unicode character, and build a `DFA`
 | |
| // that operates on utf-8 encoded
 | |
| type Utf8DFABuilder struct {
 | |
| 	index        []uint32
 | |
| 	distances    []Distance
 | |
| 	transitions  [][256]uint32
 | |
| 	initialState uint32
 | |
| 	numStates    uint32
 | |
| 	maxNumStates uint32
 | |
| }
 | |
| 
 | |
| func withMaxStates(maxStates uint32) *Utf8DFABuilder {
 | |
| 	rv := &Utf8DFABuilder{
 | |
| 		index:        make([]uint32, maxStates*2+100),
 | |
| 		distances:    make([]Distance, 0, maxStates),
 | |
| 		transitions:  make([][256]uint32, 0, maxStates),
 | |
| 		maxNumStates: maxStates,
 | |
| 	}
 | |
| 
 | |
| 	for i := range rv.index {
 | |
| 		rv.index[i] = math.MaxUint32
 | |
| 	}
 | |
| 
 | |
| 	return rv
 | |
| }
 | |
| 
 | |
| func (dfab *Utf8DFABuilder) allocate() uint32 {
 | |
| 	newState := dfab.numStates
 | |
| 	dfab.numStates++
 | |
| 
 | |
| 	dfab.distances = append(dfab.distances, Atleast{d: 255})
 | |
| 	dfab.transitions = append(dfab.transitions, [256]uint32{})
 | |
| 
 | |
| 	return newState
 | |
| }
 | |
| 
 | |
| func (dfab *Utf8DFABuilder) getOrAllocate(state Utf8StateId) uint32 {
 | |
| 	if int(state) >= cap(dfab.index) {
 | |
| 		cloneIndex := make([]uint32, int(state)*2)
 | |
| 		copy(cloneIndex, dfab.index)
 | |
| 		dfab.index = cloneIndex
 | |
| 	}
 | |
| 	if dfab.index[state] != math.MaxUint32 {
 | |
| 		return dfab.index[state]
 | |
| 	}
 | |
| 
 | |
| 	nstate := dfab.allocate()
 | |
| 	dfab.index[state] = nstate
 | |
| 
 | |
| 	return nstate
 | |
| }
 | |
| 
 | |
| func (dfab *Utf8DFABuilder) setInitialState(iState uint32) {
 | |
| 	decodedID := dfab.getOrAllocate(original(iState))
 | |
| 	dfab.initialState = decodedID
 | |
| }
 | |
| 
 | |
| func (dfab *Utf8DFABuilder) build(ed uint8) *DFA {
 | |
| 	return &DFA{
 | |
| 		transitions: dfab.transitions,
 | |
| 		distances:   dfab.distances,
 | |
| 		initState:   int(dfab.initialState),
 | |
| 		ed:          ed,
 | |
| 	}
 | |
| }
 | |
| 
 | |
| func (dfab *Utf8DFABuilder) addState(state, default_suc_orig uint32,
 | |
| 	distance Distance) (*Utf8DFAStateBuilder, error) {
 | |
| 	if state > dfab.maxNumStates {
 | |
| 		return nil, fmt.Errorf("State id is larger than maxNumStates")
 | |
| 	}
 | |
| 
 | |
| 	stateID := dfab.getOrAllocate(original(state))
 | |
| 	dfab.distances[stateID] = distance
 | |
| 
 | |
| 	defaultSuccID := dfab.getOrAllocate(original(default_suc_orig))
 | |
| 	// creates a chain of states of predecessors of `default_suc_orig`.
 | |
| 	// Accepting k-bytes (whatever the bytes are) from `predecessor_states[k-1]`
 | |
| 	// leads to the `default_suc_orig` state.
 | |
| 	predecessorStates := []uint32{defaultSuccID,
 | |
| 		defaultSuccID,
 | |
| 		defaultSuccID,
 | |
| 		defaultSuccID}
 | |
| 
 | |
| 	for numBytes := uint8(1); numBytes < 4; numBytes++ {
 | |
| 		predecessorState := predecessor(default_suc_orig, numBytes)
 | |
| 		predecessorStateID := dfab.getOrAllocate(predecessorState)
 | |
| 		predecessorStates[numBytes] = predecessorStateID
 | |
| 		succ := predecessorStates[numBytes-1]
 | |
| 		fillTransitions(&dfab.transitions[predecessorStateID], succ)
 | |
| 	}
 | |
| 
 | |
| 	// 1-byte encoded chars.
 | |
| 	fill(dfab.transitions[stateID][0:192], predecessorStates[0])
 | |
| 	// 2-bytes encoded chars.
 | |
| 	fill(dfab.transitions[stateID][192:224], predecessorStates[1])
 | |
| 	// 3-bytes encoded chars.
 | |
| 	fill(dfab.transitions[stateID][224:240], predecessorStates[2])
 | |
| 	// 4-bytes encoded chars.
 | |
| 	fill(dfab.transitions[stateID][240:256], predecessorStates[3])
 | |
| 
 | |
| 	return &Utf8DFAStateBuilder{
 | |
| 		dfaBuilder:       dfab,
 | |
| 		stateID:          stateID,
 | |
| 		defaultSuccessor: predecessorStates}, nil
 | |
| }
 |