mirror of
				https://github.com/go-gitea/gitea
				synced 2025-09-28 03:28:13 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			336 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			336 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| // Copyright 2015 The Go Authors. All rights reserved.
 | |
| // Use of this source code is governed by a BSD-style
 | |
| // license that can be found in the LICENSE file.
 | |
| 
 | |
| package bidi
 | |
| 
 | |
| import (
 | |
| 	"container/list"
 | |
| 	"fmt"
 | |
| 	"sort"
 | |
| )
 | |
| 
 | |
| // This file contains a port of the reference implementation of the
 | |
| // Bidi Parentheses Algorithm:
 | |
| // https://www.unicode.org/Public/PROGRAMS/BidiReferenceJava/BidiPBAReference.java
 | |
| //
 | |
| // The implementation in this file covers definitions BD14-BD16 and rule N0
 | |
| // of UAX#9.
 | |
| //
 | |
| // Some preprocessing is done for each rune before data is passed to this
 | |
| // algorithm:
 | |
| //  - opening and closing brackets are identified
 | |
| //  - a bracket pair type, like '(' and ')' is assigned a unique identifier that
 | |
| //    is identical for the opening and closing bracket. It is left to do these
 | |
| //    mappings.
 | |
| //  - The BPA algorithm requires that bracket characters that are canonical
 | |
| //    equivalents of each other be able to be substituted for each other.
 | |
| //    It is the responsibility of the caller to do this canonicalization.
 | |
| //
 | |
| // In implementing BD16, this implementation departs slightly from the "logical"
 | |
| // algorithm defined in UAX#9. In particular, the stack referenced there
 | |
| // supports operations that go beyond a "basic" stack. An equivalent
 | |
| // implementation based on a linked list is used here.
 | |
| 
 | |
| // Bidi_Paired_Bracket_Type
 | |
| // BD14. An opening paired bracket is a character whose
 | |
| // Bidi_Paired_Bracket_Type property value is Open.
 | |
| //
 | |
| // BD15. A closing paired bracket is a character whose
 | |
| // Bidi_Paired_Bracket_Type property value is Close.
 | |
| type bracketType byte
 | |
| 
 | |
| const (
 | |
| 	bpNone bracketType = iota
 | |
| 	bpOpen
 | |
| 	bpClose
 | |
| )
 | |
| 
 | |
| // bracketPair holds a pair of index values for opening and closing bracket
 | |
| // location of a bracket pair.
 | |
| type bracketPair struct {
 | |
| 	opener int
 | |
| 	closer int
 | |
| }
 | |
| 
 | |
| func (b *bracketPair) String() string {
 | |
| 	return fmt.Sprintf("(%v, %v)", b.opener, b.closer)
 | |
| }
 | |
| 
 | |
| // bracketPairs is a slice of bracketPairs with a sort.Interface implementation.
 | |
| type bracketPairs []bracketPair
 | |
| 
 | |
| func (b bracketPairs) Len() int           { return len(b) }
 | |
| func (b bracketPairs) Swap(i, j int)      { b[i], b[j] = b[j], b[i] }
 | |
| func (b bracketPairs) Less(i, j int) bool { return b[i].opener < b[j].opener }
 | |
| 
 | |
| // resolvePairedBrackets runs the paired bracket part of the UBA algorithm.
 | |
| //
 | |
| // For each rune, it takes the indexes into the original string, the class the
 | |
| // bracket type (in pairTypes) and the bracket identifier (pairValues). It also
 | |
| // takes the direction type for the start-of-sentence and the embedding level.
 | |
| //
 | |
| // The identifiers for bracket types are the rune of the canonicalized opening
 | |
| // bracket for brackets (open or close) or 0 for runes that are not brackets.
 | |
| func resolvePairedBrackets(s *isolatingRunSequence) {
 | |
| 	p := bracketPairer{
 | |
| 		sos:              s.sos,
 | |
| 		openers:          list.New(),
 | |
| 		codesIsolatedRun: s.types,
 | |
| 		indexes:          s.indexes,
 | |
| 	}
 | |
| 	dirEmbed := L
 | |
| 	if s.level&1 != 0 {
 | |
| 		dirEmbed = R
 | |
| 	}
 | |
| 	p.locateBrackets(s.p.pairTypes, s.p.pairValues)
 | |
| 	p.resolveBrackets(dirEmbed, s.p.initialTypes)
 | |
| }
 | |
| 
 | |
| type bracketPairer struct {
 | |
| 	sos Class // direction corresponding to start of sequence
 | |
| 
 | |
| 	// The following is a restatement of BD 16 using non-algorithmic language.
 | |
| 	//
 | |
| 	// A bracket pair is a pair of characters consisting of an opening
 | |
| 	// paired bracket and a closing paired bracket such that the
 | |
| 	// Bidi_Paired_Bracket property value of the former equals the latter,
 | |
| 	// subject to the following constraints.
 | |
| 	// - both characters of a pair occur in the same isolating run sequence
 | |
| 	// - the closing character of a pair follows the opening character
 | |
| 	// - any bracket character can belong at most to one pair, the earliest possible one
 | |
| 	// - any bracket character not part of a pair is treated like an ordinary character
 | |
| 	// - pairs may nest properly, but their spans may not overlap otherwise
 | |
| 
 | |
| 	// Bracket characters with canonical decompositions are supposed to be
 | |
| 	// treated as if they had been normalized, to allow normalized and non-
 | |
| 	// normalized text to give the same result. In this implementation that step
 | |
| 	// is pushed out to the caller. The caller has to ensure that the pairValue
 | |
| 	// slices contain the rune of the opening bracket after normalization for
 | |
| 	// any opening or closing bracket.
 | |
| 
 | |
| 	openers *list.List // list of positions for opening brackets
 | |
| 
 | |
| 	// bracket pair positions sorted by location of opening bracket
 | |
| 	pairPositions bracketPairs
 | |
| 
 | |
| 	codesIsolatedRun []Class // directional bidi codes for an isolated run
 | |
| 	indexes          []int   // array of index values into the original string
 | |
| 
 | |
| }
 | |
| 
 | |
| // matchOpener reports whether characters at given positions form a matching
 | |
| // bracket pair.
 | |
| func (p *bracketPairer) matchOpener(pairValues []rune, opener, closer int) bool {
 | |
| 	return pairValues[p.indexes[opener]] == pairValues[p.indexes[closer]]
 | |
| }
 | |
| 
 | |
| const maxPairingDepth = 63
 | |
| 
 | |
| // locateBrackets locates matching bracket pairs according to BD16.
 | |
| //
 | |
| // This implementation uses a linked list instead of a stack, because, while
 | |
| // elements are added at the front (like a push) they are not generally removed
 | |
| // in atomic 'pop' operations, reducing the benefit of the stack archetype.
 | |
| func (p *bracketPairer) locateBrackets(pairTypes []bracketType, pairValues []rune) {
 | |
| 	// traverse the run
 | |
| 	// do that explicitly (not in a for-each) so we can record position
 | |
| 	for i, index := range p.indexes {
 | |
| 
 | |
| 		// look at the bracket type for each character
 | |
| 		if pairTypes[index] == bpNone || p.codesIsolatedRun[i] != ON {
 | |
| 			// continue scanning
 | |
| 			continue
 | |
| 		}
 | |
| 		switch pairTypes[index] {
 | |
| 		case bpOpen:
 | |
| 			// check if maximum pairing depth reached
 | |
| 			if p.openers.Len() == maxPairingDepth {
 | |
| 				p.openers.Init()
 | |
| 				return
 | |
| 			}
 | |
| 			// remember opener location, most recent first
 | |
| 			p.openers.PushFront(i)
 | |
| 
 | |
| 		case bpClose:
 | |
| 			// see if there is a match
 | |
| 			count := 0
 | |
| 			for elem := p.openers.Front(); elem != nil; elem = elem.Next() {
 | |
| 				count++
 | |
| 				opener := elem.Value.(int)
 | |
| 				if p.matchOpener(pairValues, opener, i) {
 | |
| 					// if the opener matches, add nested pair to the ordered list
 | |
| 					p.pairPositions = append(p.pairPositions, bracketPair{opener, i})
 | |
| 					// remove up to and including matched opener
 | |
| 					for ; count > 0; count-- {
 | |
| 						p.openers.Remove(p.openers.Front())
 | |
| 					}
 | |
| 					break
 | |
| 				}
 | |
| 			}
 | |
| 			sort.Sort(p.pairPositions)
 | |
| 			// if we get here, the closing bracket matched no openers
 | |
| 			// and gets ignored
 | |
| 		}
 | |
| 	}
 | |
| }
 | |
| 
 | |
| // Bracket pairs within an isolating run sequence are processed as units so
 | |
| // that both the opening and the closing paired bracket in a pair resolve to
 | |
| // the same direction.
 | |
| //
 | |
| // N0. Process bracket pairs in an isolating run sequence sequentially in
 | |
| // the logical order of the text positions of the opening paired brackets
 | |
| // using the logic given below. Within this scope, bidirectional types EN
 | |
| // and AN are treated as R.
 | |
| //
 | |
| // Identify the bracket pairs in the current isolating run sequence
 | |
| // according to BD16. For each bracket-pair element in the list of pairs of
 | |
| // text positions:
 | |
| //
 | |
| // a Inspect the bidirectional types of the characters enclosed within the
 | |
| // bracket pair.
 | |
| //
 | |
| // b If any strong type (either L or R) matching the embedding direction is
 | |
| // found, set the type for both brackets in the pair to match the embedding
 | |
| // direction.
 | |
| //
 | |
| // o [ e ] o -> o e e e o
 | |
| //
 | |
| // o [ o e ] -> o e o e e
 | |
| //
 | |
| // o [ NI e ] -> o e NI e e
 | |
| //
 | |
| // c Otherwise, if a strong type (opposite the embedding direction) is
 | |
| // found, test for adjacent strong types as follows: 1 First, check
 | |
| // backwards before the opening paired bracket until the first strong type
 | |
| // (L, R, or sos) is found. If that first preceding strong type is opposite
 | |
| // the embedding direction, then set the type for both brackets in the pair
 | |
| // to that type. 2 Otherwise, set the type for both brackets in the pair to
 | |
| // the embedding direction.
 | |
| //
 | |
| // o [ o ] e -> o o o o e
 | |
| //
 | |
| // o [ o NI ] o -> o o o NI o o
 | |
| //
 | |
| // e [ o ] o -> e e o e o
 | |
| //
 | |
| // e [ o ] e -> e e o e e
 | |
| //
 | |
| // e ( o [ o ] NI ) e -> e e o o o o NI e e
 | |
| //
 | |
| // d Otherwise, do not set the type for the current bracket pair. Note that
 | |
| // if the enclosed text contains no strong types the paired brackets will
 | |
| // both resolve to the same level when resolved individually using rules N1
 | |
| // and N2.
 | |
| //
 | |
| // e ( NI ) o -> e ( NI ) o
 | |
| 
 | |
| // getStrongTypeN0 maps character's directional code to strong type as required
 | |
| // by rule N0.
 | |
| //
 | |
| // TODO: have separate type for "strong" directionality.
 | |
| func (p *bracketPairer) getStrongTypeN0(index int) Class {
 | |
| 	switch p.codesIsolatedRun[index] {
 | |
| 	// in the scope of N0, number types are treated as R
 | |
| 	case EN, AN, AL, R:
 | |
| 		return R
 | |
| 	case L:
 | |
| 		return L
 | |
| 	default:
 | |
| 		return ON
 | |
| 	}
 | |
| }
 | |
| 
 | |
| // classifyPairContent reports the strong types contained inside a Bracket Pair,
 | |
| // assuming the given embedding direction.
 | |
| //
 | |
| // It returns ON if no strong type is found. If a single strong type is found,
 | |
| // it returns this type. Otherwise it returns the embedding direction.
 | |
| //
 | |
| // TODO: use separate type for "strong" directionality.
 | |
| func (p *bracketPairer) classifyPairContent(loc bracketPair, dirEmbed Class) Class {
 | |
| 	dirOpposite := ON
 | |
| 	for i := loc.opener + 1; i < loc.closer; i++ {
 | |
| 		dir := p.getStrongTypeN0(i)
 | |
| 		if dir == ON {
 | |
| 			continue
 | |
| 		}
 | |
| 		if dir == dirEmbed {
 | |
| 			return dir // type matching embedding direction found
 | |
| 		}
 | |
| 		dirOpposite = dir
 | |
| 	}
 | |
| 	// return ON if no strong type found, or class opposite to dirEmbed
 | |
| 	return dirOpposite
 | |
| }
 | |
| 
 | |
| // classBeforePair determines which strong types are present before a Bracket
 | |
| // Pair. Return R or L if strong type found, otherwise ON.
 | |
| func (p *bracketPairer) classBeforePair(loc bracketPair) Class {
 | |
| 	for i := loc.opener - 1; i >= 0; i-- {
 | |
| 		if dir := p.getStrongTypeN0(i); dir != ON {
 | |
| 			return dir
 | |
| 		}
 | |
| 	}
 | |
| 	// no strong types found, return sos
 | |
| 	return p.sos
 | |
| }
 | |
| 
 | |
| // assignBracketType implements rule N0 for a single bracket pair.
 | |
| func (p *bracketPairer) assignBracketType(loc bracketPair, dirEmbed Class, initialTypes []Class) {
 | |
| 	// rule "N0, a", inspect contents of pair
 | |
| 	dirPair := p.classifyPairContent(loc, dirEmbed)
 | |
| 
 | |
| 	// dirPair is now L, R, or N (no strong type found)
 | |
| 
 | |
| 	// the following logical tests are performed out of order compared to
 | |
| 	// the statement of the rules but yield the same results
 | |
| 	if dirPair == ON {
 | |
| 		return // case "d" - nothing to do
 | |
| 	}
 | |
| 
 | |
| 	if dirPair != dirEmbed {
 | |
| 		// case "c": strong type found, opposite - check before (c.1)
 | |
| 		dirPair = p.classBeforePair(loc)
 | |
| 		if dirPair == dirEmbed || dirPair == ON {
 | |
| 			// no strong opposite type found before - use embedding (c.2)
 | |
| 			dirPair = dirEmbed
 | |
| 		}
 | |
| 	}
 | |
| 	// else: case "b", strong type found matching embedding,
 | |
| 	// no explicit action needed, as dirPair is already set to embedding
 | |
| 	// direction
 | |
| 
 | |
| 	// set the bracket types to the type found
 | |
| 	p.setBracketsToType(loc, dirPair, initialTypes)
 | |
| }
 | |
| 
 | |
| func (p *bracketPairer) setBracketsToType(loc bracketPair, dirPair Class, initialTypes []Class) {
 | |
| 	p.codesIsolatedRun[loc.opener] = dirPair
 | |
| 	p.codesIsolatedRun[loc.closer] = dirPair
 | |
| 
 | |
| 	for i := loc.opener + 1; i < loc.closer; i++ {
 | |
| 		index := p.indexes[i]
 | |
| 		if initialTypes[index] != NSM {
 | |
| 			break
 | |
| 		}
 | |
| 		p.codesIsolatedRun[i] = dirPair
 | |
| 	}
 | |
| 
 | |
| 	for i := loc.closer + 1; i < len(p.indexes); i++ {
 | |
| 		index := p.indexes[i]
 | |
| 		if initialTypes[index] != NSM {
 | |
| 			break
 | |
| 		}
 | |
| 		p.codesIsolatedRun[i] = dirPair
 | |
| 	}
 | |
| }
 | |
| 
 | |
| // resolveBrackets implements rule N0 for a list of pairs.
 | |
| func (p *bracketPairer) resolveBrackets(dirEmbed Class, initialTypes []Class) {
 | |
| 	for _, loc := range p.pairPositions {
 | |
| 		p.assignBracketType(loc, dirEmbed, initialTypes)
 | |
| 	}
 | |
| }
 |