mirror of
				https://github.com/go-gitea/gitea
				synced 2025-10-26 08:58:24 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			207 lines
		
	
	
		
			5.7 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			207 lines
		
	
	
		
			5.7 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| // Copyright 2016 The Go Authors. All rights reserved.
 | |
| // Use of this source code is governed by a BSD-style
 | |
| // license that can be found in the LICENSE file.
 | |
| 
 | |
| package bidi
 | |
| 
 | |
| import "unicode/utf8"
 | |
| 
 | |
// Properties provides access to the BiDi properties of a rune. Values are
// obtained from LookupRune, Lookup, or LookupString.
type Properties struct {
	// entry is the value read from the lookup tables: Class interprets its
	// low four bits as the BiDi class and IsBracket tests its high four bits.
	//
	// last is the final byte of a three-byte UTF-8 encoding (it is left zero
	// for every other encoding length); Class uses its low nibble to resolve
	// the generic Control class into a specific control class.
	entry, last uint8
}
 | |
| 
 | |
| var trie = newBidiTrie(0)
 | |
| 
 | |
| // TODO: using this for bidirule reduces the running time by about 5%. Consider
 | |
| // if this is worth exposing or if we can find a way to speed up the Class
 | |
| // method.
 | |
| //
 | |
| // // CompactClass is like Class, but maps all of the BiDi control classes
 | |
| // // (LRO, RLO, LRE, RLE, PDF, LRI, RLI, FSI, PDI) to the class Control.
 | |
| // func (p Properties) CompactClass() Class {
 | |
| // 	return Class(p.entry & 0x0F)
 | |
| // }
 | |
| 
 | |
| // Class returns the Bidi class for p.
 | |
| func (p Properties) Class() Class {
 | |
| 	c := Class(p.entry & 0x0F)
 | |
| 	if c == Control {
 | |
| 		c = controlByteToClass[p.last&0xF]
 | |
| 	}
 | |
| 	return c
 | |
| }
 | |
| 
 | |
| // IsBracket reports whether the rune is a bracket.
 | |
| func (p Properties) IsBracket() bool { return p.entry&0xF0 != 0 }
 | |
| 
 | |
| // IsOpeningBracket reports whether the rune is an opening bracket.
 | |
| // IsBracket must return true.
 | |
| func (p Properties) IsOpeningBracket() bool { return p.entry&openMask != 0 }
 | |
| 
 | |
| // TODO: find a better API and expose.
 | |
| func (p Properties) reverseBracket(r rune) rune {
 | |
| 	return xorMasks[p.entry>>xorMaskShift] ^ r
 | |
| }
 | |
| 
 | |
| var controlByteToClass = [16]Class{
 | |
| 	0xD: LRO, // U+202D LeftToRightOverride,
 | |
| 	0xE: RLO, // U+202E RightToLeftOverride,
 | |
| 	0xA: LRE, // U+202A LeftToRightEmbedding,
 | |
| 	0xB: RLE, // U+202B RightToLeftEmbedding,
 | |
| 	0xC: PDF, // U+202C PopDirectionalFormat,
 | |
| 	0x6: LRI, // U+2066 LeftToRightIsolate,
 | |
| 	0x7: RLI, // U+2067 RightToLeftIsolate,
 | |
| 	0x8: FSI, // U+2068 FirstStrongIsolate,
 | |
| 	0x9: PDI, // U+2069 PopDirectionalIsolate,
 | |
| }
 | |
| 
 | |
| // LookupRune returns properties for r.
 | |
| func LookupRune(r rune) (p Properties, size int) {
 | |
| 	var buf [4]byte
 | |
| 	n := utf8.EncodeRune(buf[:], r)
 | |
| 	return Lookup(buf[:n])
 | |
| }
 | |
| 
 | |
| // TODO: these lookup methods are based on the generated trie code. The returned
 | |
| // sizes have slightly different semantics from the generated code, in that it
 | |
| // always returns size==1 for an illegal UTF-8 byte (instead of the length
 | |
| // of the maximum invalid subsequence). Most Transformers, like unicode/norm,
 | |
| // leave invalid UTF-8 untouched, in which case it has performance benefits to
 | |
| // do so (without changing the semantics). Bidi requires the semantics used here
 | |
| // for the bidirule implementation to be compatible with the Go semantics.
 | |
| // They ultimately should perhaps be adopted by all trie implementations, for
 | |
| // convenience's sake.
 | |
| // This unrolled code also boosts performance of the secure/bidirule package by
 | |
| // about 30%.
 | |
| // So, to remove this code:
 | |
| //   - add option to trie generator to define return type.
 | |
| //   - always return 1 byte size for ill-formed UTF-8 runes.
 | |
| 
 | |
| // Lookup returns properties for the first rune in s and the width in bytes of
 | |
| // its encoding. The size will be 0 if s does not hold enough bytes to complete
 | |
| // the encoding.
 | |
| func Lookup(s []byte) (p Properties, sz int) {
 | |
| 	c0 := s[0]
 | |
| 	switch {
 | |
| 	case c0 < 0x80: // is ASCII
 | |
| 		return Properties{entry: bidiValues[c0]}, 1
 | |
| 	case c0 < 0xC2:
 | |
| 		return Properties{}, 1
 | |
| 	case c0 < 0xE0: // 2-byte UTF-8
 | |
| 		if len(s) < 2 {
 | |
| 			return Properties{}, 0
 | |
| 		}
 | |
| 		i := bidiIndex[c0]
 | |
| 		c1 := s[1]
 | |
| 		if c1 < 0x80 || 0xC0 <= c1 {
 | |
| 			return Properties{}, 1
 | |
| 		}
 | |
| 		return Properties{entry: trie.lookupValue(uint32(i), c1)}, 2
 | |
| 	case c0 < 0xF0: // 3-byte UTF-8
 | |
| 		if len(s) < 3 {
 | |
| 			return Properties{}, 0
 | |
| 		}
 | |
| 		i := bidiIndex[c0]
 | |
| 		c1 := s[1]
 | |
| 		if c1 < 0x80 || 0xC0 <= c1 {
 | |
| 			return Properties{}, 1
 | |
| 		}
 | |
| 		o := uint32(i)<<6 + uint32(c1)
 | |
| 		i = bidiIndex[o]
 | |
| 		c2 := s[2]
 | |
| 		if c2 < 0x80 || 0xC0 <= c2 {
 | |
| 			return Properties{}, 1
 | |
| 		}
 | |
| 		return Properties{entry: trie.lookupValue(uint32(i), c2), last: c2}, 3
 | |
| 	case c0 < 0xF8: // 4-byte UTF-8
 | |
| 		if len(s) < 4 {
 | |
| 			return Properties{}, 0
 | |
| 		}
 | |
| 		i := bidiIndex[c0]
 | |
| 		c1 := s[1]
 | |
| 		if c1 < 0x80 || 0xC0 <= c1 {
 | |
| 			return Properties{}, 1
 | |
| 		}
 | |
| 		o := uint32(i)<<6 + uint32(c1)
 | |
| 		i = bidiIndex[o]
 | |
| 		c2 := s[2]
 | |
| 		if c2 < 0x80 || 0xC0 <= c2 {
 | |
| 			return Properties{}, 1
 | |
| 		}
 | |
| 		o = uint32(i)<<6 + uint32(c2)
 | |
| 		i = bidiIndex[o]
 | |
| 		c3 := s[3]
 | |
| 		if c3 < 0x80 || 0xC0 <= c3 {
 | |
| 			return Properties{}, 1
 | |
| 		}
 | |
| 		return Properties{entry: trie.lookupValue(uint32(i), c3)}, 4
 | |
| 	}
 | |
| 	// Illegal rune
 | |
| 	return Properties{}, 1
 | |
| }
 | |
| 
 | |
| // LookupString returns properties for the first rune in s and the width in
 | |
| // bytes of its encoding. The size will be 0 if s does not hold enough bytes to
 | |
| // complete the encoding.
 | |
| func LookupString(s string) (p Properties, sz int) {
 | |
| 	c0 := s[0]
 | |
| 	switch {
 | |
| 	case c0 < 0x80: // is ASCII
 | |
| 		return Properties{entry: bidiValues[c0]}, 1
 | |
| 	case c0 < 0xC2:
 | |
| 		return Properties{}, 1
 | |
| 	case c0 < 0xE0: // 2-byte UTF-8
 | |
| 		if len(s) < 2 {
 | |
| 			return Properties{}, 0
 | |
| 		}
 | |
| 		i := bidiIndex[c0]
 | |
| 		c1 := s[1]
 | |
| 		if c1 < 0x80 || 0xC0 <= c1 {
 | |
| 			return Properties{}, 1
 | |
| 		}
 | |
| 		return Properties{entry: trie.lookupValue(uint32(i), c1)}, 2
 | |
| 	case c0 < 0xF0: // 3-byte UTF-8
 | |
| 		if len(s) < 3 {
 | |
| 			return Properties{}, 0
 | |
| 		}
 | |
| 		i := bidiIndex[c0]
 | |
| 		c1 := s[1]
 | |
| 		if c1 < 0x80 || 0xC0 <= c1 {
 | |
| 			return Properties{}, 1
 | |
| 		}
 | |
| 		o := uint32(i)<<6 + uint32(c1)
 | |
| 		i = bidiIndex[o]
 | |
| 		c2 := s[2]
 | |
| 		if c2 < 0x80 || 0xC0 <= c2 {
 | |
| 			return Properties{}, 1
 | |
| 		}
 | |
| 		return Properties{entry: trie.lookupValue(uint32(i), c2), last: c2}, 3
 | |
| 	case c0 < 0xF8: // 4-byte UTF-8
 | |
| 		if len(s) < 4 {
 | |
| 			return Properties{}, 0
 | |
| 		}
 | |
| 		i := bidiIndex[c0]
 | |
| 		c1 := s[1]
 | |
| 		if c1 < 0x80 || 0xC0 <= c1 {
 | |
| 			return Properties{}, 1
 | |
| 		}
 | |
| 		o := uint32(i)<<6 + uint32(c1)
 | |
| 		i = bidiIndex[o]
 | |
| 		c2 := s[2]
 | |
| 		if c2 < 0x80 || 0xC0 <= c2 {
 | |
| 			return Properties{}, 1
 | |
| 		}
 | |
| 		o = uint32(i)<<6 + uint32(c2)
 | |
| 		i = bidiIndex[o]
 | |
| 		c3 := s[3]
 | |
| 		if c3 < 0x80 || 0xC0 <= c3 {
 | |
| 			return Properties{}, 1
 | |
| 		}
 | |
| 		return Properties{entry: trie.lookupValue(uint32(i), c3)}, 4
 | |
| 	}
 | |
| 	// Illegal rune
 | |
| 	return Properties{}, 1
 | |
| }
 |