mirror of
				https://github.com/go-gitea/gitea
				synced 2025-11-03 21:08:25 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			93 lines
		
	
	
		
			2.0 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			93 lines
		
	
	
		
			2.0 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
package mahonia
 | 
						|
 | 
						|
// Generic converters for multibyte character sets.
 | 
						|
 | 
						|
// An mbcsTrie contains the data to convert from the character set to Unicode.
// Each level of the trie is indexed by one byte of the encoded character: if a
// character would be encoded as "\x01\x02\x03", its Unicode value would be
// found at t.children[1].children[2].children[3].char.
// children either is nil or has 256 elements.
type mbcsTrie struct {
	// For leaf nodes, the Unicode character that is represented.
	// A zero value means that no character ends at this node.
	char rune

	// For non-leaf nodes, the trie to decode the remainder of the character.
	children []mbcsTrie
}
 | 
						|
 | 
						|
// A MBCSTable holds the data to convert to and from Unicode.
 | 
						|
type MBCSTable struct {
 | 
						|
	toUnicode   mbcsTrie
 | 
						|
	fromUnicode map[rune]string
 | 
						|
}
 | 
						|
 | 
						|
// AddCharacter adds a character to the table. rune is its Unicode code point,
 | 
						|
// and bytes contains the bytes used to encode it in the character set.
 | 
						|
func (table *MBCSTable) AddCharacter(c rune, bytes string) {
 | 
						|
	if table.fromUnicode == nil {
 | 
						|
		table.fromUnicode = make(map[rune]string)
 | 
						|
	}
 | 
						|
 | 
						|
	table.fromUnicode[c] = bytes
 | 
						|
 | 
						|
	trie := &table.toUnicode
 | 
						|
	for i := 0; i < len(bytes); i++ {
 | 
						|
		if trie.children == nil {
 | 
						|
			trie.children = make([]mbcsTrie, 256)
 | 
						|
		}
 | 
						|
 | 
						|
		b := bytes[i]
 | 
						|
		trie = &trie.children[b]
 | 
						|
	}
 | 
						|
 | 
						|
	trie.char = c
 | 
						|
}
 | 
						|
 | 
						|
func (table *MBCSTable) Decoder() Decoder {
 | 
						|
	return func(p []byte) (c rune, size int, status Status) {
 | 
						|
		if len(p) == 0 {
 | 
						|
			status = NO_ROOM
 | 
						|
			return
 | 
						|
		}
 | 
						|
 | 
						|
		if p[0] == 0 {
 | 
						|
			return 0, 1, SUCCESS
 | 
						|
		}
 | 
						|
 | 
						|
		trie := &table.toUnicode
 | 
						|
		for trie.char == 0 {
 | 
						|
			if trie.children == nil {
 | 
						|
				return 0xfffd, 1, INVALID_CHAR
 | 
						|
			}
 | 
						|
			if len(p) < size+1 {
 | 
						|
				return 0, 0, NO_ROOM
 | 
						|
			}
 | 
						|
 | 
						|
			trie = &trie.children[p[size]]
 | 
						|
			size++
 | 
						|
		}
 | 
						|
 | 
						|
		c = trie.char
 | 
						|
		status = SUCCESS
 | 
						|
		return
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
func (table *MBCSTable) Encoder() Encoder {
 | 
						|
	return func(p []byte, c rune) (size int, status Status) {
 | 
						|
		bytes := table.fromUnicode[c]
 | 
						|
		if bytes == "" {
 | 
						|
			if len(p) > 0 {
 | 
						|
				p[0] = '?'
 | 
						|
				return 1, INVALID_CHAR
 | 
						|
			} else {
 | 
						|
				return 0, NO_ROOM
 | 
						|
			}
 | 
						|
		}
 | 
						|
 | 
						|
		if len(p) < len(bytes) {
 | 
						|
			return 0, NO_ROOM
 | 
						|
		}
 | 
						|
 | 
						|
		return copy(p, bytes), SUCCESS
 | 
						|
	}
 | 
						|
}
 |