mirror of
				https://github.com/go-gitea/gitea
				synced 2025-11-03 21:08:25 +00:00 
			
		
		
		
	* Super expansion * Explain which features are in 1.11 vs 1.12 * Move imports Signed-off-by: jolheiser <john.olheiser@gmail.com>
		
			
				
	
	
		
			121 lines
		
	
	
		
			2.2 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			121 lines
		
	
	
		
			2.2 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
// Copyright 2015 Huan Du. All rights reserved.
 | 
						|
// Licensed under the MIT license that can be found in the LICENSE file.
 | 
						|
 | 
						|
package xstrings
 | 
						|
 | 
						|
import (
 | 
						|
	"unicode"
 | 
						|
	"unicode/utf8"
 | 
						|
)
 | 
						|
 | 
						|
// Len reports the number of UTF-8 encoded runes in str, as counted by
// utf8.RuneCountInString (not the number of bytes).
func Len(str string) int {
	runes := utf8.RuneCountInString(str)
	return runes
}
 | 
						|
 | 
						|
// WordCount returns number of words in a string.
 | 
						|
//
 | 
						|
// Word is defined as a locale dependent string containing alphabetic characters,
 | 
						|
// which may also contain but not start with `'` and `-` characters.
 | 
						|
func WordCount(str string) int {
 | 
						|
	var r rune
 | 
						|
	var size, n int
 | 
						|
 | 
						|
	inWord := false
 | 
						|
 | 
						|
	for len(str) > 0 {
 | 
						|
		r, size = utf8.DecodeRuneInString(str)
 | 
						|
 | 
						|
		switch {
 | 
						|
		case isAlphabet(r):
 | 
						|
			if !inWord {
 | 
						|
				inWord = true
 | 
						|
				n++
 | 
						|
			}
 | 
						|
 | 
						|
		case inWord && (r == '\'' || r == '-'):
 | 
						|
			// Still in word.
 | 
						|
 | 
						|
		default:
 | 
						|
			inWord = false
 | 
						|
		}
 | 
						|
 | 
						|
		str = str[size:]
 | 
						|
	}
 | 
						|
 | 
						|
	return n
 | 
						|
}
 | 
						|
 | 
						|
// minCJKCharacter is U+3400. isAlphabet treats every letter below this code
// point as alphabetic without consulting the Han script table.
const minCJKCharacter = '\u3400'

// isAlphabet reports whether r is a letter that is not a CJK (Han) ideograph.
//
// Non-letters always yield false. Letters below minCJKCharacter always yield
// true. From minCJKCharacter upwards the decision is delegated to the
// standard library's Han script table rather than hand-maintained code point
// ranges, so ideographs outside the older hard-coded ranges (for example
// U+4D86..U+4DBF or the compatibility ideographs at U+F900..U+FAFF) are
// rejected as well.
func isAlphabet(r rune) bool {
	if !unicode.IsLetter(r) {
		return false
	}

	// Quick check for non-CJK character.
	if r < minCJKCharacter {
		return true
	}

	return !unicode.Is(unicode.Han, r)
}
 | 
						|
 | 
						|
// Width returns string width in monotype font.
 | 
						|
// Multi-byte characters are usually twice the width of single byte characters.
 | 
						|
//
 | 
						|
// Algorithm comes from `mb_strwidth` in PHP.
 | 
						|
// http://php.net/manual/en/function.mb-strwidth.php
 | 
						|
func Width(str string) int {
 | 
						|
	var r rune
 | 
						|
	var size, n int
 | 
						|
 | 
						|
	for len(str) > 0 {
 | 
						|
		r, size = utf8.DecodeRuneInString(str)
 | 
						|
		n += RuneWidth(r)
 | 
						|
		str = str[size:]
 | 
						|
	}
 | 
						|
 | 
						|
	return n
 | 
						|
}
 | 
						|
 | 
						|
// RuneWidth returns the display width of a single character in a monotype font.
// Multi-byte characters are usually twice the width of single byte characters.
//
// The result is 0 for utf8.RuneError and for anything below U+0020, 1 for
// U+0020..U+1FFF and U+FF61..U+FF9F, and 2 for every other rune.
//
// Algorithm comes from `mb_strwidth` in PHP.
// http://php.net/manual/en/function.mb-strwidth.php
func RuneWidth(r rune) int {
	// Control characters and the replacement rune occupy no columns.
	if r < '\x20' || r == utf8.RuneError {
		return 0
	}

	// The remaining ranges are checked in ascending order, so each test only
	// needs the upper bound of its range.
	if r < '\u2000' {
		return 1
	}
	if r < '\uFF61' {
		return 2
	}
	if r < '\uFFA0' {
		return 1
	}
	return 2
}
 |