mirror of
https://github.com/go-gitea/gitea
synced 2025-07-22 18:28:37 +00:00
Refactor "string truncate" (#32984)
This commit is contained in:
@@ -16,11 +16,11 @@ import (
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
"unicode/utf8"
|
||||
|
||||
"code.gitea.io/gitea/modules/git"
|
||||
"code.gitea.io/gitea/modules/log"
|
||||
"code.gitea.io/gitea/modules/setting"
|
||||
"code.gitea.io/gitea/modules/util"
|
||||
|
||||
"github.com/dustin/go-humanize"
|
||||
)
|
||||
@@ -35,7 +35,7 @@ func EncodeSha256(str string) string {
|
||||
// ShortSha is basically just truncating.
|
||||
// It is DEPRECATED and will be removed in the future.
|
||||
func ShortSha(sha1 string) string {
|
||||
return TruncateString(sha1, 10)
|
||||
return util.TruncateRunes(sha1, 10)
|
||||
}
|
||||
|
||||
// BasicAuthDecode decode basic auth string
|
||||
@@ -116,27 +116,6 @@ func FileSize(s int64) string {
|
||||
return humanize.IBytes(uint64(s))
|
||||
}
|
||||
|
||||
// EllipsisString returns str shortened to at most length runes.
// A string that already fits is returned unchanged. Otherwise the string is cut
// to length-3 runes and "..." is appended; when length is 3 or less there is no
// room for any content, so only "..." is returned.
func EllipsisString(str string, length int) string {
	// Check the fit first: a short (or empty) string must never be replaced by "...".
	if utf8.RuneCountInString(str) <= length {
		return str
	}
	if length <= 3 {
		return "..."
	}
	return string([]rune(str)[:length-3]) + "..."
}
|
||||
|
||||
// TruncateString returns str cut to at most limit runes.
// The input is returned unchanged if its rune count does not exceed limit.
// A zero or negative limit yields the empty string.
func TruncateString(str string, limit int) string {
	if limit <= 0 {
		// a negative limit would otherwise panic in the slice expression below
		return ""
	}
	if utf8.RuneCountInString(str) <= limit {
		return str
	}
	return string([]rune(str)[:limit])
}
|
||||
|
||||
// StringsToInt64s converts a slice of string to a slice of int64.
|
||||
func StringsToInt64s(strs []string) ([]int64, error) {
|
||||
if strs == nil {
|
||||
|
@@ -113,36 +113,6 @@ func TestFileSize(t *testing.T) {
|
||||
assert.Equal(t, "2.0 EiB", FileSize(size))
|
||||
}
|
||||
|
||||
func TestEllipsisString(t *testing.T) {
|
||||
assert.Equal(t, "...", EllipsisString("foobar", 0))
|
||||
assert.Equal(t, "...", EllipsisString("foobar", 1))
|
||||
assert.Equal(t, "...", EllipsisString("foobar", 2))
|
||||
assert.Equal(t, "...", EllipsisString("foobar", 3))
|
||||
assert.Equal(t, "f...", EllipsisString("foobar", 4))
|
||||
assert.Equal(t, "fo...", EllipsisString("foobar", 5))
|
||||
assert.Equal(t, "foobar", EllipsisString("foobar", 6))
|
||||
assert.Equal(t, "foobar", EllipsisString("foobar", 10))
|
||||
assert.Equal(t, "测...", EllipsisString("测试文本一二三四", 4))
|
||||
assert.Equal(t, "测试...", EllipsisString("测试文本一二三四", 5))
|
||||
assert.Equal(t, "测试文...", EllipsisString("测试文本一二三四", 6))
|
||||
assert.Equal(t, "测试文本一二三四", EllipsisString("测试文本一二三四", 10))
|
||||
}
|
||||
|
||||
func TestTruncateString(t *testing.T) {
|
||||
assert.Equal(t, "", TruncateString("foobar", 0))
|
||||
assert.Equal(t, "f", TruncateString("foobar", 1))
|
||||
assert.Equal(t, "fo", TruncateString("foobar", 2))
|
||||
assert.Equal(t, "foo", TruncateString("foobar", 3))
|
||||
assert.Equal(t, "foob", TruncateString("foobar", 4))
|
||||
assert.Equal(t, "fooba", TruncateString("foobar", 5))
|
||||
assert.Equal(t, "foobar", TruncateString("foobar", 6))
|
||||
assert.Equal(t, "foobar", TruncateString("foobar", 7))
|
||||
assert.Equal(t, "测试文本", TruncateString("测试文本一二三四", 4))
|
||||
assert.Equal(t, "测试文本一", TruncateString("测试文本一二三四", 5))
|
||||
assert.Equal(t, "测试文本一二", TruncateString("测试文本一二三四", 6))
|
||||
assert.Equal(t, "测试文本一二三", TruncateString("测试文本一二三四", 7))
|
||||
}
|
||||
|
||||
func TestStringsToInt64s(t *testing.T) {
|
||||
testSuccess := func(input []string, expected []int64) {
|
||||
result, err := StringsToInt64s(input)
|
||||
|
@@ -109,7 +109,7 @@ func unmarshal(filename string, content []byte) (*api.IssueTemplate, error) {
|
||||
|
||||
it.Content = string(content)
|
||||
it.Name = path.Base(it.FileName) // paths in Git are always '/' separated - do not use filepath!
|
||||
it.About, _ = util.SplitStringAtByteN(it.Content, 80)
|
||||
it.About = util.EllipsisDisplayString(it.Content, 80)
|
||||
} else {
|
||||
it.Content = templateBody
|
||||
if it.About == "" {
|
||||
|
@@ -173,7 +173,7 @@ func linkProcessor(ctx *RenderContext, node *html.Node) {
|
||||
|
||||
uri := node.Data[m[0]:m[1]]
|
||||
remaining := node.Data[m[1]:]
|
||||
if util.IsLikelySplitLeftPart(remaining) {
|
||||
if util.IsLikelyEllipsisLeftPart(remaining) {
|
||||
return
|
||||
}
|
||||
replaceContent(node, m[0], m[1], createLink(ctx, uri, uri, "" /*link*/))
|
||||
|
@@ -207,12 +207,12 @@ func TestRender_links(t *testing.T) {
|
||||
"ftps://gitea.com",
|
||||
`<p>ftps://gitea.com</p>`)
|
||||
|
||||
t.Run("LinkSplit", func(t *testing.T) {
|
||||
input, _ := util.SplitStringAtByteN("http://10.1.2.3", 12)
|
||||
t.Run("LinkEllipsis", func(t *testing.T) {
|
||||
input := util.EllipsisDisplayString("http://10.1.2.3", 12)
|
||||
assert.Equal(t, "http://10…", input)
|
||||
test(input, "<p>http://10…</p>")
|
||||
|
||||
input, _ = util.SplitStringAtByteN("http://10.1.2.3", 13)
|
||||
input = util.EllipsisDisplayString("http://10.1.2.3", 13)
|
||||
assert.Equal(t, "http://10.…", input)
|
||||
test(input, "<p>http://10.…</p>")
|
||||
})
|
||||
|
@@ -11,9 +11,9 @@ import (
|
||||
"strings"
|
||||
texttmpl "text/template"
|
||||
|
||||
"code.gitea.io/gitea/modules/base"
|
||||
"code.gitea.io/gitea/modules/log"
|
||||
"code.gitea.io/gitea/modules/setting"
|
||||
"code.gitea.io/gitea/modules/util"
|
||||
)
|
||||
|
||||
var mailSubjectSplit = regexp.MustCompile(`(?m)^-{3,}\s*$`)
|
||||
@@ -24,7 +24,7 @@ func mailSubjectTextFuncMap() texttmpl.FuncMap {
|
||||
"dict": dict,
|
||||
"Eval": evalTokens,
|
||||
|
||||
"EllipsisString": base.EllipsisString,
|
||||
"EllipsisString": util.EllipsisDisplayString,
|
||||
"AppName": func() string {
|
||||
return setting.AppName
|
||||
},
|
||||
|
@@ -8,7 +8,7 @@ import (
|
||||
"html/template"
|
||||
"strings"
|
||||
|
||||
"code.gitea.io/gitea/modules/base"
|
||||
"code.gitea.io/gitea/modules/util"
|
||||
)
|
||||
|
||||
type StringUtils struct{}
|
||||
@@ -54,7 +54,7 @@ func (su *StringUtils) Cut(s, sep string) []any {
|
||||
}
|
||||
|
||||
func (su *StringUtils) EllipsisString(s string, maxLength int) string {
|
||||
return base.EllipsisString(s, maxLength)
|
||||
return util.EllipsisDisplayString(s, maxLength)
|
||||
}
|
||||
|
||||
func (su *StringUtils) ToUpper(s string) string {
|
||||
|
@@ -14,31 +14,92 @@ const (
|
||||
asciiEllipsis = "..."
|
||||
)
|
||||
|
||||
func IsLikelySplitLeftPart(s string) bool {
|
||||
func IsLikelyEllipsisLeftPart(s string) bool {
|
||||
return strings.HasSuffix(s, utf8Ellipsis) || strings.HasSuffix(s, asciiEllipsis)
|
||||
}
|
||||
|
||||
// SplitStringAtByteN splits a string at byte n accounting for rune boundaries. (Combining characters are not accounted for.)
|
||||
func SplitStringAtByteN(input string, n int) (left, right string) {
|
||||
if len(input) <= n {
|
||||
return input, ""
|
||||
}
|
||||
// EllipsisDisplayString returns a truncated short string for display purpose.
|
||||
// The length is the approximate number of ASCII-width in the string (CJK/emoji are 2-ASCII width)
|
||||
// It appends "…" or "..." at the end of truncated string.
|
||||
// It guarantees the length of the returned runes doesn't exceed the limit.
|
||||
func EllipsisDisplayString(str string, limit int) string {
|
||||
s, _, _, _ := ellipsisDisplayString(str, limit)
|
||||
return s
|
||||
}
|
||||
|
||||
if !utf8.ValidString(input) {
|
||||
if n-3 < 0 {
|
||||
return input, ""
|
||||
// EllipsisDisplayStringX works like EllipsisDisplayString while it also returns the right part
|
||||
func EllipsisDisplayStringX(str string, limit int) (left, right string) {
|
||||
left, offset, truncated, encounterInvalid := ellipsisDisplayString(str, limit)
|
||||
if truncated {
|
||||
right = str[offset:]
|
||||
r, _ := utf8.DecodeRune(UnsafeStringToBytes(right))
|
||||
encounterInvalid = encounterInvalid || r == utf8.RuneError
|
||||
ellipsis := utf8Ellipsis
|
||||
if encounterInvalid {
|
||||
ellipsis = asciiEllipsis
|
||||
}
|
||||
return input[:n-3] + asciiEllipsis, asciiEllipsis + input[n-3:]
|
||||
right = ellipsis + right
|
||||
}
|
||||
return left, right
|
||||
}
|
||||
|
||||
func ellipsisDisplayString(str string, limit int) (res string, offset int, truncated, encounterInvalid bool) {
|
||||
if len(str) <= limit {
|
||||
return str, len(str), false, false
|
||||
}
|
||||
|
||||
end := 0
|
||||
for end <= n-3 {
|
||||
_, size := utf8.DecodeRuneInString(input[end:])
|
||||
if end+size > n-3 {
|
||||
// To future maintainers: this logic must guarantee that the length of the returned runes doesn't exceed the limit,
|
||||
// because the returned string will also be used as database value. UTF-8 VARCHAR(10) could store 10 rune characters,
|
||||
// So each rune must be countered as at least 1 width.
|
||||
// Even if there are some special Unicode characters (zero-width, combining, etc.), they should NEVER be counted as zero.
|
||||
pos, used := 0, 0
|
||||
for i, r := range str {
|
||||
encounterInvalid = encounterInvalid || r == utf8.RuneError
|
||||
pos = i
|
||||
runeWidth := 1
|
||||
if r >= 128 {
|
||||
runeWidth = 2 // CJK/emoji chars are considered as 2-ASCII width
|
||||
}
|
||||
if used+runeWidth+3 > limit {
|
||||
break
|
||||
}
|
||||
end += size
|
||||
used += runeWidth
|
||||
offset += utf8.RuneLen(r)
|
||||
}
|
||||
|
||||
return input[:end] + utf8Ellipsis, utf8Ellipsis + input[end:]
|
||||
// if the remaining are fewer than 3 runes, then maybe we could add them, no need to ellipse
|
||||
if len(str)-pos <= 12 {
|
||||
var nextCnt, nextWidth int
|
||||
for _, r := range str[pos:] {
|
||||
if nextCnt >= 4 {
|
||||
break
|
||||
}
|
||||
nextWidth++
|
||||
if r >= 128 {
|
||||
nextWidth++ // CJK/emoji chars are considered as 2-ASCII width
|
||||
}
|
||||
nextCnt++
|
||||
}
|
||||
if nextCnt <= 3 && used+nextWidth <= limit {
|
||||
return str, len(str), false, false
|
||||
}
|
||||
}
|
||||
if limit < 3 {
|
||||
// if the limit is so small, do not add ellipsis
|
||||
return str[:offset], offset, true, false
|
||||
}
|
||||
ellipsis := utf8Ellipsis
|
||||
if encounterInvalid {
|
||||
ellipsis = asciiEllipsis
|
||||
}
|
||||
return str[:offset] + ellipsis, offset, true, encounterInvalid
|
||||
}
|
||||
|
||||
// TruncateRunes returns a truncated string with given rune limit,
// it returns input string unchanged if its rune length doesn't exceed the limit.
// A zero or negative limit yields the empty string.
func TruncateRunes(str string, limit int) string {
	if limit <= 0 {
		// a negative limit would otherwise panic in the slice expression below
		return ""
	}
	if utf8.RuneCountInString(str) <= limit {
		return str
	}
	return string([]rune(str)[:limit])
}
|
||||
|
@@ -4,43 +4,94 @@
|
||||
package util
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestSplitString(t *testing.T) {
|
||||
type testCase struct {
|
||||
input string
|
||||
n int
|
||||
leftSub string
|
||||
ellipsis string
|
||||
func TestEllipsisString(t *testing.T) {
|
||||
cases := []struct {
|
||||
limit int
|
||||
|
||||
input, left, right string
|
||||
}{
|
||||
{limit: 0, input: "abcde", left: "", right: "…abcde"},
|
||||
{limit: 1, input: "abcde", left: "", right: "…abcde"},
|
||||
{limit: 2, input: "abcde", left: "", right: "…abcde"},
|
||||
{limit: 3, input: "abcde", left: "…", right: "…abcde"},
|
||||
{limit: 4, input: "abcde", left: "a…", right: "…bcde"},
|
||||
{limit: 5, input: "abcde", left: "abcde", right: ""},
|
||||
{limit: 6, input: "abcde", left: "abcde", right: ""},
|
||||
{limit: 7, input: "abcde", left: "abcde", right: ""},
|
||||
|
||||
// a CJK char or emoji is considered as 2-ASCII width, the ellipsis is 3-ASCII width
|
||||
{limit: 0, input: "测试文本", left: "", right: "…测试文本"},
|
||||
{limit: 1, input: "测试文本", left: "", right: "…测试文本"},
|
||||
{limit: 2, input: "测试文本", left: "", right: "…测试文本"},
|
||||
{limit: 3, input: "测试文本", left: "…", right: "…测试文本"},
|
||||
{limit: 4, input: "测试文本", left: "…", right: "…测试文本"},
|
||||
{limit: 5, input: "测试文本", left: "测…", right: "…试文本"},
|
||||
{limit: 6, input: "测试文本", left: "测…", right: "…试文本"},
|
||||
{limit: 7, input: "测试文本", left: "测试…", right: "…文本"},
|
||||
{limit: 8, input: "测试文本", left: "测试文本", right: ""},
|
||||
{limit: 9, input: "测试文本", left: "测试文本", right: ""},
|
||||
}
|
||||
for _, c := range cases {
|
||||
t.Run(fmt.Sprintf("%s(%d)", c.input, c.limit), func(t *testing.T) {
|
||||
left, right := EllipsisDisplayStringX(c.input, c.limit)
|
||||
assert.Equal(t, c.left, left, "left")
|
||||
assert.Equal(t, c.right, right, "right")
|
||||
})
|
||||
}
|
||||
|
||||
test := func(tc []*testCase, f func(input string, n int) (left, right string)) {
|
||||
for _, c := range tc {
|
||||
l, r := f(c.input, c.n)
|
||||
if c.ellipsis != "" {
|
||||
assert.Equal(t, c.leftSub+c.ellipsis, l, "test split %q at %d, expected leftSub: %q", c.input, c.n, c.leftSub)
|
||||
assert.Equal(t, c.ellipsis+c.input[len(c.leftSub):], r, "test split %s at %d, expected rightSub: %q", c.input, c.n, c.input[len(c.leftSub):])
|
||||
} else {
|
||||
assert.Equal(t, c.leftSub, l, "test split %q at %d, expected leftSub: %q", c.input, c.n, c.leftSub)
|
||||
assert.Empty(t, r, "test split %q at %d, expected rightSub: %q", c.input, c.n, "")
|
||||
}
|
||||
t.Run("LongInput", func(t *testing.T) {
|
||||
left, right := EllipsisDisplayStringX(strings.Repeat("abc", 240), 90)
|
||||
assert.Equal(t, strings.Repeat("abc", 29)+"…", left)
|
||||
assert.Equal(t, "…"+strings.Repeat("abc", 211), right)
|
||||
})
|
||||
|
||||
t.Run("InvalidUtf8", func(t *testing.T) {
|
||||
invalidCases := []struct {
|
||||
limit int
|
||||
left, right string
|
||||
}{
|
||||
{limit: 0, left: "", right: "...\xef\x03\xfe\xef\x03\xfe"},
|
||||
{limit: 1, left: "", right: "...\xef\x03\xfe\xef\x03\xfe"},
|
||||
{limit: 2, left: "", right: "...\xef\x03\xfe\xef\x03\xfe"},
|
||||
{limit: 3, left: "...", right: "...\xef\x03\xfe\xef\x03\xfe"},
|
||||
{limit: 4, left: "...", right: "...\xef\x03\xfe\xef\x03\xfe"},
|
||||
{limit: 5, left: "\xef\x03\xfe...", right: "...\xef\x03\xfe"},
|
||||
{limit: 6, left: "\xef\x03\xfe\xef\x03\xfe", right: ""},
|
||||
{limit: 7, left: "\xef\x03\xfe\xef\x03\xfe", right: ""},
|
||||
}
|
||||
}
|
||||
for _, c := range invalidCases {
|
||||
t.Run(fmt.Sprintf("%d", c.limit), func(t *testing.T) {
|
||||
left, right := EllipsisDisplayStringX("\xef\x03\xfe\xef\x03\xfe", c.limit)
|
||||
assert.Equal(t, c.left, left, "left")
|
||||
assert.Equal(t, c.right, right, "right")
|
||||
})
|
||||
}
|
||||
})
|
||||
|
||||
tc := []*testCase{
|
||||
{"abc123xyz", 0, "", utf8Ellipsis},
|
||||
{"abc123xyz", 1, "", utf8Ellipsis},
|
||||
{"abc123xyz", 4, "a", utf8Ellipsis},
|
||||
{"啊bc123xyz", 4, "", utf8Ellipsis},
|
||||
{"啊bc123xyz", 6, "啊", utf8Ellipsis},
|
||||
{"啊bc", 5, "啊bc", ""},
|
||||
{"啊bc", 6, "啊bc", ""},
|
||||
{"abc\xef\x03\xfe", 3, "", asciiEllipsis},
|
||||
{"abc\xef\x03\xfe", 4, "a", asciiEllipsis},
|
||||
{"\xef\x03", 1, "\xef\x03", ""},
|
||||
}
|
||||
test(tc, SplitStringAtByteN)
|
||||
t.Run("IsLikelyEllipsisLeftPart", func(t *testing.T) {
|
||||
assert.True(t, IsLikelyEllipsisLeftPart("abcde…"))
|
||||
assert.True(t, IsLikelyEllipsisLeftPart("abcde..."))
|
||||
})
|
||||
}
|
||||
|
||||
func TestTruncateRunes(t *testing.T) {
|
||||
assert.Equal(t, "", TruncateRunes("", 0))
|
||||
assert.Equal(t, "", TruncateRunes("", 1))
|
||||
|
||||
assert.Equal(t, "", TruncateRunes("ab", 0))
|
||||
assert.Equal(t, "a", TruncateRunes("ab", 1))
|
||||
assert.Equal(t, "ab", TruncateRunes("ab", 2))
|
||||
assert.Equal(t, "ab", TruncateRunes("ab", 3))
|
||||
|
||||
assert.Equal(t, "", TruncateRunes("测试", 0))
|
||||
assert.Equal(t, "测", TruncateRunes("测试", 1))
|
||||
assert.Equal(t, "测试", TruncateRunes("测试", 2))
|
||||
assert.Equal(t, "测试", TruncateRunes("测试", 3))
|
||||
}
|
||||
|
Reference in New Issue
Block a user