1
1
mirror of https://github.com/go-gitea/gitea synced 2025-07-22 18:28:37 +00:00

Convert files to utf-8 for indexing (#7814)

* Convert files to utf-8 for indexing

* Move utf8 functions to modules/base

* Bump repoIndexerLatestVersion to 3

* Add tests for base/encoding.go

* Changes to pass gosimple

* Move UTF8 funcs into new modules/charset package

This commit is contained in:
guillep2k
2019-08-15 09:07:28 -03:00
committed by Lunny Xiao
parent c2c35d169c
commit 5a44be627c
13 changed files with 371 additions and 166 deletions

View File

@@ -11,17 +11,17 @@ import (
"path"
"strings"
-"golang.org/x/net/html/charset"
-"golang.org/x/text/transform"
"code.gitea.io/gitea/models"
-"code.gitea.io/gitea/modules/base"
"code.gitea.io/gitea/modules/cache"
+"code.gitea.io/gitea/modules/charset"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/lfs"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/structs"
+stdcharset "golang.org/x/net/html/charset"
+"golang.org/x/text/transform"
)
// IdentityOptions for a person's identity like an author or committer
@@ -87,15 +87,15 @@ func detectEncodingAndBOM(entry *git.TreeEntry, repo *models.Repository) (string
}
-encoding, err := base.DetectEncoding(buf)
+encoding, err := charset.DetectEncoding(buf)
if err != nil {
// just default to utf-8 and no bom
return "UTF-8", false
}
if encoding == "UTF-8" {
-return encoding, bytes.Equal(buf[0:3], base.UTF8BOM)
+return encoding, bytes.Equal(buf[0:3], charset.UTF8BOM)
}
-charsetEncoding, _ := charset.Lookup(encoding)
+charsetEncoding, _ := stdcharset.Lookup(encoding)
if charsetEncoding == nil {
return "UTF-8", false
}
@@ -107,7 +107,7 @@ func detectEncodingAndBOM(entry *git.TreeEntry, repo *models.Repository) (string
}
if n > 2 {
-return encoding, bytes.Equal([]byte(result)[0:3], base.UTF8BOM)
+return encoding, bytes.Equal([]byte(result)[0:3], charset.UTF8BOM)
}
return encoding, false
@@ -321,10 +321,10 @@ func CreateOrUpdateRepoFile(repo *models.Repository, doer *models.User, opts *Up
content := opts.Content
if bom {
-content = string(base.UTF8BOM) + content
+content = string(charset.UTF8BOM) + content
}
if encoding != "UTF-8" {
-charsetEncoding, _ := charset.Lookup(encoding)
+charsetEncoding, _ := stdcharset.Lookup(encoding)
if charsetEncoding != nil {
result, _, err := transform.String(charsetEncoding.NewEncoder(), content)
if err != nil {