mirror of
https://github.com/go-gitea/gitea
synced 2025-07-25 19:58:36 +00:00
Detect and remove a decoded BOM when showing content. Restore the previous encoding and BOM when updating content. On error keep as UTF-8 encoding. Signed-off-by: Andrew Thornton <art27@cantab.net>
This commit is contained in:
@@ -5,15 +5,85 @@
|
||||
package uploader
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/net/html/charset"
|
||||
"golang.org/x/text/transform"
|
||||
|
||||
"code.gitea.io/git"
|
||||
"code.gitea.io/gitea/models"
|
||||
"code.gitea.io/gitea/modules/base"
|
||||
"code.gitea.io/gitea/modules/lfs"
|
||||
"code.gitea.io/gitea/modules/log"
|
||||
"code.gitea.io/gitea/modules/setting"
|
||||
)
|
||||
|
||||
func detectEncodingAndBOM(entry *git.TreeEntry, repo *models.Repository) (string, bool) {
|
||||
reader, err := entry.Blob().DataAsync()
|
||||
if err != nil {
|
||||
// return default
|
||||
return "UTF-8", false
|
||||
}
|
||||
defer reader.Close()
|
||||
buf := make([]byte, 1024)
|
||||
n, err := reader.Read(buf)
|
||||
if err != nil {
|
||||
// return default
|
||||
return "UTF-8", false
|
||||
}
|
||||
buf = buf[:n]
|
||||
|
||||
if setting.LFS.StartServer {
|
||||
meta := lfs.IsPointerFile(&buf)
|
||||
if meta != nil {
|
||||
meta, err = repo.GetLFSMetaObjectByOid(meta.Oid)
|
||||
if err != nil && err != models.ErrLFSObjectNotExist {
|
||||
// return default
|
||||
return "UTF-8", false
|
||||
}
|
||||
}
|
||||
if meta != nil {
|
||||
dataRc, err := lfs.ReadMetaObject(meta)
|
||||
if err != nil {
|
||||
// return default
|
||||
return "UTF-8", false
|
||||
}
|
||||
defer dataRc.Close()
|
||||
buf = make([]byte, 1024)
|
||||
n, err = dataRc.Read(buf)
|
||||
if err != nil {
|
||||
// return default
|
||||
return "UTF-8", false
|
||||
}
|
||||
buf = buf[:n]
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
encoding, err := base.DetectEncoding(buf)
|
||||
if err != nil {
|
||||
// just default to utf-8 and no bom
|
||||
return "UTF-8", false
|
||||
}
|
||||
if encoding == "UTF-8" {
|
||||
return encoding, bytes.Equal(buf[0:3], base.UTF8BOM)
|
||||
}
|
||||
charsetEncoding, _ := charset.Lookup(encoding)
|
||||
if charsetEncoding == nil {
|
||||
return "UTF-8", false
|
||||
}
|
||||
|
||||
result, n, err := transform.String(charsetEncoding.NewDecoder(), string(buf))
|
||||
|
||||
if n > 2 {
|
||||
return encoding, bytes.Equal([]byte(result)[0:3], base.UTF8BOM)
|
||||
}
|
||||
|
||||
return encoding, false
|
||||
}
|
||||
|
||||
// UpdateRepoFileOptions holds the repository file update options
|
||||
type UpdateRepoFileOptions struct {
|
||||
LastCommitID string
|
||||
@@ -45,12 +115,29 @@ func UpdateRepoFile(repo *models.Repository, doer *models.User, opts *UpdateRepo
|
||||
return fmt.Errorf("UpdateRepoFile: %v", err)
|
||||
}
|
||||
|
||||
encoding := "UTF-8"
|
||||
bom := false
|
||||
|
||||
if opts.IsNewFile {
|
||||
for _, file := range filesInIndex {
|
||||
if file == opts.NewTreeName {
|
||||
return models.ErrRepoFileAlreadyExist{FileName: opts.NewTreeName}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
gitRepo, err := git.OpenRepository(t.basePath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
tree, err := gitRepo.GetTree("HEAD")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
entry, err := tree.GetTreeEntryByPath(opts.OldTreeName)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
encoding, bom = detectEncodingAndBOM(entry, repo)
|
||||
}
|
||||
|
||||
//var stdout string
|
||||
@@ -72,9 +159,28 @@ func UpdateRepoFile(repo *models.Repository, doer *models.User, opts *UpdateRepo
|
||||
}
|
||||
|
||||
content := opts.Content
|
||||
if bom {
|
||||
content = string(base.UTF8BOM) + content
|
||||
}
|
||||
if encoding != "UTF-8" {
|
||||
charsetEncoding, _ := charset.Lookup(encoding)
|
||||
if charsetEncoding != nil {
|
||||
result, _, err := transform.String(charsetEncoding.NewEncoder(), string(content))
|
||||
if err != nil {
|
||||
// If we can't encode back into the original encoding we should just stick with UTF-8
|
||||
log.Error(4, "Error re-encoding %s (%s) as %s - will stay as UTF-8: %v", opts.NewTreeName, opts.OldTreeName, encoding, err)
|
||||
result = content
|
||||
}
|
||||
content = result
|
||||
} else {
|
||||
log.Error(4, "Unknown encoding: %s", encoding)
|
||||
}
|
||||
}
|
||||
// Reset the opts.Content with the re-encoded and BOM'd content
|
||||
opts.Content = content
|
||||
var lfsMetaObject *models.LFSMetaObject
|
||||
|
||||
if filename2attribute2info[opts.NewTreeName] != nil && filename2attribute2info[opts.NewTreeName]["filter"] == "lfs" {
|
||||
if setting.LFS.StartServer && filename2attribute2info[opts.NewTreeName] != nil && filename2attribute2info[opts.NewTreeName]["filter"] == "lfs" {
|
||||
// OK so we are supposed to LFS this data!
|
||||
oid, err := models.GenerateLFSOid(strings.NewReader(opts.Content))
|
||||
if err != nil {
|
||||
|
Reference in New Issue
Block a user