1
1
mirror of https://github.com/go-gitea/gitea synced 2025-07-22 18:28:37 +00:00

UI: Detect and restore encoding and BOM in content (#6727)

* detect and remove a decoded BOM

Signed-off-by: Andrew Thornton <art27@cantab.net>

* Restore the previous encoding and BOM

* On error keep as UTF-8

Signed-off-by: Andrew Thornton <art27@cantab.net>

* create remove BOM function

* Deal with LFSed content

* Update modules/repofiles/update.go

* Fix final LFS bug

* Keep LFS sections referring to opts.Content
This commit is contained in:
zeripath
2019-04-26 13:00:30 +01:00
committed by Lauris BH
parent 4c34bc111c
commit f6eedd4dc8
3 changed files with 114 additions and 7 deletions

View File

@@ -267,7 +267,7 @@ func ToUTF8WithErr(content []byte) (string, error) {
if err != nil {
return "", err
} else if charsetLabel == "UTF-8" {
return string(content), nil
return string(base.RemoveBOMIfPresent(content)), nil
}
encoding, _ := charset.Lookup(charsetLabel)
@@ -277,19 +277,21 @@ func ToUTF8WithErr(content []byte) (string, error) {
// If there is an error, we concatenate the nicely decoded part and the
// original left over. This way we won't lose data.
result, n, err := transform.String(encoding.NewDecoder(), string(content))
result, n, err := transform.Bytes(encoding.NewDecoder(), content)
if err != nil {
result = result + string(content[n:])
result = append(result, content[n:]...)
}
return result, err
result = base.RemoveBOMIfPresent(result)
return string(result), err
}
// ToUTF8WithFallback detects the encoding of content and converts to UTF-8 if possible
func ToUTF8WithFallback(content []byte) []byte {
charsetLabel, err := base.DetectEncoding(content)
if err != nil || charsetLabel == "UTF-8" {
return content
return base.RemoveBOMIfPresent(content)
}
encoding, _ := charset.Lookup(charsetLabel)
@@ -304,7 +306,7 @@ func ToUTF8WithFallback(content []byte) []byte {
return append(result, content[n:]...)
}
return result
return base.RemoveBOMIfPresent(result)
}
// ToUTF8 converts content to UTF8 encoding and ignores errors