1
1
mirror of https://github.com/go-gitea/gitea synced 2025-08-01 15:18:37 +00:00

Add support for 3D/CAD file formats preview (#34794)

Fix #34775 

---------

Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
This commit is contained in:
Kerwin Bryant
2025-06-30 16:12:25 +08:00
committed by GitHub
parent f74a13610d
commit 176962c03e
29 changed files with 627 additions and 409 deletions

View File

@@ -6,18 +6,14 @@ package typesniffer
import (
"bytes"
"encoding/binary"
"fmt"
"io"
"net/http"
"regexp"
"slices"
"strings"
"code.gitea.io/gitea/modules/util"
"sync"
)
// Use at most this many bytes to determine Content Type.
const sniffLen = 1024
const SniffContentSize = 1024
const (
MimeTypeImageSvg = "image/svg+xml"
@@ -26,22 +22,30 @@ const (
MimeTypeApplicationOctetStream = "application/octet-stream"
)
var (
svgComment = regexp.MustCompile(`(?s)<!--.*?-->`)
svgTagRegex = regexp.MustCompile(`(?si)\A\s*(?:(<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg\b`)
svgTagInXMLRegex = regexp.MustCompile(`(?si)\A<\?xml\b.*?\?>\s*(?:(<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg\b`)
)
var globalVars = sync.OnceValue(func() (ret struct {
svgComment, svgTagRegex, svgTagInXMLRegex *regexp.Regexp
},
) {
ret.svgComment = regexp.MustCompile(`(?s)<!--.*?-->`)
ret.svgTagRegex = regexp.MustCompile(`(?si)\A\s*(?:(<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg\b`)
ret.svgTagInXMLRegex = regexp.MustCompile(`(?si)\A<\?xml\b.*?\?>\s*(?:(<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg\b`)
return ret
})
// SniffedType contains information about a blobs type.
// SniffedType contains information about a blob's type.
type SniffedType struct {
contentType string
}
// IsText etects if content format is plain text.
// IsText detects if the content format is text family, including text/plain, text/html, text/css, etc.
func (ct SniffedType) IsText() bool {
return strings.Contains(ct.contentType, "text/")
}
func (ct SniffedType) IsTextPlain() bool {
return strings.Contains(ct.contentType, "text/plain")
}
// IsImage detects if data is an image format
func (ct SniffedType) IsImage() bool {
return strings.Contains(ct.contentType, "image/")
@@ -57,12 +61,12 @@ func (ct SniffedType) IsPDF() bool {
return strings.Contains(ct.contentType, "application/pdf")
}
// IsVideo detects if data is an video format
// IsVideo detects if data is a video format
func (ct SniffedType) IsVideo() bool {
return strings.Contains(ct.contentType, "video/")
}
// IsAudio detects if data is an video format
// IsAudio detects if data is a video format
func (ct SniffedType) IsAudio() bool {
return strings.Contains(ct.contentType, "audio/")
}
@@ -103,33 +107,34 @@ func detectFileTypeBox(data []byte) (brands []string, found bool) {
return brands, true
}
// DetectContentType extends http.DetectContentType with more content types. Defaults to text/unknown if input is empty.
// DetectContentType extends http.DetectContentType with more content types. Defaults to text/plain if input is empty.
func DetectContentType(data []byte) SniffedType {
if len(data) == 0 {
return SniffedType{"text/unknown"}
return SniffedType{"text/plain"}
}
ct := http.DetectContentType(data)
if len(data) > sniffLen {
data = data[:sniffLen]
if len(data) > SniffContentSize {
data = data[:SniffContentSize]
}
vars := globalVars()
// SVG is unsupported by http.DetectContentType, https://github.com/golang/go/issues/15888
detectByHTML := strings.Contains(ct, "text/plain") || strings.Contains(ct, "text/html")
detectByXML := strings.Contains(ct, "text/xml")
if detectByHTML || detectByXML {
dataProcessed := svgComment.ReplaceAll(data, nil)
dataProcessed := vars.svgComment.ReplaceAll(data, nil)
dataProcessed = bytes.TrimSpace(dataProcessed)
if detectByHTML && svgTagRegex.Match(dataProcessed) ||
detectByXML && svgTagInXMLRegex.Match(dataProcessed) {
if detectByHTML && vars.svgTagRegex.Match(dataProcessed) ||
detectByXML && vars.svgTagInXMLRegex.Match(dataProcessed) {
ct = MimeTypeImageSvg
}
}
if strings.HasPrefix(ct, "audio/") && bytes.HasPrefix(data, []byte("ID3")) {
// The MP3 detection is quite inaccurate, any content with "ID3" prefix will result in "audio/mpeg".
// So remove the "ID3" prefix and detect again, if result is text, then it must be text content.
// So remove the "ID3" prefix and detect again, then if the result is "text", it must be text content.
// This works especially because audio files contain many unprintable/invalid characters like `0x00`
ct2 := http.DetectContentType(data[3:])
if strings.HasPrefix(ct2, "text/") {
@@ -155,15 +160,3 @@ func DetectContentType(data []byte) SniffedType {
}
return SniffedType{ct}
}
// DetectContentTypeFromReader guesses the content type contained in the reader.
func DetectContentTypeFromReader(r io.Reader) (SniffedType, error) {
buf := make([]byte, sniffLen)
n, err := util.ReadAtMost(r, buf)
if err != nil {
return SniffedType{}, fmt.Errorf("DetectContentTypeFromReader io error: %w", err)
}
buf = buf[:n]
return DetectContentType(buf), nil
}

View File

@@ -4,7 +4,6 @@
package typesniffer
import (
"bytes"
"encoding/base64"
"encoding/hex"
"strings"
@@ -17,7 +16,7 @@ func TestDetectContentTypeLongerThanSniffLen(t *testing.T) {
// Pre-condition: Shorter than sniffLen detects SVG.
assert.Equal(t, "image/svg+xml", DetectContentType([]byte(`<!-- Comment --><svg></svg>`)).contentType)
// Longer than sniffLen detects something else.
assert.NotEqual(t, "image/svg+xml", DetectContentType([]byte(`<!-- `+strings.Repeat("x", sniffLen)+` --><svg></svg>`)).contentType)
assert.NotEqual(t, "image/svg+xml", DetectContentType([]byte(`<!-- `+strings.Repeat("x", SniffContentSize)+` --><svg></svg>`)).contentType)
}
func TestIsTextFile(t *testing.T) {
@@ -116,22 +115,13 @@ func TestIsAudio(t *testing.T) {
assert.True(t, DetectContentType([]byte("ID3Toy\n====\t* hi 🌞, ..."+"🌛"[0:2])).IsText()) // test ID3 tag with incomplete UTF8 char
}
func TestDetectContentTypeFromReader(t *testing.T) {
mp3, _ := base64.StdEncoding.DecodeString("SUQzBAAAAAABAFRYWFgAAAASAAADbWFqb3JfYnJhbmQAbXA0MgBUWFhYAAAAEQAAA21pbm9yX3Zl")
st, err := DetectContentTypeFromReader(bytes.NewReader(mp3))
assert.NoError(t, err)
assert.True(t, st.IsAudio())
}
func TestDetectContentTypeOgg(t *testing.T) {
oggAudio, _ := hex.DecodeString("4f67675300020000000000000000352f0000000000007dc39163011e01766f72626973000000000244ac0000000000000071020000000000b8014f6767530000")
st, err := DetectContentTypeFromReader(bytes.NewReader(oggAudio))
assert.NoError(t, err)
st := DetectContentType(oggAudio)
assert.True(t, st.IsAudio())
oggVideo, _ := hex.DecodeString("4f676753000200000000000000007d9747ef000000009b59daf3012a807468656f7261030201001e00110001e000010e00020000001e00000001000001000001")
st, err = DetectContentTypeFromReader(bytes.NewReader(oggVideo))
assert.NoError(t, err)
st = DetectContentType(oggVideo)
assert.True(t, st.IsVideo())
}