diff --git a/modules/git/blob.go b/modules/git/blob.go
index ab9deec8d1..40d8f44e79 100644
--- a/modules/git/blob.go
+++ b/modules/git/blob.go
@@ -22,17 +22,22 @@ func (b *Blob) Name() string {
return b.name
}
-// GetBlobContent Gets the limited content of the blob as raw text
-func (b *Blob) GetBlobContent(limit int64) (string, error) {
+// GetBlobBytes Gets the limited content of the blob
+func (b *Blob) GetBlobBytes(limit int64) ([]byte, error) {
if limit <= 0 {
- return "", nil
+ return nil, nil
}
dataRc, err := b.DataAsync()
if err != nil {
- return "", err
+ return nil, err
}
defer dataRc.Close()
- buf, err := util.ReadWithLimit(dataRc, int(limit))
+ return util.ReadWithLimit(dataRc, int(limit))
+}
+
+// GetBlobContent Gets the limited content of the blob as raw text
+func (b *Blob) GetBlobContent(limit int64) (string, error) {
+ buf, err := b.GetBlobBytes(limit)
return string(buf), err
}
@@ -99,11 +104,9 @@ loop:
// GuessContentType guesses the content type of the blob.
func (b *Blob) GuessContentType() (typesniffer.SniffedType, error) {
- r, err := b.DataAsync()
+ buf, err := b.GetBlobBytes(typesniffer.SniffContentSize)
if err != nil {
return typesniffer.SniffedType{}, err
}
- defer r.Close()
-
- return typesniffer.DetectContentTypeFromReader(r)
+ return typesniffer.DetectContentType(buf), nil
}
diff --git a/modules/markup/console/console.go b/modules/markup/console/console.go
index 06f3acfa68..492579b0a5 100644
--- a/modules/markup/console/console.go
+++ b/modules/markup/console/console.go
@@ -6,13 +6,14 @@ package console
import (
"bytes"
"io"
- "path"
+ "unicode/utf8"
"code.gitea.io/gitea/modules/markup"
"code.gitea.io/gitea/modules/setting"
+ "code.gitea.io/gitea/modules/typesniffer"
+ "code.gitea.io/gitea/modules/util"
trend "github.com/buildkite/terminal-to-html/v3"
- "github.com/go-enry/go-enry/v2"
)
func init() {
@@ -22,6 +23,8 @@ func init() {
// Renderer implements markup.Renderer
type Renderer struct{}
+var _ markup.RendererContentDetector = (*Renderer)(nil)
+
// Name implements markup.Renderer
func (Renderer) Name() string {
return "console"
@@ -40,15 +43,36 @@ func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule {
}
// CanRender implements markup.RendererContentDetector
-func (Renderer) CanRender(filename string, input io.Reader) bool {
- buf, err := io.ReadAll(input)
- if err != nil {
+func (Renderer) CanRender(filename string, sniffedType typesniffer.SniffedType, prefetchBuf []byte) bool {
+ if !sniffedType.IsTextPlain() {
return false
}
- if enry.GetLanguage(path.Base(filename), buf) != enry.OtherLanguage {
+
+ s := util.UnsafeBytesToString(prefetchBuf)
+ rs := []rune(s)
+ cnt := 0
+ firstErrPos := -1
+ isCtrlSep := func(p int) bool {
+ return p < len(rs) && (rs[p] == ';' || rs[p] == 'm')
+ }
+ for i, c := range rs {
+ if c == 0 {
+ return false
+ }
+ if c == '\x1b' {
+ match := i+1 < len(rs) && rs[i+1] == '['
+ if match && (isCtrlSep(i+2) || isCtrlSep(i+3) || isCtrlSep(i+4) || isCtrlSep(i+5)) {
+ cnt++
+ }
+ }
+ if c == utf8.RuneError && firstErrPos == -1 {
+ firstErrPos = i
+ }
+ }
+ if firstErrPos != -1 && firstErrPos != len(rs)-1 {
return false
}
- return bytes.ContainsRune(buf, '\x1b')
+ return cnt >= 2 // only render it as console output if there are at least two escape sequences
}
// Render renders terminal colors to HTML with all specific handling stuff.
diff --git a/modules/markup/console/console_test.go b/modules/markup/console/console_test.go
index 539f965ea1..d1192bebc2 100644
--- a/modules/markup/console/console_test.go
+++ b/modules/markup/console/console_test.go
@@ -8,23 +8,39 @@ import (
"testing"
"code.gitea.io/gitea/modules/markup"
+ "code.gitea.io/gitea/modules/typesniffer"
"github.com/stretchr/testify/assert"
)
func TestRenderConsole(t *testing.T) {
- var render Renderer
- kases := map[string]string{
- "\x1b[37m\x1b[40mnpm\x1b[0m \x1b[0m\x1b[32minfo\x1b[0m \x1b[0m\x1b[35mit worked if it ends with\x1b[0m ok": "npm info it worked if it ends with ok",
+ cases := []struct {
+ input string
+ expected string
+ }{
+ {"\x1b[37m\x1b[40mnpm\x1b[0m \x1b[0m\x1b[32minfo\x1b[0m \x1b[0m\x1b[35mit worked if it ends with\x1b[0m ok", `npm info it worked if it ends with ok`},
+ {"\x1b[1;2m \x1b[123m 啊", ` 啊`},
+ {"\x1b[1;2m \x1b[123m \xef", ` �`},
+ {"\x1b[1;2m \x1b[123m \xef \xef", ``},
+ {"\x1b[12", ``},
+ {"\x1b[1", ``},
+ {"\x1b[FOO\x1b[", ``},
+ {"\x1b[mFOO\x1b[m", `FOO`},
}
- for k, v := range kases {
+ var render Renderer
+ for i, c := range cases {
var buf strings.Builder
- canRender := render.CanRender("test", strings.NewReader(k))
- assert.True(t, canRender)
+ st := typesniffer.DetectContentType([]byte(c.input))
+ canRender := render.CanRender("test", st, []byte(c.input))
+ if c.expected == "" {
+ assert.False(t, canRender, "case %d: expected not to render", i)
+ continue
+ }
- err := render.Render(markup.NewRenderContext(t.Context()), strings.NewReader(k), &buf)
+ assert.True(t, canRender)
+ err := render.Render(markup.NewRenderContext(t.Context()), strings.NewReader(c.input), &buf)
assert.NoError(t, err)
- assert.Equal(t, v, buf.String())
+ assert.Equal(t, c.expected, buf.String())
}
}
diff --git a/modules/markup/renderer.go b/modules/markup/renderer.go
index 35f90eb46c..b6e9c348b7 100644
--- a/modules/markup/renderer.go
+++ b/modules/markup/renderer.go
@@ -4,12 +4,12 @@
package markup
import (
- "bytes"
"io"
"path"
"strings"
"code.gitea.io/gitea/modules/setting"
+ "code.gitea.io/gitea/modules/typesniffer"
)
// Renderer defines an interface for rendering markup file to HTML
@@ -37,7 +37,7 @@ type ExternalRenderer interface {
// RendererContentDetector detects if the content can be rendered
// by specified renderer
type RendererContentDetector interface {
- CanRender(filename string, input io.Reader) bool
+ CanRender(filename string, sniffedType typesniffer.SniffedType, prefetchBuf []byte) bool
}
var (
@@ -60,13 +60,9 @@ func GetRendererByFileName(filename string) Renderer {
}
// DetectRendererType detects the markup type of the content
-func DetectRendererType(filename string, input io.Reader) string {
- buf, err := io.ReadAll(input)
- if err != nil {
- return ""
- }
+func DetectRendererType(filename string, sniffedType typesniffer.SniffedType, prefetchBuf []byte) string {
for _, renderer := range renderers {
- if detector, ok := renderer.(RendererContentDetector); ok && detector.CanRender(filename, bytes.NewReader(buf)) {
+ if detector, ok := renderer.(RendererContentDetector); ok && detector.CanRender(filename, sniffedType, prefetchBuf) {
return renderer.Name()
}
}
diff --git a/modules/typesniffer/typesniffer.go b/modules/typesniffer/typesniffer.go
index 8cb3d278ce..2e8d9c4a1e 100644
--- a/modules/typesniffer/typesniffer.go
+++ b/modules/typesniffer/typesniffer.go
@@ -6,18 +6,14 @@ package typesniffer
import (
"bytes"
"encoding/binary"
- "fmt"
- "io"
"net/http"
"regexp"
"slices"
"strings"
-
- "code.gitea.io/gitea/modules/util"
+ "sync"
)
-// Use at most this many bytes to determine Content Type.
-const sniffLen = 1024
+const SniffContentSize = 1024
const (
MimeTypeImageSvg = "image/svg+xml"
@@ -26,22 +22,30 @@ const (
MimeTypeApplicationOctetStream = "application/octet-stream"
)
-var (
- svgComment = regexp.MustCompile(`(?s)`)
- svgTagRegex = regexp.MustCompile(`(?si)\A\s*(?:(|>))\s*)*