1
1
mirror of https://github.com/go-gitea/gitea synced 2024-12-22 16:44:26 +00:00

Refactor markup render system (#32533)

Remove the unmaintainable sanitizer rules. There is no longer any need to add
special "class" regexp rules; use RenderInternal.SafeAttr instead. More details
(and examples) are in the tests.
This commit is contained in:
wxiaoguang 2024-11-18 13:25:42 +08:00 committed by GitHub
parent 4f879a00df
commit 8a20fba8eb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
42 changed files with 568 additions and 508 deletions

View File

@ -1,25 +0,0 @@
// Copyright 2022 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package html
// ParseSizeAndClass resolves the effective size and class string from the
// given defaults and the optional trailing arguments.
//
// others[0], when it is a non-zero int, replaces defaultSize.
// others[1], when it is a non-empty string, is appended to defaultClass
// (separated by a single space if defaultClass is not empty).
// Arguments of any other type, and any arguments after the second, are ignored.
func ParseSizeAndClass(defaultSize int, defaultClass string, others ...any) (int, string) {
	size, class := defaultSize, defaultClass
	for idx, other := range others {
		switch idx {
		case 0:
			// only a non-zero int overrides the default size
			if n, isInt := other.(int); isInt && n != 0 {
				size = n
			}
		case 1:
			extra, isString := other.(string)
			if !isString || extra == "" {
				continue
			}
			if class == "" {
				class = extra
			} else {
				class = class + " " + extra
			}
		}
	}
	return size, class
}

48
modules/htmlutil/html.go Normal file
View File

@ -0,0 +1,48 @@
// Copyright 2022 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package htmlutil
import (
"fmt"
"html/template"
"slices"
)
// ParseSizeAndClass returns the size and class to use, starting from the
// defaults and applying the optional overrides passed in "others".
//
// The first extra argument replaces the size when it is a non-zero int.
// The second extra argument is added after defaultClass (space separated)
// when it is a non-empty string. Anything else in "others" is ignored.
func ParseSizeAndClass(defaultSize int, defaultClass string, others ...any) (int, string) {
	size := defaultSize
	if len(others) > 0 {
		// a failed assertion yields 0, which is treated the same as "no override"
		if override, _ := others[0].(int); override != 0 {
			size = override
		}
	}
	if len(others) < 2 {
		return size, defaultClass
	}

	// a failed assertion yields "", which is treated the same as "no extra class"
	extra, _ := others[1].(string)
	switch {
	case extra == "":
		return size, defaultClass
	case defaultClass == "":
		return size, extra
	default:
		return size, defaultClass + " " + extra
	}
}
// HTMLFormat formats s with fmt.Sprintf and returns the result as template.HTML.
// Before formatting, every argument is made safe for embedding in HTML:
//   - nil, bool, the built-in integer and float types, and template.HTML are passed through unchanged
//   - string values are HTML-escaped
//   - fmt.Stringer values are converted with String() and then HTML-escaped
//   - any other value is converted with fmt.Sprint and then HTML-escaped
//
// The format string s itself is not escaped. The caller's rawArgs slice is not modified.
func HTMLFormat(s string, rawArgs ...any) template.HTML {
	escaped := slices.Clone(rawArgs) // work on a copy, the caller's slice must stay untouched
	for idx := range escaped {
		var text string
		switch arg := escaped[idx].(type) {
		case string:
			text = arg
		case nil, bool, int, int8, int16, int32, int64, uint, uint8, uint16, uint32, uint64, float32, float64, template.HTML:
			// basic types can't carry markup, and template.HTML is already trusted: keep the value as it is
			continue
		case fmt.Stringer:
			text = arg.String()
		default:
			text = fmt.Sprint(arg)
		}
		escaped[idx] = template.HTMLEscapeString(text)
	}
	return template.HTML(fmt.Sprintf(s, escaped...))
}

View File

@ -0,0 +1,15 @@
// Copyright 2024 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package htmlutil
import (
"html/template"
"testing"
"github.com/stretchr/testify/assert"
)
func TestHTMLFormat(t *testing.T) {
assert.Equal(t, template.HTML("<a>&lt; < 1</a>"), HTMLFormat("<a>%s %s %d</a>", "<", template.HTML("<"), 1))
}

View File

@ -7,7 +7,6 @@ import (
"fmt" "fmt"
"io" "io"
"net/url" "net/url"
"regexp"
"code.gitea.io/gitea/modules/markup" "code.gitea.io/gitea/modules/markup"
"code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/setting"
@ -38,10 +37,7 @@ const (
// SanitizerRules implements markup.Renderer // SanitizerRules implements markup.Renderer
func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule { func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule {
return []setting.MarkupSanitizerRule{ return []setting.MarkupSanitizerRule{{Element: "div", AllowAttr: playerSrcAttr}}
{Element: "div", AllowAttr: "class", Regexp: regexp.MustCompile(playerClassName)},
{Element: "div", AllowAttr: playerSrcAttr},
}
} }
// Render implements markup.Renderer // Render implements markup.Renderer
@ -53,12 +49,5 @@ func (Renderer) Render(ctx *markup.RenderContext, _ io.Reader, output io.Writer)
ctx.Metas["BranchNameSubURL"], ctx.Metas["BranchNameSubURL"],
url.PathEscape(ctx.RelativePath), url.PathEscape(ctx.RelativePath),
) )
return ctx.RenderInternal.FormatWithSafeAttrs(output, `<div class="%s" %s="%s"></div>`, playerClassName, playerSrcAttr, rawURL)
_, err := io.WriteString(output, fmt.Sprintf(
`<div class="%s" %s="%s"></div>`,
playerClassName,
playerSrcAttr,
rawURL,
))
return err
} }

View File

@ -1,16 +0,0 @@
// Copyright 2019 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package common
import (
"mvdan.cc/xurls/v2"
)
// NOTE: None of the regex matching below performs any extra validation.
// Thus a link is produced even if the linked entity does not exist.
// While fast, this is also incorrect and leads to false positives.
// TODO: fix invalid linking issue
// LinkRegex is a regexp matching a valid link
var LinkRegex, _ = xurls.StrictMatchingScheme("https?://")

View File

@ -9,15 +9,27 @@ package common
import ( import (
"bytes" "bytes"
"regexp" "regexp"
"sync"
"github.com/yuin/goldmark" "github.com/yuin/goldmark"
"github.com/yuin/goldmark/ast" "github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/parser" "github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/text" "github.com/yuin/goldmark/text"
"github.com/yuin/goldmark/util" "github.com/yuin/goldmark/util"
"mvdan.cc/xurls/v2"
) )
var wwwURLRegxp = regexp.MustCompile(`^www\.[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}((?:/|[#?])[-a-zA-Z0-9@:%_\+.~#!?&//=\(\);,'">\^{}\[\]` + "`" + `]*)?`) type GlobalVarsType struct {
wwwURLRegxp *regexp.Regexp
LinkRegex *regexp.Regexp // fast matching a URL link, no any extra validation.
}
var GlobalVars = sync.OnceValue[*GlobalVarsType](func() *GlobalVarsType {
v := &GlobalVarsType{}
v.wwwURLRegxp = regexp.MustCompile(`^www\.[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}((?:/|[#?])[-a-zA-Z0-9@:%_\+.~#!?&//=\(\);,'">\^{}\[\]` + "`" + `]*)?`)
v.LinkRegex, _ = xurls.StrictMatchingScheme("https?://")
return v
})
type linkifyParser struct{} type linkifyParser struct{}
@ -60,10 +72,10 @@ func (s *linkifyParser) Parse(parent ast.Node, block text.Reader, pc parser.Cont
var protocol []byte var protocol []byte
typ := ast.AutoLinkURL typ := ast.AutoLinkURL
if bytes.HasPrefix(line, protoHTTP) || bytes.HasPrefix(line, protoHTTPS) || bytes.HasPrefix(line, protoFTP) { if bytes.HasPrefix(line, protoHTTP) || bytes.HasPrefix(line, protoHTTPS) || bytes.HasPrefix(line, protoFTP) {
m = LinkRegex.FindSubmatchIndex(line) m = GlobalVars().LinkRegex.FindSubmatchIndex(line)
} }
if m == nil && bytes.HasPrefix(line, domainWWW) { if m == nil && bytes.HasPrefix(line, domainWWW) {
m = wwwURLRegxp.FindSubmatchIndex(line) m = GlobalVars().wwwURLRegxp.FindSubmatchIndex(line)
protocol = []byte("http") protocol = []byte("http")
} }
if m != nil { if m != nil {

View File

@ -6,8 +6,7 @@ package console
import ( import (
"bytes" "bytes"
"io" "io"
"path/filepath" "path"
"regexp"
"code.gitea.io/gitea/modules/markup" "code.gitea.io/gitea/modules/markup"
"code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/setting"
@ -36,7 +35,7 @@ func (Renderer) Extensions() []string {
// SanitizerRules implements markup.Renderer // SanitizerRules implements markup.Renderer
func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule { func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule {
return []setting.MarkupSanitizerRule{ return []setting.MarkupSanitizerRule{
{Element: "span", AllowAttr: "class", Regexp: regexp.MustCompile(`^term-((fg[ix]?|bg)\d+|container)$`)}, {Element: "span", AllowAttr: "class", Regexp: `^term-((fg[ix]?|bg)\d+|container)$`},
} }
} }
@ -46,7 +45,7 @@ func (Renderer) CanRender(filename string, input io.Reader) bool {
if err != nil { if err != nil {
return false return false
} }
if enry.GetLanguage(filepath.Base(filename), buf) != enry.OtherLanguage { if enry.GetLanguage(path.Base(filename), buf) != enry.OtherLanguage {
return false return false
} }
return bytes.ContainsRune(buf, '\x1b') return bytes.ContainsRune(buf, '\x1b')

View File

@ -7,7 +7,6 @@ import (
"bufio" "bufio"
"html" "html"
"io" "io"
"regexp"
"strconv" "strconv"
"code.gitea.io/gitea/modules/csv" "code.gitea.io/gitea/modules/csv"
@ -37,9 +36,9 @@ func (Renderer) Extensions() []string {
// SanitizerRules implements markup.Renderer // SanitizerRules implements markup.Renderer
func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule { func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule {
return []setting.MarkupSanitizerRule{ return []setting.MarkupSanitizerRule{
{Element: "table", AllowAttr: "class", Regexp: regexp.MustCompile(`data-table`)}, {Element: "table", AllowAttr: "class", Regexp: `^data-table$`},
{Element: "th", AllowAttr: "class", Regexp: regexp.MustCompile(`line-num`)}, {Element: "th", AllowAttr: "class", Regexp: `^line-num$`},
{Element: "td", AllowAttr: "class", Regexp: regexp.MustCompile(`line-num`)}, {Element: "td", AllowAttr: "class", Regexp: `^line-num$`},
} }
} }
@ -51,13 +50,13 @@ func writeField(w io.Writer, element, class, field string) error {
return err return err
} }
if len(class) > 0 { if len(class) > 0 {
if _, err := io.WriteString(w, " class=\""); err != nil { if _, err := io.WriteString(w, ` class="`); err != nil {
return err return err
} }
if _, err := io.WriteString(w, class); err != nil { if _, err := io.WriteString(w, class); err != nil {
return err return err
} }
if _, err := io.WriteString(w, "\""); err != nil { if _, err := io.WriteString(w, `"`); err != nil {
return err return err
} }
} }

View File

@ -102,7 +102,7 @@ func (p *Renderer) Render(ctx *markup.RenderContext, input io.Reader, output io.
_, err = io.Copy(f, input) _, err = io.Copy(f, input)
if err != nil { if err != nil {
f.Close() _ = f.Close()
return fmt.Errorf("%s write data to temp file when rendering %s failed: %w", p.Name(), p.Command, err) return fmt.Errorf("%s write data to temp file when rendering %s failed: %w", p.Name(), p.Command, err)
} }
@ -113,10 +113,9 @@ func (p *Renderer) Render(ctx *markup.RenderContext, input io.Reader, output io.
args = append(args, f.Name()) args = append(args, f.Name())
} }
if ctx == nil || ctx.Ctx == nil { if ctx.Ctx == nil {
if ctx == nil { if !setting.IsProd || setting.IsInTesting {
log.Warn("RenderContext not provided defaulting to empty ctx") panic("RenderContext did not provide context")
ctx = &markup.RenderContext{}
} }
log.Warn("RenderContext did not provide context, defaulting to Shutdown context") log.Warn("RenderContext did not provide context, defaulting to Shutdown context")
ctx.Ctx = graceful.GetManager().ShutdownContext() ctx.Ctx = graceful.GetManager().ShutdownContext()

View File

@ -25,9 +25,6 @@ const (
IssueNameStyleRegexp = "regexp" IssueNameStyleRegexp = "regexp"
) )
// CSS class for action keywords (e.g. "closes: #1")
const keywordClass = "issue-keyword"
type globalVarsType struct { type globalVarsType struct {
hashCurrentPattern *regexp.Regexp hashCurrentPattern *regexp.Regexp
shortLinkPattern *regexp.Regexp shortLinkPattern *regexp.Regexp
@ -39,6 +36,7 @@ type globalVarsType struct {
emojiShortCodeRegex *regexp.Regexp emojiShortCodeRegex *regexp.Regexp
issueFullPattern *regexp.Regexp issueFullPattern *regexp.Regexp
filesChangedFullPattern *regexp.Regexp filesChangedFullPattern *regexp.Regexp
codePreviewPattern *regexp.Regexp
tagCleaner *regexp.Regexp tagCleaner *regexp.Regexp
nulCleaner *strings.Replacer nulCleaner *strings.Replacer
@ -88,6 +86,9 @@ var globalVars = sync.OnceValue[*globalVarsType](func() *globalVarsType {
// example: https://domain/org/repo/pulls/27/files#hash // example: https://domain/org/repo/pulls/27/files#hash
v.filesChangedFullPattern = regexp.MustCompile(`https?://(?:\S+/)[\w_.-]+/[\w_.-]+/pulls/((?:\w{1,10}-)?[1-9][0-9]*)/files([\?|#](\S+)?)?\b`) v.filesChangedFullPattern = regexp.MustCompile(`https?://(?:\S+/)[\w_.-]+/[\w_.-]+/pulls/((?:\w{1,10}-)?[1-9][0-9]*)/files([\?|#](\S+)?)?\b`)
// codePreviewPattern matches "http://domain/.../{owner}/{repo}/src/commit/{commit}/{filepath}#L10-L20"
v.codePreviewPattern = regexp.MustCompile(`https?://\S+/([^\s/]+)/([^\s/]+)/src/commit/([0-9a-f]{7,64})(/\S+)#(L\d+(-L\d+)?)`)
v.tagCleaner = regexp.MustCompile(`<((?:/?\w+/\w+)|(?:/[\w ]+/)|(/?[hH][tT][mM][lL]\b)|(/?[hH][eE][aA][dD]\b))`) v.tagCleaner = regexp.MustCompile(`<((?:/?\w+/\w+)|(?:/[\w ]+/)|(/?[hH][tT][mM][lL]\b)|(/?[hH][eE][aA][dD]\b))`)
v.nulCleaner = strings.NewReplacer("\000", "") v.nulCleaner = strings.NewReplacer("\000", "")
return v return v
@ -129,7 +130,7 @@ func CustomLinkURLSchemes(schemes []string) {
} }
withAuth = append(withAuth, s) withAuth = append(withAuth, s)
} }
common.LinkRegex, _ = xurls.StrictMatchingScheme(strings.Join(withAuth, "|")) common.GlobalVars().LinkRegex, _ = xurls.StrictMatchingScheme(strings.Join(withAuth, "|"))
} }
type postProcessError struct { type postProcessError struct {
@ -164,11 +165,7 @@ var defaultProcessors = []processor{
// emails with HTML links, parsing shortlinks in the format of [[Link]], like // emails with HTML links, parsing shortlinks in the format of [[Link]], like
// MediaWiki, linking issues in the format #ID, and mentions in the format // MediaWiki, linking issues in the format #ID, and mentions in the format
// @user, and others. // @user, and others.
func PostProcess( func PostProcess(ctx *RenderContext, input io.Reader, output io.Writer) error {
ctx *RenderContext,
input io.Reader,
output io.Writer,
) error {
return postProcess(ctx, defaultProcessors, input, output) return postProcess(ctx, defaultProcessors, input, output)
} }
@ -189,10 +186,7 @@ var commitMessageProcessors = []processor{
// RenderCommitMessage will use the same logic as PostProcess, but will disable // RenderCommitMessage will use the same logic as PostProcess, but will disable
// the shortLinkProcessor and will add a defaultLinkProcessor if defaultLink is // the shortLinkProcessor and will add a defaultLinkProcessor if defaultLink is
// set, which changes every text node into a link to the passed default link. // set, which changes every text node into a link to the passed default link.
func RenderCommitMessage( func RenderCommitMessage(ctx *RenderContext, content string) (string, error) {
ctx *RenderContext,
content string,
) (string, error) {
procs := commitMessageProcessors procs := commitMessageProcessors
return renderProcessString(ctx, procs, content) return renderProcessString(ctx, procs, content)
} }
@ -219,10 +213,7 @@ var emojiProcessors = []processor{
// RenderCommitMessage, but will disable the shortLinkProcessor and // RenderCommitMessage, but will disable the shortLinkProcessor and
// emailAddressProcessor, will add a defaultLinkProcessor if defaultLink is set, // emailAddressProcessor, will add a defaultLinkProcessor if defaultLink is set,
// which changes every text node into a link to the passed default link. // which changes every text node into a link to the passed default link.
func RenderCommitMessageSubject( func RenderCommitMessageSubject(ctx *RenderContext, defaultLink, content string) (string, error) {
ctx *RenderContext,
defaultLink, content string,
) (string, error) {
procs := slices.Clone(commitMessageSubjectProcessors) procs := slices.Clone(commitMessageSubjectProcessors)
procs = append(procs, func(ctx *RenderContext, node *html.Node) { procs = append(procs, func(ctx *RenderContext, node *html.Node) {
ch := &html.Node{Parent: node, Type: html.TextNode, Data: node.Data} ch := &html.Node{Parent: node, Type: html.TextNode, Data: node.Data}
@ -236,10 +227,7 @@ func RenderCommitMessageSubject(
} }
// RenderIssueTitle to process title on individual issue/pull page // RenderIssueTitle to process title on individual issue/pull page
func RenderIssueTitle( func RenderIssueTitle(ctx *RenderContext, title string) (string, error) {
ctx *RenderContext,
title string,
) (string, error) {
// do not render other issue/commit links in an issue's title - which in most cases is already a link. // do not render other issue/commit links in an issue's title - which in most cases is already a link.
return renderProcessString(ctx, []processor{ return renderProcessString(ctx, []processor{
emojiShortCodeProcessor, emojiShortCodeProcessor,
@ -257,10 +245,7 @@ func renderProcessString(ctx *RenderContext, procs []processor, content string)
// RenderDescriptionHTML will use similar logic as PostProcess, but will // RenderDescriptionHTML will use similar logic as PostProcess, but will
// use a single special linkProcessor. // use a single special linkProcessor.
func RenderDescriptionHTML( func RenderDescriptionHTML(ctx *RenderContext, content string) (string, error) {
ctx *RenderContext,
content string,
) (string, error) {
return renderProcessString(ctx, []processor{ return renderProcessString(ctx, []processor{
descriptionLinkProcessor, descriptionLinkProcessor,
emojiShortCodeProcessor, emojiShortCodeProcessor,
@ -270,10 +255,7 @@ func RenderDescriptionHTML(
// RenderEmoji for when we want to just process emoji and shortcodes // RenderEmoji for when we want to just process emoji and shortcodes
// in various places it isn't already run through the normal markdown processor // in various places it isn't already run through the normal markdown processor
func RenderEmoji( func RenderEmoji(ctx *RenderContext, content string) (string, error) {
ctx *RenderContext,
content string,
) (string, error) {
return renderProcessString(ctx, emojiProcessors, content) return renderProcessString(ctx, emojiProcessors, content)
} }
@ -333,6 +315,17 @@ func postProcess(ctx *RenderContext, procs []processor, input io.Reader, output
return nil return nil
} }
// isEmojiNode reports whether node is a "span" element that has a "class" or
// "data-attr-class" attribute whose value contains "emoji".
func isEmojiNode(node *html.Node) bool {
	if node.Type != html.ElementNode || node.Data != atom.Span.String() {
		return false
	}
	for i := range node.Attr {
		attr := &node.Attr[i]
		switch attr.Key {
		case "class", "data-attr-class":
			if strings.Contains(attr.Val, "emoji") {
				return true
			}
		}
	}
	return false
}
func visitNode(ctx *RenderContext, procs []processor, node *html.Node) *html.Node { func visitNode(ctx *RenderContext, procs []processor, node *html.Node) *html.Node {
// Add user-content- to IDs and "#" links if they don't already have them // Add user-content- to IDs and "#" links if they don't already have them
for idx, attr := range node.Attr { for idx, attr := range node.Attr {
@ -346,47 +339,27 @@ func visitNode(ctx *RenderContext, procs []processor, node *html.Node) *html.Nod
if attr.Key == "href" && strings.HasPrefix(attr.Val, "#") && notHasPrefix { if attr.Key == "href" && strings.HasPrefix(attr.Val, "#") && notHasPrefix {
node.Attr[idx].Val = "#user-content-" + val node.Attr[idx].Val = "#user-content-" + val
} }
if attr.Key == "class" && attr.Val == "emoji" {
procs = nil
}
} }
switch node.Type { switch node.Type {
case html.TextNode: case html.TextNode:
processTextNodes(ctx, procs, node) for _, proc := range procs {
proc(ctx, node) // it might add siblings
}
case html.ElementNode: case html.ElementNode:
if node.Data == "code" || node.Data == "pre" { if isEmojiNode(node) {
// ignore code and pre nodes // TextNode emoji will be converted to `<span class="emoji">`, then the next iteration will visit the "span"
// if we don't stop it, it will go into the TextNode again and create an infinite recursion
return node.NextSibling return node.NextSibling
} else if node.Data == "code" || node.Data == "pre" {
return node.NextSibling // ignore code and pre nodes
} else if node.Data == "img" { } else if node.Data == "img" {
return visitNodeImg(ctx, node) return visitNodeImg(ctx, node)
} else if node.Data == "video" { } else if node.Data == "video" {
return visitNodeVideo(ctx, node) return visitNodeVideo(ctx, node)
} else if node.Data == "a" { } else if node.Data == "a" {
// Restrict text in links to emojis procs = emojiProcessors // Restrict text in links to emojis
procs = emojiProcessors
} else if node.Data == "i" {
for _, attr := range node.Attr {
if attr.Key != "class" {
continue
}
classes := strings.Split(attr.Val, " ")
for i, class := range classes {
if class == "icon" {
classes[0], classes[i] = classes[i], classes[0]
attr.Val = strings.Join(classes, " ")
// Remove all children of icons
child := node.FirstChild
for child != nil {
node.RemoveChild(child)
child = node.FirstChild
}
break
}
}
}
} }
for n := node.FirstChild; n != nil; { for n := node.FirstChild; n != nil; {
n = visitNode(ctx, procs, n) n = visitNode(ctx, procs, n)
@ -396,22 +369,17 @@ func visitNode(ctx *RenderContext, procs []processor, node *html.Node) *html.Nod
return node.NextSibling return node.NextSibling
} }
// processTextNodes runs the passed node through various processors, in order to handle
// all kinds of special links handled by the post-processing.
func processTextNodes(ctx *RenderContext, procs []processor, node *html.Node) {
for _, p := range procs {
p(ctx, node)
}
}
// createKeyword() renders a highlighted version of an action keyword // createKeyword() renders a highlighted version of an action keyword
func createKeyword(content string) *html.Node { func createKeyword(ctx *RenderContext, content string) *html.Node {
// CSS class for action keywords (e.g. "closes: #1")
const keywordClass = "issue-keyword"
span := &html.Node{ span := &html.Node{
Type: html.ElementNode, Type: html.ElementNode,
Data: atom.Span.String(), Data: atom.Span.String(),
Attr: []html.Attribute{}, Attr: []html.Attribute{},
} }
span.Attr = append(span.Attr, html.Attribute{Key: "class", Val: keywordClass}) span.Attr = append(span.Attr, ctx.RenderInternal.NodeSafeAttr("class", keywordClass))
text := &html.Node{ text := &html.Node{
Type: html.TextNode, Type: html.TextNode,
@ -422,7 +390,7 @@ func createKeyword(content string) *html.Node {
return span return span
} }
func createLink(href, content, class string) *html.Node { func createLink(ctx *RenderContext, href, content, class string) *html.Node {
a := &html.Node{ a := &html.Node{
Type: html.ElementNode, Type: html.ElementNode,
Data: atom.A.String(), Data: atom.A.String(),
@ -432,7 +400,7 @@ func createLink(href, content, class string) *html.Node {
a.Attr = append(a.Attr, html.Attribute{Key: "data-markdown-generated-content"}) a.Attr = append(a.Attr, html.Attribute{Key: "data-markdown-generated-content"})
} }
if class != "" { if class != "" {
a.Attr = append(a.Attr, html.Attribute{Key: "class", Val: class}) a.Attr = append(a.Attr, ctx.RenderInternal.NodeSafeAttr("class", class))
} }
text := &html.Node{ text := &html.Node{

View File

@ -6,7 +6,6 @@ package markup
import ( import (
"html/template" "html/template"
"net/url" "net/url"
"regexp"
"strconv" "strconv"
"strings" "strings"
@ -16,9 +15,6 @@ import (
"golang.org/x/net/html" "golang.org/x/net/html"
) )
// codePreviewPattern matches "http://domain/.../{owner}/{repo}/src/commit/{commit}/{filepath}#L10-L20"
var codePreviewPattern = regexp.MustCompile(`https?://\S+/([^\s/]+)/([^\s/]+)/src/commit/([0-9a-f]{7,64})(/\S+)#(L\d+(-L\d+)?)`)
type RenderCodePreviewOptions struct { type RenderCodePreviewOptions struct {
FullURL string FullURL string
OwnerName string OwnerName string
@ -30,7 +26,7 @@ type RenderCodePreviewOptions struct {
} }
func renderCodeBlock(ctx *RenderContext, node *html.Node) (urlPosStart, urlPosStop int, htm template.HTML, err error) { func renderCodeBlock(ctx *RenderContext, node *html.Node) (urlPosStart, urlPosStop int, htm template.HTML, err error) {
m := codePreviewPattern.FindStringSubmatchIndex(node.Data) m := globalVars().codePreviewPattern.FindStringSubmatchIndex(node.Data)
if m == nil { if m == nil {
return 0, 0, "", nil return 0, 0, "", nil
} }
@ -66,8 +62,8 @@ func codePreviewPatternProcessor(ctx *RenderContext, node *html.Node) {
node = node.NextSibling node = node.NextSibling
continue continue
} }
urlPosStart, urlPosEnd, h, err := renderCodeBlock(ctx, node) urlPosStart, urlPosEnd, renderedCodeBlock, err := renderCodeBlock(ctx, node)
if err != nil || h == "" { if err != nil || renderedCodeBlock == "" {
if err != nil { if err != nil {
log.Error("Unable to render code preview: %v", err) log.Error("Unable to render code preview: %v", err)
} }
@ -84,7 +80,8 @@ func codePreviewPatternProcessor(ctx *RenderContext, node *html.Node) {
// then it is resolved as: "<p>{TextBefore}</p><div NewNode/><p>{TextAfter}</p>", // then it is resolved as: "<p>{TextBefore}</p><div NewNode/><p>{TextAfter}</p>",
// so unless it could correctly replace the parent "p/li" node, it is very difficult to eliminate the "TextBefore" empty node. // so unless it could correctly replace the parent "p/li" node, it is very difficult to eliminate the "TextBefore" empty node.
node.Data = textBefore node.Data = textBefore
node.Parent.InsertBefore(&html.Node{Type: html.RawNode, Data: string(h)}, next) renderedCodeNode := &html.Node{Type: html.RawNode, Data: string(ctx.RenderInternal.ProtectSafeAttrs(renderedCodeBlock))}
node.Parent.InsertBefore(renderedCodeNode, next)
if textAfter != "" { if textAfter != "" {
node.Parent.InsertBefore(&html.Node{Type: html.TextNode, Data: textAfter}, next) node.Parent.InsertBefore(&html.Node{Type: html.TextNode, Data: textAfter}, next)
} }

View File

@ -15,7 +15,7 @@ func emailAddressProcessor(ctx *RenderContext, node *html.Node) {
} }
mail := node.Data[m[2]:m[3]] mail := node.Data[m[2]:m[3]]
replaceContent(node, m[2], m[3], createLink("mailto:"+mail, mail, "mailto")) replaceContent(node, m[2], m[3], createLink(ctx, "mailto:"+mail, mail, "" /*mailto*/))
node = node.NextSibling.NextSibling node = node.NextSibling.NextSibling
} }
} }

View File

@ -13,15 +13,13 @@ import (
"golang.org/x/net/html/atom" "golang.org/x/net/html/atom"
) )
func createEmoji(content, class, name string) *html.Node { func createEmoji(ctx *RenderContext, content, name string) *html.Node {
span := &html.Node{ span := &html.Node{
Type: html.ElementNode, Type: html.ElementNode,
Data: atom.Span.String(), Data: atom.Span.String(),
Attr: []html.Attribute{}, Attr: []html.Attribute{},
} }
if class != "" { span.Attr = append(span.Attr, ctx.RenderInternal.NodeSafeAttr("class", "emoji"))
span.Attr = append(span.Attr, html.Attribute{Key: "class", Val: class})
}
if name != "" { if name != "" {
span.Attr = append(span.Attr, html.Attribute{Key: "aria-label", Val: name}) span.Attr = append(span.Attr, html.Attribute{Key: "aria-label", Val: name})
} }
@ -35,13 +33,13 @@ func createEmoji(content, class, name string) *html.Node {
return span return span
} }
func createCustomEmoji(alias string) *html.Node { func createCustomEmoji(ctx *RenderContext, alias string) *html.Node {
span := &html.Node{ span := &html.Node{
Type: html.ElementNode, Type: html.ElementNode,
Data: atom.Span.String(), Data: atom.Span.String(),
Attr: []html.Attribute{}, Attr: []html.Attribute{},
} }
span.Attr = append(span.Attr, html.Attribute{Key: "class", Val: "emoji"}) span.Attr = append(span.Attr, ctx.RenderInternal.NodeSafeAttr("class", "emoji"))
span.Attr = append(span.Attr, html.Attribute{Key: "aria-label", Val: alias}) span.Attr = append(span.Attr, html.Attribute{Key: "aria-label", Val: alias})
img := &html.Node{ img := &html.Node{
@ -77,7 +75,7 @@ func emojiShortCodeProcessor(ctx *RenderContext, node *html.Node) {
if converted == nil { if converted == nil {
// check if this is a custom reaction // check if this is a custom reaction
if _, exist := setting.UI.CustomEmojisMap[alias]; exist { if _, exist := setting.UI.CustomEmojisMap[alias]; exist {
replaceContent(node, m[0], m[1], createCustomEmoji(alias)) replaceContent(node, m[0], m[1], createCustomEmoji(ctx, alias))
node = node.NextSibling.NextSibling node = node.NextSibling.NextSibling
start = 0 start = 0
continue continue
@ -85,7 +83,7 @@ func emojiShortCodeProcessor(ctx *RenderContext, node *html.Node) {
continue continue
} }
replaceContent(node, m[0], m[1], createEmoji(converted.Emoji, "emoji", converted.Description)) replaceContent(node, m[0], m[1], createEmoji(ctx, converted.Emoji, converted.Description))
node = node.NextSibling.NextSibling node = node.NextSibling.NextSibling
start = 0 start = 0
} }
@ -107,7 +105,7 @@ func emojiProcessor(ctx *RenderContext, node *html.Node) {
start = m[1] start = m[1]
val := emoji.FromCode(codepoint) val := emoji.FromCode(codepoint)
if val != nil { if val != nil {
replaceContent(node, m[0], m[1], createEmoji(codepoint, "emoji", val.Description)) replaceContent(node, m[0], m[1], createEmoji(ctx, codepoint, val.Description))
node = node.NextSibling.NextSibling node = node.NextSibling.NextSibling
start = 0 start = 0
} }

View File

@ -57,10 +57,10 @@ func fullIssuePatternProcessor(ctx *RenderContext, node *html.Node) {
matchRepo := linkParts[len(linkParts)-3] matchRepo := linkParts[len(linkParts)-3]
if matchOrg == ctx.Metas["user"] && matchRepo == ctx.Metas["repo"] { if matchOrg == ctx.Metas["user"] && matchRepo == ctx.Metas["repo"] {
replaceContent(node, m[0], m[1], createLink(link, text, "ref-issue")) replaceContent(node, m[0], m[1], createLink(ctx, link, text, "ref-issue"))
} else { } else {
text = matchOrg + "/" + matchRepo + text text = matchOrg + "/" + matchRepo + text
replaceContent(node, m[0], m[1], createLink(link, text, "ref-issue")) replaceContent(node, m[0], m[1], createLink(ctx, link, text, "ref-issue"))
} }
node = node.NextSibling.NextSibling node = node.NextSibling.NextSibling
} }
@ -129,16 +129,16 @@ func issueIndexPatternProcessor(ctx *RenderContext, node *html.Node) {
log.Error("unable to expand template vars for ref %s, err: %v", ref.Issue, err) log.Error("unable to expand template vars for ref %s, err: %v", ref.Issue, err)
} }
link = createLink(res, reftext, "ref-issue ref-external-issue") link = createLink(ctx, res, reftext, "ref-issue ref-external-issue")
} else { } else {
// Path determines the type of link that will be rendered. It's unknown at this point whether // Path determines the type of link that will be rendered. It's unknown at this point whether
// the linked item is actually a PR or an issue. Luckily it's of no real consequence because // the linked item is actually a PR or an issue. Luckily it's of no real consequence because
// Gitea will redirect on click as appropriate. // Gitea will redirect on click as appropriate.
issuePath := util.Iif(ref.IsPull, "pulls", "issues") issuePath := util.Iif(ref.IsPull, "pulls", "issues")
if ref.Owner == "" { if ref.Owner == "" {
link = createLink(util.URLJoin(ctx.Links.Prefix(), ctx.Metas["user"], ctx.Metas["repo"], issuePath, ref.Issue), reftext, "ref-issue") link = createLink(ctx, util.URLJoin(ctx.Links.Prefix(), ctx.Metas["user"], ctx.Metas["repo"], issuePath, ref.Issue), reftext, "ref-issue")
} else { } else {
link = createLink(util.URLJoin(ctx.Links.Prefix(), ref.Owner, ref.Name, issuePath, ref.Issue), reftext, "ref-issue") link = createLink(ctx, util.URLJoin(ctx.Links.Prefix(), ref.Owner, ref.Name, issuePath, ref.Issue), reftext, "ref-issue")
} }
} }
@ -151,7 +151,7 @@ func issueIndexPatternProcessor(ctx *RenderContext, node *html.Node) {
// Decorate action keywords if actionable // Decorate action keywords if actionable
var keyword *html.Node var keyword *html.Node
if references.IsXrefActionable(ref, hasExtTrackFormat) { if references.IsXrefActionable(ref, hasExtTrackFormat) {
keyword = createKeyword(node.Data[ref.ActionLocation.Start:ref.ActionLocation.End]) keyword = createKeyword(ctx, node.Data[ref.ActionLocation.Start:ref.ActionLocation.End])
} else { } else {
keyword = &html.Node{ keyword = &html.Node{
Type: html.TextNode, Type: html.TextNode,
@ -177,7 +177,7 @@ func commitCrossReferencePatternProcessor(ctx *RenderContext, node *html.Node) {
} }
reftext := ref.Owner + "/" + ref.Name + "@" + base.ShortSha(ref.CommitSha) reftext := ref.Owner + "/" + ref.Name + "@" + base.ShortSha(ref.CommitSha)
link := createLink(util.URLJoin(ctx.Links.Prefix(), ref.Owner, ref.Name, "commit", ref.CommitSha), reftext, "commit") link := createLink(ctx, util.URLJoin(ctx.Links.Prefix(), ref.Owner, ref.Name, "commit", ref.CommitSha), reftext, "commit")
replaceContent(node, ref.RefLocation.Start, ref.RefLocation.End, link) replaceContent(node, ref.RefLocation.Start, ref.RefLocation.End, link)
node = node.NextSibling.NextSibling node = node.NextSibling.NextSibling

View File

@ -189,13 +189,13 @@ func shortLinkProcessor(ctx *RenderContext, node *html.Node) {
func linkProcessor(ctx *RenderContext, node *html.Node) { func linkProcessor(ctx *RenderContext, node *html.Node) {
next := node.NextSibling next := node.NextSibling
for node != nil && node != next { for node != nil && node != next {
m := common.LinkRegex.FindStringIndex(node.Data) m := common.GlobalVars().LinkRegex.FindStringIndex(node.Data)
if m == nil { if m == nil {
return return
} }
uri := node.Data[m[0]:m[1]] uri := node.Data[m[0]:m[1]]
replaceContent(node, m[0], m[1], createLink(uri, uri, "link")) replaceContent(node, m[0], m[1], createLink(ctx, uri, uri, "" /*link*/))
node = node.NextSibling.NextSibling node = node.NextSibling.NextSibling
} }
} }
@ -204,7 +204,7 @@ func linkProcessor(ctx *RenderContext, node *html.Node) {
func descriptionLinkProcessor(ctx *RenderContext, node *html.Node) { func descriptionLinkProcessor(ctx *RenderContext, node *html.Node) {
next := node.NextSibling next := node.NextSibling
for node != nil && node != next { for node != nil && node != next {
m := common.LinkRegex.FindStringIndex(node.Data) m := common.GlobalVars().LinkRegex.FindStringIndex(node.Data)
if m == nil { if m == nil {
return return
} }

View File

@ -33,7 +33,7 @@ func mentionProcessor(ctx *RenderContext, node *html.Node) {
if ok && strings.Contains(mention, "/") { if ok && strings.Contains(mention, "/") {
mentionOrgAndTeam := strings.Split(mention, "/") mentionOrgAndTeam := strings.Split(mention, "/")
if mentionOrgAndTeam[0][1:] == ctx.Metas["org"] && strings.Contains(teams, ","+strings.ToLower(mentionOrgAndTeam[1])+",") { if mentionOrgAndTeam[0][1:] == ctx.Metas["org"] && strings.Contains(teams, ","+strings.ToLower(mentionOrgAndTeam[1])+",") {
replaceContent(node, loc.Start, loc.End, createLink(util.URLJoin(ctx.Links.Prefix(), "org", ctx.Metas["org"], "teams", mentionOrgAndTeam[1]), mention, "mention")) replaceContent(node, loc.Start, loc.End, createLink(ctx, util.URLJoin(ctx.Links.Prefix(), "org", ctx.Metas["org"], "teams", mentionOrgAndTeam[1]), mention, "" /*mention*/))
node = node.NextSibling.NextSibling node = node.NextSibling.NextSibling
start = 0 start = 0
continue continue
@ -44,7 +44,7 @@ func mentionProcessor(ctx *RenderContext, node *html.Node) {
mentionedUsername := mention[1:] mentionedUsername := mention[1:]
if DefaultProcessorHelper.IsUsernameMentionable != nil && DefaultProcessorHelper.IsUsernameMentionable(ctx.Ctx, mentionedUsername) { if DefaultProcessorHelper.IsUsernameMentionable != nil && DefaultProcessorHelper.IsUsernameMentionable(ctx.Ctx, mentionedUsername) {
replaceContent(node, loc.Start, loc.End, createLink(util.URLJoin(ctx.Links.Prefix(), mentionedUsername), mention, "mention")) replaceContent(node, loc.Start, loc.End, createLink(ctx, util.URLJoin(ctx.Links.Prefix(), mentionedUsername), mention, "" /*mention*/))
node = node.NextSibling.NextSibling node = node.NextSibling.NextSibling
start = 0 start = 0
} else { } else {

View File

@ -0,0 +1,30 @@
// Copyright 2024 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package internal
import (
"bytes"
"io"
)
// finalProcessor is the last stage of the render pipeline: it buffers everything
// written to it and, on Close, turns the protected class attributes back into
// plain "class" attributes before forwarding the result to output.
type finalProcessor struct {
	renderInternal *RenderInternal // provides the secureIDPrefix that Close looks for
	output io.Writer // receives the processed content when Close is called
	buf bytes.Buffer // accumulates all data passed to Write
}
// Write appends data to the internal buffer; nothing reaches the output until Close.
// bytes.Buffer.Write always reports len(data) and a nil error.
func (p *finalProcessor) Write(data []byte) (int, error) {
	return p.buf.Write(data)
}
// Close rewrites every protected class attribute carrying this render's secure
// prefix back to a plain class attribute and writes the buffered content to output.
func (p *finalProcessor) Close() error {
	// TODO: reading the whole markdown isn't a problem at the moment,
	// because "postProcess" already does so. In the future we could optimize the code to process data on the fly.
	protectedClass := []byte(` data-attr-class="` + p.renderInternal.secureIDPrefix)
	plainClass := []byte(` class="`)
	recovered := bytes.ReplaceAll(p.buf.Bytes(), protectedClass, plainClass)
	_, err := p.output.Write(recovered)
	return err
}

View File

@ -0,0 +1,61 @@
// Copyright 2024 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package internal
import (
"bytes"
"html/template"
"io"
"testing"
"github.com/stretchr/testify/assert"
)
// TestRenderInternal checks the protect/recover round trip of class attributes,
// the behavior of a non-initialized RenderInternal, and that a processor with a
// different secure ID leaves foreign protected attributes untouched.
func TestRenderInternal(t *testing.T) {
	roundTrips := []struct {
		input, protected, recovered string
	}{
		{
			input:     `<div class="test">class="content"</div>`,
			protected: `<div data-attr-class="sec:test">class="content"</div>`,
			recovered: `<div class="test">class="content"</div>`,
		},
		{
			input:     "<div\nclass=\"test\" data-xxx></div>",
			protected: `<div data-attr-class="sec:test" data-xxx></div>`,
			recovered: `<div class="test" data-xxx></div>`,
		},
	}
	for _, tc := range roundTrips {
		var ri RenderInternal
		sink := &bytes.Buffer{}
		processor := ri.init("sec", sink)
		got := ri.ProtectSafeAttrs(template.HTML(tc.input))
		assert.EqualValues(t, tc.protected, got)
		// feed the protected HTML through the final processor and verify it is restored
		_, _ = io.WriteString(processor, string(got))
		_ = processor.Close()
		assert.EqualValues(t, tc.recovered, sink.String())
	}

	var r1, r2 RenderInternal
	html1 := r1.ProtectSafeAttrs(`<div class="test"></div>`)
	assert.EqualValues(t, `<div class="test"></div>`, html1, "non-initialized RenderInternal should not protect any attributes")

	_ = r1.init("sec", nil)
	html1 = r1.ProtectSafeAttrs(`<div class="test"></div>`)
	assert.EqualValues(t, `<div data-attr-class="sec:test"></div>`, html1)
	assert.EqualValues(t, "data-attr-class", r1.SafeAttr("class"))
	assert.EqualValues(t, "sec:val", r1.SafeValue("val"))

	value, found := r1.RecoverProtectedValue("sec:val")
	assert.True(t, found)
	assert.EqualValues(t, "val", value)
	value, found = r1.RecoverProtectedValue("other:val")
	assert.False(t, found)
	assert.Empty(t, value)

	// r2 uses another secure ID, so content protected by r1 must pass through unchanged
	sink2 := &bytes.Buffer{}
	processor2 := r2.init("sec-other", sink2)
	_, _ = io.WriteString(processor2, string(html1))
	_ = processor2.Close()
	assert.EqualValues(t, `<div data-attr-class="sec:test"></div>`, sink2.String(), "different secureID should not recover the value")
}

View File

@ -0,0 +1,82 @@
// Copyright 2024 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package internal
import (
"crypto/rand"
"encoding/base64"
"html/template"
"io"
"regexp"
"strings"
"sync"
"code.gitea.io/gitea/modules/htmlutil"
"golang.org/x/net/html"
)
// reAttrClass lazily compiles (once) the pattern that finds a double-quoted
// class attribute inside an opening tag. Capture groups: (1) the tag text before
// the attribute, (2) the class value, (3) the remainder of the tag up to ">".
var reAttrClass = sync.OnceValue(func() *regexp.Regexp {
	// TODO: it isn't a problem at the moment because our HTML contents are always well constructed
	const classAttrPattern = `(<[^>]+)\s+class="([^"]+)"([^>]*>)`
	return regexp.MustCompile(classAttrPattern)
})
// RenderInternal also works without initialization.
// If it is not initialized (no secureID), it does not protect any attributes
// and returns the original name & value.
type RenderInternal struct {
	secureID string // set by init; empty means "not initialized"
	secureIDPrefix string // secureID followed by ":", the marker in front of protected values
}
// Init generates a random secure ID (12 random bytes, URL-safe base64 encoded),
// stores it on r and returns the writer that post-processes content before it
// reaches output. It panics if no random bytes can be read.
func (r *RenderInternal) Init(output io.Writer) io.WriteCloser {
	randomBytes := make([]byte, 12)
	if _, err := rand.Read(randomBytes); err != nil {
		panic("unable to generate secure id")
	}
	secID := base64.URLEncoding.EncodeToString(randomBytes)
	return r.init(secID, output)
}
// init installs secID as the secure ID (and derives the "secID:" prefix), then
// returns a finalProcessor bound to r that writes its result to output.
func (r *RenderInternal) init(secID string, output io.Writer) io.WriteCloser {
	r.secureID, r.secureIDPrefix = secID, secID+":"
	return &finalProcessor{output: output, renderInternal: r}
}
// RecoverProtectedValue strips the secure prefix from v. It returns the
// remaining value and true when v carries the prefix, otherwise "" and false.
func (r *RenderInternal) RecoverProtectedValue(v string) (string, bool) {
	original, protected := strings.CutPrefix(v, r.secureIDPrefix)
	if !protected {
		return "", false
	}
	return original, true
}
// SafeAttr returns the protected attribute name ("data-attr-" + name) when r has
// been initialized; without a secure ID the name is returned unchanged.
func (r *RenderInternal) SafeAttr(name string) string {
	if r.secureID != "" {
		return "data-attr-" + name
	}
	return name
}
// SafeValue returns val prefixed with "<secureID>:" when r has been initialized;
// without a secure ID the value is returned unchanged.
func (r *RenderInternal) SafeValue(val string) string {
	if r.secureID != "" {
		return r.secureID + ":" + val
	}
	return val
}
// NodeSafeAttr builds an html.Attribute whose key and value are the protected
// forms of attr and val (see SafeAttr and SafeValue).
func (r *RenderInternal) NodeSafeAttr(attr, val string) html.Attribute {
	var protected html.Attribute
	protected.Key = r.SafeAttr(attr)
	protected.Val = r.SafeValue(val)
	return protected
}
// ProtectSafeAttrs rewrites every class="..." attribute matched by reAttrClass
// into data-attr-class="<secureIDPrefix>...". A non-initialized RenderInternal
// returns content unchanged.
func (r *RenderInternal) ProtectSafeAttrs(content template.HTML) template.HTML {
	if r.secureID == "" {
		return content
	}
	// $1 = tag text before the attribute, $2 = class value, $3 = rest of the tag
	replacement := `$1 data-attr-class="` + r.secureIDPrefix + `$2"$3`
	protected := reAttrClass().ReplaceAllString(string(content), replacement)
	return template.HTML(protected)
}
// FormatWithSafeAttrs formats fmt with the arguments a via htmlutil.HTMLFormat,
// protects the class attributes of the result and writes it to w.
// It returns the error reported by w.Write.
func (r *RenderInternal) FormatWithSafeAttrs(w io.Writer, fmt string, a ...any) error {
	formatted := htmlutil.HTMLFormat(fmt, a...)
	protected := r.ProtectSafeAttrs(formatted)
	_, err := w.Write([]byte(protected))
	return err
}

View File

@ -34,13 +34,6 @@ func NewDetails() *Details {
} }
} }
// IsDetails returns true if the given node implements the Details interface,
// otherwise false.
func IsDetails(node ast.Node) bool {
_, ok := node.(*Details)
return ok
}
// Summary is a block that contains the summary of details block // Summary is a block that contains the summary of details block
type Summary struct { type Summary struct {
ast.BaseBlock ast.BaseBlock
@ -66,13 +59,6 @@ func NewSummary() *Summary {
} }
} }
// IsSummary returns true if the given node implements the Summary interface,
// otherwise false.
func IsSummary(node ast.Node) bool {
_, ok := node.(*Summary)
return ok
}
// TaskCheckBoxListItem is a block that represents a list item of a markdown block with a checkbox // TaskCheckBoxListItem is a block that represents a list item of a markdown block with a checkbox
type TaskCheckBoxListItem struct { type TaskCheckBoxListItem struct {
*ast.ListItem *ast.ListItem
@ -103,14 +89,7 @@ func NewTaskCheckBoxListItem(listItem *ast.ListItem) *TaskCheckBoxListItem {
} }
} }
// IsTaskCheckBoxListItem returns true if the given node implements the TaskCheckBoxListItem interface, // Icon is an inline for a Fomantic UI icon
// otherwise false.
func IsTaskCheckBoxListItem(node ast.Node) bool {
_, ok := node.(*TaskCheckBoxListItem)
return ok
}
// Icon is an inline for a fomantic icon
type Icon struct { type Icon struct {
ast.BaseInline ast.BaseInline
Name []byte Name []byte
@ -139,13 +118,6 @@ func NewIcon(name string) *Icon {
} }
} }
// IsIcon returns true if the given node implements the Icon interface,
// otherwise false.
func IsIcon(node ast.Node) bool {
_, ok := node.(*Icon)
return ok
}
// ColorPreview is an inline for a color preview // ColorPreview is an inline for a color preview
type ColorPreview struct { type ColorPreview struct {
ast.BaseInline ast.BaseInline

View File

@ -7,9 +7,11 @@ import (
"fmt" "fmt"
"regexp" "regexp"
"strings" "strings"
"sync"
"code.gitea.io/gitea/modules/container" "code.gitea.io/gitea/modules/container"
"code.gitea.io/gitea/modules/markup" "code.gitea.io/gitea/modules/markup"
"code.gitea.io/gitea/modules/markup/internal"
"code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/setting"
"github.com/yuin/goldmark/ast" "github.com/yuin/goldmark/ast"
@ -23,11 +25,13 @@ import (
// ASTTransformer is a default transformer of the goldmark tree. // ASTTransformer is a default transformer of the goldmark tree.
type ASTTransformer struct { type ASTTransformer struct {
renderInternal *internal.RenderInternal
attentionTypes container.Set[string] attentionTypes container.Set[string]
} }
func NewASTTransformer() *ASTTransformer { func NewASTTransformer(renderInternal *internal.RenderInternal) *ASTTransformer {
return &ASTTransformer{ return &ASTTransformer{
renderInternal: renderInternal,
attentionTypes: container.SetOf("note", "tip", "important", "warning", "caution"), attentionTypes: container.SetOf("note", "tip", "important", "warning", "caution"),
} }
} }
@ -109,12 +113,16 @@ func (g *ASTTransformer) Transform(node *ast.Document, reader text.Reader, pc pa
} }
} }
// NewHTMLRenderer creates a HTMLRenderer to render // it is copied from old code, which is quite doubtful whether it is correct
// in the gitea form. var reValidIconName = sync.OnceValue[*regexp.Regexp](func() *regexp.Regexp {
func NewHTMLRenderer(opts ...html.Option) renderer.NodeRenderer { return regexp.MustCompile(`^[-\w]+$`) // old: regexp.MustCompile("^[a-z ]+$")
})
// NewHTMLRenderer creates a HTMLRenderer to render in the gitea form.
func NewHTMLRenderer(renderInternal *internal.RenderInternal, opts ...html.Option) renderer.NodeRenderer {
r := &HTMLRenderer{ r := &HTMLRenderer{
Config: html.NewConfig(), renderInternal: renderInternal,
reValidName: regexp.MustCompile("^[a-z ]+$"), Config: html.NewConfig(),
} }
for _, opt := range opts { for _, opt := range opts {
opt.SetHTMLOption(&r.Config) opt.SetHTMLOption(&r.Config)
@ -126,7 +134,7 @@ func NewHTMLRenderer(opts ...html.Option) renderer.NodeRenderer {
// renders gitea specific features. // renders gitea specific features.
type HTMLRenderer struct { type HTMLRenderer struct {
html.Config html.Config
reValidName *regexp.Regexp renderInternal *internal.RenderInternal
} }
// RegisterFuncs implements renderer.NodeRenderer.RegisterFuncs. // RegisterFuncs implements renderer.NodeRenderer.RegisterFuncs.
@ -214,12 +222,13 @@ func (r *HTMLRenderer) renderIcon(w util.BufWriter, source []byte, node ast.Node
return ast.WalkContinue, nil return ast.WalkContinue, nil
} }
if !r.reValidName.MatchString(name) { if !reValidIconName().MatchString(name) {
// skip this // skip this
return ast.WalkContinue, nil return ast.WalkContinue, nil
} }
_, err := w.WriteString(fmt.Sprintf(`<i class="icon %s"></i>`, name)) // FIXME: the "icon xxx" is from Fomantic UI, it's really questionable whether it still works correctly
err := r.renderInternal.FormatWithSafeAttrs(w, `<i class="icon %s"></i>`, name)
if err != nil { if err != nil {
return ast.WalkStop, err return ast.WalkStop, err
} }

View File

@ -9,7 +9,6 @@ import (
"html/template" "html/template"
"io" "io"
"strings" "strings"
"sync"
"code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/markup" "code.gitea.io/gitea/modules/markup"
@ -29,11 +28,6 @@ import (
"github.com/yuin/goldmark/util" "github.com/yuin/goldmark/util"
) )
var (
specMarkdown goldmark.Markdown
specMarkdownOnce sync.Once
)
var ( var (
renderContextKey = parser.NewContextKey() renderContextKey = parser.NewContextKey()
renderConfigKey = parser.NewContextKey() renderConfigKey = parser.NewContextKey()
@ -68,85 +62,95 @@ func newParserContext(ctx *markup.RenderContext) parser.Context {
return pc return pc
} }
// SpecializedMarkdown sets up the Gitea specific markdown extensions type GlodmarkRender struct {
func SpecializedMarkdown() goldmark.Markdown { ctx *markup.RenderContext
specMarkdownOnce.Do(func() {
specMarkdown = goldmark.New(
goldmark.WithExtensions(
extension.NewTable(
extension.WithTableCellAlignMethod(extension.TableCellAlignAttribute)),
extension.Strikethrough,
extension.TaskList,
extension.DefinitionList,
common.FootnoteExtension,
highlighting.NewHighlighting(
highlighting.WithFormatOptions(
chromahtml.WithClasses(true),
chromahtml.PreventSurroundingPre(true),
),
highlighting.WithWrapperRenderer(func(w util.BufWriter, c highlighting.CodeBlockContext, entering bool) {
if entering {
language, _ := c.Language()
if language == nil {
language = []byte("text")
}
languageStr := string(language) goldmarkMarkdown goldmark.Markdown
preClasses := []string{"code-block"}
if languageStr == "mermaid" || languageStr == "math" {
preClasses = append(preClasses, "is-loading")
}
_, err := w.WriteString(`<pre class="` + strings.Join(preClasses, " ") + `">`)
if err != nil {
return
}
// include language-x class as part of commonmark spec
// the "display" class is used by "js/markup/math.js" to render the code element as a block
_, err = w.WriteString(`<code class="chroma language-` + string(language) + ` display">`)
if err != nil {
return
}
} else {
_, err := w.WriteString("</code></pre>")
if err != nil {
return
}
}
}),
),
math.NewExtension(
math.Enabled(setting.Markdown.EnableMath),
),
meta.Meta,
),
goldmark.WithParserOptions(
parser.WithAttribute(),
parser.WithAutoHeadingID(),
parser.WithASTTransformers(
util.Prioritized(NewASTTransformer(), 10000),
),
),
goldmark.WithRendererOptions(
html.WithUnsafe(),
),
)
// Override the original Tasklist renderer!
specMarkdown.Renderer().AddOptions(
renderer.WithNodeRenderers(
util.Prioritized(NewHTMLRenderer(), 10),
),
)
})
return specMarkdown
} }
// actualRender renders Markdown to HTML without handling special links. func (r *GlodmarkRender) Convert(source []byte, writer io.Writer, opts ...parser.ParseOption) error {
func actualRender(ctx *markup.RenderContext, input io.Reader, output io.Writer) error { return r.goldmarkMarkdown.Convert(source, writer, opts...)
converter := SpecializedMarkdown() }
// Renderer returns the renderer of the wrapped goldmark.Markdown instance.
func (r *GlodmarkRender) Renderer() renderer.Renderer {
	return r.goldmarkMarkdown.Renderer()
}
// highlightingRenderer writes the wrapper around a highlighted code block:
// `<pre><code>` (with protected class attributes) when entering, and the closing
// tags when leaving. The callback has no return value, so a failed write simply
// stops further output for this call.
func (r *GlodmarkRender) highlightingRenderer(w util.BufWriter, c highlighting.CodeBlockContext, entering bool) {
	if !entering {
		_, _ = w.WriteString("</code></pre>")
		return
	}

	// code blocks without a language are treated as plain text
	lang, _ := c.Language()
	if lang == nil {
		lang = []byte("text")
	}

	classes := []string{"code-block"}
	switch string(lang) {
	case "mermaid", "math":
		classes = append(classes, "is-loading")
	}

	renderInternal := &r.ctx.RenderInternal
	if err := renderInternal.FormatWithSafeAttrs(w, `<pre class="%s">`, strings.Join(classes, " ")); err != nil {
		return
	}

	// include language-x class as part of commonmark spec
	// the "display" class is used by "js/markup/math.js" to render the code element as a block
	_ = renderInternal.FormatWithSafeAttrs(w, `<code class="chroma language-%s display">`, string(lang))
}
// SpecializedMarkdown sets up the Gitea specific markdown extensions.
// A new goldmark instance is built on every call and bound to ctx, so that the
// renderers created here share ctx.RenderInternal.
func SpecializedMarkdown(ctx *markup.RenderContext) *GlodmarkRender {
	// TODO: it could use a pool to cache the renderers to reuse them with different contexts
	// at the moment it is fast enough (see the benchmarks)
	r := &GlodmarkRender{ctx: ctx}
	r.goldmarkMarkdown = goldmark.New(
		goldmark.WithExtensions(
			extension.NewTable(extension.WithTableCellAlignMethod(extension.TableCellAlignAttribute)),
			extension.Strikethrough,
			extension.TaskList,
			extension.DefinitionList,
			common.FootnoteExtension,
			highlighting.NewHighlighting(
				highlighting.WithFormatOptions(
					chromahtml.WithClasses(true),
					chromahtml.PreventSurroundingPre(true),
				),
				// the <pre>/<code> wrapper is written by r.highlightingRenderer
				highlighting.WithWrapperRenderer(r.highlightingRenderer),
			),
			math.NewExtension(&ctx.RenderInternal, math.Enabled(setting.Markdown.EnableMath)),
			meta.Meta,
		),
		goldmark.WithParserOptions(
			parser.WithAttribute(),
			parser.WithAutoHeadingID(),
			parser.WithASTTransformers(util.Prioritized(NewASTTransformer(&ctx.RenderInternal), 10000)),
		),
		// raw HTML is passed through; the comment on render states that its output MUST be sanitized separately
		goldmark.WithRendererOptions(html.WithUnsafe()),
	)
	// Override the original Tasklist renderer!
	r.goldmarkMarkdown.Renderer().AddOptions(
		renderer.WithNodeRenderers(util.Prioritized(NewHTMLRenderer(&ctx.RenderInternal), 10)),
	)
	return r
}
// render calls goldmark render to convert Markdown to HTML
// NOTE: The output of this method MUST get sanitized separately!!!
func render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error {
converter := SpecializedMarkdown(ctx)
lw := &limitWriter{ lw := &limitWriter{
w: output, w: output,
limit: setting.UI.MaxDisplayFileSize * 3, limit: setting.UI.MaxDisplayFileSize * 3,
@ -160,8 +164,8 @@ func actualRender(ctx *markup.RenderContext, input io.Reader, output io.Writer)
} }
log.Warn("Unable to render markdown due to panic in goldmark: %v", err) log.Warn("Unable to render markdown due to panic in goldmark: %v", err)
if log.IsDebug() { if (!setting.IsProd && !setting.IsInTesting) || log.IsDebug() {
log.Debug("Panic in markdown: %v\n%s", err, log.Stack(2)) log.Error("Panic in markdown: %v\n%s", err, log.Stack(2))
} }
}() }()
@ -200,26 +204,6 @@ func actualRender(ctx *markup.RenderContext, input io.Reader, output io.Writer)
return nil return nil
} }
// Note: The output of this method must get sanitized.
func render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error {
defer func() {
err := recover()
if err == nil {
return
}
log.Warn("Unable to render markdown due to panic in goldmark - will return raw bytes")
if log.IsDebug() {
log.Debug("Panic in markdown: %v\n%s", err, log.Stack(2))
}
_, err = io.Copy(output, input)
if err != nil {
log.Error("io.Copy failed: %v", err)
}
}()
return actualRender(ctx, input, output)
}
// MarkupName describes markup's name // MarkupName describes markup's name
var MarkupName = "markdown" var MarkupName = "markdown"

View File

@ -1051,3 +1051,17 @@ func TestAttention(t *testing.T) {
// legacy GitHub style // legacy GitHub style
test(`> **warning**`, renderAttention("warning", "octicon-alert")+"\n</blockquote>") test(`> **warning**`, renderAttention("warning", "octicon-alert")+"\n</blockquote>")
} }
// BenchmarkSpecializedMarkdown measures the cost of constructing a specialized
// markdown renderer for an empty render context.
func BenchmarkSpecializedMarkdown(b *testing.B) {
	// 240856 4719 ns/op
	for remaining := b.N; remaining > 0; remaining-- {
		markdown.SpecializedMarkdown(&markup.RenderContext{})
	}
}
// BenchmarkMarkdownRender measures rendering a small document (a link followed
// by a two-item list) with a fresh render context per iteration.
func BenchmarkMarkdownRender(b *testing.B) {
	// 23202 50840 ns/op
	for remaining := b.N; remaining > 0; remaining-- {
		_, _ = markdown.RenderString(&markup.RenderContext{Ctx: context.Background()}, "https://example.com\n- a\n- b\n")
	}
}

View File

@ -4,17 +4,21 @@
package math package math
import ( import (
"code.gitea.io/gitea/modules/markup/internal"
gast "github.com/yuin/goldmark/ast" gast "github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/renderer" "github.com/yuin/goldmark/renderer"
"github.com/yuin/goldmark/util" "github.com/yuin/goldmark/util"
) )
// BlockRenderer represents a renderer for math Blocks // BlockRenderer represents a renderer for math Blocks
type BlockRenderer struct{} type BlockRenderer struct {
renderInternal *internal.RenderInternal
}
// NewBlockRenderer creates a new renderer for math Blocks // NewBlockRenderer creates a new renderer for math Blocks
func NewBlockRenderer() renderer.NodeRenderer { func NewBlockRenderer(renderInternal *internal.RenderInternal) renderer.NodeRenderer {
return &BlockRenderer{} return &BlockRenderer{renderInternal: renderInternal}
} }
// RegisterFuncs registers the renderer for math Blocks // RegisterFuncs registers the renderer for math Blocks
@ -33,7 +37,7 @@ func (r *BlockRenderer) writeLines(w util.BufWriter, source []byte, n gast.Node)
func (r *BlockRenderer) renderBlock(w util.BufWriter, source []byte, node gast.Node, entering bool) (gast.WalkStatus, error) { func (r *BlockRenderer) renderBlock(w util.BufWriter, source []byte, node gast.Node, entering bool) (gast.WalkStatus, error) {
n := node.(*Block) n := node.(*Block)
if entering { if entering {
_, _ = w.WriteString(`<pre class="code-block is-loading"><code class="chroma language-math display">`) _ = r.renderInternal.FormatWithSafeAttrs(w, `<pre class="code-block is-loading"><code class="chroma language-math display">`)
r.writeLines(w, source, n) r.writeLines(w, source, n)
} else { } else {
_, _ = w.WriteString(`</code></pre>` + "\n") _, _ = w.WriteString(`</code></pre>` + "\n")

View File

@ -6,17 +6,21 @@ package math
import ( import (
"bytes" "bytes"
"code.gitea.io/gitea/modules/markup/internal"
"github.com/yuin/goldmark/ast" "github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/renderer" "github.com/yuin/goldmark/renderer"
"github.com/yuin/goldmark/util" "github.com/yuin/goldmark/util"
) )
// InlineRenderer is an inline renderer // InlineRenderer is an inline renderer
type InlineRenderer struct{} type InlineRenderer struct {
renderInternal *internal.RenderInternal
}
// NewInlineRenderer returns a new renderer for inline math // NewInlineRenderer returns a new renderer for inline math
func NewInlineRenderer() renderer.NodeRenderer { func NewInlineRenderer(renderInternal *internal.RenderInternal) renderer.NodeRenderer {
return &InlineRenderer{} return &InlineRenderer{renderInternal: renderInternal}
} }
func (r *InlineRenderer) renderInline(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) { func (r *InlineRenderer) renderInline(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
@ -25,7 +29,7 @@ func (r *InlineRenderer) renderInline(w util.BufWriter, source []byte, n ast.Nod
if _, ok := n.(*InlineBlock); ok { if _, ok := n.(*InlineBlock); ok {
extraClass = "display " extraClass = "display "
} }
_, _ = w.WriteString(`<code class="language-math ` + extraClass + `is-loading">`) _ = r.renderInternal.FormatWithSafeAttrs(w, `<code class="language-math %sis-loading">`, extraClass)
for c := n.FirstChild(); c != nil; c = c.NextSibling() { for c := n.FirstChild(); c != nil; c = c.NextSibling() {
segment := c.(*ast.Text).Segment segment := c.(*ast.Text).Segment
value := util.EscapeHTML(segment.Value(source)) value := util.EscapeHTML(segment.Value(source))

View File

@ -4,6 +4,8 @@
package math package math
import ( import (
"code.gitea.io/gitea/modules/markup/internal"
"github.com/yuin/goldmark" "github.com/yuin/goldmark"
"github.com/yuin/goldmark/parser" "github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/renderer" "github.com/yuin/goldmark/renderer"
@ -12,6 +14,7 @@ import (
// Extension is a math extension // Extension is a math extension
type Extension struct { type Extension struct {
renderInternal *internal.RenderInternal
enabled bool enabled bool
parseDollarInline bool parseDollarInline bool
parseDollarBlock bool parseDollarBlock bool
@ -39,38 +42,10 @@ func Enabled(enable ...bool) Option {
}) })
} }
// WithInlineDollarParser enables or disables the parsing of $...$
func WithInlineDollarParser(enable ...bool) Option {
value := true
if len(enable) > 0 {
value = enable[0]
}
return extensionFunc(func(e *Extension) {
e.parseDollarInline = value
})
}
// WithBlockDollarParser enables or disables the parsing of $$...$$
func WithBlockDollarParser(enable ...bool) Option {
value := true
if len(enable) > 0 {
value = enable[0]
}
return extensionFunc(func(e *Extension) {
e.parseDollarBlock = value
})
}
// Math represents a math extension with default rendered delimiters
var Math = &Extension{
enabled: true,
parseDollarBlock: true,
parseDollarInline: true,
}
// NewExtension creates a new math extension with the provided options // NewExtension creates a new math extension with the provided options
func NewExtension(opts ...Option) *Extension { func NewExtension(renderInternal *internal.RenderInternal, opts ...Option) *Extension {
r := &Extension{ r := &Extension{
renderInternal: renderInternal,
enabled: true, enabled: true,
parseDollarBlock: true, parseDollarBlock: true,
parseDollarInline: true, parseDollarInline: true,
@ -102,7 +77,7 @@ func (e *Extension) Extend(m goldmark.Markdown) {
m.Parser().AddOptions(parser.WithInlineParsers(inlines...)) m.Parser().AddOptions(parser.WithInlineParsers(inlines...))
m.Renderer().AddOptions(renderer.WithNodeRenderers( m.Renderer().AddOptions(renderer.WithNodeRenderers(
util.Prioritized(NewBlockRenderer(), 501), util.Prioritized(NewBlockRenderer(e.renderInternal), 501),
util.Prioritized(NewInlineRenderer(), 502), util.Prioritized(NewInlineRenderer(e.renderInternal), 502),
)) ))
} }

View File

@ -11,10 +11,8 @@ import (
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
) )
/* // IssueTemplate is a legacy to keep the unit tests working.
IssueTemplate is a legacy to keep the unit tests working. // Copied from structs.IssueTemplate, the original type has been changed a lot to support yaml template.
Copied from structs.IssueTemplate, the original type has been changed a lot to support yaml template.
*/
type IssueTemplate struct { type IssueTemplate struct {
Name string `json:"name" yaml:"name"` Name string `json:"name" yaml:"name"`
Title string `json:"title" yaml:"title"` Title string `json:"title" yaml:"title"`

View File

@ -32,7 +32,8 @@ func (r *HTMLRenderer) renderAttention(w util.BufWriter, source []byte, node ast
default: // including "note" default: // including "note"
octiconName = "info" octiconName = "info"
} }
_, _ = w.WriteString(string(svg.RenderHTML("octicon-"+octiconName, 16, "attention-icon attention-"+n.AttentionType))) svgHTML := svg.RenderHTML("octicon-"+octiconName, 16, "attention-icon attention-"+n.AttentionType)
_, _ = w.WriteString(string(r.renderInternal.ProtectSafeAttrs(svgHTML)))
} }
return ast.WalkContinue, nil return ast.WalkContinue, nil
} }
@ -128,13 +129,13 @@ func (g *ASTTransformer) transformBlockquote(v *ast.Blockquote, reader text.Read
} }
// color the blockquote // color the blockquote
v.SetAttributeString("class", []byte("attention-header attention-"+attentionType)) v.SetAttributeString(g.renderInternal.SafeAttr("class"), []byte(g.renderInternal.SafeValue("attention-header attention-"+attentionType)))
// create an emphasis to make it bold // create an emphasis to make it bold
attentionParagraph := ast.NewParagraph() attentionParagraph := ast.NewParagraph()
g.applyElementDir(attentionParagraph) g.applyElementDir(attentionParagraph)
emphasis := ast.NewEmphasis(2) emphasis := ast.NewEmphasis(2)
emphasis.SetAttributeString("class", []byte("attention-"+attentionType)) emphasis.SetAttributeString(g.renderInternal.SafeAttr("class"), []byte(g.renderInternal.SafeValue("attention-"+attentionType)))
attentionAstString := ast.NewString([]byte(cases.Title(language.English).String(attentionType))) attentionAstString := ast.NewString([]byte(cases.Title(language.English).String(attentionType)))

View File

@ -5,7 +5,6 @@ package markdown
import ( import (
"bytes" "bytes"
"fmt"
"strings" "strings"
"code.gitea.io/gitea/modules/markup" "code.gitea.io/gitea/modules/markup"
@ -40,7 +39,7 @@ func (r *HTMLRenderer) renderCodeSpan(w util.BufWriter, source []byte, n ast.Nod
r.Writer.RawWrite(w, value) r.Writer.RawWrite(w, value)
} }
case *ColorPreview: case *ColorPreview:
_, _ = w.WriteString(fmt.Sprintf(`<span class="color-preview" style="background-color: %v"></span>`, string(v.Color))) _ = r.renderInternal.FormatWithSafeAttrs(w, `<span class="color-preview" style="background-color: %s"></span>`, string(v.Color))
} }
} }
return ast.WalkSkipChildren, nil return ast.WalkSkipChildren, nil

View File

@ -72,7 +72,7 @@ func (g *ASTTransformer) transformList(_ *markup.RenderContext, v *ast.List, rc
} }
newChild := NewTaskCheckBoxListItem(listItem) newChild := NewTaskCheckBoxListItem(listItem)
newChild.IsChecked = taskCheckBox.IsChecked newChild.IsChecked = taskCheckBox.IsChecked
newChild.SetAttributeString("class", []byte("task-list-item")) newChild.SetAttributeString(g.renderInternal.SafeAttr("class"), []byte(g.renderInternal.SafeValue("task-list-item")))
segments := newChild.FirstChild().Lines() segments := newChild.FirstChild().Lines()
if segments.Len() > 0 { if segments.Len() > 0 {
segment := segments.At(0) segment := segments.At(0)

View File

@ -9,14 +9,15 @@ import (
"io" "io"
"net/url" "net/url"
"strings" "strings"
"sync"
"code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/gitrepo" "code.gitea.io/gitea/modules/gitrepo"
"code.gitea.io/gitea/modules/markup/internal"
"code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/util" "code.gitea.io/gitea/modules/util"
"github.com/yuin/goldmark/ast" "github.com/yuin/goldmark/ast"
"golang.org/x/sync/errgroup"
) )
type RenderMetaMode string type RenderMetaMode string
@ -65,6 +66,8 @@ type RenderContext struct {
SidebarTocNode ast.Node SidebarTocNode ast.Node
RenderMetaAs RenderMetaMode RenderMetaAs RenderMetaMode
InStandalonePage bool // used by external render. the router "/org/repo/render/..." will output the rendered content in a standalone page InStandalonePage bool // used by external render. the router "/org/repo/render/..." will output the rendered content in a standalone page
RenderInternal internal.RenderInternal
} }
// Cancel runs any cleanup functions that have been registered for this Ctx // Cancel runs any cleanup functions that have been registered for this Ctx
@ -156,59 +159,53 @@ sandbox="allow-scripts"
return err return err
} }
func render(ctx *RenderContext, renderer Renderer, input io.Reader, output io.Writer) error { func pipes() (io.ReadCloser, io.WriteCloser, func()) {
var wg sync.WaitGroup
var err error
pr, pw := io.Pipe() pr, pw := io.Pipe()
defer func() { return pr, pw, func() {
_ = pr.Close() _ = pr.Close()
_ = pw.Close() _ = pw.Close()
}() }
}
var pr2 io.ReadCloser func render(ctx *RenderContext, renderer Renderer, input io.Reader, output io.Writer) error {
var pw2 io.WriteCloser finalProcessor := ctx.RenderInternal.Init(output)
defer finalProcessor.Close()
var sanitizerDisabled bool // input -> (pw1=pr1) -> renderer -> (pw2=pr2) -> SanitizeReader -> finalProcessor -> output
if r, ok := renderer.(ExternalRenderer); ok { // no sanitizer: input -> (pw1=pr1) -> renderer -> pw2(finalProcessor) -> output
sanitizerDisabled = r.SanitizerDisabled() pr1, pw1, close1 := pipes()
defer close1()
eg, _ := errgroup.WithContext(ctx.Ctx)
var pw2 io.WriteCloser = util.NopCloser{Writer: finalProcessor}
if r, ok := renderer.(ExternalRenderer); !ok || !r.SanitizerDisabled() {
var pr2 io.ReadCloser
var close2 func()
pr2, pw2, close2 = pipes()
defer close2()
eg.Go(func() error {
defer pr2.Close()
return SanitizeReader(pr2, renderer.Name(), finalProcessor)
})
} }
if !sanitizerDisabled { eg.Go(func() (err error) {
pr2, pw2 = io.Pipe()
defer func() {
_ = pr2.Close()
_ = pw2.Close()
}()
wg.Add(1)
go func() {
err = SanitizeReader(pr2, renderer.Name(), output)
_ = pr2.Close()
wg.Done()
}()
} else {
pw2 = util.NopCloser{Writer: output}
}
wg.Add(1)
go func() {
if r, ok := renderer.(PostProcessRenderer); ok && r.NeedPostProcess() { if r, ok := renderer.(PostProcessRenderer); ok && r.NeedPostProcess() {
err = PostProcess(ctx, pr, pw2) err = PostProcess(ctx, pr1, pw2)
} else { } else {
_, err = io.Copy(pw2, pr) _, err = io.Copy(pw2, pr1)
} }
_ = pr.Close() _, _ = pr1.Close(), pw2.Close()
_ = pw2.Close() return err
wg.Done() })
}()
if err1 := renderer.Render(ctx, input, pw); err1 != nil { if err := renderer.Render(ctx, input, pw1); err != nil {
return err1 return err
} }
_ = pw.Close() _ = pw1.Close()
wg.Wait() return eg.Wait()
return err
} }
// Init initializes the render global variables // Init initializes the render global variables

View File

@ -4,6 +4,9 @@
package markup package markup
import ( import (
"regexp"
"strings"
"code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/setting"
"github.com/microcosm-cc/bluemonday" "github.com/microcosm-cc/bluemonday"
@ -15,8 +18,11 @@ func (st *Sanitizer) addSanitizerRules(policy *bluemonday.Policy, rules []settin
policy.AllowDataURIImages() policy.AllowDataURIImages()
} }
if rule.Element != "" { if rule.Element != "" {
if rule.Regexp != nil { if rule.Regexp != "" {
policy.AllowAttrs(rule.AllowAttr).Matching(rule.Regexp).OnElements(rule.Element) if !strings.HasPrefix(rule.Regexp, "^") || !strings.HasSuffix(rule.Regexp, "$") {
panic("Markup sanitizer rule regexp must start with ^ and end with $ to be strict")
}
policy.AllowAttrs(rule.AllowAttr).Matching(regexp.MustCompile(rule.Regexp)).OnElements(rule.Element)
} else { } else {
policy.AllowAttrs(rule.AllowAttr).OnElements(rule.Element) policy.AllowAttrs(rule.AllowAttr).OnElements(rule.Element)
} }

View File

@ -16,37 +16,12 @@ import (
func (st *Sanitizer) createDefaultPolicy() *bluemonday.Policy { func (st *Sanitizer) createDefaultPolicy() *bluemonday.Policy {
policy := bluemonday.UGCPolicy() policy := bluemonday.UGCPolicy()
// For JS code copy and Mermaid loading state // NOTICE: DO NOT add special "class" regexp rules here anymore, use RenderInternal.SafeAttr instead
policy.AllowAttrs("class").Matching(regexp.MustCompile(`^code-block( is-loading)?$`)).OnElements("pre")
// For code preview // General safe SVG attributes
policy.AllowAttrs("class").Matching(regexp.MustCompile(`^code-preview-[-\w]+( file-content)?$`)).Globally() policy.AllowAttrs("viewBox", "width", "height", "aria-hidden", "data-attr-class").OnElements("svg")
policy.AllowAttrs("class").Matching(regexp.MustCompile(`^lines-num$`)).OnElements("td")
policy.AllowAttrs("data-line-number").OnElements("span")
policy.AllowAttrs("class").Matching(regexp.MustCompile(`^lines-code chroma$`)).OnElements("td")
policy.AllowAttrs("class").Matching(regexp.MustCompile(`^code-inner$`)).OnElements("div")
// For code preview (unicode escape)
policy.AllowAttrs("class").Matching(regexp.MustCompile(`^file-view( unicode-escaped)?$`)).OnElements("table")
policy.AllowAttrs("class").Matching(regexp.MustCompile(`^lines-escape$`)).OnElements("td")
policy.AllowAttrs("class").Matching(regexp.MustCompile(`^toggle-escape-button btn interact-bg$`)).OnElements("a") // don't use button, button might submit a form
policy.AllowAttrs("class").Matching(regexp.MustCompile(`^(ambiguous-code-point|escaped-code-point|broken-code-point)$`)).OnElements("span")
policy.AllowAttrs("class").Matching(regexp.MustCompile(`^char$`)).OnElements("span")
policy.AllowAttrs("data-tooltip-content", "data-escaped").OnElements("span")
// For color preview
policy.AllowAttrs("class").Matching(regexp.MustCompile(`^color-preview$`)).OnElements("span")
// For attention
policy.AllowAttrs("class").Matching(regexp.MustCompile(`^attention-header attention-\w+$`)).OnElements("blockquote")
policy.AllowAttrs("class").Matching(regexp.MustCompile(`^attention-\w+$`)).OnElements("strong")
policy.AllowAttrs("class").Matching(regexp.MustCompile(`^attention-icon attention-\w+ svg octicon-[\w-]+$`)).OnElements("svg")
policy.AllowAttrs("viewBox", "width", "height", "aria-hidden").OnElements("svg")
policy.AllowAttrs("fill-rule", "d").OnElements("path") policy.AllowAttrs("fill-rule", "d").OnElements("path")
// For Chroma markdown plugin
policy.AllowAttrs("class").Matching(regexp.MustCompile(`^(chroma )?language-[\w-]+( display)?( is-loading)?$`)).OnElements("code")
// Checkboxes // Checkboxes
policy.AllowAttrs("type").Matching(regexp.MustCompile(`^checkbox$`)).OnElements("input") policy.AllowAttrs("type").Matching(regexp.MustCompile(`^checkbox$`)).OnElements("input")
policy.AllowAttrs("checked", "disabled", "data-source-position").OnElements("input") policy.AllowAttrs("checked", "disabled", "data-source-position").OnElements("input")
@ -66,28 +41,15 @@ func (st *Sanitizer) createDefaultPolicy() *bluemonday.Policy {
policy.AllowURLSchemeWithCustomPolicy("data", disallowScheme) policy.AllowURLSchemeWithCustomPolicy("data", disallowScheme)
} }
// Allow classes for anchors
policy.AllowAttrs("class").Matching(regexp.MustCompile(`ref-issue( ref-external-issue)?`)).OnElements("a")
// Allow classes for task lists
policy.AllowAttrs("class").Matching(regexp.MustCompile(`task-list-item`)).OnElements("li")
// Allow classes for org mode list item status. // Allow classes for org mode list item status.
policy.AllowAttrs("class").Matching(regexp.MustCompile(`^(unchecked|checked|indeterminate)$`)).OnElements("li") policy.AllowAttrs("class").Matching(regexp.MustCompile(`^(unchecked|checked|indeterminate)$`)).OnElements("li")
// Allow icons
policy.AllowAttrs("class").Matching(regexp.MustCompile(`^icon(\s+[\p{L}\p{N}_-]+)+$`)).OnElements("i")
// Allow classes for emojis
policy.AllowAttrs("class").Matching(regexp.MustCompile(`emoji`)).OnElements("img")
// Allow icons, emojis, chroma syntax and keyword markup on span
policy.AllowAttrs("class").Matching(regexp.MustCompile(`^((icon(\s+[\p{L}\p{N}_-]+)+)|(emoji)|(language-math display)|(language-math inline))$|^([a-z][a-z0-9]{0,2})$|^` + keywordClass + `$`)).OnElements("span")
// Allow 'color' and 'background-color' properties for the style attribute on text elements. // Allow 'color' and 'background-color' properties for the style attribute on text elements.
policy.AllowStyles("color", "background-color").OnElements("span", "p") policy.AllowStyles("color", "background-color").OnElements("span", "p")
// Allow generally safe attributes policy.AllowAttrs("src", "autoplay", "controls").OnElements("video")
// Allow generally safe attributes (reference: https://github.com/jch/html-pipeline)
generalSafeAttrs := []string{ generalSafeAttrs := []string{
"abbr", "accept", "accept-charset", "abbr", "accept", "accept-charset",
"accesskey", "action", "align", "alt", "accesskey", "action", "align", "alt",
@ -106,10 +68,9 @@ func (st *Sanitizer) createDefaultPolicy() *bluemonday.Policy {
"selected", "shape", "size", "span", "selected", "shape", "size", "span",
"start", "summary", "tabindex", "target", "start", "summary", "tabindex", "target",
"title", "type", "usemap", "valign", "value", "title", "type", "usemap", "valign", "value",
"vspace", "width", "itemprop", "vspace", "width", "itemprop", "itemscope", "itemtype",
"data-markdown-generated-content", "data-markdown-generated-content", "data-attr-class",
} }
generalSafeElements := []string{ generalSafeElements := []string{
"h1", "h2", "h3", "h4", "h5", "h6", "h7", "h8", "br", "b", "i", "strong", "em", "a", "pre", "code", "img", "tt", "h1", "h2", "h3", "h4", "h5", "h6", "h7", "h8", "br", "b", "i", "strong", "em", "a", "pre", "code", "img", "tt",
"div", "ins", "del", "sup", "sub", "p", "ol", "ul", "table", "thead", "tbody", "tfoot", "blockquote", "label", "div", "ins", "del", "sup", "sub", "p", "ol", "ul", "table", "thead", "tbody", "tfoot", "blockquote", "label",
@ -117,14 +78,8 @@ func (st *Sanitizer) createDefaultPolicy() *bluemonday.Policy {
"details", "caption", "figure", "figcaption", "details", "caption", "figure", "figcaption",
"abbr", "bdo", "cite", "dfn", "mark", "small", "span", "time", "video", "wbr", "abbr", "bdo", "cite", "dfn", "mark", "small", "span", "time", "video", "wbr",
} }
policy.AllowAttrs(generalSafeAttrs...).OnElements(generalSafeElements...)
policy.AllowAttrs("src", "autoplay", "controls").OnElements("video")
policy.AllowAttrs("itemscope", "itemtype").OnElements("div")
// FIXME: Need to handle longdesc in img but there is no easy way to do it // FIXME: Need to handle longdesc in img but there is no easy way to do it
policy.AllowAttrs(generalSafeAttrs...).OnElements(generalSafeElements...)
// Custom keyword markup // Custom keyword markup
defaultSanitizer.addSanitizerRules(policy, setting.ExternalSanitizerRules) defaultSanitizer.addSanitizerRules(policy, setting.ExternalSanitizerRules)

View File

@ -19,7 +19,6 @@ func TestSanitizer(t *testing.T) {
// Code highlighting class // Code highlighting class
`<code class="random string"></code>`, `<code></code>`, `<code class="random string"></code>`, `<code></code>`,
`<code class="language-random ui tab active menu attached animating sidebar following bar center"></code>`, `<code></code>`, `<code class="language-random ui tab active menu attached animating sidebar following bar center"></code>`, `<code></code>`,
`<code class="language-go"></code>`, `<code class="language-go"></code>`,
// Input checkbox // Input checkbox
`<input type="hidden">`, ``, `<input type="hidden">`, ``,
@ -38,10 +37,8 @@ func TestSanitizer(t *testing.T) {
// <kbd> tags // <kbd> tags
`<kbd>Ctrl + C</kbd>`, `<kbd>Ctrl + C</kbd>`, `<kbd>Ctrl + C</kbd>`, `<kbd>Ctrl + C</kbd>`,
`<i class="dropdown icon">NAUGHTY</i>`, `<i>NAUGHTY</i>`, `<i class="dropdown icon">NAUGHTY</i>`, `<i>NAUGHTY</i>`,
`<i class="icon dropdown"></i>`, `<i class="icon dropdown"></i>`,
`<input type="checkbox" disabled=""/>unchecked`, `<input type="checkbox" disabled=""/>unchecked`, `<input type="checkbox" disabled=""/>unchecked`, `<input type="checkbox" disabled=""/>unchecked`,
`<span class="emoji dropdown">NAUGHTY</span>`, `<span>NAUGHTY</span>`, `<span class="emoji dropdown">NAUGHTY</span>`, `<span>NAUGHTY</span>`,
`<span class="emoji">contents</span>`, `<span class="emoji">contents</span>`,
// Color property // Color property
`<span style="color: red">Hello World</span>`, `<span style="color: red">Hello World</span>`, `<span style="color: red">Hello World</span>`, `<span style="color: red">Hello World</span>`,

View File

@ -54,7 +54,7 @@ type MarkupRenderer struct {
type MarkupSanitizerRule struct { type MarkupSanitizerRule struct {
Element string Element string
AllowAttr string AllowAttr string
Regexp *regexp.Regexp Regexp string
AllowDataURIImages bool AllowDataURIImages bool
} }
@ -117,15 +117,24 @@ func createMarkupSanitizerRule(name string, sec ConfigSection) (MarkupSanitizerR
regexpStr := sec.Key("REGEXP").Value() regexpStr := sec.Key("REGEXP").Value()
if regexpStr != "" { if regexpStr != "" {
// Validate when parsing the config that this is a valid regular hasPrefix := strings.HasPrefix(regexpStr, "^")
// expression. Then we can use regexp.MustCompile(...) later. hasSuffix := strings.HasSuffix(regexpStr, "$")
compiled, err := regexp.Compile(regexpStr) if !hasPrefix || !hasSuffix {
log.Error("In markup.%s: REGEXP must start with ^ and end with $ to be strict", name)
// to avoid breaking existing user configurations and satisfy the strict requirement in addSanitizerRules
if !hasPrefix {
regexpStr = "^.*" + regexpStr
}
if !hasSuffix {
regexpStr += ".*$"
}
}
_, err := regexp.Compile(regexpStr)
if err != nil { if err != nil {
log.Error("In markup.%s: REGEXP (%s) failed to compile: %v", name, regexpStr, err) log.Error("In markup.%s: REGEXP (%s) failed to compile: %v", name, regexpStr, err)
return rule, false return rule, false
} }
rule.Regexp = regexpStr
rule.Regexp = compiled
} }
ok = true ok = true

View File

@ -9,7 +9,7 @@ import (
"path" "path"
"strings" "strings"
gitea_html "code.gitea.io/gitea/modules/html" gitea_html "code.gitea.io/gitea/modules/htmlutil"
"code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/public" "code.gitea.io/gitea/modules/public"
) )

View File

@ -10,12 +10,12 @@ import (
"html/template" "html/template"
"net/url" "net/url"
"reflect" "reflect"
"slices"
"strings" "strings"
"time" "time"
user_model "code.gitea.io/gitea/models/user" user_model "code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/base" "code.gitea.io/gitea/modules/base"
"code.gitea.io/gitea/modules/htmlutil"
"code.gitea.io/gitea/modules/markup" "code.gitea.io/gitea/modules/markup"
"code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/svg" "code.gitea.io/gitea/modules/svg"
@ -39,7 +39,7 @@ func NewFuncMap() template.FuncMap {
"Iif": iif, "Iif": iif,
"Eval": evalTokens, "Eval": evalTokens,
"SafeHTML": safeHTML, "SafeHTML": safeHTML,
"HTMLFormat": HTMLFormat, "HTMLFormat": htmlutil.HTMLFormat,
"HTMLEscape": htmlEscape, "HTMLEscape": htmlEscape,
"QueryEscape": queryEscape, "QueryEscape": queryEscape,
"JSEscape": jsEscapeSafe, "JSEscape": jsEscapeSafe,
@ -184,23 +184,6 @@ func NewFuncMap() template.FuncMap {
} }
} }
func HTMLFormat(s string, rawArgs ...any) template.HTML {
args := slices.Clone(rawArgs)
for i, v := range args {
switch v := v.(type) {
case nil, bool, int, int8, int16, int32, int64, uint, uint8, uint16, uint32, uint64, float32, float64, template.HTML:
// for most basic types (including template.HTML which is safe), just do nothing and use it
case string:
args[i] = template.HTMLEscapeString(v)
case fmt.Stringer:
args[i] = template.HTMLEscapeString(v.String())
default:
args[i] = template.HTMLEscapeString(fmt.Sprint(v))
}
}
return template.HTML(fmt.Sprintf(s, args...))
}
// safeHTML render raw as HTML // safeHTML render raw as HTML
func safeHTML(s any) template.HTML { func safeHTML(s any) template.HTML {
switch v := s.(type) { switch v := s.(type) {

View File

@ -61,10 +61,6 @@ func TestJSEscapeSafe(t *testing.T) {
assert.EqualValues(t, `\u0026\u003C\u003E\'\"`, jsEscapeSafe(`&<>'"`)) assert.EqualValues(t, `\u0026\u003C\u003E\'\"`, jsEscapeSafe(`&<>'"`))
} }
func TestHTMLFormat(t *testing.T) {
assert.Equal(t, template.HTML("<a>&lt; < 1</a>"), HTMLFormat("<a>%s %s %d</a>", "<", template.HTML("<"), 1))
}
func TestSanitizeHTML(t *testing.T) { func TestSanitizeHTML(t *testing.T) {
assert.Equal(t, template.HTML(`<a href="/" rel="nofollow">link</a> xss <div>inline</div>`), SanitizeHTML(`<a href="/">link</a> <a href="javascript:">xss</a> <div style="dangerous">inline</div>`)) assert.Equal(t, template.HTML(`<a href="/" rel="nofollow">link</a> xss <div>inline</div>`), SanitizeHTML(`<a href="/">link</a> <a href="javascript:">xss</a> <div style="dangerous">inline</div>`))
} }

View File

@ -14,7 +14,7 @@ import (
"code.gitea.io/gitea/models/organization" "code.gitea.io/gitea/models/organization"
repo_model "code.gitea.io/gitea/models/repo" repo_model "code.gitea.io/gitea/models/repo"
user_model "code.gitea.io/gitea/models/user" user_model "code.gitea.io/gitea/models/user"
gitea_html "code.gitea.io/gitea/modules/html" gitea_html "code.gitea.io/gitea/modules/htmlutil"
"code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/setting"
) )

View File

@ -16,6 +16,7 @@ import (
issues_model "code.gitea.io/gitea/models/issues" issues_model "code.gitea.io/gitea/models/issues"
"code.gitea.io/gitea/modules/emoji" "code.gitea.io/gitea/modules/emoji"
"code.gitea.io/gitea/modules/htmlutil"
"code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/markup" "code.gitea.io/gitea/modules/markup"
"code.gitea.io/gitea/modules/markup/markdown" "code.gitea.io/gitea/modules/markup/markdown"
@ -140,7 +141,7 @@ func (ut *RenderUtils) RenderLabel(label *issues_model.Label) template.HTML {
if labelScope == "" { if labelScope == "" {
// Regular label // Regular label
return HTMLFormat(`<div class="ui label %s" style="color: %s !important; background-color: %s !important;" data-tooltip-content title="%s">%s</div>`, return htmlutil.HTMLFormat(`<div class="ui label %s" style="color: %s !important; background-color: %s !important;" data-tooltip-content title="%s">%s</div>`,
extraCSSClasses, textColor, label.Color, descriptionText, ut.RenderEmoji(label.Name)) extraCSSClasses, textColor, label.Color, descriptionText, ut.RenderEmoji(label.Name))
} }
@ -174,7 +175,7 @@ func (ut *RenderUtils) RenderLabel(label *issues_model.Label) template.HTML {
itemColor := "#" + hex.EncodeToString(itemBytes) itemColor := "#" + hex.EncodeToString(itemBytes)
scopeColor := "#" + hex.EncodeToString(scopeBytes) scopeColor := "#" + hex.EncodeToString(scopeBytes)
return HTMLFormat(`<span class="ui label %s scope-parent" data-tooltip-content title="%s">`+ return htmlutil.HTMLFormat(`<span class="ui label %s scope-parent" data-tooltip-content title="%s">`+
`<div class="ui label scope-left" style="color: %s !important; background-color: %s !important">%s</div>`+ `<div class="ui label scope-left" style="color: %s !important; background-color: %s !important">%s</div>`+
`<div class="ui label scope-right" style="color: %s !important; background-color: %s !important">%s</div>`+ `<div class="ui label scope-right" style="color: %s !important; background-color: %s !important">%s</div>`+
`</span>`, `</span>`,

View File

@ -113,34 +113,34 @@ func TestRenderCommitBody(t *testing.T) {
} }
expected := `/just/a/path.bin expected := `/just/a/path.bin
<a href="https://example.com/file.bin" class="link">https://example.com/file.bin</a> <a href="https://example.com/file.bin">https://example.com/file.bin</a>
[local link](file.bin) [local link](file.bin)
[remote link](<a href="https://example.com" class="link">https://example.com</a>) [remote link](<a href="https://example.com">https://example.com</a>)
[[local link|file.bin]] [[local link|file.bin]]
[[remote link|<a href="https://example.com" class="link">https://example.com</a>]] [[remote link|<a href="https://example.com">https://example.com</a>]]
![local image](image.jpg) ![local image](image.jpg)
![remote image](<a href="https://example.com/image.jpg" class="link">https://example.com/image.jpg</a>) ![remote image](<a href="https://example.com/image.jpg">https://example.com/image.jpg</a>)
[[local image|image.jpg]] [[local image|image.jpg]]
[[remote link|<a href="https://example.com/image.jpg" class="link">https://example.com/image.jpg</a>]] [[remote link|<a href="https://example.com/image.jpg">https://example.com/image.jpg</a>]]
<a href="https://example.com/user/repo/compare/88fc37a3c0a4dda553bdcfc80c178a58247f42fb...12fc37a3c0a4dda553bdcfc80c178a58247f42fb#hash" class="compare"><code class="nohighlight">88fc37a3c0...12fc37a3c0 (hash)</code></a> <a href="https://example.com/user/repo/compare/88fc37a3c0a4dda553bdcfc80c178a58247f42fb...12fc37a3c0a4dda553bdcfc80c178a58247f42fb#hash" class="compare"><code class="nohighlight">88fc37a3c0...12fc37a3c0 (hash)</code></a>
com 88fc37a3c0a4dda553bdcfc80c178a58247f42fb...12fc37a3c0a4dda553bdcfc80c178a58247f42fb pare com 88fc37a3c0a4dda553bdcfc80c178a58247f42fb...12fc37a3c0a4dda553bdcfc80c178a58247f42fb pare
<a href="https://example.com/user/repo/commit/88fc37a3c0a4dda553bdcfc80c178a58247f42fb" class="commit"><code class="nohighlight">88fc37a3c0</code></a> <a href="https://example.com/user/repo/commit/88fc37a3c0a4dda553bdcfc80c178a58247f42fb" class="commit"><code class="nohighlight">88fc37a3c0</code></a>
com 88fc37a3c0a4dda553bdcfc80c178a58247f42fb mit com 88fc37a3c0a4dda553bdcfc80c178a58247f42fb mit
<span class="emoji" aria-label="thumbs up">👍</span> <span class="emoji" aria-label="thumbs up">👍</span>
<a href="mailto:mail@domain.com" class="mailto">mail@domain.com</a> <a href="mailto:mail@domain.com">mail@domain.com</a>
<a href="/mention-user" class="mention">@mention-user</a> test <a href="/mention-user">@mention-user</a> test
<a href="/user13/repo11/issues/123" class="ref-issue">#123</a> <a href="/user13/repo11/issues/123" class="ref-issue">#123</a>
space` space`
assert.EqualValues(t, expected, string(newTestRenderUtils().RenderCommitBody(testInput(), testMetas))) assert.EqualValues(t, expected, string(newTestRenderUtils().RenderCommitBody(testInput(), testMetas)))
} }
func TestRenderCommitMessage(t *testing.T) { func TestRenderCommitMessage(t *testing.T) {
expected := `space <a href="/mention-user" data-markdown-generated-content="" class="mention">@mention-user</a> ` expected := `space <a href="/mention-user" data-markdown-generated-content="">@mention-user</a> `
assert.EqualValues(t, expected, newTestRenderUtils().RenderCommitMessage(testInput(), testMetas)) assert.EqualValues(t, expected, newTestRenderUtils().RenderCommitMessage(testInput(), testMetas))
} }
func TestRenderCommitMessageLinkSubject(t *testing.T) { func TestRenderCommitMessageLinkSubject(t *testing.T) {
expected := `<a href="https://example.com/link" class="muted">space </a><a href="/mention-user" data-markdown-generated-content="" class="mention">@mention-user</a>` expected := `<a href="https://example.com/link" class="muted">space </a><a href="/mention-user" data-markdown-generated-content="">@mention-user</a>`
assert.EqualValues(t, expected, newTestRenderUtils().RenderCommitMessageLinkSubject(testInput(), "https://example.com/link", testMetas)) assert.EqualValues(t, expected, newTestRenderUtils().RenderCommitMessageLinkSubject(testInput(), "https://example.com/link", testMetas))
} }

View File

@ -326,7 +326,7 @@ func renderViewPage(ctx *context.Context) (*git.Repository, *git.TreeEntry) {
if rctx.SidebarTocNode != nil { if rctx.SidebarTocNode != nil {
sb := &strings.Builder{} sb := &strings.Builder{}
err = markdown.SpecializedMarkdown().Renderer().Render(sb, nil, rctx.SidebarTocNode) err = markdown.SpecializedMarkdown(rctx).Renderer().Render(sb, nil, rctx.SidebarTocNode)
if err != nil { if err != nil {
log.Error("Failed to render wiki sidebar TOC: %v", err) log.Error("Failed to render wiki sidebar TOC: %v", err)
} else { } else {