1
1
mirror of https://github.com/go-gitea/gitea synced 2025-07-19 00:38:36 +00:00

Refactor markup render system (#32533)

Remove unmaintainable sanitizer rules. No need to add special "class"
regexp rules anymore, use RenderInternal.SafeAttr instead, more details
(and examples) are in the tests
This commit is contained in:
wxiaoguang
2024-11-18 13:25:42 +08:00
committed by GitHub
parent 4f879a00df
commit 8a20fba8eb
42 changed files with 568 additions and 508 deletions

View File

@@ -0,0 +1,30 @@
// Copyright 2024 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package internal
import (
"bytes"
"io"
)
type finalProcessor struct {
renderInternal *RenderInternal
output io.Writer
buf bytes.Buffer
}
func (p *finalProcessor) Write(data []byte) (int, error) {
p.buf.Write(data)
return len(data), nil
}
func (p *finalProcessor) Close() error {
// TODO: reading the whole markdown isn't a problem at the moment,
// because "postProcess" already does so. In the future we could optimize the code to process data on the fly.
buf := p.buf.Bytes()
buf = bytes.ReplaceAll(buf, []byte(` data-attr-class="`+p.renderInternal.secureIDPrefix), []byte(` class="`))
_, err := p.output.Write(buf)
return err
}

View File

@@ -0,0 +1,61 @@
// Copyright 2024 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package internal
import (
"bytes"
"html/template"
"io"
"testing"
"github.com/stretchr/testify/assert"
)
func TestRenderInternal(t *testing.T) {
cases := []struct {
input, protected, recovered string
}{
{
input: `<div class="test">class="content"</div>`,
protected: `<div data-attr-class="sec:test">class="content"</div>`,
recovered: `<div class="test">class="content"</div>`,
},
{
input: "<div\nclass=\"test\" data-xxx></div>",
protected: `<div data-attr-class="sec:test" data-xxx></div>`,
recovered: `<div class="test" data-xxx></div>`,
},
}
for _, c := range cases {
var r RenderInternal
out := &bytes.Buffer{}
in := r.init("sec", out)
protected := r.ProtectSafeAttrs(template.HTML(c.input))
assert.EqualValues(t, c.protected, protected)
_, _ = io.WriteString(in, string(protected))
_ = in.Close()
assert.EqualValues(t, c.recovered, out.String())
}
var r1, r2 RenderInternal
protected := r1.ProtectSafeAttrs(`<div class="test"></div>`)
assert.EqualValues(t, `<div class="test"></div>`, protected, "non-initialized RenderInternal should not protect any attributes")
_ = r1.init("sec", nil)
protected = r1.ProtectSafeAttrs(`<div class="test"></div>`)
assert.EqualValues(t, `<div data-attr-class="sec:test"></div>`, protected)
assert.EqualValues(t, "data-attr-class", r1.SafeAttr("class"))
assert.EqualValues(t, "sec:val", r1.SafeValue("val"))
recovered, ok := r1.RecoverProtectedValue("sec:val")
assert.True(t, ok)
assert.EqualValues(t, "val", recovered)
recovered, ok = r1.RecoverProtectedValue("other:val")
assert.False(t, ok)
assert.Empty(t, recovered)
out2 := &bytes.Buffer{}
in2 := r2.init("sec-other", out2)
_, _ = io.WriteString(in2, string(protected))
_ = in2.Close()
assert.EqualValues(t, `<div data-attr-class="sec:test"></div>`, out2.String(), "different secureID should not recover the value")
}

View File

@@ -0,0 +1,82 @@
// Copyright 2024 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package internal
import (
"crypto/rand"
"encoding/base64"
"html/template"
"io"
"regexp"
"strings"
"sync"
"code.gitea.io/gitea/modules/htmlutil"
"golang.org/x/net/html"
)
var reAttrClass = sync.OnceValue[*regexp.Regexp](func() *regexp.Regexp {
// TODO: it isn't a problem at the moment because our HTML contents are always well constructed
return regexp.MustCompile(`(<[^>]+)\s+class="([^"]+)"([^>]*>)`)
})
// RenderInternal also works without initialization
// If no initialization (no secureID), it will not protect any attributes and return the original name&value
type RenderInternal struct {
secureID string
secureIDPrefix string
}
func (r *RenderInternal) Init(output io.Writer) io.WriteCloser {
buf := make([]byte, 12)
_, err := rand.Read(buf)
if err != nil {
panic("unable to generate secure id")
}
return r.init(base64.URLEncoding.EncodeToString(buf), output)
}
func (r *RenderInternal) init(secID string, output io.Writer) io.WriteCloser {
r.secureID = secID
r.secureIDPrefix = r.secureID + ":"
return &finalProcessor{renderInternal: r, output: output}
}
func (r *RenderInternal) RecoverProtectedValue(v string) (string, bool) {
if !strings.HasPrefix(v, r.secureIDPrefix) {
return "", false
}
return v[len(r.secureIDPrefix):], true
}
func (r *RenderInternal) SafeAttr(name string) string {
if r.secureID == "" {
return name
}
return "data-attr-" + name
}
func (r *RenderInternal) SafeValue(val string) string {
if r.secureID == "" {
return val
}
return r.secureID + ":" + val
}
func (r *RenderInternal) NodeSafeAttr(attr, val string) html.Attribute {
return html.Attribute{Key: r.SafeAttr(attr), Val: r.SafeValue(val)}
}
func (r *RenderInternal) ProtectSafeAttrs(content template.HTML) template.HTML {
if r.secureID == "" {
return content
}
return template.HTML(reAttrClass().ReplaceAllString(string(content), `$1 data-attr-class="`+r.secureIDPrefix+`$2"$3`))
}
func (r *RenderInternal) FormatWithSafeAttrs(w io.Writer, fmt string, a ...any) error {
_, err := w.Write([]byte(r.ProtectSafeAttrs(htmlutil.HTMLFormat(fmt, a...))))
return err
}