1
1
mirror of https://github.com/go-gitea/gitea synced 2025-07-29 05:38:37 +00:00

Update bluemonday to v1.0.15 (#16379) (#16380)

* Update bluemonday to v1.0.15 (#16379)

* Fix TESTS
This commit is contained in:
6543
2021-07-09 02:47:27 +02:00
committed by GitHub
parent ac0f452b30
commit d98694e6ca
174 changed files with 14579 additions and 11967 deletions

View File

@@ -177,6 +177,7 @@ func IsPrintableASCII(str string) bool
func IsRFC3339(str string) bool
func IsRFC3339WithoutZone(str string) bool
func IsRGBcolor(str string) bool
func IsRegex(str string) bool
func IsRequestURI(rawurl string) bool
func IsRequestURL(rawurl string) bool
func IsRipeMD128(str string) bool
@@ -203,6 +204,7 @@ func IsUUID(str string) bool
func IsUUIDv3(str string) bool
func IsUUIDv4(str string) bool
func IsUUIDv5(str string) bool
func IsULID(str string) bool
func IsUnixTime(str string) bool
func IsUpperCase(str string) bool
func IsVariableWidth(str string) bool
@@ -382,6 +384,7 @@ Here is a list of available validators for struct fields (validator - used funct
"rfc3339WithoutZone": IsRFC3339WithoutZone,
"ISO3166Alpha2": IsISO3166Alpha2,
"ISO3166Alpha3": IsISO3166Alpha3,
"ulid": IsULID,
```
Validators with parameters

View File

@@ -42,6 +42,8 @@ const (
SSN string = `^\d{3}[- ]?\d{2}[- ]?\d{4}$`
WinPath string = `^[a-zA-Z]:\\(?:[^\\/:*?"<>|\r\n]+\\)*[^\\/:*?"<>|\r\n]*$`
UnixPath string = `^(/[^/\x00]*)+/?$`
WinARPath string = `^(?:(?:[a-zA-Z]:|\\\\[a-z0-9_.$●-]+\\[a-z0-9_.$●-]+)\\|\\?[^\\/:*?"<>|\r\n]+\\?)(?:[^\\/:*?"<>|\r\n]+\\)*[^\\/:*?"<>|\r\n]*$`
UnixARPath string = `^((\.{0,2}/)?([^/\x00]*))+/?$`
Semver string = "^v?(?:0|[1-9]\\d*)\\.(?:0|[1-9]\\d*)\\.(?:0|[1-9]\\d*)(-(0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(\\.(0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*)?(\\+[0-9a-zA-Z-]+(\\.[0-9a-zA-Z-]+)*)?$"
tagName string = "valid"
hasLowerCase string = ".*[[:lower:]]"
@@ -50,6 +52,7 @@ const (
hasWhitespaceOnly string = "^[[:space:]]+$"
IMEI string = "^[0-9a-f]{14}$|^\\d{15}$|^\\d{18}$"
IMSI string = "^\\d{14,15}$"
E164 string = `^\+?[1-9]\d{1,14}$`
)
// Used by IsFilePath func
@@ -97,6 +100,8 @@ var (
rxSSN = regexp.MustCompile(SSN)
rxWinPath = regexp.MustCompile(WinPath)
rxUnixPath = regexp.MustCompile(UnixPath)
rxARWinPath = regexp.MustCompile(WinARPath)
rxARUnixPath = regexp.MustCompile(UnixARPath)
rxSemver = regexp.MustCompile(Semver)
rxHasLowerCase = regexp.MustCompile(hasLowerCase)
rxHasUpperCase = regexp.MustCompile(hasUpperCase)
@@ -104,4 +109,5 @@ var (
rxHasWhitespaceOnly = regexp.MustCompile(hasWhitespaceOnly)
rxIMEI = regexp.MustCompile(IMEI)
rxIMSI = regexp.MustCompile(IMSI)
rxE164 = regexp.MustCompile(E164)
)

View File

@@ -165,6 +165,7 @@ var TagMap = map[string]Validator{
"ISO3166Alpha3": IsISO3166Alpha3,
"ISO4217": IsISO4217,
"IMEI": IsIMEI,
"ulid": IsULID,
}
// ISO3166Entry stores country codes

View File

@@ -361,9 +361,96 @@ func IsUUID(str string) bool {
return rxUUID.MatchString(str)
}
// Byte to index table for O(1) lookups when unmarshaling.
// We use 0xFF as sentinel value for invalid indexes.
var ulidDec = [...]byte{
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x01,
0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E,
0x0F, 0x10, 0x11, 0xFF, 0x12, 0x13, 0xFF, 0x14, 0x15, 0xFF,
0x16, 0x17, 0x18, 0x19, 0x1A, 0xFF, 0x1B, 0x1C, 0x1D, 0x1E,
0x1F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0A, 0x0B, 0x0C,
0x0D, 0x0E, 0x0F, 0x10, 0x11, 0xFF, 0x12, 0x13, 0xFF, 0x14,
0x15, 0xFF, 0x16, 0x17, 0x18, 0x19, 0x1A, 0xFF, 0x1B, 0x1C,
0x1D, 0x1E, 0x1F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
}
// EncodedSize is the length of a text encoded ULID.
const ulidEncodedSize = 26
// IsULID checks if the string is a ULID.
//
// Implementation got from:
// https://github.com/oklog/ulid (Apache-2.0 License)
//
func IsULID(str string) bool {
// Check if a base32 encoded ULID is the right length.
if len(str) != ulidEncodedSize {
return false
}
// Check if all the characters in a base32 encoded ULID are part of the
// expected base32 character set.
if ulidDec[str[0]] == 0xFF ||
ulidDec[str[1]] == 0xFF ||
ulidDec[str[2]] == 0xFF ||
ulidDec[str[3]] == 0xFF ||
ulidDec[str[4]] == 0xFF ||
ulidDec[str[5]] == 0xFF ||
ulidDec[str[6]] == 0xFF ||
ulidDec[str[7]] == 0xFF ||
ulidDec[str[8]] == 0xFF ||
ulidDec[str[9]] == 0xFF ||
ulidDec[str[10]] == 0xFF ||
ulidDec[str[11]] == 0xFF ||
ulidDec[str[12]] == 0xFF ||
ulidDec[str[13]] == 0xFF ||
ulidDec[str[14]] == 0xFF ||
ulidDec[str[15]] == 0xFF ||
ulidDec[str[16]] == 0xFF ||
ulidDec[str[17]] == 0xFF ||
ulidDec[str[18]] == 0xFF ||
ulidDec[str[19]] == 0xFF ||
ulidDec[str[20]] == 0xFF ||
ulidDec[str[21]] == 0xFF ||
ulidDec[str[22]] == 0xFF ||
ulidDec[str[23]] == 0xFF ||
ulidDec[str[24]] == 0xFF ||
ulidDec[str[25]] == 0xFF {
return false
}
// Check if the first character in a base32 encoded ULID will overflow. This
// happens because the base32 representation encodes 130 bits, while the
// ULID is only 128 bits.
//
// See https://github.com/oklog/ulid/issues/9 for details.
if str[0] > '7' {
return false
}
return true
}
// IsCreditCard checks if the string is a credit card.
func IsCreditCard(str string) bool {
sanitized := notNumberRegexp.ReplaceAllString(str, "")
sanitized := whiteSpacesAndMinus.ReplaceAllString(str, "")
if !rxCreditCard.MatchString(sanitized) {
return false
}
@@ -509,6 +596,27 @@ func IsFilePath(str string) (bool, int) {
return false, Unknown
}
//IsWinFilePath checks both relative & absolute paths in Windows
func IsWinFilePath(str string) bool {
if rxARWinPath.MatchString(str) {
//check windows path limit see:
// http://msdn.microsoft.com/en-us/library/aa365247(VS.85).aspx#maxpath
if len(str[3:]) > 32767 {
return false
}
return true
}
return false
}
//IsUnixFilePath checks both relative & absolute paths in Unix
func IsUnixFilePath(str string) bool {
if rxARUnixPath.MatchString(str) {
return true
}
return false
}
// IsDataURI checks if a string is base64 encoded data URI such as an image
func IsDataURI(str string) bool {
dataURI := strings.Split(str, ",")
@@ -586,11 +694,13 @@ func IsHash(str string, algorithm string) bool {
len = "40"
} else if algo == "tiger192" {
len = "48"
} else if algo == "sha256" {
} else if algo == "sha3-224" {
len = "56"
} else if algo == "sha256" || algo == "sha3-256" {
len = "64"
} else if algo == "sha384" {
} else if algo == "sha384" || algo == "sha3-384" {
len = "96"
} else if algo == "sha512" {
} else if algo == "sha512" || algo == "sha3-512" {
len = "128"
} else {
return false
@@ -599,6 +709,26 @@ func IsHash(str string, algorithm string) bool {
return Matches(str, "^[a-f0-9]{"+len+"}$")
}
// IsSHA3224 checks is a string is a SHA3-224 hash. Alias for `IsHash(str, "sha3-224")`
func IsSHA3224(str string) bool {
return IsHash(str, "sha3-224")
}
// IsSHA3256 checks is a string is a SHA3-256 hash. Alias for `IsHash(str, "sha3-256")`
func IsSHA3256(str string) bool {
return IsHash(str, "sha3-256")
}
// IsSHA3384 checks is a string is a SHA3-384 hash. Alias for `IsHash(str, "sha3-384")`
func IsSHA3384(str string) bool {
return IsHash(str, "sha3-384")
}
// IsSHA3512 checks is a string is a SHA3-512 hash. Alias for `IsHash(str, "sha3-512")`
func IsSHA3512(str string) bool {
return IsHash(str, "sha3-512")
}
// IsSHA512 checks is a string is a SHA512 hash. Alias for `IsHash(str, "sha512")`
func IsSHA512(str string) bool {
return IsHash(str, "sha512")
@@ -819,6 +949,14 @@ func IsRsaPublicKey(str string, keylen int) bool {
return bitlen == int(keylen)
}
// IsRegex checks if a give string is a valid regex with RE2 syntax or not
func IsRegex(str string) bool {
if _, err := regexp.Compile(str); err == nil {
return true
}
return false
}
func toJSONName(tag string) string {
if tag == "" {
return ""
@@ -1625,3 +1763,7 @@ func (sv stringValues) Len() int { return len(sv) }
func (sv stringValues) Swap(i, j int) { sv[i], sv[j] = sv[j], sv[i] }
func (sv stringValues) Less(i, j int) bool { return sv.get(i) < sv.get(j) }
func (sv stringValues) get(i int) string { return sv[i].String() }
func IsE164(str string) bool {
return rxE164.MatchString(str)
}

View File

@@ -11,6 +11,10 @@ go:
- 1.10.x
- 1.11.x
- 1.12.x
- 1.13.x
- 1.14.x
- 1.15.x
- 1.16.x
- tip
matrix:
allow_failures:

View File

@@ -9,6 +9,7 @@ Third-party patches are essential for keeping bluemonday secure and offering the
## Guidelines
1. Do not vendor dependencies. As a security package, were we to vendor dependencies the projects that then vendor bluemonday may not receive the latest security updates to the dependencies. By not vendoring dependencies the project that implements bluemonday will vendor the latest version of any dependent packages. Vendoring is a project problem, not a package problem. bluemonday will be tested against the latest version of dependencies periodically and during any PR/merge.
2. I do not care about spelling mistakes or whitespace and I do not believe that you should either. PRs therefore must be functional in their nature or be substantial and impactful if documentation or examples.
## Submitting an Issue

View File

@@ -25,7 +25,7 @@ build:
@go build
vet:
@go vet *.go
@go vet
lint:
@golint *.go

View File

@@ -2,7 +2,7 @@
bluemonday is a HTML sanitizer implemented in Go. It is fast and highly configurable.
bluemonday takes untrusted user generated content as an input, and will return HTML that has been sanitised against a whitelist of approved HTML elements and attributes so that you can safely include the content in your web page.
bluemonday takes untrusted user generated content as an input, and will return HTML that has been sanitised against an allowlist of approved HTML elements and attributes so that you can safely include the content in your web page.
If you accept user generated content, and your server uses Go, you **need** bluemonday.
@@ -50,15 +50,15 @@ bluemonday is heavily inspired by both the [OWASP Java HTML Sanitizer](https://c
## Technical Summary
Whitelist based, you need to either build a policy describing the HTML elements and attributes to permit (and the `regexp` patterns of attributes), or use one of the supplied policies representing good defaults.
Allowlist based, you need to either build a policy describing the HTML elements and attributes to permit (and the `regexp` patterns of attributes), or use one of the supplied policies representing good defaults.
The policy containing the whitelist is applied using a fast non-validating, forward only, token-based parser implemented in the [Go net/html library](https://godoc.org/golang.org/x/net/html) by the core Go team.
The policy containing the allowlist is applied using a fast non-validating, forward only, token-based parser implemented in the [Go net/html library](https://godoc.org/golang.org/x/net/html) by the core Go team.
We expect to be supplied with well-formatted HTML (closing elements for every applicable open element, nested correctly) and so we do not focus on repairing badly nested or incomplete HTML. We focus on simply ensuring that whatever elements do exist are described in the policy whitelist and that attributes and links are safe for use on your web page. [GIGO](http://en.wikipedia.org/wiki/Garbage_in,_garbage_out) does apply and if you feed it bad HTML bluemonday is not tasked with figuring out how to make it good again.
We expect to be supplied with well-formatted HTML (closing elements for every applicable open element, nested correctly) and so we do not focus on repairing badly nested or incomplete HTML. We focus on simply ensuring that whatever elements do exist are described in the policy allowlist and that attributes and links are safe for use on your web page. [GIGO](http://en.wikipedia.org/wiki/Garbage_in,_garbage_out) does apply and if you feed it bad HTML bluemonday is not tasked with figuring out how to make it good again.
### Supported Go Versions
bluemonday is tested against Go 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 1.10, 1.11, 1.12, and tip.
bluemonday is tested on all versions since Go 1.2 including tip.
We do not support Go 1.0 as we depend on `golang.org/x/net/html` which includes a reference to `io.ErrNoProgress` which did not exist in Go 1.0.
@@ -146,8 +146,8 @@ func main() {
We ship two default policies:
1. `bluemonday.StrictPolicy()` which can be thought of as equivalent to stripping all HTML elements and their attributes as it has nothing on its whitelist. An example usage scenario would be blog post titles where HTML tags are not expected at all and if they are then the elements *and* the content of the elements should be stripped. This is a *very* strict policy.
2. `bluemonday.UGCPolicy()` which allows a broad selection of HTML elements and attributes that are safe for user generated content. Note that this policy does *not* whitelist iframes, object, embed, styles, script, etc. An example usage scenario would be blog post bodies where a variety of formatting is expected along with the potential for TABLEs and IMGs.
1. `bluemonday.StrictPolicy()` which can be thought of as equivalent to stripping all HTML elements and their attributes as it has nothing on its allowlist. An example usage scenario would be blog post titles where HTML tags are not expected at all and if they are then the elements *and* the content of the elements should be stripped. This is a *very* strict policy.
2. `bluemonday.UGCPolicy()` which allows a broad selection of HTML elements and attributes that are safe for user generated content. Note that this policy does *not* allow iframes, object, embed, styles, script, etc. An example usage scenario would be blog post bodies where a variety of formatting is expected along with the potential for TABLEs and IMGs.
## Policy Building
@@ -220,7 +220,7 @@ p.AllowElements("fieldset", "select", "option")
### Inline CSS
Although it's possible to handle inline CSS using `AllowAttrs` with a `Matching` rule, writing a single monolithic regular expression to safely process all inline CSS which you wish to allow is not a trivial task. Instead of attempting to do so, you can whitelist the `style` attribute on whichever element(s) you desire and use style policies to control and sanitize inline styles.
Although it's possible to handle inline CSS using `AllowAttrs` with a `Matching` rule, writing a single monolithic regular expression to safely process all inline CSS which you wish to allow is not a trivial task. Instead of attempting to do so, you can allow the `style` attribute on whichever element(s) you desire and use style policies to control and sanitize inline styles.
It is suggested that you use `Matching` (with a suitable regular expression)
`MatchingEnum`, or `MatchingHandler` to ensure each style matches your needs,
@@ -241,7 +241,7 @@ p.AllowAttrs("style").OnElements("span", "p")
p.AllowStyles("text-decoration").MatchingEnum("underline", "line-through", "none").OnElements("span")
```
Or you can specify elements based on a regex patterm match:
Or you can specify elements based on a regex pattern match:
```go
p.AllowAttrs("style").OnElementsMatching(regex.MustCompile(`^my-element-`))
// Allow the 'text-decoration' property to be set to 'underline', 'line-through' or 'none'
@@ -254,6 +254,7 @@ validate the values for a given property. The string parameter has been
converted to lowercase and unicode code points have been converted.
```go
myHandler := func(value string) bool{
// Validate your input here
return true
}
p.AllowAttrs("style").OnElements("span", "p")
@@ -279,12 +280,12 @@ We provide some additional global options for safely working with links.
p.RequireParseableURLs(true)
```
If you have enabled parseable URLs then the following option will `AllowRelativeURLs`. By default this is disabled (bluemonday is a whitelist tool... you need to explicitly tell us to permit things) and when disabled it will prevent all local and scheme relative URLs (i.e. `href="localpage.html"`, `href="../home.html"` and even `href="//www.google.com"` are relative):
If you have enabled parseable URLs then the following option will `AllowRelativeURLs`. By default this is disabled (bluemonday is an allowlist tool... you need to explicitly tell us to permit things) and when disabled it will prevent all local and scheme relative URLs (i.e. `href="localpage.html"`, `href="../home.html"` and even `href="//www.google.com"` are relative):
```go
p.AllowRelativeURLs(true)
```
If you have enabled parseable URLs then you can whitelist the schemes (commonly called protocol when thinking of `http` and `https`) that are permitted. Bear in mind that allowing relative URLs in the above option will allow for a blank scheme:
If you have enabled parseable URLs then you can allow the schemes (commonly called protocol when thinking of `http` and `https`) that are permitted. Bear in mind that allowing relative URLs in the above option will allow for a blank scheme:
```go
p.AllowURLSchemes("mailto", "http", "https")
```
@@ -302,7 +303,7 @@ p.RequireNoReferrerOnLinks(true)
```
We provide a convenience method that applies all of the above, but you will still need to whitelist the linkable elements for the URL rules to be applied to:
We provide a convenience method that applies all of the above, but you will still need to allow the linkable elements for the URL rules to be applied to:
```go
p.AllowStandardURLs()
p.AllowAttrs("cite").OnElements("blockquote", "q")
@@ -372,11 +373,11 @@ p.AllowAttrs(
)
```
Both examples exhibit the same issue, they declare attributes but do not then specify whether they are whitelisted globally or only on specific elements (and which elements). Attributes belong to one or more elements, and the policy needs to declare this.
Both examples exhibit the same issue, they declare attributes but do not then specify whether they are allowed globally or only on specific elements (and which elements). Attributes belong to one or more elements, and the policy needs to declare this.
## Limitations
We are not yet including any tools to help whitelist and sanitize CSS. Which means that unless you wish to do the heavy lifting in a single regular expression (inadvisable), **you should not allow the "style" attribute anywhere**.
We are not yet including any tools to help allow and sanitize CSS. Which means that unless you wish to do the heavy lifting in a single regular expression (inadvisable), **you should not allow the "style" attribute anywhere**.
It is not the job of bluemonday to fix your bad HTML, it is merely the job of bluemonday to prevent malicious HTML getting through. If you have mismatched HTML elements, or non-conforming nesting of elements, those will remain. But if you have well-structured HTML bluemonday will not break it.

View File

@@ -27,7 +27,7 @@
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package bluemonday
package css
import (
"regexp"
@@ -329,7 +329,7 @@ func splitValues(value string) []string {
return values
}
func getDefaultHandler(attr string) func(string) bool {
func GetDefaultHandler(attr string) func(string) bool {
if defaultStyleHandlers[attr] != nil {
return defaultStyleHandlers[attr]

View File

@@ -28,10 +28,10 @@
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/*
Package bluemonday provides a way of describing a whitelist of HTML elements
Package bluemonday provides a way of describing an allowlist of HTML elements
and attributes as a policy, and for that policy to be applied to untrusted
strings from users that may contain markup. All elements and attributes not on
the whitelist will be stripped.
the allowlist will be stripped.
The default bluemonday.UGCPolicy().Sanitize() turns this:
@@ -84,21 +84,21 @@ bluemonday is heavily inspired by both the OWASP Java HTML Sanitizer
We ship two default policies, one is bluemonday.StrictPolicy() and can be
thought of as equivalent to stripping all HTML elements and their attributes as
it has nothing on its whitelist.
it has nothing on its allowlist.
The other is bluemonday.UGCPolicy() and allows a broad selection of HTML
elements and attributes that are safe for user generated content. Note that
this policy does not whitelist iframes, object, embed, styles, script, etc.
this policy does not allow iframes, object, embed, styles, script, etc.
The essence of building a policy is to determine which HTML elements and
attributes are considered safe for your scenario. OWASP provide an XSS
prevention cheat sheet ( https://www.google.com/search?q=xss+prevention+cheat+sheet )
to help explain the risks, but essentially:
1. Avoid whitelisting anything other than plain HTML elements
2. Avoid whitelisting `script`, `style`, `iframe`, `object`, `embed`, `base`
1. Avoid allowing anything other than plain HTML elements
2. Avoid allowing `script`, `style`, `iframe`, `object`, `embed`, `base`
elements
3. Avoid whitelisting anything other than plain HTML elements with simple
3. Avoid allowing anything other than plain HTML elements with simple
values that you can match to a regexp
*/
package bluemonday

View File

@@ -3,7 +3,8 @@ module github.com/microcosm-cc/bluemonday
go 1.16
require (
github.com/asaskevich/govalidator v0.0.0-20210307081110-f21760c49a8d
github.com/aymerick/douceur v0.2.0
github.com/gorilla/css v1.0.0 // indirect
golang.org/x/net v0.0.0-20210331212208-0fccb6fa2b5c
golang.org/x/net v0.0.0-20210614182718-04defd469f4e
)

View File

@@ -1,11 +1,18 @@
github.com/asaskevich/govalidator v0.0.0-20210307081110-f21760c49a8d h1:Byv0BzEl3/e6D5CLfI0j/7hiIEtvGVFPCZ7Ei2oq8iQ=
github.com/asaskevich/govalidator v0.0.0-20210307081110-f21760c49a8d/go.mod h1:WaHUgvxTVq04UNunO+XhnAqY/wQc+bxr74GqbsZ/Jqw=
github.com/aymerick/douceur v0.2.0 h1:Mv+mAeH1Q+n9Fr+oyamOlAkUNPWPlA8PPGR0QAaYuPk=
github.com/aymerick/douceur v0.2.0/go.mod h1:wlT5vV2O3h55X9m7iVYN0TBM0NH/MmbLnd30/FjWUq4=
github.com/gorilla/css v1.0.0 h1:BQqNyPTi50JCFMTw/b67hByjMVXZRwGha6wxVGkeihY=
github.com/gorilla/css v1.0.0/go.mod h1:Dn721qIggHpt4+EFCcTLTU/vk5ySda2ReITrtgBl60c=
golang.org/x/net v0.0.0-20210331212208-0fccb6fa2b5c h1:KHUzaHIpjWVlVVNh65G3hhuj3KB1HnjY6Cq5cTvRQT8=
golang.org/x/net v0.0.0-20210331212208-0fccb6fa2b5c/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM=
golang.org/x/net v0.0.0-20210421230115-4e50805a0758 h1:aEpZnXcAmXkd6AvLb2OPt+EN1Zu/8Ne3pCqPjja5PXY=
golang.org/x/net v0.0.0-20210421230115-4e50805a0758/go.mod h1:72T/g9IO56b78aLF+1Kcs5dz7/ng1VjMUvfKvpfy+jM=
golang.org/x/net v0.0.0-20210610132358-84b48f89b13b h1:k+E048sYJHyVnsr1GDrRZWQ32D2C7lWs9JRc0bel53A=
golang.org/x/net v0.0.0-20210610132358-84b48f89b13b/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20210614182718-04defd469f4e h1:XpT3nA5TvE525Ne3hInMh6+GETgn27Zfm9dxsThnX2Q=
golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210420072515-93ed5bcd2bfe/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=

View File

@@ -141,7 +141,7 @@ func (p *Policy) AllowStandardURLs() {
}
// AllowStandardAttributes will enable "id", "title" and the language specific
// attributes "dir" and "lang" on all elements that are whitelisted
// attributes "dir" and "lang" on all elements that are allowed
func (p *Policy) AllowStandardAttributes() {
// "dir" "lang" are permitted as both language attributes affect charsets
// and direction of text.

View File

@@ -35,9 +35,11 @@ import (
"net/url"
"regexp"
"strings"
"github.com/microcosm-cc/bluemonday/css"
)
// Policy encapsulates the whitelist of HTML elements and attributes that will
// Policy encapsulates the allowlist of HTML elements and attributes that will
// be applied to the sanitised HTML.
//
// You should use bluemonday.NewPolicy() to create a blank policy as the
@@ -86,28 +88,31 @@ type Policy struct {
// When true, allow data attributes.
allowDataAttributes bool
// map[htmlElementName]map[htmlAttributeName]attrPolicy
elsAndAttrs map[string]map[string]attrPolicy
// When true, allow comments.
allowComments bool
// map[htmlElementName]map[htmlAttributeName][]attrPolicy
elsAndAttrs map[string]map[string][]attrPolicy
// elsMatchingAndAttrs stores regex based element matches along with attributes
elsMatchingAndAttrs map[*regexp.Regexp]map[string]attrPolicy
elsMatchingAndAttrs map[*regexp.Regexp]map[string][]attrPolicy
// map[htmlAttributeName]attrPolicy
globalAttrs map[string]attrPolicy
// map[htmlAttributeName][]attrPolicy
globalAttrs map[string][]attrPolicy
// map[htmlElementName]map[cssPropertyName]stylePolicy
elsAndStyles map[string]map[string]stylePolicy
// map[htmlElementName]map[cssPropertyName][]stylePolicy
elsAndStyles map[string]map[string][]stylePolicy
// map[regex]map[cssPropertyName]stylePolicy
elsMatchingAndStyles map[*regexp.Regexp]map[string]stylePolicy
// map[regex]map[cssPropertyName][]stylePolicy
elsMatchingAndStyles map[*regexp.Regexp]map[string][]stylePolicy
// map[cssPropertyName]stylePolicy
globalStyles map[string]stylePolicy
// map[cssPropertyName][]stylePolicy
globalStyles map[string][]stylePolicy
// If urlPolicy is nil, all URLs with matching schema are allowed.
// Otherwise, only the URLs with matching schema and urlPolicy(url)
// returning true are allowed.
allowURLSchemes map[string]urlPolicy
allowURLSchemes map[string][]urlPolicy
// If an element has had all attributes removed as a result of a policy
// being applied, then the element would be removed from the output.
@@ -174,22 +179,22 @@ type urlPolicy func(url *url.URL) (allowUrl bool)
// init initializes the maps if this has not been done already
func (p *Policy) init() {
if !p.initialized {
p.elsAndAttrs = make(map[string]map[string]attrPolicy)
p.elsMatchingAndAttrs = make(map[*regexp.Regexp]map[string]attrPolicy)
p.globalAttrs = make(map[string]attrPolicy)
p.elsAndStyles = make(map[string]map[string]stylePolicy)
p.elsMatchingAndStyles = make(map[*regexp.Regexp]map[string]stylePolicy)
p.globalStyles = make(map[string]stylePolicy)
p.allowURLSchemes = make(map[string]urlPolicy)
p.elsAndAttrs = make(map[string]map[string][]attrPolicy)
p.elsMatchingAndAttrs = make(map[*regexp.Regexp]map[string][]attrPolicy)
p.globalAttrs = make(map[string][]attrPolicy)
p.elsAndStyles = make(map[string]map[string][]stylePolicy)
p.elsMatchingAndStyles = make(map[*regexp.Regexp]map[string][]stylePolicy)
p.globalStyles = make(map[string][]stylePolicy)
p.allowURLSchemes = make(map[string][]urlPolicy)
p.setOfElementsAllowedWithoutAttrs = make(map[string]struct{})
p.setOfElementsToSkipContent = make(map[string]struct{})
p.initialized = true
}
}
// NewPolicy returns a blank policy with nothing whitelisted or permitted. This
// NewPolicy returns a blank policy with nothing allowed or permitted. This
// is the recommended way to start building a policy and you should now use
// AllowAttrs() and/or AllowElements() to construct the whitelist of HTML
// AllowAttrs() and/or AllowElements() to construct the allowlist of HTML
// elements and attributes.
func NewPolicy() *Policy {
@@ -203,7 +208,7 @@ func NewPolicy() *Policy {
// AllowAttrs takes a range of HTML attribute names and returns an
// attribute policy builder that allows you to specify the pattern and scope of
// the whitelisted attribute.
// the allowed attribute.
//
// The attribute policy is only added to the core policy when either Globally()
// or OnElements(...) are called.
@@ -223,7 +228,7 @@ func (p *Policy) AllowAttrs(attrNames ...string) *attrPolicyBuilder {
return &abp
}
// AllowDataAttributes whitelists all data attributes. We can't specify the name
// AllowDataAttributes permits all data attributes. We can't specify the name
// of each attribute exactly as they are customized.
//
// NOTE: These values are not sanitized and applications that evaluate or process
@@ -238,6 +243,22 @@ func (p *Policy) AllowDataAttributes() {
p.allowDataAttributes = true
}
// AllowComments allows comments.
//
// Please note that only one type of comment will be allowed by this, this is the
// the standard HTML comment <!-- --> which includes the use of that to permit
// conditionals as per https://docs.microsoft.com/en-us/previous-versions/windows/internet-explorer/ie-developer/compatibility/ms537512(v=vs.85)?redirectedfrom=MSDN
//
// What is not permitted are CDATA XML comments, as the x/net/html package we depend
// on does not handle this fully and we are not choosing to take on that work:
// https://pkg.go.dev/golang.org/x/net/html#Tokenizer.AllowCDATA . If the x/net/html
// package changes this then these will be considered, otherwise if you AllowComments
// but provide a CDATA comment, then as per the documentation in x/net/html this will
// be treated as a plain HTML comment.
func (p *Policy) AllowComments() {
p.allowComments = true
}
// AllowNoAttrs says that attributes on element are optional.
//
// The attribute policy is only added to the core policy when OnElements(...)
@@ -265,8 +286,7 @@ func (abp *attrPolicyBuilder) AllowNoAttrs() *attrPolicyBuilder {
}
// Matching allows a regular expression to be applied to a nascent attribute
// policy, and returns the attribute policy. Calling this more than once will
// replace the existing regexp.
// policy, and returns the attribute policy.
func (abp *attrPolicyBuilder) Matching(regex *regexp.Regexp) *attrPolicyBuilder {
abp.regexp = regex
@@ -284,7 +304,7 @@ func (abp *attrPolicyBuilder) OnElements(elements ...string) *Policy {
for _, attr := range abp.attrNames {
if _, ok := abp.p.elsAndAttrs[element]; !ok {
abp.p.elsAndAttrs[element] = make(map[string]attrPolicy)
abp.p.elsAndAttrs[element] = make(map[string][]attrPolicy)
}
ap := attrPolicy{}
@@ -292,14 +312,14 @@ func (abp *attrPolicyBuilder) OnElements(elements ...string) *Policy {
ap.regexp = abp.regexp
}
abp.p.elsAndAttrs[element][attr] = ap
abp.p.elsAndAttrs[element][attr] = append(abp.p.elsAndAttrs[element][attr], ap)
}
if abp.allowEmpty {
abp.p.setOfElementsAllowedWithoutAttrs[element] = struct{}{}
if _, ok := abp.p.elsAndAttrs[element]; !ok {
abp.p.elsAndAttrs[element] = make(map[string]attrPolicy)
abp.p.elsAndAttrs[element] = make(map[string][]attrPolicy)
}
}
}
@@ -312,19 +332,19 @@ func (abp *attrPolicyBuilder) OnElements(elements ...string) *Policy {
func (abp *attrPolicyBuilder) OnElementsMatching(regex *regexp.Regexp) *Policy {
for _, attr := range abp.attrNames {
if _, ok := abp.p.elsMatchingAndAttrs[regex]; !ok {
abp.p.elsMatchingAndAttrs[regex] = make(map[string]attrPolicy)
abp.p.elsMatchingAndAttrs[regex] = make(map[string][]attrPolicy)
}
ap := attrPolicy{}
if abp.regexp != nil {
ap.regexp = abp.regexp
}
abp.p.elsMatchingAndAttrs[regex][attr] = ap
abp.p.elsMatchingAndAttrs[regex][attr] = append(abp.p.elsMatchingAndAttrs[regex][attr], ap)
}
if abp.allowEmpty {
abp.p.setOfElementsMatchingAllowedWithoutAttrs = append(abp.p.setOfElementsMatchingAllowedWithoutAttrs, regex)
if _, ok := abp.p.elsMatchingAndAttrs[regex]; !ok {
abp.p.elsMatchingAndAttrs[regex] = make(map[string]attrPolicy)
abp.p.elsMatchingAndAttrs[regex] = make(map[string][]attrPolicy)
}
}
@@ -337,7 +357,7 @@ func (abp *attrPolicyBuilder) Globally() *Policy {
for _, attr := range abp.attrNames {
if _, ok := abp.p.globalAttrs[attr]; !ok {
abp.p.globalAttrs[attr] = attrPolicy{}
abp.p.globalAttrs[attr] = []attrPolicy{}
}
ap := attrPolicy{}
@@ -345,7 +365,7 @@ func (abp *attrPolicyBuilder) Globally() *Policy {
ap.regexp = abp.regexp
}
abp.p.globalAttrs[attr] = ap
abp.p.globalAttrs[attr] = append(abp.p.globalAttrs[attr], ap)
}
return abp.p
@@ -353,7 +373,7 @@ func (abp *attrPolicyBuilder) Globally() *Policy {
// AllowStyles takes a range of CSS property names and returns a
// style policy builder that allows you to specify the pattern and scope of
// the whitelisted property.
// the allowed property.
//
// The style policy is only added to the core policy when either Globally()
// or OnElements(...) are called.
@@ -373,8 +393,7 @@ func (p *Policy) AllowStyles(propertyNames ...string) *stylePolicyBuilder {
}
// Matching allows a regular expression to be applied to a nascent style
// policy, and returns the style policy. Calling this more than once will
// replace the existing regexp.
// policy, and returns the style policy.
func (spb *stylePolicyBuilder) Matching(regex *regexp.Regexp) *stylePolicyBuilder {
spb.regexp = regex
@@ -383,8 +402,7 @@ func (spb *stylePolicyBuilder) Matching(regex *regexp.Regexp) *stylePolicyBuilde
}
// MatchingEnum allows a list of allowed values to be applied to a nascent style
// policy, and returns the style policy. Calling this more than once will
// replace the existing list of allowed values.
// policy, and returns the style policy.
func (spb *stylePolicyBuilder) MatchingEnum(enum ...string) *stylePolicyBuilder {
spb.enum = enum
@@ -393,8 +411,7 @@ func (spb *stylePolicyBuilder) MatchingEnum(enum ...string) *stylePolicyBuilder
}
// MatchingHandler allows a handler to be applied to a nascent style
// policy, and returns the style policy. Calling this more than once will
// replace the existing handler.
// policy, and returns the style policy.
func (spb *stylePolicyBuilder) MatchingHandler(handler func(string) bool) *stylePolicyBuilder {
spb.handler = handler
@@ -412,7 +429,7 @@ func (spb *stylePolicyBuilder) OnElements(elements ...string) *Policy {
for _, attr := range spb.propertyNames {
if _, ok := spb.p.elsAndStyles[element]; !ok {
spb.p.elsAndStyles[element] = make(map[string]stylePolicy)
spb.p.elsAndStyles[element] = make(map[string][]stylePolicy)
}
sp := stylePolicy{}
@@ -423,9 +440,9 @@ func (spb *stylePolicyBuilder) OnElements(elements ...string) *Policy {
} else if spb.regexp != nil {
sp.regexp = spb.regexp
} else {
sp.handler = getDefaultHandler(attr)
sp.handler = css.GetDefaultHandler(attr)
}
spb.p.elsAndStyles[element][attr] = sp
spb.p.elsAndStyles[element][attr] = append(spb.p.elsAndStyles[element][attr], sp)
}
}
@@ -439,7 +456,7 @@ func (spb *stylePolicyBuilder) OnElementsMatching(regex *regexp.Regexp) *Policy
for _, attr := range spb.propertyNames {
if _, ok := spb.p.elsMatchingAndStyles[regex]; !ok {
spb.p.elsMatchingAndStyles[regex] = make(map[string]stylePolicy)
spb.p.elsMatchingAndStyles[regex] = make(map[string][]stylePolicy)
}
sp := stylePolicy{}
@@ -450,9 +467,9 @@ func (spb *stylePolicyBuilder) OnElementsMatching(regex *regexp.Regexp) *Policy
} else if spb.regexp != nil {
sp.regexp = spb.regexp
} else {
sp.handler = getDefaultHandler(attr)
sp.handler = css.GetDefaultHandler(attr)
}
spb.p.elsMatchingAndStyles[regex][attr] = sp
spb.p.elsMatchingAndStyles[regex][attr] = append(spb.p.elsMatchingAndStyles[regex][attr], sp)
}
return spb.p
@@ -464,7 +481,7 @@ func (spb *stylePolicyBuilder) Globally() *Policy {
for _, attr := range spb.propertyNames {
if _, ok := spb.p.globalStyles[attr]; !ok {
spb.p.globalStyles[attr] = stylePolicy{}
spb.p.globalStyles[attr] = []stylePolicy{}
}
// Use only one strategy for validating styles, fallback to default
@@ -476,15 +493,15 @@ func (spb *stylePolicyBuilder) Globally() *Policy {
} else if spb.regexp != nil {
sp.regexp = spb.regexp
} else {
sp.handler = getDefaultHandler(attr)
sp.handler = css.GetDefaultHandler(attr)
}
spb.p.globalStyles[attr] = sp
spb.p.globalStyles[attr] = append(spb.p.globalStyles[attr], sp)
}
return spb.p
}
// AllowElements will append HTML elements to the whitelist without applying an
// AllowElements will append HTML elements to the allowlist without applying an
// attribute policy to those elements (the elements are permitted
// sans-attributes)
func (p *Policy) AllowElements(names ...string) *Policy {
@@ -494,17 +511,19 @@ func (p *Policy) AllowElements(names ...string) *Policy {
element = strings.ToLower(element)
if _, ok := p.elsAndAttrs[element]; !ok {
p.elsAndAttrs[element] = make(map[string]attrPolicy)
p.elsAndAttrs[element] = make(map[string][]attrPolicy)
}
}
return p
}
// AllowElementsMatching will append HTML elements to the allowlist if they
// match a regexp.
func (p *Policy) AllowElementsMatching(regex *regexp.Regexp) *Policy {
p.init()
if _, ok := p.elsMatchingAndAttrs[regex]; !ok {
p.elsMatchingAndAttrs[regex] = make(map[string]attrPolicy)
p.elsMatchingAndAttrs[regex] = make(map[string][]attrPolicy)
}
return p
}
@@ -611,7 +630,7 @@ func (p *Policy) AllowRelativeURLs(require bool) *Policy {
return p
}
// AllowURLSchemes will append URL schemes to the whitelist
// AllowURLSchemes will append URL schemes to the allowlist
// Example: p.AllowURLSchemes("mailto", "http", "https")
func (p *Policy) AllowURLSchemes(schemes ...string) *Policy {
p.init()
@@ -629,7 +648,7 @@ func (p *Policy) AllowURLSchemes(schemes ...string) *Policy {
}
// AllowURLSchemeWithCustomPolicy will append URL schemes with
// a custom URL policy to the whitelist.
// a custom URL policy to the allowlist.
// Only the URLs with matching schema and urlPolicy(url)
// returning true will be allowed.
func (p *Policy) AllowURLSchemeWithCustomPolicy(
@@ -643,13 +662,13 @@ func (p *Policy) AllowURLSchemeWithCustomPolicy(
scheme = strings.ToLower(scheme)
p.allowURLSchemes[scheme] = urlPolicy
p.allowURLSchemes[scheme] = append(p.allowURLSchemes[scheme], urlPolicy)
return p
}
// AddSpaceWhenStrippingTag states whether to add a single space " " when
// removing tags that are not whitelisted by the policy.
// removing tags that are not allowed by the policy.
//
// This is useful if you expect to strip tags in dense markup and may lose the
// value of whitespace.

View File

@@ -31,6 +31,7 @@ package bluemonday
import (
"bytes"
"fmt"
"io"
"net/url"
"regexp"
@@ -47,10 +48,11 @@ var (
dataAttributeXMLPrefix = regexp.MustCompile("^xml.+")
dataAttributeInvalidChars = regexp.MustCompile("[A-Z;]+")
cssUnicodeChar = regexp.MustCompile(`\\[0-9a-f]{1,6} ?`)
dataURIbase64Prefix = regexp.MustCompile(`^data:[^,]*;base64,`)
)
// Sanitize takes a string that contains a HTML fragment or document and applies
// the given policy whitelist.
// the given policy allowlist.
//
// It returns a HTML string that has been sanitized by the policy or an empty
// string if an error has occurred (most likely as a consequence of extremely
@@ -60,11 +62,11 @@ func (p *Policy) Sanitize(s string) string {
return s
}
return p.sanitize(strings.NewReader(s)).String()
return p.sanitizeWithBuff(strings.NewReader(s)).String()
}
// SanitizeBytes takes a []byte that contains a HTML fragment or document and applies
// the given policy whitelist.
// the given policy allowlist.
//
// It returns a []byte containing the HTML that has been sanitized by the policy
// or an empty []byte if an error has occurred (most likely as a consequence of
@@ -74,26 +76,32 @@ func (p *Policy) SanitizeBytes(b []byte) []byte {
return b
}
return p.sanitize(bytes.NewReader(b)).Bytes()
return p.sanitizeWithBuff(bytes.NewReader(b)).Bytes()
}
// SanitizeReader takes an io.Reader that contains a HTML fragment or document
// and applies the given policy whitelist.
// and applies the given policy allowlist.
//
// It returns a bytes.Buffer containing the HTML that has been sanitized by the
// policy. Errors during sanitization will merely return an empty result.
func (p *Policy) SanitizeReader(r io.Reader) *bytes.Buffer {
return p.sanitize(r)
return p.sanitizeWithBuff(r)
}
// SanitizeReaderToWriter takes an io.Reader that contains a HTML fragment or document
// and applies the given policy allowlist and writes to the provided writer returning
// an error if there is one.
func (p *Policy) SanitizeReaderToWriter(r io.Reader, w io.Writer) error {
return p.sanitize(r, w)
}
const escapedURLChars = "'<>\"\r"
func escapeUrlComponent(val string) string {
w := bytes.NewBufferString("")
func escapeUrlComponent(w stringWriterWriter, val string) error {
i := strings.IndexAny(val, escapedURLChars)
for i != -1 {
if _, err := w.WriteString(val[:i]); err != nil {
return w.String()
return err
}
var esc string
switch val[i] {
@@ -114,15 +122,15 @@ func escapeUrlComponent(val string) string {
}
val = val[i+1:]
if _, err := w.WriteString(esc); err != nil {
return w.String()
return err
}
i = strings.IndexAny(val, escapedURLChars)
}
w.WriteString(val)
return w.String()
_, err := w.WriteString(val)
return err
}
// Query represents a query
// Query represents a single part of the query string, a query param
type Query struct {
Key string
Value string
@@ -130,6 +138,10 @@ type Query struct {
}
func parseQuery(query string) (values []Query, err error) {
// This is essentially a copy of parseQuery from
// https://golang.org/src/net/url/url.go but adjusted to build our values
// based on our type, which we need to preserve the ordering of the query
// string
for query != "" {
key := query
if i := strings.IndexAny(key, "&;"); i >= 0 {
@@ -170,18 +182,18 @@ func parseQuery(query string) (values []Query, err error) {
}
func encodeQueries(queries []Query) string {
var b strings.Builder
var buff bytes.Buffer
for i, query := range queries {
b.WriteString(url.QueryEscape(query.Key))
buff.WriteString(url.QueryEscape(query.Key))
if query.HasValue {
b.WriteString("=")
b.WriteString(url.QueryEscape(query.Value))
buff.WriteString("=")
buff.WriteString(url.QueryEscape(query.Value))
}
if i < len(queries)-1 {
b.WriteString("&")
buff.WriteString("&")
}
}
return b.String()
return buff.String()
}
func sanitizedURL(val string) (string, error) {
@@ -205,45 +217,24 @@ func sanitizedURL(val string) (string, error) {
return u.String(), nil
}
func (p *Policy) writeLinkableBuf(buff *bytes.Buffer, token *html.Token) {
// do not escape multiple query parameters
tokenBuff := bytes.NewBufferString("")
tokenBuff.WriteString("<")
tokenBuff.WriteString(token.Data)
for _, attr := range token.Attr {
tokenBuff.WriteByte(' ')
tokenBuff.WriteString(attr.Key)
tokenBuff.WriteString(`="`)
switch attr.Key {
case "href", "src":
u, ok := p.validURL(attr.Val)
if !ok {
tokenBuff.WriteString(html.EscapeString(attr.Val))
continue
}
u, err := sanitizedURL(u)
if err == nil {
tokenBuff.WriteString(u)
} else {
// fallthrough
tokenBuff.WriteString(html.EscapeString(attr.Val))
}
default:
// re-apply
tokenBuff.WriteString(html.EscapeString(attr.Val))
}
tokenBuff.WriteByte('"')
// Performs the actual sanitization process.
func (p *Policy) sanitizeWithBuff(r io.Reader) *bytes.Buffer {
var buff bytes.Buffer
if err := p.sanitize(r, &buff); err != nil {
return &bytes.Buffer{}
}
if token.Type == html.SelfClosingTagToken {
tokenBuff.WriteString("/")
}
tokenBuff.WriteString(">")
buff.WriteString(tokenBuff.String())
return &buff
}
// Performs the actual sanitization process.
func (p *Policy) sanitize(r io.Reader) *bytes.Buffer {
type asStringWriter struct {
io.Writer
}
func (a *asStringWriter) WriteString(s string) (int, error) {
return a.Write([]byte(s))
}
func (p *Policy) sanitize(r io.Reader, w io.Writer) error {
// It is possible that the developer has created the policy via:
// p := bluemonday.Policy{}
// rather than:
@@ -252,8 +243,12 @@ func (p *Policy) sanitize(r io.Reader) *bytes.Buffer {
// would initiliaze the maps, then we need to do that.
p.init()
buff, ok := w.(stringWriterWriter)
if !ok {
buff = &asStringWriter{w}
}
var (
buff bytes.Buffer
skipElementContent bool
skippingElementsCount int64
skipClosingTag bool
@@ -267,11 +262,11 @@ func (p *Policy) sanitize(r io.Reader) *bytes.Buffer {
err := tokenizer.Err()
if err == io.EOF {
// End of input means end of processing
return &buff
return nil
}
// Raw tokenizer error
return &bytes.Buffer{}
return err
}
token := tokenizer.Token()
@@ -289,6 +284,10 @@ func (p *Policy) sanitize(r io.Reader) *bytes.Buffer {
case html.CommentToken:
// Comments are ignored by default
if p.allowComments {
// But if allowed then write the comment out as-is
buff.WriteString(token.String())
}
case html.StartTagToken:
@@ -303,14 +302,18 @@ func (p *Policy) sanitize(r io.Reader) *bytes.Buffer {
skippingElementsCount++
}
if p.addSpaces {
buff.WriteString(" ")
if _, err := buff.WriteString(" "); err != nil {
return err
}
}
break
}
aps = aa
}
if len(token.Attr) != 0 {
token.Attr = p.sanitizeAttrs(token.Data, token.Attr, aps)
token.Attr = escapeAttributes(
p.sanitizeAttrs(token.Data, token.Attr, aps),
)
}
if len(token.Attr) == 0 {
@@ -318,18 +321,17 @@ func (p *Policy) sanitize(r io.Reader) *bytes.Buffer {
skipClosingTag = true
closingTagToSkipStack = append(closingTagToSkipStack, token.Data)
if p.addSpaces {
buff.WriteString(" ")
if _, err := buff.WriteString(" "); err != nil {
return err
}
}
break
}
}
if !skipElementContent {
// do not escape multiple query parameters
if linkable(token.Data) {
p.writeLinkableBuf(&buff, &token)
} else {
buff.WriteString(token.String())
if _, err := buff.WriteString(token.String()); err != nil {
return err
}
}
@@ -345,7 +347,9 @@ func (p *Policy) sanitize(r io.Reader) *bytes.Buffer {
skipClosingTag = false
}
if p.addSpaces {
buff.WriteString(" ")
if _, err := buff.WriteString(" "); err != nil {
return err
}
}
break
}
@@ -366,14 +370,18 @@ func (p *Policy) sanitize(r io.Reader) *bytes.Buffer {
}
if !match {
if p.addSpaces {
buff.WriteString(" ")
if _, err := buff.WriteString(" "); err != nil {
return err
}
}
break
}
}
if !skipElementContent {
buff.WriteString(token.String())
if _, err := buff.WriteString(token.String()); err != nil {
return err
}
}
case html.SelfClosingTagToken:
@@ -383,7 +391,9 @@ func (p *Policy) sanitize(r io.Reader) *bytes.Buffer {
aa, matched := p.matchRegex(token.Data)
if !matched {
if p.addSpaces && !matched {
buff.WriteString(" ")
if _, err := buff.WriteString(" "); err != nil {
return err
}
}
break
}
@@ -391,21 +401,20 @@ func (p *Policy) sanitize(r io.Reader) *bytes.Buffer {
}
if len(token.Attr) != 0 {
token.Attr = p.sanitizeAttrs(token.Data, token.Attr, aps)
token.Attr = escapeAttributes(p.sanitizeAttrs(token.Data, token.Attr, aps))
}
if len(token.Attr) == 0 && !p.allowNoAttrs(token.Data) {
if p.addSpaces {
buff.WriteString(" ")
if _, err := buff.WriteString(" "); err != nil {
return err
}
break
}
}
if !skipElementContent {
// do not escape multiple query parameters
if linkable(token.Data) {
p.writeLinkableBuf(&buff, &token)
} else {
buff.WriteString(token.String())
if _, err := buff.WriteString(token.String()); err != nil {
return err
}
}
@@ -416,20 +425,26 @@ func (p *Policy) sanitize(r io.Reader) *bytes.Buffer {
case `script`:
// not encouraged, but if a policy allows JavaScript we
// should not HTML escape it as that would break the output
buff.WriteString(token.Data)
case `style`:
if _, err := buff.WriteString(token.Data); err != nil {
return err
}
case "style":
// not encouraged, but if a policy allows CSS styles we
// should not HTML escape it as that would break the output
buff.WriteString(token.Data)
if _, err := buff.WriteString(token.Data); err != nil {
return err
}
default:
// HTML escape the text
buff.WriteString(token.String())
if _, err := buff.WriteString(token.String()); err != nil {
return err
}
}
}
default:
// A token that didn't exist in the html package when we wrote this
return &bytes.Buffer{}
return fmt.Errorf("unknown token: %v", token)
}
}
}
@@ -440,7 +455,7 @@ func (p *Policy) sanitize(r io.Reader) *bytes.Buffer {
func (p *Policy) sanitizeAttrs(
elementName string,
attrs []html.Attribute,
aps map[string]attrPolicy,
aps map[string][]attrPolicy,
) []html.Attribute {
if len(attrs) == 0 {
@@ -465,8 +480,9 @@ func (p *Policy) sanitizeAttrs(
}
// Builds a new attribute slice based on the whether the attribute has been
// whitelisted explicitly or globally.
// allowed explicitly or globally.
cleanAttrs := []html.Attribute{}
attrsLoop:
for _, htmlAttr := range attrs {
if p.allowDataAttributes {
// If we see a data attribute, let it through.
@@ -489,27 +505,32 @@ func (p *Policy) sanitizeAttrs(
}
// Is there an element specific attribute policy that applies?
if ap, ok := aps[htmlAttr.Key]; ok {
if ap.regexp != nil {
if ap.regexp.MatchString(htmlAttr.Val) {
if apl, ok := aps[htmlAttr.Key]; ok {
for _, ap := range apl {
if ap.regexp != nil {
if ap.regexp.MatchString(htmlAttr.Val) {
cleanAttrs = append(cleanAttrs, htmlAttr)
continue attrsLoop
}
} else {
cleanAttrs = append(cleanAttrs, htmlAttr)
continue
continue attrsLoop
}
} else {
cleanAttrs = append(cleanAttrs, htmlAttr)
continue
}
}
// Is there a global attribute policy that applies?
if ap, ok := p.globalAttrs[htmlAttr.Key]; ok {
if ap.regexp != nil {
if ap.regexp.MatchString(htmlAttr.Val) {
if apl, ok := p.globalAttrs[htmlAttr.Key]; ok {
for _, ap := range apl {
if ap.regexp != nil {
if ap.regexp.MatchString(htmlAttr.Val) {
htmlAttr.Val = escapeAttribute(htmlAttr.Val)
cleanAttrs = append(cleanAttrs, htmlAttr)
}
} else {
htmlAttr.Val = escapeAttribute(htmlAttr.Val)
cleanAttrs = append(cleanAttrs, htmlAttr)
}
} else {
cleanAttrs = append(cleanAttrs, htmlAttr)
}
}
}
@@ -533,7 +554,7 @@ func (p *Policy) sanitizeAttrs(
tmpAttrs := []html.Attribute{}
for _, htmlAttr := range cleanAttrs {
switch elementName {
case "a", "area", "link":
case "a", "area", "base", "link":
if htmlAttr.Key == "href" {
if u, ok := p.validURL(htmlAttr.Val); ok {
htmlAttr.Val = u
@@ -542,7 +563,7 @@ func (p *Policy) sanitizeAttrs(
break
}
tmpAttrs = append(tmpAttrs, htmlAttr)
case "blockquote", "q":
case "blockquote", "del", "ins", "q":
if htmlAttr.Key == "cite" {
if u, ok := p.validURL(htmlAttr.Val); ok {
htmlAttr.Val = u
@@ -551,7 +572,7 @@ func (p *Policy) sanitizeAttrs(
break
}
tmpAttrs = append(tmpAttrs, htmlAttr)
case "img", "script":
case "audio", "embed", "iframe", "img", "script", "source", "track", "video":
if htmlAttr.Key == "src" {
if u, ok := p.validURL(htmlAttr.Val); ok {
htmlAttr.Val = u
@@ -576,7 +597,7 @@ func (p *Policy) sanitizeAttrs(
// Add rel="nofollow" if a "href" exists
switch elementName {
case "a", "area", "link":
case "a", "area", "base", "link":
var hrefFound bool
var externalLink bool
for _, htmlAttr := range cleanAttrs {
@@ -753,14 +774,14 @@ func (p *Policy) sanitizeAttrs(
func (p *Policy) sanitizeStyles(attr html.Attribute, elementName string) html.Attribute {
sps := p.elsAndStyles[elementName]
if len(sps) == 0 {
sps = map[string]stylePolicy{}
sps = map[string][]stylePolicy{}
// check for any matching elements, if we don't already have a policy found
// if multiple matches are found they will be overwritten, it's best
// to not have overlapping matchers
for regex, policies := range p.elsMatchingAndStyles {
if regex.MatchString(elementName) {
for k, v := range policies {
sps[k] = v
sps[k] = append(sps[k], v...)
}
}
}
@@ -778,46 +799,51 @@ func (p *Policy) sanitizeStyles(attr html.Attribute, elementName string) html.At
clean := []string{}
prefixes := []string{"-webkit-", "-moz-", "-ms-", "-o-", "mso-", "-xv-", "-atsc-", "-wap-", "-khtml-", "prince-", "-ah-", "-hp-", "-ro-", "-rim-", "-tc-"}
decLoop:
for _, dec := range decs {
addedProperty := false
tempProperty := strings.ToLower(dec.Property)
tempValue := removeUnicode(strings.ToLower(dec.Value))
for _, i := range prefixes {
tempProperty = strings.TrimPrefix(tempProperty, i)
}
if sp, ok := sps[tempProperty]; ok {
if sp.handler != nil {
if sp.handler(tempValue) {
clean = append(clean, dec.Property+": "+dec.Value)
addedProperty = true
if spl, ok := sps[tempProperty]; ok {
for _, sp := range spl {
if sp.handler != nil {
if sp.handler(tempValue) {
clean = append(clean, dec.Property+": "+dec.Value)
continue decLoop
}
} else if len(sp.enum) > 0 {
if stringInSlice(tempValue, sp.enum) {
clean = append(clean, dec.Property+": "+dec.Value)
continue decLoop
}
} else if sp.regexp != nil {
if sp.regexp.MatchString(tempValue) {
clean = append(clean, dec.Property+": "+dec.Value)
continue decLoop
}
}
} else if len(sp.enum) > 0 {
if stringInSlice(tempValue, sp.enum) {
clean = append(clean, dec.Property+": "+dec.Value)
addedProperty = true
}
} else if sp.regexp != nil {
if sp.regexp.MatchString(tempValue) {
clean = append(clean, dec.Property+": "+dec.Value)
addedProperty = true
}
continue
}
}
if sp, ok := p.globalStyles[tempProperty]; ok && !addedProperty {
if sp.handler != nil {
if sp.handler(tempValue) {
clean = append(clean, dec.Property+": "+dec.Value)
if spl, ok := p.globalStyles[tempProperty]; ok {
for _, sp := range spl {
if sp.handler != nil {
if sp.handler(tempValue) {
clean = append(clean, dec.Property+": "+dec.Value)
continue decLoop
}
} else if len(sp.enum) > 0 {
if stringInSlice(tempValue, sp.enum) {
clean = append(clean, dec.Property+": "+dec.Value)
continue decLoop
}
} else if sp.regexp != nil {
if sp.regexp.MatchString(tempValue) {
clean = append(clean, dec.Property+": "+dec.Value)
continue decLoop
}
}
} else if len(sp.enum) > 0 {
if stringInSlice(tempValue, sp.enum) {
clean = append(clean, dec.Property+": "+dec.Value)
}
} else if sp.regexp != nil {
if sp.regexp.MatchString(tempValue) {
clean = append(clean, dec.Property+": "+dec.Value)
}
continue
}
}
}
@@ -848,11 +874,28 @@ func (p *Policy) validURL(rawurl string) (string, bool) {
rawurl = strings.TrimSpace(rawurl)
// URLs cannot contain whitespace, unless it is a data-uri
if (strings.Contains(rawurl, " ") ||
if strings.Contains(rawurl, " ") ||
strings.Contains(rawurl, "\t") ||
strings.Contains(rawurl, "\n")) &&
!strings.HasPrefix(rawurl, `data:`) {
return "", false
strings.Contains(rawurl, "\n") {
if !strings.HasPrefix(rawurl, `data:`) {
return "", false
}
// Remove \r and \n from base64 encoded data to pass url.Parse.
matched := dataURIbase64Prefix.FindString(rawurl)
if matched != "" {
rawurl = matched + strings.Replace(
strings.Replace(
rawurl[len(matched):],
"\r",
"",
-1,
),
"\n",
"",
-1,
)
}
}
// URLs are valid if they parse
@@ -863,16 +906,21 @@ func (p *Policy) validURL(rawurl string) (string, bool) {
if u.Scheme != "" {
urlPolicy, ok := p.allowURLSchemes[u.Scheme]
urlPolicies, ok := p.allowURLSchemes[u.Scheme]
if !ok {
return "", false
}
if urlPolicy == nil || urlPolicy(u) == true {
if len(urlPolicies) == 0 {
return u.String(), true
}
for _, urlPolicy := range urlPolicies {
if urlPolicy(u) == true {
return u.String(), true
}
}
return "", false
}
@@ -890,7 +938,14 @@ func (p *Policy) validURL(rawurl string) (string, bool) {
func linkable(elementName string) bool {
switch elementName {
case "a", "area", "blockquote", "img", "link", "script":
case "a", "area", "base", "link":
// elements that allow .href
return true
case "blockquote", "del", "ins", "q":
// elements that allow .cite
return true
case "audio", "embed", "iframe", "img", "input", "script", "track", "video":
// elements that allow .src
return true
default:
return false
@@ -957,14 +1012,14 @@ func removeUnicode(value string) string {
return substitutedValue
}
func (p *Policy) matchRegex(elementName string) (map[string]attrPolicy, bool) {
aps := make(map[string]attrPolicy, 0)
func (p *Policy) matchRegex(elementName string) (map[string][]attrPolicy, bool) {
aps := make(map[string][]attrPolicy, 0)
matched := false
for regex, attrs := range p.elsMatchingAndAttrs {
if regex.MatchString(elementName) {
matched = true
for k, v := range attrs {
aps[k] = v
aps[k] = append(aps[k], v...)
}
}
}
@@ -989,3 +1044,18 @@ func normaliseElementName(str string) string {
`"`,
)
}
func escapeAttributes(attrs []html.Attribute) []html.Attribute {
escapedAttrs := []html.Attribute{}
for _, attr := range attrs {
attr.Val = escapeAttribute(attr.Val)
escapedAttrs = append(escapedAttrs, attr)
}
return escapedAttrs
}
func escapeAttribute(val string) string {
val = strings.Replace(val, string([]rune{'\u00A0'}), `&nbsp;`, -1)
val = strings.Replace(val, `"`, `&quot;`, -1)
return val
}

View File

@@ -0,0 +1,10 @@
// +build go1.12
package bluemonday
import "io"
type stringWriterWriter interface {
io.Writer
io.StringWriter
}

View File

@@ -0,0 +1,14 @@
// +build go1.1,!go1.12
package bluemonday
import "io"
type stringWriterWriter interface {
io.Writer
StringWriter
}
type StringWriter interface {
WriteString(s string) (n int, err error)
}