mirror of
				https://github.com/go-gitea/gitea
				synced 2025-11-03 21:08:25 +00:00 
			
		
		
		
	* Use vendored go-swagger * vendor go-swagger * revert un wanteed change * remove un-needed GO111MODULE * Update Makefile Co-Authored-By: techknowlogick <matti@mdranta.net>
		
			
				
	
	
		
			380 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			380 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
/*
Package purell offers URL normalization as described on the Wikipedia page:
http://en.wikipedia.org/wiki/URL_normalization
*/
 | 
						|
package purell
 | 
						|
 | 
						|
import (
 | 
						|
	"bytes"
 | 
						|
	"fmt"
 | 
						|
	"net/url"
 | 
						|
	"regexp"
 | 
						|
	"sort"
 | 
						|
	"strconv"
 | 
						|
	"strings"
 | 
						|
 | 
						|
	"github.com/PuerkitoBio/urlesc"
 | 
						|
	"golang.org/x/net/idna"
 | 
						|
	"golang.org/x/text/unicode/norm"
 | 
						|
	"golang.org/x/text/width"
 | 
						|
)
 | 
						|
 | 
						|
// NormalizationFlags is a bit set that determines how a URL will be
// normalized. Individual flags are combined with the | operator; several
// convenience sets are predefined below.
type NormalizationFlags uint

const (
	// Safe normalizations
	FlagLowercaseScheme           NormalizationFlags = 1 << iota // HTTP://host -> http://host, applied by default in Go1.1
	FlagLowercaseHost                                            // http://HOST -> http://host
	FlagUppercaseEscapes                                         // http://host/t%ef -> http://host/t%EF
	FlagDecodeUnnecessaryEscapes                                 // http://host/t%41 -> http://host/tA
	FlagEncodeNecessaryEscapes                                   // http://host/!"#$ -> http://host/%21%22#$
	FlagRemoveDefaultPort                                        // http://host:80 -> http://host
	FlagRemoveEmptyQuerySeparator                                // http://host/path? -> http://host/path

	// Usually safe normalizations
	FlagRemoveTrailingSlash // http://host/path/ -> http://host/path
	FlagAddTrailingSlash    // http://host/path -> http://host/path/ (should choose only one of these add/remove trailing slash flags)
	FlagRemoveDotSegments   // http://host/path/./a/b/../c -> http://host/path/a/c

	// Unsafe normalizations
	FlagRemoveDirectoryIndex   // http://host/path/index.html -> http://host/path/
	FlagRemoveFragment         // http://host/path#fragment -> http://host/path
	FlagForceHTTP              // https://host -> http://host
	FlagRemoveDuplicateSlashes // http://host/path//a///b -> http://host/path/a/b
	FlagRemoveWWW              // http://www.host/ -> http://host/
	FlagAddWWW                 // http://host/ -> http://www.host/ (should choose only one of these add/remove WWW flags)
	FlagSortQuery              // http://host/path?c=3&b=2&a=1&b=1 -> http://host/path?a=1&b=1&b=2&c=3

	// Normalizations not in the Wikipedia article, required to cover test cases
	// submitted by jehiah
	FlagDecodeDWORDHost           // http://1113982867 -> http://66.102.7.147
	FlagDecodeOctalHost           // http://0102.0146.07.0223 -> http://66.102.7.147
	FlagDecodeHexHost             // http://0x42660793 -> http://66.102.7.147
	FlagRemoveUnnecessaryHostDots // http://.host../path -> http://host/path
	FlagRemoveEmptyPortSeparator  // http://host:/path -> http://host/path

	// Convenience set of safe normalizations
	FlagsSafe NormalizationFlags = FlagLowercaseHost | FlagLowercaseScheme | FlagUppercaseEscapes | FlagDecodeUnnecessaryEscapes | FlagEncodeNecessaryEscapes | FlagRemoveDefaultPort | FlagRemoveEmptyQuerySeparator

	// For convenience sets, "greedy" uses the "remove trailing slash" and "remove www. prefix" flags,
	// while "non-greedy" uses the "add (or keep) the trailing slash" and "add www. prefix".

	// Convenience set of usually safe normalizations (includes FlagsSafe)
	FlagsUsuallySafeGreedy    NormalizationFlags = FlagsSafe | FlagRemoveTrailingSlash | FlagRemoveDotSegments
	FlagsUsuallySafeNonGreedy NormalizationFlags = FlagsSafe | FlagAddTrailingSlash | FlagRemoveDotSegments

	// Convenience set of unsafe normalizations (includes FlagsUsuallySafe)
	FlagsUnsafeGreedy    NormalizationFlags = FlagsUsuallySafeGreedy | FlagRemoveDirectoryIndex | FlagRemoveFragment | FlagForceHTTP | FlagRemoveDuplicateSlashes | FlagRemoveWWW | FlagSortQuery
	FlagsUnsafeNonGreedy NormalizationFlags = FlagsUsuallySafeNonGreedy | FlagRemoveDirectoryIndex | FlagRemoveFragment | FlagForceHTTP | FlagRemoveDuplicateSlashes | FlagAddWWW | FlagSortQuery

	// Convenience set of all available flags
	FlagsAllGreedy    = FlagsUnsafeGreedy | FlagDecodeDWORDHost | FlagDecodeOctalHost | FlagDecodeHexHost | FlagRemoveUnnecessaryHostDots | FlagRemoveEmptyPortSeparator
	FlagsAllNonGreedy = FlagsUnsafeNonGreedy | FlagDecodeDWORDHost | FlagDecodeOctalHost | FlagDecodeHexHost | FlagRemoveUnnecessaryHostDots | FlagRemoveEmptyPortSeparator
)
 | 
						|
 | 
						|
// Default port suffixes (including the leading colon) for the http and https
// schemes. removeDefaultPort compares them against the port part of a host.
const (
	defaultHttpPort  = ":80"
	defaultHttpsPort = ":443"
)
 | 
						|
 | 
						|
// Regular expressions used by the normalizations.
var (
	// rxPort matches a numeric port (with its colon) at the end of a host,
	// optionally followed by a single slash.
	rxPort = regexp.MustCompile(`(:\d+)/?$`)
	// rxDirIndex matches a trailing "default.<ext>" or "index.<ext>" path
	// segment, capturing the preceding slash (or start of path) in group 1.
	rxDirIndex = regexp.MustCompile(`(^|/)((?:default|index)\.\w{1,4})$`)
	// rxDupSlashes matches any run of two or more slashes.
	rxDupSlashes = regexp.MustCompile(`/{2,}`)
	// rxDWORDHost matches a host made only of decimal digits (group 1),
	// followed by optional trailing dots and an optional port (group 2).
	rxDWORDHost = regexp.MustCompile(`^(\d+)((?:\.+)?(?:\:\d*)?)$`)
	// rxOctalHost matches four dot-separated parts that each start with 0
	// (groups 1-4), followed by optional trailing dots and port (group 5).
	rxOctalHost = regexp.MustCompile(`^(0\d*)\.(0\d*)\.(0\d*)\.(0\d*)((?:\.+)?(?:\:\d*)?)$`)
	// rxHexHost matches a 0x-prefixed hexadecimal host (digits in group 1),
	// followed by optional trailing dots and port (group 2).
	rxHexHost = regexp.MustCompile(`^0x([0-9A-Fa-f]+)((?:\.+)?(?:\:\d*)?)$`)
	// rxHostDots splits a host into the name (group 1) and an optional
	// numeric port (group 2).
	rxHostDots = regexp.MustCompile(`^(.+?)(:\d+)?$`)
	// rxEmptyPort matches one or more colons at the end of a host.
	rxEmptyPort = regexp.MustCompile(`:+$`)
)
 | 
						|
 | 
						|
// Map of flags to implementation function.
 | 
						|
// FlagDecodeUnnecessaryEscapes has no action, since it is done automatically
 | 
						|
// by parsing the string as an URL. Same for FlagUppercaseEscapes and FlagRemoveEmptyQuerySeparator.
 | 
						|
 | 
						|
// Since maps have undefined traversing order, make a slice of ordered keys
 | 
						|
var flagsOrder = []NormalizationFlags{
 | 
						|
	FlagLowercaseScheme,
 | 
						|
	FlagLowercaseHost,
 | 
						|
	FlagRemoveDefaultPort,
 | 
						|
	FlagRemoveDirectoryIndex,
 | 
						|
	FlagRemoveDotSegments,
 | 
						|
	FlagRemoveFragment,
 | 
						|
	FlagForceHTTP, // Must be after remove default port (because https=443/http=80)
 | 
						|
	FlagRemoveDuplicateSlashes,
 | 
						|
	FlagRemoveWWW,
 | 
						|
	FlagAddWWW,
 | 
						|
	FlagSortQuery,
 | 
						|
	FlagDecodeDWORDHost,
 | 
						|
	FlagDecodeOctalHost,
 | 
						|
	FlagDecodeHexHost,
 | 
						|
	FlagRemoveUnnecessaryHostDots,
 | 
						|
	FlagRemoveEmptyPortSeparator,
 | 
						|
	FlagRemoveTrailingSlash, // These two (add/remove trailing slash) must be last
 | 
						|
	FlagAddTrailingSlash,
 | 
						|
}
 | 
						|
 | 
						|
// ... and then the map, where order is unimportant
 | 
						|
var flags = map[NormalizationFlags]func(*url.URL){
 | 
						|
	FlagLowercaseScheme:           lowercaseScheme,
 | 
						|
	FlagLowercaseHost:             lowercaseHost,
 | 
						|
	FlagRemoveDefaultPort:         removeDefaultPort,
 | 
						|
	FlagRemoveDirectoryIndex:      removeDirectoryIndex,
 | 
						|
	FlagRemoveDotSegments:         removeDotSegments,
 | 
						|
	FlagRemoveFragment:            removeFragment,
 | 
						|
	FlagForceHTTP:                 forceHTTP,
 | 
						|
	FlagRemoveDuplicateSlashes:    removeDuplicateSlashes,
 | 
						|
	FlagRemoveWWW:                 removeWWW,
 | 
						|
	FlagAddWWW:                    addWWW,
 | 
						|
	FlagSortQuery:                 sortQuery,
 | 
						|
	FlagDecodeDWORDHost:           decodeDWORDHost,
 | 
						|
	FlagDecodeOctalHost:           decodeOctalHost,
 | 
						|
	FlagDecodeHexHost:             decodeHexHost,
 | 
						|
	FlagRemoveUnnecessaryHostDots: removeUnncessaryHostDots,
 | 
						|
	FlagRemoveEmptyPortSeparator:  removeEmptyPortSeparator,
 | 
						|
	FlagRemoveTrailingSlash:       removeTrailingSlash,
 | 
						|
	FlagAddTrailingSlash:          addTrailingSlash,
 | 
						|
}
 | 
						|
 | 
						|
// MustNormalizeURLString returns the normalized string, and panics if an error occurs.
 | 
						|
// It takes an URL string as input, as well as the normalization flags.
 | 
						|
func MustNormalizeURLString(u string, f NormalizationFlags) string {
 | 
						|
	result, e := NormalizeURLString(u, f)
 | 
						|
	if e != nil {
 | 
						|
		panic(e)
 | 
						|
	}
 | 
						|
	return result
 | 
						|
}
 | 
						|
 | 
						|
// NormalizeURLString returns the normalized string, or an error if it can't be parsed into an URL object.
 | 
						|
// It takes an URL string as input, as well as the normalization flags.
 | 
						|
func NormalizeURLString(u string, f NormalizationFlags) (string, error) {
 | 
						|
	parsed, err := url.Parse(u)
 | 
						|
	if err != nil {
 | 
						|
		return "", err
 | 
						|
	}
 | 
						|
 | 
						|
	if f&FlagLowercaseHost == FlagLowercaseHost {
 | 
						|
		parsed.Host = strings.ToLower(parsed.Host)
 | 
						|
	}
 | 
						|
 | 
						|
	// The idna package doesn't fully conform to RFC 5895
 | 
						|
	// (https://tools.ietf.org/html/rfc5895), so we do it here.
 | 
						|
	// Taken from Go 1.8 cycle source, courtesy of bradfitz.
 | 
						|
	// TODO: Remove when (if?) idna package conforms to RFC 5895.
 | 
						|
	parsed.Host = width.Fold.String(parsed.Host)
 | 
						|
	parsed.Host = norm.NFC.String(parsed.Host)
 | 
						|
	if parsed.Host, err = idna.ToASCII(parsed.Host); err != nil {
 | 
						|
		return "", err
 | 
						|
	}
 | 
						|
 | 
						|
	return NormalizeURL(parsed, f), nil
 | 
						|
}
 | 
						|
 | 
						|
// NormalizeURL returns the normalized string.
 | 
						|
// It takes a parsed URL object as input, as well as the normalization flags.
 | 
						|
func NormalizeURL(u *url.URL, f NormalizationFlags) string {
 | 
						|
	for _, k := range flagsOrder {
 | 
						|
		if f&k == k {
 | 
						|
			flags[k](u)
 | 
						|
		}
 | 
						|
	}
 | 
						|
	return urlesc.Escape(u)
 | 
						|
}
 | 
						|
 | 
						|
func lowercaseScheme(u *url.URL) {
 | 
						|
	if len(u.Scheme) > 0 {
 | 
						|
		u.Scheme = strings.ToLower(u.Scheme)
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
func lowercaseHost(u *url.URL) {
 | 
						|
	if len(u.Host) > 0 {
 | 
						|
		u.Host = strings.ToLower(u.Host)
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
func removeDefaultPort(u *url.URL) {
 | 
						|
	if len(u.Host) > 0 {
 | 
						|
		scheme := strings.ToLower(u.Scheme)
 | 
						|
		u.Host = rxPort.ReplaceAllStringFunc(u.Host, func(val string) string {
 | 
						|
			if (scheme == "http" && val == defaultHttpPort) || (scheme == "https" && val == defaultHttpsPort) {
 | 
						|
				return ""
 | 
						|
			}
 | 
						|
			return val
 | 
						|
		})
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
func removeTrailingSlash(u *url.URL) {
 | 
						|
	if l := len(u.Path); l > 0 {
 | 
						|
		if strings.HasSuffix(u.Path, "/") {
 | 
						|
			u.Path = u.Path[:l-1]
 | 
						|
		}
 | 
						|
	} else if l = len(u.Host); l > 0 {
 | 
						|
		if strings.HasSuffix(u.Host, "/") {
 | 
						|
			u.Host = u.Host[:l-1]
 | 
						|
		}
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
func addTrailingSlash(u *url.URL) {
 | 
						|
	if l := len(u.Path); l > 0 {
 | 
						|
		if !strings.HasSuffix(u.Path, "/") {
 | 
						|
			u.Path += "/"
 | 
						|
		}
 | 
						|
	} else if l = len(u.Host); l > 0 {
 | 
						|
		if !strings.HasSuffix(u.Host, "/") {
 | 
						|
			u.Host += "/"
 | 
						|
		}
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
func removeDotSegments(u *url.URL) {
 | 
						|
	if len(u.Path) > 0 {
 | 
						|
		var dotFree []string
 | 
						|
		var lastIsDot bool
 | 
						|
 | 
						|
		sections := strings.Split(u.Path, "/")
 | 
						|
		for _, s := range sections {
 | 
						|
			if s == ".." {
 | 
						|
				if len(dotFree) > 0 {
 | 
						|
					dotFree = dotFree[:len(dotFree)-1]
 | 
						|
				}
 | 
						|
			} else if s != "." {
 | 
						|
				dotFree = append(dotFree, s)
 | 
						|
			}
 | 
						|
			lastIsDot = (s == "." || s == "..")
 | 
						|
		}
 | 
						|
		// Special case if host does not end with / and new path does not begin with /
 | 
						|
		u.Path = strings.Join(dotFree, "/")
 | 
						|
		if u.Host != "" && !strings.HasSuffix(u.Host, "/") && !strings.HasPrefix(u.Path, "/") {
 | 
						|
			u.Path = "/" + u.Path
 | 
						|
		}
 | 
						|
		// Special case if the last segment was a dot, make sure the path ends with a slash
 | 
						|
		if lastIsDot && !strings.HasSuffix(u.Path, "/") {
 | 
						|
			u.Path += "/"
 | 
						|
		}
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
func removeDirectoryIndex(u *url.URL) {
 | 
						|
	if len(u.Path) > 0 {
 | 
						|
		u.Path = rxDirIndex.ReplaceAllString(u.Path, "$1")
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
func removeFragment(u *url.URL) {
 | 
						|
	u.Fragment = ""
 | 
						|
}
 | 
						|
 | 
						|
func forceHTTP(u *url.URL) {
 | 
						|
	if strings.ToLower(u.Scheme) == "https" {
 | 
						|
		u.Scheme = "http"
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
func removeDuplicateSlashes(u *url.URL) {
 | 
						|
	if len(u.Path) > 0 {
 | 
						|
		u.Path = rxDupSlashes.ReplaceAllString(u.Path, "/")
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
func removeWWW(u *url.URL) {
 | 
						|
	if len(u.Host) > 0 && strings.HasPrefix(strings.ToLower(u.Host), "www.") {
 | 
						|
		u.Host = u.Host[4:]
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
func addWWW(u *url.URL) {
 | 
						|
	if len(u.Host) > 0 && !strings.HasPrefix(strings.ToLower(u.Host), "www.") {
 | 
						|
		u.Host = "www." + u.Host
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
func sortQuery(u *url.URL) {
 | 
						|
	q := u.Query()
 | 
						|
 | 
						|
	if len(q) > 0 {
 | 
						|
		arKeys := make([]string, len(q))
 | 
						|
		i := 0
 | 
						|
		for k := range q {
 | 
						|
			arKeys[i] = k
 | 
						|
			i++
 | 
						|
		}
 | 
						|
		sort.Strings(arKeys)
 | 
						|
		buf := new(bytes.Buffer)
 | 
						|
		for _, k := range arKeys {
 | 
						|
			sort.Strings(q[k])
 | 
						|
			for _, v := range q[k] {
 | 
						|
				if buf.Len() > 0 {
 | 
						|
					buf.WriteRune('&')
 | 
						|
				}
 | 
						|
				buf.WriteString(fmt.Sprintf("%s=%s", k, urlesc.QueryEscape(v)))
 | 
						|
			}
 | 
						|
		}
 | 
						|
 | 
						|
		// Rebuild the raw query string
 | 
						|
		u.RawQuery = buf.String()
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
func decodeDWORDHost(u *url.URL) {
 | 
						|
	if len(u.Host) > 0 {
 | 
						|
		if matches := rxDWORDHost.FindStringSubmatch(u.Host); len(matches) > 2 {
 | 
						|
			var parts [4]int64
 | 
						|
 | 
						|
			dword, _ := strconv.ParseInt(matches[1], 10, 0)
 | 
						|
			for i, shift := range []uint{24, 16, 8, 0} {
 | 
						|
				parts[i] = dword >> shift & 0xFF
 | 
						|
			}
 | 
						|
			u.Host = fmt.Sprintf("%d.%d.%d.%d%s", parts[0], parts[1], parts[2], parts[3], matches[2])
 | 
						|
		}
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
func decodeOctalHost(u *url.URL) {
 | 
						|
	if len(u.Host) > 0 {
 | 
						|
		if matches := rxOctalHost.FindStringSubmatch(u.Host); len(matches) > 5 {
 | 
						|
			var parts [4]int64
 | 
						|
 | 
						|
			for i := 1; i <= 4; i++ {
 | 
						|
				parts[i-1], _ = strconv.ParseInt(matches[i], 8, 0)
 | 
						|
			}
 | 
						|
			u.Host = fmt.Sprintf("%d.%d.%d.%d%s", parts[0], parts[1], parts[2], parts[3], matches[5])
 | 
						|
		}
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
func decodeHexHost(u *url.URL) {
 | 
						|
	if len(u.Host) > 0 {
 | 
						|
		if matches := rxHexHost.FindStringSubmatch(u.Host); len(matches) > 2 {
 | 
						|
			// Conversion is safe because of regex validation
 | 
						|
			parsed, _ := strconv.ParseInt(matches[1], 16, 0)
 | 
						|
			// Set host as DWORD (base 10) encoded host
 | 
						|
			u.Host = fmt.Sprintf("%d%s", parsed, matches[2])
 | 
						|
			// The rest is the same as decoding a DWORD host
 | 
						|
			decodeDWORDHost(u)
 | 
						|
		}
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
func removeUnncessaryHostDots(u *url.URL) {
 | 
						|
	if len(u.Host) > 0 {
 | 
						|
		if matches := rxHostDots.FindStringSubmatch(u.Host); len(matches) > 1 {
 | 
						|
			// Trim the leading and trailing dots
 | 
						|
			u.Host = strings.Trim(matches[1], ".")
 | 
						|
			if len(matches) > 2 {
 | 
						|
				u.Host += matches[2]
 | 
						|
			}
 | 
						|
		}
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
func removeEmptyPortSeparator(u *url.URL) {
 | 
						|
	if len(u.Host) > 0 {
 | 
						|
		u.Host = rxEmptyPort.ReplaceAllString(u.Host, "")
 | 
						|
	}
 | 
						|
}
 |