mirror of
https://github.com/go-gitea/gitea
synced 2025-07-22 18:28:37 +00:00
use go 1.13 (#8088)
* use go 1.13 * use go 1.13 in gomod file * Update Makefile * update swagger deps
This commit is contained in:
committed by
Lauris BH
parent
d0ad47bd5d
commit
3f5cdfe359
556
vendor/golang.org/x/text/encoding/charmap/maketables.go
generated
vendored
556
vendor/golang.org/x/text/encoding/charmap/maketables.go
generated
vendored
@@ -1,556 +0,0 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"sort"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/internal/gen"
|
||||
)
|
||||
|
||||
const ascii = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" +
|
||||
"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" +
|
||||
` !"#$%&'()*+,-./0123456789:;<=>?` +
|
||||
`@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_` +
|
||||
"`abcdefghijklmnopqrstuvwxyz{|}~\u007f"
|
||||
|
||||
var encodings = []struct {
|
||||
name string
|
||||
mib string
|
||||
comment string
|
||||
varName string
|
||||
replacement byte
|
||||
mapping string
|
||||
}{
|
||||
{
|
||||
"IBM Code Page 037",
|
||||
"IBM037",
|
||||
"",
|
||||
"CodePage037",
|
||||
0x3f,
|
||||
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM037-2.1.2.ucm",
|
||||
},
|
||||
{
|
||||
"IBM Code Page 437",
|
||||
"PC8CodePage437",
|
||||
"",
|
||||
"CodePage437",
|
||||
encoding.ASCIISub,
|
||||
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM437-2.1.2.ucm",
|
||||
},
|
||||
{
|
||||
"IBM Code Page 850",
|
||||
"PC850Multilingual",
|
||||
"",
|
||||
"CodePage850",
|
||||
encoding.ASCIISub,
|
||||
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM850-2.1.2.ucm",
|
||||
},
|
||||
{
|
||||
"IBM Code Page 852",
|
||||
"PCp852",
|
||||
"",
|
||||
"CodePage852",
|
||||
encoding.ASCIISub,
|
||||
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM852-2.1.2.ucm",
|
||||
},
|
||||
{
|
||||
"IBM Code Page 855",
|
||||
"IBM855",
|
||||
"",
|
||||
"CodePage855",
|
||||
encoding.ASCIISub,
|
||||
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM855-2.1.2.ucm",
|
||||
},
|
||||
{
|
||||
"Windows Code Page 858", // PC latin1 with Euro
|
||||
"IBM00858",
|
||||
"",
|
||||
"CodePage858",
|
||||
encoding.ASCIISub,
|
||||
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/windows-858-2000.ucm",
|
||||
},
|
||||
{
|
||||
"IBM Code Page 860",
|
||||
"IBM860",
|
||||
"",
|
||||
"CodePage860",
|
||||
encoding.ASCIISub,
|
||||
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM860-2.1.2.ucm",
|
||||
},
|
||||
{
|
||||
"IBM Code Page 862",
|
||||
"PC862LatinHebrew",
|
||||
"",
|
||||
"CodePage862",
|
||||
encoding.ASCIISub,
|
||||
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM862-2.1.2.ucm",
|
||||
},
|
||||
{
|
||||
"IBM Code Page 863",
|
||||
"IBM863",
|
||||
"",
|
||||
"CodePage863",
|
||||
encoding.ASCIISub,
|
||||
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM863-2.1.2.ucm",
|
||||
},
|
||||
{
|
||||
"IBM Code Page 865",
|
||||
"IBM865",
|
||||
"",
|
||||
"CodePage865",
|
||||
encoding.ASCIISub,
|
||||
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM865-2.1.2.ucm",
|
||||
},
|
||||
{
|
||||
"IBM Code Page 866",
|
||||
"IBM866",
|
||||
"",
|
||||
"CodePage866",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-ibm866.txt",
|
||||
},
|
||||
{
|
||||
"IBM Code Page 1047",
|
||||
"IBM1047",
|
||||
"",
|
||||
"CodePage1047",
|
||||
0x3f,
|
||||
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM1047-2.1.2.ucm",
|
||||
},
|
||||
{
|
||||
"IBM Code Page 1140",
|
||||
"IBM01140",
|
||||
"",
|
||||
"CodePage1140",
|
||||
0x3f,
|
||||
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/ibm-1140_P100-1997.ucm",
|
||||
},
|
||||
{
|
||||
"ISO 8859-1",
|
||||
"ISOLatin1",
|
||||
"",
|
||||
"ISO8859_1",
|
||||
encoding.ASCIISub,
|
||||
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/iso-8859_1-1998.ucm",
|
||||
},
|
||||
{
|
||||
"ISO 8859-2",
|
||||
"ISOLatin2",
|
||||
"",
|
||||
"ISO8859_2",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-iso-8859-2.txt",
|
||||
},
|
||||
{
|
||||
"ISO 8859-3",
|
||||
"ISOLatin3",
|
||||
"",
|
||||
"ISO8859_3",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-iso-8859-3.txt",
|
||||
},
|
||||
{
|
||||
"ISO 8859-4",
|
||||
"ISOLatin4",
|
||||
"",
|
||||
"ISO8859_4",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-iso-8859-4.txt",
|
||||
},
|
||||
{
|
||||
"ISO 8859-5",
|
||||
"ISOLatinCyrillic",
|
||||
"",
|
||||
"ISO8859_5",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-iso-8859-5.txt",
|
||||
},
|
||||
{
|
||||
"ISO 8859-6",
|
||||
"ISOLatinArabic",
|
||||
"",
|
||||
"ISO8859_6,ISO8859_6E,ISO8859_6I",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-iso-8859-6.txt",
|
||||
},
|
||||
{
|
||||
"ISO 8859-7",
|
||||
"ISOLatinGreek",
|
||||
"",
|
||||
"ISO8859_7",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-iso-8859-7.txt",
|
||||
},
|
||||
{
|
||||
"ISO 8859-8",
|
||||
"ISOLatinHebrew",
|
||||
"",
|
||||
"ISO8859_8,ISO8859_8E,ISO8859_8I",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-iso-8859-8.txt",
|
||||
},
|
||||
{
|
||||
"ISO 8859-9",
|
||||
"ISOLatin5",
|
||||
"",
|
||||
"ISO8859_9",
|
||||
encoding.ASCIISub,
|
||||
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/iso-8859_9-1999.ucm",
|
||||
},
|
||||
{
|
||||
"ISO 8859-10",
|
||||
"ISOLatin6",
|
||||
"",
|
||||
"ISO8859_10",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-iso-8859-10.txt",
|
||||
},
|
||||
{
|
||||
"ISO 8859-13",
|
||||
"ISO885913",
|
||||
"",
|
||||
"ISO8859_13",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-iso-8859-13.txt",
|
||||
},
|
||||
{
|
||||
"ISO 8859-14",
|
||||
"ISO885914",
|
||||
"",
|
||||
"ISO8859_14",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-iso-8859-14.txt",
|
||||
},
|
||||
{
|
||||
"ISO 8859-15",
|
||||
"ISO885915",
|
||||
"",
|
||||
"ISO8859_15",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-iso-8859-15.txt",
|
||||
},
|
||||
{
|
||||
"ISO 8859-16",
|
||||
"ISO885916",
|
||||
"",
|
||||
"ISO8859_16",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-iso-8859-16.txt",
|
||||
},
|
||||
{
|
||||
"KOI8-R",
|
||||
"KOI8R",
|
||||
"",
|
||||
"KOI8R",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-koi8-r.txt",
|
||||
},
|
||||
{
|
||||
"KOI8-U",
|
||||
"KOI8U",
|
||||
"",
|
||||
"KOI8U",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-koi8-u.txt",
|
||||
},
|
||||
{
|
||||
"Macintosh",
|
||||
"Macintosh",
|
||||
"",
|
||||
"Macintosh",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-macintosh.txt",
|
||||
},
|
||||
{
|
||||
"Macintosh Cyrillic",
|
||||
"MacintoshCyrillic",
|
||||
"",
|
||||
"MacintoshCyrillic",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-x-mac-cyrillic.txt",
|
||||
},
|
||||
{
|
||||
"Windows 874",
|
||||
"Windows874",
|
||||
"",
|
||||
"Windows874",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-windows-874.txt",
|
||||
},
|
||||
{
|
||||
"Windows 1250",
|
||||
"Windows1250",
|
||||
"",
|
||||
"Windows1250",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-windows-1250.txt",
|
||||
},
|
||||
{
|
||||
"Windows 1251",
|
||||
"Windows1251",
|
||||
"",
|
||||
"Windows1251",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-windows-1251.txt",
|
||||
},
|
||||
{
|
||||
"Windows 1252",
|
||||
"Windows1252",
|
||||
"",
|
||||
"Windows1252",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-windows-1252.txt",
|
||||
},
|
||||
{
|
||||
"Windows 1253",
|
||||
"Windows1253",
|
||||
"",
|
||||
"Windows1253",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-windows-1253.txt",
|
||||
},
|
||||
{
|
||||
"Windows 1254",
|
||||
"Windows1254",
|
||||
"",
|
||||
"Windows1254",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-windows-1254.txt",
|
||||
},
|
||||
{
|
||||
"Windows 1255",
|
||||
"Windows1255",
|
||||
"",
|
||||
"Windows1255",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-windows-1255.txt",
|
||||
},
|
||||
{
|
||||
"Windows 1256",
|
||||
"Windows1256",
|
||||
"",
|
||||
"Windows1256",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-windows-1256.txt",
|
||||
},
|
||||
{
|
||||
"Windows 1257",
|
||||
"Windows1257",
|
||||
"",
|
||||
"Windows1257",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-windows-1257.txt",
|
||||
},
|
||||
{
|
||||
"Windows 1258",
|
||||
"Windows1258",
|
||||
"",
|
||||
"Windows1258",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-windows-1258.txt",
|
||||
},
|
||||
{
|
||||
"X-User-Defined",
|
||||
"XUserDefined",
|
||||
"It is defined at http://encoding.spec.whatwg.org/#x-user-defined",
|
||||
"XUserDefined",
|
||||
encoding.ASCIISub,
|
||||
ascii +
|
||||
"\uf780\uf781\uf782\uf783\uf784\uf785\uf786\uf787" +
|
||||
"\uf788\uf789\uf78a\uf78b\uf78c\uf78d\uf78e\uf78f" +
|
||||
"\uf790\uf791\uf792\uf793\uf794\uf795\uf796\uf797" +
|
||||
"\uf798\uf799\uf79a\uf79b\uf79c\uf79d\uf79e\uf79f" +
|
||||
"\uf7a0\uf7a1\uf7a2\uf7a3\uf7a4\uf7a5\uf7a6\uf7a7" +
|
||||
"\uf7a8\uf7a9\uf7aa\uf7ab\uf7ac\uf7ad\uf7ae\uf7af" +
|
||||
"\uf7b0\uf7b1\uf7b2\uf7b3\uf7b4\uf7b5\uf7b6\uf7b7" +
|
||||
"\uf7b8\uf7b9\uf7ba\uf7bb\uf7bc\uf7bd\uf7be\uf7bf" +
|
||||
"\uf7c0\uf7c1\uf7c2\uf7c3\uf7c4\uf7c5\uf7c6\uf7c7" +
|
||||
"\uf7c8\uf7c9\uf7ca\uf7cb\uf7cc\uf7cd\uf7ce\uf7cf" +
|
||||
"\uf7d0\uf7d1\uf7d2\uf7d3\uf7d4\uf7d5\uf7d6\uf7d7" +
|
||||
"\uf7d8\uf7d9\uf7da\uf7db\uf7dc\uf7dd\uf7de\uf7df" +
|
||||
"\uf7e0\uf7e1\uf7e2\uf7e3\uf7e4\uf7e5\uf7e6\uf7e7" +
|
||||
"\uf7e8\uf7e9\uf7ea\uf7eb\uf7ec\uf7ed\uf7ee\uf7ef" +
|
||||
"\uf7f0\uf7f1\uf7f2\uf7f3\uf7f4\uf7f5\uf7f6\uf7f7" +
|
||||
"\uf7f8\uf7f9\uf7fa\uf7fb\uf7fc\uf7fd\uf7fe\uf7ff",
|
||||
},
|
||||
}
|
||||
|
||||
func getWHATWG(url string) string {
|
||||
res, err := http.Get(url)
|
||||
if err != nil {
|
||||
log.Fatalf("%q: Get: %v", url, err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
|
||||
mapping := make([]rune, 128)
|
||||
for i := range mapping {
|
||||
mapping[i] = '\ufffd'
|
||||
}
|
||||
|
||||
scanner := bufio.NewScanner(res.Body)
|
||||
for scanner.Scan() {
|
||||
s := strings.TrimSpace(scanner.Text())
|
||||
if s == "" || s[0] == '#' {
|
||||
continue
|
||||
}
|
||||
x, y := 0, 0
|
||||
if _, err := fmt.Sscanf(s, "%d\t0x%x", &x, &y); err != nil {
|
||||
log.Fatalf("could not parse %q", s)
|
||||
}
|
||||
if x < 0 || 128 <= x {
|
||||
log.Fatalf("code %d is out of range", x)
|
||||
}
|
||||
if 0x80 <= y && y < 0xa0 {
|
||||
// We diverge from the WHATWG spec by mapping control characters
|
||||
// in the range [0x80, 0xa0) to U+FFFD.
|
||||
continue
|
||||
}
|
||||
mapping[x] = rune(y)
|
||||
}
|
||||
return ascii + string(mapping)
|
||||
}
|
||||
|
||||
func getUCM(url string) string {
|
||||
res, err := http.Get(url)
|
||||
if err != nil {
|
||||
log.Fatalf("%q: Get: %v", url, err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
|
||||
mapping := make([]rune, 256)
|
||||
for i := range mapping {
|
||||
mapping[i] = '\ufffd'
|
||||
}
|
||||
|
||||
charsFound := 0
|
||||
scanner := bufio.NewScanner(res.Body)
|
||||
for scanner.Scan() {
|
||||
s := strings.TrimSpace(scanner.Text())
|
||||
if s == "" || s[0] == '#' {
|
||||
continue
|
||||
}
|
||||
var c byte
|
||||
var r rune
|
||||
if _, err := fmt.Sscanf(s, `<U%x> \x%x |0`, &r, &c); err != nil {
|
||||
continue
|
||||
}
|
||||
mapping[c] = r
|
||||
charsFound++
|
||||
}
|
||||
|
||||
if charsFound < 200 {
|
||||
log.Fatalf("%q: only %d characters found (wrong page format?)", url, charsFound)
|
||||
}
|
||||
|
||||
return string(mapping)
|
||||
}
|
||||
|
||||
func main() {
|
||||
mibs := map[string]bool{}
|
||||
all := []string{}
|
||||
|
||||
w := gen.NewCodeWriter()
|
||||
defer w.WriteGoFile("tables.go", "charmap")
|
||||
|
||||
printf := func(s string, a ...interface{}) { fmt.Fprintf(w, s, a...) }
|
||||
|
||||
printf("import (\n")
|
||||
printf("\t\"golang.org/x/text/encoding\"\n")
|
||||
printf("\t\"golang.org/x/text/encoding/internal/identifier\"\n")
|
||||
printf(")\n\n")
|
||||
for _, e := range encodings {
|
||||
varNames := strings.Split(e.varName, ",")
|
||||
all = append(all, varNames...)
|
||||
varName := varNames[0]
|
||||
switch {
|
||||
case strings.HasPrefix(e.mapping, "http://encoding.spec.whatwg.org/"):
|
||||
e.mapping = getWHATWG(e.mapping)
|
||||
case strings.HasPrefix(e.mapping, "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/"):
|
||||
e.mapping = getUCM(e.mapping)
|
||||
}
|
||||
|
||||
asciiSuperset, low := strings.HasPrefix(e.mapping, ascii), 0x00
|
||||
if asciiSuperset {
|
||||
low = 0x80
|
||||
}
|
||||
lvn := 1
|
||||
if strings.HasPrefix(varName, "ISO") || strings.HasPrefix(varName, "KOI") {
|
||||
lvn = 3
|
||||
}
|
||||
lowerVarName := strings.ToLower(varName[:lvn]) + varName[lvn:]
|
||||
printf("// %s is the %s encoding.\n", varName, e.name)
|
||||
if e.comment != "" {
|
||||
printf("//\n// %s\n", e.comment)
|
||||
}
|
||||
printf("var %s *Charmap = &%s\n\nvar %s = Charmap{\nname: %q,\n",
|
||||
varName, lowerVarName, lowerVarName, e.name)
|
||||
if mibs[e.mib] {
|
||||
log.Fatalf("MIB type %q declared multiple times.", e.mib)
|
||||
}
|
||||
printf("mib: identifier.%s,\n", e.mib)
|
||||
printf("asciiSuperset: %t,\n", asciiSuperset)
|
||||
printf("low: 0x%02x,\n", low)
|
||||
printf("replacement: 0x%02x,\n", e.replacement)
|
||||
|
||||
printf("decode: [256]utf8Enc{\n")
|
||||
i, backMapping := 0, map[rune]byte{}
|
||||
for _, c := range e.mapping {
|
||||
if _, ok := backMapping[c]; !ok && c != utf8.RuneError {
|
||||
backMapping[c] = byte(i)
|
||||
}
|
||||
var buf [8]byte
|
||||
n := utf8.EncodeRune(buf[:], c)
|
||||
if n > 3 {
|
||||
panic(fmt.Sprintf("rune %q (%U) is too long", c, c))
|
||||
}
|
||||
printf("{%d,[3]byte{0x%02x,0x%02x,0x%02x}},", n, buf[0], buf[1], buf[2])
|
||||
if i%2 == 1 {
|
||||
printf("\n")
|
||||
}
|
||||
i++
|
||||
}
|
||||
printf("},\n")
|
||||
|
||||
printf("encode: [256]uint32{\n")
|
||||
encode := make([]uint32, 0, 256)
|
||||
for c, i := range backMapping {
|
||||
encode = append(encode, uint32(i)<<24|uint32(c))
|
||||
}
|
||||
sort.Sort(byRune(encode))
|
||||
for len(encode) < cap(encode) {
|
||||
encode = append(encode, encode[len(encode)-1])
|
||||
}
|
||||
for i, enc := range encode {
|
||||
printf("0x%08x,", enc)
|
||||
if i%8 == 7 {
|
||||
printf("\n")
|
||||
}
|
||||
}
|
||||
printf("},\n}\n")
|
||||
|
||||
// Add an estimate of the size of a single Charmap{} struct value, which
|
||||
// includes two 256 elem arrays of 4 bytes and some extra fields, which
|
||||
// align to 3 uint64s on 64-bit architectures.
|
||||
w.Size += 2*4*256 + 3*8
|
||||
}
|
||||
// TODO: add proper line breaking.
|
||||
printf("var listAll = []encoding.Encoding{\n%s,\n}\n\n", strings.Join(all, ",\n"))
|
||||
}
|
||||
|
||||
type byRune []uint32
|
||||
|
||||
func (b byRune) Len() int { return len(b) }
|
||||
func (b byRune) Less(i, j int) bool { return b[i]&0xffffff < b[j]&0xffffff }
|
||||
func (b byRune) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
|
173
vendor/golang.org/x/text/encoding/htmlindex/gen.go
generated
vendored
173
vendor/golang.org/x/text/encoding/htmlindex/gen.go
generated
vendored
@@ -1,173 +0,0 @@
|
||||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
)
|
||||
|
||||
type group struct {
|
||||
Encodings []struct {
|
||||
Labels []string
|
||||
Name string
|
||||
}
|
||||
}
|
||||
|
||||
func main() {
|
||||
gen.Init()
|
||||
|
||||
r := gen.Open("https://encoding.spec.whatwg.org", "whatwg", "encodings.json")
|
||||
var groups []group
|
||||
if err := json.NewDecoder(r).Decode(&groups); err != nil {
|
||||
log.Fatalf("Error reading encodings.json: %v", err)
|
||||
}
|
||||
|
||||
w := &bytes.Buffer{}
|
||||
fmt.Fprintln(w, "type htmlEncoding byte")
|
||||
fmt.Fprintln(w, "const (")
|
||||
for i, g := range groups {
|
||||
for _, e := range g.Encodings {
|
||||
key := strings.ToLower(e.Name)
|
||||
name := consts[key]
|
||||
if name == "" {
|
||||
log.Fatalf("No const defined for %s.", key)
|
||||
}
|
||||
if i == 0 {
|
||||
fmt.Fprintf(w, "%s htmlEncoding = iota\n", name)
|
||||
} else {
|
||||
fmt.Fprintf(w, "%s\n", name)
|
||||
}
|
||||
}
|
||||
}
|
||||
fmt.Fprintln(w, "numEncodings")
|
||||
fmt.Fprint(w, ")\n\n")
|
||||
|
||||
fmt.Fprintln(w, "var canonical = [numEncodings]string{")
|
||||
for _, g := range groups {
|
||||
for _, e := range g.Encodings {
|
||||
fmt.Fprintf(w, "%q,\n", strings.ToLower(e.Name))
|
||||
}
|
||||
}
|
||||
fmt.Fprint(w, "}\n\n")
|
||||
|
||||
fmt.Fprintln(w, "var nameMap = map[string]htmlEncoding{")
|
||||
for _, g := range groups {
|
||||
for _, e := range g.Encodings {
|
||||
for _, l := range e.Labels {
|
||||
key := strings.ToLower(e.Name)
|
||||
name := consts[key]
|
||||
fmt.Fprintf(w, "%q: %s,\n", l, name)
|
||||
}
|
||||
}
|
||||
}
|
||||
fmt.Fprint(w, "}\n\n")
|
||||
|
||||
var tags []string
|
||||
fmt.Fprintln(w, "var localeMap = []htmlEncoding{")
|
||||
for _, loc := range locales {
|
||||
tags = append(tags, loc.tag)
|
||||
fmt.Fprintf(w, "%s, // %s \n", consts[loc.name], loc.tag)
|
||||
}
|
||||
fmt.Fprint(w, "}\n\n")
|
||||
|
||||
fmt.Fprintf(w, "const locales = %q\n", strings.Join(tags, " "))
|
||||
|
||||
gen.WriteGoFile("tables.go", "htmlindex", w.Bytes())
|
||||
}
|
||||
|
||||
// consts maps canonical encoding name to internal constant.
|
||||
var consts = map[string]string{
|
||||
"utf-8": "utf8",
|
||||
"ibm866": "ibm866",
|
||||
"iso-8859-2": "iso8859_2",
|
||||
"iso-8859-3": "iso8859_3",
|
||||
"iso-8859-4": "iso8859_4",
|
||||
"iso-8859-5": "iso8859_5",
|
||||
"iso-8859-6": "iso8859_6",
|
||||
"iso-8859-7": "iso8859_7",
|
||||
"iso-8859-8": "iso8859_8",
|
||||
"iso-8859-8-i": "iso8859_8I",
|
||||
"iso-8859-10": "iso8859_10",
|
||||
"iso-8859-13": "iso8859_13",
|
||||
"iso-8859-14": "iso8859_14",
|
||||
"iso-8859-15": "iso8859_15",
|
||||
"iso-8859-16": "iso8859_16",
|
||||
"koi8-r": "koi8r",
|
||||
"koi8-u": "koi8u",
|
||||
"macintosh": "macintosh",
|
||||
"windows-874": "windows874",
|
||||
"windows-1250": "windows1250",
|
||||
"windows-1251": "windows1251",
|
||||
"windows-1252": "windows1252",
|
||||
"windows-1253": "windows1253",
|
||||
"windows-1254": "windows1254",
|
||||
"windows-1255": "windows1255",
|
||||
"windows-1256": "windows1256",
|
||||
"windows-1257": "windows1257",
|
||||
"windows-1258": "windows1258",
|
||||
"x-mac-cyrillic": "macintoshCyrillic",
|
||||
"gbk": "gbk",
|
||||
"gb18030": "gb18030",
|
||||
// "hz-gb-2312": "hzgb2312", // Was removed from WhatWG
|
||||
"big5": "big5",
|
||||
"euc-jp": "eucjp",
|
||||
"iso-2022-jp": "iso2022jp",
|
||||
"shift_jis": "shiftJIS",
|
||||
"euc-kr": "euckr",
|
||||
"replacement": "replacement",
|
||||
"utf-16be": "utf16be",
|
||||
"utf-16le": "utf16le",
|
||||
"x-user-defined": "xUserDefined",
|
||||
}
|
||||
|
||||
// locales is taken from
|
||||
// https://html.spec.whatwg.org/multipage/syntax.html#encoding-sniffing-algorithm.
|
||||
var locales = []struct{ tag, name string }{
|
||||
// The default value. Explicitly state latin to benefit from the exact
|
||||
// script option, while still making 1252 the default encoding for languages
|
||||
// written in Latin script.
|
||||
{"und_Latn", "windows-1252"},
|
||||
{"ar", "windows-1256"},
|
||||
{"ba", "windows-1251"},
|
||||
{"be", "windows-1251"},
|
||||
{"bg", "windows-1251"},
|
||||
{"cs", "windows-1250"},
|
||||
{"el", "iso-8859-7"},
|
||||
{"et", "windows-1257"},
|
||||
{"fa", "windows-1256"},
|
||||
{"he", "windows-1255"},
|
||||
{"hr", "windows-1250"},
|
||||
{"hu", "iso-8859-2"},
|
||||
{"ja", "shift_jis"},
|
||||
{"kk", "windows-1251"},
|
||||
{"ko", "euc-kr"},
|
||||
{"ku", "windows-1254"},
|
||||
{"ky", "windows-1251"},
|
||||
{"lt", "windows-1257"},
|
||||
{"lv", "windows-1257"},
|
||||
{"mk", "windows-1251"},
|
||||
{"pl", "iso-8859-2"},
|
||||
{"ru", "windows-1251"},
|
||||
{"sah", "windows-1251"},
|
||||
{"sk", "windows-1250"},
|
||||
{"sl", "iso-8859-2"},
|
||||
{"sr", "windows-1251"},
|
||||
{"tg", "windows-1251"},
|
||||
{"th", "windows-874"},
|
||||
{"tr", "windows-1254"},
|
||||
{"tt", "windows-1251"},
|
||||
{"uk", "windows-1251"},
|
||||
{"vi", "windows-1258"},
|
||||
{"zh-hans", "gb18030"},
|
||||
{"zh-hant", "big5"},
|
||||
}
|
142
vendor/golang.org/x/text/encoding/internal/identifier/gen.go
generated
vendored
142
vendor/golang.org/x/text/encoding/internal/identifier/gen.go
generated
vendored
@@ -1,142 +0,0 @@
|
||||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/xml"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
)
|
||||
|
||||
type registry struct {
|
||||
XMLName xml.Name `xml:"registry"`
|
||||
Updated string `xml:"updated"`
|
||||
Registry []struct {
|
||||
ID string `xml:"id,attr"`
|
||||
Record []struct {
|
||||
Name string `xml:"name"`
|
||||
Xref []struct {
|
||||
Type string `xml:"type,attr"`
|
||||
Data string `xml:"data,attr"`
|
||||
} `xml:"xref"`
|
||||
Desc struct {
|
||||
Data string `xml:",innerxml"`
|
||||
// Any []struct {
|
||||
// Data string `xml:",chardata"`
|
||||
// } `xml:",any"`
|
||||
// Data string `xml:",chardata"`
|
||||
} `xml:"description,"`
|
||||
MIB string `xml:"value"`
|
||||
Alias []string `xml:"alias"`
|
||||
MIME string `xml:"preferred_alias"`
|
||||
} `xml:"record"`
|
||||
} `xml:"registry"`
|
||||
}
|
||||
|
||||
func main() {
|
||||
r := gen.OpenIANAFile("assignments/character-sets/character-sets.xml")
|
||||
reg := ®istry{}
|
||||
if err := xml.NewDecoder(r).Decode(®); err != nil && err != io.EOF {
|
||||
log.Fatalf("Error decoding charset registry: %v", err)
|
||||
}
|
||||
if len(reg.Registry) == 0 || reg.Registry[0].ID != "character-sets-1" {
|
||||
log.Fatalf("Unexpected ID %s", reg.Registry[0].ID)
|
||||
}
|
||||
|
||||
w := &bytes.Buffer{}
|
||||
fmt.Fprintf(w, "const (\n")
|
||||
for _, rec := range reg.Registry[0].Record {
|
||||
constName := ""
|
||||
for _, a := range rec.Alias {
|
||||
if strings.HasPrefix(a, "cs") && strings.IndexByte(a, '-') == -1 {
|
||||
// Some of the constant definitions have comments in them. Strip those.
|
||||
constName = strings.Title(strings.SplitN(a[2:], "\n", 2)[0])
|
||||
}
|
||||
}
|
||||
if constName == "" {
|
||||
switch rec.MIB {
|
||||
case "2085":
|
||||
constName = "HZGB2312" // Not listed as alias for some reason.
|
||||
default:
|
||||
log.Fatalf("No cs alias defined for %s.", rec.MIB)
|
||||
}
|
||||
}
|
||||
if rec.MIME != "" {
|
||||
rec.MIME = fmt.Sprintf(" (MIME: %s)", rec.MIME)
|
||||
}
|
||||
fmt.Fprintf(w, "// %s is the MIB identifier with IANA name %s%s.\n//\n", constName, rec.Name, rec.MIME)
|
||||
if len(rec.Desc.Data) > 0 {
|
||||
fmt.Fprint(w, "// ")
|
||||
d := xml.NewDecoder(strings.NewReader(rec.Desc.Data))
|
||||
inElem := true
|
||||
attr := ""
|
||||
for {
|
||||
t, err := d.Token()
|
||||
if err != nil {
|
||||
if err != io.EOF {
|
||||
log.Fatal(err)
|
||||
}
|
||||
break
|
||||
}
|
||||
switch x := t.(type) {
|
||||
case xml.CharData:
|
||||
attr = "" // Don't need attribute info.
|
||||
a := bytes.Split([]byte(x), []byte("\n"))
|
||||
for i, b := range a {
|
||||
if b = bytes.TrimSpace(b); len(b) != 0 {
|
||||
if !inElem && i > 0 {
|
||||
fmt.Fprint(w, "\n// ")
|
||||
}
|
||||
inElem = false
|
||||
fmt.Fprintf(w, "%s ", string(b))
|
||||
}
|
||||
}
|
||||
case xml.StartElement:
|
||||
if x.Name.Local == "xref" {
|
||||
inElem = true
|
||||
use := false
|
||||
for _, a := range x.Attr {
|
||||
if a.Name.Local == "type" {
|
||||
use = use || a.Value != "person"
|
||||
}
|
||||
if a.Name.Local == "data" && use {
|
||||
// Patch up URLs to use https. From some links, the
|
||||
// https version is different from the http one.
|
||||
s := a.Value
|
||||
s = strings.Replace(s, "http://", "https://", -1)
|
||||
s = strings.Replace(s, "/unicode/", "/", -1)
|
||||
attr = s + " "
|
||||
}
|
||||
}
|
||||
}
|
||||
case xml.EndElement:
|
||||
inElem = false
|
||||
fmt.Fprint(w, attr)
|
||||
}
|
||||
}
|
||||
fmt.Fprint(w, "\n")
|
||||
}
|
||||
for _, x := range rec.Xref {
|
||||
switch x.Type {
|
||||
case "rfc":
|
||||
fmt.Fprintf(w, "// Reference: %s\n", strings.ToUpper(x.Data))
|
||||
case "uri":
|
||||
fmt.Fprintf(w, "// Reference: %s\n", x.Data)
|
||||
}
|
||||
}
|
||||
fmt.Fprintf(w, "%s MIB = %s\n", constName, rec.MIB)
|
||||
fmt.Fprintln(w)
|
||||
}
|
||||
fmt.Fprintln(w, ")")
|
||||
|
||||
gen.WriteGoFile("mib.go", "identifier", w.Bytes())
|
||||
}
|
161
vendor/golang.org/x/text/encoding/japanese/maketables.go
generated
vendored
161
vendor/golang.org/x/text/encoding/japanese/maketables.go
generated
vendored
@@ -1,161 +0,0 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
// This program generates tables.go:
|
||||
// go run maketables.go | gofmt > tables.go
|
||||
|
||||
// TODO: Emoji extensions?
|
||||
// https://www.unicode.org/faq/emoji_dingbats.html
|
||||
// https://www.unicode.org/Public/UNIDATA/EmojiSources.txt
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type entry struct {
|
||||
jisCode, table int
|
||||
}
|
||||
|
||||
func main() {
|
||||
fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n")
|
||||
fmt.Printf("// Package japanese provides Japanese encodings such as EUC-JP and Shift JIS.\n")
|
||||
fmt.Printf(`package japanese // import "golang.org/x/text/encoding/japanese"` + "\n\n")
|
||||
|
||||
reverse := [65536]entry{}
|
||||
for i := range reverse {
|
||||
reverse[i].table = -1
|
||||
}
|
||||
|
||||
tables := []struct {
|
||||
url string
|
||||
name string
|
||||
}{
|
||||
{"http://encoding.spec.whatwg.org/index-jis0208.txt", "0208"},
|
||||
{"http://encoding.spec.whatwg.org/index-jis0212.txt", "0212"},
|
||||
}
|
||||
for i, table := range tables {
|
||||
res, err := http.Get(table.url)
|
||||
if err != nil {
|
||||
log.Fatalf("%q: Get: %v", table.url, err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
|
||||
mapping := [65536]uint16{}
|
||||
|
||||
scanner := bufio.NewScanner(res.Body)
|
||||
for scanner.Scan() {
|
||||
s := strings.TrimSpace(scanner.Text())
|
||||
if s == "" || s[0] == '#' {
|
||||
continue
|
||||
}
|
||||
x, y := 0, uint16(0)
|
||||
if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil {
|
||||
log.Fatalf("%q: could not parse %q", table.url, s)
|
||||
}
|
||||
if x < 0 || 120*94 <= x {
|
||||
log.Fatalf("%q: JIS code %d is out of range", table.url, x)
|
||||
}
|
||||
mapping[x] = y
|
||||
if reverse[y].table == -1 {
|
||||
reverse[y] = entry{jisCode: x, table: i}
|
||||
}
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
log.Fatalf("%q: scanner error: %v", table.url, err)
|
||||
}
|
||||
|
||||
fmt.Printf("// jis%sDecode is the decoding table from JIS %s code to Unicode.\n// It is defined at %s\n",
|
||||
table.name, table.name, table.url)
|
||||
fmt.Printf("var jis%sDecode = [...]uint16{\n", table.name)
|
||||
for i, m := range mapping {
|
||||
if m != 0 {
|
||||
fmt.Printf("\t%d: 0x%04X,\n", i, m)
|
||||
}
|
||||
}
|
||||
fmt.Printf("}\n\n")
|
||||
}
|
||||
|
||||
// Any run of at least separation continuous zero entries in the reverse map will
|
||||
// be a separate encode table.
|
||||
const separation = 1024
|
||||
|
||||
intervals := []interval(nil)
|
||||
low, high := -1, -1
|
||||
for i, v := range reverse {
|
||||
if v.table == -1 {
|
||||
continue
|
||||
}
|
||||
if low < 0 {
|
||||
low = i
|
||||
} else if i-high >= separation {
|
||||
if high >= 0 {
|
||||
intervals = append(intervals, interval{low, high})
|
||||
}
|
||||
low = i
|
||||
}
|
||||
high = i + 1
|
||||
}
|
||||
if high >= 0 {
|
||||
intervals = append(intervals, interval{low, high})
|
||||
}
|
||||
sort.Sort(byDecreasingLength(intervals))
|
||||
|
||||
fmt.Printf("const (\n")
|
||||
fmt.Printf("\tjis0208 = 1\n")
|
||||
fmt.Printf("\tjis0212 = 2\n")
|
||||
fmt.Printf("\tcodeMask = 0x7f\n")
|
||||
fmt.Printf("\tcodeShift = 7\n")
|
||||
fmt.Printf("\ttableShift = 14\n")
|
||||
fmt.Printf(")\n\n")
|
||||
|
||||
fmt.Printf("const numEncodeTables = %d\n\n", len(intervals))
|
||||
fmt.Printf("// encodeX are the encoding tables from Unicode to JIS code,\n")
|
||||
fmt.Printf("// sorted by decreasing length.\n")
|
||||
for i, v := range intervals {
|
||||
fmt.Printf("// encode%d: %5d entries for runes in [%5d, %5d).\n", i, v.len(), v.low, v.high)
|
||||
}
|
||||
fmt.Printf("//\n")
|
||||
fmt.Printf("// The high two bits of the value record whether the JIS code comes from the\n")
|
||||
fmt.Printf("// JIS0208 table (high bits == 1) or the JIS0212 table (high bits == 2).\n")
|
||||
fmt.Printf("// The low 14 bits are two 7-bit unsigned integers j1 and j2 that form the\n")
|
||||
fmt.Printf("// JIS code (94*j1 + j2) within that table.\n")
|
||||
fmt.Printf("\n")
|
||||
|
||||
for i, v := range intervals {
|
||||
fmt.Printf("const encode%dLow, encode%dHigh = %d, %d\n\n", i, i, v.low, v.high)
|
||||
fmt.Printf("var encode%d = [...]uint16{\n", i)
|
||||
for j := v.low; j < v.high; j++ {
|
||||
x := reverse[j]
|
||||
if x.table == -1 {
|
||||
continue
|
||||
}
|
||||
fmt.Printf("\t%d - %d: jis%s<<14 | 0x%02X<<7 | 0x%02X,\n",
|
||||
j, v.low, tables[x.table].name, x.jisCode/94, x.jisCode%94)
|
||||
}
|
||||
fmt.Printf("}\n\n")
|
||||
}
|
||||
}
|
||||
|
||||
// interval is a half-open interval [low, high).
|
||||
type interval struct {
|
||||
low, high int
|
||||
}
|
||||
|
||||
func (i interval) len() int { return i.high - i.low }
|
||||
|
||||
// byDecreasingLength sorts intervals by decreasing length.
|
||||
type byDecreasingLength []interval
|
||||
|
||||
func (b byDecreasingLength) Len() int { return len(b) }
|
||||
func (b byDecreasingLength) Less(i, j int) bool { return b[i].len() > b[j].len() }
|
||||
func (b byDecreasingLength) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
|
143
vendor/golang.org/x/text/encoding/korean/maketables.go
generated
vendored
143
vendor/golang.org/x/text/encoding/korean/maketables.go
generated
vendored
@@ -1,143 +0,0 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
// This program generates tables.go:
|
||||
// go run maketables.go | gofmt > tables.go
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
func main() {
|
||||
fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n")
|
||||
fmt.Printf("// Package korean provides Korean encodings such as EUC-KR.\n")
|
||||
fmt.Printf(`package korean // import "golang.org/x/text/encoding/korean"` + "\n\n")
|
||||
|
||||
res, err := http.Get("http://encoding.spec.whatwg.org/index-euc-kr.txt")
|
||||
if err != nil {
|
||||
log.Fatalf("Get: %v", err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
|
||||
mapping := [65536]uint16{}
|
||||
reverse := [65536]uint16{}
|
||||
|
||||
scanner := bufio.NewScanner(res.Body)
|
||||
for scanner.Scan() {
|
||||
s := strings.TrimSpace(scanner.Text())
|
||||
if s == "" || s[0] == '#' {
|
||||
continue
|
||||
}
|
||||
x, y := uint16(0), uint16(0)
|
||||
if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil {
|
||||
log.Fatalf("could not parse %q", s)
|
||||
}
|
||||
if x < 0 || 178*(0xc7-0x81)+(0xfe-0xc7)*94+(0xff-0xa1) <= x {
|
||||
log.Fatalf("EUC-KR code %d is out of range", x)
|
||||
}
|
||||
mapping[x] = y
|
||||
if reverse[y] == 0 {
|
||||
c0, c1 := uint16(0), uint16(0)
|
||||
if x < 178*(0xc7-0x81) {
|
||||
c0 = uint16(x/178) + 0x81
|
||||
c1 = uint16(x % 178)
|
||||
switch {
|
||||
case c1 < 1*26:
|
||||
c1 += 0x41
|
||||
case c1 < 2*26:
|
||||
c1 += 0x47
|
||||
default:
|
||||
c1 += 0x4d
|
||||
}
|
||||
} else {
|
||||
x -= 178 * (0xc7 - 0x81)
|
||||
c0 = uint16(x/94) + 0xc7
|
||||
c1 = uint16(x%94) + 0xa1
|
||||
}
|
||||
reverse[y] = c0<<8 | c1
|
||||
}
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
log.Fatalf("scanner error: %v", err)
|
||||
}
|
||||
|
||||
fmt.Printf("// decode is the decoding table from EUC-KR code to Unicode.\n")
|
||||
fmt.Printf("// It is defined at http://encoding.spec.whatwg.org/index-euc-kr.txt\n")
|
||||
fmt.Printf("var decode = [...]uint16{\n")
|
||||
for i, v := range mapping {
|
||||
if v != 0 {
|
||||
fmt.Printf("\t%d: 0x%04X,\n", i, v)
|
||||
}
|
||||
}
|
||||
fmt.Printf("}\n\n")
|
||||
|
||||
// Any run of at least separation continuous zero entries in the reverse map will
|
||||
// be a separate encode table.
|
||||
const separation = 1024
|
||||
|
||||
intervals := []interval(nil)
|
||||
low, high := -1, -1
|
||||
for i, v := range reverse {
|
||||
if v == 0 {
|
||||
continue
|
||||
}
|
||||
if low < 0 {
|
||||
low = i
|
||||
} else if i-high >= separation {
|
||||
if high >= 0 {
|
||||
intervals = append(intervals, interval{low, high})
|
||||
}
|
||||
low = i
|
||||
}
|
||||
high = i + 1
|
||||
}
|
||||
if high >= 0 {
|
||||
intervals = append(intervals, interval{low, high})
|
||||
}
|
||||
sort.Sort(byDecreasingLength(intervals))
|
||||
|
||||
fmt.Printf("const numEncodeTables = %d\n\n", len(intervals))
|
||||
fmt.Printf("// encodeX are the encoding tables from Unicode to EUC-KR code,\n")
|
||||
fmt.Printf("// sorted by decreasing length.\n")
|
||||
for i, v := range intervals {
|
||||
fmt.Printf("// encode%d: %5d entries for runes in [%5d, %5d).\n", i, v.len(), v.low, v.high)
|
||||
}
|
||||
fmt.Printf("\n")
|
||||
|
||||
for i, v := range intervals {
|
||||
fmt.Printf("const encode%dLow, encode%dHigh = %d, %d\n\n", i, i, v.low, v.high)
|
||||
fmt.Printf("var encode%d = [...]uint16{\n", i)
|
||||
for j := v.low; j < v.high; j++ {
|
||||
x := reverse[j]
|
||||
if x == 0 {
|
||||
continue
|
||||
}
|
||||
fmt.Printf("\t%d-%d: 0x%04X,\n", j, v.low, x)
|
||||
}
|
||||
fmt.Printf("}\n\n")
|
||||
}
|
||||
}
|
||||
|
||||
// interval is a half-open interval [low, high).
|
||||
type interval struct {
|
||||
low, high int
|
||||
}
|
||||
|
||||
func (i interval) len() int { return i.high - i.low }
|
||||
|
||||
// byDecreasingLength sorts intervals by decreasing length.
|
||||
type byDecreasingLength []interval
|
||||
|
||||
func (b byDecreasingLength) Len() int { return len(b) }
|
||||
func (b byDecreasingLength) Less(i, j int) bool { return b[i].len() > b[j].len() }
|
||||
func (b byDecreasingLength) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
|
161
vendor/golang.org/x/text/encoding/simplifiedchinese/maketables.go
generated
vendored
161
vendor/golang.org/x/text/encoding/simplifiedchinese/maketables.go
generated
vendored
@@ -1,161 +0,0 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
// This program generates tables.go:
|
||||
// go run maketables.go | gofmt > tables.go
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
func main() {
|
||||
fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n")
|
||||
fmt.Printf("// Package simplifiedchinese provides Simplified Chinese encodings such as GBK.\n")
|
||||
fmt.Printf(`package simplifiedchinese // import "golang.org/x/text/encoding/simplifiedchinese"` + "\n\n")
|
||||
|
||||
printGB18030()
|
||||
printGBK()
|
||||
}
|
||||
|
||||
func printGB18030() {
|
||||
res, err := http.Get("http://encoding.spec.whatwg.org/index-gb18030.txt")
|
||||
if err != nil {
|
||||
log.Fatalf("Get: %v", err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
|
||||
fmt.Printf("// gb18030 is the table from http://encoding.spec.whatwg.org/index-gb18030.txt\n")
|
||||
fmt.Printf("var gb18030 = [...][2]uint16{\n")
|
||||
scanner := bufio.NewScanner(res.Body)
|
||||
for scanner.Scan() {
|
||||
s := strings.TrimSpace(scanner.Text())
|
||||
if s == "" || s[0] == '#' {
|
||||
continue
|
||||
}
|
||||
x, y := uint32(0), uint32(0)
|
||||
if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil {
|
||||
log.Fatalf("could not parse %q", s)
|
||||
}
|
||||
if x < 0x10000 && y < 0x10000 {
|
||||
fmt.Printf("\t{0x%04x, 0x%04x},\n", x, y)
|
||||
}
|
||||
}
|
||||
fmt.Printf("}\n\n")
|
||||
}
|
||||
|
||||
func printGBK() {
|
||||
res, err := http.Get("http://encoding.spec.whatwg.org/index-gbk.txt")
|
||||
if err != nil {
|
||||
log.Fatalf("Get: %v", err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
|
||||
mapping := [65536]uint16{}
|
||||
reverse := [65536]uint16{}
|
||||
|
||||
scanner := bufio.NewScanner(res.Body)
|
||||
for scanner.Scan() {
|
||||
s := strings.TrimSpace(scanner.Text())
|
||||
if s == "" || s[0] == '#' {
|
||||
continue
|
||||
}
|
||||
x, y := uint16(0), uint16(0)
|
||||
if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil {
|
||||
log.Fatalf("could not parse %q", s)
|
||||
}
|
||||
if x < 0 || 126*190 <= x {
|
||||
log.Fatalf("GBK code %d is out of range", x)
|
||||
}
|
||||
mapping[x] = y
|
||||
if reverse[y] == 0 {
|
||||
c0, c1 := x/190, x%190
|
||||
if c1 >= 0x3f {
|
||||
c1++
|
||||
}
|
||||
reverse[y] = (0x81+c0)<<8 | (0x40 + c1)
|
||||
}
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
log.Fatalf("scanner error: %v", err)
|
||||
}
|
||||
|
||||
fmt.Printf("// decode is the decoding table from GBK code to Unicode.\n")
|
||||
fmt.Printf("// It is defined at http://encoding.spec.whatwg.org/index-gbk.txt\n")
|
||||
fmt.Printf("var decode = [...]uint16{\n")
|
||||
for i, v := range mapping {
|
||||
if v != 0 {
|
||||
fmt.Printf("\t%d: 0x%04X,\n", i, v)
|
||||
}
|
||||
}
|
||||
fmt.Printf("}\n\n")
|
||||
|
||||
// Any run of at least separation continuous zero entries in the reverse map will
|
||||
// be a separate encode table.
|
||||
const separation = 1024
|
||||
|
||||
intervals := []interval(nil)
|
||||
low, high := -1, -1
|
||||
for i, v := range reverse {
|
||||
if v == 0 {
|
||||
continue
|
||||
}
|
||||
if low < 0 {
|
||||
low = i
|
||||
} else if i-high >= separation {
|
||||
if high >= 0 {
|
||||
intervals = append(intervals, interval{low, high})
|
||||
}
|
||||
low = i
|
||||
}
|
||||
high = i + 1
|
||||
}
|
||||
if high >= 0 {
|
||||
intervals = append(intervals, interval{low, high})
|
||||
}
|
||||
sort.Sort(byDecreasingLength(intervals))
|
||||
|
||||
fmt.Printf("const numEncodeTables = %d\n\n", len(intervals))
|
||||
fmt.Printf("// encodeX are the encoding tables from Unicode to GBK code,\n")
|
||||
fmt.Printf("// sorted by decreasing length.\n")
|
||||
for i, v := range intervals {
|
||||
fmt.Printf("// encode%d: %5d entries for runes in [%5d, %5d).\n", i, v.len(), v.low, v.high)
|
||||
}
|
||||
fmt.Printf("\n")
|
||||
|
||||
for i, v := range intervals {
|
||||
fmt.Printf("const encode%dLow, encode%dHigh = %d, %d\n\n", i, i, v.low, v.high)
|
||||
fmt.Printf("var encode%d = [...]uint16{\n", i)
|
||||
for j := v.low; j < v.high; j++ {
|
||||
x := reverse[j]
|
||||
if x == 0 {
|
||||
continue
|
||||
}
|
||||
fmt.Printf("\t%d-%d: 0x%04X,\n", j, v.low, x)
|
||||
}
|
||||
fmt.Printf("}\n\n")
|
||||
}
|
||||
}
|
||||
|
||||
// interval is a half-open interval [low, high).
|
||||
type interval struct {
|
||||
low, high int
|
||||
}
|
||||
|
||||
func (i interval) len() int { return i.high - i.low }
|
||||
|
||||
// byDecreasingLength sorts intervals by decreasing length.
|
||||
type byDecreasingLength []interval
|
||||
|
||||
func (b byDecreasingLength) Len() int { return len(b) }
|
||||
func (b byDecreasingLength) Less(i, j int) bool { return b[i].len() > b[j].len() }
|
||||
func (b byDecreasingLength) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
|
140
vendor/golang.org/x/text/encoding/traditionalchinese/maketables.go
generated
vendored
140
vendor/golang.org/x/text/encoding/traditionalchinese/maketables.go
generated
vendored
@@ -1,140 +0,0 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
// This program generates tables.go:
|
||||
// go run maketables.go | gofmt > tables.go
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
func main() {
|
||||
fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n")
|
||||
fmt.Printf("// Package traditionalchinese provides Traditional Chinese encodings such as Big5.\n")
|
||||
fmt.Printf(`package traditionalchinese // import "golang.org/x/text/encoding/traditionalchinese"` + "\n\n")
|
||||
|
||||
res, err := http.Get("http://encoding.spec.whatwg.org/index-big5.txt")
|
||||
if err != nil {
|
||||
log.Fatalf("Get: %v", err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
|
||||
mapping := [65536]uint32{}
|
||||
reverse := [65536 * 4]uint16{}
|
||||
|
||||
scanner := bufio.NewScanner(res.Body)
|
||||
for scanner.Scan() {
|
||||
s := strings.TrimSpace(scanner.Text())
|
||||
if s == "" || s[0] == '#' {
|
||||
continue
|
||||
}
|
||||
x, y := uint16(0), uint32(0)
|
||||
if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil {
|
||||
log.Fatalf("could not parse %q", s)
|
||||
}
|
||||
if x < 0 || 126*157 <= x {
|
||||
log.Fatalf("Big5 code %d is out of range", x)
|
||||
}
|
||||
mapping[x] = y
|
||||
|
||||
// The WHATWG spec http://encoding.spec.whatwg.org/#indexes says that
|
||||
// "The index pointer for code point in index is the first pointer
|
||||
// corresponding to code point in index", which would normally mean
|
||||
// that the code below should be guarded by "if reverse[y] == 0", but
|
||||
// last instead of first seems to match the behavior of
|
||||
// "iconv -f UTF-8 -t BIG5". For example, U+8005 者 occurs twice in
|
||||
// http://encoding.spec.whatwg.org/index-big5.txt, as index 2148
|
||||
// (encoded as "\x8e\xcd") and index 6543 (encoded as "\xaa\xcc")
|
||||
// and "echo 者 | iconv -f UTF-8 -t BIG5 | xxd" gives "\xaa\xcc".
|
||||
c0, c1 := x/157, x%157
|
||||
if c1 < 0x3f {
|
||||
c1 += 0x40
|
||||
} else {
|
||||
c1 += 0x62
|
||||
}
|
||||
reverse[y] = (0x81+c0)<<8 | c1
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
log.Fatalf("scanner error: %v", err)
|
||||
}
|
||||
|
||||
fmt.Printf("// decode is the decoding table from Big5 code to Unicode.\n")
|
||||
fmt.Printf("// It is defined at http://encoding.spec.whatwg.org/index-big5.txt\n")
|
||||
fmt.Printf("var decode = [...]uint32{\n")
|
||||
for i, v := range mapping {
|
||||
if v != 0 {
|
||||
fmt.Printf("\t%d: 0x%08X,\n", i, v)
|
||||
}
|
||||
}
|
||||
fmt.Printf("}\n\n")
|
||||
|
||||
// Any run of at least separation continuous zero entries in the reverse map will
|
||||
// be a separate encode table.
|
||||
const separation = 1024
|
||||
|
||||
intervals := []interval(nil)
|
||||
low, high := -1, -1
|
||||
for i, v := range reverse {
|
||||
if v == 0 {
|
||||
continue
|
||||
}
|
||||
if low < 0 {
|
||||
low = i
|
||||
} else if i-high >= separation {
|
||||
if high >= 0 {
|
||||
intervals = append(intervals, interval{low, high})
|
||||
}
|
||||
low = i
|
||||
}
|
||||
high = i + 1
|
||||
}
|
||||
if high >= 0 {
|
||||
intervals = append(intervals, interval{low, high})
|
||||
}
|
||||
sort.Sort(byDecreasingLength(intervals))
|
||||
|
||||
fmt.Printf("const numEncodeTables = %d\n\n", len(intervals))
|
||||
fmt.Printf("// encodeX are the encoding tables from Unicode to Big5 code,\n")
|
||||
fmt.Printf("// sorted by decreasing length.\n")
|
||||
for i, v := range intervals {
|
||||
fmt.Printf("// encode%d: %5d entries for runes in [%6d, %6d).\n", i, v.len(), v.low, v.high)
|
||||
}
|
||||
fmt.Printf("\n")
|
||||
|
||||
for i, v := range intervals {
|
||||
fmt.Printf("const encode%dLow, encode%dHigh = %d, %d\n\n", i, i, v.low, v.high)
|
||||
fmt.Printf("var encode%d = [...]uint16{\n", i)
|
||||
for j := v.low; j < v.high; j++ {
|
||||
x := reverse[j]
|
||||
if x == 0 {
|
||||
continue
|
||||
}
|
||||
fmt.Printf("\t%d-%d: 0x%04X,\n", j, v.low, x)
|
||||
}
|
||||
fmt.Printf("}\n\n")
|
||||
}
|
||||
}
|
||||
|
||||
// interval is a half-open interval [low, high).
|
||||
type interval struct {
|
||||
low, high int
|
||||
}
|
||||
|
||||
func (i interval) len() int { return i.high - i.low }
|
||||
|
||||
// byDecreasingLength sorts intervals by decreasing length.
|
||||
type byDecreasingLength []interval
|
||||
|
||||
func (b byDecreasingLength) Len() int { return len(b) }
|
||||
func (b byDecreasingLength) Less(i, j int) bool { return b[i].len() > b[j].len() }
|
||||
func (b byDecreasingLength) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
|
64
vendor/golang.org/x/text/internal/language/compact/gen.go
generated
vendored
64
vendor/golang.org/x/text/internal/language/compact/gen.go
generated
vendored
@@ -1,64 +0,0 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
// Language tag table generator.
|
||||
// Data read from the web.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
"golang.org/x/text/unicode/cldr"
|
||||
)
|
||||
|
||||
var (
|
||||
test = flag.Bool("test",
|
||||
false,
|
||||
"test existing tables; can be used to compare web data with package data.")
|
||||
outputFile = flag.String("output",
|
||||
"tables.go",
|
||||
"output file for generated tables")
|
||||
)
|
||||
|
||||
func main() {
|
||||
gen.Init()
|
||||
|
||||
w := gen.NewCodeWriter()
|
||||
defer w.WriteGoFile("tables.go", "compact")
|
||||
|
||||
fmt.Fprintln(w, `import "golang.org/x/text/internal/language"`)
|
||||
|
||||
b := newBuilder(w)
|
||||
gen.WriteCLDRVersion(w)
|
||||
|
||||
b.writeCompactIndex()
|
||||
}
|
||||
|
||||
type builder struct {
|
||||
w *gen.CodeWriter
|
||||
data *cldr.CLDR
|
||||
supp *cldr.SupplementalData
|
||||
}
|
||||
|
||||
func newBuilder(w *gen.CodeWriter) *builder {
|
||||
r := gen.OpenCLDRCoreZip()
|
||||
defer r.Close()
|
||||
d := &cldr.Decoder{}
|
||||
data, err := d.DecodeZip(r)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
b := builder{
|
||||
w: w,
|
||||
data: data,
|
||||
supp: data.Supplemental(),
|
||||
}
|
||||
return &b
|
||||
}
|
113
vendor/golang.org/x/text/internal/language/compact/gen_index.go
generated
vendored
113
vendor/golang.org/x/text/internal/language/compact/gen_index.go
generated
vendored
@@ -1,113 +0,0 @@
|
||||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
// This file generates derivative tables based on the language package itself.
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/internal/language"
|
||||
)
|
||||
|
||||
// Compact indices:
|
||||
// Note -va-X variants only apply to localization variants.
|
||||
// BCP variants only ever apply to language.
|
||||
// The only ambiguity between tags is with regions.
|
||||
|
||||
func (b *builder) writeCompactIndex() {
|
||||
// Collect all language tags for which we have any data in CLDR.
|
||||
m := map[language.Tag]bool{}
|
||||
for _, lang := range b.data.Locales() {
|
||||
// We include all locales unconditionally to be consistent with en_US.
|
||||
// We want en_US, even though it has no data associated with it.
|
||||
|
||||
// TODO: put any of the languages for which no data exists at the end
|
||||
// of the index. This allows all components based on ICU to use that
|
||||
// as the cutoff point.
|
||||
// if x := data.RawLDML(lang); false ||
|
||||
// x.LocaleDisplayNames != nil ||
|
||||
// x.Characters != nil ||
|
||||
// x.Delimiters != nil ||
|
||||
// x.Measurement != nil ||
|
||||
// x.Dates != nil ||
|
||||
// x.Numbers != nil ||
|
||||
// x.Units != nil ||
|
||||
// x.ListPatterns != nil ||
|
||||
// x.Collations != nil ||
|
||||
// x.Segmentations != nil ||
|
||||
// x.Rbnf != nil ||
|
||||
// x.Annotations != nil ||
|
||||
// x.Metadata != nil {
|
||||
|
||||
// TODO: support POSIX natively, albeit non-standard.
|
||||
tag := language.Make(strings.Replace(lang, "_POSIX", "-u-va-posix", 1))
|
||||
m[tag] = true
|
||||
// }
|
||||
}
|
||||
|
||||
// TODO: plural rules are also defined for the deprecated tags:
|
||||
// iw mo sh tl
|
||||
// Consider removing these as compact tags.
|
||||
|
||||
// Include locales for plural rules, which uses a different structure.
|
||||
for _, plurals := range b.supp.Plurals {
|
||||
for _, rules := range plurals.PluralRules {
|
||||
for _, lang := range strings.Split(rules.Locales, " ") {
|
||||
m[language.Make(lang)] = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var coreTags []language.CompactCoreInfo
|
||||
var special []string
|
||||
|
||||
for t := range m {
|
||||
if x := t.Extensions(); len(x) != 0 && fmt.Sprint(x) != "[u-va-posix]" {
|
||||
log.Fatalf("Unexpected extension %v in %v", x, t)
|
||||
}
|
||||
if len(t.Variants()) == 0 && len(t.Extensions()) == 0 {
|
||||
cci, ok := language.GetCompactCore(t)
|
||||
if !ok {
|
||||
log.Fatalf("Locale for non-basic language %q", t)
|
||||
}
|
||||
coreTags = append(coreTags, cci)
|
||||
} else {
|
||||
special = append(special, t.String())
|
||||
}
|
||||
}
|
||||
|
||||
w := b.w
|
||||
|
||||
sort.Slice(coreTags, func(i, j int) bool { return coreTags[i] < coreTags[j] })
|
||||
sort.Strings(special)
|
||||
|
||||
w.WriteComment(`
|
||||
NumCompactTags is the number of common tags. The maximum tag is
|
||||
NumCompactTags-1.`)
|
||||
w.WriteConst("NumCompactTags", len(m))
|
||||
|
||||
fmt.Fprintln(w, "const (")
|
||||
for i, t := range coreTags {
|
||||
fmt.Fprintf(w, "%s ID = %d\n", ident(t.Tag().String()), i)
|
||||
}
|
||||
for i, t := range special {
|
||||
fmt.Fprintf(w, "%s ID = %d\n", ident(t), i+len(coreTags))
|
||||
}
|
||||
fmt.Fprintln(w, ")")
|
||||
|
||||
w.WriteVar("coreTags", coreTags)
|
||||
|
||||
w.WriteConst("specialTagsStr", strings.Join(special, " "))
|
||||
}
|
||||
|
||||
func ident(s string) string {
|
||||
return strings.Replace(s, "-", "", -1) + "Index"
|
||||
}
|
54
vendor/golang.org/x/text/internal/language/compact/gen_parents.go
generated
vendored
54
vendor/golang.org/x/text/internal/language/compact/gen_parents.go
generated
vendored
@@ -1,54 +0,0 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"log"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
"golang.org/x/text/internal/language"
|
||||
"golang.org/x/text/internal/language/compact"
|
||||
"golang.org/x/text/unicode/cldr"
|
||||
)
|
||||
|
||||
func main() {
|
||||
r := gen.OpenCLDRCoreZip()
|
||||
defer r.Close()
|
||||
|
||||
d := &cldr.Decoder{}
|
||||
data, err := d.DecodeZip(r)
|
||||
if err != nil {
|
||||
log.Fatalf("DecodeZip: %v", err)
|
||||
}
|
||||
|
||||
w := gen.NewCodeWriter()
|
||||
defer w.WriteGoFile("parents.go", "compact")
|
||||
|
||||
// Create parents table.
|
||||
type ID uint16
|
||||
parents := make([]ID, compact.NumCompactTags)
|
||||
for _, loc := range data.Locales() {
|
||||
tag := language.MustParse(loc)
|
||||
index, ok := compact.FromTag(tag)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
parentIndex := compact.ID(0) // und
|
||||
for p := tag.Parent(); p != language.Und; p = p.Parent() {
|
||||
if x, ok := compact.FromTag(p); ok {
|
||||
parentIndex = x
|
||||
break
|
||||
}
|
||||
}
|
||||
parents[index] = ID(parentIndex)
|
||||
}
|
||||
|
||||
w.WriteComment(`
|
||||
parents maps a compact index of a tag to the compact index of the parent of
|
||||
this tag.`)
|
||||
w.WriteVar("parents", parents)
|
||||
}
|
1520
vendor/golang.org/x/text/internal/language/gen.go
generated
vendored
1520
vendor/golang.org/x/text/internal/language/gen.go
generated
vendored
File diff suppressed because it is too large
Load Diff
20
vendor/golang.org/x/text/internal/language/gen_common.go
generated
vendored
20
vendor/golang.org/x/text/internal/language/gen_common.go
generated
vendored
@@ -1,20 +0,0 @@
|
||||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
// This file contains code common to the maketables.go and the package code.
|
||||
|
||||
// AliasType is the type of an alias in AliasMap.
|
||||
type AliasType int8
|
||||
|
||||
const (
|
||||
Deprecated AliasType = iota
|
||||
Macro
|
||||
Legacy
|
||||
|
||||
AliasTypeUnknown AliasType = -1
|
||||
)
|
305
vendor/golang.org/x/text/language/gen.go
generated
vendored
305
vendor/golang.org/x/text/language/gen.go
generated
vendored
@@ -1,305 +0,0 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
// Language tag table generator.
|
||||
// Data read from the web.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
"golang.org/x/text/internal/language"
|
||||
"golang.org/x/text/unicode/cldr"
|
||||
)
|
||||
|
||||
var (
|
||||
test = flag.Bool("test",
|
||||
false,
|
||||
"test existing tables; can be used to compare web data with package data.")
|
||||
outputFile = flag.String("output",
|
||||
"tables.go",
|
||||
"output file for generated tables")
|
||||
)
|
||||
|
||||
func main() {
|
||||
gen.Init()
|
||||
|
||||
w := gen.NewCodeWriter()
|
||||
defer w.WriteGoFile("tables.go", "language")
|
||||
|
||||
b := newBuilder(w)
|
||||
gen.WriteCLDRVersion(w)
|
||||
|
||||
b.writeConstants()
|
||||
b.writeMatchData()
|
||||
}
|
||||
|
||||
type builder struct {
|
||||
w *gen.CodeWriter
|
||||
hw io.Writer // MultiWriter for w and w.Hash
|
||||
data *cldr.CLDR
|
||||
supp *cldr.SupplementalData
|
||||
}
|
||||
|
||||
func (b *builder) langIndex(s string) uint16 {
|
||||
return uint16(language.MustParseBase(s))
|
||||
}
|
||||
|
||||
func (b *builder) regionIndex(s string) int {
|
||||
return int(language.MustParseRegion(s))
|
||||
}
|
||||
|
||||
func (b *builder) scriptIndex(s string) int {
|
||||
return int(language.MustParseScript(s))
|
||||
}
|
||||
|
||||
func newBuilder(w *gen.CodeWriter) *builder {
|
||||
r := gen.OpenCLDRCoreZip()
|
||||
defer r.Close()
|
||||
d := &cldr.Decoder{}
|
||||
data, err := d.DecodeZip(r)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
b := builder{
|
||||
w: w,
|
||||
hw: io.MultiWriter(w, w.Hash),
|
||||
data: data,
|
||||
supp: data.Supplemental(),
|
||||
}
|
||||
return &b
|
||||
}
|
||||
|
||||
// writeConsts computes f(v) for all v in values and writes the results
|
||||
// as constants named _v to a single constant block.
|
||||
func (b *builder) writeConsts(f func(string) int, values ...string) {
|
||||
fmt.Fprintln(b.w, "const (")
|
||||
for _, v := range values {
|
||||
fmt.Fprintf(b.w, "\t_%s = %v\n", v, f(v))
|
||||
}
|
||||
fmt.Fprintln(b.w, ")")
|
||||
}
|
||||
|
||||
// TODO: region inclusion data will probably not be use used in future matchers.
|
||||
|
||||
var langConsts = []string{
|
||||
"de", "en", "fr", "it", "mo", "no", "nb", "pt", "sh", "mul", "und",
|
||||
}
|
||||
|
||||
var scriptConsts = []string{
|
||||
"Latn", "Hani", "Hans", "Hant", "Qaaa", "Qaai", "Qabx", "Zinh", "Zyyy",
|
||||
"Zzzz",
|
||||
}
|
||||
|
||||
var regionConsts = []string{
|
||||
"001", "419", "BR", "CA", "ES", "GB", "MD", "PT", "UK", "US",
|
||||
"ZZ", "XA", "XC", "XK", // Unofficial tag for Kosovo.
|
||||
}
|
||||
|
||||
func (b *builder) writeConstants() {
|
||||
b.writeConsts(func(s string) int { return int(b.langIndex(s)) }, langConsts...)
|
||||
b.writeConsts(b.regionIndex, regionConsts...)
|
||||
b.writeConsts(b.scriptIndex, scriptConsts...)
|
||||
}
|
||||
|
||||
type mutualIntelligibility struct {
|
||||
want, have uint16
|
||||
distance uint8
|
||||
oneway bool
|
||||
}
|
||||
|
||||
type scriptIntelligibility struct {
|
||||
wantLang, haveLang uint16
|
||||
wantScript, haveScript uint8
|
||||
distance uint8
|
||||
// Always oneway
|
||||
}
|
||||
|
||||
type regionIntelligibility struct {
|
||||
lang uint16 // compact language id
|
||||
script uint8 // 0 means any
|
||||
group uint8 // 0 means any; if bit 7 is set it means inverse
|
||||
distance uint8
|
||||
// Always twoway.
|
||||
}
|
||||
|
||||
// writeMatchData writes tables with languages and scripts for which there is
|
||||
// mutual intelligibility. The data is based on CLDR's languageMatching data.
|
||||
// Note that we use a different algorithm than the one defined by CLDR and that
|
||||
// we slightly modify the data. For example, we convert scores to confidence levels.
|
||||
// We also drop all region-related data as we use a different algorithm to
|
||||
// determine region equivalence.
|
||||
func (b *builder) writeMatchData() {
|
||||
lm := b.supp.LanguageMatching.LanguageMatches
|
||||
cldr.MakeSlice(&lm).SelectAnyOf("type", "written_new")
|
||||
|
||||
regionHierarchy := map[string][]string{}
|
||||
for _, g := range b.supp.TerritoryContainment.Group {
|
||||
regions := strings.Split(g.Contains, " ")
|
||||
regionHierarchy[g.Type] = append(regionHierarchy[g.Type], regions...)
|
||||
}
|
||||
regionToGroups := make([]uint8, language.NumRegions)
|
||||
|
||||
idToIndex := map[string]uint8{}
|
||||
for i, mv := range lm[0].MatchVariable {
|
||||
if i > 6 {
|
||||
log.Fatalf("Too many groups: %d", i)
|
||||
}
|
||||
idToIndex[mv.Id] = uint8(i + 1)
|
||||
// TODO: also handle '-'
|
||||
for _, r := range strings.Split(mv.Value, "+") {
|
||||
todo := []string{r}
|
||||
for k := 0; k < len(todo); k++ {
|
||||
r := todo[k]
|
||||
regionToGroups[b.regionIndex(r)] |= 1 << uint8(i)
|
||||
todo = append(todo, regionHierarchy[r]...)
|
||||
}
|
||||
}
|
||||
}
|
||||
b.w.WriteVar("regionToGroups", regionToGroups)
|
||||
|
||||
// maps language id to in- and out-of-group region.
|
||||
paradigmLocales := [][3]uint16{}
|
||||
locales := strings.Split(lm[0].ParadigmLocales[0].Locales, " ")
|
||||
for i := 0; i < len(locales); i += 2 {
|
||||
x := [3]uint16{}
|
||||
for j := 0; j < 2; j++ {
|
||||
pc := strings.SplitN(locales[i+j], "-", 2)
|
||||
x[0] = b.langIndex(pc[0])
|
||||
if len(pc) == 2 {
|
||||
x[1+j] = uint16(b.regionIndex(pc[1]))
|
||||
}
|
||||
}
|
||||
paradigmLocales = append(paradigmLocales, x)
|
||||
}
|
||||
b.w.WriteVar("paradigmLocales", paradigmLocales)
|
||||
|
||||
b.w.WriteType(mutualIntelligibility{})
|
||||
b.w.WriteType(scriptIntelligibility{})
|
||||
b.w.WriteType(regionIntelligibility{})
|
||||
|
||||
matchLang := []mutualIntelligibility{}
|
||||
matchScript := []scriptIntelligibility{}
|
||||
matchRegion := []regionIntelligibility{}
|
||||
// Convert the languageMatch entries in lists keyed by desired language.
|
||||
for _, m := range lm[0].LanguageMatch {
|
||||
// Different versions of CLDR use different separators.
|
||||
desired := strings.Replace(m.Desired, "-", "_", -1)
|
||||
supported := strings.Replace(m.Supported, "-", "_", -1)
|
||||
d := strings.Split(desired, "_")
|
||||
s := strings.Split(supported, "_")
|
||||
if len(d) != len(s) {
|
||||
log.Fatalf("not supported: desired=%q; supported=%q", desired, supported)
|
||||
continue
|
||||
}
|
||||
distance, _ := strconv.ParseInt(m.Distance, 10, 8)
|
||||
switch len(d) {
|
||||
case 2:
|
||||
if desired == supported && desired == "*_*" {
|
||||
continue
|
||||
}
|
||||
// language-script pair.
|
||||
matchScript = append(matchScript, scriptIntelligibility{
|
||||
wantLang: uint16(b.langIndex(d[0])),
|
||||
haveLang: uint16(b.langIndex(s[0])),
|
||||
wantScript: uint8(b.scriptIndex(d[1])),
|
||||
haveScript: uint8(b.scriptIndex(s[1])),
|
||||
distance: uint8(distance),
|
||||
})
|
||||
if m.Oneway != "true" {
|
||||
matchScript = append(matchScript, scriptIntelligibility{
|
||||
wantLang: uint16(b.langIndex(s[0])),
|
||||
haveLang: uint16(b.langIndex(d[0])),
|
||||
wantScript: uint8(b.scriptIndex(s[1])),
|
||||
haveScript: uint8(b.scriptIndex(d[1])),
|
||||
distance: uint8(distance),
|
||||
})
|
||||
}
|
||||
case 1:
|
||||
if desired == supported && desired == "*" {
|
||||
continue
|
||||
}
|
||||
if distance == 1 {
|
||||
// nb == no is already handled by macro mapping. Check there
|
||||
// really is only this case.
|
||||
if d[0] != "no" || s[0] != "nb" {
|
||||
log.Fatalf("unhandled equivalence %s == %s", s[0], d[0])
|
||||
}
|
||||
continue
|
||||
}
|
||||
// TODO: consider dropping oneway field and just doubling the entry.
|
||||
matchLang = append(matchLang, mutualIntelligibility{
|
||||
want: uint16(b.langIndex(d[0])),
|
||||
have: uint16(b.langIndex(s[0])),
|
||||
distance: uint8(distance),
|
||||
oneway: m.Oneway == "true",
|
||||
})
|
||||
case 3:
|
||||
if desired == supported && desired == "*_*_*" {
|
||||
continue
|
||||
}
|
||||
if desired != supported {
|
||||
// This is now supported by CLDR, but only one case, which
|
||||
// should already be covered by paradigm locales. For instance,
|
||||
// test case "und, en, en-GU, en-IN, en-GB ; en-ZA ; en-GB" in
|
||||
// testdata/CLDRLocaleMatcherTest.txt tests this.
|
||||
if supported != "en_*_GB" {
|
||||
log.Fatalf("not supported: desired=%q; supported=%q", desired, supported)
|
||||
}
|
||||
continue
|
||||
}
|
||||
ri := regionIntelligibility{
|
||||
lang: b.langIndex(d[0]),
|
||||
distance: uint8(distance),
|
||||
}
|
||||
if d[1] != "*" {
|
||||
ri.script = uint8(b.scriptIndex(d[1]))
|
||||
}
|
||||
switch {
|
||||
case d[2] == "*":
|
||||
ri.group = 0x80 // not contained in anything
|
||||
case strings.HasPrefix(d[2], "$!"):
|
||||
ri.group = 0x80
|
||||
d[2] = "$" + d[2][len("$!"):]
|
||||
fallthrough
|
||||
case strings.HasPrefix(d[2], "$"):
|
||||
ri.group |= idToIndex[d[2]]
|
||||
}
|
||||
matchRegion = append(matchRegion, ri)
|
||||
default:
|
||||
log.Fatalf("not supported: desired=%q; supported=%q", desired, supported)
|
||||
}
|
||||
}
|
||||
sort.SliceStable(matchLang, func(i, j int) bool {
|
||||
return matchLang[i].distance < matchLang[j].distance
|
||||
})
|
||||
b.w.WriteComment(`
|
||||
matchLang holds pairs of langIDs of base languages that are typically
|
||||
mutually intelligible. Each pair is associated with a confidence and
|
||||
whether the intelligibility goes one or both ways.`)
|
||||
b.w.WriteVar("matchLang", matchLang)
|
||||
|
||||
b.w.WriteComment(`
|
||||
matchScript holds pairs of scriptIDs where readers of one script
|
||||
can typically also read the other. Each is associated with a confidence.`)
|
||||
sort.SliceStable(matchScript, func(i, j int) bool {
|
||||
return matchScript[i].distance < matchScript[j].distance
|
||||
})
|
||||
b.w.WriteVar("matchScript", matchScript)
|
||||
|
||||
sort.SliceStable(matchRegion, func(i, j int) bool {
|
||||
return matchRegion[i].distance < matchRegion[j].distance
|
||||
})
|
||||
b.w.WriteVar("matchRegion", matchRegion)
|
||||
}
|
133
vendor/golang.org/x/text/unicode/bidi/gen.go
generated
vendored
133
vendor/golang.org/x/text/unicode/bidi/gen.go
generated
vendored
@@ -1,133 +0,0 @@
|
||||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"log"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
"golang.org/x/text/internal/triegen"
|
||||
"golang.org/x/text/internal/ucd"
|
||||
)
|
||||
|
||||
var outputFile = flag.String("out", "tables.go", "output file")
|
||||
|
||||
func main() {
|
||||
gen.Init()
|
||||
gen.Repackage("gen_trieval.go", "trieval.go", "bidi")
|
||||
gen.Repackage("gen_ranges.go", "ranges_test.go", "bidi")
|
||||
|
||||
genTables()
|
||||
}
|
||||
|
||||
// bidiClass names and codes taken from class "bc" in
|
||||
// https://www.unicode.org/Public/8.0.0/ucd/PropertyValueAliases.txt
|
||||
var bidiClass = map[string]Class{
|
||||
"AL": AL, // ArabicLetter
|
||||
"AN": AN, // ArabicNumber
|
||||
"B": B, // ParagraphSeparator
|
||||
"BN": BN, // BoundaryNeutral
|
||||
"CS": CS, // CommonSeparator
|
||||
"EN": EN, // EuropeanNumber
|
||||
"ES": ES, // EuropeanSeparator
|
||||
"ET": ET, // EuropeanTerminator
|
||||
"L": L, // LeftToRight
|
||||
"NSM": NSM, // NonspacingMark
|
||||
"ON": ON, // OtherNeutral
|
||||
"R": R, // RightToLeft
|
||||
"S": S, // SegmentSeparator
|
||||
"WS": WS, // WhiteSpace
|
||||
|
||||
"FSI": Control,
|
||||
"PDF": Control,
|
||||
"PDI": Control,
|
||||
"LRE": Control,
|
||||
"LRI": Control,
|
||||
"LRO": Control,
|
||||
"RLE": Control,
|
||||
"RLI": Control,
|
||||
"RLO": Control,
|
||||
}
|
||||
|
||||
func genTables() {
|
||||
if numClass > 0x0F {
|
||||
log.Fatalf("Too many Class constants (%#x > 0x0F).", numClass)
|
||||
}
|
||||
w := gen.NewCodeWriter()
|
||||
defer w.WriteVersionedGoFile(*outputFile, "bidi")
|
||||
|
||||
gen.WriteUnicodeVersion(w)
|
||||
|
||||
t := triegen.NewTrie("bidi")
|
||||
|
||||
// Build data about bracket mapping. These bits need to be or-ed with
|
||||
// any other bits.
|
||||
orMask := map[rune]uint64{}
|
||||
|
||||
xorMap := map[rune]int{}
|
||||
xorMasks := []rune{0} // First value is no-op.
|
||||
|
||||
ucd.Parse(gen.OpenUCDFile("BidiBrackets.txt"), func(p *ucd.Parser) {
|
||||
r1 := p.Rune(0)
|
||||
r2 := p.Rune(1)
|
||||
xor := r1 ^ r2
|
||||
if _, ok := xorMap[xor]; !ok {
|
||||
xorMap[xor] = len(xorMasks)
|
||||
xorMasks = append(xorMasks, xor)
|
||||
}
|
||||
entry := uint64(xorMap[xor]) << xorMaskShift
|
||||
switch p.String(2) {
|
||||
case "o":
|
||||
entry |= openMask
|
||||
case "c", "n":
|
||||
default:
|
||||
log.Fatalf("Unknown bracket class %q.", p.String(2))
|
||||
}
|
||||
orMask[r1] = entry
|
||||
})
|
||||
|
||||
w.WriteComment(`
|
||||
xorMasks contains masks to be xor-ed with brackets to get the reverse
|
||||
version.`)
|
||||
w.WriteVar("xorMasks", xorMasks)
|
||||
|
||||
done := map[rune]bool{}
|
||||
|
||||
insert := func(r rune, c Class) {
|
||||
if !done[r] {
|
||||
t.Insert(r, orMask[r]|uint64(c))
|
||||
done[r] = true
|
||||
}
|
||||
}
|
||||
|
||||
// Insert the derived BiDi properties.
|
||||
ucd.Parse(gen.OpenUCDFile("extracted/DerivedBidiClass.txt"), func(p *ucd.Parser) {
|
||||
r := p.Rune(0)
|
||||
class, ok := bidiClass[p.String(1)]
|
||||
if !ok {
|
||||
log.Fatalf("%U: Unknown BiDi class %q", r, p.String(1))
|
||||
}
|
||||
insert(r, class)
|
||||
})
|
||||
visitDefaults(insert)
|
||||
|
||||
// TODO: use sparse blocks. This would reduce table size considerably
|
||||
// from the looks of it.
|
||||
|
||||
sz, err := t.Gen(w)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
w.Size += sz
|
||||
}
|
||||
|
||||
// dummy values to make methods in gen_common compile. The real versions
|
||||
// will be generated by this file to tables.go.
|
||||
var (
|
||||
xorMasks []rune
|
||||
)
|
57
vendor/golang.org/x/text/unicode/bidi/gen_ranges.go
generated
vendored
57
vendor/golang.org/x/text/unicode/bidi/gen_ranges.go
generated
vendored
@@ -1,57 +0,0 @@
|
||||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"unicode"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
"golang.org/x/text/internal/ucd"
|
||||
"golang.org/x/text/unicode/rangetable"
|
||||
)
|
||||
|
||||
// These tables are hand-extracted from:
|
||||
// https://www.unicode.org/Public/8.0.0/ucd/extracted/DerivedBidiClass.txt
|
||||
func visitDefaults(fn func(r rune, c Class)) {
|
||||
// first write default values for ranges listed above.
|
||||
visitRunes(fn, AL, []rune{
|
||||
0x0600, 0x07BF, // Arabic
|
||||
0x08A0, 0x08FF, // Arabic Extended-A
|
||||
0xFB50, 0xFDCF, // Arabic Presentation Forms
|
||||
0xFDF0, 0xFDFF,
|
||||
0xFE70, 0xFEFF,
|
||||
0x0001EE00, 0x0001EEFF, // Arabic Mathematical Alpha Symbols
|
||||
})
|
||||
visitRunes(fn, R, []rune{
|
||||
0x0590, 0x05FF, // Hebrew
|
||||
0x07C0, 0x089F, // Nko et al.
|
||||
0xFB1D, 0xFB4F,
|
||||
0x00010800, 0x00010FFF, // Cypriot Syllabary et. al.
|
||||
0x0001E800, 0x0001EDFF,
|
||||
0x0001EF00, 0x0001EFFF,
|
||||
})
|
||||
visitRunes(fn, ET, []rune{ // European Terminator
|
||||
0x20A0, 0x20Cf, // Currency symbols
|
||||
})
|
||||
rangetable.Visit(unicode.Noncharacter_Code_Point, func(r rune) {
|
||||
fn(r, BN) // Boundary Neutral
|
||||
})
|
||||
ucd.Parse(gen.OpenUCDFile("DerivedCoreProperties.txt"), func(p *ucd.Parser) {
|
||||
if p.String(1) == "Default_Ignorable_Code_Point" {
|
||||
fn(p.Rune(0), BN) // Boundary Neutral
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func visitRunes(fn func(r rune, c Class), c Class, runes []rune) {
|
||||
for i := 0; i < len(runes); i += 2 {
|
||||
lo, hi := runes[i], runes[i+1]
|
||||
for j := lo; j <= hi; j++ {
|
||||
fn(j, c)
|
||||
}
|
||||
}
|
||||
}
|
64
vendor/golang.org/x/text/unicode/bidi/gen_trieval.go
generated
vendored
64
vendor/golang.org/x/text/unicode/bidi/gen_trieval.go
generated
vendored
@@ -1,64 +0,0 @@
|
||||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
// Class is the Unicode BiDi class. Each rune has a single class.
|
||||
type Class uint
|
||||
|
||||
const (
|
||||
L Class = iota // LeftToRight
|
||||
R // RightToLeft
|
||||
EN // EuropeanNumber
|
||||
ES // EuropeanSeparator
|
||||
ET // EuropeanTerminator
|
||||
AN // ArabicNumber
|
||||
CS // CommonSeparator
|
||||
B // ParagraphSeparator
|
||||
S // SegmentSeparator
|
||||
WS // WhiteSpace
|
||||
ON // OtherNeutral
|
||||
BN // BoundaryNeutral
|
||||
NSM // NonspacingMark
|
||||
AL // ArabicLetter
|
||||
Control // Control LRO - PDI
|
||||
|
||||
numClass
|
||||
|
||||
LRO // LeftToRightOverride
|
||||
RLO // RightToLeftOverride
|
||||
LRE // LeftToRightEmbedding
|
||||
RLE // RightToLeftEmbedding
|
||||
PDF // PopDirectionalFormat
|
||||
LRI // LeftToRightIsolate
|
||||
RLI // RightToLeftIsolate
|
||||
FSI // FirstStrongIsolate
|
||||
PDI // PopDirectionalIsolate
|
||||
|
||||
unknownClass = ^Class(0)
|
||||
)
|
||||
|
||||
var controlToClass = map[rune]Class{
|
||||
0x202D: LRO, // LeftToRightOverride,
|
||||
0x202E: RLO, // RightToLeftOverride,
|
||||
0x202A: LRE, // LeftToRightEmbedding,
|
||||
0x202B: RLE, // RightToLeftEmbedding,
|
||||
0x202C: PDF, // PopDirectionalFormat,
|
||||
0x2066: LRI, // LeftToRightIsolate,
|
||||
0x2067: RLI, // RightToLeftIsolate,
|
||||
0x2068: FSI, // FirstStrongIsolate,
|
||||
0x2069: PDI, // PopDirectionalIsolate,
|
||||
}
|
||||
|
||||
// A trie entry has the following bits:
|
||||
// 7..5 XOR mask for brackets
|
||||
// 4 1: Bracket open, 0: Bracket close
|
||||
// 3..0 Class type
|
||||
|
||||
const (
|
||||
openMask = 0x10
|
||||
xorMaskShift = 5
|
||||
)
|
986
vendor/golang.org/x/text/unicode/norm/maketables.go
generated
vendored
986
vendor/golang.org/x/text/unicode/norm/maketables.go
generated
vendored
@@ -1,986 +0,0 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
// Normalization table generator.
|
||||
// Data read from the web.
|
||||
// See forminfo.go for a description of the trie values associated with each rune.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
"golang.org/x/text/internal/triegen"
|
||||
"golang.org/x/text/internal/ucd"
|
||||
)
|
||||
|
||||
func main() {
|
||||
gen.Init()
|
||||
loadUnicodeData()
|
||||
compactCCC()
|
||||
loadCompositionExclusions()
|
||||
completeCharFields(FCanonical)
|
||||
completeCharFields(FCompatibility)
|
||||
computeNonStarterCounts()
|
||||
verifyComputed()
|
||||
printChars()
|
||||
testDerived()
|
||||
printTestdata()
|
||||
makeTables()
|
||||
}
|
||||
|
||||
var (
|
||||
tablelist = flag.String("tables",
|
||||
"all",
|
||||
"comma-separated list of which tables to generate; "+
|
||||
"can be 'decomp', 'recomp', 'info' and 'all'")
|
||||
test = flag.Bool("test",
|
||||
false,
|
||||
"test existing tables against DerivedNormalizationProps and generate test data for regression testing")
|
||||
verbose = flag.Bool("verbose",
|
||||
false,
|
||||
"write data to stdout as it is parsed")
|
||||
)
|
||||
|
||||
const MaxChar = 0x10FFFF // anything above this shouldn't exist
|
||||
|
||||
// Quick Check properties of runes allow us to quickly
|
||||
// determine whether a rune may occur in a normal form.
|
||||
// For a given normal form, a rune may be guaranteed to occur
|
||||
// verbatim (QC=Yes), may or may not combine with another
|
||||
// rune (QC=Maybe), or may not occur (QC=No).
|
||||
type QCResult int
|
||||
|
||||
const (
|
||||
QCUnknown QCResult = iota
|
||||
QCYes
|
||||
QCNo
|
||||
QCMaybe
|
||||
)
|
||||
|
||||
func (r QCResult) String() string {
|
||||
switch r {
|
||||
case QCYes:
|
||||
return "Yes"
|
||||
case QCNo:
|
||||
return "No"
|
||||
case QCMaybe:
|
||||
return "Maybe"
|
||||
}
|
||||
return "***UNKNOWN***"
|
||||
}
|
||||
|
||||
const (
|
||||
FCanonical = iota // NFC or NFD
|
||||
FCompatibility // NFKC or NFKD
|
||||
FNumberOfFormTypes
|
||||
)
|
||||
|
||||
const (
|
||||
MComposed = iota // NFC or NFKC
|
||||
MDecomposed // NFD or NFKD
|
||||
MNumberOfModes
|
||||
)
|
||||
|
||||
// This contains only the properties we're interested in.
|
||||
type Char struct {
|
||||
name string
|
||||
codePoint rune // if zero, this index is not a valid code point.
|
||||
ccc uint8 // canonical combining class
|
||||
origCCC uint8
|
||||
excludeInComp bool // from CompositionExclusions.txt
|
||||
compatDecomp bool // it has a compatibility expansion
|
||||
|
||||
nTrailingNonStarters uint8
|
||||
nLeadingNonStarters uint8 // must be equal to trailing if non-zero
|
||||
|
||||
forms [FNumberOfFormTypes]FormInfo // For FCanonical and FCompatibility
|
||||
|
||||
state State
|
||||
}
|
||||
|
||||
var chars = make([]Char, MaxChar+1)
|
||||
var cccMap = make(map[uint8]uint8)
|
||||
|
||||
func (c Char) String() string {
|
||||
buf := new(bytes.Buffer)
|
||||
|
||||
fmt.Fprintf(buf, "%U [%s]:\n", c.codePoint, c.name)
|
||||
fmt.Fprintf(buf, " ccc: %v\n", c.ccc)
|
||||
fmt.Fprintf(buf, " excludeInComp: %v\n", c.excludeInComp)
|
||||
fmt.Fprintf(buf, " compatDecomp: %v\n", c.compatDecomp)
|
||||
fmt.Fprintf(buf, " state: %v\n", c.state)
|
||||
fmt.Fprintf(buf, " NFC:\n")
|
||||
fmt.Fprint(buf, c.forms[FCanonical])
|
||||
fmt.Fprintf(buf, " NFKC:\n")
|
||||
fmt.Fprint(buf, c.forms[FCompatibility])
|
||||
|
||||
return buf.String()
|
||||
}
|
||||
|
||||
// In UnicodeData.txt, some ranges are marked like this:
|
||||
// 3400;<CJK Ideograph Extension A, First>;Lo;0;L;;;;;N;;;;;
|
||||
// 4DB5;<CJK Ideograph Extension A, Last>;Lo;0;L;;;;;N;;;;;
|
||||
// parseCharacter keeps a state variable indicating the weirdness.
|
||||
type State int
|
||||
|
||||
const (
|
||||
SNormal State = iota // known to be zero for the type
|
||||
SFirst
|
||||
SLast
|
||||
SMissing
|
||||
)
|
||||
|
||||
var lastChar = rune('\u0000')
|
||||
|
||||
func (c Char) isValid() bool {
|
||||
return c.codePoint != 0 && c.state != SMissing
|
||||
}
|
||||
|
||||
type FormInfo struct {
|
||||
quickCheck [MNumberOfModes]QCResult // index: MComposed or MDecomposed
|
||||
verified [MNumberOfModes]bool // index: MComposed or MDecomposed
|
||||
|
||||
combinesForward bool // May combine with rune on the right
|
||||
combinesBackward bool // May combine with rune on the left
|
||||
isOneWay bool // Never appears in result
|
||||
inDecomp bool // Some decompositions result in this char.
|
||||
decomp Decomposition
|
||||
expandedDecomp Decomposition
|
||||
}
|
||||
|
||||
func (f FormInfo) String() string {
|
||||
buf := bytes.NewBuffer(make([]byte, 0))
|
||||
|
||||
fmt.Fprintf(buf, " quickCheck[C]: %v\n", f.quickCheck[MComposed])
|
||||
fmt.Fprintf(buf, " quickCheck[D]: %v\n", f.quickCheck[MDecomposed])
|
||||
fmt.Fprintf(buf, " cmbForward: %v\n", f.combinesForward)
|
||||
fmt.Fprintf(buf, " cmbBackward: %v\n", f.combinesBackward)
|
||||
fmt.Fprintf(buf, " isOneWay: %v\n", f.isOneWay)
|
||||
fmt.Fprintf(buf, " inDecomp: %v\n", f.inDecomp)
|
||||
fmt.Fprintf(buf, " decomposition: %X\n", f.decomp)
|
||||
fmt.Fprintf(buf, " expandedDecomp: %X\n", f.expandedDecomp)
|
||||
|
||||
return buf.String()
|
||||
}
|
||||
|
||||
type Decomposition []rune
|
||||
|
||||
func parseDecomposition(s string, skipfirst bool) (a []rune, err error) {
|
||||
decomp := strings.Split(s, " ")
|
||||
if len(decomp) > 0 && skipfirst {
|
||||
decomp = decomp[1:]
|
||||
}
|
||||
for _, d := range decomp {
|
||||
point, err := strconv.ParseUint(d, 16, 64)
|
||||
if err != nil {
|
||||
return a, err
|
||||
}
|
||||
a = append(a, rune(point))
|
||||
}
|
||||
return a, nil
|
||||
}
|
||||
|
||||
func loadUnicodeData() {
|
||||
f := gen.OpenUCDFile("UnicodeData.txt")
|
||||
defer f.Close()
|
||||
p := ucd.New(f)
|
||||
for p.Next() {
|
||||
r := p.Rune(ucd.CodePoint)
|
||||
char := &chars[r]
|
||||
|
||||
char.ccc = uint8(p.Uint(ucd.CanonicalCombiningClass))
|
||||
decmap := p.String(ucd.DecompMapping)
|
||||
|
||||
exp, err := parseDecomposition(decmap, false)
|
||||
isCompat := false
|
||||
if err != nil {
|
||||
if len(decmap) > 0 {
|
||||
exp, err = parseDecomposition(decmap, true)
|
||||
if err != nil {
|
||||
log.Fatalf(`%U: bad decomp |%v|: "%s"`, r, decmap, err)
|
||||
}
|
||||
isCompat = true
|
||||
}
|
||||
}
|
||||
|
||||
char.name = p.String(ucd.Name)
|
||||
char.codePoint = r
|
||||
char.forms[FCompatibility].decomp = exp
|
||||
if !isCompat {
|
||||
char.forms[FCanonical].decomp = exp
|
||||
} else {
|
||||
char.compatDecomp = true
|
||||
}
|
||||
if len(decmap) > 0 {
|
||||
char.forms[FCompatibility].decomp = exp
|
||||
}
|
||||
}
|
||||
if err := p.Err(); err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
// compactCCC converts the sparse set of CCC values to a continguous one,
|
||||
// reducing the number of bits needed from 8 to 6.
|
||||
func compactCCC() {
|
||||
m := make(map[uint8]uint8)
|
||||
for i := range chars {
|
||||
c := &chars[i]
|
||||
m[c.ccc] = 0
|
||||
}
|
||||
cccs := []int{}
|
||||
for v, _ := range m {
|
||||
cccs = append(cccs, int(v))
|
||||
}
|
||||
sort.Ints(cccs)
|
||||
for i, c := range cccs {
|
||||
cccMap[uint8(i)] = uint8(c)
|
||||
m[uint8(c)] = uint8(i)
|
||||
}
|
||||
for i := range chars {
|
||||
c := &chars[i]
|
||||
c.origCCC = c.ccc
|
||||
c.ccc = m[c.ccc]
|
||||
}
|
||||
if len(m) >= 1<<6 {
|
||||
log.Fatalf("too many difference CCC values: %d >= 64", len(m))
|
||||
}
|
||||
}
|
||||
|
||||
// CompositionExclusions.txt has form:
|
||||
// 0958 # ...
|
||||
// See https://unicode.org/reports/tr44/ for full explanation
|
||||
func loadCompositionExclusions() {
|
||||
f := gen.OpenUCDFile("CompositionExclusions.txt")
|
||||
defer f.Close()
|
||||
p := ucd.New(f)
|
||||
for p.Next() {
|
||||
c := &chars[p.Rune(0)]
|
||||
if c.excludeInComp {
|
||||
log.Fatalf("%U: Duplicate entry in exclusions.", c.codePoint)
|
||||
}
|
||||
c.excludeInComp = true
|
||||
}
|
||||
if e := p.Err(); e != nil {
|
||||
log.Fatal(e)
|
||||
}
|
||||
}
|
||||
|
||||
// hasCompatDecomp returns true if any of the recursive
|
||||
// decompositions contains a compatibility expansion.
|
||||
// In this case, the character may not occur in NFK*.
|
||||
func hasCompatDecomp(r rune) bool {
|
||||
c := &chars[r]
|
||||
if c.compatDecomp {
|
||||
return true
|
||||
}
|
||||
for _, d := range c.forms[FCompatibility].decomp {
|
||||
if hasCompatDecomp(d) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// Hangul related constants.
|
||||
const (
|
||||
HangulBase = 0xAC00
|
||||
HangulEnd = 0xD7A4 // hangulBase + Jamo combinations (19 * 21 * 28)
|
||||
|
||||
JamoLBase = 0x1100
|
||||
JamoLEnd = 0x1113
|
||||
JamoVBase = 0x1161
|
||||
JamoVEnd = 0x1176
|
||||
JamoTBase = 0x11A8
|
||||
JamoTEnd = 0x11C3
|
||||
|
||||
JamoLVTCount = 19 * 21 * 28
|
||||
JamoTCount = 28
|
||||
)
|
||||
|
||||
func isHangul(r rune) bool {
|
||||
return HangulBase <= r && r < HangulEnd
|
||||
}
|
||||
|
||||
func isHangulWithoutJamoT(r rune) bool {
|
||||
if !isHangul(r) {
|
||||
return false
|
||||
}
|
||||
r -= HangulBase
|
||||
return r < JamoLVTCount && r%JamoTCount == 0
|
||||
}
|
||||
|
||||
func ccc(r rune) uint8 {
|
||||
return chars[r].ccc
|
||||
}
|
||||
|
||||
// Insert a rune in a buffer, ordered by Canonical Combining Class.
|
||||
func insertOrdered(b Decomposition, r rune) Decomposition {
|
||||
n := len(b)
|
||||
b = append(b, 0)
|
||||
cc := ccc(r)
|
||||
if cc > 0 {
|
||||
// Use bubble sort.
|
||||
for ; n > 0; n-- {
|
||||
if ccc(b[n-1]) <= cc {
|
||||
break
|
||||
}
|
||||
b[n] = b[n-1]
|
||||
}
|
||||
}
|
||||
b[n] = r
|
||||
return b
|
||||
}
|
||||
|
||||
// Recursively decompose.
|
||||
func decomposeRecursive(form int, r rune, d Decomposition) Decomposition {
|
||||
dcomp := chars[r].forms[form].decomp
|
||||
if len(dcomp) == 0 {
|
||||
return insertOrdered(d, r)
|
||||
}
|
||||
for _, c := range dcomp {
|
||||
d = decomposeRecursive(form, c, d)
|
||||
}
|
||||
return d
|
||||
}
|
||||
|
||||
func completeCharFields(form int) {
|
||||
// Phase 0: pre-expand decomposition.
|
||||
for i := range chars {
|
||||
f := &chars[i].forms[form]
|
||||
if len(f.decomp) == 0 {
|
||||
continue
|
||||
}
|
||||
exp := make(Decomposition, 0)
|
||||
for _, c := range f.decomp {
|
||||
exp = decomposeRecursive(form, c, exp)
|
||||
}
|
||||
f.expandedDecomp = exp
|
||||
}
|
||||
|
||||
// Phase 1: composition exclusion, mark decomposition.
|
||||
for i := range chars {
|
||||
c := &chars[i]
|
||||
f := &c.forms[form]
|
||||
|
||||
// Marks script-specific exclusions and version restricted.
|
||||
f.isOneWay = c.excludeInComp
|
||||
|
||||
// Singletons
|
||||
f.isOneWay = f.isOneWay || len(f.decomp) == 1
|
||||
|
||||
// Non-starter decompositions
|
||||
if len(f.decomp) > 1 {
|
||||
chk := c.ccc != 0 || chars[f.decomp[0]].ccc != 0
|
||||
f.isOneWay = f.isOneWay || chk
|
||||
}
|
||||
|
||||
// Runes that decompose into more than two runes.
|
||||
f.isOneWay = f.isOneWay || len(f.decomp) > 2
|
||||
|
||||
if form == FCompatibility {
|
||||
f.isOneWay = f.isOneWay || hasCompatDecomp(c.codePoint)
|
||||
}
|
||||
|
||||
for _, r := range f.decomp {
|
||||
chars[r].forms[form].inDecomp = true
|
||||
}
|
||||
}
|
||||
|
||||
// Phase 2: forward and backward combining.
|
||||
for i := range chars {
|
||||
c := &chars[i]
|
||||
f := &c.forms[form]
|
||||
|
||||
if !f.isOneWay && len(f.decomp) == 2 {
|
||||
f0 := &chars[f.decomp[0]].forms[form]
|
||||
f1 := &chars[f.decomp[1]].forms[form]
|
||||
if !f0.isOneWay {
|
||||
f0.combinesForward = true
|
||||
}
|
||||
if !f1.isOneWay {
|
||||
f1.combinesBackward = true
|
||||
}
|
||||
}
|
||||
if isHangulWithoutJamoT(rune(i)) {
|
||||
f.combinesForward = true
|
||||
}
|
||||
}
|
||||
|
||||
// Phase 3: quick check values.
|
||||
for i := range chars {
|
||||
c := &chars[i]
|
||||
f := &c.forms[form]
|
||||
|
||||
switch {
|
||||
case len(f.decomp) > 0:
|
||||
f.quickCheck[MDecomposed] = QCNo
|
||||
case isHangul(rune(i)):
|
||||
f.quickCheck[MDecomposed] = QCNo
|
||||
default:
|
||||
f.quickCheck[MDecomposed] = QCYes
|
||||
}
|
||||
switch {
|
||||
case f.isOneWay:
|
||||
f.quickCheck[MComposed] = QCNo
|
||||
case (i & 0xffff00) == JamoLBase:
|
||||
f.quickCheck[MComposed] = QCYes
|
||||
if JamoLBase <= i && i < JamoLEnd {
|
||||
f.combinesForward = true
|
||||
}
|
||||
if JamoVBase <= i && i < JamoVEnd {
|
||||
f.quickCheck[MComposed] = QCMaybe
|
||||
f.combinesBackward = true
|
||||
f.combinesForward = true
|
||||
}
|
||||
if JamoTBase <= i && i < JamoTEnd {
|
||||
f.quickCheck[MComposed] = QCMaybe
|
||||
f.combinesBackward = true
|
||||
}
|
||||
case !f.combinesBackward:
|
||||
f.quickCheck[MComposed] = QCYes
|
||||
default:
|
||||
f.quickCheck[MComposed] = QCMaybe
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func computeNonStarterCounts() {
|
||||
// Phase 4: leading and trailing non-starter count
|
||||
for i := range chars {
|
||||
c := &chars[i]
|
||||
|
||||
runes := []rune{rune(i)}
|
||||
// We always use FCompatibility so that the CGJ insertion points do not
|
||||
// change for repeated normalizations with different forms.
|
||||
if exp := c.forms[FCompatibility].expandedDecomp; len(exp) > 0 {
|
||||
runes = exp
|
||||
}
|
||||
// We consider runes that combine backwards to be non-starters for the
|
||||
// purpose of Stream-Safe Text Processing.
|
||||
for _, r := range runes {
|
||||
if cr := &chars[r]; cr.ccc == 0 && !cr.forms[FCompatibility].combinesBackward {
|
||||
break
|
||||
}
|
||||
c.nLeadingNonStarters++
|
||||
}
|
||||
for i := len(runes) - 1; i >= 0; i-- {
|
||||
if cr := &chars[runes[i]]; cr.ccc == 0 && !cr.forms[FCompatibility].combinesBackward {
|
||||
break
|
||||
}
|
||||
c.nTrailingNonStarters++
|
||||
}
|
||||
if c.nTrailingNonStarters > 3 {
|
||||
log.Fatalf("%U: Decomposition with more than 3 (%d) trailing modifiers (%U)", i, c.nTrailingNonStarters, runes)
|
||||
}
|
||||
|
||||
if isHangul(rune(i)) {
|
||||
c.nTrailingNonStarters = 2
|
||||
if isHangulWithoutJamoT(rune(i)) {
|
||||
c.nTrailingNonStarters = 1
|
||||
}
|
||||
}
|
||||
|
||||
if l, t := c.nLeadingNonStarters, c.nTrailingNonStarters; l > 0 && l != t {
|
||||
log.Fatalf("%U: number of leading and trailing non-starters should be equal (%d vs %d)", i, l, t)
|
||||
}
|
||||
if t := c.nTrailingNonStarters; t > 3 {
|
||||
log.Fatalf("%U: number of trailing non-starters is %d > 3", t)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func printBytes(w io.Writer, b []byte, name string) {
|
||||
fmt.Fprintf(w, "// %s: %d bytes\n", name, len(b))
|
||||
fmt.Fprintf(w, "var %s = [...]byte {", name)
|
||||
for i, c := range b {
|
||||
switch {
|
||||
case i%64 == 0:
|
||||
fmt.Fprintf(w, "\n// Bytes %x - %x\n", i, i+63)
|
||||
case i%8 == 0:
|
||||
fmt.Fprintf(w, "\n")
|
||||
}
|
||||
fmt.Fprintf(w, "0x%.2X, ", c)
|
||||
}
|
||||
fmt.Fprint(w, "\n}\n\n")
|
||||
}
|
||||
|
||||
// See forminfo.go for format.
|
||||
func makeEntry(f *FormInfo, c *Char) uint16 {
|
||||
e := uint16(0)
|
||||
if r := c.codePoint; HangulBase <= r && r < HangulEnd {
|
||||
e |= 0x40
|
||||
}
|
||||
if f.combinesForward {
|
||||
e |= 0x20
|
||||
}
|
||||
if f.quickCheck[MDecomposed] == QCNo {
|
||||
e |= 0x4
|
||||
}
|
||||
switch f.quickCheck[MComposed] {
|
||||
case QCYes:
|
||||
case QCNo:
|
||||
e |= 0x10
|
||||
case QCMaybe:
|
||||
e |= 0x18
|
||||
default:
|
||||
log.Fatalf("Illegal quickcheck value %v.", f.quickCheck[MComposed])
|
||||
}
|
||||
e |= uint16(c.nTrailingNonStarters)
|
||||
return e
|
||||
}
|
||||
|
||||
// decompSet keeps track of unique decompositions, grouped by whether
|
||||
// the decomposition is followed by a trailing and/or leading CCC.
|
||||
type decompSet [7]map[string]bool
|
||||
|
||||
const (
|
||||
normalDecomp = iota
|
||||
firstMulti
|
||||
firstCCC
|
||||
endMulti
|
||||
firstLeadingCCC
|
||||
firstCCCZeroExcept
|
||||
firstStarterWithNLead
|
||||
lastDecomp
|
||||
)
|
||||
|
||||
var cname = []string{"firstMulti", "firstCCC", "endMulti", "firstLeadingCCC", "firstCCCZeroExcept", "firstStarterWithNLead", "lastDecomp"}
|
||||
|
||||
func makeDecompSet() decompSet {
|
||||
m := decompSet{}
|
||||
for i := range m {
|
||||
m[i] = make(map[string]bool)
|
||||
}
|
||||
return m
|
||||
}
|
||||
func (m *decompSet) insert(key int, s string) {
|
||||
m[key][s] = true
|
||||
}
|
||||
|
||||
func printCharInfoTables(w io.Writer) int {
|
||||
mkstr := func(r rune, f *FormInfo) (int, string) {
|
||||
d := f.expandedDecomp
|
||||
s := string([]rune(d))
|
||||
if max := 1 << 6; len(s) >= max {
|
||||
const msg = "%U: too many bytes in decomposition: %d >= %d"
|
||||
log.Fatalf(msg, r, len(s), max)
|
||||
}
|
||||
head := uint8(len(s))
|
||||
if f.quickCheck[MComposed] != QCYes {
|
||||
head |= 0x40
|
||||
}
|
||||
if f.combinesForward {
|
||||
head |= 0x80
|
||||
}
|
||||
s = string([]byte{head}) + s
|
||||
|
||||
lccc := ccc(d[0])
|
||||
tccc := ccc(d[len(d)-1])
|
||||
cc := ccc(r)
|
||||
if cc != 0 && lccc == 0 && tccc == 0 {
|
||||
log.Fatalf("%U: trailing and leading ccc are 0 for non-zero ccc %d", r, cc)
|
||||
}
|
||||
if tccc < lccc && lccc != 0 {
|
||||
const msg = "%U: lccc (%d) must be <= tcc (%d)"
|
||||
log.Fatalf(msg, r, lccc, tccc)
|
||||
}
|
||||
index := normalDecomp
|
||||
nTrail := chars[r].nTrailingNonStarters
|
||||
nLead := chars[r].nLeadingNonStarters
|
||||
if tccc > 0 || lccc > 0 || nTrail > 0 {
|
||||
tccc <<= 2
|
||||
tccc |= nTrail
|
||||
s += string([]byte{tccc})
|
||||
index = endMulti
|
||||
for _, r := range d[1:] {
|
||||
if ccc(r) == 0 {
|
||||
index = firstCCC
|
||||
}
|
||||
}
|
||||
if lccc > 0 || nLead > 0 {
|
||||
s += string([]byte{lccc})
|
||||
if index == firstCCC {
|
||||
log.Fatalf("%U: multi-segment decomposition not supported for decompositions with leading CCC != 0", r)
|
||||
}
|
||||
index = firstLeadingCCC
|
||||
}
|
||||
if cc != lccc {
|
||||
if cc != 0 {
|
||||
log.Fatalf("%U: for lccc != ccc, expected ccc to be 0; was %d", r, cc)
|
||||
}
|
||||
index = firstCCCZeroExcept
|
||||
}
|
||||
} else if len(d) > 1 {
|
||||
index = firstMulti
|
||||
}
|
||||
return index, s
|
||||
}
|
||||
|
||||
decompSet := makeDecompSet()
|
||||
const nLeadStr = "\x00\x01" // 0-byte length and tccc with nTrail.
|
||||
decompSet.insert(firstStarterWithNLead, nLeadStr)
|
||||
|
||||
// Store the uniqued decompositions in a byte buffer,
|
||||
// preceded by their byte length.
|
||||
for _, c := range chars {
|
||||
for _, f := range c.forms {
|
||||
if len(f.expandedDecomp) == 0 {
|
||||
continue
|
||||
}
|
||||
if f.combinesBackward {
|
||||
log.Fatalf("%U: combinesBackward and decompose", c.codePoint)
|
||||
}
|
||||
index, s := mkstr(c.codePoint, &f)
|
||||
decompSet.insert(index, s)
|
||||
}
|
||||
}
|
||||
|
||||
decompositions := bytes.NewBuffer(make([]byte, 0, 10000))
|
||||
size := 0
|
||||
positionMap := make(map[string]uint16)
|
||||
decompositions.WriteString("\000")
|
||||
fmt.Fprintln(w, "const (")
|
||||
for i, m := range decompSet {
|
||||
sa := []string{}
|
||||
for s := range m {
|
||||
sa = append(sa, s)
|
||||
}
|
||||
sort.Strings(sa)
|
||||
for _, s := range sa {
|
||||
p := decompositions.Len()
|
||||
decompositions.WriteString(s)
|
||||
positionMap[s] = uint16(p)
|
||||
}
|
||||
if cname[i] != "" {
|
||||
fmt.Fprintf(w, "%s = 0x%X\n", cname[i], decompositions.Len())
|
||||
}
|
||||
}
|
||||
fmt.Fprintln(w, "maxDecomp = 0x8000")
|
||||
fmt.Fprintln(w, ")")
|
||||
b := decompositions.Bytes()
|
||||
printBytes(w, b, "decomps")
|
||||
size += len(b)
|
||||
|
||||
varnames := []string{"nfc", "nfkc"}
|
||||
for i := 0; i < FNumberOfFormTypes; i++ {
|
||||
trie := triegen.NewTrie(varnames[i])
|
||||
|
||||
for r, c := range chars {
|
||||
f := c.forms[i]
|
||||
d := f.expandedDecomp
|
||||
if len(d) != 0 {
|
||||
_, key := mkstr(c.codePoint, &f)
|
||||
trie.Insert(rune(r), uint64(positionMap[key]))
|
||||
if c.ccc != ccc(d[0]) {
|
||||
// We assume the lead ccc of a decomposition !=0 in this case.
|
||||
if ccc(d[0]) == 0 {
|
||||
log.Fatalf("Expected leading CCC to be non-zero; ccc is %d", c.ccc)
|
||||
}
|
||||
}
|
||||
} else if c.nLeadingNonStarters > 0 && len(f.expandedDecomp) == 0 && c.ccc == 0 && !f.combinesBackward {
|
||||
// Handle cases where it can't be detected that the nLead should be equal
|
||||
// to nTrail.
|
||||
trie.Insert(c.codePoint, uint64(positionMap[nLeadStr]))
|
||||
} else if v := makeEntry(&f, &c)<<8 | uint16(c.ccc); v != 0 {
|
||||
trie.Insert(c.codePoint, uint64(0x8000|v))
|
||||
}
|
||||
}
|
||||
sz, err := trie.Gen(w, triegen.Compact(&normCompacter{name: varnames[i]}))
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
size += sz
|
||||
}
|
||||
return size
|
||||
}
|
||||
|
||||
func contains(sa []string, s string) bool {
|
||||
for _, a := range sa {
|
||||
if a == s {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func makeTables() {
|
||||
w := &bytes.Buffer{}
|
||||
|
||||
size := 0
|
||||
if *tablelist == "" {
|
||||
return
|
||||
}
|
||||
list := strings.Split(*tablelist, ",")
|
||||
if *tablelist == "all" {
|
||||
list = []string{"recomp", "info"}
|
||||
}
|
||||
|
||||
// Compute maximum decomposition size.
|
||||
max := 0
|
||||
for _, c := range chars {
|
||||
if n := len(string(c.forms[FCompatibility].expandedDecomp)); n > max {
|
||||
max = n
|
||||
}
|
||||
}
|
||||
fmt.Fprintln(w, `import "sync"`)
|
||||
fmt.Fprintln(w)
|
||||
|
||||
fmt.Fprintln(w, "const (")
|
||||
fmt.Fprintln(w, "\t// Version is the Unicode edition from which the tables are derived.")
|
||||
fmt.Fprintf(w, "\tVersion = %q\n", gen.UnicodeVersion())
|
||||
fmt.Fprintln(w)
|
||||
fmt.Fprintln(w, "\t// MaxTransformChunkSize indicates the maximum number of bytes that Transform")
|
||||
fmt.Fprintln(w, "\t// may need to write atomically for any Form. Making a destination buffer at")
|
||||
fmt.Fprintln(w, "\t// least this size ensures that Transform can always make progress and that")
|
||||
fmt.Fprintln(w, "\t// the user does not need to grow the buffer on an ErrShortDst.")
|
||||
fmt.Fprintf(w, "\tMaxTransformChunkSize = %d+maxNonStarters*4\n", len(string(0x034F))+max)
|
||||
fmt.Fprintln(w, ")\n")
|
||||
|
||||
// Print the CCC remap table.
|
||||
size += len(cccMap)
|
||||
fmt.Fprintf(w, "var ccc = [%d]uint8{", len(cccMap))
|
||||
for i := 0; i < len(cccMap); i++ {
|
||||
if i%8 == 0 {
|
||||
fmt.Fprintln(w)
|
||||
}
|
||||
fmt.Fprintf(w, "%3d, ", cccMap[uint8(i)])
|
||||
}
|
||||
fmt.Fprintln(w, "\n}\n")
|
||||
|
||||
if contains(list, "info") {
|
||||
size += printCharInfoTables(w)
|
||||
}
|
||||
|
||||
if contains(list, "recomp") {
|
||||
// Note that we use 32 bit keys, instead of 64 bit.
|
||||
// This clips the bits of three entries, but we know
|
||||
// this won't cause a collision. The compiler will catch
|
||||
// any changes made to UnicodeData.txt that introduces
|
||||
// a collision.
|
||||
// Note that the recomposition map for NFC and NFKC
|
||||
// are identical.
|
||||
|
||||
// Recomposition map
|
||||
nrentries := 0
|
||||
for _, c := range chars {
|
||||
f := c.forms[FCanonical]
|
||||
if !f.isOneWay && len(f.decomp) > 0 {
|
||||
nrentries++
|
||||
}
|
||||
}
|
||||
sz := nrentries * 8
|
||||
size += sz
|
||||
fmt.Fprintf(w, "// recompMap: %d bytes (entries only)\n", sz)
|
||||
fmt.Fprintln(w, "var recompMap map[uint32]rune")
|
||||
fmt.Fprintln(w, "var recompMapOnce sync.Once\n")
|
||||
fmt.Fprintln(w, `const recompMapPacked = "" +`)
|
||||
var buf [8]byte
|
||||
for i, c := range chars {
|
||||
f := c.forms[FCanonical]
|
||||
d := f.decomp
|
||||
if !f.isOneWay && len(d) > 0 {
|
||||
key := uint32(uint16(d[0]))<<16 + uint32(uint16(d[1]))
|
||||
binary.BigEndian.PutUint32(buf[:4], key)
|
||||
binary.BigEndian.PutUint32(buf[4:], uint32(i))
|
||||
fmt.Fprintf(w, "\t\t%q + // 0x%.8X: 0x%.8X\n", string(buf[:]), key, uint32(i))
|
||||
}
|
||||
}
|
||||
// hack so we don't have to special case the trailing plus sign
|
||||
fmt.Fprintf(w, ` ""`)
|
||||
fmt.Fprintln(w)
|
||||
}
|
||||
|
||||
fmt.Fprintf(w, "// Total size of tables: %dKB (%d bytes)\n", (size+512)/1024, size)
|
||||
gen.WriteVersionedGoFile("tables.go", "norm", w.Bytes())
|
||||
}
|
||||
|
||||
func printChars() {
|
||||
if *verbose {
|
||||
for _, c := range chars {
|
||||
if !c.isValid() || c.state == SMissing {
|
||||
continue
|
||||
}
|
||||
fmt.Println(c)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// verifyComputed does various consistency tests.
|
||||
func verifyComputed() {
|
||||
for i, c := range chars {
|
||||
for _, f := range c.forms {
|
||||
isNo := (f.quickCheck[MDecomposed] == QCNo)
|
||||
if (len(f.decomp) > 0) != isNo && !isHangul(rune(i)) {
|
||||
log.Fatalf("%U: NF*D QC must be No if rune decomposes", i)
|
||||
}
|
||||
|
||||
isMaybe := f.quickCheck[MComposed] == QCMaybe
|
||||
if f.combinesBackward != isMaybe {
|
||||
log.Fatalf("%U: NF*C QC must be Maybe if combinesBackward", i)
|
||||
}
|
||||
if len(f.decomp) > 0 && f.combinesForward && isMaybe {
|
||||
log.Fatalf("%U: NF*C QC must be Yes or No if combinesForward and decomposes", i)
|
||||
}
|
||||
|
||||
if len(f.expandedDecomp) != 0 {
|
||||
continue
|
||||
}
|
||||
if a, b := c.nLeadingNonStarters > 0, (c.ccc > 0 || f.combinesBackward); a != b {
|
||||
// We accept these runes to be treated differently (it only affects
|
||||
// segment breaking in iteration, most likely on improper use), but
|
||||
// reconsider if more characters are added.
|
||||
// U+FF9E HALFWIDTH KATAKANA VOICED SOUND MARK;Lm;0;L;<narrow> 3099;;;;N;;;;;
|
||||
// U+FF9F HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK;Lm;0;L;<narrow> 309A;;;;N;;;;;
|
||||
// U+3133 HANGUL LETTER KIYEOK-SIOS;Lo;0;L;<compat> 11AA;;;;N;HANGUL LETTER GIYEOG SIOS;;;;
|
||||
// U+318E HANGUL LETTER ARAEAE;Lo;0;L;<compat> 11A1;;;;N;HANGUL LETTER ALAE AE;;;;
|
||||
// U+FFA3 HALFWIDTH HANGUL LETTER KIYEOK-SIOS;Lo;0;L;<narrow> 3133;;;;N;HALFWIDTH HANGUL LETTER GIYEOG SIOS;;;;
|
||||
// U+FFDC HALFWIDTH HANGUL LETTER I;Lo;0;L;<narrow> 3163;;;;N;;;;;
|
||||
if i != 0xFF9E && i != 0xFF9F && !(0x3133 <= i && i <= 0x318E) && !(0xFFA3 <= i && i <= 0xFFDC) {
|
||||
log.Fatalf("%U: nLead was %v; want %v", i, a, b)
|
||||
}
|
||||
}
|
||||
}
|
||||
nfc := c.forms[FCanonical]
|
||||
nfkc := c.forms[FCompatibility]
|
||||
if nfc.combinesBackward != nfkc.combinesBackward {
|
||||
log.Fatalf("%U: Cannot combine combinesBackward\n", c.codePoint)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Use values in DerivedNormalizationProps.txt to compare against the
|
||||
// values we computed.
|
||||
// DerivedNormalizationProps.txt has form:
|
||||
// 00C0..00C5 ; NFD_QC; N # ...
|
||||
// 0374 ; NFD_QC; N # ...
|
||||
// See https://unicode.org/reports/tr44/ for full explanation
|
||||
func testDerived() {
|
||||
f := gen.OpenUCDFile("DerivedNormalizationProps.txt")
|
||||
defer f.Close()
|
||||
p := ucd.New(f)
|
||||
for p.Next() {
|
||||
r := p.Rune(0)
|
||||
c := &chars[r]
|
||||
|
||||
var ftype, mode int
|
||||
qt := p.String(1)
|
||||
switch qt {
|
||||
case "NFC_QC":
|
||||
ftype, mode = FCanonical, MComposed
|
||||
case "NFD_QC":
|
||||
ftype, mode = FCanonical, MDecomposed
|
||||
case "NFKC_QC":
|
||||
ftype, mode = FCompatibility, MComposed
|
||||
case "NFKD_QC":
|
||||
ftype, mode = FCompatibility, MDecomposed
|
||||
default:
|
||||
continue
|
||||
}
|
||||
var qr QCResult
|
||||
switch p.String(2) {
|
||||
case "Y":
|
||||
qr = QCYes
|
||||
case "N":
|
||||
qr = QCNo
|
||||
case "M":
|
||||
qr = QCMaybe
|
||||
default:
|
||||
log.Fatalf(`Unexpected quick check value "%s"`, p.String(2))
|
||||
}
|
||||
if got := c.forms[ftype].quickCheck[mode]; got != qr {
|
||||
log.Printf("%U: FAILED %s (was %v need %v)\n", r, qt, got, qr)
|
||||
}
|
||||
c.forms[ftype].verified[mode] = true
|
||||
}
|
||||
if err := p.Err(); err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
// Any unspecified value must be QCYes. Verify this.
|
||||
for i, c := range chars {
|
||||
for j, fd := range c.forms {
|
||||
for k, qr := range fd.quickCheck {
|
||||
if !fd.verified[k] && qr != QCYes {
|
||||
m := "%U: FAIL F:%d M:%d (was %v need Yes) %s\n"
|
||||
log.Printf(m, i, j, k, qr, c.name)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var testHeader = `const (
|
||||
Yes = iota
|
||||
No
|
||||
Maybe
|
||||
)
|
||||
|
||||
type formData struct {
|
||||
qc uint8
|
||||
combinesForward bool
|
||||
decomposition string
|
||||
}
|
||||
|
||||
type runeData struct {
|
||||
r rune
|
||||
ccc uint8
|
||||
nLead uint8
|
||||
nTrail uint8
|
||||
f [2]formData // 0: canonical; 1: compatibility
|
||||
}
|
||||
|
||||
func f(qc uint8, cf bool, dec string) [2]formData {
|
||||
return [2]formData{{qc, cf, dec}, {qc, cf, dec}}
|
||||
}
|
||||
|
||||
func g(qc, qck uint8, cf, cfk bool, d, dk string) [2]formData {
|
||||
return [2]formData{{qc, cf, d}, {qck, cfk, dk}}
|
||||
}
|
||||
|
||||
var testData = []runeData{
|
||||
`
|
||||
|
||||
func printTestdata() {
|
||||
type lastInfo struct {
|
||||
ccc uint8
|
||||
nLead uint8
|
||||
nTrail uint8
|
||||
f string
|
||||
}
|
||||
|
||||
last := lastInfo{}
|
||||
w := &bytes.Buffer{}
|
||||
fmt.Fprintf(w, testHeader)
|
||||
for r, c := range chars {
|
||||
f := c.forms[FCanonical]
|
||||
qc, cf, d := f.quickCheck[MComposed], f.combinesForward, string(f.expandedDecomp)
|
||||
f = c.forms[FCompatibility]
|
||||
qck, cfk, dk := f.quickCheck[MComposed], f.combinesForward, string(f.expandedDecomp)
|
||||
s := ""
|
||||
if d == dk && qc == qck && cf == cfk {
|
||||
s = fmt.Sprintf("f(%s, %v, %q)", qc, cf, d)
|
||||
} else {
|
||||
s = fmt.Sprintf("g(%s, %s, %v, %v, %q, %q)", qc, qck, cf, cfk, d, dk)
|
||||
}
|
||||
current := lastInfo{c.ccc, c.nLeadingNonStarters, c.nTrailingNonStarters, s}
|
||||
if last != current {
|
||||
fmt.Fprintf(w, "\t{0x%x, %d, %d, %d, %s},\n", r, c.origCCC, c.nLeadingNonStarters, c.nTrailingNonStarters, s)
|
||||
last = current
|
||||
}
|
||||
}
|
||||
fmt.Fprintln(w, "}")
|
||||
gen.WriteVersionedGoFile("data_test.go", "norm", w.Bytes())
|
||||
}
|
117
vendor/golang.org/x/text/unicode/norm/triegen.go
generated
vendored
117
vendor/golang.org/x/text/unicode/norm/triegen.go
generated
vendored
@@ -1,117 +0,0 @@
|
||||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
// Trie table generator.
|
||||
// Used by make*tables tools to generate a go file with trie data structures
|
||||
// for mapping UTF-8 to a 16-bit value. All but the last byte in a UTF-8 byte
|
||||
// sequence are used to lookup offsets in the index table to be used for the
|
||||
// next byte. The last byte is used to index into a table with 16-bit values.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
)
|
||||
|
||||
const maxSparseEntries = 16
|
||||
|
||||
type normCompacter struct {
|
||||
sparseBlocks [][]uint64
|
||||
sparseOffset []uint16
|
||||
sparseCount int
|
||||
name string
|
||||
}
|
||||
|
||||
func mostFrequentStride(a []uint64) int {
|
||||
counts := make(map[int]int)
|
||||
var v int
|
||||
for _, x := range a {
|
||||
if stride := int(x) - v; v != 0 && stride >= 0 {
|
||||
counts[stride]++
|
||||
}
|
||||
v = int(x)
|
||||
}
|
||||
var maxs, maxc int
|
||||
for stride, cnt := range counts {
|
||||
if cnt > maxc || (cnt == maxc && stride < maxs) {
|
||||
maxs, maxc = stride, cnt
|
||||
}
|
||||
}
|
||||
return maxs
|
||||
}
|
||||
|
||||
func countSparseEntries(a []uint64) int {
|
||||
stride := mostFrequentStride(a)
|
||||
var v, count int
|
||||
for _, tv := range a {
|
||||
if int(tv)-v != stride {
|
||||
if tv != 0 {
|
||||
count++
|
||||
}
|
||||
}
|
||||
v = int(tv)
|
||||
}
|
||||
return count
|
||||
}
|
||||
|
||||
func (c *normCompacter) Size(v []uint64) (sz int, ok bool) {
|
||||
if n := countSparseEntries(v); n <= maxSparseEntries {
|
||||
return (n+1)*4 + 2, true
|
||||
}
|
||||
return 0, false
|
||||
}
|
||||
|
||||
func (c *normCompacter) Store(v []uint64) uint32 {
|
||||
h := uint32(len(c.sparseOffset))
|
||||
c.sparseBlocks = append(c.sparseBlocks, v)
|
||||
c.sparseOffset = append(c.sparseOffset, uint16(c.sparseCount))
|
||||
c.sparseCount += countSparseEntries(v) + 1
|
||||
return h
|
||||
}
|
||||
|
||||
func (c *normCompacter) Handler() string {
|
||||
return c.name + "Sparse.lookup"
|
||||
}
|
||||
|
||||
func (c *normCompacter) Print(w io.Writer) (retErr error) {
|
||||
p := func(f string, x ...interface{}) {
|
||||
if _, err := fmt.Fprintf(w, f, x...); retErr == nil && err != nil {
|
||||
retErr = err
|
||||
}
|
||||
}
|
||||
|
||||
ls := len(c.sparseBlocks)
|
||||
p("// %sSparseOffset: %d entries, %d bytes\n", c.name, ls, ls*2)
|
||||
p("var %sSparseOffset = %#v\n\n", c.name, c.sparseOffset)
|
||||
|
||||
ns := c.sparseCount
|
||||
p("// %sSparseValues: %d entries, %d bytes\n", c.name, ns, ns*4)
|
||||
p("var %sSparseValues = [%d]valueRange {", c.name, ns)
|
||||
for i, b := range c.sparseBlocks {
|
||||
p("\n// Block %#x, offset %#x", i, c.sparseOffset[i])
|
||||
var v int
|
||||
stride := mostFrequentStride(b)
|
||||
n := countSparseEntries(b)
|
||||
p("\n{value:%#04x,lo:%#02x},", stride, uint8(n))
|
||||
for i, nv := range b {
|
||||
if int(nv)-v != stride {
|
||||
if v != 0 {
|
||||
p(",hi:%#02x},", 0x80+i-1)
|
||||
}
|
||||
if nv != 0 {
|
||||
p("\n{value:%#04x,lo:%#02x", nv, 0x80+i)
|
||||
}
|
||||
}
|
||||
v = int(nv)
|
||||
}
|
||||
if v != 0 {
|
||||
p(",hi:%#02x},", 0x80+len(b)-1)
|
||||
}
|
||||
}
|
||||
p("\n}\n\n")
|
||||
return
|
||||
}
|
115
vendor/golang.org/x/text/width/gen.go
generated
vendored
115
vendor/golang.org/x/text/width/gen.go
generated
vendored
@@ -1,115 +0,0 @@
|
||||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
// This program generates the trie for width operations. The generated table
|
||||
// includes width category information as well as the normalization mappings.
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"math"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
"golang.org/x/text/internal/triegen"
|
||||
)
|
||||
|
||||
// See gen_common.go for flags.
|
||||
|
||||
func main() {
|
||||
gen.Init()
|
||||
genTables()
|
||||
genTests()
|
||||
gen.Repackage("gen_trieval.go", "trieval.go", "width")
|
||||
gen.Repackage("gen_common.go", "common_test.go", "width")
|
||||
}
|
||||
|
||||
func genTables() {
|
||||
t := triegen.NewTrie("width")
|
||||
// fold and inverse mappings. See mapComment for a description of the format
|
||||
// of each entry. Add dummy value to make an index of 0 mean no mapping.
|
||||
inverse := [][4]byte{{}}
|
||||
mapping := map[[4]byte]int{[4]byte{}: 0}
|
||||
|
||||
getWidthData(func(r rune, tag elem, alt rune) {
|
||||
idx := 0
|
||||
if alt != 0 {
|
||||
var buf [4]byte
|
||||
buf[0] = byte(utf8.EncodeRune(buf[1:], alt))
|
||||
s := string(r)
|
||||
buf[buf[0]] ^= s[len(s)-1]
|
||||
var ok bool
|
||||
if idx, ok = mapping[buf]; !ok {
|
||||
idx = len(mapping)
|
||||
if idx > math.MaxUint8 {
|
||||
log.Fatalf("Index %d does not fit in a byte.", idx)
|
||||
}
|
||||
mapping[buf] = idx
|
||||
inverse = append(inverse, buf)
|
||||
}
|
||||
}
|
||||
t.Insert(r, uint64(tag|elem(idx)))
|
||||
})
|
||||
|
||||
w := &bytes.Buffer{}
|
||||
gen.WriteUnicodeVersion(w)
|
||||
|
||||
sz, err := t.Gen(w)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
sz += writeMappings(w, inverse)
|
||||
|
||||
fmt.Fprintf(w, "// Total table size %d bytes (%dKiB)\n", sz, sz/1024)
|
||||
|
||||
gen.WriteVersionedGoFile(*outputFile, "width", w.Bytes())
|
||||
}
|
||||
|
||||
const inverseDataComment = `
|
||||
// inverseData contains 4-byte entries of the following format:
|
||||
// <length> <modified UTF-8-encoded rune> <0 padding>
|
||||
// The last byte of the UTF-8-encoded rune is xor-ed with the last byte of the
|
||||
// UTF-8 encoding of the original rune. Mappings often have the following
|
||||
// pattern:
|
||||
// A -> A (U+FF21 -> U+0041)
|
||||
// B -> B (U+FF22 -> U+0042)
|
||||
// ...
|
||||
// By xor-ing the last byte the same entry can be shared by many mappings. This
|
||||
// reduces the total number of distinct entries by about two thirds.
|
||||
// The resulting entry for the aforementioned mappings is
|
||||
// { 0x01, 0xE0, 0x00, 0x00 }
|
||||
// Using this entry to map U+FF21 (UTF-8 [EF BC A1]), we get
|
||||
// E0 ^ A1 = 41.
|
||||
// Similarly, for U+FF22 (UTF-8 [EF BC A2]), we get
|
||||
// E0 ^ A2 = 42.
|
||||
// Note that because of the xor-ing, the byte sequence stored in the entry is
|
||||
// not valid UTF-8.`
|
||||
|
||||
func writeMappings(w io.Writer, data [][4]byte) int {
|
||||
fmt.Fprintln(w, inverseDataComment)
|
||||
fmt.Fprintf(w, "var inverseData = [%d][4]byte{\n", len(data))
|
||||
for _, x := range data {
|
||||
fmt.Fprintf(w, "{ 0x%02x, 0x%02x, 0x%02x, 0x%02x },\n", x[0], x[1], x[2], x[3])
|
||||
}
|
||||
fmt.Fprintln(w, "}")
|
||||
return len(data) * 4
|
||||
}
|
||||
|
||||
func genTests() {
|
||||
w := &bytes.Buffer{}
|
||||
fmt.Fprintf(w, "\nvar mapRunes = map[rune]struct{r rune; e elem}{\n")
|
||||
getWidthData(func(r rune, tag elem, alt rune) {
|
||||
if alt != 0 {
|
||||
fmt.Fprintf(w, "\t0x%X: {0x%X, 0x%X},\n", r, alt, tag)
|
||||
}
|
||||
})
|
||||
fmt.Fprintln(w, "}")
|
||||
gen.WriteGoFile("runes_test.go", "width", w.Bytes())
|
||||
}
|
96
vendor/golang.org/x/text/width/gen_common.go
generated
vendored
96
vendor/golang.org/x/text/width/gen_common.go
generated
vendored
@@ -1,96 +0,0 @@
|
||||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
// This code is shared between the main code generator and the test code.
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"log"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
"golang.org/x/text/internal/ucd"
|
||||
)
|
||||
|
||||
var (
|
||||
outputFile = flag.String("out", "tables.go", "output file")
|
||||
)
|
||||
|
||||
var typeMap = map[string]elem{
|
||||
"A": tagAmbiguous,
|
||||
"N": tagNeutral,
|
||||
"Na": tagNarrow,
|
||||
"W": tagWide,
|
||||
"F": tagFullwidth,
|
||||
"H": tagHalfwidth,
|
||||
}
|
||||
|
||||
// getWidthData calls f for every entry for which it is defined.
|
||||
//
|
||||
// f may be called multiple times for the same rune. The last call to f is the
|
||||
// correct value. f is not called for all runes. The default tag type is
|
||||
// Neutral.
|
||||
func getWidthData(f func(r rune, tag elem, alt rune)) {
|
||||
// Set the default values for Unified Ideographs. In line with Annex 11,
|
||||
// we encode full ranges instead of the defined runes in Unified_Ideograph.
|
||||
for _, b := range []struct{ lo, hi rune }{
|
||||
{0x4E00, 0x9FFF}, // the CJK Unified Ideographs block,
|
||||
{0x3400, 0x4DBF}, // the CJK Unified Ideographs Externsion A block,
|
||||
{0xF900, 0xFAFF}, // the CJK Compatibility Ideographs block,
|
||||
{0x20000, 0x2FFFF}, // the Supplementary Ideographic Plane,
|
||||
{0x30000, 0x3FFFF}, // the Tertiary Ideographic Plane,
|
||||
} {
|
||||
for r := b.lo; r <= b.hi; r++ {
|
||||
f(r, tagWide, 0)
|
||||
}
|
||||
}
|
||||
|
||||
inverse := map[rune]rune{}
|
||||
maps := map[string]bool{
|
||||
"<wide>": true,
|
||||
"<narrow>": true,
|
||||
}
|
||||
|
||||
// We cannot reuse package norm's decomposition, as we need an unexpanded
|
||||
// decomposition. We make use of the opportunity to verify that the
|
||||
// decomposition type is as expected.
|
||||
ucd.Parse(gen.OpenUCDFile("UnicodeData.txt"), func(p *ucd.Parser) {
|
||||
r := p.Rune(0)
|
||||
s := strings.SplitN(p.String(ucd.DecompMapping), " ", 2)
|
||||
if !maps[s[0]] {
|
||||
return
|
||||
}
|
||||
x, err := strconv.ParseUint(s[1], 16, 32)
|
||||
if err != nil {
|
||||
log.Fatalf("Error parsing rune %q", s[1])
|
||||
}
|
||||
if inverse[r] != 0 || inverse[rune(x)] != 0 {
|
||||
log.Fatalf("Circular dependency in mapping between %U and %U", r, x)
|
||||
}
|
||||
inverse[r] = rune(x)
|
||||
inverse[rune(x)] = r
|
||||
})
|
||||
|
||||
// <rune range>;<type>
|
||||
ucd.Parse(gen.OpenUCDFile("EastAsianWidth.txt"), func(p *ucd.Parser) {
|
||||
tag, ok := typeMap[p.String(1)]
|
||||
if !ok {
|
||||
log.Fatalf("Unknown width type %q", p.String(1))
|
||||
}
|
||||
r := p.Rune(0)
|
||||
alt, ok := inverse[r]
|
||||
if tag == tagFullwidth || tag == tagHalfwidth && r != wonSign {
|
||||
tag |= tagNeedsFold
|
||||
if !ok {
|
||||
log.Fatalf("Narrow or wide rune %U has no decomposition", r)
|
||||
}
|
||||
}
|
||||
f(r, tag, alt)
|
||||
})
|
||||
}
|
34
vendor/golang.org/x/text/width/gen_trieval.go
generated
vendored
34
vendor/golang.org/x/text/width/gen_trieval.go
generated
vendored
@@ -1,34 +0,0 @@
|
||||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
// elem is an entry of the width trie. The high byte is used to encode the type
|
||||
// of the rune. The low byte is used to store the index to a mapping entry in
|
||||
// the inverseData array.
|
||||
type elem uint16
|
||||
|
||||
const (
|
||||
tagNeutral elem = iota << typeShift
|
||||
tagAmbiguous
|
||||
tagWide
|
||||
tagNarrow
|
||||
tagFullwidth
|
||||
tagHalfwidth
|
||||
)
|
||||
|
||||
const (
|
||||
numTypeBits = 3
|
||||
typeShift = 16 - numTypeBits
|
||||
|
||||
// tagNeedsFold is true for all fullwidth and halfwidth runes except for
|
||||
// the Won sign U+20A9.
|
||||
tagNeedsFold = 0x1000
|
||||
|
||||
// The Korean Won sign is halfwidth, but SHOULD NOT be mapped to a wide
|
||||
// variant.
|
||||
wonSign rune = 0x20A9
|
||||
)
|
Reference in New Issue
Block a user