Migrate to dep (#3972)

* Update makefile to use dep * Migrate to dep * Fix some deps * Try to find a better version for golang.org/x/net * Try to find a better version for golang.org/x/oauth2
2025-12-07 13:28:25 +00:00 · 2018-05-21 15:34:20 +03:00
parent d7fd9bf7bb
commit 3f3383dc0a
281 changed files with 12024 additions and 32676 deletions
@@ -0,0 +1,3 @@
+# This source code refers to The Go Authors for copyright purposes.
+# The master list of authors is in the main Go distribution,
+# visible at http://tip.golang.org/AUTHORS.
@@ -0,0 +1,3 @@
+# This source code was written by the Go contributors.
+# The master list of contributors is in the main Go distribution,
+# visible at http://tip.golang.org/CONTRIBUTORS.
@@ -0,0 +1,556 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+package main
+
+import (
+	"bufio"
+	"fmt"
+	"log"
+	"net/http"
+	"sort"
+	"strings"
+	"unicode/utf8"
+
+	"golang.org/x/text/encoding"
+	"golang.org/x/text/internal/gen"
+)
+
+const ascii = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + // ascii lists U+0000..U+007F in order; it is the low half of every ASCII-superset mapping.
+	"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" + // 0x10-0x1f: remaining control characters
+	` !"#$%&'()*+,-./0123456789:;<=>?` + // 0x20-0x3f
+	`@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_` + // 0x40-0x5f
+	"`abcdefghijklmnopqrstuvwxyz{|}~\u007f" // 0x60-0x7f, ending with DEL
+
+var encodings = []struct { // encodings lists every charmap that main emits into tables.go.
+	name        string // human-readable name; written into the generated doc comment and the Charmap name field
+	mib         string // identifier constant name, emitted as identifier.<mib>
+	comment     string // optional extra paragraph appended to the generated doc comment
+	varName     string // comma-separated variable names; the first names the generated Charmap, all go into listAll
+	replacement byte // byte value emitted as the Charmap replacement field
+	mapping     string // source URL (WHATWG index or ICU .ucm file), or the literal mapping string itself
+}{
+	{
+		"IBM Code Page 037",
+		"IBM037",
+		"",
+		"CodePage037",
+		0x3f,
+		"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM037-2.1.2.ucm",
+	},
+	{
+		"IBM Code Page 437",
+		"PC8CodePage437",
+		"",
+		"CodePage437",
+		encoding.ASCIISub,
+		"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM437-2.1.2.ucm",
+	},
+	{
+		"IBM Code Page 850",
+		"PC850Multilingual",
+		"",
+		"CodePage850",
+		encoding.ASCIISub,
+		"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM850-2.1.2.ucm",
+	},
+	{
+		"IBM Code Page 852",
+		"PCp852",
+		"",
+		"CodePage852",
+		encoding.ASCIISub,
+		"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM852-2.1.2.ucm",
+	},
+	{
+		"IBM Code Page 855",
+		"IBM855",
+		"",
+		"CodePage855",
+		encoding.ASCIISub,
+		"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM855-2.1.2.ucm",
+	},
+	{
+		"Windows Code Page 858", // PC latin1 with Euro
+		"IBM00858",
+		"",
+		"CodePage858",
+		encoding.ASCIISub,
+		"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/windows-858-2000.ucm",
+	},
+	{
+		"IBM Code Page 860",
+		"IBM860",
+		"",
+		"CodePage860",
+		encoding.ASCIISub,
+		"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM860-2.1.2.ucm",
+	},
+	{
+		"IBM Code Page 862",
+		"PC862LatinHebrew",
+		"",
+		"CodePage862",
+		encoding.ASCIISub,
+		"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM862-2.1.2.ucm",
+	},
+	{
+		"IBM Code Page 863",
+		"IBM863",
+		"",
+		"CodePage863",
+		encoding.ASCIISub,
+		"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM863-2.1.2.ucm",
+	},
+	{
+		"IBM Code Page 865",
+		"IBM865",
+		"",
+		"CodePage865",
+		encoding.ASCIISub,
+		"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM865-2.1.2.ucm",
+	},
+	{
+		"IBM Code Page 866",
+		"IBM866",
+		"",
+		"CodePage866",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-ibm866.txt",
+	},
+	{
+		"IBM Code Page 1047",
+		"IBM1047",
+		"",
+		"CodePage1047",
+		0x3f,
+		"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM1047-2.1.2.ucm",
+	},
+	{
+		"IBM Code Page 1140",
+		"IBM01140",
+		"",
+		"CodePage1140",
+		0x3f,
+		"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/ibm-1140_P100-1997.ucm",
+	},
+	{
+		"ISO 8859-1",
+		"ISOLatin1",
+		"",
+		"ISO8859_1",
+		encoding.ASCIISub,
+		"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/iso-8859_1-1998.ucm",
+	},
+	{
+		"ISO 8859-2",
+		"ISOLatin2",
+		"",
+		"ISO8859_2",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-iso-8859-2.txt",
+	},
+	{
+		"ISO 8859-3",
+		"ISOLatin3",
+		"",
+		"ISO8859_3",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-iso-8859-3.txt",
+	},
+	{
+		"ISO 8859-4",
+		"ISOLatin4",
+		"",
+		"ISO8859_4",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-iso-8859-4.txt",
+	},
+	{
+		"ISO 8859-5",
+		"ISOLatinCyrillic",
+		"",
+		"ISO8859_5",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-iso-8859-5.txt",
+	},
+	{
+		"ISO 8859-6",
+		"ISOLatinArabic",
+		"",
+		"ISO8859_6,ISO8859_6E,ISO8859_6I",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-iso-8859-6.txt",
+	},
+	{
+		"ISO 8859-7",
+		"ISOLatinGreek",
+		"",
+		"ISO8859_7",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-iso-8859-7.txt",
+	},
+	{
+		"ISO 8859-8",
+		"ISOLatinHebrew",
+		"",
+		"ISO8859_8,ISO8859_8E,ISO8859_8I",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-iso-8859-8.txt",
+	},
+	{
+		"ISO 8859-9",
+		"ISOLatin5",
+		"",
+		"ISO8859_9",
+		encoding.ASCIISub,
+		"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/iso-8859_9-1999.ucm",
+	},
+	{
+		"ISO 8859-10",
+		"ISOLatin6",
+		"",
+		"ISO8859_10",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-iso-8859-10.txt",
+	},
+	{
+		"ISO 8859-13",
+		"ISO885913",
+		"",
+		"ISO8859_13",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-iso-8859-13.txt",
+	},
+	{
+		"ISO 8859-14",
+		"ISO885914",
+		"",
+		"ISO8859_14",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-iso-8859-14.txt",
+	},
+	{
+		"ISO 8859-15",
+		"ISO885915",
+		"",
+		"ISO8859_15",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-iso-8859-15.txt",
+	},
+	{
+		"ISO 8859-16",
+		"ISO885916",
+		"",
+		"ISO8859_16",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-iso-8859-16.txt",
+	},
+	{
+		"KOI8-R",
+		"KOI8R",
+		"",
+		"KOI8R",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-koi8-r.txt",
+	},
+	{
+		"KOI8-U",
+		"KOI8U",
+		"",
+		"KOI8U",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-koi8-u.txt",
+	},
+	{
+		"Macintosh",
+		"Macintosh",
+		"",
+		"Macintosh",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-macintosh.txt",
+	},
+	{
+		"Macintosh Cyrillic",
+		"MacintoshCyrillic",
+		"",
+		"MacintoshCyrillic",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-x-mac-cyrillic.txt",
+	},
+	{
+		"Windows 874",
+		"Windows874",
+		"",
+		"Windows874",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-windows-874.txt",
+	},
+	{
+		"Windows 1250",
+		"Windows1250",
+		"",
+		"Windows1250",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-windows-1250.txt",
+	},
+	{
+		"Windows 1251",
+		"Windows1251",
+		"",
+		"Windows1251",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-windows-1251.txt",
+	},
+	{
+		"Windows 1252",
+		"Windows1252",
+		"",
+		"Windows1252",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-windows-1252.txt",
+	},
+	{
+		"Windows 1253",
+		"Windows1253",
+		"",
+		"Windows1253",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-windows-1253.txt",
+	},
+	{
+		"Windows 1254",
+		"Windows1254",
+		"",
+		"Windows1254",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-windows-1254.txt",
+	},
+	{
+		"Windows 1255",
+		"Windows1255",
+		"",
+		"Windows1255",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-windows-1255.txt",
+	},
+	{
+		"Windows 1256",
+		"Windows1256",
+		"",
+		"Windows1256",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-windows-1256.txt",
+	},
+	{
+		"Windows 1257",
+		"Windows1257",
+		"",
+		"Windows1257",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-windows-1257.txt",
+	},
+	{
+		"Windows 1258",
+		"Windows1258",
+		"",
+		"Windows1258",
+		encoding.ASCIISub,
+		"http://encoding.spec.whatwg.org/index-windows-1258.txt",
+	},
+	{
+		"X-User-Defined",
+		"XUserDefined",
+		"It is defined at http://encoding.spec.whatwg.org/#x-user-defined",
+		"XUserDefined",
+		encoding.ASCIISub,
+		ascii +
+			"\uf780\uf781\uf782\uf783\uf784\uf785\uf786\uf787" +
+			"\uf788\uf789\uf78a\uf78b\uf78c\uf78d\uf78e\uf78f" +
+			"\uf790\uf791\uf792\uf793\uf794\uf795\uf796\uf797" +
+			"\uf798\uf799\uf79a\uf79b\uf79c\uf79d\uf79e\uf79f" +
+			"\uf7a0\uf7a1\uf7a2\uf7a3\uf7a4\uf7a5\uf7a6\uf7a7" +
+			"\uf7a8\uf7a9\uf7aa\uf7ab\uf7ac\uf7ad\uf7ae\uf7af" +
+			"\uf7b0\uf7b1\uf7b2\uf7b3\uf7b4\uf7b5\uf7b6\uf7b7" +
+			"\uf7b8\uf7b9\uf7ba\uf7bb\uf7bc\uf7bd\uf7be\uf7bf" +
+			"\uf7c0\uf7c1\uf7c2\uf7c3\uf7c4\uf7c5\uf7c6\uf7c7" +
+			"\uf7c8\uf7c9\uf7ca\uf7cb\uf7cc\uf7cd\uf7ce\uf7cf" +
+			"\uf7d0\uf7d1\uf7d2\uf7d3\uf7d4\uf7d5\uf7d6\uf7d7" +
+			"\uf7d8\uf7d9\uf7da\uf7db\uf7dc\uf7dd\uf7de\uf7df" +
+			"\uf7e0\uf7e1\uf7e2\uf7e3\uf7e4\uf7e5\uf7e6\uf7e7" +
+			"\uf7e8\uf7e9\uf7ea\uf7eb\uf7ec\uf7ed\uf7ee\uf7ef" +
+			"\uf7f0\uf7f1\uf7f2\uf7f3\uf7f4\uf7f5\uf7f6\uf7f7" +
+			"\uf7f8\uf7f9\uf7fa\uf7fb\uf7fc\uf7fd\uf7fe\uf7ff",
+	},
+}
+
+// getWHATWG downloads the WHATWG single-byte index at url and returns the
+// complete mapping string: the ascii constant followed by the 128 runes
+// described by the index.
+//
+// Each data line has the form "<index>\t0x<code point>" with index in
+// [0, 128). Indexes that are absent from the file, and indexes that map to
+// a code point in [0x80, 0xa0), are left as U+FFFD. Any download, parse or
+// read failure terminates the program via log.Fatalf.
+func getWHATWG(url string) string {
+	res, err := http.Get(url)
+	if err != nil {
+		log.Fatalf("%q: Get: %v", url, err)
+	}
+	defer res.Body.Close()
+	// Without this check an error page only surfaces as a confusing parse failure.
+	if res.StatusCode != http.StatusOK {
+		log.Fatalf("%q: Get: unexpected status %s", url, res.Status)
+	}
+
+	mapping := make([]rune, 128)
+	for i := range mapping {
+		mapping[i] = '\ufffd'
+	}
+
+	scanner := bufio.NewScanner(res.Body)
+	for scanner.Scan() {
+		s := strings.TrimSpace(scanner.Text())
+		if s == "" || s[0] == '#' {
+			continue
+		}
+		x, y := 0, 0
+		if _, err := fmt.Sscanf(s, "%d\t0x%x", &x, &y); err != nil {
+			log.Fatalf("could not parse %q", s)
+		}
+		if x < 0 || 128 <= x {
+			log.Fatalf("code %d is out of range", x)
+		}
+		if 0x80 <= y && y < 0xa0 {
+			// We diverge from the WHATWG spec by mapping control characters
+			// in the range [0x80, 0xa0) to U+FFFD.
+			continue
+		}
+		mapping[x] = rune(y)
+	}
+	// Scan returns false on a read error as well as on EOF; without this check
+	// a broken connection would silently produce a truncated mapping.
+	if err := scanner.Err(); err != nil {
+		log.Fatalf("%q: scanner error: %v", url, err)
+	}
+	return ascii + string(mapping)
+}
+
+// getUCM downloads the ICU .ucm file at url and returns the complete 256-rune
+// mapping string, indexed by byte value.
+//
+// Only lines matching "<Uxxxx> \xNN |0" are used; every other line is
+// skipped. Bytes without such a line stay U+FFFD. The program terminates if
+// the download or read fails, or if fewer than 200 mappings were found,
+// which indicates that the page is not in the expected format.
+func getUCM(url string) string {
+	res, err := http.Get(url)
+	if err != nil {
+		log.Fatalf("%q: Get: %v", url, err)
+	}
+	defer res.Body.Close()
+	// Report a failed download directly instead of via the charsFound check.
+	if res.StatusCode != http.StatusOK {
+		log.Fatalf("%q: Get: unexpected status %s", url, res.Status)
+	}
+
+	mapping := make([]rune, 256)
+	for i := range mapping {
+		mapping[i] = '\ufffd'
+	}
+
+	charsFound := 0
+	scanner := bufio.NewScanner(res.Body)
+	for scanner.Scan() {
+		s := strings.TrimSpace(scanner.Text())
+		if s == "" || s[0] == '#' {
+			continue
+		}
+		var c byte
+		var r rune
+		if _, err := fmt.Sscanf(s, `<U%x> \x%x |0`, &r, &c); err != nil {
+			// Header and non-roundtrip lines do not match; ignore them.
+			continue
+		}
+		mapping[c] = r
+		charsFound++
+	}
+	// Scan returns false on a read error as well as on EOF; a partial download
+	// that happens to contain 200 entries must not pass as a complete table.
+	if err := scanner.Err(); err != nil {
+		log.Fatalf("%q: scanner error: %v", url, err)
+	}
+
+	if charsFound < 200 {
+		log.Fatalf("%q: only %d characters found (wrong page format?)", url, charsFound)
+	}
+
+	return string(mapping)
+}
+
+// main generates tables.go for package charmap.
+//
+// For every entry of encodings it resolves the mapping (downloading it when
+// the entry holds a URL), then emits an exported *Charmap variable plus the
+// backing Charmap value with its decode and encode tables. Finally it emits
+// listAll, which references every exported variable name.
+func main() {
+	mibs := map[string]bool{}
+	all := []string{}
+
+	w := gen.NewCodeWriter()
+	defer w.WriteGoFile("tables.go", "charmap")
+
+	printf := func(s string, a ...interface{}) { fmt.Fprintf(w, s, a...) }
+
+	printf("import (\n")
+	printf("\t\"golang.org/x/text/encoding\"\n")
+	printf("\t\"golang.org/x/text/encoding/internal/identifier\"\n")
+	printf(")\n\n")
+	for _, e := range encodings {
+		varNames := strings.Split(e.varName, ",")
+		all = append(all, varNames...)
+		varName := varNames[0]
+		// A mapping that is not one of the known URLs is used verbatim.
+		switch {
+		case strings.HasPrefix(e.mapping, "http://encoding.spec.whatwg.org/"):
+			e.mapping = getWHATWG(e.mapping)
+		case strings.HasPrefix(e.mapping, "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/"):
+			e.mapping = getUCM(e.mapping)
+		}
+
+		asciiSuperset, low := strings.HasPrefix(e.mapping, ascii), 0x00
+		if asciiSuperset {
+			low = 0x80
+		}
+		// Lower-case the leading letter (or the whole "ISO"/"KOI" prefix) to
+		// derive the unexported name of the backing value.
+		lvn := 1
+		if strings.HasPrefix(varName, "ISO") || strings.HasPrefix(varName, "KOI") {
+			lvn = 3
+		}
+		lowerVarName := strings.ToLower(varName[:lvn]) + varName[lvn:]
+		printf("// %s is the %s encoding.\n", varName, e.name)
+		if e.comment != "" {
+			printf("//\n// %s\n", e.comment)
+		}
+		printf("var %s *Charmap = &%s\n\nvar %s = Charmap{\nname: %q,\n",
+			varName, lowerVarName, lowerVarName, e.name)
+		if mibs[e.mib] {
+			log.Fatalf("MIB type %q declared multiple times.", e.mib)
+		}
+		// Record the MIB so that the duplicate check above can actually fire.
+		mibs[e.mib] = true
+		printf("mib: identifier.%s,\n", e.mib)
+		printf("asciiSuperset: %t,\n", asciiSuperset)
+		printf("low: 0x%02x,\n", low)
+		printf("replacement: 0x%02x,\n", e.replacement)
+
+		printf("decode: [256]utf8Enc{\n")
+		// backMapping remembers the first byte that decodes to each rune.
+		i, backMapping := 0, map[rune]byte{}
+		for _, c := range e.mapping {
+			if _, ok := backMapping[c]; !ok && c != utf8.RuneError {
+				backMapping[c] = byte(i)
+			}
+			var buf [8]byte
+			n := utf8.EncodeRune(buf[:], c)
+			if n > 3 {
+				panic(fmt.Sprintf("rune %q (%U) is too long", c, c))
+			}
+			printf("{%d,[3]byte{0x%02x,0x%02x,0x%02x}},", n, buf[0], buf[1], buf[2])
+			if i%2 == 1 {
+				printf("\n")
+			}
+			i++
+		}
+		printf("},\n")
+
+		printf("encode: [256]uint32{\n")
+		// Each entry packs the byte into the top 8 bits and the rune into the
+		// low 24 bits; entries are sorted by rune.
+		encode := make([]uint32, 0, 256)
+		for c, i := range backMapping {
+			encode = append(encode, uint32(i)<<24|uint32(c))
+		}
+		sort.Sort(byRune(encode))
+		if len(encode) == 0 {
+			// The padding loop below repeats the last entry and needs one.
+			log.Fatalf("%s: mapping contains no encodable runes", e.name)
+		}
+		// Pad to 256 entries by repeating the last one.
+		for len(encode) < cap(encode) {
+			encode = append(encode, encode[len(encode)-1])
+		}
+		for i, enc := range encode {
+			printf("0x%08x,", enc)
+			if i%8 == 7 {
+				printf("\n")
+			}
+		}
+		printf("},\n}\n")
+
+		// Add an estimate of the size of a single Charmap{} struct value, which
+		// includes two 256 elem arrays of 4 bytes and some extra fields, which
+		// align to 3 uint64s on 64-bit architectures.
+		w.Size += 2*4*256 + 3*8
+	}
+	// TODO: add proper line breaking.
+	printf("var listAll = []encoding.Encoding{\n%s,\n}\n\n", strings.Join(all, ",\n"))
+}
+
+// byRune implements sort.Interface over packed encode entries. Entries are
+// ordered by the value held in their low 24 bits; the top 8 bits are ignored.
+type byRune []uint32
+
+func (r byRune) Len() int { return len(r) }
+
+func (r byRune) Less(a, b int) bool {
+	const runeMask = 0xffffff
+	return r[a]&runeMask < r[b]&runeMask
+}
+
+func (r byRune) Swap(a, b int) {
+	r[a], r[b] = r[b], r[a]
+}
@@ -0,0 +1,170 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+package main
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"log"
+	"strings"
+
+	"golang.org/x/text/internal/gen"
+)
+
+type group struct { // group is one element of the top-level array decoded from encodings.json.
+	Encodings []struct { // the encodings listed in this group
+		Labels []string // every label becomes a key of the generated nameMap
+		Name   string // canonical name; lower-cased before it is looked up in consts
+	}
+}
+
+// main generates tables.go for package htmlindex from the WHATWG
+// encodings.json file. It emits, in order: the htmlEncoding constants, the
+// canonical name table, the label-to-constant nameMap, the localeMap, and the
+// space-separated locales string.
+func main() {
+	gen.Init()
+
+	src := gen.Open("https://encoding.spec.whatwg.org", "whatwg", "encodings.json")
+	var groups []group
+	if err := json.NewDecoder(src).Decode(&groups); err != nil {
+		log.Fatalf("Error reading encodings.json: %v", err)
+	}
+
+	var out bytes.Buffer
+
+	// Constant block: entries of the first group carry the explicit
+	// "htmlEncoding = iota" declaration, all later ones rely on repetition.
+	out.WriteString("type htmlEncoding byte\n")
+	out.WriteString("const (\n")
+	for gi, g := range groups {
+		for _, enc := range g.Encodings {
+			canon := strings.ToLower(enc.Name)
+			constName := consts[canon]
+			if constName == "" {
+				log.Fatalf("No const defined for %s.", canon)
+			}
+			line := constName
+			if gi == 0 {
+				line += " htmlEncoding = iota"
+			}
+			fmt.Fprintln(&out, line)
+		}
+	}
+	out.WriteString("numEncodings\n")
+	out.WriteString(")\n\n")
+
+	// Canonical names, in the same order as the constants.
+	out.WriteString("var canonical = [numEncodings]string{\n")
+	for _, g := range groups {
+		for _, enc := range g.Encodings {
+			fmt.Fprintf(&out, "%q,\n", strings.ToLower(enc.Name))
+		}
+	}
+	out.WriteString("}\n\n")
+
+	// Every label of an encoding maps to that encoding's constant.
+	out.WriteString("var nameMap = map[string]htmlEncoding{\n")
+	for _, g := range groups {
+		for _, enc := range g.Encodings {
+			constName := consts[strings.ToLower(enc.Name)]
+			for _, label := range enc.Labels {
+				fmt.Fprintf(&out, "%q: %s,\n", label, constName)
+			}
+		}
+	}
+	out.WriteString("}\n\n")
+
+	// localeMap entries line up with the tags joined into the locales string.
+	tags := make([]string, 0, len(locales))
+	out.WriteString("var localeMap = []htmlEncoding{\n")
+	for _, loc := range locales {
+		tags = append(tags, loc.tag)
+		fmt.Fprintf(&out, "%s, // %s \n", consts[loc.name], loc.tag)
+	}
+	out.WriteString("}\n\n")
+
+	fmt.Fprintf(&out, "const locales = %q\n", strings.Join(tags, " "))
+
+	gen.WriteGoFile("tables.go", "htmlindex", out.Bytes())
+}
+
+// consts maps each lower-cased canonical encoding name to the name of the htmlEncoding constant that main emits for it.
+var consts = map[string]string{
+	"utf-8":          "utf8",
+	"ibm866":         "ibm866",
+	"iso-8859-2":     "iso8859_2",
+	"iso-8859-3":     "iso8859_3",
+	"iso-8859-4":     "iso8859_4",
+	"iso-8859-5":     "iso8859_5",
+	"iso-8859-6":     "iso8859_6",
+	"iso-8859-7":     "iso8859_7",
+	"iso-8859-8":     "iso8859_8",
+	"iso-8859-8-i":   "iso8859_8I",
+	"iso-8859-10":    "iso8859_10",
+	"iso-8859-13":    "iso8859_13",
+	"iso-8859-14":    "iso8859_14",
+	"iso-8859-15":    "iso8859_15",
+	"iso-8859-16":    "iso8859_16",
+	"koi8-r":         "koi8r",
+	"koi8-u":         "koi8u",
+	"macintosh":      "macintosh",
+	"windows-874":    "windows874",
+	"windows-1250":   "windows1250",
+	"windows-1251":   "windows1251",
+	"windows-1252":   "windows1252",
+	"windows-1253":   "windows1253",
+	"windows-1254":   "windows1254",
+	"windows-1255":   "windows1255",
+	"windows-1256":   "windows1256",
+	"windows-1257":   "windows1257",
+	"windows-1258":   "windows1258",
+	"x-mac-cyrillic": "macintoshCyrillic",
+	"gbk":            "gbk",
+	"gb18030":        "gb18030",
+	// "hz-gb-2312":     "hzgb2312", // Was removed from WhatWG
+	"big5":           "big5",
+	"euc-jp":         "eucjp",
+	"iso-2022-jp":    "iso2022jp",
+	"shift_jis":      "shiftJIS",
+	"euc-kr":         "euckr",
+	"replacement":    "replacement",
+	"utf-16be":       "utf16be",
+	"utf-16le":       "utf16le",
+	"x-user-defined": "xUserDefined",
+}
+
+// locales pairs a language tag with the name of its default encoding (a key of consts); main emits
+// the entries in this order. Taken from https://html.spec.whatwg.org/multipage/syntax.html#encoding-sniffing-algorithm.
+var locales = []struct{ tag, name string }{
+	{"und", "windows-1252"}, // The default value.
+	{"ar", "windows-1256"},
+	{"ba", "windows-1251"},
+	{"be", "windows-1251"},
+	{"bg", "windows-1251"},
+	{"cs", "windows-1250"},
+	{"el", "iso-8859-7"},
+	{"et", "windows-1257"},
+	{"fa", "windows-1256"},
+	{"he", "windows-1255"},
+	{"hr", "windows-1250"},
+	{"hu", "iso-8859-2"},
+	{"ja", "shift_jis"},
+	{"kk", "windows-1251"},
+	{"ko", "euc-kr"},
+	{"ku", "windows-1254"},
+	{"ky", "windows-1251"},
+	{"lt", "windows-1257"},
+	{"lv", "windows-1257"},
+	{"mk", "windows-1251"},
+	{"pl", "iso-8859-2"},
+	{"ru", "windows-1251"},
+	{"sah", "windows-1251"},
+	{"sk", "windows-1250"},
+	{"sl", "iso-8859-2"},
+	{"sr", "windows-1251"},
+	{"tg", "windows-1251"},
+	{"th", "windows-874"},
+	{"tr", "windows-1254"},
+	{"tt", "windows-1251"},
+	{"uk", "windows-1251"},
+	{"vi", "windows-1258"},
+	{"zh-hans", "gb18030"},
+	{"zh-hant", "big5"},
+}
@@ -0,0 +1,137 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+package main
+
+import (
+	"bytes"
+	"encoding/xml"
+	"fmt"
+	"io"
+	"log"
+	"strings"
+
+	"golang.org/x/text/internal/gen"
+)
+
+type registry struct { // registry is the decode target for the IANA character-sets.xml document.
+	XMLName  xml.Name `xml:"registry"`
+	Updated  string   `xml:"updated"`
+	Registry []struct { // nested <registry> elements; main only reads the first one
+		ID     string `xml:"id,attr"` // main requires "character-sets-1" on the first registry
+		Record []struct { // one entry per <record>; each yields one generated constant
+			Name string `xml:"name"` // IANA name, quoted in the generated doc comment
+			Xref []struct { // references; types "rfc" and "uri" become "Reference:" lines
+				Type string `xml:"type,attr"`
+				Data string `xml:"data,attr"`
+			} `xml:"xref"`
+			Desc struct {
+				Data string `xml:",innerxml"` // raw inner XML; main re-tokenizes it to flatten nested markup
+				// Any []struct {
+				// 	Data string `xml:",chardata"`
+				// } `xml:",any"`
+				// Data string `xml:",chardata"`
+			} `xml:"description,"`
+			MIB   string   `xml:"value"` // emitted verbatim as the value of the generated constant
+			Alias []string `xml:"alias"` // an alias starting with "cs" and containing no '-' supplies the constant name
+			MIME  string   `xml:"preferred_alias"` // when non-empty, mentioned as "(MIME: ...)" in the doc comment
+		} `xml:"record"`
+	} `xml:"registry"`
+}
+
+// main generates mib.go for package identifier from the IANA character set
+// registry.
+//
+// For every record of the first registry it derives a constant name from the
+// record's "cs" alias, writes a doc comment built from the record's name,
+// description and references, and emits "<name> MIB = <value>". The program
+// terminates if the registry cannot be decoded, is empty, has an unexpected
+// ID, or contains a record without a usable alias.
+func main() {
+	r := gen.OpenIANAFile("assignments/character-sets/character-sets.xml")
+	reg := &registry{}
+	if err := xml.NewDecoder(r).Decode(reg); err != nil && err != io.EOF {
+		log.Fatalf("Error decoding charset registry: %v", err)
+	}
+	// Check for emptiness on its own: the ID message below indexes element 0
+	// and would otherwise panic with an index-out-of-range error.
+	if len(reg.Registry) == 0 {
+		log.Fatal("No registry found in character-sets.xml")
+	}
+	if id := reg.Registry[0].ID; id != "character-sets-1" {
+		log.Fatalf("Unexpected ID %s", id)
+	}
+
+	w := &bytes.Buffer{}
+	fmt.Fprintf(w, "const (\n")
+	for _, rec := range reg.Registry[0].Record {
+		// The last alias of the form "csXxx" (without a dash) wins.
+		constName := ""
+		for _, a := range rec.Alias {
+			if strings.HasPrefix(a, "cs") && strings.IndexByte(a, '-') == -1 {
+				// Some of the constant definitions have comments in them. Strip those.
+				constName = strings.Title(strings.SplitN(a[2:], "\n", 2)[0])
+			}
+		}
+		if constName == "" {
+			switch rec.MIB {
+			case "2085":
+				constName = "HZGB2312" // Not listed as alias for some reason.
+			default:
+				log.Fatalf("No cs alias defined for %s.", rec.MIB)
+			}
+		}
+		if rec.MIME != "" {
+			rec.MIME = fmt.Sprintf(" (MIME: %s)", rec.MIME)
+		}
+		fmt.Fprintf(w, "// %s is the MIB identifier with IANA name %s%s.\n//\n", constName, rec.Name, rec.MIME)
+		if len(rec.Desc.Data) > 0 {
+			// Flatten the description markup into comment lines: character
+			// data is copied, and the data attribute of a non-person xref is
+			// written when its element ends.
+			fmt.Fprint(w, "// ")
+			d := xml.NewDecoder(strings.NewReader(rec.Desc.Data))
+			inElem := true
+			attr := ""
+			for {
+				t, err := d.Token()
+				if err != nil {
+					if err != io.EOF {
+						log.Fatal(err)
+					}
+					break
+				}
+				switch x := t.(type) {
+				case xml.CharData:
+					attr = "" // Don't need attribute info.
+					a := bytes.Split([]byte(x), []byte("\n"))
+					for i, b := range a {
+						if b = bytes.TrimSpace(b); len(b) != 0 {
+							if !inElem && i > 0 {
+								fmt.Fprint(w, "\n// ")
+							}
+							inElem = false
+							fmt.Fprintf(w, "%s ", string(b))
+						}
+					}
+				case xml.StartElement:
+					if x.Name.Local == "xref" {
+						inElem = true
+						use := false
+						for _, a := range x.Attr {
+							if a.Name.Local == "type" {
+								use = use || a.Value != "person"
+							}
+							if a.Name.Local == "data" && use {
+								attr = a.Value + " "
+							}
+						}
+					}
+				case xml.EndElement:
+					inElem = false
+					fmt.Fprint(w, attr)
+				}
+			}
+			fmt.Fprint(w, "\n")
+		}
+		for _, x := range rec.Xref {
+			switch x.Type {
+			case "rfc":
+				fmt.Fprintf(w, "// Reference: %s\n", strings.ToUpper(x.Data))
+			case "uri":
+				fmt.Fprintf(w, "// Reference: %s\n", x.Data)
+			}
+		}
+		fmt.Fprintf(w, "%s MIB = %s\n", constName, rec.MIB)
+		fmt.Fprintln(w)
+	}
+	fmt.Fprintln(w, ")")
+
+	gen.WriteGoFile("mib.go", "identifier", w.Bytes())
+}
@@ -0,0 +1,161 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+package main
+
+// This program generates tables.go:
+//	go run maketables.go | gofmt > tables.go
+
+// TODO: Emoji extensions?
+// http://www.unicode.org/faq/emoji_dingbats.html
+// http://www.unicode.org/Public/UNIDATA/EmojiSources.txt
+
+import (
+	"bufio"
+	"fmt"
+	"log"
+	"net/http"
+	"sort"
+	"strings"
+)
+
+// entry is one slot of the reverse (Unicode to JIS) table built by main.
+type entry struct {
+	// jisCode is the index of the code point within its JIS table.
+	jisCode int
+	// table is the index into main's tables slice, or -1 when unset.
+	table int
+}
+
+// main prints the generated tables.go for package japanese to standard
+// output.
+//
+// It downloads the WHATWG JIS X 0208 and JIS X 0212 indexes, prints one
+// sparse decode table per index, and then prints the encode tables: the
+// reverse map is split into intervals wherever at least `separation`
+// consecutive code points are unmapped, and each interval becomes one table.
+// Any download, parse or read failure terminates the program.
+func main() {
+	fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n")
+	fmt.Printf("// Package japanese provides Japanese encodings such as EUC-JP and Shift JIS.\n")
+	fmt.Printf(`package japanese // import "golang.org/x/text/encoding/japanese"` + "\n\n")
+
+	reverse := [65536]entry{}
+	for i := range reverse {
+		reverse[i].table = -1
+	}
+
+	tables := []struct {
+		url  string
+		name string
+	}{
+		{"http://encoding.spec.whatwg.org/index-jis0208.txt", "0208"},
+		{"http://encoding.spec.whatwg.org/index-jis0212.txt", "0212"},
+	}
+	for i, table := range tables {
+		res, err := http.Get(table.url)
+		if err != nil {
+			log.Fatalf("%q: Get: %v", table.url, err)
+		}
+		if res.StatusCode != http.StatusOK {
+			log.Fatalf("%q: Get: unexpected status %s", table.url, res.Status)
+		}
+
+		mapping := [65536]uint16{}
+
+		scanner := bufio.NewScanner(res.Body)
+		for scanner.Scan() {
+			s := strings.TrimSpace(scanner.Text())
+			if s == "" || s[0] == '#' {
+				continue
+			}
+			x, y := 0, uint16(0)
+			if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil {
+				log.Fatalf("%q: could not parse %q", table.url, s)
+			}
+			if x < 0 || 120*94 <= x {
+				log.Fatalf("%q: JIS code %d is out of range", table.url, x)
+			}
+			mapping[x] = y
+			// The first table (and first code) to mention a rune wins.
+			if reverse[y].table == -1 {
+				reverse[y] = entry{jisCode: x, table: i}
+			}
+		}
+		if err := scanner.Err(); err != nil {
+			log.Fatalf("%q: scanner error: %v", table.url, err)
+		}
+		// Close here rather than with defer: a defer inside the loop would
+		// keep every response body open until main returns.
+		res.Body.Close()
+
+		fmt.Printf("// jis%sDecode is the decoding table from JIS %s code to Unicode.\n// It is defined at %s\n",
+			table.name, table.name, table.url)
+		fmt.Printf("var jis%sDecode = [...]uint16{\n", table.name)
+		for code, m := range mapping {
+			if m != 0 {
+				fmt.Printf("\t%d: 0x%04X,\n", code, m)
+			}
+		}
+		fmt.Printf("}\n\n")
+	}
+
+	// Any run of at least separation contiguous unmapped entries in the
+	// reverse map starts a separate encode table.
+	const separation = 1024
+
+	intervals := []interval(nil)
+	low, high := -1, -1
+	for i, v := range reverse {
+		if v.table == -1 {
+			continue
+		}
+		if low < 0 {
+			low = i
+		} else if i-high >= separation {
+			if high >= 0 {
+				intervals = append(intervals, interval{low, high})
+			}
+			low = i
+		}
+		high = i + 1
+	}
+	if high >= 0 {
+		intervals = append(intervals, interval{low, high})
+	}
+	sort.Sort(byDecreasingLength(intervals))
+
+	fmt.Printf("const (\n")
+	fmt.Printf("\tjis0208    = 1\n")
+	fmt.Printf("\tjis0212    = 2\n")
+	fmt.Printf("\tcodeMask   = 0x7f\n")
+	fmt.Printf("\tcodeShift  = 7\n")
+	fmt.Printf("\ttableShift = 14\n")
+	fmt.Printf(")\n\n")
+
+	fmt.Printf("const numEncodeTables = %d\n\n", len(intervals))
+	fmt.Printf("// encodeX are the encoding tables from Unicode to JIS code,\n")
+	fmt.Printf("// sorted by decreasing length.\n")
+	for i, v := range intervals {
+		fmt.Printf("// encode%d: %5d entries for runes in [%5d, %5d).\n", i, v.len(), v.low, v.high)
+	}
+	fmt.Printf("//\n")
+	fmt.Printf("// The high two bits of the value record whether the JIS code comes from the\n")
+	fmt.Printf("// JIS0208 table (high bits == 1) or the JIS0212 table (high bits == 2).\n")
+	fmt.Printf("// The low 14 bits are two 7-bit unsigned integers j1 and j2 that form the\n")
+	fmt.Printf("// JIS code (94*j1 + j2) within that table.\n")
+	fmt.Printf("\n")
+
+	for i, v := range intervals {
+		fmt.Printf("const encode%dLow, encode%dHigh = %d, %d\n\n", i, i, v.low, v.high)
+		fmt.Printf("var encode%d = [...]uint16{\n", i)
+		for j := v.low; j < v.high; j++ {
+			x := reverse[j]
+			if x.table == -1 {
+				continue
+			}
+			fmt.Printf("\t%d - %d: jis%s<<14 | 0x%02X<<7 | 0x%02X,\n",
+				j, v.low, tables[x.table].name, x.jisCode/94, x.jisCode%94)
+		}
+		fmt.Printf("}\n\n")
+	}
+}
+
+// interval describes the half-open range [low, high).
+type interval struct {
+	low  int
+	high int
+}
+
+// len reports the number of integers covered by the interval.
+func (iv interval) len() int {
+	return iv.high - iv.low
+}
+
+// byDecreasingLength implements sort.Interface so that longer intervals sort first.
+type byDecreasingLength []interval
+
+func (s byDecreasingLength) Len() int { return len(s) }
+
+func (s byDecreasingLength) Less(a, b int) bool {
+	return s[b].len() < s[a].len()
+}
+
+func (s byDecreasingLength) Swap(a, b int) {
+	s[a], s[b] = s[b], s[a]
+}
@@ -0,0 +1,143 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+package main
+
+// This program generates tables.go:
+//	go run maketables.go | gofmt > tables.go
+
+import (
+	"bufio"
+	"fmt"
+	"log"
+	"net/http"
+	"sort"
+	"strings"
+)
+
+// main prints the generated tables.go for package korean to standard output.
+//
+// It downloads the WHATWG EUC-KR index, prints the sparse decode table, and
+// then prints the encode tables: the reverse map is split into intervals
+// wherever at least `separation` consecutive code points are unmapped, and
+// each interval becomes one table. Any download, parse or read failure
+// terminates the program.
+func main() {
+	fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n")
+	fmt.Printf("// Package korean provides Korean encodings such as EUC-KR.\n")
+	fmt.Printf(`package korean // import "golang.org/x/text/encoding/korean"` + "\n\n")
+
+	res, err := http.Get("http://encoding.spec.whatwg.org/index-euc-kr.txt")
+	if err != nil {
+		log.Fatalf("Get: %v", err)
+	}
+	defer res.Body.Close()
+	if res.StatusCode != http.StatusOK {
+		log.Fatalf("Get: unexpected status %s", res.Status)
+	}
+
+	mapping := [65536]uint16{}
+	reverse := [65536]uint16{}
+
+	scanner := bufio.NewScanner(res.Body)
+	for scanner.Scan() {
+		s := strings.TrimSpace(scanner.Text())
+		if s == "" || s[0] == '#' {
+			continue
+		}
+		x, y := uint16(0), uint16(0)
+		if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil {
+			log.Fatalf("could not parse %q", s)
+		}
+		// x is unsigned, so only the upper bound needs checking.
+		if 178*(0xc7-0x81)+(0xfe-0xc7)*94+(0xff-0xa1) <= x {
+			log.Fatalf("EUC-KR code %d is out of range", x)
+		}
+		mapping[x] = y
+		// Only the first index that maps to a rune is used for encoding.
+		if reverse[y] == 0 {
+			// Convert the index back into the lead byte c0 and trail byte c1.
+			c0, c1 := uint16(0), uint16(0)
+			if x < 178*(0xc7-0x81) {
+				// Lead bytes 0x81-0xc6 have 178 trail bytes each, laid out in
+				// three runs with different offsets.
+				c0 = uint16(x/178) + 0x81
+				c1 = uint16(x % 178)
+				switch {
+				case c1 < 1*26:
+					c1 += 0x41
+				case c1 < 2*26:
+					c1 += 0x47
+				default:
+					c1 += 0x4d
+				}
+			} else {
+				// Lead bytes from 0xc7 on have 94 trail bytes starting at 0xa1.
+				x -= 178 * (0xc7 - 0x81)
+				c0 = uint16(x/94) + 0xc7
+				c1 = uint16(x%94) + 0xa1
+			}
+			reverse[y] = c0<<8 | c1
+		}
+	}
+	if err := scanner.Err(); err != nil {
+		log.Fatalf("scanner error: %v", err)
+	}
+
+	fmt.Printf("// decode is the decoding table from EUC-KR code to Unicode.\n")
+	fmt.Printf("// It is defined at http://encoding.spec.whatwg.org/index-euc-kr.txt\n")
+	fmt.Printf("var decode = [...]uint16{\n")
+	for i, v := range mapping {
+		if v != 0 {
+			fmt.Printf("\t%d: 0x%04X,\n", i, v)
+		}
+	}
+	fmt.Printf("}\n\n")
+
+	// Any run of at least separation contiguous zero entries in the reverse
+	// map starts a separate encode table.
+	const separation = 1024
+
+	intervals := []interval(nil)
+	low, high := -1, -1
+	for i, v := range reverse {
+		if v == 0 {
+			continue
+		}
+		if low < 0 {
+			low = i
+		} else if i-high >= separation {
+			if high >= 0 {
+				intervals = append(intervals, interval{low, high})
+			}
+			low = i
+		}
+		high = i + 1
+	}
+	if high >= 0 {
+		intervals = append(intervals, interval{low, high})
+	}
+	sort.Sort(byDecreasingLength(intervals))
+
+	fmt.Printf("const numEncodeTables = %d\n\n", len(intervals))
+	fmt.Printf("// encodeX are the encoding tables from Unicode to EUC-KR code,\n")
+	fmt.Printf("// sorted by decreasing length.\n")
+	for i, v := range intervals {
+		fmt.Printf("// encode%d: %5d entries for runes in [%5d, %5d).\n", i, v.len(), v.low, v.high)
+	}
+	fmt.Printf("\n")
+
+	for i, v := range intervals {
+		fmt.Printf("const encode%dLow, encode%dHigh = %d, %d\n\n", i, i, v.low, v.high)
+		fmt.Printf("var encode%d = [...]uint16{\n", i)
+		for j := v.low; j < v.high; j++ {
+			x := reverse[j]
+			if x == 0 {
+				continue
+			}
+			fmt.Printf("\t%d-%d: 0x%04X,\n", j, v.low, x)
+		}
+		fmt.Printf("}\n\n")
+	}
+}
+
+// interval describes the half-open range [low, high).
+type interval struct {
+	low  int
+	high int
+}
+
+// len reports the number of integers covered by the interval.
+func (iv interval) len() int {
+	return iv.high - iv.low
+}
+
+// byDecreasingLength implements sort.Interface so that longer intervals sort first.
+type byDecreasingLength []interval
+
+func (s byDecreasingLength) Len() int { return len(s) }
+
+func (s byDecreasingLength) Less(a, b int) bool {
+	return s[b].len() < s[a].len()
+}
+
+func (s byDecreasingLength) Swap(a, b int) {
+	s[a], s[b] = s[b], s[a]
+}
@@ -0,0 +1,161 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+package main
+
+// This program generates tables.go:
+//	go run maketables.go | gofmt > tables.go
+
+import (
+	"bufio"
+	"fmt"
+	"log"
+	"net/http"
+	"sort"
+	"strings"
+)
+
+// main writes the generated tables.go for package simplifiedchinese to
+// standard output: the file header followed by the GB18030 and GBK tables.
+func main() {
+	preamble := []string{
+		"// generated by go run maketables.go; DO NOT EDIT\n\n",
+		"// Package simplifiedchinese provides Simplified Chinese encodings such as GBK.\n",
+		`package simplifiedchinese // import "golang.org/x/text/encoding/simplifiedchinese"` + "\n\n",
+	}
+	for _, chunk := range preamble {
+		fmt.Print(chunk)
+	}
+
+	printGB18030()
+	printGBK()
+}
+
+func printGB18030() {
+	res, err := http.Get("http://encoding.spec.whatwg.org/index-gb18030.txt")
+	if err != nil {
+		log.Fatalf("Get: %v", err)
+	}
+	defer res.Body.Close()
+
+	fmt.Printf("// gb18030 is the table from http://encoding.spec.whatwg.org/index-gb18030.txt\n")
+	fmt.Printf("var gb18030 = [...][2]uint16{\n")
+	scanner := bufio.NewScanner(res.Body)
+	for scanner.Scan() {
+		s := strings.TrimSpace(scanner.Text())
+		if s == "" || s[0] == '#' {
+			continue
+		}
+		x, y := uint32(0), uint32(0)
+		if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil {
+			log.Fatalf("could not parse %q", s)
+		}
+		if x < 0x10000 && y < 0x10000 {
+			fmt.Printf("\t{0x%04x, 0x%04x},\n", x, y)
+		}
+	}
+	fmt.Printf("}\n\n")
+}
+
+// printGBK downloads the WHATWG GBK index and prints the GBK tables to
+// standard output: the "decode" table indexed by the index's pointer value,
+// and the "encodeN" tables that map code points back to two-byte GBK codes.
+// When a code point appears more than once in the index, the first occurrence
+// wins for encoding. The reverse mapping is split into separate tables at every
+// gap of at least 1024 unmapped code points, and the tables are numbered in
+// order of decreasing length. Any download, parse or read error is fatal.
+func printGBK() {
+	const indexURL = "http://encoding.spec.whatwg.org/index-gbk.txt"
+	res, err := http.Get(indexURL)
+	if err != nil {
+		log.Fatalf("Get: %v", err)
+	}
+	defer res.Body.Close()
+	// An error page would otherwise only surface as a confusing parse failure.
+	if res.StatusCode != http.StatusOK {
+		log.Fatalf("Get %s: unexpected status %q", indexURL, res.Status)
+	}
+
+	mapping := [65536]uint16{}
+	reverse := [65536]uint16{}
+
+	scanner := bufio.NewScanner(res.Body)
+	for scanner.Scan() {
+		s := strings.TrimSpace(scanner.Text())
+		if s == "" || s[0] == '#' {
+			continue
+		}
+		x, y := uint16(0), uint16(0)
+		if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil {
+			log.Fatalf("could not parse %q: %v", s, err)
+		}
+		// x is unsigned, so only the upper bound needs checking.
+		if x >= 126*190 {
+			log.Fatalf("GBK code %d is out of range", x)
+		}
+		mapping[x] = y
+		if reverse[y] == 0 {
+			c0, c1 := x/190, x%190
+			if c1 >= 0x3f {
+				c1++
+			}
+			reverse[y] = (0x81+c0)<<8 | (0x40 + c1)
+		}
+	}
+	if err := scanner.Err(); err != nil {
+		log.Fatalf("scanner error: %v", err)
+	}
+
+	fmt.Printf("// decode is the decoding table from GBK code to Unicode.\n")
+	fmt.Printf("// It is defined at http://encoding.spec.whatwg.org/index-gbk.txt\n")
+	fmt.Printf("var decode = [...]uint16{\n")
+	for i, v := range mapping {
+		if v != 0 {
+			fmt.Printf("\t%d: 0x%04X,\n", i, v)
+		}
+	}
+	fmt.Printf("}\n\n")
+
+	// Any run of at least separation continuous zero entries in the reverse map will
+	// be a separate encode table.
+	const separation = 1024
+
+	intervals := []interval(nil)
+	low, high := -1, -1
+	for i, v := range reverse {
+		if v == 0 {
+			continue
+		}
+		if low < 0 {
+			low = i
+		} else if i-high >= separation {
+			if high >= 0 {
+				intervals = append(intervals, interval{low, high})
+			}
+			low = i
+		}
+		high = i + 1
+	}
+	if high >= 0 {
+		intervals = append(intervals, interval{low, high})
+	}
+	sort.Sort(byDecreasingLength(intervals))
+
+	fmt.Printf("const numEncodeTables = %d\n\n", len(intervals))
+	fmt.Printf("// encodeX are the encoding tables from Unicode to GBK code,\n")
+	fmt.Printf("// sorted by decreasing length.\n")
+	for i, v := range intervals {
+		fmt.Printf("// encode%d: %5d entries for runes in [%5d, %5d).\n", i, v.len(), v.low, v.high)
+	}
+	fmt.Printf("\n")
+
+	for i, v := range intervals {
+		fmt.Printf("const encode%dLow, encode%dHigh = %d, %d\n\n", i, i, v.low, v.high)
+		fmt.Printf("var encode%d = [...]uint16{\n", i)
+		for j := v.low; j < v.high; j++ {
+			x := reverse[j]
+			if x == 0 {
+				continue
+			}
+			// Keys are written relative to the table's low bound.
+			fmt.Printf("\t%d-%d: 0x%04X,\n", j, v.low, x)
+		}
+		fmt.Printf("}\n\n")
+	}
+}
+
+// interval describes the half-open index range [low, high).
+type interval struct {
+	low, high int
+}
+
+// len reports how many indices the interval spans.
+func (iv interval) len() int {
+	return iv.high - iv.low
+}
+
+// byDecreasingLength implements sort.Interface, ordering intervals from
+// longest to shortest.
+type byDecreasingLength []interval
+
+func (s byDecreasingLength) Len() int { return len(s) }
+
+func (s byDecreasingLength) Less(i, j int) bool {
+	return s[j].len() < s[i].len()
+}
+
+func (s byDecreasingLength) Swap(i, j int) {
+	s[j], s[i] = s[i], s[j]
+}
@@ -0,0 +1,140 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+package main
+
+// This program generates tables.go:
+//	go run maketables.go | gofmt > tables.go
+
+import (
+	"bufio"
+	"fmt"
+	"log"
+	"net/http"
+	"sort"
+	"strings"
+)
+
+// main writes the generated tables.go for package traditionalchinese to
+// standard output. It downloads the WHATWG Big5 index and prints the "decode"
+// table indexed by the index's pointer value, followed by the "encodeN" tables
+// that map code points back to two-byte Big5 codes. The reverse mapping is
+// split into separate tables at every gap of at least 1024 unmapped code
+// points, and the tables are numbered in order of decreasing length. Any
+// download, parse or read error is fatal.
+func main() {
+	fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n")
+	fmt.Printf("// Package traditionalchinese provides Traditional Chinese encodings such as Big5.\n")
+	fmt.Printf(`package traditionalchinese // import "golang.org/x/text/encoding/traditionalchinese"` + "\n\n")
+
+	const indexURL = "http://encoding.spec.whatwg.org/index-big5.txt"
+	res, err := http.Get(indexURL)
+	if err != nil {
+		log.Fatalf("Get: %v", err)
+	}
+	defer res.Body.Close()
+	// An error page would otherwise only surface as a confusing parse failure.
+	if res.StatusCode != http.StatusOK {
+		log.Fatalf("Get %s: unexpected status %q", indexURL, res.Status)
+	}
+
+	mapping := [65536]uint32{}
+	reverse := [65536 * 4]uint16{}
+
+	scanner := bufio.NewScanner(res.Body)
+	for scanner.Scan() {
+		s := strings.TrimSpace(scanner.Text())
+		if s == "" || s[0] == '#' {
+			continue
+		}
+		x, y := uint16(0), uint32(0)
+		if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil {
+			log.Fatalf("could not parse %q: %v", s, err)
+		}
+		// x is unsigned, so only the upper bound needs checking.
+		if x >= 126*157 {
+			log.Fatalf("Big5 code %d is out of range", x)
+		}
+		// y indexes reverse below; report bad data instead of panicking.
+		if y >= uint32(len(reverse)) {
+			log.Fatalf("Big5 code point 0x%x is out of range", y)
+		}
+		mapping[x] = y
+
+		// The WHATWG spec http://encoding.spec.whatwg.org/#indexes says that
+		// "The index pointer for code point in index is the first pointer
+		// corresponding to code point in index", which would normally mean
+		// that the code below should be guarded by "if reverse[y] == 0", but
+		// last instead of first seems to match the behavior of
+		// "iconv -f UTF-8 -t BIG5". For example, U+8005 者 occurs twice in
+		// http://encoding.spec.whatwg.org/index-big5.txt, as index 2148
+		// (encoded as "\x8e\xcd") and index 6543 (encoded as "\xaa\xcc")
+		// and "echo 者 | iconv -f UTF-8 -t BIG5 | xxd" gives "\xaa\xcc".
+		c0, c1 := x/157, x%157
+		if c1 < 0x3f {
+			c1 += 0x40
+		} else {
+			c1 += 0x62
+		}
+		reverse[y] = (0x81+c0)<<8 | c1
+	}
+	if err := scanner.Err(); err != nil {
+		log.Fatalf("scanner error: %v", err)
+	}
+
+	fmt.Printf("// decode is the decoding table from Big5 code to Unicode.\n")
+	fmt.Printf("// It is defined at http://encoding.spec.whatwg.org/index-big5.txt\n")
+	fmt.Printf("var decode = [...]uint32{\n")
+	for i, v := range mapping {
+		if v != 0 {
+			fmt.Printf("\t%d: 0x%08X,\n", i, v)
+		}
+	}
+	fmt.Printf("}\n\n")
+
+	// Any run of at least separation continuous zero entries in the reverse map will
+	// be a separate encode table.
+	const separation = 1024
+
+	intervals := []interval(nil)
+	low, high := -1, -1
+	for i, v := range reverse {
+		if v == 0 {
+			continue
+		}
+		if low < 0 {
+			low = i
+		} else if i-high >= separation {
+			if high >= 0 {
+				intervals = append(intervals, interval{low, high})
+			}
+			low = i
+		}
+		high = i + 1
+	}
+	if high >= 0 {
+		intervals = append(intervals, interval{low, high})
+	}
+	sort.Sort(byDecreasingLength(intervals))
+
+	fmt.Printf("const numEncodeTables = %d\n\n", len(intervals))
+	fmt.Printf("// encodeX are the encoding tables from Unicode to Big5 code,\n")
+	fmt.Printf("// sorted by decreasing length.\n")
+	for i, v := range intervals {
+		fmt.Printf("// encode%d: %5d entries for runes in [%6d, %6d).\n", i, v.len(), v.low, v.high)
+	}
+	fmt.Printf("\n")
+
+	for i, v := range intervals {
+		fmt.Printf("const encode%dLow, encode%dHigh = %d, %d\n\n", i, i, v.low, v.high)
+		fmt.Printf("var encode%d = [...]uint16{\n", i)
+		for j := v.low; j < v.high; j++ {
+			x := reverse[j]
+			if x == 0 {
+				continue
+			}
+			// Keys are written relative to the table's low bound.
+			fmt.Printf("\t%d-%d: 0x%04X,\n", j, v.low, x)
+		}
+		fmt.Printf("}\n\n")
+	}
+}
+
+// interval describes the half-open index range [low, high).
+type interval struct {
+	low, high int
+}
+
+// len reports how many indices the interval spans.
+func (iv interval) len() int {
+	return iv.high - iv.low
+}
+
+// byDecreasingLength implements sort.Interface, ordering intervals from
+// longest to shortest.
+type byDecreasingLength []interval
+
+func (s byDecreasingLength) Len() int { return len(s) }
+
+func (s byDecreasingLength) Less(i, j int) bool {
+	return s[j].len() < s[i].len()
+}
+
+func (s byDecreasingLength) Swap(i, j int) {
+	s[j], s[i] = s[i], s[j]
+}
@@ -0,0 +1,351 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gen
+
+import (
+	"bytes"
+	"encoding/gob"
+	"fmt"
+	"hash"
+	"hash/fnv"
+	"io"
+	"log"
+	"os"
+	"reflect"
+	"strings"
+	"unicode"
+	"unicode/utf8"
+)
+
+// This file contains utilities for generating code.
+
+// TODO: other write methods like:
+// - slices, maps, types, etc.
+
+// CodeWriter is a utility for writing structured code. It computes the content
+// hash and size of written content. It ensures there are newlines between
+// written code blocks.
+//
+// The generated source accumulates in buf until WriteGo or WriteGoFile emits
+// it. Size is the running byte total reported in the trailing "Total table
+// size" comment. Values passed to the Write* methods are fed into Hash, either
+// directly (strings) or through the gob encoder, which writes into Hash.
+type CodeWriter struct {
+	buf  bytes.Buffer
+	Size int
+	Hash hash.Hash32 // content hash
+	gob  *gob.Encoder
+	// For comments we skip the usual one-line separator if they are followed by
+	// a code block.
+	skipSep bool
+}
+
+// Write appends p verbatim to the internal buffer. It gives CodeWriter the
+// io.Writer method set so callers can print raw code into it; bytes written
+// this way are not added to Size or Hash.
+func (w *CodeWriter) Write(p []byte) (n int, err error) {
+	return w.buf.Write(p)
+}
+
+// NewCodeWriter creates a CodeWriter whose content hash is a 32-bit FNV hash
+// and whose gob encoder writes into that hash.
+func NewCodeWriter() *CodeWriter {
+	sum := fnv.New32()
+	cw := &CodeWriter{Hash: sum}
+	cw.gob = gob.NewEncoder(sum)
+	return cw
+}
+
+// WriteGoFile appends the buffer with the total size of all created structures
+// and writes it as a Go file to the given file with the given package name.
+// It calls log.Fatalf if the file cannot be created, written or closed.
+func (w *CodeWriter) WriteGoFile(filename, pkg string) {
+	f, err := os.Create(filename)
+	if err != nil {
+		log.Fatalf("Could not create file %s: %v", filename, err)
+	}
+	if _, err = w.WriteGo(f, pkg); err != nil {
+		f.Close()
+		log.Fatalf("Error writing file %s: %v", filename, err)
+	}
+	// Close can report a write error that was not visible earlier; ignoring it
+	// could leave an incomplete generated file behind unnoticed.
+	if err := f.Close(); err != nil {
+		log.Fatalf("Error closing file %s: %v", filename, err)
+	}
+}
+
+// WriteGo appends a comment stating the total size of all created structures
+// and the content checksum to the buffer, then writes the buffer as a Go file
+// in package pkg to out. The buffer is reset before WriteGo returns.
+func (w *CodeWriter) WriteGo(out io.Writer, pkg string) (n int, err error) {
+	defer w.buf.Reset()
+
+	total := w.Size
+	checksum := w.Hash.Sum32()
+	w.WriteComment("Total table size %d bytes (%dKiB); checksum: %X\n", total, total/1024, checksum)
+	return WriteGo(out, pkg, w.buf.Bytes())
+}
+
+// printf formats according to f and appends the result to the buffer (via
+// Write). Like Write, it does not touch Size or Hash.
+func (w *CodeWriter) printf(f string, x ...interface{}) {
+	fmt.Fprintf(w, f, x...)
+}
+
+// insertSep writes the blank-line separator that precedes a code block,
+// unless the previous write was a comment that asked for it to be skipped; in
+// that case it only clears the skip request.
+func (w *CodeWriter) insertSep() {
+	if !w.skipSep {
+		// Two newlines guarantee a blank line after the previous block;
+		// WriteGoFile will remove extraneous newlines.
+		w.printf("\n\n")
+		return
+	}
+	w.skipSep = false
+}
+
+// WriteComment formats comment with args and writes it as a block of "//"
+// line comments. Leading and trailing newlines are dropped, and the
+// indentation found on the first line is removed from every following line
+// that starts with it. The separator before the next code block is suppressed
+// so the comment attaches to it.
+func (w *CodeWriter) WriteComment(comment string, args ...interface{}) {
+	text := strings.Trim(fmt.Sprintf(comment, args...), "\n")
+
+	// Use at least two newlines to ensure a blank space between the previous
+	// block. WriteGoFile will remove extraneous newlines.
+	w.printf("\n\n// ")
+	w.skipSep = true
+
+	// Measure the first line's indent: its leading run of tabs and spaces.
+	indent := 0
+	for indent < len(text) && (text[indent] == '\t' || text[indent] == ' ') {
+		indent++
+	}
+	// A newline followed by that same indent starts a continuation line.
+	indentedBreak := "\n" + text[:indent]
+	text = text[indent:]
+
+	strings.NewReplacer(indentedBreak, "\n// ", "\n", "\n// ").WriteString(w, text)
+
+	w.printf("\n")
+}
+
+// writeSizeInfo appends a "// Size: N bytes" comment line for a value of the
+// given size.
+func (w *CodeWriter) writeSizeInfo(size int) {
+	w.printf("// Size: %d bytes\n", size)
+}
+
+// WriteConst writes a constant declaration named name with value x. String
+// values are declared with their type and rendered by WriteString; every other
+// value is rendered untyped with %#v.
+func (w *CodeWriter) WriteConst(name string, x interface{}) {
+	w.insertSep()
+	val := reflect.ValueOf(x)
+
+	if val.Type().Kind() != reflect.String {
+		w.printf("const %s = %#v\n", name, x)
+		return
+	}
+	w.printf("const %s %s = ", name, typeName(x))
+	w.WriteString(val.String())
+	w.printf("\n")
+}
+
+// WriteVar writes a variable declaration named name with value x and adds the
+// value to the size and hash bookkeeping. Strings are rendered by WriteString.
+// Structs, slices and arrays are declared without an explicit type, all other
+// kinds with one; for these a "// Size:" comment follows the value.
+func (w *CodeWriter) WriteVar(name string, x interface{}) {
+	w.insertSep()
+	val := reflect.ValueOf(x)
+	before := w.Size
+	w.Size += int(val.Type().Size())
+
+	switch kind := val.Type().Kind(); kind {
+	case reflect.String:
+		w.printf("var %s %s = ", name, typeName(x))
+		w.WriteString(val.String())
+	case reflect.Struct, reflect.Slice, reflect.Array:
+		// Only structs are hashed as a whole here; slices and arrays are
+		// hashed while their elements are written.
+		if kind == reflect.Struct {
+			w.gob.Encode(x)
+		}
+		w.printf("var %s = ", name)
+		w.writeValue(val)
+		w.writeSizeInfo(w.Size - before)
+	default:
+		w.printf("var %s %s = ", name, typeName(x))
+		w.gob.Encode(x)
+		w.writeValue(val)
+		w.writeSizeInfo(w.Size - before)
+	}
+	w.printf("\n")
+}
+
+// writeValue renders v as a Go expression. Strings go through WriteString,
+// arrays and slices through writeSlice, structs are written field by field as
+// a keyed composite literal, and everything else is printed with %#v.
+func (w *CodeWriter) writeValue(v reflect.Value) {
+	iface := v.Interface()
+	switch v.Kind() {
+	case reflect.String:
+		w.WriteString(v.String())
+	case reflect.Slice:
+		w.writeSlice(iface, false)
+	case reflect.Array:
+		// Don't double count: callers of WriteArray count on the size being
+		// added, so we need to discount it here.
+		w.Size -= int(v.Type().Size())
+		w.writeSlice(iface, true)
+	case reflect.Struct:
+		w.printf("%s{\n", typeName(iface))
+		typ := v.Type()
+		for i, n := 0, v.NumField(); i < n; i++ {
+			w.printf("%s: ", typ.Field(i).Name)
+			w.writeValue(v.Field(i))
+			w.printf(",\n")
+		}
+		w.printf("}")
+	default:
+		w.printf("%#v", iface)
+	}
+}
+
+// WriteString writes s as a Go string literal. The length of s is added to
+// Size and its bytes are fed to the content hash. Strings of at most 40 bytes
+// are written inline with %q. Longer strings are written as a concatenation of
+// double-quoted pieces, one per line, in which non-printable runes, invalid
+// UTF-8, double quotes and backslashes are escaped.
+func (w *CodeWriter) WriteString(s string) {
+	// Hash and count the string exactly as given. Escaping is only a matter of
+	// rendering: %q does it for inline strings and the loop below does it for
+	// multi-line strings. Doubling backslashes up front would make %q escape
+	// them a second time and would skew Size and Hash.
+	io.WriteString(w.Hash, s) // content hash
+	w.Size += len(s)
+
+	const maxInline = 40
+	if len(s) <= maxInline {
+		w.printf("%q", s)
+		return
+	}
+
+	// We will render the string as a multi-line string.
+	const maxWidth = 80 - 4 - len(`"`) - len(`" +`)
+
+	// When starting on its own line, go fmt indents line 2+ an extra level.
+	n, max := maxWidth, maxWidth-4
+
+	// As per https://golang.org/issue/18078, the compiler has trouble
+	// compiling the concatenation of many strings, s0 + s1 + s2 + ... + sN,
+	// for large N. We insert redundant, explicit parentheses to work around
+	// that, lowering the N at any given step: (s0 + s1 + ... + s63) + (s64 +
+	// ... + s127) + etc + (etc + ... + sN).
+	explicitParens, extraComment := len(s) > 128*1024, ""
+	if explicitParens {
+		w.printf(`(`)
+		extraComment = "; the redundant, explicit parens are for https://golang.org/issue/18078"
+	}
+
+	// Print "" +\n, if a string does not start on its own line.
+	b := w.buf.Bytes()
+	if p := len(bytes.TrimRight(b, " \t")); p > 0 && b[p-1] != '\n' {
+		w.printf("\"\" + // Size: %d bytes%s\n", len(s), extraComment)
+		n, max = maxWidth, maxWidth
+	}
+
+	w.printf(`"`)
+
+	for sz, p, nLines := 0, 0, 0; p < len(s); {
+		var r rune
+		r, sz = utf8.DecodeRuneInString(s[p:])
+		out := s[p : p+sz]
+		// chars is the width out takes up on the current line.
+		chars := 1
+		if !unicode.IsPrint(r) || r == utf8.RuneError || r == '"' {
+			switch sz {
+			case 1:
+				out = fmt.Sprintf("\\x%02x", s[p])
+			case 2, 3:
+				out = fmt.Sprintf("\\u%04x", r)
+			case 4:
+				out = fmt.Sprintf("\\U%08x", r)
+			}
+			chars = len(out)
+		} else if r == '\\' {
+			// A backslash is printable but must be escaped inside an
+			// interpreted string literal.
+			out = `\\`
+			chars = len(out)
+		}
+		if n -= chars; n < 0 {
+			nLines++
+			if explicitParens && nLines&63 == 63 {
+				w.printf("\") + (\"")
+			}
+			w.printf("\" +\n\"")
+			n = max - len(out)
+		}
+		w.printf("%s", out)
+		p += sz
+	}
+	w.printf(`"`)
+	if explicitParens {
+		w.printf(`)`)
+	}
+}
+
+// WriteSlice writes x as a slice composite literal, annotated with its element
+// count, and adds its elements to Size and Hash. See writeSlice for the
+// supported element kinds.
+func (w *CodeWriter) WriteSlice(x interface{}) {
+	w.writeSlice(x, false)
+}
+
+// WriteArray writes x as an array composite literal with an explicit length
+// and adds its elements to Size and Hash. See writeSlice for the supported
+// element kinds.
+func (w *CodeWriter) WriteArray(x interface{}) {
+	w.writeSlice(x, true)
+}
+
+// writeSlice writes the slice or array x as a composite literal and accounts
+// for it in Size and Hash. When isArray is set the literal's type is written
+// with an explicit length; otherwise the element count is added as a comment.
+//
+// Supported element kinds are strings, integers, structs and arrays; any other
+// kind panics. Integers are written several per line, with an "Entry" comment
+// at the start of each block when there is more than one block. Struct
+// elements equal to the zero value are omitted and the rest are written with
+// their index as key.
+func (w *CodeWriter) writeSlice(x interface{}, isArray bool) {
+	v := reflect.ValueOf(x)
+	w.gob.Encode(v.Len())
+	w.Size += v.Len() * int(v.Type().Elem().Size())
+	name := typeName(x)
+	if isArray {
+		// Replace whatever precedes the element type ("[]" or "[N]") with the
+		// actual length.
+		name = fmt.Sprintf("[%d]%s", v.Len(), name[strings.Index(name, "]")+1:])
+		w.printf("%s{\n", name)
+	} else {
+		w.printf("%s{ // %d elements\n", name, v.Len())
+	}
+
+	switch kind := v.Type().Elem().Kind(); kind {
+	case reflect.String:
+		// Go through reflection rather than asserting x.([]string), which
+		// panics for string arrays and for named slice or element types.
+		for i := 0; i < v.Len(); i++ {
+			w.WriteString(v.Index(i).String())
+			w.printf(",\n")
+		}
+	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64,
+		reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
+		// nLine and nBlock are the number of elements per line and block.
+		nLine, nBlock, format := 8, 64, "%d,"
+		switch kind {
+		case reflect.Uint8:
+			format = "%#02x,"
+		case reflect.Uint16:
+			format = "%#04x,"
+		case reflect.Uint32:
+			nLine, nBlock, format = 4, 32, "%#08x,"
+		case reflect.Uint, reflect.Uint64:
+			nLine, nBlock, format = 4, 32, "%#016x,"
+		case reflect.Int8:
+			nLine = 16
+		}
+		n := nLine
+		for i := 0; i < v.Len(); i++ {
+			if i%nBlock == 0 && v.Len() > nBlock {
+				w.printf("// Entry %X - %X\n", i, i+nBlock-1)
+			}
+			elem := v.Index(i).Interface()
+			w.gob.Encode(elem)
+			w.printf(format, elem)
+			if n--; n == 0 {
+				n = nLine
+				w.printf("\n")
+			}
+		}
+		w.printf("\n")
+	case reflect.Struct:
+		zero := reflect.Zero(v.Type().Elem()).Interface()
+		for i := 0; i < v.Len(); i++ {
+			elem := v.Index(i).Interface()
+			// NOTE(review): this hashes the whole slice once per element
+			// rather than the element itself. It is left as is because
+			// changing it would alter every recorded checksum.
+			w.gob.EncodeValue(v)
+			if !reflect.DeepEqual(zero, elem) {
+				line := fmt.Sprintf("%#v,\n", elem)
+				// Drop the type name; the enclosing literal implies it.
+				line = line[strings.IndexByte(line, '{'):]
+				w.printf("%d: ", i)
+				// line is data, not a format: a '%' in a field value must be
+				// written literally.
+				w.printf("%s", line)
+			}
+		}
+	case reflect.Array:
+		for i := 0; i < v.Len(); i++ {
+			w.printf("%d: %#v,\n", i, v.Index(i).Interface())
+		}
+	default:
+		panic("gen: slice elem type not supported")
+	}
+	w.printf("}")
+}
+
+// WriteType writes a struct type definition mirroring the type of x, listing
+// each field's name and type, and returns the type's name.
+func (w *CodeWriter) WriteType(x interface{}) string {
+	typ := reflect.TypeOf(x)
+	name := typ.Name()
+	w.printf("type %s struct {\n", name)
+	for i, n := 0, typ.NumField(); i < n; i++ {
+		field := typ.Field(i)
+		w.printf("\t%s %s\n", field.Name, field.Type)
+	}
+	w.printf("}\n")
+	return name
+}
+
+// typeName returns the Go type of x as it should be spelled in generated code:
+// the type's string form with the first "main." qualifier removed.
+func typeName(x interface{}) string {
+	full := fmt.Sprintf("%v", reflect.ValueOf(x).Type())
+	return strings.Replace(full, "main.", "", 1)
+}
@@ -0,0 +1,281 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package gen contains common code for the various code generation tools in the
+// text repository. Its usage ensures consistency between tools.
+//
+// This package defines command line flags that are common to most generation
+// tools. The flags allow for specifying specific Unicode and CLDR versions
+// in the public Unicode data repository (http://www.unicode.org/Public).
+//
+// A local Unicode data mirror can be set through the flag -local or the
+// environment variable UNICODE_DIR. The former takes precedence. The local
+// directory should follow the same structure as the public repository.
+//
+// IANA data can also optionally be mirrored by putting it in the iana directory
+// rooted at the top of the local mirror. Beware, though, that IANA data is not
+// versioned. So it is up to the developer to use the right version.
+package gen // import "golang.org/x/text/internal/gen"
+
+import (
+	"bytes"
+	"flag"
+	"fmt"
+	"go/build"
+	"go/format"
+	"io"
+	"io/ioutil"
+	"log"
+	"net/http"
+	"os"
+	"path"
+	"path/filepath"
+	"sync"
+	"unicode"
+
+	"golang.org/x/text/unicode/cldr"
+)
+
+// Command line flags shared by the generation tools. The two version flags
+// default to the UNICODE_VERSION and CLDR_VERSION environment variables when
+// those are set and non-empty, and otherwise to the versions reported by the
+// unicode and cldr packages.
+var (
+	url = flag.String("url",
+		"http://www.unicode.org/Public",
+		"URL of Unicode database directory")
+	iana = flag.String("iana",
+		"http://www.iana.org",
+		"URL of the IANA repository")
+	unicodeVersion = flag.String("unicode",
+		getEnv("UNICODE_VERSION", unicode.Version),
+		"unicode version to use")
+	cldrVersion = flag.String("cldr",
+		getEnv("CLDR_VERSION", cldr.Version),
+		"cldr version to use")
+)
+
+// getEnv returns the value of the environment variable name, or def when the
+// variable is unset or empty.
+func getEnv(name, def string) string {
+	value := os.Getenv(name)
+	if value == "" {
+		return def
+	}
+	return value
+}
+
+// Init performs common initialization for a gen command. It parses the flags
+// and sets up the standard logging parameters: no prefix, and each message
+// tagged with the file name and line number of the call site.
+func Init() {
+	log.SetPrefix("")
+	log.SetFlags(log.Lshortfile)
+	flag.Parse()
+}
+
+// header is the format string for the top of every generated Go file. Its
+// single %s verb receives the package name (see WriteGo).
+const header = `// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
+
+package %s
+
+`
+
+// UnicodeVersion reports the requested Unicode version, i.e. the value of the
+// -unicode flag. The value only reflects the command line after the flags have
+// been parsed (see Init).
+func UnicodeVersion() string {
+	return *unicodeVersion
+}
+
+// CLDRVersion reports the requested CLDR version, i.e. the value of the -cldr
+// flag. The value only reflects the command line after the flags have been
+// parsed (see Init).
+func CLDRVersion() string {
+	return *cldrVersion
+}
+
+// IsLocal reports whether data files are available locally, which is taken to
+// be the case when the README of the local data directory exists.
+func IsLocal() bool {
+	readme, err := localReadmeFile()
+	if err != nil {
+		return false
+	}
+	_, err = os.Stat(readme)
+	return err == nil
+}
+
+// OpenUCDFile opens the requested UCD file. The file is specified relative to
+// the public Unicode root directory. It will call log.Fatal if there are any
+// errors.
+//
+// The file is looked up under <unicode version>/ucd/, using the version given
+// by the -unicode flag.
+func OpenUCDFile(file string) io.ReadCloser {
+	return openUnicode(path.Join(*unicodeVersion, "ucd", file))
+}
+
+// OpenCLDRCoreZip opens the CLDR core zip file. It will call log.Fatal if there
+// are any errors.
+//
+// The file is cldr/<cldr version>/core.zip, using the version given by the
+// -cldr flag.
+func OpenCLDRCoreZip() io.ReadCloser {
+	return OpenUnicodeFile("cldr", *cldrVersion, "core.zip")
+}
+
+// OpenUnicodeFile opens category/version/file, a path relative to the root of
+// the Unicode data archive. An empty version selects the default Unicode
+// version. It will call log.Fatal if there are any errors.
+func OpenUnicodeFile(category, version, file string) io.ReadCloser {
+	effective := version
+	if effective == "" {
+		effective = UnicodeVersion()
+	}
+	rel := path.Join(category, effective, file)
+	return openUnicode(rel)
+}
+
+// OpenIANAFile opens the requested IANA file. The file is specified relative
+// to the IANA root, which is typically either http://www.iana.org or the
+// iana directory in the local mirror. It will call log.Fatal if there are any
+// errors.
+//
+// The remote root is the value of the -iana flag.
+func OpenIANAFile(path string) io.ReadCloser {
+	return Open(*iana, "iana", path)
+}
+
+var (
+	// dirMutex serializes calls to getLocalDir, which may create the local
+	// data directory.
+	dirMutex sync.Mutex
+	// NOTE(review): localDir is not referenced anywhere in the visible part
+	// of this file; confirm whether it is still needed.
+	localDir string
+)
+
+// permissions is the mode passed to os.MkdirAll and ioutil.WriteFile when the
+// local data mirror is created. Note that it is applied to regular files as
+// well as directories.
+const permissions = 0755
+
+// localReadmeFile returns the path of the README inside the DATA directory of
+// the golang.org/x/text package, as located by go/build. The file itself is
+// not required to exist.
+func localReadmeFile() (string, error) {
+	pkg, err := build.Import("golang.org/x/text", "", build.FindOnly)
+	if err == nil {
+		return filepath.Join(pkg.Dir, "DATA", "README"), nil
+	}
+	return "", fmt.Errorf("Could not locate package: %v", err)
+}
+
+// getLocalDir returns the directory of the local data mirror, which is the
+// directory holding the README named by localReadmeFile. If the README does
+// not exist, the directory and the README are created first. Calls are
+// serialized by dirMutex. Any failure is fatal.
+func getLocalDir() string {
+	dirMutex.Lock()
+	defer dirMutex.Unlock()
+
+	readme, err := localReadmeFile()
+	if err != nil {
+		log.Fatal(err)
+	}
+	dir := filepath.Dir(readme)
+	if _, err := os.Stat(readme); err != nil {
+		if err := os.MkdirAll(dir, permissions); err != nil {
+			log.Fatalf("Could not create directory: %v", err)
+		}
+		// IsLocal keys off the README's existence, so a failed write must
+		// not go unnoticed.
+		if err := ioutil.WriteFile(readme, []byte(readmeTxt), permissions); err != nil {
+			log.Fatalf("Could not create file %s: %v", readme, err)
+		}
+	}
+	return dir
+}
+
+// readmeTxt is the content getLocalDir writes to the README of the local data
+// directory when that file does not exist yet.
+const readmeTxt = `Generated by golang.org/x/text/internal/gen. DO NOT EDIT.
+
+This directory contains downloaded files used to generate the various tables
+in the golang.org/x/text subrepo.
+
+Note that the language subtag repo (iana/assignments/language-subtag-registry)
+and all other times in the iana subdirectory are not versioned and will need
+to be periodically manually updated. The easiest way to do this is to remove
+the entire iana directory. This is mostly of concern when updating the language
+package.
+`
+
+// Open returns the content of subdir/path below the local data directory if
+// that file can be opened, and otherwise fetches urlRoot/path and stores a
+// local copy. path is slash-separated. It will call log.Fatal if there are any
+// errors.
+func Open(urlRoot, subdir, path string) io.ReadCloser {
+	root := getLocalDir()
+	local := filepath.Join(root, subdir, filepath.FromSlash(path))
+	return open(local, urlRoot, path)
+}
+
+// openUnicode opens the slash-separated path relative to the Unicode root:
+// from the local data directory if the file can be opened there, and otherwise
+// from the URL given by the -url flag.
+func openUnicode(path string) io.ReadCloser {
+	file := filepath.Join(getLocalDir(), filepath.FromSlash(path))
+	return open(file, *url, path)
+}
+
+// TODO: automatically periodically update non-versioned files.
+
+// open returns the local file if it can be opened. Otherwise it downloads
+// urlRoot/path, stores a copy at file for later runs, and returns the
+// downloaded bytes. Any download or write failure is fatal.
+func open(file, urlRoot, path string) io.ReadCloser {
+	if f, err := os.Open(file); err == nil {
+		return f
+	}
+	r := get(urlRoot, path)
+	defer r.Close()
+	b, err := ioutil.ReadAll(r)
+	if err != nil {
+		log.Fatalf("Could not download file: %v", err)
+	}
+	// Report a failure to create the directory as such, rather than as the
+	// less specific write error it would cause below.
+	if err := os.MkdirAll(filepath.Dir(file), permissions); err != nil {
+		log.Fatalf("Could not create directory: %v", err)
+	}
+	if err := ioutil.WriteFile(file, b, permissions); err != nil {
+		log.Fatalf("Could not create file: %v", err)
+	}
+	return ioutil.NopCloser(bytes.NewReader(b))
+}
+
+// get fetches root/path over HTTP and returns the response body, which the
+// caller must close. Progress is reported on standard output. A transport
+// error or any status other than 200 is fatal.
+func get(root, path string) io.ReadCloser {
+	target := root + "/" + path
+	fmt.Printf("Fetching %s...", target)
+	defer fmt.Println(" done.")
+
+	resp, err := http.Get(target)
+	switch {
+	case err != nil:
+		log.Fatalf("HTTP GET: %v", err)
+	case resp.StatusCode != 200:
+		log.Fatalf("Bad GET status for %q: %q", target, resp.Status)
+	}
+	return resp.Body
+}
+
+// TODO: use Write*Version in all applicable packages.
+
+// WriteUnicodeVersion writes a constant for the Unicode version from which the
+// tables are generated.
+func WriteUnicodeVersion(w io.Writer) {
+	fmt.Fprintf(w, "// UnicodeVersion is the Unicode version from which the tables in this package are derived.\n")
+	fmt.Fprintf(w, "const UnicodeVersion = %q\n\n", UnicodeVersion())
+}
+
+// WriteCLDRVersion writes a constant for the CLDR version from which the
+// tables are generated.
+func WriteCLDRVersion(w io.Writer) {
+	fmt.Fprintf(w, "// CLDRVersion is the CLDR version from which the tables in this package are derived.\n")
+	fmt.Fprintf(w, "const CLDRVersion = %q\n\n", CLDRVersion())
+}
+
+// WriteGoFile prepends a standard file comment and package statement to the
+// given bytes, applies gofmt, and writes them to a file with the given name.
+// It will call log.Fatal if there are any errors, including a failure to close
+// the file.
+func WriteGoFile(filename, pkg string, b []byte) {
+	w, err := os.Create(filename)
+	if err != nil {
+		log.Fatalf("Could not create file %s: %v", filename, err)
+	}
+	if _, err = WriteGo(w, pkg, b); err != nil {
+		w.Close()
+		log.Fatalf("Error writing file %s: %v", filename, err)
+	}
+	// Close can report a write error that was not visible earlier; ignoring it
+	// could leave an incomplete generated file behind unnoticed.
+	if err := w.Close(); err != nil {
+		log.Fatalf("Error closing file %s: %v", filename, err)
+	}
+}
+
+// WriteGo writes b to w as a gofmt-formatted Go file, preceded by the standard
+// file comment and a package clause for pkg. If formatting fails, the
+// unformatted source is written instead and the formatting error is returned.
+func WriteGo(w io.Writer, pkg string, b []byte) (n int, err error) {
+	src := append([]byte(fmt.Sprintf(header, pkg)), b...)
+
+	formatted, ferr := format.Source(src)
+	if ferr == nil {
+		return w.Write(formatted)
+	}
+	// Print the generated code even in case of an error so that the
+	// returned error can be meaningfully interpreted.
+	n, _ = w.Write(src)
+	return n, ferr
+}
+
+// Repackage rewrites a Go file from belonging to package main to belonging to
+// the given package. Everything up to and including the first
+// "package main\n\n" in inFile is dropped; the remainder is written to outFile
+// by WriteGoFile. A read failure or a missing package clause is fatal.
+func Repackage(inFile, outFile, pkg string) {
+	const toDelete = "package main\n\n"
+
+	src, err := ioutil.ReadFile(inFile)
+	if err != nil {
+		log.Fatalf("reading %s: %v", inFile, err)
+	}
+	start := bytes.Index(src, []byte(toDelete))
+	if start < 0 {
+		log.Fatalf("Could not find %q in %s.", toDelete, inFile)
+	}
+	body := src[start+len(toDelete):]
+	WriteGoFile(outFile, pkg, body)
+}
@@ -0,0 +1,58 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package triegen
+
+// This file defines Compacter and its implementations.
+
+import "io"
+
+// A Compacter generates an alternative, more space-efficient way to store a
+// trie value block. A trie value block holds all possible values for the last
+// byte of a UTF-8 encoded rune. Excluding ASCII characters, a trie value block
+// always has 64 values, as a UTF-8 encoding ends with a byte in [0x80, 0xC0).
+type Compacter interface {
+	// Size returns whether the Compacter could encode the given block as well
+	// as its size in case it can. len(v) is always 64.
+	// If ok is false the block cannot be encoded by this Compacter.
+	Size(v []uint64) (sz int, ok bool)
+
+	// Store stores the block using the Compacter's compression method.
+	// It returns a handle with which the block can be retrieved.
+	// len(v) is always 64.
+	Store(v []uint64) uint32
+
+	// Print writes the data structures associated to the given store to w.
+	// It returns any error encountered while writing.
+	Print(w io.Writer) error
+
+	// Handler returns the name of a function that gets called during trie
+	// lookup for blocks generated by the Compacter. The function should be of
+	// the form func (n uint32, b byte) uint64, where n is the index returned by
+	// the Compacter's Store method and b is the last byte of the UTF-8
+	// encoding, where 0x80 <= b < 0xC0, for which to do the lookup in the
+	// block.
+	Handler() string
+}
+
+// simpleCompacter is the default Compacter used by builder. It keeps every
+// block unmodified as a normal trie value block of the underlying builder.
+type simpleCompacter builder
+
+// Size reports that any block can be stored, at a cost of blockSize values of
+// ValueSize each.
+func (b *simpleCompacter) Size([]uint64) (sz int, ok bool) {
+	sz = blockSize * b.ValueSize
+	return sz, true
+}
+
+// Store appends v to the builder's value blocks and returns its handle: the
+// position v was appended at, less blockOffset.
+func (b *simpleCompacter) Store(v []uint64) uint32 {
+	pos := len(b.ValueBlocks)
+	b.ValueBlocks = append(b.ValueBlocks, v)
+	return uint32(pos - blockOffset)
+}
+
+// Print does nothing and returns nil.
+func (b *simpleCompacter) Print(io.Writer) error {
+	// Structures are printed in print.go.
+	return nil
+}
+
+// Handler always panics: blocks stored by the simpleCompacter are expected to
+// be handled as a special case rather than through a named handler function.
+func (b *simpleCompacter) Handler() string {
+	panic("Handler should be special-cased for this Compacter")
+}
@@ -0,0 +1,251 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package triegen
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+	"strings"
+	"text/template"
+)
+
+// print emits the generated source to w: the lookup methods for []byte and
+// string input, the trie type with its tables, and finally the tables of every
+// registered Compacter. It fills in b.Stats and the block expressions used by
+// the templates before executing them, and stops at the first write error.
+func (b *builder) print(w io.Writer) error {
+	valueEntries := len(b.ValueBlocks) * blockSize
+	indexEntries := len(b.IndexBlocks) * blockSize
+	b.Stats.NValueEntries = valueEntries
+	b.Stats.NValueBytes = valueEntries * b.ValueSize
+	b.Stats.NIndexEntries = indexEntries
+	b.Stats.NIndexBytes = indexEntries * b.IndexSize
+	b.Stats.NHandleBytes = len(b.Trie) * 2 * b.IndexSize
+
+	// With several roots the trie value carries explicit starter indexes.
+	// With a single root all starter blocks are at position 0, so the
+	// generated code can index the tables directly by name.
+	b.ASCIIBlock, b.StarterBlock = "t.ascii", "t.utf8Start"
+	if len(b.Trie) == 1 {
+		b.ASCIIBlock, b.StarterBlock = b.Name+"Values", b.Name+"Index"
+	}
+
+	// The lookup methods are generated once per source type.
+	for _, src := range []string{"[]byte", "string"} {
+		b.SourceType = src
+		if err := lookupGen.Execute(w, b); err != nil {
+			return err
+		}
+	}
+
+	if err := trieGen.Execute(w, b); err != nil {
+		return err
+	}
+
+	for i := range b.Compactions {
+		if err := b.Compactions[i].c.Print(w); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// printValues renders value block number n as the body of a keyed array
+// literal. Only non-zero values are written, as "index:value" pairs, and a
+// line break is inserted before the first pair of every group of six indexes.
+func printValues(n int, values []uint64) string {
+	var out bytes.Buffer
+	base := n * blockSize
+	fmt.Fprintf(&out, "\t// Block %#x, offset %#x", n, base)
+
+	breakPending := false
+	for i, v := range values {
+		if i%6 == 0 {
+			breakPending = true
+		}
+		if v == 0 {
+			continue
+		}
+		if breakPending {
+			out.WriteString("\n")
+			breakPending = false
+		}
+		fmt.Fprintf(&out, "\t%#02x:%#04x, ", base+i, v)
+	}
+	return out.String()
+}
+
+// printIndex renders index block number nr as the body of a keyed array
+// literal. For every child of n it writes the child's index shifted by the
+// Offset of the compaction that stores it; nil children and zero references
+// are omitted. A line break is inserted before the first pair of every group
+// of eight indexes.
+func printIndex(b *builder, nr int, n *node) string {
+	var out bytes.Buffer
+	base := nr * blockSize
+	fmt.Fprintf(&out, "\t// Block %#x, offset %#x", nr, base)
+
+	breakPending := false
+	for i, child := range n.children {
+		if i%8 == 0 {
+			breakPending = true
+		}
+		if child == nil {
+			continue
+		}
+		ref := b.Compactions[child.index.compaction].Offset + uint32(child.index.index)
+		if ref == 0 {
+			continue
+		}
+		if breakPending {
+			out.WriteString("\n")
+			breakPending = false
+		}
+		fmt.Fprintf(&out, "\t%#02x:%#02x, ", base+i, ref)
+	}
+	return out.String()
+}
+
+// Templates used by builder.print. Both are parsed once at package
+// initialization; template.Must panics if a template fails to parse.
+//
+// trieGen is given helper functions: printValues and printIndex render one
+// table block each, title is strings.Title, dec returns its argument minus
+// one, and psize formats a byte count followed by the same size in KiB.
+// lookupGen uses no helper functions.
+var (
+	trieGen = template.Must(template.New("trie").Funcs(template.FuncMap{
+		"printValues": printValues,
+		"printIndex":  printIndex,
+		"title":       strings.Title,
+		"dec":         func(x int) int { return x - 1 },
+		"psize": func(n int) string {
+			return fmt.Sprintf("%d bytes (%.2f KiB)", n, float64(n)/1024)
+		},
+	}).Parse(trieTemplate))
+	lookupGen = template.Must(template.New("lookup").Parse(lookupTemplate))
+)
+
+// trieTemplate generates the trie type and its constructor, the handle table
+// (only when more than one trie is generated), the lookupValue method that
+// dispatches on the compaction a block belongs to, and the value and index
+// tables. It is executed with the builder as its data.
+//
+// TODO: consider the return type of lookup. It could be uint64, even if the
+// internal value type is smaller. We will have to verify this with the
+// performance of unicode/norm, which is very sensitive to such changes.
+const trieTemplate = `{{$b := .}}{{$multi := gt (len .Trie) 1}}
+// {{.Name}}Trie. Total size: {{psize .Size}}. Checksum: {{printf "%08x" .Checksum}}.
+type {{.Name}}Trie struct { {{if $multi}}
+	ascii []{{.ValueType}} // index for ASCII bytes
+	utf8Start  []{{.IndexType}} // index for UTF-8 bytes >= 0xC0
+{{end}}}
+
+func new{{title .Name}}Trie(i int) *{{.Name}}Trie { {{if $multi}}
+	h := {{.Name}}TrieHandles[i]
+	return &{{.Name}}Trie{ {{.Name}}Values[uint32(h.ascii)<<6:], {{.Name}}Index[uint32(h.multi)<<6:] }
+}
+
+type {{.Name}}TrieHandle struct {
+	ascii, multi {{.IndexType}}
+}
+
+// {{.Name}}TrieHandles: {{len .Trie}} handles, {{.Stats.NHandleBytes}} bytes
+var {{.Name}}TrieHandles = [{{len .Trie}}]{{.Name}}TrieHandle{
+{{range .Trie}}	{ {{.ASCIIIndex}}, {{.StarterIndex}} }, // {{printf "%08x" .Checksum}}: {{.Name}}
+{{end}}}{{else}}
+	return &{{.Name}}Trie{}
+}
+{{end}}
+// lookupValue determines the type of block n and looks up the value for b.
+func (t *{{.Name}}Trie) lookupValue(n uint32, b byte) {{.ValueType}}{{$last := dec (len .Compactions)}} {
+	switch { {{range $i, $c := .Compactions}}
+		{{if eq $i $last}}default{{else}}case n < {{$c.Cutoff}}{{end}}:{{if ne $i 0}}
+			n -= {{$c.Offset}}{{end}}
+			return {{print $b.ValueType}}({{$c.Handler}}){{end}}
+	}
+}
+
+// {{.Name}}Values: {{len .ValueBlocks}} blocks, {{.Stats.NValueEntries}} entries, {{.Stats.NValueBytes}} bytes
+// The third block is the zero block.
+var {{.Name}}Values = [{{.Stats.NValueEntries}}]{{.ValueType}} {
+{{range $i, $v := .ValueBlocks}}{{printValues $i $v}}
+{{end}}}
+
+// {{.Name}}Index: {{len .IndexBlocks}} blocks, {{.Stats.NIndexEntries}} entries, {{.Stats.NIndexBytes}} bytes
+// Block 0 is the zero block.
+var {{.Name}}Index = [{{.Stats.NIndexEntries}}]{{.IndexType}} {
+{{range $i, $v := .IndexBlocks}}{{printIndex $b $i $v}}
+{{end}}}
+`
+
+// lookupTemplate generates the lookup and lookupUnsafe methods of the trie
+// type. builder.print executes it once with SourceType "[]byte" and once with
+// SourceType "string"; in the latter case the generated methods are named
+// lookupString and lookupStringUnsafe.
+//
+// TODO: consider allowing zero-length strings after evaluating performance with
+// unicode/norm.
+const lookupTemplate = `
+// lookup{{if eq .SourceType "string"}}String{{end}} returns the trie value for the first UTF-8 encoding in s and
+// the width in bytes of this encoding. The size will be 0 if s does not
+// hold enough bytes to complete the encoding. len(s) must be greater than 0.
+func (t *{{.Name}}Trie) lookup{{if eq .SourceType "string"}}String{{end}}(s {{.SourceType}}) (v {{.ValueType}}, sz int) {
+	c0 := s[0]
+	switch {
+	case c0 < 0x80: // is ASCII
+		return {{.ASCIIBlock}}[c0], 1
+	case c0 < 0xC2:
+		return 0, 1  // Illegal UTF-8: not a starter, not ASCII.
+	case c0 < 0xE0: // 2-byte UTF-8
+		if len(s) < 2 {
+			return 0, 0
+		}
+		i := {{.StarterBlock}}[c0]
+		c1 := s[1]
+		if c1 < 0x80 || 0xC0 <= c1 {
+			return 0, 1 // Illegal UTF-8: not a continuation byte.
+		}
+		return t.lookupValue(uint32(i), c1), 2
+	case c0 < 0xF0: // 3-byte UTF-8
+		if len(s) < 3 {
+			return 0, 0
+		}
+		i := {{.StarterBlock}}[c0]
+		c1 := s[1]
+		if c1 < 0x80 || 0xC0 <= c1 {
+			return 0, 1 // Illegal UTF-8: not a continuation byte.
+		}
+		o := uint32(i)<<6 + uint32(c1)
+		i = {{.Name}}Index[o]
+		c2 := s[2]
+		if c2 < 0x80 || 0xC0 <= c2 {
+			return 0, 2 // Illegal UTF-8: not a continuation byte.
+		}
+		return t.lookupValue(uint32(i), c2), 3
+	case c0 < 0xF8: // 4-byte UTF-8
+		if len(s) < 4 {
+			return 0, 0
+		}
+		i := {{.StarterBlock}}[c0]
+		c1 := s[1]
+		if c1 < 0x80 || 0xC0 <= c1 {
+			return 0, 1 // Illegal UTF-8: not a continuation byte.
+		}
+		o := uint32(i)<<6 + uint32(c1)
+		i = {{.Name}}Index[o]
+		c2 := s[2]
+		if c2 < 0x80 || 0xC0 <= c2 {
+			return 0, 2 // Illegal UTF-8: not a continuation byte.
+		}
+		o = uint32(i)<<6 + uint32(c2)
+		i = {{.Name}}Index[o]
+		c3 := s[3]
+		if c3 < 0x80 || 0xC0 <= c3 {
+			return 0, 3 // Illegal UTF-8: not a continuation byte.
+		}
+		return t.lookupValue(uint32(i), c3), 4
+	}
+	// Illegal rune
+	return 0, 1
+}
+
+// lookup{{if eq .SourceType "string"}}String{{end}}Unsafe returns the trie value for the first UTF-8 encoding in s.
+// s must start with a full and valid UTF-8 encoded rune.
+func (t *{{.Name}}Trie) lookup{{if eq .SourceType "string"}}String{{end}}Unsafe(s {{.SourceType}}) {{.ValueType}} {
+	c0 := s[0]
+	if c0 < 0x80 { // is ASCII
+		return {{.ASCIIBlock}}[c0]
+	}
+	i := {{.StarterBlock}}[c0]
+	if c0 < 0xE0 { // 2-byte UTF-8
+		return t.lookupValue(uint32(i), s[1])
+	}
+	i = {{.Name}}Index[uint32(i)<<6+uint32(s[1])]
+	if c0 < 0xF0 { // 3-byte UTF-8
+		return t.lookupValue(uint32(i), s[2])
+	}
+	i = {{.Name}}Index[uint32(i)<<6+uint32(s[2])]
+	if c0 < 0xF8 { // 4-byte UTF-8
+		return t.lookupValue(uint32(i), s[3])
+	}
+	return 0
+}
+`
@@ -0,0 +1,494 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package triegen implements a code generator for a trie for associating
+// unsigned integer values with UTF-8 encoded runes.
+//
+// Many of the go.text packages use tries for storing per-rune information.  A
+// trie is especially useful if many of the runes have the same value. If this
+// is the case, many blocks can be expected to be shared allowing for
+// information on many runes to be stored in little space.
+//
+// As most of the lookups are done directly on []byte slices, the tries use the
+// UTF-8 bytes directly for the lookup. This saves a conversion from UTF-8 to
+// runes and contributes a little bit to better performance. It also naturally
+// provides a fast path for ASCII.
+//
+// Space is also an issue. There are many code points defined in Unicode and as
+// a result tables can get quite large. So every byte counts. The triegen
+// package automatically chooses the smallest integer values to represent the
+// tables. Compacters allow further compression of the trie by allowing for
+// alternative representations of individual trie blocks.
+//
+// triegen allows generating multiple tries as a single structure. This is
+// useful when, for example, one wants to generate tries for several languages
+// that have a lot of values in common. Some existing libraries for
+// internationalization store all per-language data as a dynamically loadable
+// chunk. The go.text packages are designed with the assumption that the user
+// typically wants to compile in support for all supported languages, in line
+// with the approach common to Go to create a single standalone binary. The
+// multi-root trie approach can give significant storage savings in this
+// scenario.
+//
+// triegen generates both tables and code. The code is optimized to use the
+// automatically chosen data types. The following code is generated for a Trie
+// or multiple Tries named "foo":
+//	- type fooTrie
+//		The trie type.
+//
+//	- func newFooTrie(x int) *fooTrie
+//		Trie constructor, where x is the index of the trie passed to Gen.
+//
+//	- func (t *fooTrie) lookup(s []byte) (v uintX, sz int)
+//		The lookup method, where uintX is automatically chosen.
+//
+//	- func lookupString, lookupUnsafe and lookupStringUnsafe
+//		Variants of the above.
+//
+//	- var fooValues and fooIndex and any tables generated by Compacters.
+//		The core trie data.
+//
+//	- var fooTrieHandles
+//		Indexes of starter blocks in case of multiple trie roots.
+//
+// It is recommended that users test the generated trie by checking the returned
+// value for every rune. Such exhaustive tests are possible as the number of
+// runes in Unicode is limited.
+package triegen // import "golang.org/x/text/internal/triegen"
+
+// TODO: Arguably, the internally optimized data types would not have to be
+// exposed in the generated API. We could also investigate not generating the
+// code, but using it through a package. We would have to investigate the impact
+// on performance of making such change, though. For packages like unicode/norm,
+// small changes like this could tank performance.
+
+import (
+	"encoding/binary"
+	"fmt"
+	"hash/crc64"
+	"io"
+	"log"
+	"unicode/utf8"
+)
+
+// builder builds a set of tries for associating values with runes. The set of
+// tries can share common index and value blocks.
+//
+// The exported fields are read by the templates in print.go; the unexported
+// ones are bookkeeping used while the blocks are being allocated.
+type builder struct {
+	// Name is the prefix of all generated identifiers (nameTrie, nameValues,
+	// nameIndex, ...).
+	Name string
+
+	// ValueType is the type of the trie values looked up.
+	ValueType string
+
+	// ValueSize is the byte size of the ValueType.
+	ValueSize int
+
+	// IndexType is the type of trie index values used for all UTF-8 bytes of
+	// a rune except the last one.
+	IndexType string
+
+	// IndexSize is the byte size of the IndexType.
+	IndexSize int
+
+	// SourceType is used when generating the lookup functions. If the user
+	// requests StringSupport, all lookup functions will be generated for
+	// string input as well.
+	SourceType string
+
+	// Trie holds the root tries passed to Gen.
+	Trie []*Trie
+
+	// IndexBlocks and ValueBlocks are the blocks in the order in which they
+	// are written to the index and value tables. Compactions lists the block
+	// storage methods in use: element 0 is the default plain storage, further
+	// elements are added by the Compact option. Checksum is the sum of the
+	// checksums of all tries.
+	IndexBlocks []*node
+	ValueBlocks [][]uint64
+	Compactions []compaction
+	Checksum    uint64
+
+	// ASCIIBlock and StarterBlock are the expressions the generated lookup
+	// code indexes for ASCII bytes and for non-ASCII starter bytes. They are
+	// set by print.
+	ASCIIBlock   string
+	StarterBlock string
+
+	// These maps go from the hash of a block to the position it was already
+	// stored at, so that identical blocks are stored only once.
+	indexBlockIdx map[uint64]int
+	valueBlockIdx map[uint64]nodeIndex
+	asciiBlockIdx map[uint64]int
+
+	// Stats are used to fill out the template.
+	Stats struct {
+		NValueEntries int
+		NValueBytes   int
+		NIndexEntries int
+		NIndexBytes   int
+		NHandleBytes  int
+	}
+
+	// err is the first error recorded through setError.
+	err error
+}
+
+// A nodeIndex encodes the index of a node, which is defined by the compaction
+// which stores it and an index within the compaction. For internal nodes, the
+// compaction is always 0.
+//
+// compaction is a position in builder.Compactions; index is the handle of the
+// block within that compaction.
+type nodeIndex struct {
+	compaction int
+	index      int
+}
+
+// compaction keeps track of stats used for the compaction.
+//
+// c is the Compacter doing the storing, maxHandle the largest handle it has
+// returned from Store so far, and totalSize the sum of the sizes of the blocks
+// assigned to it.
+type compaction struct {
+	c         Compacter
+	blocks    []*node
+	maxHandle uint32
+	totalSize int
+
+	// Used by template-based generator and thus exported.
+	//
+	// Offset is the first block number assigned to this compaction and Cutoff
+	// the first one past it; Handler is the lookup expression emitted in the
+	// generated lookupValue method.
+	Cutoff  uint32
+	Offset  uint32
+	Handler string
+}
+
+// setError records err as the builder's error unless an earlier error has
+// already been recorded; only the first error is kept.
+func (b *builder) setError(err error) {
+	if b.err != nil {
+		return
+	}
+	b.err = err
+}
+
+// An Option can be passed to Gen. Gen applies the options in order before
+// building the trie and aborts with the error of the first option that fails.
+type Option func(b *builder) error
+
+// Compact returns an Option that registers c as an additional way of storing
+// value blocks. The lookup expression emitted for blocks stored by c is a call
+// of the function named by c.Handler with the arguments (n, b).
+func Compact(c Compacter) Option {
+	return func(b *builder) error {
+		entry := compaction{c: c}
+		entry.Handler = c.Handler() + "(n, b)"
+		b.Compactions = append(b.Compactions, entry)
+		return nil
+	}
+}
+
+// Gen writes Go code for a shared trie lookup structure to w for the given
+// Tries. The generated trie type will be called nameTrie. newNameTrie(x) will
+// return the *nameTrie for tries[x]. A value can be looked up by using one of
+// the various lookup methods defined on nameTrie. It returns the table size of
+// the generated trie.
+//
+// Gen returns a size of 0 together with the error if an option fails, if
+// building the tables fails, or if writing to w fails.
+func Gen(w io.Writer, name string, tries []*Trie, opts ...Option) (sz int, err error) {
+	b := &builder{
+		Name: name,
+		Trie: tries,
+		// The index contains two dummy blocks, followed by the zero block.
+		// The zero block is at offset 0x80, so that the offset for the zero
+		// block for continuation bytes is 0.
+		IndexBlocks: []*node{{}, {}, {}},
+		// Compaction 0 is the plain value table, addressed directly.
+		Compactions: []compaction{{
+			Handler: name + "Values[n<<6+uint32(b)]",
+		}},
+		// The 0 key in indexBlockIdx and valueBlockIdx is the hash of the zero
+		// block.
+		indexBlockIdx: map[uint64]int{0: 0},
+		valueBlockIdx: map[uint64]nodeIndex{0: {}},
+		asciiBlockIdx: map[uint64]int{},
+	}
+	b.Compactions[0].c = (*simpleCompacter)(b)
+
+	for _, opt := range opts {
+		if optErr := opt(b); optErr != nil {
+			return 0, optErr
+		}
+	}
+
+	if b.build(); b.err != nil {
+		return 0, b.err
+	}
+	if printErr := b.print(w); printErr != nil {
+		return 0, printErr
+	}
+	return b.Size(), nil
+}
+
+// A Trie represents a single root node of a trie. A builder may build several
+// overlapping tries at once.
+//
+// Create a Trie with NewTrie and fill it with Insert.
+type Trie struct {
+	// root holds the ASCII values and the children for the starter bytes.
+	root *node
+
+	// hiddenTrie is embedded so that its fields are reachable from the
+	// templates as fields of the Trie.
+	hiddenTrie
+}
+
+// hiddenTrie contains values we want to be visible to the template generator,
+// but hidden from the API documentation.
+//
+// Name is the name given to NewTrie. Checksum, ASCIIIndex and StarterIndex are
+// filled in while the trie is built; the latter two are the entries written to
+// the generated handle table.
+type hiddenTrie struct {
+	Name         string
+	Checksum     uint64
+	ASCIIIndex   int
+	StarterIndex int
+}
+
+// NewTrie returns a new, empty trie root with the given name. The root has
+// room for blockSize children and for one value per ASCII byte.
+func NewTrie(name string) *Trie {
+	root := &node{
+		children: make([]*node, blockSize),
+		values:   make([]uint64, utf8.RuneSelf),
+	}
+	return &Trie{
+		root:       root,
+		hiddenTrie: hiddenTrie{Name: name},
+	}
+}
+
+// Gen generates the code for t alone. It is shorthand for calling the
+// package-level Gen with t as the only trie and the name that was passed to
+// NewTrie. It returns the size of the generated tables.
+func (t *Trie) Gen(w io.Writer, opts ...Option) (sz int, err error) {
+	single := []*Trie{t}
+	return Gen(w, t.Name, single, opts...)
+}
+
+// node is a node of the intermediate trie structure.
+type node struct {
+	// children holds this node's children. It is always of length 64.
+	// A child node may be nil.
+	children []*node
+
+	// values contains the values of this node. If it is non-nil, this node is
+	// either a root or leaf node:
+	// For root nodes, len(values) == 128 and it maps the bytes in [0x00, 0x7F].
+	// For leaf nodes, len(values) ==  64 and it maps the bytes in [0x80, 0xBF].
+	values []uint64
+
+	// index is where the block of this node ended up in the generated tables.
+	// It is assigned by builder.computeOffsets.
+	index nodeIndex
+}
+
+// Insert associates value with the given rune. Insert will panic if a non-zero
+// value is passed for an invalid rune.
+//
+// A zero value is never stored: Insert returns immediately, also for invalid
+// runes. Inserting a rune whose encoding would need a node to be both a leaf
+// and an internal node terminates the program through log.Fatalf.
+func (t *Trie) Insert(r rune, value uint64) {
+	if value == 0 {
+		// Note: The UCD tables will always assign what amounts to a zero value
+		// to a surrogate. Allowing a zero value for an illegal rune allows
+		// users to iterate over [0..MaxRune] without having to explicitly
+		// exclude surrogates, which would be tedious.
+		return
+	}
+	enc := string(r)
+	if decoded := []rune(enc)[0]; decoded != r {
+		// r does not survive a round trip through UTF-8, so it is invalid.
+		panic(fmt.Sprintf("triegen: non-zero value for invalid rune %U", r))
+	}
+	if len(enc) == 1 {
+		// It is a root node value (ASCII).
+		t.root.values[enc[0]] = value
+		return
+	}
+
+	// Walk one level down for every byte except the last one, creating the
+	// nodes that are missing on the way.
+	cur := t.root
+	last := len(enc) - 1
+	for i := 0; i < last; i++ {
+		if cur.children == nil {
+			cur.children = make([]*node, blockSize)
+		}
+		slot := enc[i] % blockSize
+		child := cur.children[slot]
+		if child == nil {
+			child = &node{}
+			cur.children[slot] = child
+		}
+		// Every node except the one reached last must be an internal node.
+		if i+1 < last && child.values != nil {
+			log.Fatalf("triegen: insert(%U): found internal node with values", r)
+		}
+		cur = child
+	}
+
+	// cur is the leaf for the final byte, a continuation byte.
+	if cur.values == nil {
+		cur.values = make([]uint64, blockSize)
+	}
+	if cur.children != nil {
+		log.Fatalf("triegen: insert(%U): found leaf node that also has child nodes", r)
+	}
+	cur.values[enc[last]-0x80] = value
+}
+
+// Size returns the number of bytes the generated trie will take to store. It
+// needs to be exported as it is used in the templates.
+//
+// The result is the sum of the index table, the plain value table, the sizes
+// reported by the additional Compacters and, for multi-root tries, the handle
+// table.
+func (b *builder) Size() int {
+	indexBytes := len(b.IndexBlocks) * blockSize * b.IndexSize
+
+	// The plain value table is measured directly rather than through the
+	// totalSize of the first compaction, as that figure does not account for
+	// the ASCII blocks, which are managed separately.
+	valueBytes := len(b.ValueBlocks) * blockSize * b.ValueSize
+
+	total := indexBytes + valueBytes
+	for _, c := range b.Compactions[1:] {
+		total += c.totalSize
+	}
+
+	// TODO: this computation does not account for the fixed overhead of using
+	// a compaction, either code or data. As for data, though, the typical
+	// overhead of data is in the order of bytes (2 bytes for cases). Further,
+	// the savings of using a compaction should anyway be substantial for it to
+	// be worth it.
+
+	// For multi-root tries, we also need to account for the handles.
+	if roots := len(b.Trie); roots > 1 {
+		total += 2 * b.IndexSize * roots
+	}
+	return total
+}
+
+// build computes everything print needs: the value type, the allocation of
+// all blocks, the block number range of every compaction and the index type.
+func (b *builder) build() {
+	// The largest value over all tries determines the value type.
+	var largest uint64
+	for _, t := range b.Trie {
+		largest = maxValue(t.root, largest)
+	}
+	b.ValueType, b.ValueSize = getIntType(largest)
+
+	// Compute all block allocations.
+	// TODO: first compute the ASCII blocks for all tries and then the other
+	// nodes. ASCII blocks are more restricted in placement, as they require two
+	// blocks to be placed consecutively. Processing them first may improve
+	// sharing (at least one zero block can be expected to be saved.)
+	for _, t := range b.Trie {
+		b.Checksum += b.buildTrie(t)
+	}
+
+	// Lay out the handle ranges of the Compacters back to back: each one
+	// starts where the previous one ends and spans maxHandle+1 block numbers.
+	var next uint32
+	for i := range b.Compactions {
+		c := &b.Compactions[i]
+		c.Offset, c.Cutoff = next, next+c.maxHandle+1
+		next = c.Cutoff
+	}
+
+	// The index type must be able to hold the last cutoff as well as every
+	// index block number.
+	// TODO: different byte positions could have different sizes. So far we have
+	// not found a case where this is beneficial.
+	widest := uint64(b.Compactions[len(b.Compactions)-1].Cutoff)
+	for _, blk := range b.IndexBlocks {
+		if idx := uint64(blk.index.index); widest < idx {
+			widest = idx
+		}
+	}
+	b.IndexType, b.IndexSize = getIntType(widest)
+}
+
+// maxValue returns the largest of max and all values stored in n or in any
+// node below it. A nil n leaves max unchanged.
+func maxValue(n *node, max uint64) uint64 {
+	if n == nil {
+		return max
+	}
+	for _, v := range n.values {
+		if v > max {
+			max = v
+		}
+	}
+	for _, child := range n.children {
+		max = maxValue(child, max)
+	}
+	return max
+}
+
+// getIntType returns the name and the byte size of the smallest unsigned
+// integer type that can represent v.
+func getIntType(v uint64) (string, int) {
+	if v>>8 == 0 {
+		return "uint8", 1
+	}
+	if v>>16 == 0 {
+		return "uint16", 2
+	}
+	if v>>32 == 0 {
+		return "uint32", 4
+	}
+	return "uint64", 8
+}
+
+// Block layout constants. Block numbers are stored with an offset subtracted
+// so that the byte used for the lookup can be added without adjusting it
+// first.
+const (
+	// blockSize is the number of entries in one value or index block.
+	blockSize = 64
+
+	// Subtract two blocks to offset 0x80, the first continuation byte.
+	blockOffset = 2
+
+	// Subtract three blocks to offset 0xC0, the first non-ASCII starter.
+	rootBlockOffset = 3
+)
+
+// crcTable is the CRC-64 table used for hashing blocks, both to detect
+// identical blocks and to compute the checksums of the tries.
+var crcTable = crc64.MakeTable(crc64.ISO)
+
+// buildTrie allocates all blocks of t and records in t where its ASCII values
+// and its starter block were placed. It returns the checksum of t, which is
+// the hash computed for its root node.
+func (b *builder) buildTrie(t *Trie) uint64 {
+	n := t.root
+
+	// Get the ASCII offset. For the first trie, the ASCII block will be at
+	// position 0.
+	hasher := crc64.New(crcTable)
+	binary.Write(hasher, binary.BigEndian, n.values)
+	hash := hasher.Sum64()
+
+	// Tries with identical ASCII values share the same pair of value blocks.
+	v, ok := b.asciiBlockIdx[hash]
+	if !ok {
+		v = len(b.ValueBlocks)
+		b.asciiBlockIdx[hash] = v
+
+		// The root values are stored as two consecutive blocks.
+		b.ValueBlocks = append(b.ValueBlocks, n.values[:blockSize], n.values[blockSize:])
+		if v == 0 {
+			// Add the zero block at position 2 so that it will be assigned a
+			// zero reference in the lookup blocks.
+			// TODO: always do this? This would allow us to remove a check from
+			// the trie lookup, but at the expense of extra space. Analyze
+			// performance for unicode/norm.
+			b.ValueBlocks = append(b.ValueBlocks, make([]uint64, blockSize))
+		}
+	}
+	t.ASCIIIndex = v
+
+	// Compute remaining offsets.
+	t.Checksum = b.computeOffsets(n, true)
+	// We already subtracted the normal blockOffset from the index. Subtract the
+	// difference for starter bytes.
+	t.StarterIndex = n.index.index - (rootBlockOffset - blockOffset)
+	return t.Checksum
+}
+
+// computeOffsets assigns a position in the generated tables to n and,
+// recursively, to all nodes below it, and stores it in the index field of each
+// node. It returns a hash of the contents of n, which is 0 if n has neither
+// children nor values. Nodes with the same hash share one block.
+//
+// The root parameter is not used by the body.
+func (b *builder) computeOffsets(n *node, root bool) uint64 {
+	// For the first trie, the root lookup block will be at position 3, which is
+	// the offset for UTF-8 non-ASCII starter bytes.
+	first := len(b.IndexBlocks) == rootBlockOffset
+	if first {
+		b.IndexBlocks = append(b.IndexBlocks, n)
+	}
+
+	// We special-case the cases where all values recursively are 0. This allows
+	// for the use of a zero block to which all such values can be directed.
+	hash := uint64(0)
+	if n.children != nil || n.values != nil {
+		hasher := crc64.New(crcTable)
+		// The hash covers the hashes of all children (0 for a missing child)
+		// followed by the values of the node itself.
+		for _, c := range n.children {
+			var v uint64
+			if c != nil {
+				v = b.computeOffsets(c, false)
+			}
+			binary.Write(hasher, binary.BigEndian, v)
+		}
+		binary.Write(hasher, binary.BigEndian, n.values)
+		hash = hasher.Sum64()
+	}
+
+	// The block appended above is registered under its hash only now that the
+	// hash is known.
+	if first {
+		b.indexBlockIdx[hash] = rootBlockOffset - blockOffset
+	}
+
+	// Compacters don't apply to internal nodes.
+	if n.children != nil {
+		v, ok := b.indexBlockIdx[hash]
+		if !ok {
+			v = len(b.IndexBlocks) - blockOffset
+			b.IndexBlocks = append(b.IndexBlocks, n)
+			b.indexBlockIdx[hash] = v
+		}
+		n.index = nodeIndex{0, v}
+	} else {
+		h, ok := b.valueBlockIdx[hash]
+		if !ok {
+			// Pick the storage with the strictly smallest size, starting from
+			// the size of a plain block (compaction 0).
+			bestI, bestSize := 0, blockSize*b.ValueSize
+			for i, c := range b.Compactions[1:] {
+				if sz, ok := c.c.Size(n.values); ok && bestSize > sz {
+					bestI, bestSize = i+1, sz
+				}
+			}
+			c := &b.Compactions[bestI]
+			c.totalSize += bestSize
+			v := c.c.Store(n.values)
+			if c.maxHandle < v {
+				c.maxHandle = v
+			}
+			h = nodeIndex{bestI, int(v)}
+			b.valueBlockIdx[hash] = h
+		}
+		n.index = h
+	}
+	return hash
+}
@@ -0,0 +1,376 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package ucd provides a parser for Unicode Character Database files, the
+// format of which is defined in http://www.unicode.org/reports/tr44/. See
+// http://www.unicode.org/Public/UCD/latest/ucd/ for example files.
+//
+// It currently does not support substitutions of missing fields.
+package ucd // import "golang.org/x/text/internal/ucd"
+
+import (
+	"bufio"
+	"bytes"
+	"errors"
+	"io"
+	"log"
+	"regexp"
+	"strconv"
+	"strings"
+)
+
+// UnicodeData.txt fields.
+//
+// Each constant is the zero-based position of the field on a line and can be
+// passed as the field index to the accessor methods of Parser.
+const (
+	CodePoint = iota
+	Name
+	GeneralCategory
+	CanonicalCombiningClass
+	BidiClass
+	DecompMapping
+	DecimalValue
+	DigitValue
+	NumericValue
+	BidiMirrored
+	Unicode1Name
+	ISOComment
+	SimpleUppercaseMapping
+	SimpleLowercaseMapping
+	SimpleTitlecaseMapping
+)
+
+// Parse calls f for each entry in the given reader of a UCD file. It will close
+// the reader upon return. It will call log.Fatal if any error occurred.
+//
+// This implements the most common usage pattern of using Parser.
+func Parse(r io.ReadCloser, f func(p *Parser)) {
+	defer r.Close()
+
+	p := New(r)
+	for p.Next() {
+		f(p)
+	}
+
+	err := p.Err()
+	if err == nil {
+		return
+	}
+	// log.Fatal exits the process without running deferred calls, so the
+	// reader has to be closed explicitly first.
+	r.Close()
+	log.Fatal(err)
+}
+
+// An Option is used to configure a Parser. Options are passed to New, which
+// applies them to the new Parser in the order given.
+type Option func(p *Parser)
+
+// keepRanges is the implementation of the KeepRanges option: it turns off the
+// expansion of rune ranges in field 0.
+func keepRanges(p *Parser) {
+	p.keepRanges = true
+}
+
+var (
+	// KeepRanges prevents the expansion of ranges. The raw ranges can be
+	// obtained by calling Range(0) on the parser.
+	//
+	// Without this option, Next reports a line covering a range of runes once
+	// for every rune in the range.
+	KeepRanges Option = keepRanges
+)
+
+// The Part option registers a handler for lines starting with a '@'. The text
+// after a '@' is available as the first field. Comments are handled as usual.
+//
+// Lines starting with a '@' are never reported by Next; without this option
+// they are skipped.
+func Part(f func(p *Parser)) Option {
+	return func(p *Parser) {
+		p.partHandler = f
+	}
+}
+
+// The CommentHandler option passes comments that are on a line by itself to
+// a given handler. The handler receives the text after the leading '#' with
+// surrounding white space removed.
+func CommentHandler(f func(s string)) Option {
+	return func(p *Parser) {
+		p.commentHandler = f
+	}
+}
+
+// A Parser parses Unicode Character Database (UCD) files.
+//
+// Call Next to advance to the next entry, use the accessor methods to read its
+// fields, and check Err once Next has returned false.
+type Parser struct {
+	scanner *bufio.Scanner
+
+	keepRanges bool // Don't expand rune ranges in field 0.
+
+	// err is the first error encountered. comment and field hold the
+	// trailing comment and the ';'-separated fields of the current line.
+	err     error
+	comment []byte
+	field   [][]byte
+	// parsedRange is needed in case Range(0) is called more than once for one
+	// field. In some cases this requires scanning ahead.
+	parsedRange          bool
+	rangeStart, rangeEnd rune
+
+	// Optional handlers installed by the Part and CommentHandler options.
+	partHandler    func(p *Parser)
+	commentHandler func(s string)
+}
+
+// setError records err as the parser's error unless an earlier error has
+// already been recorded; only the first error is kept.
+func (p *Parser) setError(err error) {
+	if p.err != nil {
+		return
+	}
+	p.err = err
+}
+
+// getField returns field i of the current line, or nil if the line has fewer
+// than i+1 fields.
+func (p *Parser) getField(i int) []byte {
+	if i < len(p.field) {
+		return p.field[i]
+	}
+	return nil
+}
+
+// Err returns a non-nil error if any error occurred during parsing. Only the
+// first error is retained; this covers read errors reported by the scanner as
+// well as errors from the field accessor methods.
+func (p *Parser) Err() error {
+	return p.err
+}
+
+// New returns a Parser that reads lines from r, configured by applying the
+// given options in order.
+func New(r io.Reader, o ...Option) *Parser {
+	p := &Parser{scanner: bufio.NewScanner(r)}
+	for i := range o {
+		o[i](p)
+	}
+	return p
+}
+
+// Next parses the next line in the file. It returns true if a line was parsed
+// and false if it reached the end of the file.
+//
+// Unless ranges are kept, a line whose first field is a range of runes is
+// reported once per rune: Next then only advances to the next rune of the
+// range and leaves the fields untouched. Empty lines, comment lines and lines
+// starting with '@' are never reported. When Next returns false, any read
+// error of the underlying scanner is available through Err.
+func (p *Parser) Next() bool {
+	// Still inside an expanded range: step to the next rune.
+	if !p.keepRanges && p.rangeStart < p.rangeEnd {
+		p.rangeStart++
+		return true
+	}
+	p.comment = nil
+	p.field = p.field[:0]
+	p.parsedRange = false
+
+	for p.scanner.Scan() {
+		b := p.scanner.Bytes()
+		if len(b) == 0 {
+			continue
+		}
+		// A comment on a line by itself.
+		if b[0] == '#' {
+			if p.commentHandler != nil {
+				p.commentHandler(strings.TrimSpace(string(b[1:])))
+			}
+			continue
+		}
+
+		// Parse line
+		// Split off a trailing comment first.
+		if i := bytes.IndexByte(b, '#'); i != -1 {
+			p.comment = bytes.TrimSpace(b[i+1:])
+			b = b[:i]
+		}
+		if b[0] == '@' {
+			// The handler sees the text after the '@' as the only field.
+			if p.partHandler != nil {
+				p.field = append(p.field, bytes.TrimSpace(b[1:]))
+				p.partHandler(p)
+				p.field = p.field[:0]
+			}
+			p.comment = nil
+			continue
+		}
+		// Split the remainder into ';'-separated fields, trimming white space.
+		for {
+			i := bytes.IndexByte(b, ';')
+			if i == -1 {
+				p.field = append(p.field, bytes.TrimSpace(b))
+				break
+			}
+			p.field = append(p.field, bytes.TrimSpace(b[:i]))
+			b = b[i+1:]
+		}
+		if !p.keepRanges {
+			p.rangeStart, p.rangeEnd = p.getRange(0)
+		}
+		return true
+	}
+	p.setError(p.scanner.Err())
+	return false
+}
+
+// parseRune interprets b as the hexadecimal number of a code point. A leading
+// "U+" is skipped when it is followed by at least one more byte. The rune
+// returned alongside a non-nil error is not meaningful.
+func parseRune(b []byte) (rune, error) {
+	digits := b
+	if len(b) > 2 && string(b[:2]) == "U+" {
+		digits = b[2:]
+	}
+	x, err := strconv.ParseUint(string(digits), 16, 32)
+	return rune(x), err
+}
+
+// parseRune parses b as a code point, recording a parse failure as the
+// parser's error instead of returning it.
+func (p *Parser) parseRune(b []byte) rune {
+	r, err := parseRune(b)
+	if err != nil {
+		p.setError(err)
+	}
+	return r
+}
+
+// Rune parses and returns field i as a rune. For field 0 of a parser that
+// expands ranges, it returns the current rune of the range instead of parsing
+// the field.
+func (p *Parser) Rune(i int) rune {
+	if i <= 0 && !p.keepRanges {
+		return p.rangeStart
+	}
+	return p.parseRune(p.getField(i))
+}
+
+// Runes interprets and returns field i as a sequence of runes. The field is
+// split at spaces and every non-empty piece is parsed as one code point.
+func (p *Parser) Runes(i int) (runes []rune) {
+	for _, piece := range bytes.Split(p.getField(i), []byte{' '}) {
+		if piece = bytes.TrimSpace(piece); len(piece) > 0 {
+			runes = append(runes, p.parseRune(piece))
+		}
+	}
+	return runes
+}
+
+var (
+	// errIncorrectLegacyRange is recorded when a "<..., First>" line is not
+	// followed by a matching "<..., Last>" line.
+	errIncorrectLegacyRange = errors.New("ucd: unmatched <* First>")
+
+	// reRange matches one line of a legacy rune range. Its groups are the
+	// code point, the name and the First/Last marker inside the angle
+	// brackets, and the remainder of the line.
+	reRange = regexp.MustCompile("^([0-9A-F]*);<([^,]*), ([^>]*)>(.*)$")
+)
+
+// Range parses and returns field i as a rune range. A range is inclusive at
+// both ends. If the field only has one rune, first and last will be identical.
+// It supports the legacy format for ranges used in UnicodeData.txt.
+//
+// For a parser that expands ranges, both results are the current rune of the
+// range, whatever the value of i.
+func (p *Parser) Range(i int) (first, last rune) {
+	if p.keepRanges {
+		return p.getRange(i)
+	}
+	return p.rangeStart, p.rangeStart
+}
+
+// getRange parses field i as a range of runes. It understands a single code
+// point, the "first..last" notation, and, for field 0 only, the legacy
+// notation of UnicodeData.txt in which a range is spelled as a "First" line
+// followed by a "Last" line. Handling the legacy notation consumes the next
+// line of the input.
+func (p *Parser) getRange(i int) (first, last rune) {
+	b := p.getField(i)
+	if k := bytes.Index(b, []byte("..")); k != -1 {
+		return p.parseRune(b[:k]), p.parseRune(b[k+2:])
+	}
+	// The first field may not be a rune, in which case we may ignore any error
+	// and set the range as 0..0.
+	x, err := parseRune(b)
+	if err != nil {
+		// Disable range parsing henceforth. This ensures that an error will be
+		// returned if the user subsequently will try to parse this field as
+		// a Rune.
+		p.keepRanges = true
+	}
+	// Special case for UnicodeData that was retained for backwards compatibility.
+	if i == 0 && len(p.field) > 1 && bytes.HasSuffix(p.field[1], []byte("First>")) {
+		// The following line has already been consumed by an earlier call.
+		if p.parsedRange {
+			return p.rangeStart, p.rangeEnd
+		}
+		mf := reRange.FindStringSubmatch(p.scanner.Text())
+		if mf == nil || !p.scanner.Scan() {
+			p.setError(errIncorrectLegacyRange)
+			return x, x
+		}
+		// Using Bytes would be more efficient here, but Text is a lot easier
+		// and this is not a frequent case.
+		ml := reRange.FindStringSubmatch(p.scanner.Text())
+		// Both lines must agree on the name and on the rest of the line, and
+		// the second one must be marked "Last".
+		if ml == nil || mf[2] != ml[2] || ml[3] != "Last" || mf[4] != ml[4] {
+			p.setError(errIncorrectLegacyRange)
+			return x, x
+		}
+		p.rangeStart, p.rangeEnd = x, p.parseRune(p.scanner.Bytes()[:len(ml[1])])
+		p.parsedRange = true
+		return p.rangeStart, p.rangeEnd
+	}
+	return x, x
+}
+
+// bools recognizes all valid UCD boolean values. The keys are matched exactly,
+// including their case; an empty field counts as false.
+var bools = map[string]bool{
+	"":      false,
+	"N":     false,
+	"No":    false,
+	"F":     false,
+	"False": false,
+	"Y":     true,
+	"Yes":   true,
+	"T":     true,
+	"True":  true,
+}
+
+// Bool parses and returns field i as a boolean value. The field must be one of
+// the spellings listed in bools; for any other content Bool records
+// strconv.ErrSyntax as the parser's error and returns false.
+func (p *Parser) Bool(i int) bool {
+	// A direct lookup replaces a scan over all entries of the map. Converting
+	// the field to a string inside the index expression does not allocate.
+	if v, ok := bools[string(p.getField(i))]; ok {
+		return v
+	}
+	p.setError(strconv.ErrSyntax)
+	return false
+}
+
+// Int parses and returns field i as an integer value. A field that is not a
+// decimal integer, or that does not fit in an int, is recorded as the parser's
+// error.
+func (p *Parser) Int(i int) int {
+	// A bit size of 0 makes ParseInt check the range of int itself, so that
+	// the conversion below cannot silently truncate where int is 32 bits wide.
+	x, err := strconv.ParseInt(string(p.getField(i)), 10, 0)
+	p.setError(err)
+	return int(x)
+}
+
+// Uint parses and returns field i as an unsigned integer value. A field that
+// is not an unsigned decimal integer, or that does not fit in a uint, is
+// recorded as the parser's error.
+func (p *Parser) Uint(i int) uint {
+	// A bit size of 0 makes ParseUint check the range of uint itself, so that
+	// the conversion below cannot silently truncate where uint is 32 bits wide.
+	x, err := strconv.ParseUint(string(p.getField(i)), 10, 0)
+	p.setError(err)
+	return uint(x)
+}
+
+// Float parses and returns field i as a decimal value. A field that cannot be
+// parsed as a 64-bit floating-point number is recorded as the parser's error.
+func (p *Parser) Float(i int) float64 {
+	text := string(p.getField(i))
+	val, err := strconv.ParseFloat(text, 64)
+	p.setError(err)
+	return val
+}
+
+// String parses and returns field i as a string value. It returns the empty
+// string if the current line has no field i.
+func (p *Parser) String(i int) string {
+	return string(p.getField(i))
+}
+
+// Strings parses and returns field i as a space-separated list of strings.
+// Every element has surrounding white space removed. Consecutive spaces yield
+// empty elements, and an empty field yields a single empty element.
+func (p *Parser) Strings(i int) []string {
+	parts := strings.Split(string(p.getField(i)), " ")
+	for k := range parts {
+		parts[k] = strings.TrimSpace(parts[k])
+	}
+	return parts
+}
+
+// Comment returns the comments for the current line: the text following the
+// first '#' on the line, with surrounding white space removed. It is empty if
+// the line has no comment.
+func (p *Parser) Comment() string {
+	return string(p.comment)
+}
+
+// errUndefinedEnum is recorded by Enum when a field holds none of the allowed
+// values.
+var errUndefinedEnum = errors.New("ucd: undefined enum value")
+
+// Enum interprets and returns field i as a value that must be one of the values
+// in enum. If the field matches none of them, Enum records errUndefinedEnum as
+// the parser's error and returns the empty string.
+func (p *Parser) Enum(i int, enum ...string) string {
+	field := p.getField(i)
+	for k := range enum {
+		if string(field) == enum[k] {
+			return enum[k]
+		}
+	}
+	p.setError(errUndefinedEnum)
+	return ""
+}
+
+// bstrEq reports whether b and s hold the same sequence of bytes.
+func bstrEq(b []byte, s string) bool {
+	// The compiler evaluates this comparison without allocating a string.
+	return string(b) == s
+}
@@ -1,16 +0,0 @@
-# Copyright 2013 The Go Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style
-# license that can be found in the LICENSE file.
-
-CLEANFILES+=maketables
-
-maketables: maketables.go
-	go build $^
-
-tables:	maketables
-	./maketables > tables.go
-	gofmt -w -s tables.go
-
-# Build (but do not run) maketables during testing,
-# just to make sure it still compiles.
-testshort: maketables
@@ -0,0 +1,20 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+package main
+
+// This file contains code common to the maketables.go and the package code.
+
+// langAliasType is the type of an alias in langAliasMap.
+type langAliasType int8
+
+// The alias kinds are numbered consecutively from zero via iota.
+// langAliasTypeUnknown is set explicitly to -1 and therefore lies outside
+// that sequence.
+const (
+	langDeprecated langAliasType = iota
+	langMacro
+	langLegacy
+
+	langAliasTypeUnknown langAliasType = -1
+)
@@ -0,0 +1,162 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+package main
+
+// This file generates derivative tables based on the language package itself.
+
+import (
+	"bytes"
+	"flag"
+	"fmt"
+	"io/ioutil"
+	"log"
+	"reflect"
+	"sort"
+	"strings"
+
+	"golang.org/x/text/internal/gen"
+	"golang.org/x/text/language"
+	"golang.org/x/text/unicode/cldr"
+)
+
+// Command-line flags of the generator.
+// NOTE(review): neither flag is read by the main function in this file;
+// presumably they are kept for parity with the other generators — confirm.
+var (
+	// test is the -test flag (default false).
+	test = flag.Bool("test", false,
+		"test existing tables; can be used to compare web data with package data.")
+
+	// draft is the -draft flag (default "contributed").
+	draft = flag.String("draft",
+		"contributed",
+		`Minimal draft requirements (approved, contributed, provisional, unconfirmed).`)
+)
+
+// main generates index.go for the language package. It decodes the CLDR core
+// zip, builds a language.Tag for every locale and for every locale named in
+// the plural rules, and writes the NumCompactTags constant, the specialTags
+// variable and the coreTags map.
+func main() {
+	gen.Init()
+
+	// Read the CLDR zip file.
+	r := gen.OpenCLDRCoreZip()
+	defer r.Close()
+
+	d := &cldr.Decoder{}
+	data, err := d.DecodeZip(r)
+	if err != nil {
+		log.Fatalf("DecodeZip: %v", err)
+	}
+
+	w := gen.NewCodeWriter()
+	// The file is only written when main returns normally: log.Fatalf exits
+	// the process without running deferred functions.
+	defer func() {
+		buf := &bytes.Buffer{}
+
+		if _, err = w.WriteGo(buf, "language"); err != nil {
+			log.Fatalf("Error formatting file index.go: %v", err)
+		}
+
+		// Since we're generating a table for our own package we need to rewrite
+		// doing the equivalent of go fmt -r 'language.b -> b'. Using
+		// bytes.Replace will do.
+		out := bytes.Replace(buf.Bytes(), []byte("language."), nil, -1)
+		if err := ioutil.WriteFile("index.go", out, 0600); err != nil {
+			log.Fatalf("Could not create file index.go: %v", err)
+		}
+	}()
+
+	// m is used as a set, so a tag reached through several sources is only
+	// counted once.
+	m := map[language.Tag]bool{}
+	for _, lang := range data.Locales() {
+		// We include all locales unconditionally to be consistent with en_US.
+		// We want en_US, even though it has no data associated with it.
+
+		// TODO: put any of the languages for which no data exists at the end
+		// of the index. This allows all components based on ICU to use that
+		// as the cutoff point.
+		// if x := data.RawLDML(lang); false ||
+		// 	x.LocaleDisplayNames != nil ||
+		// 	x.Characters != nil ||
+		// 	x.Delimiters != nil ||
+		// 	x.Measurement != nil ||
+		// 	x.Dates != nil ||
+		// 	x.Numbers != nil ||
+		// 	x.Units != nil ||
+		// 	x.ListPatterns != nil ||
+		// 	x.Collations != nil ||
+		// 	x.Segmentations != nil ||
+		// 	x.Rbnf != nil ||
+		// 	x.Annotations != nil ||
+		// 	x.Metadata != nil {
+
+		// TODO: support POSIX natively, albeit non-standard.
+		tag := language.Make(strings.Replace(lang, "_POSIX", "-u-va-posix", 1))
+		m[tag] = true
+		// }
+	}
+	// Include locales for plural rules, which uses a different structure.
+	for _, plurals := range data.Supplemental().Plurals {
+		for _, rules := range plurals.PluralRules {
+			for _, lang := range strings.Split(rules.Locales, " ") {
+				m[language.Make(lang)] = true
+			}
+		}
+	}
+
+	// Tags without variants and without extensions are "core"; everything
+	// else is "special". The only extension accepted is u-va-posix.
+	var core, special []language.Tag
+
+	for t := range m {
+		if x := t.Extensions(); len(x) != 0 && fmt.Sprint(x) != "[u-va-posix]" {
+			log.Fatalf("Unexpected extension %v in %v", x, t)
+		}
+		if len(t.Variants()) == 0 && len(t.Extensions()) == 0 {
+			core = append(core, t)
+		} else {
+			special = append(special, t)
+		}
+	}
+
+	w.WriteComment(`
+	NumCompactTags is the number of common tags. The maximum tag is
+	NumCompactTags-1.`)
+	w.WriteConst("NumCompactTags", len(core)+len(special))
+
+	sort.Sort(byAlpha(special))
+	w.WriteVar("specialTags", special)
+
+	// TODO: order by frequency?
+	sort.Sort(byAlpha(core))
+
+	// Size computations are just an estimate.
+	w.Size += int(reflect.TypeOf(map[uint32]uint16{}).Size())
+	w.Size += len(core) * 6 // size of uint32 and uint16
+
+	// The map literal is emitted by hand. Each key is the concatenation of the
+	// hexadecimal indices returned by getIndex for the three parts of t.Raw().
+	fmt.Fprintln(w)
+	fmt.Fprintln(w, "var coreTags = map[uint32]uint16{")
+	fmt.Fprintln(w, "0x0: 0, // und")
+	i := len(special) + 1 // Und and special tags already written.
+	for _, t := range core {
+		if t == language.Und {
+			continue
+		}
+		fmt.Fprint(w.Hash, t, i)
+		b, s, r := t.Raw()
+		fmt.Fprintf(w, "0x%s%s%s: %d, // %s\n",
+			getIndex(b, 3), // 3 is enough as it is guaranteed to be a compact number
+			getIndex(s, 2),
+			getIndex(r, 3),
+			i, t)
+		i++
+	}
+	fmt.Fprintln(w, "}")
+}
+
+// getIndex formats x with %#v, which is expected to look like
+// Type{typeID: 0x00}, and returns the hexadecimal digits that follow "0x",
+// left-padded with zeros to n digits. The final character of the formatted
+// value (the closing brace) is dropped.
+func getIndex(x interface{}, n int) string {
+	repr := fmt.Sprintf("%#v", x)
+	start := strings.Index(repr, "0x") + 2
+	digits := repr[start : len(repr)-1]
+	return strings.Repeat("0", n-len(digits)) + digits
+}
+
+// byAlpha sorts language tags by their string representation. It provides
+// the Len, Swap and Less methods required by sort.Sort.
+type byAlpha []language.Tag
+
+func (a byAlpha) Len() int { return len(a) }
+
+func (a byAlpha) Swap(i, j int) {
+	a[j], a[i] = a[i], a[j]
+}
+
+func (a byAlpha) Less(i, j int) bool {
+	left, right := a[i].String(), a[j].String()
+	return left < right
+}
@@ -0,0 +1,100 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cldr
+
+import (
+	"encoding/xml"
+	"regexp"
+	"strconv"
+)
+
+// Elem is implemented by every XML element.
+//
+// Common supplies an implementation of all four methods.
+type Elem interface {
+	// Unexported bookkeeping: record the enclosing element and the name, and
+	// read the enclosing element back.
+	setEnclosing(Elem)
+	setName(string)
+	enclosing() Elem
+
+	// GetCommon returns the Common part of the element.
+	GetCommon() *Common
+}
+
+// hidden holds the character data of an element together with its optional
+// <alias> and <default> child elements. It is embedded in Common.
+type hidden struct {
+	// CharData is the raw character data of the element.
+	CharData string `xml:",chardata"`
+	// Alias is decoded from an <alias> child element, if present.
+	Alias    *struct {
+		Common
+		Source string `xml:"source,attr"`
+		Path   string `xml:"path,attr"`
+	} `xml:"alias"`
+	// Def is decoded from a <default> child element, if present; it is read
+	// by Common.Default.
+	Def *struct {
+		Common
+		Choice string `xml:"choice,attr,omitempty"`
+		Type   string `xml:"type,attr,omitempty"`
+	} `xml:"default"`
+}
+
+// Common holds several of the most common attributes and sub elements
+// of an XML element.
+type Common struct {
+	XMLName         xml.Name
+	// name is assigned through setName.
+	name            string
+	// enclElem is stored by setEnclosing and returned by enclosing.
+	enclElem        Elem
+	Type            string `xml:"type,attr,omitempty"`
+	Reference       string `xml:"reference,attr,omitempty"`
+	Alt             string `xml:"alt,attr,omitempty"`
+	ValidSubLocales string `xml:"validSubLocales,attr,omitempty"`
+	Draft           string `xml:"draft,attr,omitempty"`
+	// hidden contributes CharData, Alias and Def.
+	hidden
+}
+
+// Default returns the default type to select from the enclosed list
+// or "" if no default value is specified.
+//
+// The choice attribute of the default element takes precedence over its type
+// attribute.
+func (e *Common) Default() string {
+	def := e.Def
+	switch {
+	case def == nil:
+		return ""
+	case def.Choice != "":
+		return def.Choice
+	default:
+		// Type is still used by the default element in collation. It is ""
+		// when the attribute is absent.
+		return def.Type
+	}
+}
+
+// GetCommon returns e. It is provided such that Common implements Elem; a
+// type that embeds Common gets this method promoted and so exposes its
+// Common part through it.
+func (e *Common) GetCommon() *Common {
+	return e
+}
+
+// Data returns the character data accumulated for this element, with every
+// escape sequence matched by charRe replaced by the rune it denotes.
+//
+// NOTE(review): the converted text is stored back into e.CharData, so each
+// further call runs the substitution again on already-converted text. Text
+// whose conversion itself yields something that looks like an escape sequence
+// would change between calls — confirm this cannot occur in the input.
+func (e *Common) Data() string {
+	e.CharData = charRe.ReplaceAllStringFunc(e.CharData, replaceUnicode)
+	return e.CharData
+}
+
+// setName stores s as the name of the element. It is part of Elem.
+func (e *Common) setName(s string) {
+	e.name = s
+}
+
+// enclosing returns the element previously stored with setEnclosing, or nil
+// if none was stored. It is part of Elem.
+func (e *Common) enclosing() Elem {
+	return e.enclElem
+}
+
+// setEnclosing records en as the element that encloses e. It is part of Elem.
+func (e *Common) setEnclosing(en Elem) {
+	e.enclElem = en
+}
+
+// charRe matches the escape notations that replaceUnicode can turn into a
+// single rune: XML hexadecimal character references (&#x...;) and the
+// backslash escapes \uXXXX, \UXXXXXXXX, \xXX, three-digit octal and the
+// single-letter escapes \a \b \t \n \v \f \r.
+var charRe = regexp.MustCompile(`&#x[0-9a-fA-F]*;|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8}|\\x[0-9a-fA-F]{2}|\\[0-7]{3}|\\[abtnvfr]`)
+
+// replaceUnicode converts hexadecimal Unicode codepoint notations to a one-rune string.
+// It assumes the input string is correctly formatted, i.e. that s is a match
+// of charRe; conversion errors are deliberately ignored for that reason.
+func replaceUnicode(s string) string {
+	if s[1] == '#' {
+		// s has the form "&#x<hex>;": skip the three-byte prefix and the
+		// trailing semicolon.
+		r, _ := strconv.ParseInt(s[3:len(s)-1], 16, 32)
+		// Convert through rune: string(int64) is flagged by go vet because it
+		// is easily mistaken for a decimal formatting of the number.
+		return string(rune(r))
+	}
+	r, _, _, _ := strconv.UnquoteChar(s, 0)
+	return string(r)
+}
@@ -0,0 +1,130 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:generate go run makexml.go -output xml.go
+
+// Package cldr provides a parser for LDML and related XML formats.
+// This package is intended to be used by the table generation tools
+// for the various internationalization-related packages.
+// As the XML types are generated from the CLDR DTD, and as the CLDR standard
+// is periodically amended, this package may change considerably over time.
+// This mostly means that data may appear and disappear between versions.
+// That is, old code should keep compiling for newer versions, but data
+// may have moved or changed.
+// CLDR version 22 is the first version supported by this package.
+// Older versions may not work.
+package cldr // import "golang.org/x/text/unicode/cldr"
+
+import (
+	"fmt"
+	"sort"
+)
+
+// CLDR provides access to parsed data of the Unicode Common Locale Data Repository.
+type CLDR struct {
+	parent   map[string][]string
+	// locale holds the unresolved LDML per locale name; see RawLDML and Locales.
+	locale   map[string]*LDML
+	// resolved is reset to an empty map by SetDraftLevel.
+	resolved map[string]*LDML
+	// bcp47 and supp receive the decoded contents of the "bcp47" and
+	// "supplemental" directories respectively.
+	bcp47    *LDMLBCP47
+	supp     *SupplementalData
+}
+
+// makeCLDR returns a CLDR in which every map is allocated and both the BCP47
+// and the supplemental data point at empty values, ready to be decoded into.
+func makeCLDR() *CLDR {
+	c := new(CLDR)
+	c.parent = map[string][]string{}
+	c.locale = map[string]*LDML{}
+	c.resolved = map[string]*LDML{}
+	c.bcp47 = new(LDMLBCP47)
+	c.supp = new(SupplementalData)
+	return c
+}
+
+// BCP47 returns the parsed BCP47 LDML data.
+//
+// For a CLDR created by makeCLDR the result is never nil; it is an empty
+// LDMLBCP47 if no bcp47 data was decoded. This mirrors Supplemental.
+func (cldr *CLDR) BCP47() *LDMLBCP47 {
+	return cldr.bcp47
+}
+
+// Draft indicates the draft level of an element.
+type Draft int
+
+// The draft levels, numbered consecutively from Approved (0) to
+// Unconfirmed (3).
+const (
+	Approved Draft = iota
+	Contributed
+	Provisional
+	Unconfirmed
+)
+
+// drafts lists the level names in reverse Draft order, so that the name of
+// level d is drafts[Unconfirmed-d]. The trailing "" entry is not a name; the
+// empty string is handled separately by ParseDraft.
+var drafts = []string{"unconfirmed", "provisional", "contributed", "approved", ""}
+
+// ParseDraft returns the Draft value corresponding to the given string. The
+// empty string corresponds to Approved. An unknown level yields Approved and
+// a non-nil error.
+func ParseDraft(level string) (Draft, error) {
+	if level == "" {
+		return Approved, nil
+	}
+	for i, s := range drafts {
+		if level == s {
+			return Unconfirmed - Draft(i), nil
+		}
+	}
+	return Approved, fmt.Errorf("cldr: unknown draft level %q", level)
+}
+
+// String returns the name of the draft level, such that
+// ParseDraft(d.String()) yields d for each of the four defined levels.
+// A value outside the defined range is rendered as "Draft(n)".
+func (d Draft) String() string {
+	if d < Approved || d > Unconfirmed {
+		return fmt.Sprintf("Draft(%d)", int(d))
+	}
+	// drafts is stored in reverse order of the Draft constants.
+	return drafts[Unconfirmed-d]
+}
+
+// SetDraftLevel sets which draft levels to include in the evaluated LDML.
+// Any draft element for which the draft level is higher than lev will be excluded.
+// If multiple draft levels are available for a single element, the one with the
+// lowest draft level will be selected, unless preferDraft is true, in which case
+// the highest draft will be chosen.
+// It is assumed that the underlying LDML is canonicalized.
+//
+// NOTE: the selection described above is not implemented yet. At present lev
+// and preferDraft are ignored and the call only discards the cache of
+// resolved LDML.
+func (cldr *CLDR) SetDraftLevel(lev Draft, preferDraft bool) {
+	// TODO: implement
+	cldr.resolved = make(map[string]*LDML)
+}
+
+// RawLDML returns the LDML XML for loc in unresolved form.
+// loc must be one of the strings returned by Locales; for any other value
+// the map lookup yields nil.
+func (cldr *CLDR) RawLDML(loc string) *LDML {
+	return cldr.locale[loc]
+}
+
+// LDML returns the fully resolved LDML XML for loc, which must be one of
+// the strings returned by Locales. The work, including error reporting, is
+// delegated to resolve.
+func (cldr *CLDR) LDML(loc string) (*LDML, error) {
+	return cldr.resolve(loc)
+}
+
+// Supplemental returns the parsed supplemental data.
+//
+// For a CLDR created by makeCLDR the result is never nil: it is an empty
+// SupplementalData if no supplemental data was decoded. nil is returned only
+// for a CLDR value whose supp field was never set.
+func (cldr *CLDR) Supplemental() *SupplementalData {
+	return cldr.supp
+}
+
+// Locales returns the locales for which there exist files.
+// Valid sublocales for which there is no file are not included.
+// The root locale is always sorted first; all other names follow in
+// ascending string order.
+func (cldr *CLDR) Locales() []string {
+	names := make([]string, 0, len(cldr.locale))
+	rootSeen := false
+	for name := range cldr.locale {
+		if name == "root" {
+			rootSeen = true
+		} else {
+			names = append(names, name)
+		}
+	}
+	sort.Strings(names)
+	if rootSeen {
+		return append([]string{"root"}, names...)
+	}
+	return names
+}
+
+// Get evaluates the XPath path starting at e and returns the resulting
+// element. It is a thin wrapper around walkXPath, whose result and error are
+// returned unchanged.
+func Get(e Elem, path string) (res Elem, err error) {
+	return walkXPath(e, path)
+}
@@ -0,0 +1,359 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cldr
+
+import (
+	"bufio"
+	"encoding/xml"
+	"errors"
+	"fmt"
+	"strconv"
+	"strings"
+	"unicode"
+	"unicode/utf8"
+)
+
+// RuleProcessor can be passed to Collator's Process method, which
+// parses the rules and calls the respective method for each rule found.
+type RuleProcessor interface {
+	// Reset is called for a reset ('&') rule. anchor is the reset position,
+	// either a plain string or a logical position formatted with
+	// specialAnchor. before is the level given with "before", or 0.
+	Reset(anchor string, before int) error
+	// Insert is called for each relation. level is the strength of the
+	// relation, str the string being ordered, and context and extend the
+	// optional strings following '|' and '/'.
+	Insert(level int, str, context, extend string) error
+	// Index is called instead of Insert for a value starting with cldrIndex;
+	// id is the value without that prefix.
+	Index(id string)
+}
+
+const (
+	// cldrIndex is a Unicode-reserved sentinel value used to mark the start
+	// of a grouping within an index.
+	// We ignore any rule that starts with this rune.
+	// See http://unicode.org/reports/tr35/#Collation_Elements for details.
+	cldrIndex = "\uFDD0"
+
+	// specialAnchor is the format in which to represent logical reset positions,
+	// such as "first tertiary ignorable".
+	specialAnchor = "<%s/>"
+)
+
+// Process parses the rules for the tailorings of this collation
+// and calls the respective methods of p for each rule found.
+//
+// A single cr element is parsed with processRules; more than one is an
+// error. Without cr elements the XML rules are used, and it is an error if
+// those are absent as well.
+func (c Collation) Process(p RuleProcessor) (err error) {
+	switch n := len(c.Cr); {
+	case n > 1:
+		return fmt.Errorf("multiple cr elements, want 0 or 1")
+	case n == 1:
+		return processRules(p, c.Cr[0].Data())
+	}
+	if c.Rules.Any == nil {
+		return errors.New("no tailoring data")
+	}
+	return c.processXML(p)
+}
+
+// processRules parses rules in the Collation Rule Syntax defined in
+// http://www.unicode.org/reports/tr35/tr35-collation.html#Collation_Tailorings.
+//
+// The rules in s are handled line by line; '#' starts a comment that runs to
+// the end of the line. Parsing stops at the first error, which is returned
+// prefixed with the 1-based number of the line on which it occurred. No
+// further method of p is called once an error has been detected.
+func processRules(p RuleProcessor, s string) (err error) {
+	// chk records e if it is the first error and passes the remaining input
+	// through, so that it can wrap the (tail, error) results of the parsers.
+	chk := func(s string, e error) string {
+		if err == nil {
+			err = e
+		}
+		return s
+	}
+	i := 0 // Save the line number for use after the loop.
+	scanner := bufio.NewScanner(strings.NewReader(s))
+	for ; scanner.Scan() && err == nil; i++ {
+		// A line ends at its last rule, at a comment, or at the first error.
+		for s := skipSpace(scanner.Text()); err == nil && s != "" && s[0] != '#'; s = skipSpace(s) {
+			level := 5
+			var ch byte
+			switch ch, s = s[0], s[1:]; ch {
+			case '&': // followed by <anchor> or '[' <key> ']'
+				if s = skipSpace(s); consume(&s, '[') {
+					s = chk(parseSpecialAnchor(p, s))
+				} else {
+					s = chk(parseAnchor(p, 0, s))
+				}
+			case '<': // sort relation '<'{1,4}, optionally followed by '*'.
+				for level = 1; consume(&s, '<'); level++ {
+				}
+				if level > 4 {
+					err = fmt.Errorf("level %d > 4", level)
+					// Leave the switch without inserting anything; the loop
+					// condition then ends the parse.
+					break
+				}
+				fallthrough
+			case '=': // identity relation, optionally followed by *.
+				if consume(&s, '*') {
+					s = chk(parseSequence(p, level, s))
+				} else {
+					s = chk(parseOrder(p, level, s))
+				}
+			default:
+				err = fmt.Errorf("illegal operator %q", ch)
+			}
+		}
+	}
+	if chk("", scanner.Err()); err != nil {
+		return fmt.Errorf("%d: %v", i, err)
+	}
+	return nil
+}
+
+// parseSpecialAnchor parses the anchor syntax which is either of the form
+//    ['before' <level>] <anchor>
+// or
+//    [<label>]
+// The starting '[' should already be consumed.
+//
+// In the first form the anchor following the bracket is parsed with
+// parseAnchor using the given level. In the second form p.Reset is called
+// with the label formatted by specialAnchor. tail is the input after the
+// part that was consumed.
+func parseSpecialAnchor(p RuleProcessor, s string) (tail string, err error) {
+	end := strings.IndexByte(s, ']')
+	if end < 0 {
+		return "", errors.New("unmatched bracket")
+	}
+	label, rest := strings.TrimSpace(s[:end]), s[end+1:]
+	if !strings.HasPrefix(label, "before ") {
+		return rest, p.Reset(fmt.Sprintf(specialAnchor, label), 0)
+	}
+	// The bit size of 3 restricts the level to the range 0-7.
+	level, perr := strconv.ParseUint(skipSpace(label[len("before "):]), 10, 3)
+	if perr != nil {
+		return rest, perr
+	}
+	return parseAnchor(p, int(level), rest)
+}
+
+// parseAnchor scans one string from s and passes it, together with level, to
+// p.Reset. It returns the input that follows the string.
+func parseAnchor(p RuleProcessor, level int, s string) (tail string, err error) {
+	anchor, rest, err := scanString(s)
+	if err != nil {
+		return rest, err
+	}
+	return rest, p.Reset(anchor, level)
+}
+
+// parseOrder parses a single relation of the form
+//    <value> ['|' <context>] ['/' <extend>]
+// and reports it with p.Insert at the given level. A value that starts with
+// cldrIndex is reported with p.Index instead. tail is the input that follows
+// the relation.
+func parseOrder(p RuleProcessor, level int, s string) (tail string, err error) {
+	value, rest, err := scanString(s)
+	if err != nil {
+		return rest, err
+	}
+	if strings.HasPrefix(value, cldrIndex) {
+		p.Index(value[len(cldrIndex):])
+		// NOTE(review): the tail returned here is empty, so anything that
+		// follows an index entry on the same line is discarded. Confirm that
+		// this is intended rather than returning the remaining input.
+		return "", nil
+	}
+	var context, extend string
+	if consume(&rest, '|') {
+		if context, rest, err = scanString(rest); err != nil {
+			return rest, errors.New("missing string after context")
+		}
+	}
+	if consume(&rest, '/') {
+		if extend, rest, err = scanString(rest); err != nil {
+			return rest, errors.New("missing string after extension")
+		}
+	}
+	return rest, p.Insert(level, value, context, extend)
+}
+
+// scanString scans a single input string.
+//
+// Leading white space is skipped. The string ends at white space or at one
+// of the characters & < = #, unless that character is inside single quotes.
+// Two consecutive single quotes stand for one literal quote. tail is the
+// remaining input with leading white space removed.
+func scanString(s string) (str, tail string, err error) {
+	s = skipSpace(s)
+	if s == "" {
+		return s, s, errors.New("missing string")
+	}
+	value := make([]byte, 0, 16) // small but enough to hold most cases.
+	for len(s) > 0 {
+		if !consume(&s, '\'') {
+			r, size := utf8.DecodeRuneInString(s)
+			if unicode.IsSpace(r) || strings.ContainsRune("&<=#", r) {
+				break
+			}
+			value = append(value, s[:size]...)
+			s = s[size:]
+			continue
+		}
+		// Quoted section: copy everything up to the closing quote.
+		switch closing := strings.IndexByte(s, '\''); closing {
+		case -1:
+			return "", "", errors.New(`unmatched single quote`)
+		case 0:
+			value = append(value, '\'')
+			s = s[1:]
+		default:
+			value = append(value, s[:closing]...)
+			s = s[closing+1:]
+		}
+	}
+	return string(value), skipSpace(s), nil
+}
+
+// parseSequence parses the argument of a starred relation: a run of
+// characters, each of which is reported with p.Insert at the given level.
+// "a-d" denotes a range; its first element is inserted as an ordinary
+// character and the remaining ones when the range is recognized. The
+// sequence ends at white space or punctuation other than '-'. tail is the
+// input after the last rune that was read.
+func parseSequence(p RuleProcessor, level int, s string) (tail string, err error) {
+	if s = skipSpace(s); s == "" {
+		return s, errors.New("empty sequence")
+	}
+	var prev rune // last character inserted on its own; 0 if a range cannot start here
+	for len(s) > 0 {
+		r, size := utf8.DecodeRuneInString(s)
+		s = s[size:]
+
+		switch {
+		case r == '-':
+			// We have a range. The first element was already written.
+			if prev == 0 {
+				return s, errors.New("range without starter value")
+			}
+			hi, n := utf8.DecodeRuneInString(s)
+			s = s[n:]
+			if hi == utf8.RuneError || hi < prev {
+				return s, fmt.Errorf("invalid range %q-%q", prev, hi)
+			}
+			for c := prev + 1; c <= hi; c++ {
+				if err := p.Insert(level, string(c), "", ""); err != nil {
+					return s, err
+				}
+			}
+			prev = 0
+
+		case unicode.IsSpace(r) || unicode.IsPunct(r):
+			return s, nil
+
+		default:
+			if err := p.Insert(level, string(r), "", ""); err != nil {
+				return s, err
+			}
+			prev = r
+		}
+	}
+	return s, nil
+}
+
+// skipSpace returns s without its leading Unicode white space.
+func skipSpace(s string) string {
+	first := strings.IndexFunc(s, func(r rune) bool { return !unicode.IsSpace(r) })
+	if first < 0 {
+		// s is empty or consists of white space only.
+		return ""
+	}
+	return s[first:]
+}
+
+// consume reports whether the first byte of *s is ch. If so, it gobbles that
+// byte by advancing *s past it; otherwise *s is left untouched.
+func consume(s *string, ch byte) (ok bool) {
+	rest := *s
+	if len(rest) > 0 && rest[0] == ch {
+		*s = rest[1:]
+		return true
+	}
+	return false
+}
+
+// The following code parses Collation rules of CLDR version 24 and before.
+
+// lmap maps the first letter of an XML rule tag (p, s, t or i, also used for
+// the pc, sc, tc and ic forms) to the level passed to RuleProcessor.Insert.
+var lmap = map[byte]int{
+	'p': 1,
+	's': 2,
+	't': 3,
+	'i': 5,
+}
+
+// rulesElem captures a <rules> element. Its child elements are decoded
+// generically: each keeps its XML name and is parsed as a rule.
+type rulesElem struct {
+	Rules struct {
+		Common
+		Any []*struct {
+			XMLName xml.Name
+			rule
+		} `xml:",any"`
+	} `xml:"rules"`
+}
+
+// rule is a single element of the XML collation rule format.
+type rule struct {
+	// Value is the character data of the element; value rewrites it in place.
+	Value  string `xml:",chardata"`
+	// Before is the "before" attribute, interpreted by processXML for reset
+	// elements.
+	Before string `xml:"before,attr"`
+	// Any holds the child elements, each with its XML name.
+	Any    []*struct {
+		XMLName xml.Name
+		rule
+	} `xml:",any"`
+}
+
+// emptyValueError is returned by value for a rule that has neither character
+// data nor exactly one child element.
+var emptyValueError = errors.New("cldr: empty rule value")
+
+// value returns the string the rule stands for and stores it in r.Value.
+//
+// Escape sequences in the character data are converted first. A rule without
+// character data must have exactly one child element, whose name is then
+// used as a logical position formatted with specialAnchor; the child is
+// removed in that case. A rule with character data must not have children.
+func (r *rule) value() (string, error) {
+	// Convert hexadecimal Unicode codepoint notation to a string.
+	r.Value = charRe.ReplaceAllStringFunc(r.Value, replaceUnicode)
+	switch {
+	case r.Value != "":
+		if len(r.Any) != 0 {
+			return "", fmt.Errorf("cldr: XML elements found in collation rule: %v", r.Any)
+		}
+	case len(r.Any) != 1:
+		return "", emptyValueError
+	default:
+		r.Value = fmt.Sprintf(specialAnchor, r.Any[0].XMLName.Local)
+		r.Any = nil
+	}
+	return r.Value, nil
+}
+
+// process reports the rule to p according to the XML tag name.
+//
+// For p, s, t and i the whole value is inserted at the level of the tag, or
+// passed to p.Index if it starts with cldrIndex. For pc, sc, tc and ic each
+// rune of the value is inserted separately. Any other tag is an error.
+func (r rule) process(p RuleProcessor, name, context, extend string) error {
+	v, err := r.value()
+	if err != nil {
+		return err
+	}
+	switch name {
+	case "p", "s", "t", "i":
+		if !strings.HasPrefix(v, cldrIndex) {
+			return p.Insert(lmap[name[0]], v, context, extend)
+		}
+		p.Index(v[len(cldrIndex):])
+		return nil
+	case "pc", "sc", "tc", "ic":
+		level := lmap[name[0]]
+		for _, c := range v {
+			if err := p.Insert(level, string(c), context, extend); err != nil {
+				return err
+			}
+		}
+		return nil
+	}
+	return fmt.Errorf("cldr: unsupported tag: %q", name)
+}
+
+// processXML parses the format of CLDR versions 24 and older.
+//
+// Every child of the rules element is handled in document order: "reset"
+// calls p.Reset, "x" groups relations that share a context and an extension,
+// and any other element is a single relation. Processing stops at the first
+// error, which is returned.
+func (c Collation) processXML(p RuleProcessor) (err error) {
+	// Collation is generated and defined in xml.go.
+	var v string
+	for _, r := range c.Rules.Any {
+		switch r.XMLName.Local {
+		case "reset":
+			level := 0
+			switch r.Before {
+			case "primary", "1":
+				level = 1
+			case "secondary", "2":
+				level = 2
+			case "tertiary", "3":
+				level = 3
+			case "":
+			default:
+				return fmt.Errorf("cldr: unknown level %q", r.Before)
+			}
+			v, err = r.value()
+			if err == nil {
+				err = p.Reset(v, level)
+			}
+		case "x":
+			var context, extend string
+			// First pass: evaluate every child and pick up the context and
+			// extension. A failure must not be overwritten by a later child.
+			for _, r1 := range r.Any {
+				if v, err = r1.value(); err != nil {
+					return err
+				}
+				switch r1.XMLName.Local {
+				case "context":
+					context = v
+				case "extend":
+					extend = v
+				}
+			}
+			// Second pass: report the relations themselves.
+			for _, r1 := range r.Any {
+				if t := r1.XMLName.Local; t == "context" || t == "extend" {
+					continue
+				}
+				if err = r1.rule.process(p, r1.XMLName.Local, context, extend); err != nil {
+					return err
+				}
+			}
+		default:
+			err = r.rule.process(p, r.XMLName.Local, "", "")
+		}
+		if err != nil {
+			return err
+		}
+	}
+	return nil
+}
@@ -0,0 +1,171 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cldr
+
+import (
+	"archive/zip"
+	"bytes"
+	"encoding/xml"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"log"
+	"os"
+	"path/filepath"
+	"regexp"
+)
+
+// A Decoder loads an archive of CLDR data.
+type Decoder struct {
+	// dirFilter, if non-empty, restricts Decode to files in the listed
+	// directories; see SetDirFilter.
+	dirFilter     []string
+	// sectionFilter is passed to finalize once all files are decoded; see
+	// SetSectionFilter.
+	sectionFilter []string
+	loader        Loader
+	// cldr is the result under construction; Decode replaces it on each call.
+	cldr          *CLDR
+	curLocale     string
+}
+
+// SetSectionFilter takes a list of top-level LDML element names to which
+// evaluation of LDML should be limited.
+//
+// NOTE: it is meant to derive and set the directory filter as well, but that
+// is not implemented yet (see the TODO); call SetDirFilter explicitly.
+func (d *Decoder) SetSectionFilter(filter ...string) {
+	d.sectionFilter = filter
+	// TODO: automatically set dir filter
+}
+
+// SetDirFilter limits the loading of LDML XML files to the specified directories.
+// Note that sections may be split across directories differently for different CLDR versions.
+// For more robust code, use SetSectionFilter.
+func (d *Decoder) SetDirFilter(dir ...string) {
+	d.dirFilter = dir
+}
+
+// A Loader provides access to the files of a CLDR archive.
+type Loader interface {
+	// Len returns the number of entries; valid indices are 0 to Len()-1.
+	Len() int
+	// Path returns the path of entry i.
+	Path(i int) string
+	// Reader opens entry i for reading.
+	Reader(i int) (io.ReadCloser, error)
+}
+
+// fileRe matches paths containing <dir>/<name>.xml below at least one more
+// directory level. Submatch 1 is the directory and submatch 2 the file name
+// without extension.
+var fileRe = regexp.MustCompile(".*/(.*)/(.*)\\.xml")
+
+// Decode loads and decodes the files represented by l.
+//
+// Only entries whose path matches fileRe are considered, and, if a directory
+// filter is set, only those in one of the listed directories. Each file is
+// closed as soon as it has been decoded. Decoding stops at the first error,
+// in which case the returned CLDR is nil.
+func (d *Decoder) Decode(l Loader) (cldr *CLDR, err error) {
+	d.cldr = makeCLDR()
+	for i := 0; i < l.Len(); i++ {
+		m := fileRe.FindStringSubmatch(l.Path(i))
+		if m == nil {
+			continue
+		}
+		if len(d.dirFilter) > 0 && !in(d.dirFilter, m[1]) {
+			continue
+		}
+		var rc io.ReadCloser
+		if rc, err = l.Reader(i); err != nil {
+			return nil, err
+		}
+		err = d.decode(m[1], m[2], rc)
+		// Release the file right away rather than leaking one handle per
+		// entry. The source is only read from, so an error from Close carries
+		// no information worth reporting.
+		rc.Close()
+		if err != nil {
+			return nil, err
+		}
+	}
+	d.cldr.finalize(d.sectionFilter)
+	return d.cldr, nil
+}
+
+// decode decodes the XML read from r into the part of d.cldr selected by
+// dir: the supplemental data, the BCP47 data, or the LDML of locale id for
+// every other directory. Files in "transforms" and "validity" are ignored.
+// A locale that is decoded for the first time must have an identity element.
+func (d *Decoder) decode(dir, id string, r io.Reader) error {
+	cldr := d.cldr
+	var (
+		v interface{}
+		l *LDML // set only when this call creates the entry for id
+	)
+	switch dir {
+	case "transforms", "validity":
+		return nil
+	case "supplemental":
+		v = cldr.supp
+	case "bcp47":
+		v = cldr.bcp47
+	default:
+		if existing, ok := cldr.locale[id]; ok {
+			v = existing
+		} else {
+			l = &LDML{}
+			cldr.locale[id] = l
+			v = l
+		}
+	}
+	if err := xml.NewDecoder(r).Decode(v); err != nil {
+		log.Printf("%s/%s: %v", dir, id, err)
+		return err
+	}
+	if l != nil && l.Identity == nil {
+		return fmt.Errorf("%s/%s: missing identity element", dir, id)
+	}
+	// TODO: verify when CLDR bug http://unicode.org/cldr/trac/ticket/8970
+	// is resolved.
+	// path := strings.Split(id, "_")
+	// if lang := l.Identity.Language.Type; lang != path[0] {
+	// 	return fmt.Errorf("%s/%s: language was %s; want %s", dir, id, lang, path[0])
+	// }
+	return nil
+}
+
+// pathLoader is a Loader backed by a list of file system paths.
+type pathLoader []string
+
+// makePathLoader walks the file tree rooted at path and collects every path
+// visited, directories included. The walk stops at the first error reported
+// for an entry; that entry is still part of the result.
+func makePathLoader(path string) (pl pathLoader, err error) {
+	visit := func(entry string, _ os.FileInfo, walkErr error) error {
+		pl = append(pl, entry)
+		return walkErr
+	}
+	err = filepath.Walk(path, visit)
+	return pl, err
+}
+
+// Len returns the number of collected paths.
+func (pl pathLoader) Len() int {
+	return len(pl)
+}
+
+// Path returns the i-th collected path.
+func (pl pathLoader) Path(i int) string {
+	return pl[i]
+}
+
+// Reader opens the i-th collected path for reading.
+func (pl pathLoader) Reader(i int) (io.ReadCloser, error) {
+	name := pl[i]
+	return os.Open(name)
+}
+
+// DecodePath loads CLDR data from the given path. The file tree below path
+// is listed with makePathLoader and the result is handed to Decode.
+func (d *Decoder) DecodePath(path string) (cldr *CLDR, err error) {
+	var files pathLoader
+	if files, err = makePathLoader(path); err != nil {
+		return nil, err
+	}
+	return d.Decode(files)
+}
+
+// zipLoader is a Loader backed by the entries of a zip archive.
+type zipLoader struct {
+	r *zip.Reader
+}
+
+// Len returns the number of entries in the archive.
+func (zl zipLoader) Len() int {
+	return len(zl.r.File)
+}
+
+// Path returns the name of the i-th archive entry.
+func (zl zipLoader) Path(i int) string {
+	return zl.r.File[i].Name
+}
+
+// Reader opens the i-th archive entry for reading.
+func (zl zipLoader) Reader(i int) (io.ReadCloser, error) {
+	return zl.r.File[i].Open()
+}
+
+// DecodeZip loads CLDR data from the zip archive for which r is the source.
+//
+// The archive is read into memory completely, because zip.NewReader needs
+// random access to its input.
+func (d *Decoder) DecodeZip(r io.Reader) (cldr *CLDR, err error) {
+	data, err := ioutil.ReadAll(r)
+	if err != nil {
+		return nil, err
+	}
+	zr, err := zip.NewReader(bytes.NewReader(data), int64(len(data)))
+	if err != nil {
+		return nil, err
+	}
+	return d.Decode(zipLoader{r: zr})
+}
@@ -0,0 +1,400 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+// This tool generates types for the various XML formats of CLDR.
+package main
+
+import (
+	"archive/zip"
+	"bytes"
+	"encoding/xml"
+	"flag"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"log"
+	"os"
+	"regexp"
+	"strings"
+
+	"golang.org/x/text/internal/gen"
+)
+
+var outputFile = flag.String("output", "xml.go", "output file name")
+
+// main generates the Go types for the CLDR XML formats. For each entry of
+// files it locates the matching .dtd file in the CLDR core zip, parses it and
+// appends the generated code to a buffer, which is finally written to the
+// file named by the -output flag together with the Version constant.
+func main() {
+	flag.Parse()
+
+	r := gen.OpenCLDRCoreZip()
+	buffer, err := ioutil.ReadAll(r)
+	if err != nil {
+		log.Fatalf("Could not read zip file: %v", err)
+	}
+	r.Close()
+	z, err := zip.NewReader(bytes.NewReader(buffer), int64(len(buffer)))
+	if err != nil {
+		log.Fatalf("Could not read zip archive: %v", err)
+	}
+
+	var buf bytes.Buffer
+
+	version := gen.CLDRVersion()
+
+	for _, dtd := range files {
+		for _, f := range z.File {
+			if strings.HasSuffix(f.Name, dtd.file+".dtd") {
+				r, err := f.Open()
+				failOnError(err)
+
+				b := makeBuilder(&buf, dtd)
+				b.parseDTD(r)
+				// The DTD has been consumed; release the archive entry.
+				r.Close()
+				b.resolve(b.index[dtd.top[0]])
+				b.write()
+				if b.version != "" && version != b.version {
+					println(f.Name)
+					log.Fatalf("main: inconsistent versions: found %s; want %s", b.version, version)
+				}
+				// Only the first archive entry matching this DTD is used.
+				break
+			}
+		}
+	}
+	fmt.Fprintln(&buf, "// Version is the version of CLDR from which the XML definitions are generated.")
+	fmt.Fprintf(&buf, "const Version = %q\n", version)
+
+	gen.WriteGoFile(*outputFile, "cldr", buf.Bytes())
+}
+
+// failOnError does nothing if err is nil. Otherwise it logs err to standard
+// error, tagged with the file and line of the caller, and exits the program
+// with status 1.
+func failOnError(err error) {
+	if err == nil {
+		return
+	}
+	logger := log.New(os.Stderr, "", log.Lshortfile)
+	// A call depth of 2 attributes the message to the caller of failOnError.
+	logger.Output(2, err.Error())
+	os.Exit(1)
+}
+
+// dtd holds the configuration data per DTD type. The values used by the
+// generator are listed in files.
+type dtd struct {
+	file string   // base file name; main appends ".dtd" when searching the archive
+	root string   // Go name of the root XML element
+	top  []string // create a different type for this section
+
+	skipElem    []string // hard-coded or deprecated elements
+	skipAttr    []string // attributes to exclude
+	predefined  []string // hard-coded elements exist of the form <name>Elem
+	forceRepeat []string // elements to make slices despite DTD
+}
+
+// files lists the DTDs for which main generates types, in the order in which
+// they are processed.
+var files = []dtd{
+	{
+		file: "ldmlBCP47",
+		root: "LDMLBCP47",
+		top:  []string{"ldmlBCP47"},
+		skipElem: []string{
+			"cldrVersion", // deprecated, not used
+		},
+	},
+	{
+		file: "ldmlSupplemental",
+		root: "SupplementalData",
+		top:  []string{"supplementalData"},
+		skipElem: []string{
+			"cldrVersion", // deprecated, not used
+		},
+		forceRepeat: []string{
+			"plurals", // data defined in plurals.xml and ordinals.xml
+		},
+	},
+	{
+		file: "ldml",
+		root: "LDML",
+		top: []string{
+			"ldml", "collation", "calendar", "timeZoneNames", "localeDisplayNames", "numbers",
+		},
+		skipElem: []string{
+			"cp",       // not used anywhere
+			"special",  // not used anywhere
+			"fallback", // deprecated, not used
+			"alias",    // in Common
+			"default",  // in Common
+		},
+		skipAttr: []string{
+			"hiraganaQuarternary", // typo in DTD, correct version included as well
+		},
+		predefined: []string{"rules"},
+	},
+}
+
+// comments maps an XML element name to comment text written in Go comment
+// syntax.
+// NOTE(review): presumably this text is emitted above the type generated for
+// the element — confirm against the code that reads this map.
+var comments = map[string]string{
+	"ldmlBCP47": `
+// LDMLBCP47 holds information on allowable values for various variables in LDML.
+`,
+	"supplementalData": `
+// SupplementalData holds information relevant for internationalization
+// and proper use of CLDR, but that is not contained in the locale hierarchy.
+`,
+	"ldml": `
+// LDML is the top-level type for locale-specific data.
+`,
+	"collation": `
+// Collation contains rules that specify a certain sort-order,
+// as a tailoring of the root order. 
+// The parsed rules are obtained by passing a RuleProcessor to Collation's
+// Process method.
+`,
+	"calendar": `
+// Calendar specifies the fields used for formatting and parsing dates and times.
+// The month and quarter names are identified numerically, starting at 1.
+// The day (of the week) names are identified with short strings, since there is
+// no universally-accepted numeric designation.
+`,
+	"dates": `
+// Dates contains information regarding the format and parsing of dates and times.
+`,
+	"localeDisplayNames": `
+// LocaleDisplayNames specifies localized display names for for scripts, languages,
+// countries, currencies, and variants.
+`,
+	"numbers": `
+// Numbers supplies information for formatting and parsing numbers and currencies.
+`,
+}
+
+// element describes one XML element declared in a DTD. parseDTD creates it
+// from an ELEMENT directive and extends it for each ATTLIST directive that
+// names it.
+type element struct {
+	name      string // XML element name
+	category  string // elements contained by this element
+	signature string // category + attrKey*
+
+	attr []*attribute // attributes supported by this element.
+	sub  []struct {   // parsed and evaluated sub elements of this element.
+		e      *element
+		repeat bool // true if the element needs to be a slice
+	}
+
+	resolved bool // prevent multiple resolutions of this element.
+}
+
+// attribute describes one attribute of an element, as recorded by parseDTD
+// from an ATTLIST directive.
+type attribute struct {
+	// name is the attribute name.
+	name string
+	// key is set by parseDTD to the text of the directive that follows the
+	// element name.
+	key  string
+	// list holds the reToken matches found in the enumerated value list.
+	list []string
+
+	tag string // Go tag
+}
+
+var (
+	// reHead captures the directive keyword (ELEMENT or ATTLIST in parseDTD)
+	// and the element name that follows it.
+	reHead  = regexp.MustCompile(` *(\w+) +([\w\-]+)`)
+	// reAttr parses the remainder of an ATTLIST directive: the attribute
+	// name, its type or parenthesized value list, an optional #KEYWORD with a
+	// quoted value, and an optional quoted default.
+	// NOTE(review): the quoted value group [\.\d+] is a character class that
+	// matches a single character. If multi-character versions such as "1.2"
+	// are expected, [\.\d]+ was probably intended — confirm.
+	reAttr  = regexp.MustCompile(` *(\w+) *(?:(\w+)|\(([\w\- \|]+)\)) *(?:#([A-Z]*) *(?:\"([\.\d+])\")?)? *("[\w\-:]*")?`)
+	// reElem matches the content model of an ELEMENT directive: EMPTY, ANY or
+	// a parenthesized group with an optional repetition marker.
+	reElem  = regexp.MustCompile(`^ *(EMPTY|ANY|\(.*\)[\*\+\?]?) *$`)
+	// NOTE(review): reToken matches one word character followed by '-', not a
+	// whole token; [\w\-]+ may have been intended — confirm before relying on
+	// attribute.list.
+	reToken = regexp.MustCompile(`\w\-`)
+)
+
+// builder is used to read in the DTD files from CLDR and generate Go code
+// to be used with the encoding/xml package.
+type builder struct {
+	// w receives the generated code.
+	w       io.Writer
+	// index maps an element name to the element parsed from the DTD.
+	index   map[string]*element
+	// elem lists the elements in the order in which resolve visited them.
+	elem    []*element
+	// info is the configuration of the DTD being processed.
+	info    dtd
+	// version is taken by parseDTD from an ATTLIST directive with a #FIXED
+	// value; it stays empty if there is none.
+	version string
+}
+
+// makeBuilder returns a builder that writes to w and is configured by d. The
+// element index and the element list start out empty; the version is unset.
+func makeBuilder(w io.Writer, d dtd) builder {
+	var b builder
+	b.w = w
+	b.info = d
+	b.index = map[string]*element{}
+	b.elem = []*element{}
+	return b
+}
+
+// parseDTD parses a DTD file.
+//
+// It walks the directives read from r. Each ELEMENT directive adds an entry
+// to b.index. Each ATTLIST directive either sets b.version (for a #FIXED
+// attribute) or appends an attribute to the element it names; attributes that
+// are already part of Common are skipped. Any read error or malformed
+// directive terminates the program.
+func (b *builder) parseDTD(r io.Reader) {
+	for d := xml.NewDecoder(r); ; {
+		t, err := d.Token()
+		if err == io.EOF {
+			break
+		}
+		// Any other error is a genuine read or syntax problem and must not be
+		// mistaken for the end of the file.
+		failOnError(err)
+		dir, ok := t.(xml.Directive)
+		if !ok {
+			continue
+		}
+		m := reHead.FindSubmatch(dir)
+		if m == nil {
+			log.Fatalf("parseDTD: invalid directive %q", string(dir))
+		}
+		dir = dir[len(m[0]):]
+		ename := string(m[2])
+		el, elementFound := b.index[ename]
+		switch string(m[1]) {
+		case "ELEMENT":
+			if elementFound {
+				log.Fatalf("parseDTD: duplicate entry for element %q", ename)
+			}
+			m := reElem.FindSubmatch(dir)
+			if m == nil {
+				log.Fatalf("parseDTD: invalid element %q", string(dir))
+			}
+			if len(m[0]) != len(dir) {
+				log.Fatalf("parseDTD: invalid element %q: matched %d of %d bytes (%q)", string(dir), len(m[0]), len(dir), string(m[0]))
+			}
+			s := string(m[1])
+			el = &element{
+				name:     ename,
+				category: s,
+			}
+			b.index[ename] = el
+		case "ATTLIST":
+			if !elementFound {
+				log.Fatalf("parseDTD: unknown element %q", ename)
+			}
+			s := string(dir)
+			m := reAttr.FindStringSubmatch(s)
+			if m == nil {
+				log.Fatalf("parseDTD: invalid attribute %q", string(dir))
+			}
+			if m[4] == "FIXED" {
+				b.version = m[5]
+			} else {
+				switch m[1] {
+				case "draft", "references", "alt", "validSubLocales", "standard" /* in Common */ :
+				case "type", "choice":
+				default:
+					el.attr = append(el.attr, &attribute{
+						name: m[1],
+						key:  s,
+						list: reToken.FindAllString(m[3], -1),
+					})
+					el.signature = fmt.Sprintf("%s=%s+%s", el.signature, m[1], m[2])
+				}
+			}
+		}
+	}
+}
+
+// reCat matches the next token of an element's content specification:
+// optional separators (space, ',' or '|') followed by an optional '(' , ')'
+// or (possibly '#'-prefixed) name, each with an optional '*', '+' or '?'
+// suffix. Every part is optional, so the expression can match the empty
+// string.
+var reCat = regexp.MustCompile(`[ ,\|]*(?:(\(|\)|\#?[\w_-]+)([\*\+\?]?))?`)
+
+// resolve takes a parsed element and converts it into structured data
+// that can be used to generate the XML code.
+//
+// It tokenizes e.category, records every distinct sub-element that is known
+// to the builder (resolving it recursively) and marks a sub-element as
+// repeated when it, or the parenthesized group it is part of, is followed by
+// '*' or '+', or when it is listed in b.info.forceRepeat.
+// Malformed category strings terminate the program through log.Fatalf.
+func (b *builder) resolve(e *element) {
+	if e.resolved {
+		return
+	}
+	b.elem = append(b.elem, e)
+	e.resolved = true
+	s := e.category
+	found := make(map[string]bool)
+	sequenceStart := []int{}
+	for len(s) > 0 {
+		m := reCat.FindStringSubmatch(s)
+		// reCat can match the empty string. An empty match means that s
+		// starts with an unexpected character; without this check the loop
+		// would never make progress.
+		if m == nil || len(m[0]) == 0 {
+			log.Fatalf("%s: invalid category string %q", e.name, s)
+		}
+		repeat := m[2] == "*" || m[2] == "+" || in(b.info.forceRepeat, m[1])
+		switch m[1] {
+		case "":
+		case "(":
+			sequenceStart = append(sequenceStart, len(e.sub))
+		case ")":
+			if len(sequenceStart) == 0 {
+				log.Fatalf("%s: unmatched closing parenthesis", e.name)
+			}
+			// A repeat suffix on a group applies to all of its members.
+			for i := sequenceStart[len(sequenceStart)-1]; i < len(e.sub); i++ {
+				e.sub[i].repeat = e.sub[i].repeat || repeat
+			}
+			sequenceStart = sequenceStart[:len(sequenceStart)-1]
+		default:
+			if in(b.info.skipElem, m[1]) {
+			} else if sub, ok := b.index[m[1]]; ok {
+				if !found[sub.name] {
+					e.sub = append(e.sub, struct {
+						e      *element
+						repeat bool
+					}{sub, repeat})
+					found[sub.name] = true
+					b.resolve(sub)
+				}
+			} else if m[1] == "#PCDATA" || m[1] == "ANY" {
+			} else if m[1] != "EMPTY" {
+				log.Fatalf("resolve:%s: element %q not found", e.name, m[1])
+			}
+		}
+		s = s[len(m[0]):]
+	}
+}
+
+// in reports whether s occurs in set.
+func in(set []string, s string) bool {
+	for i := 0; i < len(set); i++ {
+		if set[i] == s {
+			return true
+		}
+	}
+	return false
+}
+
+// repl turns the word separators '-' and '_' into spaces.
+var repl = strings.NewReplacer("-", " ", "_", " ")
+
+// title converts an XML name into an identifier: the first character and each
+// character following a '-' or '_' is put in title case, and all '-', '_' and
+// space characters are removed.
+func title(s string) string {
+	spaced := repl.Replace(s)
+	capitalized := strings.Title(spaced)
+	return strings.Join(strings.Split(capitalized, " "), "")
+}
+
+// writeElem generates Go code for a single element, recursively.
+//
+// tab is the current indentation depth; it is applied after every newline
+// that occurs in a format string. An element without attributes and without
+// sub-elements is written as the type Common; any other element is written
+// as an anonymous struct that embeds Common, followed by one string field per
+// attribute and one field per sub-element.
+func (b *builder) writeElem(tab int, e *element) {
+	p := func(f string, x ...interface{}) {
+		f = strings.Replace(f, "\n", "\n"+strings.Repeat("\t", tab), -1)
+		fmt.Fprintf(b.w, f, x...)
+	}
+	if len(e.sub) == 0 && len(e.attr) == 0 {
+		p("Common")
+		return
+	}
+	p("struct {")
+	tab++
+	p("\nCommon")
+	for _, attr := range e.attr {
+		if !in(b.info.skipAttr, attr.name) {
+			p("\n%s string `xml:\"%s,attr\"`", title(attr.name), attr.name)
+		}
+	}
+	for _, sub := range e.sub {
+		if in(b.info.predefined, sub.e.name) {
+			p("\n%sElem", sub.e.name)
+			continue
+		}
+		if in(b.info.skipElem, sub.e.name) {
+			continue
+		}
+		p("\n%s ", title(sub.e.name))
+		if sub.repeat {
+			p("[]")
+		}
+		p("*")
+		if in(b.info.top, sub.e.name) {
+			// Top-level elements get their own named type. The name is
+			// data, so it is passed as an argument, not as the format.
+			p("%s", title(sub.e.name))
+		} else {
+			b.writeElem(tab, sub.e)
+		}
+		p(" `xml:\"%s\"`", sub.e.name)
+	}
+	tab--
+	p("\n}")
+}
+
+// write generates the Go XML code.
+//
+// For every name in b.info.top that was declared in the DTD it writes the
+// comment registered for that name followed by a type declaration. The first
+// top-level element is named b.info.root; the others are named after their
+// element.
+func (b *builder) write() {
+	for i, name := range b.info.top {
+		e := b.index[name]
+		if e == nil {
+			continue
+		}
+		// The comment is data, not a format string: print it verbatim so
+		// that a '%' in the text is not interpreted as a formatting verb.
+		fmt.Fprint(b.w, comments[name])
+		typeName := title(e.name)
+		if i == 0 {
+			typeName = b.info.root
+		}
+		fmt.Fprintf(b.w, "type %s ", typeName)
+		b.writeElem(0, e)
+		fmt.Fprint(b.w, "\n")
+	}
+}
@@ -0,0 +1,602 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cldr
+
+// This file implements the various inheritance constructs defined by LDML.
+// See http://www.unicode.org/reports/tr35/#Inheritance_and_Validity
+// for more details.
+
+import (
+	"fmt"
+	"log"
+	"reflect"
+	"regexp"
+	"sort"
+	"strings"
+)
+
+// fieldIter iterates over fields in a struct. It includes
+// fields of embedded structs.
+type fieldIter struct {
+	v reflect.Value // the struct that is iterated over
+	// index is the field index path of the current field, as accepted by
+	// FieldByIndex; n holds the number of fields of the struct at each level
+	// of that path.
+	index, n []int
+}
+
+// iter returns an iterator positioned at the first field of struct v,
+// descending into that field if it is an embedded struct.
+// It panics if v is not a struct.
+func iter(v reflect.Value) fieldIter {
+	if kind := v.Kind(); kind != reflect.Struct {
+		log.Panicf("value %v must be a struct", v)
+	}
+	it := fieldIter{v: v}
+	it.index = append(it.index, 0)
+	it.n = append(it.n, v.NumField())
+	it.descent()
+	return it
+}
+
+// descent moves the iterator to the first field of the current field for as
+// long as the current field is an embedded struct with at least one field.
+func (i *fieldIter) descent() {
+	for {
+		f := i.field()
+		if !f.Anonymous || f.Type.NumField() == 0 {
+			return
+		}
+		i.index = append(i.index, 0)
+		i.n = append(i.n, f.Type.NumField())
+	}
+}
+
+// done reports whether the iterator has moved past the last field of the
+// outermost struct.
+func (i *fieldIter) done() bool {
+	if len(i.index) != 1 {
+		return false
+	}
+	return i.index[0] >= i.n[0]
+}
+
+// skip reports whether f should be passed over by the iterator: this is the
+// case for fields that are not embedded and whose name does not start with an
+// upper case ASCII letter.
+func skip(f reflect.StructField) bool {
+	if f.Anonymous {
+		return false
+	}
+	first := f.Name[0]
+	return first < 'A' || first > 'Z'
+}
+
+// next advances the iterator to the next field that is not skipped. When the
+// fields of an embedded struct are exhausted it continues with the field
+// following that struct in the enclosing struct. When the outermost struct is
+// exhausted the iterator is left in the state in which done reports true.
+func (i *fieldIter) next() {
+	for {
+		k := len(i.index) - 1
+		i.index[k]++
+		if i.index[k] < i.n[k] {
+			if !skip(i.field()) {
+				break
+			}
+		} else {
+			if k == 0 {
+				// Past the last field of the outermost struct.
+				return
+			}
+			// Pop one level; the next iteration increments the index of
+			// the enclosing struct.
+			i.index = i.index[:k]
+			i.n = i.n[:k]
+		}
+	}
+	i.descent()
+}
+
+// value returns the value of the field the iterator currently points at.
+func (i *fieldIter) value() reflect.Value {
+	path := i.index
+	return i.v.FieldByIndex(path)
+}
+
+// field returns the type description of the field the iterator currently
+// points at.
+func (i *fieldIter) field() reflect.StructField {
+	typ := i.v.Type()
+	return typ.FieldByIndex(i.index)
+}
+
+// visitor is a function that is called for the nodes of a value tree.
+type visitor func(v reflect.Value) error
+
+// stopDescent can be returned by a visitor to indicate that the children of
+// the visited node should not be visited. It is not reported as an error.
+var stopDescent = fmt.Errorf("do not recurse")
+
+// visit calls f on x and, recursively, on all nodes reachable from x.
+func (f visitor) visit(x interface{}) error {
+	start := reflect.ValueOf(x)
+	return f.visitRec(start)
+}
+
+// visitRec recursively calls f on all nodes in v.
+// Pointers are followed without calling f on the pointer itself; nil
+// pointers are ignored. If f returns stopDescent for a node, the fields or
+// elements of that node are not visited. Any other error ends the traversal
+// and is returned.
+func (f visitor) visitRec(v reflect.Value) error {
+	if v.Kind() == reflect.Ptr {
+		if v.IsNil() {
+			return nil
+		}
+		return f.visitRec(v.Elem())
+	}
+	switch err := f(v); err {
+	case nil:
+		// Continue with the children of v.
+	case stopDescent:
+		return nil
+	default:
+		return err
+	}
+	if kind := v.Kind(); kind == reflect.Struct {
+		for it := iter(v); !it.done(); it.next() {
+			if err := f.visitRec(it.value()); err != nil {
+				return err
+			}
+		}
+	} else if kind == reflect.Slice {
+		for j := 0; j < v.Len(); j++ {
+			if err := f.visitRec(v.Index(j)); err != nil {
+				return err
+			}
+		}
+	}
+	return nil
+}
+
+// getPath returns a dotted description of the position of e, starting at the
+// outermost enclosing element. Elements with a non-empty Type are suffixed
+// with "[type=...]". It is used for error reporting purposes only.
+func getPath(e Elem) string {
+	if e == nil {
+		return "<nil>"
+	}
+	parent, c := e.enclosing(), e.GetCommon()
+	switch {
+	case parent == nil:
+		return c.name
+	case c.Type == "":
+		return fmt.Sprintf("%s.%s", getPath(parent), c.name)
+	}
+	return fmt.Sprintf("%s.%s[type=%s]", getPath(parent), c.name, c.Type)
+}
+
+// xmlName returns the first comma-separated part of the "xml" tag of f, which
+// is the XML name of the element or attribute, and reports whether any part
+// of the tag equals "attr".
+func xmlName(f reflect.StructField) (name string, attr bool) {
+	parts := strings.Split(f.Tag.Get("xml"), ",")
+	for i := range parts {
+		if parts[i] == "attr" {
+			attr = true
+			break
+		}
+	}
+	return parts[0], attr
+}
+
+// findField returns the field of struct v, or of the struct v points to,
+// whose XML name is key. It returns an error if there is no such field.
+func findField(v reflect.Value, key string) (reflect.Value, error) {
+	v = reflect.Indirect(v)
+	it := iter(v)
+	for !it.done() {
+		name, _ := xmlName(it.field())
+		if name == key {
+			return it.value(), nil
+		}
+		it.next()
+	}
+	return reflect.Value{}, fmt.Errorf("cldr: no field %q in element %#v", key, v.Interface())
+}
+
+// xpathPart matches a single path component: an element name, optionally
+// followed by a selector of the form [@attr='value'].
+var xpathPart = regexp.MustCompile(`(\pL+)(?:\[@(\pL+)='([\w-]+)'\])?`)
+
+// walkXPath follows path, a list of components separated by '/', starting at
+// element e and returns the element it arrives at.
+//
+// A component ".." moves to the enclosing element and empty components are
+// ignored. Any other component names a field by its XML name and may carry a
+// selector of the form [@attr='value']. For slice fields the selector picks
+// the entry whose attribute has the given value; without a selector the only
+// entry is used or, if there are several, the entry whose type equals the
+// default of the current element. Selectors are not allowed for fields that
+// hold a single element.
+//
+// An error is returned if the path is malformed or cannot be followed.
+func walkXPath(e Elem, path string) (res Elem, err error) {
+	for _, c := range strings.Split(path, "/") {
+		if c == ".." {
+			if e = e.enclosing(); e == nil {
+				// Report this as an error, like every other invalid path,
+				// rather than panicking.
+				return nil, fmt.Errorf(`cldr: ".." moves past root in path %q`, path)
+			}
+			continue
+		} else if c == "" {
+			continue
+		}
+		m := xpathPart.FindStringSubmatch(c)
+		if len(m) == 0 || len(m[0]) != len(c) {
+			return nil, fmt.Errorf("cldr: syntax error in path component %q", c)
+		}
+		v, err := findField(reflect.ValueOf(e), m[1])
+		if err != nil {
+			return nil, err
+		}
+		switch v.Kind() {
+		case reflect.Slice:
+			i := 0
+			if m[2] != "" || v.Len() > 1 {
+				if m[2] == "" {
+					// No selector given: fall back to the default type.
+					m[2] = "type"
+					if m[3] = e.GetCommon().Default(); m[3] == "" {
+						return nil, fmt.Errorf("cldr: type selector or default value needed for element %s", m[1])
+					}
+				}
+				for ; i < v.Len(); i++ {
+					vi := v.Index(i)
+					key, err := findField(vi.Elem(), m[2])
+					if err != nil {
+						return nil, err
+					}
+					key = reflect.Indirect(key)
+					if key.Kind() == reflect.String && key.String() == m[3] {
+						break
+					}
+				}
+			}
+			if i == v.Len() || v.Index(i).IsNil() {
+				return nil, fmt.Errorf("no %s found with %s==%s", m[1], m[2], m[3])
+			}
+			e = v.Index(i).Interface().(Elem)
+		case reflect.Ptr:
+			if v.IsNil() {
+				return nil, fmt.Errorf("cldr: element %q not found within element %q", m[1], e.GetCommon().name)
+			}
+			var ok bool
+			if e, ok = v.Interface().(Elem); !ok {
+				return nil, fmt.Errorf("cldr: %q is not an XML element", m[1])
+			} else if m[2] != "" || m[3] != "" {
+				return nil, fmt.Errorf("cldr: no type selector allowed for element %s", m[1])
+			}
+		default:
+			return nil, fmt.Errorf("cldr: %q is not an XML element", m[1])
+		}
+	}
+	return e, nil
+}
+
+// absPrefix is the prefix required of alias paths that are evaluated
+// relative to the root of another locale.
+const absPrefix = "//ldml/"
+
+// resolveAlias returns the element an alias with the given source and path
+// refers to. For source "locale" the path is followed starting at e. For any
+// other source the path must start with absPrefix and is followed starting at
+// the resolved LDML of the locale named by src.
+func (cldr *CLDR) resolveAlias(e Elem, src, path string) (res Elem, err error) {
+	if src == "locale" {
+		return walkXPath(e, path)
+	}
+	if !strings.HasPrefix(path, absPrefix) {
+		return nil, fmt.Errorf("cldr: expected absolute path, found %q", path)
+	}
+	base, err := cldr.resolve(src)
+	if err != nil {
+		return nil, err
+	}
+	return walkXPath(base, path[len(absPrefix):])
+}
+
+// resolveAndMergeAlias resolves the alias of e, if it has one, and copies all
+// fields of the alias target into e, except attributes and nil pointers.
+// The alias of the target itself is resolved first; an error from that step
+// is returned only after the fields have been copied.
+func (cldr *CLDR) resolveAndMergeAlias(e Elem) error {
+	alias := e.GetCommon().Alias
+	if alias == nil {
+		return nil
+	}
+	target, err := cldr.resolveAlias(e, alias.Source, alias.Path)
+	if err != nil {
+		return fmt.Errorf("%v: error evaluating path %q: %v", getPath(e), alias.Path, err)
+	}
+	// Ensure alias node was already evaluated. TODO: avoid double evaluation.
+	targetErr := cldr.resolveAndMergeAlias(target)
+	dst := reflect.ValueOf(e).Elem()
+	for it := iter(reflect.ValueOf(target).Elem()); !it.done(); it.next() {
+		src := it.value()
+		if src.Kind() == reflect.Ptr && src.IsNil() {
+			continue
+		}
+		if _, isAttr := xmlName(it.field()); isAttr {
+			continue
+		}
+		dst.FieldByIndex(it.index).Set(src)
+	}
+	return targetErr
+}
+
+// aliasResolver returns a visitor that resolves and merges the alias of every
+// visited value whose address implements Elem. It stops the descent at
+// elements listed in blocking.
+func (cldr *CLDR) aliasResolver() visitor {
+	return func(v reflect.Value) error {
+		e, ok := v.Addr().Interface().(Elem)
+		if !ok {
+			return nil
+		}
+		if err := cldr.resolveAndMergeAlias(e); err != nil {
+			return err
+		}
+		if blocking[e.GetCommon().name] {
+			return stopDescent
+		}
+		return nil
+	}
+}
+
+// blocking is the set of names of blocking elements. Elements within
+// blocking elements do not inherit.
+// Taken from CLDR's supplementalMetaData.xml.
+var blocking = map[string]bool{
+	"identity":         true,
+	"supplementalData": true,
+	"cldrTest":         true,
+	"collation":        true,
+	"transform":        true,
+}
+
+// Distinguishing attributes affect inheritance; two elements with different
+// distinguishing attributes are treated as different for purposes of inheritance,
+// except when such attributes occur in the indicated elements.
+// The map is keyed by attribute name; the value lists the names of the
+// elements for which the attribute is not distinguishing.
+// Taken from CLDR's supplementalMetaData.xml.
+var distinguishing = map[string][]string{
+	"key":        nil,
+	"request_id": nil,
+	"id":         nil,
+	"registry":   nil,
+	"alt":        nil,
+	"iso4217":    nil,
+	"iso3166":    nil,
+	"mzone":      nil,
+	"from":       nil,
+	"to":         nil,
+	"type": []string{
+		"abbreviationFallback",
+		"default",
+		"mapping",
+		"measurementSystem",
+		"preferenceOrdering",
+	},
+	"numberSystem": nil,
+}
+
+// in reports whether s occurs in set.
+func in(set []string, s string) bool {
+	for i := 0; i < len(set); i++ {
+		if set[i] == s {
+			return true
+		}
+	}
+	return false
+}
+
+// attrKey computes a key based on the distinguishable attributes of
+// an element and it's values.
+func attrKey(v reflect.Value, exclude ...string) string {
+	parts := []string{}
+	ename := v.Interface().(Elem).GetCommon().name
+	v = v.Elem()
+	for i := iter(v); !i.done(); i.next() {
+		if name, attr := xmlName(i.field()); attr {
+			if except, ok := distinguishing[name]; ok && !in(exclude, name) && !in(except, ename) {
+				v := i.value()
+				if v.Kind() == reflect.Ptr {
+					v = v.Elem()
+				}
+				if v.IsValid() {
+					parts = append(parts, fmt.Sprintf("%s=%s", name, v.String()))
+				}
+			}
+		}
+	}
+	sort.Strings(parts)
+	return strings.Join(parts, ";")
+}
+
+// Key returns a key for e derived from all distinguishing attributes
+// except those specified by exclude.
+func Key(e Elem, exclude ...string) string {
+	v := reflect.ValueOf(e)
+	return attrKey(v, exclude...)
+}
+
+// linkEnclosing sets parent as the enclosing element of child and then,
+// recursively, sets child as the enclosing element of all elements held in
+// its slice fields and non-nil struct pointer fields.
+func linkEnclosing(parent, child Elem) {
+	child.setEnclosing(parent)
+	for it := iter(reflect.ValueOf(child).Elem()); !it.done(); it.next() {
+		switch vf := it.value(); vf.Kind() {
+		case reflect.Slice:
+			for j := 0; j < vf.Len(); j++ {
+				linkEnclosing(child, vf.Index(j).Interface().(Elem))
+			}
+		case reflect.Ptr:
+			if !vf.IsNil() && vf.Elem().Kind() == reflect.Struct {
+				linkEnclosing(child, vf.Interface().(Elem))
+			}
+		}
+	}
+}
+
+// setNames sets the name of e to name and, recursively, sets the name of
+// every element held in a slice field or non-nil struct pointer field of e to
+// the XML name of that field.
+func setNames(e Elem, name string) {
+	e.setName(name)
+	for it := iter(reflect.ValueOf(e).Elem()); !it.done(); it.next() {
+		fieldName, _ := xmlName(it.field())
+		switch vf := it.value(); vf.Kind() {
+		case reflect.Slice:
+			for j := 0; j < vf.Len(); j++ {
+				setNames(vf.Index(j).Interface().(Elem), fieldName)
+			}
+		case reflect.Ptr:
+			if !vf.IsNil() && vf.Elem().Kind() == reflect.Struct {
+				setNames(vf.Interface().(Elem), fieldName)
+			}
+		}
+	}
+}
+
+// deepCopy copies elements of v recursively.  All elements of v that may
+// be modified by inheritance are explicitly copied.
+// v must be a pointer or a slice. Nil pointers and pointers to anything but
+// a struct are returned as is.
+func deepCopy(v reflect.Value) reflect.Value {
+	if v.Kind() == reflect.Ptr {
+		if v.IsNil() {
+			return v
+		}
+		src := v.Elem()
+		if src.Kind() != reflect.Struct {
+			return v
+		}
+		dst := reflect.New(src.Type())
+		dst.Elem().Set(src)
+		deepCopyRec(dst.Elem(), src)
+		return dst
+	}
+	if v.Kind() == reflect.Slice {
+		n := v.Len()
+		dst := reflect.MakeSlice(v.Type(), n, n)
+		for i := 0; i < n; i++ {
+			deepCopyRec(dst.Index(i), v.Index(i))
+		}
+		return dst
+	}
+	panic("deepCopy: must be called with pointer or slice")
+}
+
+// deepCopyRec is only called by deepCopy. For a struct it copies, field by
+// field, all fields that have an XML name and are not attributes; any other
+// value is replaced in nv by a deep copy of v.
+func deepCopyRec(nv, v reflect.Value) {
+	if v.Kind() != reflect.Struct {
+		nv.Set(deepCopy(v))
+		return
+	}
+	typ := v.Type()
+	for i := 0; i < v.NumField(); i++ {
+		name, attr := xmlName(typ.Field(i))
+		if name == "" || attr {
+			continue
+		}
+		deepCopyRec(nv.Field(i), v.Field(i))
+	}
+}
+
+// newNode is used to insert a missing node during inheritance. It returns a
+// pointer to a new value of the type of v in which only the attributes and
+// the fields without an XML name are copied from v, and whose enclosing
+// element is set to enc.
+func (cldr *CLDR) newNode(v, enc reflect.Value) reflect.Value {
+	n := reflect.New(v.Type())
+	dst := n.Elem()
+	for it := iter(v); !it.done(); it.next() {
+		name, attr := xmlName(it.field())
+		if name != "" && !attr {
+			continue
+		}
+		dst.FieldByIndex(it.index).Set(it.value())
+	}
+	common := n.Interface().(Elem).GetCommon()
+	common.setEnclosing(enc.Addr().Interface().(Elem))
+	return n
+}
+
+// inheritFields returns a pointer to a copy of v in which the sub-elements
+// are completed with those of parent.
+// v and parent must be struct values of the same type (callers pass the
+// Elem() of a pointer to struct); v must be addressable.
+// Attributes and fields without an XML name are taken from v unchanged.
+func (cldr *CLDR) inheritFields(v, parent reflect.Value) (res reflect.Value, err error) {
+	t := v.Type()
+	nv := reflect.New(t)
+	nv.Elem().Set(v)
+	for i := iter(v); !i.done(); i.next() {
+		vf := i.value()
+		f := i.field()
+		name, attr := xmlName(f)
+		if name == "" || attr {
+			continue
+		}
+		pf := parent.FieldByIndex(i.index)
+		if blocking[name] {
+			// Blocking elements are taken as a whole, either from v or,
+			// if v has none, from parent.
+			if vf.IsNil() {
+				vf = pf
+			}
+			nv.Elem().FieldByIndex(i.index).Set(deepCopy(vf))
+			continue
+		}
+		switch f.Type.Kind() {
+		case reflect.Ptr:
+			if f.Type.Elem().Kind() == reflect.Struct {
+				if !vf.IsNil() {
+					if vf, err = cldr.inheritStructPtr(vf, pf); err != nil {
+						return reflect.Value{}, err
+					}
+					vf.Interface().(Elem).setEnclosing(nv.Interface().(Elem))
+					nv.Elem().FieldByIndex(i.index).Set(vf)
+				} else if !pf.IsNil() {
+					// The element only exists in parent: insert a new node
+					// and let it inherit from the parent's element.
+					n := cldr.newNode(pf.Elem(), v)
+					if vf, err = cldr.inheritStructPtr(n, pf); err != nil {
+						return reflect.Value{}, err
+					}
+					vf.Interface().(Elem).setEnclosing(nv.Interface().(Elem))
+					nv.Elem().FieldByIndex(i.index).Set(vf)
+				}
+			}
+		case reflect.Slice:
+			// Note that vf and err are redeclared in this case.
+			vf, err := cldr.inheritSlice(nv.Elem(), vf, pf)
+			if err != nil {
+				return reflect.Zero(t), err
+			}
+			nv.Elem().FieldByIndex(i.index).Set(vf)
+		}
+	}
+	return nv, nil
+}
+
+// root returns the outermost element enclosing e, which must be an *LDML.
+func root(e Elem) *LDML {
+	for {
+		outer := e.enclosing()
+		if outer == nil {
+			break
+		}
+		e = outer
+	}
+	return e.(*LDML)
+}
+
+// inheritStructPtr first merges possible aliases in with v and then inherits
+// any underspecified elements from parent.
+// v and parent are pointers to struct; at most one of them may be nil.
+// If v is nil, it is returned unchanged.
+func (cldr *CLDR) inheritStructPtr(v, parent reflect.Value) (r reflect.Value, err error) {
+	if !v.IsNil() {
+		e := v.Interface().(Elem).GetCommon()
+		alias := e.Alias
+		if alias == nil && !parent.IsNil() {
+			// Fall back to the alias of the parent's element.
+			alias = parent.Interface().(Elem).GetCommon().Alias
+		}
+		if alias != nil {
+			// NOTE(review): err is redeclared here, so an error returned by
+			// resolveAlias is dropped when a is nil. Confirm that ignoring
+			// unresolvable aliases is intended.
+			a, err := cldr.resolveAlias(v.Interface().(Elem), alias.Source, alias.Path)
+			if a != nil {
+				if v, err = cldr.inheritFields(v.Elem(), reflect.ValueOf(a).Elem()); err != nil {
+					return reflect.Value{}, err
+				}
+			}
+		}
+		if !parent.IsNil() {
+			return cldr.inheritFields(v.Elem(), parent.Elem())
+		}
+	} else if parent.IsNil() {
+		panic("should not reach here")
+	}
+	return v, nil
+}
+
+// inheritSlice merges slice v with slice parent. Both must be slices of
+// struct pointers; enc is the addressable struct that will hold the result.
+//
+// Elements are matched by the key of their distinguishing attributes. An
+// element of v inherits from the parent element with the same key; a parent
+// element without counterpart in v is added as a new node that inherits from
+// it. The resulting slice is ordered by key.
+// If inheritance fails for an element, v is returned with the error.
+func (cldr *CLDR) inheritSlice(enc, v, parent reflect.Value) (res reflect.Value, err error) {
+	t := v.Type()
+	index := make(map[string]reflect.Value)
+	if !v.IsNil() {
+		for i := 0; i < v.Len(); i++ {
+			vi := v.Index(i)
+			key := attrKey(vi)
+			index[key] = vi
+		}
+	}
+	if !parent.IsNil() {
+		for i := 0; i < parent.Len(); i++ {
+			vi := parent.Index(i)
+			key := attrKey(vi)
+			if w, ok := index[key]; ok {
+				index[key], err = cldr.inheritStructPtr(w, vi)
+			} else {
+				n := cldr.newNode(vi.Elem(), enc)
+				index[key], err = cldr.inheritStructPtr(n, vi)
+			}
+			// Check the error before touching the result: on failure
+			// index[key] is the zero Value and must not be dereferenced.
+			if err != nil {
+				return v, err
+			}
+			index[key].Interface().(Elem).setEnclosing(enc.Addr().Interface().(Elem))
+		}
+	}
+	keys := make([]string, 0, len(index))
+	for k := range index {
+		keys = append(keys, k)
+	}
+	sort.Strings(keys)
+	sl := reflect.MakeSlice(t, len(index), len(index))
+	for i, k := range keys {
+		sl.Index(i).Set(index[k])
+	}
+	return sl, nil
+}
+
+// parentLocale returns the name of the locale loc inherits from, which is loc
+// without its last '_'-separated part, or "root" if loc has only one part.
+func parentLocale(loc string) string {
+	cut := strings.LastIndex(loc, "_")
+	if cut < 0 {
+		return "root"
+	}
+	return loc[:cut]
+}
+
+func (cldr *CLDR) resolve(loc string) (res *LDML, err error) {
+	if r := cldr.resolved[loc]; r != nil {
+		return r, nil
+	}
+	x := cldr.RawLDML(loc)
+	if x == nil {
+		return nil, fmt.Errorf("cldr: unknown locale %q", loc)
+	}
+	var v reflect.Value
+	if loc == "root" {
+		x = deepCopy(reflect.ValueOf(x)).Interface().(*LDML)
+		linkEnclosing(nil, x)
+		err = cldr.aliasResolver().visit(x)
+	} else {
+		key := parentLocale(loc)
+		var parent *LDML
+		for ; cldr.locale[key] == nil; key = parentLocale(key) {
+		}
+		if parent, err = cldr.resolve(key); err != nil {
+			return nil, err
+		}
+		v, err = cldr.inheritFields(reflect.ValueOf(x).Elem(), reflect.ValueOf(parent).Elem())
+		x = v.Interface().(*LDML)
+		linkEnclosing(nil, x)
+	}
+	if err != nil {
+		return nil, err
+	}
+	cldr.resolved[loc] = x
+	return x, err
+}
+
+// finalize finalizes the initialization of the raw LDML structs: it links
+// every element to its enclosing element and sets the element names.
+// If filter is not nil, it first clears all top-level fields that have an XML
+// name other than "identity" that is not listed in filter, so that they will
+// not be unnecessarily evaluated.
+func (cldr *CLDR) finalize(filter []string) {
+	for _, x := range cldr.locale {
+		if filter != nil {
+			v := reflect.ValueOf(x).Elem()
+			typ := v.Type()
+			for i := 0; i < v.NumField(); i++ {
+				sf := typ.Field(i)
+				name, _ := xmlName(sf)
+				if name == "" || name == "identity" || in(filter, name) {
+					continue
+				}
+				v.Field(i).Set(reflect.Zero(sf.Type))
+			}
+		}
+		linkEnclosing(nil, x) // for resolving aliases and paths
+		setNames(x, "ldml")
+	}
+}
@@ -0,0 +1,144 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cldr
+
+import (
+	"fmt"
+	"reflect"
+	"sort"
+)
+
+// Slice provides utilities for modifying slices of elements.
+// It can be wrapped around any slice of which the element type implements
+// interface Elem.
+type Slice struct {
+	ptr reflect.Value // pointer to the wrapped slice
+	typ reflect.Type  // type the elements of the slice point to
+}
+
+// Value returns the reflect.Value of the underlying slice.
+func (s *Slice) Value() reflect.Value {
+	sl := s.ptr.Elem()
+	return sl
+}
+
+// MakeSlice wraps a pointer to a slice of Elems.
+// It replaces the array pointed to by the slice so that subsequent modifications
+// do not alter the data in a CLDR type.
+// It panics if an incorrect type is passed.
+func MakeSlice(slicePtr interface{}) Slice {
+	ptr := reflect.ValueOf(slicePtr)
+	if k := ptr.Kind(); k != reflect.Ptr {
+		panic(fmt.Sprintf("MakeSlice: argument must be pointer to slice, found %v", ptr.Type()))
+	}
+	sl := ptr.Elem()
+	if k := sl.Kind(); k != reflect.Slice {
+		panic(fmt.Sprintf("MakeSlice: argument must point to a slice, found %v", sl.Type()))
+	}
+	elemType := sl.Type().Elem()
+	if !elemType.Implements(reflect.TypeOf((*Elem)(nil)).Elem()) {
+		panic(fmt.Sprintf("MakeSlice: element type of slice (%v) does not implement Elem", sl.Type().Elem()))
+	}
+	// Give the slice a backing array of its own.
+	n := sl.Len()
+	fresh := reflect.MakeSlice(sl.Type(), n, n)
+	reflect.Copy(fresh, sl)
+	sl.Set(fresh)
+	return Slice{ptr: ptr, typ: elemType.Elem()}
+}
+
+// indexForAttr returns the field index path, within the element type of s,
+// of the field whose XML name is a. It panics if there is no such field.
+func (s Slice) indexForAttr(a string) []int {
+	it := iter(reflect.Zero(s.typ))
+	for !it.done() {
+		if name, _ := xmlName(it.field()); name == a {
+			return it.index
+		}
+		it.next()
+	}
+	panic(fmt.Sprintf("MakeSlice: no attribute %q for type %v", a, s.typ))
+}
+
+// Filter filters s to only include elements for which fn returns true.
+// The retained elements keep their relative order.
+func (s Slice) Filter(fn func(e Elem) bool) {
+	sl := s.Value()
+	kept := 0
+	for i := 0; i < sl.Len(); i++ {
+		cur := sl.Index(i)
+		if !fn(cur.Interface().(Elem)) {
+			continue
+		}
+		sl.Index(kept).Set(cur)
+		kept++
+	}
+	sl.Set(sl.Slice(0, kept))
+}
+
+// Group finds elements in s for which fn returns the same value and groups
+// them in a new Slice. The groups are returned ordered by that value.
+func (s Slice) Group(fn func(e Elem) string) []Slice {
+	sl := s.Value()
+	byKey := make(map[string][]reflect.Value)
+	for i := 0; i < sl.Len(); i++ {
+		cur := sl.Index(i)
+		k := fn(cur.Interface().(Elem))
+		byKey[k] = append(byKey[k], cur)
+	}
+	keys := make([]string, 0, len(byKey))
+	for k := range byKey {
+		keys = append(keys, k)
+	}
+	sort.Strings(keys)
+	groups := []Slice{}
+	for _, k := range keys {
+		p := reflect.New(sl.Type())
+		p.Elem().Set(reflect.Append(p.Elem(), byKey[k]...))
+		groups = append(groups, MakeSlice(p.Interface()))
+	}
+	return groups
+}
+
+// SelectAnyOf filters s to contain only elements for which attr matches
+// any of the values.
+func (s Slice) SelectAnyOf(attr string, values ...string) {
+	path := s.indexForAttr(attr)
+	matches := func(e Elem) bool {
+		got := reflect.ValueOf(e).Elem().FieldByIndex(path).String()
+		return in(values, got)
+	}
+	s.Filter(matches)
+}
+
+// SelectOnePerGroup filters s to include at most one element e per group of
+// elements matching Key(attr), where e has an attribute a that matches any
+// the values in v.
+// If more than one element in a group matches a value in v preference
+// is given to the element that matches the first value in v.
+func (s Slice) SelectOnePerGroup(a string, v []string) {
+	index := s.indexForAttr(a)
+	grouped := s.Group(func(e Elem) string { return Key(e, a) })
+	sl := s.Value()
+	sl.Set(sl.Slice(0, 0))
+	for _, g := range grouped {
+		e := reflect.Value{}
+		found := len(v)
+		gsl := g.Value()
+		for i := 0; i < gsl.Len(); i++ {
+			vi := gsl.Index(i).Elem().FieldByIndex(index)
+			j := 0
+			for ; j < len(v) && v[j] != vi.String(); j++ {
+			}
+			if j < found {
+				found = j
+				e = gsl.Index(i)
+			}
+		}
+		if found < len(v) {
+			sl.Set(reflect.Append(sl, e))
+		}
+	}
+}
+
+// SelectDraft drops all elements from the list with a draft level smaller than d
+// and selects the highest draft level of the remaining.
+// This method assumes that the input CLDR is canonicalized.
+func (s Slice) SelectDraft(d Draft) {
+	first := len(drafts) - 2 - int(d)
+	s.SelectOnePerGroup("draft", drafts[first:])
+}
@@ -0,0 +1,976 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+// Normalization table generator.
+// Data read from the web.
+// See forminfo.go for a description of the trie values associated with each rune.
+
+package main
+
+import (
+	"bytes"
+	"flag"
+	"fmt"
+	"io"
+	"log"
+	"sort"
+	"strconv"
+	"strings"
+
+	"golang.org/x/text/internal/gen"
+	"golang.org/x/text/internal/triegen"
+	"golang.org/x/text/internal/ucd"
+)
+
+// main runs the generator: it initializes the gen package, loads the Unicode
+// data, derives the per-character properties, checks them, and finally
+// writes the tables. The steps are run in the order listed.
+func main() {
+	gen.Init()
+	loadUnicodeData()
+	compactCCC()
+	loadCompositionExclusions()
+	completeCharFields(FCanonical)
+	completeCharFields(FCompatibility)
+	computeNonStarterCounts()
+	verifyComputed()
+	printChars()
+	testDerived()
+	printTestdata()
+	makeTables()
+}
+
+// Command line flags of the generator.
+var (
+	tablelist = flag.String("tables",
+		"all",
+		"comma-separated list of which tables to generate; "+
+			"can be 'decomp', 'recomp', 'info' and 'all'")
+	test = flag.Bool("test",
+		false,
+		"test existing tables against DerivedNormalizationProps and generate test data for regression testing")
+	verbose = flag.Bool("verbose",
+		false,
+		"write data to stdout as it is parsed")
+)
+
+// MaxChar is the highest code point handled by the generator.
+const MaxChar = 0x10FFFF // anything above this shouldn't exist
+
+// QCResult is the Quick Check property of a rune for a given normal form.
+// Quick Check properties allow us to quickly determine whether a rune may
+// occur in a normal form: a rune may be guaranteed to occur verbatim
+// (QC=Yes), may or may not combine with another rune (QC=Maybe), or may not
+// occur (QC=No).
+type QCResult int
+
+const (
+	QCUnknown QCResult = iota
+	QCYes
+	QCNo
+	QCMaybe
+)
+
+// String returns "Yes", "No" or "Maybe" for the corresponding result and
+// "***UNKNOWN***" for any other value.
+func (r QCResult) String() string {
+	names := map[QCResult]string{
+		QCYes:   "Yes",
+		QCNo:    "No",
+		QCMaybe: "Maybe",
+	}
+	if name, ok := names[r]; ok {
+		return name
+	}
+	return "***UNKNOWN***"
+}
+
+// Form types; used as index into Char.forms.
+const (
+	FCanonical     = iota // NFC or NFD
+	FCompatibility        // NFKC or NFKD
+	FNumberOfFormTypes
+)
+
+// Modes; used as index into FormInfo.quickCheck and FormInfo.verified.
+const (
+	MComposed   = iota // NFC or NFKC
+	MDecomposed        // NFD or NFKD
+	MNumberOfModes
+)
+
+// Char holds the properties of a single code point.
+// This contains only the properties we're interested in.
+type Char struct {
+	name          string
+	codePoint     rune  // if zero, this index is not a valid code point.
+	ccc           uint8 // canonical combining class
+	origCCC       uint8 // value of ccc before it was compacted by compactCCC
+	excludeInComp bool  // from CompositionExclusions.txt
+	compatDecomp  bool  // it has a compatibility expansion
+
+	nTrailingNonStarters uint8
+	nLeadingNonStarters  uint8 // must be equal to trailing if non-zero
+
+	forms [FNumberOfFormTypes]FormInfo // For FCanonical and FCompatibility
+
+	state State
+}
+
+// chars holds the properties of all code points, indexed by code point.
+var chars = make([]Char, MaxChar+1)
+
+// cccMap maps the compacted CCC values assigned by compactCCC back to the
+// original CCC values.
+var cccMap = make(map[uint8]uint8)
+
+func (c Char) String() string {
+	buf := new(bytes.Buffer)
+
+	fmt.Fprintf(buf, "%U [%s]:\n", c.codePoint, c.name)
+	fmt.Fprintf(buf, "  ccc: %v\n", c.ccc)
+	fmt.Fprintf(buf, "  excludeInComp: %v\n", c.excludeInComp)
+	fmt.Fprintf(buf, "  compatDecomp: %v\n", c.compatDecomp)
+	fmt.Fprintf(buf, "  state: %v\n", c.state)
+	fmt.Fprintf(buf, "  NFC:\n")
+	fmt.Fprint(buf, c.forms[FCanonical])
+	fmt.Fprintf(buf, "  NFKC:\n")
+	fmt.Fprint(buf, c.forms[FCompatibility])
+
+	return buf.String()
+}
+
+// State marks the entries of UnicodeData.txt that describe a range.
+// In UnicodeData.txt, some ranges are marked like this:
+//	3400;<CJK Ideograph Extension A, First>;Lo;0;L;;;;;N;;;;;
+//	4DB5;<CJK Ideograph Extension A, Last>;Lo;0;L;;;;;N;;;;;
+// parseCharacter keeps a state variable indicating the weirdness.
+type State int
+
+const (
+	SNormal State = iota // known to be zero for the type
+	SFirst
+	SLast
+	SMissing
+)
+
+// lastChar starts out as U+0000.
+// NOTE(review): not referenced in the visible part of this file; presumably
+// it tracks the last code point that was read. Confirm against its users.
+var lastChar = rune('\u0000')
+
+// isValid reports whether c has a non-zero code point and is not marked as
+// missing.
+func (c Char) isValid() bool {
+	if c.codePoint == 0 {
+		return false
+	}
+	return c.state != SMissing
+}
+
+// FormInfo holds the properties of a code point for one form type
+// (FCanonical or FCompatibility).
+type FormInfo struct {
+	quickCheck [MNumberOfModes]QCResult // index: MComposed or MDecomposed
+	verified   [MNumberOfModes]bool     // index: MComposed or MDecomposed
+
+	combinesForward  bool // May combine with rune on the right
+	combinesBackward bool // May combine with rune on the left
+	isOneWay         bool // Never appears in result
+	inDecomp         bool // Some decompositions result in this char.
+	decomp           Decomposition // decomposition as parsed from UnicodeData.txt
+	expandedDecomp   Decomposition // decomp with all of its runes recursively decomposed
+}
+
+func (f FormInfo) String() string {
+	buf := bytes.NewBuffer(make([]byte, 0))
+
+	fmt.Fprintf(buf, "    quickCheck[C]: %v\n", f.quickCheck[MComposed])
+	fmt.Fprintf(buf, "    quickCheck[D]: %v\n", f.quickCheck[MDecomposed])
+	fmt.Fprintf(buf, "    cmbForward: %v\n", f.combinesForward)
+	fmt.Fprintf(buf, "    cmbBackward: %v\n", f.combinesBackward)
+	fmt.Fprintf(buf, "    isOneWay: %v\n", f.isOneWay)
+	fmt.Fprintf(buf, "    inDecomp: %v\n", f.inDecomp)
+	fmt.Fprintf(buf, "    decomposition: %X\n", f.decomp)
+	fmt.Fprintf(buf, "    expandedDecomp: %X\n", f.expandedDecomp)
+
+	return buf.String()
+}
+
+// Decomposition is the sequence of runes a rune decomposes into.
+type Decomposition []rune
+
+// parseDecomposition parses s, a list of hexadecimal code points separated by
+// single spaces, into a slice of runes. If skipfirst is set, the first field
+// of s is ignored. On a parse error it returns the runes parsed so far
+// together with the error.
+func parseDecomposition(s string, skipfirst bool) (a []rune, err error) {
+	fields := strings.Split(s, " ")
+	if skipfirst && len(fields) > 0 {
+		fields = fields[1:]
+	}
+	for i := range fields {
+		cp, perr := strconv.ParseUint(fields[i], 16, 64)
+		if perr != nil {
+			return a, perr
+		}
+		a = append(a, rune(cp))
+	}
+	return a, nil
+}
+
+// loadUnicodeData reads UnicodeData.txt and records, for every code point
+// listed, its name, canonical combining class and decomposition in chars.
+// A decomposition mapping that can only be parsed after skipping its first
+// field is recorded as a compatibility decomposition; any other mapping is
+// recorded for both the canonical and the compatibility form.
+func loadUnicodeData() {
+	f := gen.OpenUCDFile("UnicodeData.txt")
+	defer f.Close()
+	p := ucd.New(f)
+	for p.Next() {
+		r := p.Rune(ucd.CodePoint)
+		char := &chars[r]
+
+		char.ccc = uint8(p.Uint(ucd.CanonicalCombiningClass))
+		decmap := p.String(ucd.DecompMapping)
+
+		exp, err := parseDecomposition(decmap, false)
+		isCompat := err != nil && len(decmap) > 0
+		if isCompat {
+			// Retry without the leading field.
+			if exp, err = parseDecomposition(decmap, true); err != nil {
+				log.Fatalf(`%U: bad decomp |%v|: "%s"`, r, decmap, err)
+			}
+		}
+
+		char.name = p.String(ucd.Name)
+		char.codePoint = r
+		char.forms[FCompatibility].decomp = exp
+		if isCompat {
+			char.compatDecomp = true
+		} else {
+			char.forms[FCanonical].decomp = exp
+		}
+	}
+	if err := p.Err(); err != nil {
+		log.Fatal(err)
+	}
+}
+
+// compactCCC converts the sparse set of CCC values to a contiguous one,
+// reducing the number of bits needed from 8 to 6. The original value of each
+// character is kept in origCCC and cccMap maps the compacted values back to
+// the original ones. The program is terminated if there are 64 or more
+// different values.
+func compactCCC() {
+	compact := make(map[uint8]uint8)
+	for i := range chars {
+		compact[chars[i].ccc] = 0
+	}
+	sorted := []int{}
+	for v := range compact {
+		sorted = append(sorted, int(v))
+	}
+	sort.Ints(sorted)
+	for i, orig := range sorted {
+		cccMap[uint8(i)] = uint8(orig)
+		compact[uint8(orig)] = uint8(i)
+	}
+	for i := range chars {
+		c := &chars[i]
+		c.origCCC = c.ccc
+		c.ccc = compact[c.ccc]
+	}
+	if len(compact) >= 1<<6 {
+		log.Fatalf("too many difference CCC values: %d >= 64", len(compact))
+	}
+}
+
+// loadCompositionExclusions marks every rune listed in
+// CompositionExclusions.txt as excluded from composition. A rune that is
+// listed twice terminates the program.
+//
+// CompositionExclusions.txt has form:
+// 0958    # ...
+// See http://unicode.org/reports/tr44/ for full explanation
+func loadCompositionExclusions() {
+	f := gen.OpenUCDFile("CompositionExclusions.txt")
+	defer f.Close()
+	p := ucd.New(f)
+	for p.Next() {
+		entry := &chars[p.Rune(0)]
+		if entry.excludeInComp {
+			log.Fatalf("%U: Duplicate entry in exclusions.", entry.codePoint)
+		}
+		entry.excludeInComp = true
+	}
+	if err := p.Err(); err != nil {
+		log.Fatal(err)
+	}
+}
+
+// hasCompatDecomp reports whether r, or any of the runes in its recursive
+// decomposition, has a compatibility expansion.
+// In this case, the character may not occur in NFK*.
+func hasCompatDecomp(r rune) bool {
+	c := &chars[r]
+	if c.compatDecomp {
+		return true
+	}
+	decomp := c.forms[FCompatibility].decomp
+	for i := range decomp {
+		if hasCompatDecomp(decomp[i]) {
+			return true
+		}
+	}
+	return false
+}
+
+// Hangul related constants.
+// HangulEnd is exclusive: isHangul tests HangulBase <= r < HangulEnd.
+// NOTE(review): the Jamo*End values are presumably exclusive as well;
+// confirm against their users.
+const (
+	HangulBase = 0xAC00
+	HangulEnd  = 0xD7A4 // hangulBase + Jamo combinations (19 * 21 * 28)
+
+	JamoLBase = 0x1100
+	JamoLEnd  = 0x1113
+	JamoVBase = 0x1161
+	JamoVEnd  = 0x1176
+	JamoTBase = 0x11A8
+	JamoTEnd  = 0x11C3
+
+	JamoLVTCount = 19 * 21 * 28
+	JamoTCount   = 28
+)
+
+// isHangul reports whether r lies in the half-open range
+// [HangulBase, HangulEnd).
+func isHangul(r rune) bool {
+	if r < HangulBase {
+		return false
+	}
+	return r < HangulEnd
+}
+
+// isHangulWithoutJamoT reports whether r is in the Hangul range and its
+// offset from HangulBase is a multiple of JamoTCount.
+func isHangulWithoutJamoT(r rune) bool {
+	if isHangul(r) {
+		offset := r - HangulBase
+		return offset < JamoLVTCount && offset%JamoTCount == 0
+	}
+	return false
+}
+
+// ccc returns the ccc field of the chars entry for r.
+func ccc(r rune) uint8 {
+	entry := &chars[r]
+	return entry.ccc
+}
+
+// insertOrdered appends r to b and returns the grown buffer. A rune whose
+// combining class is non-zero is moved in front of any trailing runes with a
+// strictly greater class; a rune with class zero always stays at the end.
+func insertOrdered(b Decomposition, r rune) Decomposition {
+	b = append(b, 0)
+	pos := len(b) - 1
+	if class := ccc(r); class > 0 {
+		// Shift larger-class runes one slot to the right until the
+		// insertion point is found.
+		for pos > 0 && ccc(b[pos-1]) > class {
+			b[pos] = b[pos-1]
+			pos--
+		}
+	}
+	b[pos] = r
+	return b
+}
+
+// decomposeRecursive expands r for the given form and adds the result to d.
+// A rune without a decomposition is inserted directly with insertOrdered;
+// otherwise each component is expanded in turn.
+func decomposeRecursive(form int, r rune, d Decomposition) Decomposition {
+	parts := chars[r].forms[form].decomp
+	if len(parts) > 0 {
+		for i := range parts {
+			d = decomposeRecursive(form, parts[i], d)
+		}
+		return d
+	}
+	return insertOrdered(d, r)
+}
+
+// completeCharFields derives the per-form fields of every entry in chars for
+// the given form (an index into Char.forms): the fully expanded
+// decomposition, the isOneWay and inDecomp marks, the combinesForward and
+// combinesBackward flags, and the quick check values for both modes.
+//
+// The phases must run in this order: each one reads fields that an earlier
+// phase has completed for all characters.
+func completeCharFields(form int) {
+	// Phase 0: pre-expand decomposition.
+	for i := range chars {
+		f := &chars[i].forms[form]
+		if len(f.decomp) == 0 {
+			continue
+		}
+		exp := make(Decomposition, 0)
+		for _, c := range f.decomp {
+			exp = decomposeRecursive(form, c, exp)
+		}
+		f.expandedDecomp = exp
+	}
+
+	// Phase 1: composition exclusion, mark decomposition.
+	for i := range chars {
+		c := &chars[i]
+		f := &c.forms[form]
+
+		// Marks script-specific exclusions and version restricted.
+		f.isOneWay = c.excludeInComp
+
+		// Singletons
+		f.isOneWay = f.isOneWay || len(f.decomp) == 1
+
+		// Non-starter decompositions
+		if len(f.decomp) > 1 {
+			chk := c.ccc != 0 || chars[f.decomp[0]].ccc != 0
+			f.isOneWay = f.isOneWay || chk
+		}
+
+		// Runes that decompose into more than two runes.
+		f.isOneWay = f.isOneWay || len(f.decomp) > 2
+
+		if form == FCompatibility {
+			f.isOneWay = f.isOneWay || hasCompatDecomp(c.codePoint)
+		}
+
+		// Flag every rune that appears in some direct decomposition.
+		for _, r := range f.decomp {
+			chars[r].forms[form].inDecomp = true
+		}
+	}
+
+	// Phase 2: forward and backward combining.
+	for i := range chars {
+		c := &chars[i]
+		f := &c.forms[form]
+
+		// A two-rune decomposition that may be recomposed makes its first
+		// rune combine forward and its second rune combine backward, unless
+		// that component is itself one-way.
+		if !f.isOneWay && len(f.decomp) == 2 {
+			f0 := &chars[f.decomp[0]].forms[form]
+			f1 := &chars[f.decomp[1]].forms[form]
+			if !f0.isOneWay {
+				f0.combinesForward = true
+			}
+			if !f1.isOneWay {
+				f1.combinesBackward = true
+			}
+		}
+		if isHangulWithoutJamoT(rune(i)) {
+			f.combinesForward = true
+		}
+	}
+
+	// Phase 3: quick check values.
+	for i := range chars {
+		c := &chars[i]
+		f := &c.forms[form]
+
+		// Decomposed mode: No for anything that decomposes and for the
+		// whole Hangul range, Yes otherwise.
+		switch {
+		case len(f.decomp) > 0:
+			f.quickCheck[MDecomposed] = QCNo
+		case isHangul(rune(i)):
+			f.quickCheck[MDecomposed] = QCNo
+		default:
+			f.quickCheck[MDecomposed] = QCYes
+		}
+		// Composed mode. The first matching case wins, so one-way runes are
+		// No even if they fall in the jamo block.
+		switch {
+		case f.isOneWay:
+			f.quickCheck[MComposed] = QCNo
+		case (i & 0xffff00) == JamoLBase:
+			// Code points 0x1100-0x11FF: Yes by default, refined below for
+			// the leading, vowel and trailing jamo sub-ranges.
+			f.quickCheck[MComposed] = QCYes
+			if JamoLBase <= i && i < JamoLEnd {
+				f.combinesForward = true
+			}
+			if JamoVBase <= i && i < JamoVEnd {
+				f.quickCheck[MComposed] = QCMaybe
+				f.combinesBackward = true
+				f.combinesForward = true
+			}
+			if JamoTBase <= i && i < JamoTEnd {
+				f.quickCheck[MComposed] = QCMaybe
+				f.combinesBackward = true
+			}
+		case !f.combinesBackward:
+			f.quickCheck[MComposed] = QCYes
+		default:
+			f.quickCheck[MComposed] = QCMaybe
+		}
+	}
+}
+
+// computeNonStarterCounts fills in nLeadingNonStarters and
+// nTrailingNonStarters for every entry in chars.
+//
+// A rune counts as a non-starter here when its ccc is non-zero or when it
+// combines backwards in the compatibility form. The counts are taken over the
+// rune's compatibility expanded decomposition, or over the rune itself when
+// it has none. Runes in the Hangul range get fixed trailing counts. The
+// program aborts if a trailing count exceeds 3 or if a non-zero leading count
+// differs from the trailing count.
+func computeNonStarterCounts() {
+	// Phase 4: leading and trailing non-starter count
+	for i := range chars {
+		c := &chars[i]
+
+		runes := []rune{rune(i)}
+		// We always use FCompatibility so that the CGJ insertion points do not
+		// change for repeated normalizations with different forms.
+		if exp := c.forms[FCompatibility].expandedDecomp; len(exp) > 0 {
+			runes = exp
+		}
+		// We consider runes that combine backwards to be non-starters for the
+		// purpose of Stream-Safe Text Processing.
+		for _, r := range runes {
+			if cr := &chars[r]; cr.ccc == 0 && !cr.forms[FCompatibility].combinesBackward {
+				break
+			}
+			c.nLeadingNonStarters++
+		}
+		// Same scan from the end; j keeps the code point index i visible for
+		// the diagnostics below.
+		for j := len(runes) - 1; j >= 0; j-- {
+			if cr := &chars[runes[j]]; cr.ccc == 0 && !cr.forms[FCompatibility].combinesBackward {
+				break
+			}
+			c.nTrailingNonStarters++
+		}
+		if c.nTrailingNonStarters > 3 {
+			log.Fatalf("%U: Decomposition with more than 3 (%d) trailing modifiers (%U)", i, c.nTrailingNonStarters, runes)
+		}
+
+		// Hangul syllables override the computed trailing count: 1 for
+		// syllables without a trailing jamo, 2 otherwise.
+		if isHangul(rune(i)) {
+			c.nTrailingNonStarters = 2
+			if isHangulWithoutJamoT(rune(i)) {
+				c.nTrailingNonStarters = 1
+			}
+		}
+
+		if l, t := c.nLeadingNonStarters, c.nTrailingNonStarters; l > 0 && l != t {
+			log.Fatalf("%U: number of leading and trailing non-starters should be equal (%d vs %d)", i, l, t)
+		}
+		if t := c.nTrailingNonStarters; t > 3 {
+			// The code point must be passed for %U; the original call only
+			// passed the count.
+			log.Fatalf("%U: number of trailing non-starters is %d > 3", i, t)
+		}
+	}
+}
+
+// printBytes writes b to w as Go source: a size comment followed by a byte
+// array literal called name. A new line is started every 8 bytes and a
+// "Bytes x - y" range comment is emitted every 64 bytes.
+func printBytes(w io.Writer, b []byte, name string) {
+	fmt.Fprintf(w, "// %s: %d bytes\n", name, len(b))
+	fmt.Fprintf(w, "var %s = [...]byte {", name)
+	for i := 0; i < len(b); i++ {
+		if i%64 == 0 {
+			fmt.Fprintf(w, "\n// Bytes %x - %x\n", i, i+63)
+		} else if i%8 == 0 {
+			fmt.Fprintf(w, "\n")
+		}
+		fmt.Fprintf(w, "0x%.2X, ", b[i])
+	}
+	fmt.Fprint(w, "\n}\n\n")
+}
+
+// See forminfo.go for format.
+func makeEntry(f *FormInfo, c *Char) uint16 {
+	e := uint16(0)
+	if r := c.codePoint; HangulBase <= r && r < HangulEnd {
+		e |= 0x40
+	}
+	if f.combinesForward {
+		e |= 0x20
+	}
+	if f.quickCheck[MDecomposed] == QCNo {
+		e |= 0x4
+	}
+	switch f.quickCheck[MComposed] {
+	case QCYes:
+	case QCNo:
+		e |= 0x10
+	case QCMaybe:
+		e |= 0x18
+	default:
+		log.Fatalf("Illegal quickcheck value %v.", f.quickCheck[MComposed])
+	}
+	e |= uint16(c.nTrailingNonStarters)
+	return e
+}
+
+// decompSet keeps track of unique decompositions, grouped by whether
+// the decomposition is followed by a trailing and/or leading CCC.
+// It holds one set of encoded decomposition strings per section; the array
+// index is one of the section constants below (normalDecomp through
+// firstStarterWithNLead).
+type decompSet [7]map[string]bool
+
+// Section indices into a decompSet. lastDecomp is not a section of its own;
+// it only names the end of the final section.
+const (
+	normalDecomp = iota
+	firstMulti
+	firstCCC
+	endMulti
+	firstLeadingCCC
+	firstCCCZeroExcept
+	firstStarterWithNLead
+	lastDecomp
+)
+
+// cname[i] is the constant name printCharInfoTables emits after writing
+// section i, with the buffer length at that point as its value. The list is
+// therefore shifted by one relative to the section constants: it starts at
+// "firstMulti" (the end of normalDecomp) and ends at "lastDecomp".
+var cname = []string{"firstMulti", "firstCCC", "endMulti", "firstLeadingCCC", "firstCCCZeroExcept", "firstStarterWithNLead", "lastDecomp"}
+
+func makeDecompSet() decompSet {
+	m := decompSet{}
+	for i := range m {
+		m[i] = make(map[string]bool)
+	}
+	return m
+}
+// insert records s in the section selected by key.
+func (m *decompSet) insert(key int, s string) {
+	section := m[key]
+	section[s] = true
+}
+
+// printCharInfoTables writes to w the section-boundary constants, the decomps
+// byte table holding all unique encoded decompositions, and one trie per form
+// ("nfc", "nfkc"). It returns the table size it accounts for: the length of
+// decomps plus the sizes reported by trie.Gen. Any inconsistency in the
+// character data aborts the program.
+func printCharInfoTables(w io.Writer) int {
+	// mkstr encodes the expanded decomposition of r under f and returns the
+	// section index it belongs to together with the encoded string:
+	//   - a header byte: the UTF-8 length (must be < 64), 0x40 if the
+	//     composed quick check is not Yes, 0x80 if f combines forward;
+	//   - the UTF-8 bytes of the expanded decomposition;
+	//   - optionally a byte tccc<<2|nTrail, and after it a byte lccc.
+	// It requires f.expandedDecomp to be non-empty.
+	mkstr := func(r rune, f *FormInfo) (int, string) {
+		d := f.expandedDecomp
+		s := string([]rune(d))
+		if max := 1 << 6; len(s) >= max {
+			const msg = "%U: too many bytes in decomposition: %d >= %d"
+			log.Fatalf(msg, r, len(s), max)
+		}
+		head := uint8(len(s))
+		if f.quickCheck[MComposed] != QCYes {
+			head |= 0x40
+		}
+		if f.combinesForward {
+			head |= 0x80
+		}
+		s = string([]byte{head}) + s
+
+		// lccc/tccc: combining class of the first and last rune of the
+		// decomposition; cc: combining class of r itself.
+		lccc := ccc(d[0])
+		tccc := ccc(d[len(d)-1])
+		cc := ccc(r)
+		if cc != 0 && lccc == 0 && tccc == 0 {
+			log.Fatalf("%U: trailing and leading ccc are 0 for non-zero ccc %d", r, cc)
+		}
+		if tccc < lccc && lccc != 0 {
+			const msg = "%U: lccc (%d) must be <= tcc (%d)"
+			log.Fatalf(msg, r, lccc, tccc)
+		}
+		index := normalDecomp
+		nTrail := chars[r].nTrailingNonStarters
+		nLead := chars[r].nLeadingNonStarters
+		if tccc > 0 || lccc > 0 || nTrail > 0 {
+			// Pack tccc into the upper bits and nTrail into the lower two.
+			// NOTE(review): this relies on ccc fitting in 6 bits (see
+			// compactCCC) and nTrail in 2 bits.
+			tccc <<= 2
+			tccc |= nTrail
+			s += string([]byte{tccc})
+			index = endMulti
+			// A rune with ccc 0 after the first one moves the entry to the
+			// firstCCC section.
+			for _, r := range d[1:] {
+				if ccc(r) == 0 {
+					index = firstCCC
+				}
+			}
+			if lccc > 0 || nLead > 0 {
+				s += string([]byte{lccc})
+				if index == firstCCC {
+					log.Fatalf("%U: multi-segment decomposition not supported for decompositions with leading CCC != 0", r)
+				}
+				index = firstLeadingCCC
+			}
+			if cc != lccc {
+				if cc != 0 {
+					log.Fatalf("%U: for lccc != ccc, expected ccc to be 0; was %d", r, cc)
+				}
+				index = firstCCCZeroExcept
+			}
+		} else if len(d) > 1 {
+			index = firstMulti
+		}
+		return index, s
+	}
+
+	// NOTE: this local variable shadows the decompSet type for the rest of
+	// the function.
+	decompSet := makeDecompSet()
+	const nLeadStr = "\x00\x01" // 0-byte length and tccc with nTrail.
+	decompSet.insert(firstStarterWithNLead, nLeadStr)
+
+	// Store the uniqued decompositions in a byte buffer,
+	// preceded by their byte length.
+	for _, c := range chars {
+		for _, f := range c.forms {
+			if len(f.expandedDecomp) == 0 {
+				continue
+			}
+			if f.combinesBackward {
+				log.Fatalf("%U: combinesBackward and decompose", c.codePoint)
+			}
+			index, s := mkstr(c.codePoint, &f)
+			decompSet.insert(index, s)
+		}
+	}
+
+	decompositions := bytes.NewBuffer(make([]byte, 0, 10000))
+	size := 0
+	// positionMap maps an encoded decomposition to its offset in the buffer.
+	positionMap := make(map[string]uint16)
+	// Reserve offset 0 so that no decomposition is stored at position 0.
+	decompositions.WriteString("\000")
+	fmt.Fprintln(w, "const (")
+	for i, m := range decompSet {
+		// Sort each section so the generated table is deterministic despite
+		// random map iteration order.
+		sa := []string{}
+		for s := range m {
+			sa = append(sa, s)
+		}
+		sort.Strings(sa)
+		for _, s := range sa {
+			p := decompositions.Len()
+			decompositions.WriteString(s)
+			positionMap[s] = uint16(p)
+		}
+		// Emit the offset at which section i ends, named after the section
+		// that starts there.
+		if cname[i] != "" {
+			fmt.Fprintf(w, "%s = 0x%X\n", cname[i], decompositions.Len())
+		}
+	}
+	fmt.Fprintln(w, "maxDecomp = 0x8000")
+	fmt.Fprintln(w, ")")
+	b := decompositions.Bytes()
+	printBytes(w, b, "decomps")
+	size += len(b)
+
+	varnames := []string{"nfc", "nfkc"}
+	for i := 0; i < FNumberOfFormTypes; i++ {
+		trie := triegen.NewTrie(varnames[i])
+
+		for r, c := range chars {
+			f := c.forms[i]
+			d := f.expandedDecomp
+			if len(d) != 0 {
+				// Runes with a decomposition map to its offset in decomps.
+				_, key := mkstr(c.codePoint, &f)
+				trie.Insert(rune(r), uint64(positionMap[key]))
+				if c.ccc != ccc(d[0]) {
+					// We assume the lead ccc of a decomposition !=0 in this case.
+					if ccc(d[0]) == 0 {
+						log.Fatalf("Expected leading CCC to be non-zero; ccc is %d", c.ccc)
+					}
+				}
+			} else if c.nLeadingNonStarters > 0 && len(f.expandedDecomp) == 0 && c.ccc == 0 && !f.combinesBackward {
+				// Handle cases where it can't be detected that the nLead should be equal
+				// to nTrail.
+				trie.Insert(c.codePoint, uint64(positionMap[nLeadStr]))
+			} else if v := makeEntry(&f, &c)<<8 | uint16(c.ccc); v != 0 {
+				// All other runes with non-zero info are stored inline, with
+				// bit 0x8000 set (the value emitted above as maxDecomp).
+				trie.Insert(c.codePoint, uint64(0x8000|v))
+			}
+		}
+		sz, err := trie.Gen(w, triegen.Compact(&normCompacter{name: varnames[i]}))
+		if err != nil {
+			log.Fatal(err)
+		}
+		size += sz
+	}
+	return size
+}
+
+// contains reports whether s is an element of sa.
+func contains(sa []string, s string) bool {
+	found := false
+	for i := 0; i < len(sa) && !found; i++ {
+		found = sa[i] == s
+	}
+	return found
+}
+
+// makeTables generates tables.go for package norm. It does nothing when the
+// tablelist flag is empty. Otherwise it always writes the version constants
+// and the ccc remap table, and writes the character info tables ("info")
+// and the recomposition map ("recomp") when they are named in the
+// comma-separated tablelist; "all" selects both.
+func makeTables() {
+	w := &bytes.Buffer{}
+
+	size := 0
+	if *tablelist == "" {
+		return
+	}
+	list := strings.Split(*tablelist, ",")
+	if *tablelist == "all" {
+		list = []string{"recomp", "info"}
+	}
+
+	// Compute maximum decomposition size.
+	// The size is the UTF-8 byte length of the compatibility expansion.
+	max := 0
+	for _, c := range chars {
+		if n := len(string(c.forms[FCompatibility].expandedDecomp)); n > max {
+			max = n
+		}
+	}
+
+	fmt.Fprintln(w, "const (")
+	fmt.Fprintln(w, "\t// Version is the Unicode edition from which the tables are derived.")
+	fmt.Fprintf(w, "\tVersion = %q\n", gen.UnicodeVersion())
+	fmt.Fprintln(w)
+	fmt.Fprintln(w, "\t// MaxTransformChunkSize indicates the maximum number of bytes that Transform")
+	fmt.Fprintln(w, "\t// may need to write atomically for any Form. Making a destination buffer at")
+	fmt.Fprintln(w, "\t// least this size ensures that Transform can always make progress and that")
+	fmt.Fprintln(w, "\t// the user does not need to grow the buffer on an ErrShortDst.")
+	// The constant part is the UTF-8 length of U+034F plus the largest
+	// decomposition found above.
+	fmt.Fprintf(w, "\tMaxTransformChunkSize = %d+maxNonStarters*4\n", len(string(0x034F))+max)
+	fmt.Fprintln(w, ")\n")
+
+	// Print the CCC remap table.
+	size += len(cccMap)
+	fmt.Fprintf(w, "var ccc = [%d]uint8{", len(cccMap))
+	for i := 0; i < len(cccMap); i++ {
+		if i%8 == 0 {
+			fmt.Fprintln(w)
+		}
+		fmt.Fprintf(w, "%3d, ", cccMap[uint8(i)])
+	}
+	fmt.Fprintln(w, "\n}\n")
+
+	if contains(list, "info") {
+		size += printCharInfoTables(w)
+	}
+
+	if contains(list, "recomp") {
+		// Note that we use 32 bit keys, instead of 64 bit.
+		// This clips the bits of three entries, but we know
+		// this won't cause a collision. The compiler will catch
+		// any changes made to UnicodeData.txt that introduces
+		// a collision.
+		// Note that the recomposition map for NFC and NFKC
+		// are identical.
+
+		// Recomposition map
+		nrentries := 0
+		for _, c := range chars {
+			f := c.forms[FCanonical]
+			if !f.isOneWay && len(f.decomp) > 0 {
+				nrentries++
+			}
+		}
+		// Each entry is accounted as 8 bytes.
+		sz := nrentries * 8
+		size += sz
+		fmt.Fprintf(w, "// recompMap: %d bytes (entries only)\n", sz)
+		fmt.Fprintln(w, "var recompMap = map[uint32]rune{")
+		for i, c := range chars {
+			f := c.forms[FCanonical]
+			d := f.decomp
+			// Indexing d[1] is safe provided completeCharFields has run: it
+			// marks decompositions of length 1 or more than 2 as one-way, so
+			// only two-rune decompositions get here.
+			if !f.isOneWay && len(d) > 0 {
+				key := uint32(uint16(d[0]))<<16 + uint32(uint16(d[1]))
+				fmt.Fprintf(w, "0x%.8X: 0x%.4X,\n", key, i)
+			}
+		}
+		fmt.Fprintf(w, "}\n\n")
+	}
+
+	fmt.Fprintf(w, "// Total size of tables: %dKB (%d bytes)\n", (size+512)/1024, size)
+	gen.WriteGoFile("tables.go", "norm", w.Bytes())
+}
+
+// printChars prints every valid entry of chars whose state is not SMissing
+// to standard output. It does nothing unless the verbose flag is set.
+func printChars() {
+	if !*verbose {
+		return
+	}
+	for _, c := range chars {
+		if c.isValid() && c.state != SMissing {
+			fmt.Println(c)
+		}
+	}
+}
+
+// verifyComputed does various consistency tests.
+// It checks, for every entry in chars and every form, that the computed
+// quick check values agree with the decomposition and combining flags, and
+// that the canonical and compatibility forms agree on combinesBackward.
+// The first violation aborts the program.
+func verifyComputed() {
+	for i, c := range chars {
+		for _, f := range c.forms {
+			// Outside the Hangul range, "has a decomposition" and
+			// "decomposed quick check is No" must coincide.
+			isNo := (f.quickCheck[MDecomposed] == QCNo)
+			if (len(f.decomp) > 0) != isNo && !isHangul(rune(i)) {
+				log.Fatalf("%U: NF*D QC must be No if rune decomposes", i)
+			}
+
+			isMaybe := f.quickCheck[MComposed] == QCMaybe
+			if f.combinesBackward != isMaybe {
+				log.Fatalf("%U: NF*C QC must be Maybe if combinesBackward", i)
+			}
+			if len(f.decomp) > 0 && f.combinesForward && isMaybe {
+				log.Fatalf("%U: NF*C QC must be Yes or No if combinesForward and decomposes", i)
+			}
+
+			// The remaining check only applies to runes without an expanded
+			// decomposition.
+			if len(f.expandedDecomp) != 0 {
+				continue
+			}
+			if a, b := c.nLeadingNonStarters > 0, (c.ccc > 0 || f.combinesBackward); a != b {
+				// We accept these runes to be treated differently (it only affects
+				// segment breaking in iteration, most likely on improper use), but
+				// reconsider if more characters are added.
+				// U+FF9E HALFWIDTH KATAKANA VOICED SOUND MARK;Lm;0;L;<narrow> 3099;;;;N;;;;;
+				// U+FF9F HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK;Lm;0;L;<narrow> 309A;;;;N;;;;;
+				// U+3133 HANGUL LETTER KIYEOK-SIOS;Lo;0;L;<compat> 11AA;;;;N;HANGUL LETTER GIYEOG SIOS;;;;
+				// U+318E HANGUL LETTER ARAEAE;Lo;0;L;<compat> 11A1;;;;N;HANGUL LETTER ALAE AE;;;;
+				// U+FFA3 HALFWIDTH HANGUL LETTER KIYEOK-SIOS;Lo;0;L;<narrow> 3133;;;;N;HALFWIDTH HANGUL LETTER GIYEOG SIOS;;;;
+				// U+FFDC HALFWIDTH HANGUL LETTER I;Lo;0;L;<narrow> 3163;;;;N;;;;;
+				if i != 0xFF9E && i != 0xFF9F && !(0x3133 <= i && i <= 0x318E) && !(0xFFA3 <= i && i <= 0xFFDC) {
+					log.Fatalf("%U: nLead was %v; want %v", i, a, b)
+				}
+			}
+		}
+		nfc := c.forms[FCanonical]
+		nfkc := c.forms[FCompatibility]
+		if nfc.combinesBackward != nfkc.combinesBackward {
+			log.Fatalf("%U: Cannot combine combinesBackward\n", c.codePoint)
+		}
+	}
+}
+
+// Use values in DerivedNormalizationProps.txt to compare against the
+// values we computed.
+// DerivedNormalizationProps.txt has form:
+// 00C0..00C5    ; NFD_QC; N # ...
+// 0374          ; NFD_QC; N # ...
+// See http://unicode.org/reports/tr44/ for full explanation
+//
+// Mismatching quick check values are reported with log.Printf and do not
+// stop the run; only an unknown quick check letter or a parse error aborts.
+func testDerived() {
+	f := gen.OpenUCDFile("DerivedNormalizationProps.txt")
+	defer f.Close()
+	p := ucd.New(f)
+	for p.Next() {
+		r := p.Rune(0)
+		c := &chars[r]
+
+		// Map the property name to the form and mode it describes; lines
+		// for any other property are skipped.
+		var ftype, mode int
+		qt := p.String(1)
+		switch qt {
+		case "NFC_QC":
+			ftype, mode = FCanonical, MComposed
+		case "NFD_QC":
+			ftype, mode = FCanonical, MDecomposed
+		case "NFKC_QC":
+			ftype, mode = FCompatibility, MComposed
+		case "NFKD_QC":
+			ftype, mode = FCompatibility, MDecomposed
+		default:
+			continue
+		}
+		var qr QCResult
+		switch p.String(2) {
+		case "Y":
+			qr = QCYes
+		case "N":
+			qr = QCNo
+		case "M":
+			qr = QCMaybe
+		default:
+			log.Fatalf(`Unexpected quick check value "%s"`, p.String(2))
+		}
+		if got := c.forms[ftype].quickCheck[mode]; got != qr {
+			log.Printf("%U: FAILED %s (was %v need %v)\n", r, qt, got, qr)
+		}
+		// Remember that the file specified this value, for the pass below.
+		c.forms[ftype].verified[mode] = true
+	}
+	if err := p.Err(); err != nil {
+		log.Fatal(err)
+	}
+	// Any unspecified value must be QCYes. Verify this.
+	for i, c := range chars {
+		for j, fd := range c.forms {
+			for k, qr := range fd.quickCheck {
+				if !fd.verified[k] && qr != QCYes {
+					m := "%U: FAIL F:%d M:%d (was %v need Yes) %s\n"
+					log.Printf(m, i, j, k, qr, c.name)
+				}
+			}
+		}
+	}
+}
+
+// testHeader is the fixed Go source that printTestdata writes at the top of
+// the generated data_test.go: the quick check constants, the formData and
+// runeData types, the f and g constructors, and the opening of the testData
+// slice literal, which printTestdata fills in and closes.
+var testHeader = `const (
+	Yes = iota
+	No
+	Maybe
+)
+
+type formData struct {
+	qc              uint8
+	combinesForward bool
+	decomposition   string
+}
+
+type runeData struct {
+	r      rune
+	ccc    uint8
+	nLead  uint8
+	nTrail uint8
+	f      [2]formData // 0: canonical; 1: compatibility
+}
+
+func f(qc uint8, cf bool, dec string) [2]formData {
+	return [2]formData{{qc, cf, dec}, {qc, cf, dec}}
+}
+
+func g(qc, qck uint8, cf, cfk bool, d, dk string) [2]formData {
+	return [2]formData{{qc, cf, d}, {qck, cfk, dk}}
+}
+
+var testData = []runeData{
+`
+
+// printTestdata generates data_test.go for package norm. After testHeader it
+// writes one testData entry for each code point whose properties differ from
+// those of the previously written entry, so every entry implicitly covers
+// the run of code points up to the next one.
+func printTestdata() {
+	// lastInfo holds the properties that decide whether a new entry is
+	// needed; f is the rendered f(...) or g(...) constructor call.
+	type lastInfo struct {
+		ccc    uint8
+		nLead  uint8
+		nTrail uint8
+		f      string
+	}
+
+	last := lastInfo{}
+	w := &bytes.Buffer{}
+	// testHeader is literal Go source, not a format string, so write it
+	// verbatim instead of passing it to Fprintf as a format.
+	fmt.Fprint(w, testHeader)
+	for r, c := range chars {
+		f := c.forms[FCanonical]
+		qc, cf, d := f.quickCheck[MComposed], f.combinesForward, string(f.expandedDecomp)
+		f = c.forms[FCompatibility]
+		qck, cfk, dk := f.quickCheck[MComposed], f.combinesForward, string(f.expandedDecomp)
+		// Use the short constructor when both forms carry the same data.
+		s := ""
+		if d == dk && qc == qck && cf == cfk {
+			s = fmt.Sprintf("f(%s, %v, %q)", qc, cf, d)
+		} else {
+			s = fmt.Sprintf("g(%s, %s, %v, %v, %q, %q)", qc, qck, cf, cfk, d, dk)
+		}
+		// Change detection uses the current ccc field, while the emitted
+		// entry carries origCCC.
+		current := lastInfo{c.ccc, c.nLeadingNonStarters, c.nTrailingNonStarters, s}
+		if last != current {
+			fmt.Fprintf(w, "\t{0x%x, %d, %d, %d, %s},\n", r, c.origCCC, c.nLeadingNonStarters, c.nTrailingNonStarters, s)
+			last = current
+		}
+	}
+	fmt.Fprintln(w, "}")
+	gen.WriteGoFile("data_test.go", "norm", w.Bytes())
+}
@@ -0,0 +1,117 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+// Trie table generator.
+// Used by make*tables tools to generate a go file with trie data structures
+// for mapping UTF-8 to a 16-bit value. All but the last byte in a UTF-8 byte
+// sequence are used to lookup offsets in the index table to be used for the
+// next byte. The last byte is used to index into a table with 16-bit values.
+
+package main
+
+import (
+	"fmt"
+	"io"
+)
+
+// maxSparseEntries is the largest number of sparse entries, as counted by
+// countSparseEntries, that a block may have for normCompacter.Size to accept
+// it.
+const maxSparseEntries = 16
+
+// normCompacter collects value blocks that are stored in sparse form and
+// prints them as Go tables. Its methods (Size, Store, Handler, Print) are
+// what is passed to triegen.Compact.
+type normCompacter struct {
+	sparseBlocks [][]uint64 // blocks in the order they were passed to Store
+	sparseOffset []uint16   // per block: value of sparseCount when it was stored
+	sparseCount  int        // running total of table entries over all stored blocks
+	name         string     // prefix of the generated identifiers
+}
+
+func mostFrequentStride(a []uint64) int {
+	counts := make(map[int]int)
+	var v int
+	for _, x := range a {
+		if stride := int(x) - v; v != 0 && stride >= 0 {
+			counts[stride]++
+		}
+		v = int(x)
+	}
+	var maxs, maxc int
+	for stride, cnt := range counts {
+		if cnt > maxc || (cnt == maxc && stride < maxs) {
+			maxs, maxc = stride, cnt
+		}
+	}
+	return maxs
+}
+
+func countSparseEntries(a []uint64) int {
+	stride := mostFrequentStride(a)
+	var v, count int
+	for _, tv := range a {
+		if int(tv)-v != stride {
+			if tv != 0 {
+				count++
+			}
+		}
+		v = int(tv)
+	}
+	return count
+}
+
+// Size reports the size accounted for storing v in sparse form, computed as
+// (n+1)*4 + 2 where n is the number of sparse entries in v. ok is false,
+// and sz is 0, when v has more than maxSparseEntries entries.
+func (c *normCompacter) Size(v []uint64) (sz int, ok bool) {
+	n := countSparseEntries(v)
+	if n > maxSparseEntries {
+		return 0, false
+	}
+	return (n+1)*4 + 2, true
+}
+
+// Store appends v to the stored blocks and returns its handle, which is the
+// number of blocks stored before it. The block's offset is the entry count
+// so far; the count then grows by the block's sparse entries plus one.
+func (c *normCompacter) Store(v []uint64) uint32 {
+	handle := uint32(len(c.sparseOffset))
+	offset := uint16(c.sparseCount)
+	c.sparseBlocks = append(c.sparseBlocks, v)
+	c.sparseOffset = append(c.sparseOffset, offset)
+	c.sparseCount += 1 + countSparseEntries(v)
+	return handle
+}
+
+func (c *normCompacter) Handler() string {
+	return c.name + "Sparse.lookup"
+}
+
+// Print writes the Go source of the <name>SparseOffset and
+// <name>SparseValues tables to w. It returns the first write error, if any;
+// later writes are still attempted.
+//
+// Each stored block contributes a header entry holding its most frequent
+// stride and its number of sparse entries, followed by one {value, lo, hi}
+// entry per range.
+func (c *normCompacter) Print(w io.Writer) (retErr error) {
+	// p formats to w and remembers the first error in retErr.
+	p := func(f string, x ...interface{}) {
+		if _, err := fmt.Fprintf(w, f, x...); retErr == nil && err != nil {
+			retErr = err
+		}
+	}
+
+	ls := len(c.sparseBlocks)
+	p("// %sSparseOffset: %d entries, %d bytes\n", c.name, ls, ls*2)
+	p("var %sSparseOffset = %#v\n\n", c.name, c.sparseOffset)
+
+	ns := c.sparseCount
+	p("// %sSparseValues: %d entries, %d bytes\n", c.name, ns, ns*4)
+	p("var %sSparseValues = [%d]valueRange {", c.name, ns)
+	for i, b := range c.sparseBlocks {
+		p("\n// Block %#x, offset %#x", i, c.sparseOffset[i])
+		var v int
+		stride := mostFrequentStride(b)
+		n := countSparseEntries(b)
+		p("\n{value:%#04x,lo:%#02x},", stride, uint8(n))
+		// The inner i (index within the block) shadows the block index.
+		// Indices are printed offset by 0x80.
+		for i, nv := range b {
+			// A value that does not continue the stride ends the open range
+			// (if the previous value was non-zero) and, when non-zero
+			// itself, opens a new one.
+			if int(nv)-v != stride {
+				if v != 0 {
+					p(",hi:%#02x},", 0x80+i-1)
+				}
+				if nv != 0 {
+					p("\n{value:%#04x,lo:%#02x", nv, 0x80+i)
+				}
+			}
+			v = int(nv)
+		}
+		// Close a range that runs to the end of the block.
+		if v != 0 {
+			p(",hi:%#02x},", 0x80+len(b)-1)
+		}
+	}
+	p("\n}\n\n")
+	return
+}