gitea/modules/charset/ambiguous/generate.go

189 lines
4.8 KiB
Go

// Copyright 2022 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package main
import (
"bytes"
"flag"
"fmt"
"go/format"
"os"
"sort"
"text/template"
"unicode"
"code.gitea.io/gitea/modules/json"
"golang.org/x/text/unicode/rangetable"
)
// ambiguous.json provides a one to one mapping of ambiguous characters to other characters
// See https://github.com/hediet/vscode-unicode-data/blob/main/out/ambiguous.json
type AmbiguousTable struct {
Confusable []rune
With []rune
Locale string
RangeTable *unicode.RangeTable
}
type RunePair struct {
Confusable rune
With rune
}
var verbose bool
func main() {
flag.Usage = func() {
fmt.Fprintf(os.Stderr, `%s: Generate AmbiguousCharacter
Usage: %[1]s [-v] [-o output.go] ambiguous.json
`, os.Args[0])
flag.PrintDefaults()
}
output := ""
flag.BoolVar(&verbose, "v", false, "verbose output")
flag.StringVar(&output, "o", "ambiguous_gen.go", "file to output to")
flag.Parse()
input := flag.Arg(0)
if input == "" {
input = "ambiguous.json"
}
bs, err := os.ReadFile(input)
if err != nil {
fatalf("Unable to read: %s Err: %v", input, err)
}
var unwrapped string
if err := json.Unmarshal(bs, &unwrapped); err != nil {
fatalf("Unable to unwrap content in: %s Err: %v", input, err)
}
fromJSON := map[string][]uint32{}
if err := json.Unmarshal([]byte(unwrapped), &fromJSON); err != nil {
fatalf("Unable to unmarshal content in: %s Err: %v", input, err)
}
tables := make([]*AmbiguousTable, 0, len(fromJSON))
for locale, chars := range fromJSON {
table := &AmbiguousTable{Locale: locale}
table.Confusable = make([]rune, 0, len(chars)/2)
table.With = make([]rune, 0, len(chars)/2)
pairs := make([]RunePair, len(chars)/2)
for i := 0; i < len(chars); i += 2 {
pairs[i/2].Confusable, pairs[i/2].With = rune(chars[i]), rune(chars[i+1])
}
sort.Slice(pairs, func(i, j int) bool {
return pairs[i].Confusable < pairs[j].Confusable
})
for _, pair := range pairs {
table.Confusable = append(table.Confusable, pair.Confusable)
table.With = append(table.With, pair.With)
}
table.RangeTable = rangetable.New(table.Confusable...)
tables = append(tables, table)
}
sort.Slice(tables, func(i, j int) bool {
return tables[i].Locale < tables[j].Locale
})
data := map[string]any{
"Tables": tables,
}
if err := runTemplate(generatorTemplate, output, &data); err != nil {
fatalf("Unable to run template: %v", err)
}
}
func runTemplate(t *template.Template, filename string, data any) error {
buf := bytes.NewBuffer(nil)
if err := t.Execute(buf, data); err != nil {
return fmt.Errorf("unable to execute template: %w", err)
}
bs, err := format.Source(buf.Bytes())
if err != nil {
verbosef("Bad source:\n%s", buf.String())
return fmt.Errorf("unable to format source: %w", err)
}
old, err := os.ReadFile(filename)
if err != nil && !os.IsNotExist(err) {
return fmt.Errorf("failed to read old file %s because %w", filename, err)
} else if err == nil {
if bytes.Equal(bs, old) {
// files are the same don't rewrite it.
return nil
}
}
file, err := os.Create(filename)
if err != nil {
return fmt.Errorf("failed to create file %s because %w", filename, err)
}
defer file.Close()
_, err = file.Write(bs)
if err != nil {
return fmt.Errorf("unable to write generated source: %w", err)
}
return nil
}
var generatorTemplate = template.Must(template.New("ambiguousTemplate").Parse(`// This file is generated by modules/charset/ambiguous/generate.go DO NOT EDIT
// Copyright 2022 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package charset
import "unicode"
// This file is generated from https://github.com/hediet/vscode-unicode-data/blob/main/out/ambiguous.json
// AmbiguousTable matches a confusable rune with its partner for the Locale
type AmbiguousTable struct {
Confusable []rune
With []rune
Locale string
RangeTable *unicode.RangeTable
}
// AmbiguousCharacters provides a map by locale name to the confusable characters in that locale
var AmbiguousCharacters = map[string]*AmbiguousTable{
{{range .Tables}}{{printf "%q:" .Locale}} {
Confusable: []rune{ {{range .Confusable}}{{.}},{{end}} },
With: []rune{ {{range .With}}{{.}},{{end}} },
Locale: {{printf "%q" .Locale}},
RangeTable: &unicode.RangeTable{
R16: []unicode.Range16{
{{range .RangeTable.R16 }} {Lo:{{.Lo}}, Hi:{{.Hi}}, Stride: {{.Stride}}},
{{end}} },
R32: []unicode.Range32{
{{range .RangeTable.R32}} {Lo:{{.Lo}}, Hi:{{.Hi}}, Stride: {{.Stride}}},
{{end}} },
LatinOffset: {{.RangeTable.LatinOffset}},
},
},
{{end}}
}
`))
func logf(format string, args ...any) {
fmt.Fprintf(os.Stderr, format+"\n", args...)
}
func verbosef(format string, args ...any) {
if verbose {
logf(format, args...)
}
}
func fatalf(format string, args ...any) {
logf("fatal: "+format+"\n", args...)
os.Exit(1)
}