1
1
mirror of https://github.com/go-gitea/gitea synced 2025-01-15 20:24:27 +00:00

266 lines
8.0 KiB
Go
Raw Normal View History

// Package org is an Org mode syntax processor.
//
// It parses plain text into an AST and can export it as HTML or pretty printed Org mode syntax.
// Further export formats can be defined using the Writer interface.
//
// You probably want to start with something like this:
// input := strings.NewReader("Your Org mode input")
// html, err := org.New().Parse(input, "./").Write(org.NewHTMLWriter())
// if err != nil {
// log.Fatalf("Something went wrong: %s", err)
// }
// log.Print(html)
package org
import (
"bufio"
"fmt"
"io"
"io/ioutil"
"log"
"os"
"strings"
)
type Configuration struct {
MaxEmphasisNewLines int // Maximum number of newlines inside an emphasis. See org-emphasis-regexp-components newline.
AutoLink bool // Try to convert text passages that look like hyperlinks into hyperlinks.
DefaultSettings map[string]string // Default values for settings that are overriden by setting the same key in BufferSettings.
Log *log.Logger // Log is used to print warnings during parsing.
ReadFile func(filename string) ([]byte, error) // ReadFile is used to read e.g. #+INCLUDE files.
}
// Document contains the parsing results and a pointer to the Configuration.
type Document struct {
*Configuration
Path string // Path of the file containing the parse input - used to resolve relative paths during parsing (e.g. INCLUDE).
tokens []token
baseLvl int
Macros map[string]string
Links map[string]string
Nodes []Node
NamedNodes map[string]Node
Outline Outline // Outline is a Table Of Contents for the document and contains all sections (headline + content).
BufferSettings map[string]string // Settings contains all settings that were parsed from keywords.
Error error
}
// Node represents a parsed node of the document.
type Node interface {
String() string // String returns the pretty printed Org mode string for the node (see OrgWriter).
}
type lexFn = func(line string) (t token, ok bool)
type parseFn = func(*Document, int, stopFn) (int, Node)
type stopFn = func(*Document, int) bool
type token struct {
kind string
lvl int
content string
matches []string
}
var lexFns = []lexFn{
lexHeadline,
lexDrawer,
lexBlock,
lexResult,
lexList,
lexTable,
lexHorizontalRule,
lexKeywordOrComment,
lexFootnoteDefinition,
lexExample,
lexText,
}
var nilToken = token{"nil", -1, "", nil}
var orgWriter = NewOrgWriter()
// New returns a new Configuration with (hopefully) sane defaults.
func New() *Configuration {
return &Configuration{
AutoLink: true,
MaxEmphasisNewLines: 1,
DefaultSettings: map[string]string{
"TODO": "TODO | DONE",
"EXCLUDE_TAGS": "noexport",
"OPTIONS": "toc:t <:t e:t f:t pri:t todo:t tags:t title:t",
},
Log: log.New(os.Stderr, "go-org: ", 0),
ReadFile: ioutil.ReadFile,
}
}
// String returns the pretty printed Org mode string for the given nodes (see OrgWriter).
2019-11-05 10:39:03 +02:00
func String(nodes []Node) string { return orgWriter.WriteNodesAsString(nodes...) }
// Write is called after with an instance of the Writer interface to export a parsed Document into another format.
func (d *Document) Write(w Writer) (out string, err error) {
defer func() {
if recovered := recover(); recovered != nil {
err = fmt.Errorf("could not write output: %s", recovered)
}
}()
if d.Error != nil {
return "", d.Error
} else if d.Nodes == nil {
return "", fmt.Errorf("could not write output: parse was not called")
}
w.Before(d)
WriteNodes(w, d.Nodes...)
w.After(d)
return w.String(), err
}
// Parse parses the input into an AST (and some other helpful fields like Outline).
// To allow method chaining, errors are stored in document.Error rather than being returned.
func (c *Configuration) Parse(input io.Reader, path string) (d *Document) {
outlineSection := &Section{}
d = &Document{
Configuration: c,
Outline: Outline{outlineSection, outlineSection, 0},
BufferSettings: map[string]string{},
NamedNodes: map[string]Node{},
Links: map[string]string{},
Macros: map[string]string{},
Path: path,
}
defer func() {
if recovered := recover(); recovered != nil {
d.Error = fmt.Errorf("could not parse input: %v", recovered)
}
}()
if d.tokens != nil {
d.Error = fmt.Errorf("parse was called multiple times")
}
d.tokenize(input)
_, nodes := d.parseMany(0, func(d *Document, i int) bool { return i >= len(d.tokens) })
d.Nodes = nodes
return d
}
// Silent disables all logging of warnings during parsing.
func (c *Configuration) Silent() *Configuration {
c.Log = log.New(ioutil.Discard, "", 0)
return c
}
func (d *Document) tokenize(input io.Reader) {
d.tokens = []token{}
scanner := bufio.NewScanner(input)
for scanner.Scan() {
d.tokens = append(d.tokens, tokenize(scanner.Text()))
}
if err := scanner.Err(); err != nil {
d.Error = fmt.Errorf("could not tokenize input: %s", err)
}
}
// Get returns the value for key in BufferSettings or DefaultSettings if key does not exist in the former
func (d *Document) Get(key string) string {
if v, ok := d.BufferSettings[key]; ok {
return v
}
if v, ok := d.DefaultSettings[key]; ok {
return v
}
return ""
}
// GetOption returns the value associated to the export option key
// Currently supported options:
// - < (export timestamps)
// - e (export org entities)
// - f (export footnotes)
// - title (export title)
// - toc (export table of content. an int limits the included org headline lvl)
// - todo (export headline todo status)
// - pri (export headline priority)
// - tags (export headline tags)
// see https://orgmode.org/manual/Export-settings.html for more information
func (d *Document) GetOption(key string) string {
get := func(settings map[string]string) string {
for _, field := range strings.Fields(settings["OPTIONS"]) {
if strings.HasPrefix(field, key+":") {
return field[len(key)+1:]
}
}
return ""
}
value := get(d.BufferSettings)
if value == "" {
value = get(d.DefaultSettings)
}
if value == "" {
value = "nil"
d.Log.Printf("Missing value for export option %s", key)
}
return value
}
func (d *Document) parseOne(i int, stop stopFn) (consumed int, node Node) {
switch d.tokens[i].kind {
case "unorderedList", "orderedList":
consumed, node = d.parseList(i, stop)
case "tableRow", "tableSeparator":
consumed, node = d.parseTable(i, stop)
case "beginBlock":
consumed, node = d.parseBlock(i, stop)
case "result":
consumed, node = d.parseResult(i, stop)
case "beginDrawer":
consumed, node = d.parseDrawer(i, stop)
case "text":
consumed, node = d.parseParagraph(i, stop)
case "example":
consumed, node = d.parseExample(i, stop)
case "horizontalRule":
consumed, node = d.parseHorizontalRule(i, stop)
case "comment":
consumed, node = d.parseComment(i, stop)
case "keyword":
consumed, node = d.parseKeyword(i, stop)
case "headline":
consumed, node = d.parseHeadline(i, stop)
case "footnoteDefinition":
consumed, node = d.parseFootnoteDefinition(i, stop)
}
if consumed != 0 {
return consumed, node
}
d.Log.Printf("Could not parse token %#v: Falling back to treating it as plain text.", d.tokens[i])
m := plainTextRegexp.FindStringSubmatch(d.tokens[i].matches[0])
d.tokens[i] = token{"text", len(m[1]), m[2], m}
return d.parseOne(i, stop)
}
func (d *Document) parseMany(i int, stop stopFn) (int, []Node) {
start, nodes := i, []Node{}
for i < len(d.tokens) && !stop(d, i) {
consumed, node := d.parseOne(i, stop)
i += consumed
nodes = append(nodes, node)
}
return i - start, nodes
}
func (d *Document) addHeadline(headline *Headline) int {
current := &Section{Headline: headline}
d.Outline.last.add(current)
d.Outline.count++
d.Outline.last = current
return d.Outline.count
}
func tokenize(line string) token {
for _, lexFn := range lexFns {
if token, ok := lexFn(line); ok {
return token
}
}
panic(fmt.Sprintf("could not lex line: %s", line))
}