1
1
mirror of https://github.com/go-gitea/gitea synced 2025-01-25 09:04:29 +00:00

527 lines
13 KiB
Go
Raw Normal View History

2016-11-03 23:16:01 +01:00
// Copyright 2015 Unknwon
//
// Licensed under the Apache License, Version 2.0 (the "License"): you may
// not use this file except in compliance with the License. You may obtain
// a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.
package ini
import (
"bufio"
"bytes"
"fmt"
"io"
2019-05-13 08:38:53 -07:00
"regexp"
2016-11-03 23:16:01 +01:00
"strconv"
"strings"
"unicode"
)
const minReaderBufferSize = 4096
var pythonMultiline = regexp.MustCompile(`^([\t\f ]+)(.*)`)
2019-05-13 08:38:53 -07:00
type parserOptions struct {
IgnoreContinuation bool
IgnoreInlineComment bool
AllowPythonMultilineValues bool
SpaceBeforeInlineComment bool
UnescapeValueDoubleQuotes bool
UnescapeValueCommentSymbols bool
PreserveSurroundedQuote bool
DebugFunc DebugFunc
ReaderBufferSize int
}
2016-11-03 23:16:01 +01:00
type parser struct {
buf *bufio.Reader
options parserOptions
2016-11-03 23:16:01 +01:00
isEOF bool
count int
comment *bytes.Buffer
}
func (p *parser) debug(format string, args ...interface{}) {
if p.options.DebugFunc != nil {
p.options.DebugFunc(fmt.Sprintf(format, args...))
}
}
func newParser(r io.Reader, opts parserOptions) *parser {
size := opts.ReaderBufferSize
if size < minReaderBufferSize {
size = minReaderBufferSize
}
2016-11-03 23:16:01 +01:00
return &parser{
buf: bufio.NewReaderSize(r, size),
options: opts,
2016-11-03 23:16:01 +01:00
count: 1,
comment: &bytes.Buffer{},
}
}
// BOM handles header of UTF-8, UTF-16 LE and UTF-16 BE's BOM format.
2016-11-03 23:16:01 +01:00
// http://en.wikipedia.org/wiki/Byte_order_mark#Representations_of_byte_order_marks_by_encoding
func (p *parser) BOM() error {
mask, err := p.buf.Peek(2)
2016-11-03 23:16:01 +01:00
if err != nil && err != io.EOF {
return err
} else if len(mask) < 2 {
2016-11-03 23:16:01 +01:00
return nil
}
switch {
case mask[0] == 254 && mask[1] == 255:
fallthrough
case mask[0] == 255 && mask[1] == 254:
2016-11-03 23:16:01 +01:00
p.buf.Read(mask)
case mask[0] == 239 && mask[1] == 187:
mask, err := p.buf.Peek(3)
if err != nil && err != io.EOF {
return err
} else if len(mask) < 3 {
return nil
}
if mask[2] == 191 {
p.buf.Read(mask)
}
2016-11-03 23:16:01 +01:00
}
return nil
}
func (p *parser) readUntil(delim byte) ([]byte, error) {
data, err := p.buf.ReadBytes(delim)
if err != nil {
if err == io.EOF {
p.isEOF = true
} else {
return nil, err
}
}
return data, nil
}
func cleanComment(in []byte) ([]byte, bool) {
i := bytes.IndexAny(in, "#;")
if i == -1 {
return nil, false
}
return in[i:], true
}
2019-05-13 08:38:53 -07:00
func readKeyName(delimiters string, in []byte) (string, int, error) {
2016-11-03 23:16:01 +01:00
line := string(in)
// Check if key name surrounded by quotes.
var keyQuote string
if line[0] == '"' {
if len(line) > 6 && string(line[0:3]) == `"""` {
keyQuote = `"""`
} else {
keyQuote = `"`
}
} else if line[0] == '`' {
keyQuote = "`"
}
// Get out key name
endIdx := -1
if len(keyQuote) > 0 {
startIdx := len(keyQuote)
// FIXME: fail case -> """"""name"""=value
pos := strings.Index(line[startIdx:], keyQuote)
if pos == -1 {
return "", -1, fmt.Errorf("missing closing key quote: %s", line)
}
pos += startIdx
// Find key-value delimiter
2019-05-13 08:38:53 -07:00
i := strings.IndexAny(line[pos+startIdx:], delimiters)
2016-11-03 23:16:01 +01:00
if i < 0 {
return "", -1, ErrDelimiterNotFound{line}
}
endIdx = pos + i
return strings.TrimSpace(line[startIdx:pos]), endIdx + startIdx + 1, nil
}
2019-05-13 08:38:53 -07:00
endIdx = strings.IndexAny(line, delimiters)
2016-11-03 23:16:01 +01:00
if endIdx < 0 {
return "", -1, ErrDelimiterNotFound{line}
}
return strings.TrimSpace(line[0:endIdx]), endIdx + 1, nil
}
func (p *parser) readMultilines(line, val, valQuote string) (string, error) {
for {
data, err := p.readUntil('\n')
if err != nil {
return "", err
}
next := string(data)
pos := strings.LastIndex(next, valQuote)
if pos > -1 {
val += next[:pos]
comment, has := cleanComment([]byte(next[pos:]))
if has {
p.comment.Write(bytes.TrimSpace(comment))
}
break
}
val += next
if p.isEOF {
return "", fmt.Errorf("missing closing key quote from '%s' to '%s'", line, next)
}
}
return val, nil
}
func (p *parser) readContinuationLines(val string) (string, error) {
for {
data, err := p.readUntil('\n')
if err != nil {
return "", err
}
next := strings.TrimSpace(string(data))
if len(next) == 0 {
break
}
val += next
if val[len(val)-1] != '\\' {
break
}
val = val[:len(val)-1]
}
return val, nil
}
// hasSurroundedQuote check if and only if the first and last characters
// are quotes \" or \'.
// It returns false if any other parts also contain same kind of quotes.
func hasSurroundedQuote(in string, quote byte) bool {
return len(in) >= 2 && in[0] == quote && in[len(in)-1] == quote &&
2016-11-03 23:16:01 +01:00
strings.IndexByte(in[1:], quote) == len(in)-2
}
func (p *parser) readValue(in []byte, bufferSize int) (string, error) {
2016-11-03 23:16:01 +01:00
line := strings.TrimLeftFunc(string(in), unicode.IsSpace)
if len(line) == 0 {
if p.options.AllowPythonMultilineValues && len(in) > 0 && in[len(in)-1] == '\n' {
return p.readPythonMultilines(line, bufferSize)
}
2016-11-03 23:16:01 +01:00
return "", nil
}
var valQuote string
if len(line) > 3 && string(line[0:3]) == `"""` {
valQuote = `"""`
} else if line[0] == '`' {
valQuote = "`"
} else if p.options.UnescapeValueDoubleQuotes && line[0] == '"' {
valQuote = `"`
2016-11-03 23:16:01 +01:00
}
if len(valQuote) > 0 {
startIdx := len(valQuote)
pos := strings.LastIndex(line[startIdx:], valQuote)
// Check for multi-line value
if pos == -1 {
return p.readMultilines(line, line[startIdx:], valQuote)
}
if p.options.UnescapeValueDoubleQuotes && valQuote == `"` {
return strings.Replace(line[startIdx:pos+startIdx], `\"`, `"`, -1), nil
}
2016-11-03 23:16:01 +01:00
return line[startIdx : pos+startIdx], nil
}
2019-05-13 08:38:53 -07:00
lastChar := line[len(line)-1]
// Won't be able to reach here if value only contains whitespace
2016-11-03 23:16:01 +01:00
line = strings.TrimSpace(line)
2019-05-13 08:38:53 -07:00
trimmedLastChar := line[len(line)-1]
2016-11-03 23:16:01 +01:00
// Check continuation lines when desired
if !p.options.IgnoreContinuation && trimmedLastChar == '\\' {
2016-11-03 23:16:01 +01:00
return p.readContinuationLines(line[:len(line)-1])
}
// Check if ignore inline comment
if !p.options.IgnoreInlineComment {
2019-05-13 08:38:53 -07:00
var i int
if p.options.SpaceBeforeInlineComment {
2019-05-13 08:38:53 -07:00
i = strings.Index(line, " #")
if i == -1 {
i = strings.Index(line, " ;")
}
} else {
i = strings.IndexAny(line, "#;")
}
if i > -1 {
p.comment.WriteString(line[i:])
line = strings.TrimSpace(line[:i])
}
2019-05-13 08:38:53 -07:00
2016-11-03 23:16:01 +01:00
}
// Trim single and double quotes
2019-05-13 08:38:53 -07:00
if (hasSurroundedQuote(line, '\'') ||
hasSurroundedQuote(line, '"')) && !p.options.PreserveSurroundedQuote {
2016-11-03 23:16:01 +01:00
line = line[1 : len(line)-1]
} else if len(valQuote) == 0 && p.options.UnescapeValueCommentSymbols {
if strings.Contains(line, `\;`) {
line = strings.Replace(line, `\;`, ";", -1)
}
if strings.Contains(line, `\#`) {
line = strings.Replace(line, `\#`, "#", -1)
}
} else if p.options.AllowPythonMultilineValues && lastChar == '\n' {
return p.readPythonMultilines(line, bufferSize)
}
2019-05-13 08:38:53 -07:00
return line, nil
}
2019-05-13 08:38:53 -07:00
func (p *parser) readPythonMultilines(line string, bufferSize int) (string, error) {
parserBufferPeekResult, _ := p.buf.Peek(bufferSize)
peekBuffer := bytes.NewBuffer(parserBufferPeekResult)
2019-05-13 08:38:53 -07:00
indentSize := 0
for {
peekData, peekErr := peekBuffer.ReadBytes('\n')
if peekErr != nil {
if peekErr == io.EOF {
p.debug("readPythonMultilines: io.EOF, peekData: %q, line: %q", string(peekData), line)
return line, nil
2019-05-13 08:38:53 -07:00
}
p.debug("readPythonMultilines: failed to peek with error: %v", peekErr)
return "", peekErr
}
2019-05-13 08:38:53 -07:00
p.debug("readPythonMultilines: parsing %q", string(peekData))
peekMatches := pythonMultiline.FindStringSubmatch(string(peekData))
p.debug("readPythonMultilines: matched %d parts", len(peekMatches))
for n, v := range peekMatches {
p.debug(" %d: %q", n, v)
}
// Return if not a Python multiline value.
if len(peekMatches) != 3 {
p.debug("readPythonMultilines: end of value, got: %q", line)
return line, nil
}
2019-05-13 08:38:53 -07:00
// Determine indent size and line prefix.
currentIndentSize := len(peekMatches[1])
if indentSize < 1 {
indentSize = currentIndentSize
p.debug("readPythonMultilines: indent size is %d", indentSize)
}
// Make sure each line is indented at least as far as first line.
if currentIndentSize < indentSize {
p.debug("readPythonMultilines: end of value, current indent: %d, expected indent: %d, line: %q", currentIndentSize, indentSize, line)
return line, nil
}
2019-05-13 08:38:53 -07:00
// Advance the parser reader (buffer) in-sync with the peek buffer.
_, err := p.buf.Discard(len(peekData))
if err != nil {
p.debug("readPythonMultilines: failed to skip to the end, returning error")
return "", err
2019-05-13 08:38:53 -07:00
}
// Handle indented empty line.
line += "\n" + peekMatches[1][indentSize:] + peekMatches[2]
}
2016-11-03 23:16:01 +01:00
}
// parse parses data through an io.Reader.
func (f *File) parse(reader io.Reader) (err error) {
p := newParser(reader, parserOptions{
IgnoreContinuation: f.options.IgnoreContinuation,
IgnoreInlineComment: f.options.IgnoreInlineComment,
AllowPythonMultilineValues: f.options.AllowPythonMultilineValues,
SpaceBeforeInlineComment: f.options.SpaceBeforeInlineComment,
UnescapeValueDoubleQuotes: f.options.UnescapeValueDoubleQuotes,
UnescapeValueCommentSymbols: f.options.UnescapeValueCommentSymbols,
PreserveSurroundedQuote: f.options.PreserveSurroundedQuote,
DebugFunc: f.options.DebugFunc,
ReaderBufferSize: f.options.ReaderBufferSize,
})
2016-11-03 23:16:01 +01:00
if err = p.BOM(); err != nil {
return fmt.Errorf("BOM: %v", err)
}
// Ignore error because default section name is never empty string.
name := DefaultSection
if f.options.Insensitive {
name = strings.ToLower(DefaultSection)
}
section, _ := f.NewSection(name)
2016-11-03 23:16:01 +01:00
2019-05-13 08:38:53 -07:00
// This "last" is not strictly equivalent to "previous one" if current key is not the first nested key
var isLastValueEmpty bool
var lastRegularKey *Key
2016-11-03 23:16:01 +01:00
var line []byte
var inUnparseableSection bool
2019-05-13 08:38:53 -07:00
// NOTE: Iterate and increase `currentPeekSize` until
// the size of the parser buffer is found.
// TODO(unknwon): When Golang 1.10 is the lowest version supported, replace with `parserBufferSize := p.buf.Size()`.
parserBufferSize := 0
// NOTE: Peek 4kb at a time.
currentPeekSize := minReaderBufferSize
2019-05-13 08:38:53 -07:00
if f.options.AllowPythonMultilineValues {
for {
peekBytes, _ := p.buf.Peek(currentPeekSize)
peekBytesLength := len(peekBytes)
if parserBufferSize >= peekBytesLength {
break
}
currentPeekSize *= 2
parserBufferSize = peekBytesLength
}
}
2016-11-03 23:16:01 +01:00
for !p.isEOF {
line, err = p.readUntil('\n')
if err != nil {
return err
}
2019-05-13 08:38:53 -07:00
if f.options.AllowNestedValues &&
isLastValueEmpty && len(line) > 0 {
if line[0] == ' ' || line[0] == '\t' {
lastRegularKey.addNestedValue(string(bytes.TrimSpace(line)))
continue
}
}
2016-11-03 23:16:01 +01:00
line = bytes.TrimLeftFunc(line, unicode.IsSpace)
if len(line) == 0 {
continue
}
// Comments
if line[0] == '#' || line[0] == ';' {
// Note: we do not care ending line break,
// it is needed for adding second line,
// so just clean it once at the end when set to value.
p.comment.Write(line)
continue
}
// Section
if line[0] == '[' {
// Read to the next ']' (TODO: support quoted strings)
2019-05-13 08:38:53 -07:00
closeIdx := bytes.LastIndexByte(line, ']')
2016-11-03 23:16:01 +01:00
if closeIdx == -1 {
return fmt.Errorf("unclosed section: %s", line)
}
name := string(line[1:closeIdx])
section, err = f.NewSection(name)
if err != nil {
return err
}
comment, has := cleanComment(line[closeIdx+1:])
if has {
p.comment.Write(comment)
}
section.Comment = strings.TrimSpace(p.comment.String())
// Reset auto-counter and comments
2016-11-03 23:16:01 +01:00
p.comment.Reset()
p.count = 1
inUnparseableSection = false
for i := range f.options.UnparseableSections {
if f.options.UnparseableSections[i] == name ||
(f.options.Insensitive && strings.ToLower(f.options.UnparseableSections[i]) == strings.ToLower(name)) {
inUnparseableSection = true
continue
}
}
continue
}
if inUnparseableSection {
section.isRawSection = true
section.rawBody += string(line)
2016-11-03 23:16:01 +01:00
continue
}
2019-05-13 08:38:53 -07:00
kname, offset, err := readKeyName(f.options.KeyValueDelimiters, line)
2016-11-03 23:16:01 +01:00
if err != nil {
// Treat as boolean key when desired, and whole line is key name.
2019-05-13 08:38:53 -07:00
if IsErrDelimiterNotFound(err) {
switch {
case f.options.AllowBooleanKeys:
kname, err := p.readValue(line, parserBufferSize)
2019-05-13 08:38:53 -07:00
if err != nil {
return err
}
key, err := section.NewBooleanKey(kname)
if err != nil {
return err
}
key.Comment = strings.TrimSpace(p.comment.String())
p.comment.Reset()
continue
case f.options.SkipUnrecognizableLines:
continue
2016-11-03 23:16:01 +01:00
}
}
return err
}
// Auto increment.
isAutoIncr := false
if kname == "-" {
isAutoIncr = true
kname = "#" + strconv.Itoa(p.count)
p.count++
}
value, err := p.readValue(line[offset:], parserBufferSize)
2016-11-03 23:16:01 +01:00
if err != nil {
return err
}
2019-05-13 08:38:53 -07:00
isLastValueEmpty = len(value) == 0
2016-11-03 23:16:01 +01:00
key, err := section.NewKey(kname, value)
2016-11-03 23:16:01 +01:00
if err != nil {
return err
}
key.isAutoIncrement = isAutoIncr
2016-11-03 23:16:01 +01:00
key.Comment = strings.TrimSpace(p.comment.String())
p.comment.Reset()
2019-05-13 08:38:53 -07:00
lastRegularKey = key
2016-11-03 23:16:01 +01:00
}
return nil
}