1
1
mirror of https://github.com/go-gitea/gitea synced 2025-01-10 09:44:43 +00:00

194 lines
4.2 KiB
Go
Raw Normal View History

// Package shlex provides a simple lexical analysis like Unix shell.
package shlex
import (
"bufio"
"errors"
"io"
"strings"
"unicode"
)
var (
ErrNoClosing = errors.New("No closing quotation")
ErrNoEscaped = errors.New("No escaped character")
)
// Tokenizer is the interface that classifies a token according to
// words, whitespaces, quotations, escapes and escaped quotations.
type Tokenizer interface {
IsWord(rune) bool
IsWhitespace(rune) bool
IsQuote(rune) bool
IsEscape(rune) bool
IsEscapedQuote(rune) bool
}
// DefaultTokenizer implements a simple tokenizer like Unix shell.
type DefaultTokenizer struct{}
func (t *DefaultTokenizer) IsWord(r rune) bool {
return r == '_' || unicode.IsLetter(r) || unicode.IsNumber(r)
}
func (t *DefaultTokenizer) IsQuote(r rune) bool {
switch r {
case '\'', '"':
return true
default:
return false
}
}
func (t *DefaultTokenizer) IsWhitespace(r rune) bool {
return unicode.IsSpace(r)
}
func (t *DefaultTokenizer) IsEscape(r rune) bool {
return r == '\\'
}
func (t *DefaultTokenizer) IsEscapedQuote(r rune) bool {
return r == '"'
}
// Lexer represents a lexical analyzer.
type Lexer struct {
reader *bufio.Reader
tokenizer Tokenizer
posix bool
whitespacesplit bool
}
// NewLexer creates a new Lexer reading from io.Reader. This Lexer
// has a DefaultTokenizer according to posix and whitespacesplit
// rules.
func NewLexer(r io.Reader, posix, whitespacesplit bool) *Lexer {
return &Lexer{
reader: bufio.NewReader(r),
tokenizer: &DefaultTokenizer{},
posix: posix,
whitespacesplit: whitespacesplit,
}
}
// NewLexerString creates a new Lexer reading from a string. This
// Lexer has a DefaultTokenizer according to posix and whitespacesplit
// rules.
func NewLexerString(s string, posix, whitespacesplit bool) *Lexer {
return NewLexer(strings.NewReader(s), posix, whitespacesplit)
}
// Split splits a string according to posix or non-posix rules.
func Split(s string, posix bool) ([]string, error) {
return NewLexerString(s, posix, true).Split()
}
// SetTokenizer sets a Tokenizer.
func (l *Lexer) SetTokenizer(t Tokenizer) {
l.tokenizer = t
}
func (l *Lexer) Split() ([]string, error) {
result := make([]string, 0)
for {
token, err := l.readToken()
if token != "" {
result = append(result, token)
}
if err == io.EOF {
break
} else if err != nil {
return result, err
}
}
return result, nil
}
func (l *Lexer) readToken() (string, error) {
t := l.tokenizer
token := ""
quoted := false
state := ' '
escapedstate := ' '
scanning:
for {
next, _, err := l.reader.ReadRune()
if err != nil {
if t.IsQuote(state) {
return token, ErrNoClosing
} else if t.IsEscape(state) {
return token, ErrNoEscaped
}
return token, err
}
switch {
case t.IsWhitespace(state):
switch {
case t.IsWhitespace(next):
break scanning
case l.posix && t.IsEscape(next):
escapedstate = 'a'
state = next
case t.IsWord(next):
token += string(next)
state = 'a'
case t.IsQuote(next):
if !l.posix {
token += string(next)
}
state = next
default:
token = string(next)
if l.whitespacesplit {
state = 'a'
} else if token != "" || (l.posix && quoted) {
break scanning
}
}
case t.IsQuote(state):
quoted = true
switch {
case next == state:
if !l.posix {
token += string(next)
break scanning
} else {
state = 'a'
}
case l.posix && t.IsEscape(next) && t.IsEscapedQuote(state):
escapedstate = state
state = next
default:
token += string(next)
}
case t.IsEscape(state):
if t.IsQuote(escapedstate) && next != state && next != escapedstate {
token += string(state)
}
token += string(next)
state = escapedstate
case t.IsWord(state):
switch {
case t.IsWhitespace(next):
if token != "" || (l.posix && quoted) {
break scanning
}
case l.posix && t.IsQuote(next):
state = next
case l.posix && t.IsEscape(next):
escapedstate = 'a'
state = next
case t.IsWord(next) || t.IsQuote(next):
token += string(next)
default:
if l.whitespacesplit {
token += string(next)
} else if token != "" {
l.reader.UnreadRune()
break scanning
}
}
}
}
return token, nil
}