// Package shlex provides simple lexical analysis of command lines, splitting
// input into tokens the way a Unix shell does.
package shlex

import (
	"bufio"
	"errors"
	"io"
	"strings"
	"unicode"
)

var (
	// ErrNoClosing is returned when the input ends inside a quoted token,
	// before the closing quotation mark.
	ErrNoClosing = errors.New("No closing quotation")
	// ErrNoEscaped is returned when the input ends right after an escape
	// character, with nothing left to escape.
	ErrNoEscaped = errors.New("No escaped character")
)

// Tokenizer is the interface that classifies runes as word characters,
// whitespace, quotes, escape characters, or escapable quotes.
type Tokenizer interface {
	IsWord(rune) bool
	IsWhitespace(rune) bool
	IsQuote(rune) bool
	IsEscape(rune) bool
	IsEscapedQuote(rune) bool
}
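
// As an illustration only (not part of the original file), a custom
// Tokenizer can widen the set of word runes. The hypothetical ArgTokenizer
// below also treats '-', '.', and '/' as word characters so that flags and
// paths survive as single tokens:
//
//	type ArgTokenizer struct{ DefaultTokenizer }
//
//	func (t *ArgTokenizer) IsWord(r rune) bool {
//		return r == '-' || r == '.' || r == '/' || t.DefaultTokenizer.IsWord(r)
//	}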

// DefaultTokenizer implements a simple tokenizer with Unix-shell-like rules.
type DefaultTokenizer struct{}

func (t *DefaultTokenizer) IsWord(r rune) bool {
	return r == '_' || unicode.IsLetter(r) || unicode.IsNumber(r)
}
func (t *DefaultTokenizer) IsQuote(r rune) bool {
	switch r {
	case '\'', '"':
		return true
	default:
		return false
	}
}
func (t *DefaultTokenizer) IsWhitespace(r rune) bool {
	return unicode.IsSpace(r)
}
func (t *DefaultTokenizer) IsEscape(r rune) bool {
	return r == '\\'
}

// IsEscapedQuote reports whether escape sequences are honored inside this
// kind of quote; only double quotes qualify, matching shell behavior.
func (t *DefaultTokenizer) IsEscapedQuote(r rune) bool {
	return r == '"'
}

// Lexer represents a lexical analyzer.
type Lexer struct {
	reader          *bufio.Reader
	tokenizer       Tokenizer
	posix           bool // honor POSIX-style quoting and escaping
	whitespacesplit bool // delimit tokens only at whitespace
}

// NewLexer creates a new Lexer reading from an io.Reader. The Lexer uses a
// DefaultTokenizer and follows the given posix and whitespacesplit rules.
func NewLexer(r io.Reader, posix, whitespacesplit bool) *Lexer {
	return &Lexer{
		reader:          bufio.NewReader(r),
		tokenizer:       &DefaultTokenizer{},
		posix:           posix,
		whitespacesplit: whitespacesplit,
	}
}

// NewLexerString creates a new Lexer reading from a string. The Lexer uses a
// DefaultTokenizer and follows the given posix and whitespacesplit rules.
func NewLexerString(s string, posix, whitespacesplit bool) *Lexer {
	return NewLexer(strings.NewReader(s), posix, whitespacesplit)
}

// Split splits a string according to posix or non-posix rules.
func Split(s string, posix bool) ([]string, error) {
	return NewLexerString(s, posix, true).Split()
}
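
// A usage sketch (illustrative, not part of the original file): with posix
// rules quotes are stripped and backslash escapes are honored; without them
// the quote characters are kept verbatim.
//
//	args, _ := Split(`cp "my file" /tmp`, true)
//	// args == []string{"cp", "my file", "/tmp"}
//
//	raw, _ := Split(`cp "my file" /tmp`, false)
//	// raw == []string{"cp", `"my file"`, "/tmp"}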

// SetTokenizer sets a Tokenizer.
func (l *Lexer) SetTokenizer(t Tokenizer) {
	l.tokenizer = t
}
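
// Continuing the hypothetical ArgTokenizer sketch above: installing it keeps
// flags and paths as single tokens even with whitespacesplit disabled.
//
//	l := NewLexerString("ls -la /tmp", true, false)
//	l.SetTokenizer(&ArgTokenizer{})
//	tokens, _ := l.Split()
//	// tokens == []string{"ls", "-la", "/tmp"}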

// Split reads the whole input and returns it as a slice of tokens.
func (l *Lexer) Split() ([]string, error) {
	result := make([]string, 0)
	for {
		token, err := l.readToken()
		if token != "" {
			result = append(result, token)
		}

		if err == io.EOF {
			break
		} else if err != nil {
			return result, err
		}
	}
	return result, nil
}

// readToken scans a single token. It is a small state machine where state
// is one of: ' ' (between tokens), 'a' (inside a word), a quote rune
// (inside quotation marks), or an escape rune (after an escape character).
func (l *Lexer) readToken() (string, error) {
	t := l.tokenizer
	token := ""
	quoted := false
	state := ' '
	escapedstate := ' '
scanning:
	for {
		next, _, err := l.reader.ReadRune()
		if err != nil {
			// EOF (or a read error) inside a quote or an escape means the
			// input ended before the construct was closed.
			if t.IsQuote(state) {
				return token, ErrNoClosing
			} else if t.IsEscape(state) {
				return token, ErrNoEscaped
			}
			return token, err
		}

		switch {
		case t.IsWhitespace(state):
			switch {
			case t.IsWhitespace(next):
				break scanning
			case l.posix && t.IsEscape(next):
				escapedstate = 'a'
				state = next
			case t.IsWord(next):
				token += string(next)
				state = 'a'
			case t.IsQuote(next):
				if !l.posix {
					token += string(next)
				}
				state = next
			default:
				token = string(next)
				if l.whitespacesplit {
					state = 'a'
				} else if token != "" || (l.posix && quoted) {
					break scanning
				}
			}
		case t.IsQuote(state):
			quoted = true
			switch {
			case next == state:
				// Closing quote; in non-posix mode it is kept in the token.
				if !l.posix {
					token += string(next)
					break scanning
				} else {
					state = 'a'
				}
			case l.posix && t.IsEscape(next) && t.IsEscapedQuote(state):
				escapedstate = state
				state = next
			default:
				token += string(next)
			}
		case t.IsEscape(state):
			// Inside quotes, only the quote and the escape character
			// themselves can be escaped; any other backslash stays literal.
			if t.IsQuote(escapedstate) && next != state && next != escapedstate {
				token += string(state)
			}
			token += string(next)
			state = escapedstate
		case t.IsWord(state):
			switch {
			case t.IsWhitespace(next):
				if token != "" || (l.posix && quoted) {
					break scanning
				}
			case l.posix && t.IsQuote(next):
				state = next
			case l.posix && t.IsEscape(next):
				escapedstate = 'a'
				state = next
			case t.IsWord(next) || t.IsQuote(next):
				token += string(next)
			default:
				if l.whitespacesplit {
					token += string(next)
				} else if token != "" {
					l.reader.UnreadRune()
					break scanning
				}
			}
		}
	}
	return token, nil
}
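
// Error behavior sketch (illustrative, not part of the original file): an
// unterminated quote or a trailing escape surfaces as one of the package
// errors, alongside the partial token scanned so far.
//
//	_, err := Split(`echo "unterminated`, true)
//	// err == ErrNoClosing
//
//	_, err = Split(`echo trailing\`, true)
//	// err == ErrNoEscaped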