aboutsummaryrefslogtreecommitdiffstats
path: root/Godeps/_workspace/src/github.com/obscuren/otto/parser/lexer.go
diff options
context:
space:
mode:
Diffstat (limited to 'Godeps/_workspace/src/github.com/obscuren/otto/parser/lexer.go')
-rw-r--r--Godeps/_workspace/src/github.com/obscuren/otto/parser/lexer.go819
1 files changed, 0 insertions, 819 deletions
diff --git a/Godeps/_workspace/src/github.com/obscuren/otto/parser/lexer.go b/Godeps/_workspace/src/github.com/obscuren/otto/parser/lexer.go
deleted file mode 100644
index bc3e74f77..000000000
--- a/Godeps/_workspace/src/github.com/obscuren/otto/parser/lexer.go
+++ /dev/null
@@ -1,819 +0,0 @@
-package parser
-
-import (
- "bytes"
- "errors"
- "fmt"
- "regexp"
- "strconv"
- "strings"
- "unicode"
- "unicode/utf8"
-
- "github.com/robertkrimen/otto/file"
- "github.com/robertkrimen/otto/token"
-)
-
-type _chr struct {
- value rune
- width int
-}
-
-var matchIdentifier = regexp.MustCompile(`^[$_\p{L}][$_\p{L}\d}]*$`)
-
-func isDecimalDigit(chr rune) bool {
- return '0' <= chr && chr <= '9'
-}
-
-func digitValue(chr rune) int {
- switch {
- case '0' <= chr && chr <= '9':
- return int(chr - '0')
- case 'a' <= chr && chr <= 'f':
- return int(chr - 'a' + 10)
- case 'A' <= chr && chr <= 'F':
- return int(chr - 'A' + 10)
- }
- return 16 // Larger than any legal digit value
-}
-
-func isDigit(chr rune, base int) bool {
- return digitValue(chr) < base
-}
-
-func isIdentifierStart(chr rune) bool {
- return chr == '$' || chr == '_' || chr == '\\' ||
- 'a' <= chr && chr <= 'z' || 'A' <= chr && chr <= 'Z' ||
- chr >= utf8.RuneSelf && unicode.IsLetter(chr)
-}
-
-func isIdentifierPart(chr rune) bool {
- return chr == '$' || chr == '_' || chr == '\\' ||
- 'a' <= chr && chr <= 'z' || 'A' <= chr && chr <= 'Z' ||
- '0' <= chr && chr <= '9' ||
- chr >= utf8.RuneSelf && (unicode.IsLetter(chr) || unicode.IsDigit(chr))
-}
-
-func (self *_parser) scanIdentifier() (string, error) {
- offset := self.chrOffset
- parse := false
- for isIdentifierPart(self.chr) {
- if self.chr == '\\' {
- distance := self.chrOffset - offset
- self.read()
- if self.chr != 'u' {
- return "", fmt.Errorf("Invalid identifier escape character: %c (%s)", self.chr, string(self.chr))
- }
- parse = true
- var value rune
- for j := 0; j < 4; j++ {
- self.read()
- decimal, ok := hex2decimal(byte(self.chr))
- if !ok {
- return "", fmt.Errorf("Invalid identifier escape character: %c (%s)", self.chr, string(self.chr))
- }
- value = value<<4 | decimal
- }
- if value == '\\' {
- return "", fmt.Errorf("Invalid identifier escape value: %c (%s)", value, string(value))
- } else if distance == 0 {
- if !isIdentifierStart(value) {
- return "", fmt.Errorf("Invalid identifier escape value: %c (%s)", value, string(value))
- }
- } else if distance > 0 {
- if !isIdentifierPart(value) {
- return "", fmt.Errorf("Invalid identifier escape value: %c (%s)", value, string(value))
- }
- }
- }
- self.read()
- }
- literal := string(self.str[offset:self.chrOffset])
- if parse {
- return parseStringLiteral(literal)
- }
- return literal, nil
-}
-
-// 7.2
-func isLineWhiteSpace(chr rune) bool {
- switch chr {
- case '\u0009', '\u000b', '\u000c', '\u0020', '\u00a0', '\ufeff':
- return true
- case '\u000a', '\u000d', '\u2028', '\u2029':
- return false
- case '\u0085':
- return false
- }
- return unicode.IsSpace(chr)
-}
-
-// 7.3
-func isLineTerminator(chr rune) bool {
- switch chr {
- case '\u000a', '\u000d', '\u2028', '\u2029':
- return true
- }
- return false
-}
-
-func (self *_parser) scan() (tkn token.Token, literal string, idx file.Idx) {
-
- self.implicitSemicolon = false
-
- for {
- self.skipWhiteSpace()
-
- idx = self.idxOf(self.chrOffset)
- insertSemicolon := false
-
- switch chr := self.chr; {
- case isIdentifierStart(chr):
- var err error
- literal, err = self.scanIdentifier()
- if err != nil {
- tkn = token.ILLEGAL
- break
- }
- if len(literal) > 1 {
- // Keywords are longer than 1 character, avoid lookup otherwise
- var strict bool
- tkn, strict = token.IsKeyword(literal)
-
- switch tkn {
-
- case 0: // Not a keyword
- if literal == "true" || literal == "false" {
- self.insertSemicolon = true
- tkn = token.BOOLEAN
- return
- } else if literal == "null" {
- self.insertSemicolon = true
- tkn = token.NULL
- return
- }
-
- case token.KEYWORD:
- tkn = token.KEYWORD
- if strict {
- // TODO If strict and in strict mode, then this is not a break
- break
- }
- return
-
- case
- token.THIS,
- token.BREAK,
- token.THROW, // A newline after a throw is not allowed, but we need to detect it
- token.RETURN,
- token.CONTINUE,
- token.DEBUGGER:
- self.insertSemicolon = true
- return
-
- default:
- return
-
- }
- }
- self.insertSemicolon = true
- tkn = token.IDENTIFIER
- return
- case '0' <= chr && chr <= '9':
- self.insertSemicolon = true
- tkn, literal = self.scanNumericLiteral(false)
- return
- default:
- self.read()
- switch chr {
- case -1:
- if self.insertSemicolon {
- self.insertSemicolon = false
- self.implicitSemicolon = true
- }
- tkn = token.EOF
- case '\r', '\n', '\u2028', '\u2029':
- self.insertSemicolon = false
- self.implicitSemicolon = true
- continue
- case ':':
- tkn = token.COLON
- case '.':
- if digitValue(self.chr) < 10 {
- insertSemicolon = true
- tkn, literal = self.scanNumericLiteral(true)
- } else {
- tkn = token.PERIOD
- }
- case ',':
- tkn = token.COMMA
- case ';':
- tkn = token.SEMICOLON
- case '(':
- tkn = token.LEFT_PARENTHESIS
- case ')':
- tkn = token.RIGHT_PARENTHESIS
- insertSemicolon = true
- case '[':
- tkn = token.LEFT_BRACKET
- case ']':
- tkn = token.RIGHT_BRACKET
- insertSemicolon = true
- case '{':
- tkn = token.LEFT_BRACE
- case '}':
- tkn = token.RIGHT_BRACE
- insertSemicolon = true
- case '+':
- tkn = self.switch3(token.PLUS, token.ADD_ASSIGN, '+', token.INCREMENT)
- if tkn == token.INCREMENT {
- insertSemicolon = true
- }
- case '-':
- tkn = self.switch3(token.MINUS, token.SUBTRACT_ASSIGN, '-', token.DECREMENT)
- if tkn == token.DECREMENT {
- insertSemicolon = true
- }
- case '*':
- tkn = self.switch2(token.MULTIPLY, token.MULTIPLY_ASSIGN)
- case '/':
- if self.chr == '/' {
- self.skipSingleLineComment()
- continue
- } else if self.chr == '*' {
- self.skipMultiLineComment()
- continue
- } else {
- // Could be division, could be RegExp literal
- tkn = self.switch2(token.SLASH, token.QUOTIENT_ASSIGN)
- insertSemicolon = true
- }
- case '%':
- tkn = self.switch2(token.REMAINDER, token.REMAINDER_ASSIGN)
- case '^':
- tkn = self.switch2(token.EXCLUSIVE_OR, token.EXCLUSIVE_OR_ASSIGN)
- case '<':
- tkn = self.switch4(token.LESS, token.LESS_OR_EQUAL, '<', token.SHIFT_LEFT, token.SHIFT_LEFT_ASSIGN)
- case '>':
- tkn = self.switch6(token.GREATER, token.GREATER_OR_EQUAL, '>', token.SHIFT_RIGHT, token.SHIFT_RIGHT_ASSIGN, '>', token.UNSIGNED_SHIFT_RIGHT, token.UNSIGNED_SHIFT_RIGHT_ASSIGN)
- case '=':
- tkn = self.switch2(token.ASSIGN, token.EQUAL)
- if tkn == token.EQUAL && self.chr == '=' {
- self.read()
- tkn = token.STRICT_EQUAL
- }
- case '!':
- tkn = self.switch2(token.NOT, token.NOT_EQUAL)
- if tkn == token.NOT_EQUAL && self.chr == '=' {
- self.read()
- tkn = token.STRICT_NOT_EQUAL
- }
- case '&':
- if self.chr == '^' {
- self.read()
- tkn = self.switch2(token.AND_NOT, token.AND_NOT_ASSIGN)
- } else {
- tkn = self.switch3(token.AND, token.AND_ASSIGN, '&', token.LOGICAL_AND)
- }
- case '|':
- tkn = self.switch3(token.OR, token.OR_ASSIGN, '|', token.LOGICAL_OR)
- case '~':
- tkn = token.BITWISE_NOT
- case '?':
- tkn = token.QUESTION_MARK
- case '"', '\'':
- insertSemicolon = true
- tkn = token.STRING
- var err error
- literal, err = self.scanString(self.chrOffset - 1)
- if err != nil {
- tkn = token.ILLEGAL
- }
- default:
- self.errorUnexpected(idx, chr)
- tkn = token.ILLEGAL
- }
- }
- self.insertSemicolon = insertSemicolon
- return
- }
-}
-
-func (self *_parser) switch2(tkn0, tkn1 token.Token) token.Token {
- if self.chr == '=' {
- self.read()
- return tkn1
- }
- return tkn0
-}
-
-func (self *_parser) switch3(tkn0, tkn1 token.Token, chr2 rune, tkn2 token.Token) token.Token {
- if self.chr == '=' {
- self.read()
- return tkn1
- }
- if self.chr == chr2 {
- self.read()
- return tkn2
- }
- return tkn0
-}
-
-func (self *_parser) switch4(tkn0, tkn1 token.Token, chr2 rune, tkn2, tkn3 token.Token) token.Token {
- if self.chr == '=' {
- self.read()
- return tkn1
- }
- if self.chr == chr2 {
- self.read()
- if self.chr == '=' {
- self.read()
- return tkn3
- }
- return tkn2
- }
- return tkn0
-}
-
-func (self *_parser) switch6(tkn0, tkn1 token.Token, chr2 rune, tkn2, tkn3 token.Token, chr3 rune, tkn4, tkn5 token.Token) token.Token {
- if self.chr == '=' {
- self.read()
- return tkn1
- }
- if self.chr == chr2 {
- self.read()
- if self.chr == '=' {
- self.read()
- return tkn3
- }
- if self.chr == chr3 {
- self.read()
- if self.chr == '=' {
- self.read()
- return tkn5
- }
- return tkn4
- }
- return tkn2
- }
- return tkn0
-}
-
-func (self *_parser) chrAt(index int) _chr {
- value, width := utf8.DecodeRuneInString(self.str[index:])
- return _chr{
- value: value,
- width: width,
- }
-}
-
-func (self *_parser) _peek() rune {
- if self.offset+1 < self.length {
- return rune(self.str[self.offset+1])
- }
- return -1
-}
-
-func (self *_parser) read() {
- if self.offset < self.length {
- self.chrOffset = self.offset
- chr, width := rune(self.str[self.offset]), 1
- if chr >= utf8.RuneSelf { // !ASCII
- chr, width = utf8.DecodeRuneInString(self.str[self.offset:])
- if chr == utf8.RuneError && width == 1 {
- self.error(self.chrOffset, "Invalid UTF-8 character")
- }
- }
- self.offset += width
- self.chr = chr
- } else {
- self.chrOffset = self.length
- self.chr = -1 // EOF
- }
-}
-
-// This is here since the functions are so similar
-func (self *_RegExp_parser) read() {
- if self.offset < self.length {
- self.chrOffset = self.offset
- chr, width := rune(self.str[self.offset]), 1
- if chr >= utf8.RuneSelf { // !ASCII
- chr, width = utf8.DecodeRuneInString(self.str[self.offset:])
- if chr == utf8.RuneError && width == 1 {
- self.error(self.chrOffset, "Invalid UTF-8 character")
- }
- }
- self.offset += width
- self.chr = chr
- } else {
- self.chrOffset = self.length
- self.chr = -1 // EOF
- }
-}
-
-func (self *_parser) skipSingleLineComment() {
- for self.chr != -1 {
- self.read()
- if isLineTerminator(self.chr) {
- return
- }
- }
-}
-
-func (self *_parser) skipMultiLineComment() {
- self.read()
- for self.chr >= 0 {
- chr := self.chr
- self.read()
- if chr == '*' && self.chr == '/' {
- self.read()
- return
- }
- }
-
- self.errorUnexpected(0, self.chr)
-}
-
-func (self *_parser) skipWhiteSpace() {
- for {
- switch self.chr {
- case ' ', '\t', '\f', '\v', '\u00a0', '\ufeff':
- self.read()
- continue
- case '\r':
- if self._peek() == '\n' {
- self.read()
- }
- fallthrough
- case '\u2028', '\u2029', '\n':
- if self.insertSemicolon {
- return
- }
- self.read()
- continue
- }
- if self.chr >= utf8.RuneSelf {
- if unicode.IsSpace(self.chr) {
- self.read()
- continue
- }
- }
- break
- }
-}
-
-func (self *_parser) skipLineWhiteSpace() {
- for isLineWhiteSpace(self.chr) {
- self.read()
- }
-}
-
-func (self *_parser) scanMantissa(base int) {
- for digitValue(self.chr) < base {
- self.read()
- }
-}
-
-func (self *_parser) scanEscape(quote rune) {
-
- var length, base uint32
- switch self.chr {
- //case '0', '1', '2', '3', '4', '5', '6', '7':
- // Octal:
- // length, base, limit = 3, 8, 255
- case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '"', '\'', '0':
- self.read()
- return
- case '\r', '\n', '\u2028', '\u2029':
- self.scanNewline()
- return
- case 'x':
- self.read()
- length, base = 2, 16
- case 'u':
- self.read()
- length, base = 4, 16
- default:
- self.read() // Always make progress
- return
- }
-
- var value uint32
- for ; length > 0 && self.chr != quote && self.chr >= 0; length-- {
- digit := uint32(digitValue(self.chr))
- if digit >= base {
- break
- }
- value = value*base + digit
- self.read()
- }
-}
-
-func (self *_parser) scanString(offset int) (string, error) {
- // " ' /
- quote := rune(self.str[offset])
-
- for self.chr != quote {
- chr := self.chr
- if chr == '\n' || chr == '\r' || chr == '\u2028' || chr == '\u2029' || chr < 0 {
- goto newline
- }
- self.read()
- if chr == '\\' {
- if quote == '/' {
- if self.chr == '\n' || self.chr == '\r' || self.chr == '\u2028' || self.chr == '\u2029' || self.chr < 0 {
- goto newline
- }
- self.read()
- } else {
- self.scanEscape(quote)
- }
- } else if chr == '[' && quote == '/' {
- // Allow a slash (/) in a bracket character class ([...])
- // TODO Fix this, this is hacky...
- quote = -1
- } else if chr == ']' && quote == -1 {
- quote = '/'
- }
- }
-
- // " ' /
- self.read()
-
- return string(self.str[offset:self.chrOffset]), nil
-
-newline:
- self.scanNewline()
- err := "String not terminated"
- if quote == '/' {
- err = "Invalid regular expression: missing /"
- self.error(self.idxOf(offset), err)
- }
- return "", errors.New(err)
-}
-
-func (self *_parser) scanNewline() {
- if self.chr == '\r' {
- self.read()
- if self.chr != '\n' {
- return
- }
- }
- self.read()
-}
-
-func hex2decimal(chr byte) (value rune, ok bool) {
- {
- chr := rune(chr)
- switch {
- case '0' <= chr && chr <= '9':
- return chr - '0', true
- case 'a' <= chr && chr <= 'f':
- return chr - 'a' + 10, true
- case 'A' <= chr && chr <= 'F':
- return chr - 'A' + 10, true
- }
- return
- }
-}
-
-func parseNumberLiteral(literal string) (value interface{}, err error) {
- // TODO Is Uint okay? What about -MAX_UINT
- value, err = strconv.ParseInt(literal, 0, 64)
- if err == nil {
- return
- }
-
- parseIntErr := err // Save this first error, just in case
-
- value, err = strconv.ParseFloat(literal, 64)
- if err == nil {
- return
- } else if err.(*strconv.NumError).Err == strconv.ErrRange {
- // Infinity, etc.
- return value, nil
- }
-
- err = parseIntErr
-
- if err.(*strconv.NumError).Err == strconv.ErrRange {
- if len(literal) > 2 && literal[0] == '0' && (literal[1] == 'X' || literal[1] == 'x') {
- // Could just be a very large number (e.g. 0x8000000000000000)
- var value float64
- literal = literal[2:]
- for _, chr := range literal {
- digit := digitValue(chr)
- if digit >= 16 {
- goto error
- }
- value = value*16 + float64(digit)
- }
- return value, nil
- }
- }
-
-error:
- return nil, errors.New("Illegal numeric literal")
-}
-
-func parseStringLiteral(literal string) (string, error) {
- // Best case scenario...
- if literal == "" {
- return "", nil
- }
-
- // Slightly less-best case scenario...
- if !strings.ContainsRune(literal, '\\') {
- return literal, nil
- }
-
- str := literal
- buffer := bytes.NewBuffer(make([]byte, 0, 3*len(literal)/2))
-
- for len(str) > 0 {
- switch chr := str[0]; {
- // We do not explicitly handle the case of the quote
- // value, which can be: " ' /
- // This assumes we're already passed a partially well-formed literal
- case chr >= utf8.RuneSelf:
- chr, size := utf8.DecodeRuneInString(str)
- buffer.WriteRune(chr)
- str = str[size:]
- continue
- case chr != '\\':
- buffer.WriteByte(chr)
- str = str[1:]
- continue
- }
-
- if len(str) <= 1 {
- panic("len(str) <= 1")
- }
- chr := str[1]
- var value rune
- if chr >= utf8.RuneSelf {
- str = str[1:]
- var size int
- value, size = utf8.DecodeRuneInString(str)
- str = str[size:] // \ + <character>
- } else {
- str = str[2:] // \<character>
- switch chr {
- case 'b':
- value = '\b'
- case 'f':
- value = '\f'
- case 'n':
- value = '\n'
- case 'r':
- value = '\r'
- case 't':
- value = '\t'
- case 'v':
- value = '\v'
- case 'x', 'u':
- size := 0
- switch chr {
- case 'x':
- size = 2
- case 'u':
- size = 4
- }
- if len(str) < size {
- return "", fmt.Errorf("invalid escape: \\%s: len(%q) != %d", string(chr), str, size)
- }
- for j := 0; j < size; j++ {
- decimal, ok := hex2decimal(str[j])
- if !ok {
- return "", fmt.Errorf("invalid escape: \\%s: %q", string(chr), str[:size])
- }
- value = value<<4 | decimal
- }
- str = str[size:]
- if chr == 'x' {
- break
- }
- if value > utf8.MaxRune {
- panic("value > utf8.MaxRune")
- }
- case '0':
- if len(str) == 0 || '0' > str[0] || str[0] > '7' {
- value = 0
- break
- }
- fallthrough
- case '1', '2', '3', '4', '5', '6', '7':
- // TODO strict
- value = rune(chr) - '0'
- j := 0
- for ; j < 2; j++ {
- if len(str) < j+1 {
- break
- }
- chr := str[j]
- if '0' > chr || chr > '7' {
- break
- }
- decimal := rune(str[j]) - '0'
- value = (value << 3) | decimal
- }
- str = str[j:]
- case '\\':
- value = '\\'
- case '\'', '"':
- value = rune(chr)
- case '\r':
- if len(str) > 0 {
- if str[0] == '\n' {
- str = str[1:]
- }
- }
- fallthrough
- case '\n':
- continue
- default:
- value = rune(chr)
- }
- }
- buffer.WriteRune(value)
- }
-
- return buffer.String(), nil
-}
-
-func (self *_parser) scanNumericLiteral(decimalPoint bool) (token.Token, string) {
-
- offset := self.chrOffset
- tkn := token.NUMBER
-
- if decimalPoint {
- offset--
- self.scanMantissa(10)
- goto exponent
- }
-
- if self.chr == '0' {
- offset := self.chrOffset
- self.read()
- if self.chr == 'x' || self.chr == 'X' {
- // Hexadecimal
- self.read()
- if isDigit(self.chr, 16) {
- self.read()
- } else {
- return token.ILLEGAL, self.str[offset:self.chrOffset]
- }
- self.scanMantissa(16)
-
- if self.chrOffset-offset <= 2 {
- // Only "0x" or "0X"
- self.error(0, "Illegal hexadecimal number")
- }
-
- goto hexadecimal
- } else if self.chr == '.' {
- // Float
- goto float
- } else {
- // Octal, Float
- if self.chr == 'e' || self.chr == 'E' {
- goto exponent
- }
- self.scanMantissa(8)
- if self.chr == '8' || self.chr == '9' {
- return token.ILLEGAL, self.str[offset:self.chrOffset]
- }
- goto octal
- }
- }
-
- self.scanMantissa(10)
-
-float:
- if self.chr == '.' {
- self.read()
- self.scanMantissa(10)
- }
-
-exponent:
- if self.chr == 'e' || self.chr == 'E' {
- self.read()
- if self.chr == '-' || self.chr == '+' {
- self.read()
- }
- if isDecimalDigit(self.chr) {
- self.read()
- self.scanMantissa(10)
- } else {
- return token.ILLEGAL, self.str[offset:self.chrOffset]
- }
- }
-
-hexadecimal:
-octal:
- if isIdentifierStart(self.chr) || isDecimalDigit(self.chr) {
- return token.ILLEGAL, self.str[offset:self.chrOffset]
- }
-
- return tkn, self.str[offset:self.chrOffset]
-}