diff options
author | chriseth <chris@ethereum.org> | 2018-12-03 22:48:03 +0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-12-03 22:48:03 +0800 |
commit | c8a2cb62832afb2dc09ccee6fd42c1516dfdb981 (patch) | |
tree | 7977e9dcbbc215088c05b847f849871ef5d4ae66 /liblangutil/Scanner.cpp | |
parent | 1d4f565a64988a3400847d2655ca24f73f234bc6 (diff) | |
parent | 590be1d84cea9850ce69b68be3dc5294b39041e5 (diff) | |
download | dexon-solidity-c8a2cb62832afb2dc09ccee6fd42c1516dfdb981.tar dexon-solidity-c8a2cb62832afb2dc09ccee6fd42c1516dfdb981.tar.gz dexon-solidity-c8a2cb62832afb2dc09ccee6fd42c1516dfdb981.tar.bz2 dexon-solidity-c8a2cb62832afb2dc09ccee6fd42c1516dfdb981.tar.lz dexon-solidity-c8a2cb62832afb2dc09ccee6fd42c1516dfdb981.tar.xz dexon-solidity-c8a2cb62832afb2dc09ccee6fd42c1516dfdb981.tar.zst dexon-solidity-c8a2cb62832afb2dc09ccee6fd42c1516dfdb981.zip |
Merge pull request #5571 from ethereum/develop
Version 0.5.1
Diffstat (limited to 'liblangutil/Scanner.cpp')
-rw-r--r-- | liblangutil/Scanner.cpp | 904 |
1 files changed, 904 insertions, 0 deletions
diff --git a/liblangutil/Scanner.cpp b/liblangutil/Scanner.cpp new file mode 100644 index 00000000..ac298bd5 --- /dev/null +++ b/liblangutil/Scanner.cpp @@ -0,0 +1,904 @@ +/* + This file is part of solidity. + + solidity is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + solidity is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with solidity. If not, see <http://www.gnu.org/licenses/>. + + This file is derived from the file "scanner.cc", which was part of the + V8 project. The original copyright header follows: + + Copyright 2006-2012, the V8 project authors. All rights reserved. + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * Neither the name of Google Inc. nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ +/** + * @author Christian <c@ethdev.com> + * @date 2014 + * Solidity scanner. + */ + +#include <liblangutil/Exceptions.h> +#include <liblangutil/Scanner.h> +#include <algorithm> +#include <ostream> +#include <tuple> + +using namespace std; + +namespace langutil +{ + +namespace +{ +bool isDecimalDigit(char c) +{ + return '0' <= c && c <= '9'; +} +bool isHexDigit(char c) +{ + return isDecimalDigit(c) + || ('a' <= c && c <= 'f') + || ('A' <= c && c <= 'F'); +} +bool isLineTerminator(char c) +{ + return c == '\n'; +} +bool isWhiteSpace(char c) +{ + return c == ' ' || c == '\n' || c == '\t' || c == '\r'; +} +bool isIdentifierStart(char c) +{ + return c == '_' || c == '$' || ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'); +} +bool isIdentifierPart(char c) +{ + return isIdentifierStart(c) || isDecimalDigit(c); +} +int hexValue(char c) +{ + if (c >= '0' && c <= '9') + return c - '0'; + else if (c >= 'a' && c <= 'f') + return c - 'a' + 10; + else if (c >= 'A' && c <= 'F') + return c - 'A' + 10; + else return -1; +} +} // end anonymous namespace + +std::string to_string(ScannerError _errorCode) +{ + switch (_errorCode) + { + case ScannerError::NoError: return "No error."; + case ScannerError::IllegalToken: return "Invalid token."; + case ScannerError::IllegalHexString: return "Expected even number of hex-nibbles within double-quotes."; + case ScannerError::IllegalHexDigit: return "Hexadecimal digit missing or invalid."; + case ScannerError::IllegalCommentTerminator: return "Expected multi-line comment-terminator."; + case ScannerError::IllegalEscapeSequence: return "Invalid escape sequence."; + case ScannerError::IllegalStringEndQuote: return "Expected string end-quote."; + case ScannerError::IllegalNumberSeparator: return "Invalid use of number separator '_'."; + case ScannerError::IllegalExponent: return "Invalid exponent."; + case ScannerError::IllegalNumberEnd: return "Identifier-start is not allowed at end of a number."; + case ScannerError::OctalNotAllowed: return "Octal numbers not allowed."; + default: + solAssert(false, "Unhandled case in to_string(ScannerError)"); + return ""; + } +} + +std::ostream& operator<<(std::ostream& os, ScannerError _errorCode) +{ + os << to_string(_errorCode); + return os; +} + +/// Scoped helper for literal recording. Automatically drops the literal +/// if aborting the scanning before it's complete. +enum LiteralType { + LITERAL_TYPE_STRING, + LITERAL_TYPE_NUMBER, // not really different from string type in behaviour + LITERAL_TYPE_COMMENT +}; + +class LiteralScope +{ +public: + explicit LiteralScope(Scanner* _self, enum LiteralType _type): m_type(_type) + , m_scanner(_self) + , m_complete(false) + { + if (_type == LITERAL_TYPE_COMMENT) + m_scanner->m_nextSkippedComment.literal.clear(); + else + m_scanner->m_nextToken.literal.clear(); + } + ~LiteralScope() + { + if (!m_complete) + { + if (m_type == LITERAL_TYPE_COMMENT) + m_scanner->m_nextSkippedComment.literal.clear(); + else + m_scanner->m_nextToken.literal.clear(); + } + } + void complete() { m_complete = true; } + +private: + enum LiteralType m_type; + Scanner* m_scanner; + bool m_complete; +}; // end of LiteralScope class + + +void Scanner::reset(CharStream _source) +{ + m_source = make_shared<CharStream>(std::move(_source)); + reset(); +} + +void Scanner::reset(std::shared_ptr<CharStream> _source) +{ + solAssert(_source.get() != nullptr, "You MUST provide a CharStream when resetting."); + m_source = _source; + reset(); +} + +void Scanner::reset() +{ + m_source->reset(); + m_char = m_source->get(); + skipWhitespace(); + scanToken(); + next(); +} + +bool Scanner::scanHexByte(char& o_scannedByte) +{ + char x = 0; + for (int i = 0; i < 2; i++) + { + int d = hexValue(m_char); + if (d < 0) + { + rollback(i); + return false; + } + x = x * 16 + d; + advance(); + } + o_scannedByte = x; + return true; +} + +bool Scanner::scanUnicode(unsigned & o_codepoint) +{ + unsigned x = 0; + for (int i = 0; i < 4; i++) + { + int d = hexValue(m_char); + if (d < 0) + { + rollback(i); + return false; + } + x = x * 16 + d; + advance(); + } + o_codepoint = x; + return true; +} + +// This supports codepoints between 0000 and FFFF. +void Scanner::addUnicodeAsUTF8(unsigned codepoint) +{ + if (codepoint <= 0x7f) + addLiteralChar(codepoint); + else if (codepoint <= 0x7ff) + { + addLiteralChar(0xc0 | (codepoint >> 6)); + addLiteralChar(0x80 | (codepoint & 0x3f)); + } + else + { + addLiteralChar(0xe0 | (codepoint >> 12)); + addLiteralChar(0x80 | ((codepoint >> 6) & 0x3f)); + addLiteralChar(0x80 | (codepoint & 0x3f)); + } +} + +// Ensure that tokens can be stored in a byte. +BOOST_STATIC_ASSERT(TokenTraits::count() <= 0x100); + +Token Scanner::next() +{ + m_currentToken = m_nextToken; + m_skippedComment = m_nextSkippedComment; + scanToken(); + + return m_currentToken.token; +} + +Token Scanner::selectToken(char _next, Token _then, Token _else) +{ + advance(); + if (m_char == _next) + return selectToken(_then); + else + return _else; +} + +bool Scanner::skipWhitespace() +{ + int const startPosition = sourcePos(); + while (isWhiteSpace(m_char)) + advance(); + // Return whether or not we skipped any characters. + return sourcePos() != startPosition; +} + +void Scanner::skipWhitespaceExceptUnicodeLinebreak() +{ + while (isWhiteSpace(m_char) && !isUnicodeLinebreak()) + advance(); +} + +Token Scanner::skipSingleLineComment() +{ + // Line terminator is not part of the comment. If it is a + // non-ascii line terminator, it will result in a parser error. + while (!isUnicodeLinebreak()) + if (!advance()) break; + + return Token::Whitespace; +} + +Token Scanner::scanSingleLineDocComment() +{ + LiteralScope literal(this, LITERAL_TYPE_COMMENT); + advance(); //consume the last '/' at /// + + skipWhitespaceExceptUnicodeLinebreak(); + + while (!isSourcePastEndOfInput()) + { + if (isLineTerminator(m_char)) + { + // check if next line is also a documentation comment + skipWhitespace(); + if (!m_source->isPastEndOfInput(3) && + m_source->get(0) == '/' && + m_source->get(1) == '/' && + m_source->get(2) == '/') + { + addCommentLiteralChar('\n'); + m_char = m_source->advanceAndGet(3); + } + else + break; // next line is not a documentation comment, we are done + + } + else if (isUnicodeLinebreak()) + // Any line terminator that is not '\n' is considered to end the + // comment. + break; + addCommentLiteralChar(m_char); + advance(); + } + literal.complete(); + return Token::CommentLiteral; +} + +Token Scanner::skipMultiLineComment() +{ + advance(); + while (!isSourcePastEndOfInput()) + { + char ch = m_char; + advance(); + + // If we have reached the end of the multi-line comment, we + // consume the '/' and insert a whitespace. This way all + // multi-line comments are treated as whitespace. + if (ch == '*' && m_char == '/') + { + m_char = ' '; + return Token::Whitespace; + } + } + // Unterminated multi-line comment. + return setError(ScannerError::IllegalCommentTerminator); +} + +Token Scanner::scanMultiLineDocComment() +{ + LiteralScope literal(this, LITERAL_TYPE_COMMENT); + bool endFound = false; + bool charsAdded = false; + + while (isWhiteSpace(m_char) && !isLineTerminator(m_char)) + advance(); + + while (!isSourcePastEndOfInput()) + { + //handle newlines in multline comments + if (isLineTerminator(m_char)) + { + skipWhitespace(); + if (!m_source->isPastEndOfInput(1) && m_source->get(0) == '*' && m_source->get(1) == '*') + { // it is unknown if this leads to the end of the comment + addCommentLiteralChar('*'); + advance(); + } + else if (!m_source->isPastEndOfInput(1) && m_source->get(0) == '*' && m_source->get(1) != '/') + { // skip first '*' in subsequent lines + if (charsAdded) + addCommentLiteralChar('\n'); + m_char = m_source->advanceAndGet(2); + } + else if (!m_source->isPastEndOfInput(1) && m_source->get(0) == '*' && m_source->get(1) == '/') + { // if after newline the comment ends, don't insert the newline + m_char = m_source->advanceAndGet(2); + endFound = true; + break; + } + else if (charsAdded) + addCommentLiteralChar('\n'); + } + + if (!m_source->isPastEndOfInput(1) && m_source->get(0) == '*' && m_source->get(1) == '/') + { + m_char = m_source->advanceAndGet(2); + endFound = true; + break; + } + addCommentLiteralChar(m_char); + charsAdded = true; + advance(); + } + literal.complete(); + if (!endFound) + return setError(ScannerError::IllegalCommentTerminator); + else + return Token::CommentLiteral; +} + +Token Scanner::scanSlash() +{ + int firstSlashPosition = sourcePos(); + advance(); + if (m_char == '/') + { + if (!advance()) /* double slash comment directly before EOS */ + return Token::Whitespace; + else if (m_char == '/') + { + // doxygen style /// comment + Token comment; + m_nextSkippedComment.location.start = firstSlashPosition; + comment = scanSingleLineDocComment(); + m_nextSkippedComment.location.end = sourcePos(); + m_nextSkippedComment.token = comment; + return Token::Whitespace; + } + else + return skipSingleLineComment(); + } + else if (m_char == '*') + { + // doxygen style /** natspec comment + if (!advance()) /* slash star comment before EOS */ + return setError(ScannerError::IllegalCommentTerminator); + else if (m_char == '*') + { + advance(); //consume the last '*' at /** + + // "/**/" + if (m_char == '/') + { + advance(); //skip the closing slash + return Token::Whitespace; + } + // we actually have a multiline documentation comment + Token comment; + m_nextSkippedComment.location.start = firstSlashPosition; + comment = scanMultiLineDocComment(); + m_nextSkippedComment.location.end = sourcePos(); + m_nextSkippedComment.token = comment; + if (comment == Token::Illegal) + return Token::Illegal; // error already set + else + return Token::Whitespace; + } + else + return skipMultiLineComment(); + } + else if (m_char == '=') + return selectToken(Token::AssignDiv); + else + return Token::Div; +} + +void Scanner::scanToken() +{ + m_nextToken.error = ScannerError::NoError; + m_nextToken.literal.clear(); + m_nextToken.extendedTokenInfo = make_tuple(0, 0); + m_nextSkippedComment.literal.clear(); + m_nextSkippedComment.extendedTokenInfo = make_tuple(0, 0); + + Token token; + // M and N are for the purposes of grabbing different type sizes + unsigned m; + unsigned n; + do + { + // Remember the position of the next token + m_nextToken.location.start = sourcePos(); + switch (m_char) + { + case '"': + case '\'': + token = scanString(); + break; + case '<': + // < <= << <<= + advance(); + if (m_char == '=') + token = selectToken(Token::LessThanOrEqual); + else if (m_char == '<') + token = selectToken('=', Token::AssignShl, Token::SHL); + else + token = Token::LessThan; + break; + case '>': + // > >= >> >>= >>> >>>= + advance(); + if (m_char == '=') + token = selectToken(Token::GreaterThanOrEqual); + else if (m_char == '>') + { + // >> >>= >>> >>>= + advance(); + if (m_char == '=') + token = selectToken(Token::AssignSar); + else if (m_char == '>') + token = selectToken('=', Token::AssignShr, Token::SHR); + else + token = Token::SAR; + } + else + token = Token::GreaterThan; + break; + case '=': + // = == => + advance(); + if (m_char == '=') + token = selectToken(Token::Equal); + else if (m_char == '>') + token = selectToken(Token::Arrow); + else + token = Token::Assign; + break; + case '!': + // ! != + advance(); + if (m_char == '=') + token = selectToken(Token::NotEqual); + else + token = Token::Not; + break; + case '+': + // + ++ += + advance(); + if (m_char == '+') + token = selectToken(Token::Inc); + else if (m_char == '=') + token = selectToken(Token::AssignAdd); + else + token = Token::Add; + break; + case '-': + // - -- -= + advance(); + if (m_char == '-') + token = selectToken(Token::Dec); + else if (m_char == '=') + token = selectToken(Token::AssignSub); + else + token = Token::Sub; + break; + case '*': + // * ** *= + advance(); + if (m_char == '*') + token = selectToken(Token::Exp); + else if (m_char == '=') + token = selectToken(Token::AssignMul); + else + token = Token::Mul; + break; + case '%': + // % %= + token = selectToken('=', Token::AssignMod, Token::Mod); + break; + case '/': + // / // /* /= + token = scanSlash(); + break; + case '&': + // & && &= + advance(); + if (m_char == '&') + token = selectToken(Token::And); + else if (m_char == '=') + token = selectToken(Token::AssignBitAnd); + else + token = Token::BitAnd; + break; + case '|': + // | || |= + advance(); + if (m_char == '|') + token = selectToken(Token::Or); + else if (m_char == '=') + token = selectToken(Token::AssignBitOr); + else + token = Token::BitOr; + break; + case '^': + // ^ ^= + token = selectToken('=', Token::AssignBitXor, Token::BitXor); + break; + case '.': + // . Number + advance(); + if (isDecimalDigit(m_char)) + token = scanNumber('.'); + else + token = Token::Period; + break; + case ':': + token = selectToken(Token::Colon); + break; + case ';': + token = selectToken(Token::Semicolon); + break; + case ',': + token = selectToken(Token::Comma); + break; + case '(': + token = selectToken(Token::LParen); + break; + case ')': + token = selectToken(Token::RParen); + break; + case '[': + token = selectToken(Token::LBrack); + break; + case ']': + token = selectToken(Token::RBrack); + break; + case '{': + token = selectToken(Token::LBrace); + break; + case '}': + token = selectToken(Token::RBrace); + break; + case '?': + token = selectToken(Token::Conditional); + break; + case '~': + token = selectToken(Token::BitNot); + break; + default: + if (isIdentifierStart(m_char)) + { + tie(token, m, n) = scanIdentifierOrKeyword(); + + // Special case for hexadecimal literals + if (token == Token::Hex) + { + // reset + m = 0; + n = 0; + + // Special quoted hex string must follow + if (m_char == '"' || m_char == '\'') + token = scanHexString(); + else + token = setError(ScannerError::IllegalToken); + } + } + else if (isDecimalDigit(m_char)) + token = scanNumber(); + else if (skipWhitespace()) + token = Token::Whitespace; + else if (isSourcePastEndOfInput()) + token = Token::EOS; + else + token = selectErrorToken(ScannerError::IllegalToken); + break; + } + // Continue scanning for tokens as long as we're just skipping + // whitespace. + } + while (token == Token::Whitespace); + m_nextToken.location.end = sourcePos(); + m_nextToken.token = token; + m_nextToken.extendedTokenInfo = make_tuple(m, n); +} + +bool Scanner::scanEscape() +{ + char c = m_char; + advance(); + // Skip escaped newlines. + if (isLineTerminator(c)) + return true; + switch (c) + { + case '\'': // fall through + case '"': // fall through + case '\\': + break; + case 'b': + c = '\b'; + break; + case 'f': + c = '\f'; + break; + case 'n': + c = '\n'; + break; + case 'r': + c = '\r'; + break; + case 't': + c = '\t'; + break; + case 'v': + c = '\v'; + break; + case 'u': + { + unsigned codepoint; + if (!scanUnicode(codepoint)) + return false; + addUnicodeAsUTF8(codepoint); + return true; + } + case 'x': + if (!scanHexByte(c)) + return false; + break; + default: + return false; + } + + addLiteralChar(c); + return true; +} + +bool Scanner::isUnicodeLinebreak() +{ + if (0x0a <= m_char && m_char <= 0x0d) + // line feed, vertical tab, form feed, carriage return + return true; + else if (!m_source->isPastEndOfInput(1) && uint8_t(m_source->get(0)) == 0xc2 && uint8_t(m_source->get(1)) == 0x85) + // NEL - U+0085, C2 85 in utf8 + return true; + else if (!m_source->isPastEndOfInput(2) && uint8_t(m_source->get(0)) == 0xe2 && uint8_t(m_source->get(1)) == 0x80 && ( + uint8_t(m_source->get(2)) == 0xa8 || uint8_t(m_source->get(2)) == 0xa9 + )) + // LS - U+2028, E2 80 A8 in utf8 + // PS - U+2029, E2 80 A9 in utf8 + return true; + else + return false; +} + +Token Scanner::scanString() +{ + char const quote = m_char; + advance(); // consume quote + LiteralScope literal(this, LITERAL_TYPE_STRING); + while (m_char != quote && !isSourcePastEndOfInput() && !isUnicodeLinebreak()) + { + char c = m_char; + advance(); + if (c == '\\') + { + if (isSourcePastEndOfInput() || !scanEscape()) + return setError(ScannerError::IllegalEscapeSequence); + } + else + addLiteralChar(c); + } + if (m_char != quote) + return setError(ScannerError::IllegalStringEndQuote); + literal.complete(); + advance(); // consume quote + return Token::StringLiteral; +} + +Token Scanner::scanHexString() +{ + char const quote = m_char; + advance(); // consume quote + LiteralScope literal(this, LITERAL_TYPE_STRING); + while (m_char != quote && !isSourcePastEndOfInput()) + { + char c = m_char; + if (!scanHexByte(c)) + // can only return false if hex-byte is incomplete (only one hex digit instead of two) + return setError(ScannerError::IllegalHexString); + addLiteralChar(c); + } + + if (m_char != quote) + return setError(ScannerError::IllegalStringEndQuote); + + literal.complete(); + advance(); // consume quote + return Token::StringLiteral; +} + +// Parse for regex [:digit:]+(_[:digit:]+)* +void Scanner::scanDecimalDigits() +{ + // MUST begin with a decimal digit. + if (!isDecimalDigit(m_char)) + return; + + // May continue with decimal digit or underscore for grouping. + do addLiteralCharAndAdvance(); + while (!m_source->isPastEndOfInput() && (isDecimalDigit(m_char) || m_char == '_')); + + // Defer further validation of underscore to SyntaxChecker. +} + +Token Scanner::scanNumber(char _charSeen) +{ + enum { DECIMAL, HEX, BINARY } kind = DECIMAL; + LiteralScope literal(this, LITERAL_TYPE_NUMBER); + if (_charSeen == '.') + { + // we have already seen a decimal point of the float + addLiteralChar('.'); + if (m_char == '_') + return setError(ScannerError::IllegalToken); + scanDecimalDigits(); // we know we have at least one digit + } + else + { + solAssert(_charSeen == 0, ""); + // if the first character is '0' we must check for octals and hex + if (m_char == '0') + { + addLiteralCharAndAdvance(); + // either 0, 0exxx, 0Exxx, 0.xxx or a hex number + if (m_char == 'x') + { + // hex number + kind = HEX; + addLiteralCharAndAdvance(); + if (!isHexDigit(m_char)) + return setError(ScannerError::IllegalHexDigit); // we must have at least one hex digit after 'x' + + while (isHexDigit(m_char) || m_char == '_') // We keep the underscores for later validation + addLiteralCharAndAdvance(); + } + else if (isDecimalDigit(m_char)) + // We do not allow octal numbers + return setError(ScannerError::OctalNotAllowed); + } + // Parse decimal digits and allow trailing fractional part. + if (kind == DECIMAL) + { + scanDecimalDigits(); // optional + if (m_char == '.') + { + if (!m_source->isPastEndOfInput(1) && m_source->get(1) == '_') + { + // Assume the input may be a floating point number with leading '_' in fraction part. + // Recover by consuming it all but returning `Illegal` right away. + addLiteralCharAndAdvance(); // '.' + addLiteralCharAndAdvance(); // '_' + scanDecimalDigits(); + } + if (m_source->isPastEndOfInput() || !isDecimalDigit(m_source->get(1))) + { + // A '.' has to be followed by a number. + literal.complete(); + return Token::Number; + } + addLiteralCharAndAdvance(); + scanDecimalDigits(); + } + } + } + // scan exponent, if any + if (m_char == 'e' || m_char == 'E') + { + solAssert(kind != HEX, "'e'/'E' must be scanned as part of the hex number"); + if (kind != DECIMAL) + return setError(ScannerError::IllegalExponent); + else if (!m_source->isPastEndOfInput(1) && m_source->get(1) == '_') + { + // Recover from wrongly placed underscore as delimiter in literal with scientific + // notation by consuming until the end. + addLiteralCharAndAdvance(); // 'e' + addLiteralCharAndAdvance(); // '_' + scanDecimalDigits(); + literal.complete(); + return Token::Number; + } + // scan exponent + addLiteralCharAndAdvance(); // 'e' | 'E' + if (m_char == '+' || m_char == '-') + addLiteralCharAndAdvance(); + if (!isDecimalDigit(m_char)) // we must have at least one decimal digit after 'e'/'E' + return setError(ScannerError::IllegalExponent); + scanDecimalDigits(); + } + // The source character immediately following a numeric literal must + // not be an identifier start or a decimal digit; see ECMA-262 + // section 7.8.3, page 17 (note that we read only one decimal digit + // if the value is 0). + if (isDecimalDigit(m_char) || isIdentifierStart(m_char)) + return setError(ScannerError::IllegalNumberEnd); + literal.complete(); + return Token::Number; +} + +tuple<Token, unsigned, unsigned> Scanner::scanIdentifierOrKeyword() +{ + solAssert(isIdentifierStart(m_char), ""); + LiteralScope literal(this, LITERAL_TYPE_STRING); + addLiteralCharAndAdvance(); + // Scan the rest of the identifier characters. + while (isIdentifierPart(m_char)) //get full literal + addLiteralCharAndAdvance(); + literal.complete(); + return TokenTraits::fromIdentifierOrKeyword(m_nextToken.literal); +} + + +} |