diff options
Diffstat (limited to 'libsolidity/parsing/Scanner.cpp')
-rw-r--r-- | libsolidity/parsing/Scanner.cpp | 89 |
1 files changed, 62 insertions, 27 deletions
diff --git a/libsolidity/parsing/Scanner.cpp b/libsolidity/parsing/Scanner.cpp index dbe1f389..e9dad2ad 100644 --- a/libsolidity/parsing/Scanner.cpp +++ b/libsolidity/parsing/Scanner.cpp @@ -214,9 +214,9 @@ void Scanner::addUnicodeAsUTF8(unsigned codepoint) } // Ensure that tokens can be stored in a byte. -BOOST_STATIC_ASSERT(Token::NUM_TOKENS <= 0x100); +BOOST_STATIC_ASSERT(TokenTraits::count() <= 0x100); -Token::Value Scanner::next() +Token Scanner::next() { m_currentToken = m_nextToken; m_skippedComment = m_nextSkippedComment; @@ -225,7 +225,7 @@ Token::Value Scanner::next() return m_currentToken.token; } -Token::Value Scanner::selectToken(char _next, Token::Value _then, Token::Value _else) +Token Scanner::selectToken(char _next, Token _then, Token _else) { advance(); if (m_char == _next) @@ -249,7 +249,7 @@ void Scanner::skipWhitespaceExceptUnicodeLinebreak() advance(); } -Token::Value Scanner::skipSingleLineComment() +Token Scanner::skipSingleLineComment() { // Line terminator is not part of the comment. If it is a // non-ascii line terminator, it will result in a parser error. @@ -259,7 +259,7 @@ Token::Value Scanner::skipSingleLineComment() return Token::Whitespace; } -Token::Value Scanner::scanSingleLineDocComment() +Token Scanner::scanSingleLineDocComment() { LiteralScope literal(this, LITERAL_TYPE_COMMENT); advance(); //consume the last '/' at /// @@ -295,7 +295,7 @@ Token::Value Scanner::scanSingleLineDocComment() return Token::CommentLiteral; } -Token::Value Scanner::skipMultiLineComment() +Token Scanner::skipMultiLineComment() { advance(); while (!isSourcePastEndOfInput()) @@ -316,7 +316,7 @@ Token::Value Scanner::skipMultiLineComment() return Token::Illegal; } -Token::Value Scanner::scanMultiLineDocComment() +Token Scanner::scanMultiLineDocComment() { LiteralScope literal(this, LITERAL_TYPE_COMMENT); bool endFound = false; @@ -369,7 +369,7 @@ Token::Value Scanner::scanMultiLineDocComment() return Token::CommentLiteral; } -Token::Value Scanner::scanSlash() +Token Scanner::scanSlash() { int firstSlashPosition = sourcePos(); advance(); @@ -380,7 +380,7 @@ Token::Value Scanner::scanSlash() else if (m_char == '/') { // doxygen style /// comment - Token::Value comment; + Token comment; m_nextSkippedComment.location.start = firstSlashPosition; comment = scanSingleLineDocComment(); m_nextSkippedComment.location.end = sourcePos(); @@ -406,7 +406,7 @@ Token::Value Scanner::scanSlash() return Token::Whitespace; } // we actually have a multiline documentation comment - Token::Value comment; + Token comment; m_nextSkippedComment.location.start = firstSlashPosition; comment = scanMultiLineDocComment(); m_nextSkippedComment.location.end = sourcePos(); @@ -432,7 +432,7 @@ void Scanner::scanToken() m_nextSkippedComment.literal.clear(); m_nextSkippedComment.extendedTokenInfo = make_tuple(0, 0); - Token::Value token; + Token token; // M and N are for the purposes of grabbing different type sizes unsigned m; unsigned n; @@ -601,7 +601,7 @@ void Scanner::scanToken() { tie(token, m, n) = scanIdentifierOrKeyword(); - // Special case for hexademical literals + // Special case for hexadecimal literals if (token == Token::Hex) { // reset @@ -612,7 +612,7 @@ void Scanner::scanToken() if (m_char == '"' || m_char == '\'') token = scanHexString(); else - token = Token::Illegal; + token = Token::IllegalHex; } } else if (isDecimalDigit(m_char)) @@ -703,7 +703,7 @@ bool Scanner::isUnicodeLinebreak() return false; } -Token::Value Scanner::scanString() +Token Scanner::scanString() { char const quote = m_char; advance(); // consume quote @@ -727,7 +727,7 @@ Token::Value Scanner::scanString() return Token::StringLiteral; } -Token::Value Scanner::scanHexString() +Token Scanner::scanHexString() { char const quote = m_char; advance(); // consume quote @@ -736,23 +736,31 @@ Token::Value Scanner::scanHexString() { char c = m_char; if (!scanHexByte(c)) - return Token::Illegal; + return Token::IllegalHex; addLiteralChar(c); } if (m_char != quote) - return Token::Illegal; + return Token::IllegalHex; literal.complete(); advance(); // consume quote return Token::StringLiteral; } +// Parse for regex [:digit:]+(_[:digit:]+)* void Scanner::scanDecimalDigits() { - while (isDecimalDigit(m_char)) - addLiteralCharAndAdvance(); + // MUST begin with a decimal digit. + if (!isDecimalDigit(m_char)) + return; + + // May continue with decimal digit or underscore for grouping. + do addLiteralCharAndAdvance(); + while (!m_source.isPastEndOfInput() && (isDecimalDigit(m_char) || m_char == '_')); + + // Defer further validation of underscore to SyntaxChecker. } -Token::Value Scanner::scanNumber(char _charSeen) +Token Scanner::scanNumber(char _charSeen) { enum { DECIMAL, HEX, BINARY } kind = DECIMAL; LiteralScope literal(this, LITERAL_TYPE_NUMBER); @@ -760,6 +768,8 @@ Token::Value Scanner::scanNumber(char _charSeen) { // we have already seen a decimal point of the float addLiteralChar('.'); + if (m_char == '_') + return Token::Illegal; scanDecimalDigits(); // we know we have at least one digit } else @@ -770,14 +780,15 @@ Token::Value Scanner::scanNumber(char _charSeen) { addLiteralCharAndAdvance(); // either 0, 0exxx, 0Exxx, 0.xxx or a hex number - if (m_char == 'x' || m_char == 'X') + if (m_char == 'x') { // hex number kind = HEX; addLiteralCharAndAdvance(); if (!isHexDigit(m_char)) - return Token::Illegal; // we must have at least one hex digit after 'x'/'X' - while (isHexDigit(m_char)) + return Token::Illegal; // we must have at least one hex digit after 'x' + + while (isHexDigit(m_char) || m_char == '_') // We keep the underscores for later validation addLiteralCharAndAdvance(); } else if (isDecimalDigit(m_char)) @@ -790,8 +801,22 @@ Token::Value Scanner::scanNumber(char _charSeen) scanDecimalDigits(); // optional if (m_char == '.') { + if (!m_source.isPastEndOfInput(1) && m_source.get(1) == '_') + { + // Assume the input may be a floating point number with leading '_' in fraction part. + // Recover by consuming it all but returning `Illegal` right away. + addLiteralCharAndAdvance(); // '.' + addLiteralCharAndAdvance(); // '_' + scanDecimalDigits(); + } + if (m_source.isPastEndOfInput() || !isDecimalDigit(m_source.get(1))) + { + // A '.' has to be followed by a number. + literal.complete(); + return Token::Number; + } addLiteralCharAndAdvance(); - scanDecimalDigits(); // optional + scanDecimalDigits(); } } } @@ -801,8 +826,18 @@ Token::Value Scanner::scanNumber(char _charSeen) solAssert(kind != HEX, "'e'/'E' must be scanned as part of the hex number"); if (kind != DECIMAL) return Token::Illegal; + else if (!m_source.isPastEndOfInput(1) && m_source.get(1) == '_') + { + // Recover from wrongly placed underscore as delimiter in literal with scientific + // notation by consuming until the end. + addLiteralCharAndAdvance(); // 'e' + addLiteralCharAndAdvance(); // '_' + scanDecimalDigits(); + literal.complete(); + return Token::Number; + } // scan exponent - addLiteralCharAndAdvance(); + addLiteralCharAndAdvance(); // 'e' | 'E' if (m_char == '+' || m_char == '-') addLiteralCharAndAdvance(); if (!isDecimalDigit(m_char)) @@ -819,7 +854,7 @@ Token::Value Scanner::scanNumber(char _charSeen) return Token::Number; } -tuple<Token::Value, unsigned, unsigned> Scanner::scanIdentifierOrKeyword() +tuple<Token, unsigned, unsigned> Scanner::scanIdentifierOrKeyword() { solAssert(isIdentifierStart(m_char), ""); LiteralScope literal(this, LITERAL_TYPE_STRING); @@ -828,7 +863,7 @@ tuple<Token::Value, unsigned, unsigned> Scanner::scanIdentifierOrKeyword() while (isIdentifierPart(m_char)) //get full literal addLiteralCharAndAdvance(); literal.complete(); - return Token::fromIdentifierOrKeyword(m_nextToken.literal); + return TokenTraits::fromIdentifierOrKeyword(m_nextToken.literal); } char CharStream::advanceAndGet(size_t _chars) |