aboutsummaryrefslogtreecommitdiffstats
path: root/libsolidity/parsing/Scanner.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'libsolidity/parsing/Scanner.cpp')
-rw-r--r-- libsolidity/parsing/Scanner.cpp 89
1 files changed, 62 insertions, 27 deletions
diff --git a/libsolidity/parsing/Scanner.cpp b/libsolidity/parsing/Scanner.cpp
index dbe1f389..e9dad2ad 100644
--- a/libsolidity/parsing/Scanner.cpp
+++ b/libsolidity/parsing/Scanner.cpp
@@ -214,9 +214,9 @@ void Scanner::addUnicodeAsUTF8(unsigned codepoint)
}
// Ensure that tokens can be stored in a byte.
-BOOST_STATIC_ASSERT(Token::NUM_TOKENS <= 0x100);
+BOOST_STATIC_ASSERT(TokenTraits::count() <= 0x100);
-Token::Value Scanner::next()
+Token Scanner::next()
{
m_currentToken = m_nextToken;
m_skippedComment = m_nextSkippedComment;
@@ -225,7 +225,7 @@ Token::Value Scanner::next()
return m_currentToken.token;
}
-Token::Value Scanner::selectToken(char _next, Token::Value _then, Token::Value _else)
+Token Scanner::selectToken(char _next, Token _then, Token _else)
{
advance();
if (m_char == _next)
@@ -249,7 +249,7 @@ void Scanner::skipWhitespaceExceptUnicodeLinebreak()
advance();
}
-Token::Value Scanner::skipSingleLineComment()
+Token Scanner::skipSingleLineComment()
{
// Line terminator is not part of the comment. If it is a
// non-ascii line terminator, it will result in a parser error.
@@ -259,7 +259,7 @@ Token::Value Scanner::skipSingleLineComment()
return Token::Whitespace;
}
-Token::Value Scanner::scanSingleLineDocComment()
+Token Scanner::scanSingleLineDocComment()
{
LiteralScope literal(this, LITERAL_TYPE_COMMENT);
advance(); //consume the last '/' at ///
@@ -295,7 +295,7 @@ Token::Value Scanner::scanSingleLineDocComment()
return Token::CommentLiteral;
}
-Token::Value Scanner::skipMultiLineComment()
+Token Scanner::skipMultiLineComment()
{
advance();
while (!isSourcePastEndOfInput())
@@ -316,7 +316,7 @@ Token::Value Scanner::skipMultiLineComment()
return Token::Illegal;
}
-Token::Value Scanner::scanMultiLineDocComment()
+Token Scanner::scanMultiLineDocComment()
{
LiteralScope literal(this, LITERAL_TYPE_COMMENT);
bool endFound = false;
@@ -369,7 +369,7 @@ Token::Value Scanner::scanMultiLineDocComment()
return Token::CommentLiteral;
}
-Token::Value Scanner::scanSlash()
+Token Scanner::scanSlash()
{
int firstSlashPosition = sourcePos();
advance();
@@ -380,7 +380,7 @@ Token::Value Scanner::scanSlash()
else if (m_char == '/')
{
// doxygen style /// comment
- Token::Value comment;
+ Token comment;
m_nextSkippedComment.location.start = firstSlashPosition;
comment = scanSingleLineDocComment();
m_nextSkippedComment.location.end = sourcePos();
@@ -406,7 +406,7 @@ Token::Value Scanner::scanSlash()
return Token::Whitespace;
}
// we actually have a multiline documentation comment
- Token::Value comment;
+ Token comment;
m_nextSkippedComment.location.start = firstSlashPosition;
comment = scanMultiLineDocComment();
m_nextSkippedComment.location.end = sourcePos();
@@ -432,7 +432,7 @@ void Scanner::scanToken()
m_nextSkippedComment.literal.clear();
m_nextSkippedComment.extendedTokenInfo = make_tuple(0, 0);
- Token::Value token;
+ Token token;
// M and N are for the purposes of grabbing different type sizes
unsigned m;
unsigned n;
@@ -601,7 +601,7 @@ void Scanner::scanToken()
{
tie(token, m, n) = scanIdentifierOrKeyword();
- // Special case for hexademical literals
+ // Special case for hexadecimal literals
if (token == Token::Hex)
{
// reset
@@ -612,7 +612,7 @@ void Scanner::scanToken()
if (m_char == '"' || m_char == '\'')
token = scanHexString();
else
- token = Token::Illegal;
+ token = Token::IllegalHex;
}
}
else if (isDecimalDigit(m_char))
@@ -703,7 +703,7 @@ bool Scanner::isUnicodeLinebreak()
return false;
}
-Token::Value Scanner::scanString()
+Token Scanner::scanString()
{
char const quote = m_char;
advance(); // consume quote
@@ -727,7 +727,7 @@ Token::Value Scanner::scanString()
return Token::StringLiteral;
}
-Token::Value Scanner::scanHexString()
+Token Scanner::scanHexString()
{
char const quote = m_char;
advance(); // consume quote
@@ -736,23 +736,31 @@ Token::Value Scanner::scanHexString()
{
char c = m_char;
if (!scanHexByte(c))
- return Token::Illegal;
+ return Token::IllegalHex;
addLiteralChar(c);
}
if (m_char != quote)
- return Token::Illegal;
+ return Token::IllegalHex;
literal.complete();
advance(); // consume quote
return Token::StringLiteral;
}
+// Scan one decimal digit followed by any run of decimal digits and '_'; the stricter form [:digit:]+(_[:digit:]+)* is not enforced here.
void Scanner::scanDecimalDigits()
{
- while (isDecimalDigit(m_char))
- addLiteralCharAndAdvance();
+ // MUST begin with a decimal digit.
+ if (!isDecimalDigit(m_char))
+ return;
+
+ // May continue with decimal digit or underscore for grouping.
+ do addLiteralCharAndAdvance();
+ while (!m_source.isPastEndOfInput() && (isDecimalDigit(m_char) || m_char == '_'));
+
+ // Defer further validation of underscore to SyntaxChecker.
}
-Token::Value Scanner::scanNumber(char _charSeen)
+Token Scanner::scanNumber(char _charSeen)
{
enum { DECIMAL, HEX, BINARY } kind = DECIMAL;
LiteralScope literal(this, LITERAL_TYPE_NUMBER);
@@ -760,6 +768,8 @@ Token::Value Scanner::scanNumber(char _charSeen)
{
// we have already seen a decimal point of the float
addLiteralChar('.');
+ if (m_char == '_')
+ return Token::Illegal;
scanDecimalDigits(); // we know we have at least one digit
}
else
@@ -770,14 +780,15 @@ Token::Value Scanner::scanNumber(char _charSeen)
{
addLiteralCharAndAdvance();
// either 0, 0exxx, 0Exxx, 0.xxx or a hex number
- if (m_char == 'x' || m_char == 'X')
+ if (m_char == 'x')
{
// hex number
kind = HEX;
addLiteralCharAndAdvance();
if (!isHexDigit(m_char))
- return Token::Illegal; // we must have at least one hex digit after 'x'/'X'
- while (isHexDigit(m_char))
+ return Token::Illegal; // we must have at least one hex digit after 'x'
+
+ while (isHexDigit(m_char) || m_char == '_') // We keep the underscores for later validation
addLiteralCharAndAdvance();
}
else if (isDecimalDigit(m_char))
@@ -790,8 +801,22 @@ Token::Value Scanner::scanNumber(char _charSeen)
scanDecimalDigits(); // optional
if (m_char == '.')
{
+ if (!m_source.isPastEndOfInput(1) && m_source.get(1) == '_')
+ {
+// Assume the input may be a floating point number with leading '_' in fraction part.
+// Recover by consuming the '.', the '_' and any digits that follow; this branch itself does not return `Illegal`.
+ addLiteralCharAndAdvance(); // '.'
+ addLiteralCharAndAdvance(); // '_'
+ scanDecimalDigits();
+ }
+ if (m_source.isPastEndOfInput() || !isDecimalDigit(m_source.get(1)))
+ {
+ // A '.' has to be followed by a number.
+ literal.complete();
+ return Token::Number;
+ }
addLiteralCharAndAdvance();
- scanDecimalDigits(); // optional
+ scanDecimalDigits();
}
}
}
@@ -801,8 +826,18 @@ Token::Value Scanner::scanNumber(char _charSeen)
solAssert(kind != HEX, "'e'/'E' must be scanned as part of the hex number");
if (kind != DECIMAL)
return Token::Illegal;
+ else if (!m_source.isPastEndOfInput(1) && m_source.get(1) == '_')
+ {
+ // Recover from wrongly placed underscore as delimiter in literal with scientific
+ // notation by consuming until the end.
+ addLiteralCharAndAdvance(); // 'e'
+ addLiteralCharAndAdvance(); // '_'
+ scanDecimalDigits();
+ literal.complete();
+ return Token::Number;
+ }
// scan exponent
- addLiteralCharAndAdvance();
+ addLiteralCharAndAdvance(); // 'e' | 'E'
if (m_char == '+' || m_char == '-')
addLiteralCharAndAdvance();
if (!isDecimalDigit(m_char))
@@ -819,7 +854,7 @@ Token::Value Scanner::scanNumber(char _charSeen)
return Token::Number;
}
-tuple<Token::Value, unsigned, unsigned> Scanner::scanIdentifierOrKeyword()
+tuple<Token, unsigned, unsigned> Scanner::scanIdentifierOrKeyword()
{
solAssert(isIdentifierStart(m_char), "");
LiteralScope literal(this, LITERAL_TYPE_STRING);
@@ -828,7 +863,7 @@ tuple<Token::Value, unsigned, unsigned> Scanner::scanIdentifierOrKeyword()
while (isIdentifierPart(m_char)) //get full literal
addLiteralCharAndAdvance();
literal.complete();
- return Token::fromIdentifierOrKeyword(m_nextToken.literal);
+ return TokenTraits::fromIdentifierOrKeyword(m_nextToken.literal);
}
char CharStream::advanceAndGet(size_t _chars)