1 files changed, 62 insertions, 27 deletions
diff --git a/libsolidity/parsing/Scanner.cpp b/libsolidity/parsing/Scanner.cpp
index dbe1f389..e9dad2ad 100644
--- a/libsolidity/parsing/Scanner.cpp
+++ b/libsolidity/parsing/Scanner.cpp
@@ -214,9 +214,9 @@ void Scanner::addUnicodeAsUTF8(unsigned codepoint)
 }
 
 // Ensure that tokens can be stored in a byte.
-BOOST_STATIC_ASSERT(Token::NUM_TOKENS <= 0x100);
+BOOST_STATIC_ASSERT(TokenTraits::count() <= 0x100);
 
-Token::Value Scanner::next()
+Token Scanner::next()
 {
 	m_currentToken = m_nextToken;
 	m_skippedComment = m_nextSkippedComment;
@@ -225,7 +225,7 @@ Token::Value Scanner::next()
 	return m_currentToken.token;
 }
 
-Token::Value Scanner::selectToken(char _next, Token::Value _then, Token::Value _else)
+Token Scanner::selectToken(char _next, Token _then, Token _else)
 {
 	advance();
 	if (m_char == _next)
@@ -249,7 +249,7 @@ void Scanner::skipWhitespaceExceptUnicodeLinebreak()
 		advance();
 }
 
-Token::Value Scanner::skipSingleLineComment()
+Token Scanner::skipSingleLineComment()
 {
 	// Line terminator is not part of the comment. If it is a
 	// non-ascii line terminator, it will result in a parser error.
@@ -259,7 +259,7 @@ Token::Value Scanner::skipSingleLineComment()
 	return Token::Whitespace;
 }
 
-Token::Value Scanner::scanSingleLineDocComment()
+Token Scanner::scanSingleLineDocComment()
 {
 	LiteralScope literal(this, LITERAL_TYPE_COMMENT);
 	advance(); //consume the last '/' at ///
@@ -295,7 +295,7 @@ Token::Value Scanner::scanSingleLineDocComment()
 	return Token::CommentLiteral;
 }
 
-Token::Value Scanner::skipMultiLineComment()
+Token Scanner::skipMultiLineComment()
 {
 	advance();
 	while (!isSourcePastEndOfInput())
@@ -316,7 +316,7 @@ Token::Value Scanner::skipMultiLineComment()
 	return Token::Illegal;
 }
 
-Token::Value Scanner::scanMultiLineDocComment()
+Token Scanner::scanMultiLineDocComment()
 {
 	LiteralScope literal(this, LITERAL_TYPE_COMMENT);
 	bool endFound = false;
@@ -369,7 +369,7 @@ Token::Value Scanner::scanMultiLineDocComment()
 		return Token::CommentLiteral;
 }
 
-Token::Value Scanner::scanSlash()
+Token Scanner::scanSlash()
 {
 	int firstSlashPosition = sourcePos();
 	advance();
@@ -380,7 +380,7 @@ Token::Value Scanner::scanSlash()
 		else if (m_char == '/')
 		{
 			// doxygen style /// comment
-			Token::Value comment;
+			Token comment;
 			m_nextSkippedComment.location.start = firstSlashPosition;
 			comment = scanSingleLineDocComment();
 			m_nextSkippedComment.location.end = sourcePos();
@@ -406,7 +406,7 @@ Token::Value Scanner::scanSlash()
 				return Token::Whitespace;
 			}
 			// we actually have a multiline documentation comment
-			Token::Value comment;
+			Token comment;
 			m_nextSkippedComment.location.start = firstSlashPosition;
 			comment = scanMultiLineDocComment();
 			m_nextSkippedComment.location.end = sourcePos();
@@ -432,7 +432,7 @@ void Scanner::scanToken()
 	m_nextSkippedComment.literal.clear();
 	m_nextSkippedComment.extendedTokenInfo = make_tuple(0, 0);
 
-	Token::Value token;
+	Token token;
 	// M and N are for the purposes of grabbing different type sizes
 	unsigned m;
 	unsigned n;
@@ -601,7 +601,7 @@ void Scanner::scanToken()
 			{
 				tie(token, m, n) = scanIdentifierOrKeyword();
 
-				// Special case for hexademical literals
+				// Special case for hexadecimal literals
 				if (token == Token::Hex)
 				{
 					// reset
@@ -612,7 +612,7 @@ void Scanner::scanToken()
 					if (m_char == '"' || m_char == '\'')
 						token = scanHexString();
 					else
-						token = Token::Illegal;
+						token = Token::IllegalHex;
 				}
 			}
 			else if (isDecimalDigit(m_char))
@@ -703,7 +703,7 @@ bool Scanner::isUnicodeLinebreak()
 		return false;
 }
 
-Token::Value Scanner::scanString()
+Token Scanner::scanString()
 {
 	char const quote = m_char;
 	advance();  // consume quote
@@ -727,7 +727,7 @@ Token::Value Scanner::scanString()
 	return Token::StringLiteral;
 }
 
-Token::Value Scanner::scanHexString()
+Token Scanner::scanHexString()
 {
 	char const quote = m_char;
 	advance();  // consume quote
@@ -736,23 +736,31 @@ Token::Value Scanner::scanHexString()
 	{
 		char c = m_char;
 		if (!scanHexByte(c))
-			return Token::Illegal;
+			return Token::IllegalHex;
 		addLiteralChar(c);
 	}
 	if (m_char != quote)
-		return Token::Illegal;
+		return Token::IllegalHex;
 	literal.complete();
 	advance();  // consume quote
 	return Token::StringLiteral;
 }
 
+// Parse for regex [:digit:]+(_[:digit:]+)*
 void Scanner::scanDecimalDigits()
 {
-	while (isDecimalDigit(m_char))
-		addLiteralCharAndAdvance();
+	// MUST begin with a decimal digit.
+	if (!isDecimalDigit(m_char))
+		return;
+
+	// May continue with decimal digit or underscore for grouping.
+	do addLiteralCharAndAdvance();
+	while (!m_source.isPastEndOfInput() && (isDecimalDigit(m_char) || m_char == '_'));
+
+	// Defer further validation of underscore to SyntaxChecker.
 }
 
-Token::Value Scanner::scanNumber(char _charSeen)
+Token Scanner::scanNumber(char _charSeen)
 {
 	enum { DECIMAL, HEX, BINARY } kind = DECIMAL;
 	LiteralScope literal(this, LITERAL_TYPE_NUMBER);
@@ -760,6 +768,8 @@ Token::Value Scanner::scanNumber(char _charSeen)
 	{
 		// we have already seen a decimal point of the float
 		addLiteralChar('.');
+		if (m_char == '_')
+			return Token::Illegal;
 		scanDecimalDigits();  // we know we have at least one digit
 	}
 	else
@@ -770,14 +780,15 @@ Token::Value Scanner::scanNumber(char _charSeen)
 		{
 			addLiteralCharAndAdvance();
 			// either 0, 0exxx, 0Exxx, 0.xxx or a hex number
-			if (m_char == 'x' || m_char == 'X')
+			if (m_char == 'x')
 			{
 				// hex number
 				kind = HEX;
 				addLiteralCharAndAdvance();
 				if (!isHexDigit(m_char))
-					return Token::Illegal; // we must have at least one hex digit after 'x'/'X'
-				while (isHexDigit(m_char))
+					return Token::Illegal; // we must have at least one hex digit after 'x'
+
+				while (isHexDigit(m_char) || m_char == '_') // We keep the underscores for later validation
 					addLiteralCharAndAdvance();
 			}
 			else if (isDecimalDigit(m_char))
@@ -790,8 +801,22 @@ Token::Value Scanner::scanNumber(char _charSeen)
 			scanDecimalDigits();  // optional
 			if (m_char == '.')
 			{
+				if (!m_source.isPastEndOfInput(1) && m_source.get(1) == '_')
+				{
+					// Assume the input may be a floating point number with leading '_' in fraction part.
+					// Recover by consuming it all but returning `Illegal` right away.
+					addLiteralCharAndAdvance(); // '.'
+					addLiteralCharAndAdvance(); // '_'
+					scanDecimalDigits();
+				}
+				if (m_source.isPastEndOfInput() || !isDecimalDigit(m_source.get(1)))
+				{
+					// A '.' has to be followed by a number.
+					literal.complete();
+					return Token::Number;
+				}
 				addLiteralCharAndAdvance();
-				scanDecimalDigits();  // optional
+				scanDecimalDigits();
 			}
 		}
 	}
@@ -801,8 +826,18 @@ Token::Value Scanner::scanNumber(char _charSeen)
 		solAssert(kind != HEX, "'e'/'E' must be scanned as part of the hex number");
 		if (kind != DECIMAL)
 			return Token::Illegal;
+		else if (!m_source.isPastEndOfInput(1) && m_source.get(1) == '_')
+		{
+			// Recover from wrongly placed underscore as delimiter in literal with scientific
+			// notation by consuming until the end.
+			addLiteralCharAndAdvance(); // 'e'
+			addLiteralCharAndAdvance(); // '_'
+			scanDecimalDigits();
+			literal.complete();
+			return Token::Number;
+		}
 		// scan exponent
-		addLiteralCharAndAdvance();
+		addLiteralCharAndAdvance(); // 'e' | 'E'
 		if (m_char == '+' || m_char == '-')
 			addLiteralCharAndAdvance();
 		if (!isDecimalDigit(m_char))
@@ -819,7 +854,7 @@ Token::Value Scanner::scanNumber(char _charSeen)
 	return Token::Number;
 }
 
-tuple<Token::Value, unsigned, unsigned> Scanner::scanIdentifierOrKeyword()
+tuple<Token, unsigned, unsigned> Scanner::scanIdentifierOrKeyword()
 {
 	solAssert(isIdentifierStart(m_char), "");
 	LiteralScope literal(this, LITERAL_TYPE_STRING);
@@ -828,7 +863,7 @@ tuple<Token::Value, unsigned, unsigned> Scanner::scanIdentifierOrKeyword()
 	while (isIdentifierPart(m_char)) //get full literal
 		addLiteralCharAndAdvance();
 	literal.complete();
-	return Token::fromIdentifierOrKeyword(m_nextToken.literal);
+	return TokenTraits::fromIdentifierOrKeyword(m_nextToken.literal);
 }
 
 char CharStream::advanceAndGet(size_t _chars)