Merge pull request #5571 from ethereum/develop

Version 0.5.1
author: chriseth <chris@ethereum.org> 2018-12-03 22:48:03 +0800
committer: GitHub <noreply@github.com> 2018-12-03 22:48:03 +0800
commit: c8a2cb62832afb2dc09ccee6fd42c1516dfdb981 (patch)
tree: 7977e9dcbbc215088c05b847f849871ef5d4ae66 /liblangutil/Scanner.cpp
parent: 1d4f565a64988a3400847d2655ca24f73f234bc6 (diff)
parent: 590be1d84cea9850ce69b68be3dc5294b39041e5 (diff)
download: dexon-solidity-c8a2cb62832afb2dc09ccee6fd42c1516dfdb981.tar
dexon-solidity-c8a2cb62832afb2dc09ccee6fd42c1516dfdb981.tar.gz
dexon-solidity-c8a2cb62832afb2dc09ccee6fd42c1516dfdb981.tar.bz2
dexon-solidity-c8a2cb62832afb2dc09ccee6fd42c1516dfdb981.tar.lz
dexon-solidity-c8a2cb62832afb2dc09ccee6fd42c1516dfdb981.tar.xz
dexon-solidity-c8a2cb62832afb2dc09ccee6fd42c1516dfdb981.tar.zst
dexon-solidity-c8a2cb62832afb2dc09ccee6fd42c1516dfdb981.zip
1 files changed, 904 insertions, 0 deletions
diff --git a/liblangutil/Scanner.cpp b/liblangutil/Scanner.cpp
new file mode 100644
index 00000000..ac298bd5
--- /dev/null
+++ b/liblangutil/Scanner.cpp
@@ -0,0 +1,904 @@
+/*
+	This file is part of solidity.
+
+	solidity is free software: you can redistribute it and/or modify
+	it under the terms of the GNU General Public License as published by
+	the Free Software Foundation, either version 3 of the License, or
+	(at your option) any later version.
+
+	solidity is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+	GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public License
+	along with solidity.  If not, see <http://www.gnu.org/licenses/>.
+
+	This file is derived from the file "scanner.cc", which was part of the
+	V8 project. The original copyright header follows:
+
+	Copyright 2006-2012, the V8 project authors. All rights reserved.
+	Redistribution and use in source and binary forms, with or without
+	modification, are permitted provided that the following conditions are
+	met:
+
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above
+	  copyright notice, this list of conditions and the following
+	  disclaimer in the documentation and/or other materials provided
+	  with the distribution.
+	* Neither the name of Google Inc. nor the names of its
+	  contributors may be used to endorse or promote products derived
+	  from this software without specific prior written permission.
+
+	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+	"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+	LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+	A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+	OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+	SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+	LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+	DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+	THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+	(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+	OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+/**
+ * @author Christian <c@ethdev.com>
+ * @date 2014
+ * Solidity scanner.
+ */
+
+#include <liblangutil/Exceptions.h>
+#include <liblangutil/Scanner.h>
+#include <algorithm>
+#include <ostream>
+#include <tuple>
+
+using namespace std;
+
+namespace langutil
+{
+
+namespace
+{
+bool isDecimalDigit(char c)
+{
+	return '0' <= c && c <= '9';
+}
+bool isHexDigit(char c)
+{
+	return isDecimalDigit(c)
+		   || ('a' <= c && c <= 'f')
+		   || ('A' <= c && c <= 'F');
+}
+bool isLineTerminator(char c)
+{
+	return c == '\n';
+}
+bool isWhiteSpace(char c)
+{
+	return c == ' ' || c == '\n' || c == '\t' || c == '\r';
+}
+bool isIdentifierStart(char c)
+{
+	return c == '_' || c == '$' || ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');
+}
+bool isIdentifierPart(char c)
+{
+	return isIdentifierStart(c) || isDecimalDigit(c);
+}
+int hexValue(char c)
+{
+	if (c >= '0' && c <= '9')
+		return c - '0';
+	else if (c >= 'a' && c <= 'f')
+		return c - 'a' + 10;
+	else if (c >= 'A' && c <= 'F')
+		return c - 'A' + 10;
+	else return -1;
+}
+} // end anonymous namespace
+
+std::string to_string(ScannerError _errorCode)
+{
+	switch (_errorCode)
+	{
+		case ScannerError::NoError: return "No error.";
+		case ScannerError::IllegalToken: return "Invalid token.";
+		case ScannerError::IllegalHexString: return "Expected even number of hex-nibbles within double-quotes.";
+		case ScannerError::IllegalHexDigit: return "Hexadecimal digit missing or invalid.";
+		case ScannerError::IllegalCommentTerminator: return "Expected multi-line comment-terminator.";
+		case ScannerError::IllegalEscapeSequence: return "Invalid escape sequence.";
+		case ScannerError::IllegalStringEndQuote: return "Expected string end-quote.";
+		case ScannerError::IllegalNumberSeparator: return "Invalid use of number separator '_'.";
+		case ScannerError::IllegalExponent: return "Invalid exponent.";
+		case ScannerError::IllegalNumberEnd: return "Identifier-start is not allowed at end of a number.";
+		case ScannerError::OctalNotAllowed: return "Octal numbers not allowed.";
+		default:
+			solAssert(false, "Unhandled case in to_string(ScannerError)");
+			return "";
+	}
+}
+
+std::ostream& operator<<(std::ostream& os, ScannerError _errorCode)
+{
+	os << to_string(_errorCode);
+	return os;
+}
+
+/// Scoped helper for literal recording. Automatically drops the literal
+/// if aborting the scanning before it's complete.
+enum LiteralType {
+	LITERAL_TYPE_STRING,
+	LITERAL_TYPE_NUMBER, // not really different from string type in behaviour
+	LITERAL_TYPE_COMMENT
+};
+
+class LiteralScope
+{
+public:
+	explicit LiteralScope(Scanner* _self, enum LiteralType _type): m_type(_type)
+	, m_scanner(_self)
+	, m_complete(false)
+	{
+		if (_type == LITERAL_TYPE_COMMENT)
+			m_scanner->m_nextSkippedComment.literal.clear();
+		else
+			m_scanner->m_nextToken.literal.clear();
+	}
+	~LiteralScope()
+	{
+		if (!m_complete)
+		{
+			if (m_type == LITERAL_TYPE_COMMENT)
+				m_scanner->m_nextSkippedComment.literal.clear();
+			else
+				m_scanner->m_nextToken.literal.clear();
+		}
+	}
+	void complete() { m_complete = true; }
+
+private:
+	enum LiteralType m_type;
+	Scanner* m_scanner;
+	bool m_complete;
+}; // end of LiteralScope class
+
+
+void Scanner::reset(CharStream _source)
+{
+	m_source = make_shared<CharStream>(std::move(_source));
+	reset();
+}
+
+void Scanner::reset(std::shared_ptr<CharStream> _source)
+{
+	solAssert(_source.get() != nullptr, "You MUST provide a CharStream when resetting.");
+	m_source = _source;
+	reset();
+}
+
+void Scanner::reset()
+{
+	m_source->reset();
+	m_char = m_source->get();
+	skipWhitespace();
+	scanToken();
+	next();
+}
+
+bool Scanner::scanHexByte(char& o_scannedByte)
+{
+	char x = 0;
+	for (int i = 0; i < 2; i++)
+	{
+		int d = hexValue(m_char);
+		if (d < 0)
+		{
+			rollback(i);
+			return false;
+		}
+		x = x * 16 + d;
+		advance();
+	}
+	o_scannedByte = x;
+	return true;
+}
+
+bool Scanner::scanUnicode(unsigned & o_codepoint)
+{
+	unsigned x = 0;
+	for (int i = 0; i < 4; i++)
+	{
+		int d = hexValue(m_char);
+		if (d < 0)
+		{
+			rollback(i);
+			return false;
+		}
+		x = x * 16 + d;
+		advance();
+	}
+	o_codepoint = x;
+	return true;
+}
+
+// This supports codepoints between 0000 and FFFF.
+void Scanner::addUnicodeAsUTF8(unsigned codepoint)
+{
+	if (codepoint <= 0x7f)
+		addLiteralChar(codepoint);
+	else if (codepoint <= 0x7ff)
+	{
+		addLiteralChar(0xc0 | (codepoint >> 6));
+		addLiteralChar(0x80 | (codepoint & 0x3f));
+	}
+	else
+	{
+		addLiteralChar(0xe0 | (codepoint >> 12));
+		addLiteralChar(0x80 | ((codepoint >> 6) & 0x3f));
+		addLiteralChar(0x80 | (codepoint & 0x3f));
+	}
+}
+
+// Ensure that tokens can be stored in a byte.
+BOOST_STATIC_ASSERT(TokenTraits::count() <= 0x100);
+
+Token Scanner::next()
+{
+	m_currentToken = m_nextToken;
+	m_skippedComment = m_nextSkippedComment;
+	scanToken();
+
+	return m_currentToken.token;
+}
+
+Token Scanner::selectToken(char _next, Token _then, Token _else)
+{
+	advance();
+	if (m_char == _next)
+		return selectToken(_then);
+	else
+		return _else;
+}
+
+bool Scanner::skipWhitespace()
+{
+	int const startPosition = sourcePos();
+	while (isWhiteSpace(m_char))
+		advance();
+	// Return whether or not we skipped any characters.
+	return sourcePos() != startPosition;
+}
+
+void Scanner::skipWhitespaceExceptUnicodeLinebreak()
+{
+	while (isWhiteSpace(m_char) && !isUnicodeLinebreak())
+		advance();
+}
+
+Token Scanner::skipSingleLineComment()
+{
+	// Line terminator is not part of the comment. If it is a
+	// non-ascii line terminator, it will result in a parser error.
+	while (!isUnicodeLinebreak())
+		if (!advance()) break;
+
+	return Token::Whitespace;
+}
+
+Token Scanner::scanSingleLineDocComment()
+{
+	LiteralScope literal(this, LITERAL_TYPE_COMMENT);
+	advance(); //consume the last '/' at ///
+
+	skipWhitespaceExceptUnicodeLinebreak();
+
+	while (!isSourcePastEndOfInput())
+	{
+		if (isLineTerminator(m_char))
+		{
+			// check if next line is also a documentation comment
+			skipWhitespace();
+			if (!m_source->isPastEndOfInput(3) &&
+				m_source->get(0) == '/' &&
+				m_source->get(1) == '/' &&
+				m_source->get(2) == '/')
+			{
+				addCommentLiteralChar('\n');
+				m_char = m_source->advanceAndGet(3);
+			}
+			else
+				break; // next line is not a documentation comment, we are done
+
+		}
+		else if (isUnicodeLinebreak())
+			// Any line terminator that is not '\n' is considered to end the
+			// comment.
+			break;
+		addCommentLiteralChar(m_char);
+		advance();
+	}
+	literal.complete();
+	return Token::CommentLiteral;
+}
+
+Token Scanner::skipMultiLineComment()
+{
+	advance();
+	while (!isSourcePastEndOfInput())
+	{
+		char ch = m_char;
+		advance();
+
+		// If we have reached the end of the multi-line comment, we
+		// consume the '/' and insert a whitespace. This way all
+		// multi-line comments are treated as whitespace.
+		if (ch == '*' && m_char == '/')
+		{
+			m_char = ' ';
+			return Token::Whitespace;
+		}
+	}
+	// Unterminated multi-line comment.
+	return setError(ScannerError::IllegalCommentTerminator);
+}
+
+Token Scanner::scanMultiLineDocComment()
+{
+	LiteralScope literal(this, LITERAL_TYPE_COMMENT);
+	bool endFound = false;
+	bool charsAdded = false;
+
+	while (isWhiteSpace(m_char) && !isLineTerminator(m_char))
+		advance();
+
+	while (!isSourcePastEndOfInput())
+	{
+		//handle newlines in multline comments
+		if (isLineTerminator(m_char))
+		{
+			skipWhitespace();
+			if (!m_source->isPastEndOfInput(1) && m_source->get(0) == '*' && m_source->get(1) == '*')
+			{ // it is unknown if this leads to the end of the comment
+				addCommentLiteralChar('*');
+				advance();
+			}
+			else if (!m_source->isPastEndOfInput(1) && m_source->get(0) == '*' && m_source->get(1) != '/')
+			{ // skip first '*' in subsequent lines
+				if (charsAdded)
+					addCommentLiteralChar('\n');
+				m_char = m_source->advanceAndGet(2);
+			}
+			else if (!m_source->isPastEndOfInput(1) && m_source->get(0) == '*' && m_source->get(1) == '/')
+			{ // if after newline the comment ends, don't insert the newline
+				m_char = m_source->advanceAndGet(2);
+				endFound = true;
+				break;
+			}
+			else if (charsAdded)
+				addCommentLiteralChar('\n');
+		}
+
+		if (!m_source->isPastEndOfInput(1) && m_source->get(0) == '*' && m_source->get(1) == '/')
+		{
+			m_char = m_source->advanceAndGet(2);
+			endFound = true;
+			break;
+		}
+		addCommentLiteralChar(m_char);
+		charsAdded = true;
+		advance();
+	}
+	literal.complete();
+	if (!endFound)
+		return setError(ScannerError::IllegalCommentTerminator);
+	else
+		return Token::CommentLiteral;
+}
+
+Token Scanner::scanSlash()
+{
+	int firstSlashPosition = sourcePos();
+	advance();
+	if (m_char == '/')
+	{
+		if (!advance()) /* double slash comment directly before EOS */
+			return Token::Whitespace;
+		else if (m_char == '/')
+		{
+			// doxygen style /// comment
+			Token comment;
+			m_nextSkippedComment.location.start = firstSlashPosition;
+			comment = scanSingleLineDocComment();
+			m_nextSkippedComment.location.end = sourcePos();
+			m_nextSkippedComment.token = comment;
+			return Token::Whitespace;
+		}
+		else
+			return skipSingleLineComment();
+	}
+	else if (m_char == '*')
+	{
+		// doxygen style /** natspec comment
+		if (!advance()) /* slash star comment before EOS */
+			return setError(ScannerError::IllegalCommentTerminator);
+		else if (m_char == '*')
+		{
+			advance(); //consume the last '*' at /**
+
+			// "/**/"
+			if (m_char == '/')
+			{
+				advance(); //skip the closing slash
+				return Token::Whitespace;
+			}
+			// we actually have a multiline documentation comment
+			Token comment;
+			m_nextSkippedComment.location.start = firstSlashPosition;
+			comment = scanMultiLineDocComment();
+			m_nextSkippedComment.location.end = sourcePos();
+			m_nextSkippedComment.token = comment;
+			if (comment == Token::Illegal)
+				return Token::Illegal; // error already set
+			else
+				return Token::Whitespace;
+		}
+		else
+			return skipMultiLineComment();
+	}
+	else if (m_char == '=')
+		return selectToken(Token::AssignDiv);
+	else
+		return Token::Div;
+}
+
+void Scanner::scanToken()
+{
+	m_nextToken.error = ScannerError::NoError;
+	m_nextToken.literal.clear();
+	m_nextToken.extendedTokenInfo = make_tuple(0, 0);
+	m_nextSkippedComment.literal.clear();
+	m_nextSkippedComment.extendedTokenInfo = make_tuple(0, 0);
+
+	Token token;
+	// M and N are for the purposes of grabbing different type sizes
+	unsigned m;
+	unsigned n;
+	do
+	{
+		// Remember the position of the next token
+		m_nextToken.location.start = sourcePos();
+		switch (m_char)
+		{
+		case '"':
+		case '\'':
+			token = scanString();
+			break;
+		case '<':
+			// < <= << <<=
+			advance();
+			if (m_char == '=')
+				token = selectToken(Token::LessThanOrEqual);
+			else if (m_char == '<')
+				token = selectToken('=', Token::AssignShl, Token::SHL);
+			else
+				token = Token::LessThan;
+			break;
+		case '>':
+			// > >= >> >>= >>> >>>=
+			advance();
+			if (m_char == '=')
+				token = selectToken(Token::GreaterThanOrEqual);
+			else if (m_char == '>')
+			{
+				// >> >>= >>> >>>=
+				advance();
+				if (m_char == '=')
+					token = selectToken(Token::AssignSar);
+				else if (m_char == '>')
+					token = selectToken('=', Token::AssignShr, Token::SHR);
+				else
+					token = Token::SAR;
+			}
+			else
+				token = Token::GreaterThan;
+			break;
+		case '=':
+			// = == =>
+			advance();
+			if (m_char == '=')
+				token = selectToken(Token::Equal);
+			else if (m_char == '>')
+				token = selectToken(Token::Arrow);
+			else
+				token = Token::Assign;
+			break;
+		case '!':
+			// ! !=
+			advance();
+			if (m_char == '=')
+				token = selectToken(Token::NotEqual);
+			else
+				token = Token::Not;
+			break;
+		case '+':
+			// + ++ +=
+			advance();
+			if (m_char == '+')
+				token = selectToken(Token::Inc);
+			else if (m_char == '=')
+				token = selectToken(Token::AssignAdd);
+			else
+				token = Token::Add;
+			break;
+		case '-':
+			// - -- -=
+			advance();
+			if (m_char == '-')
+				token = selectToken(Token::Dec);
+			else if (m_char == '=')
+				token = selectToken(Token::AssignSub);
+			else
+				token = Token::Sub;
+			break;
+		case '*':
+			// * ** *=
+			advance();
+			if (m_char == '*')
+				token = selectToken(Token::Exp);
+			else if (m_char == '=')
+				token = selectToken(Token::AssignMul);
+			else
+				token = Token::Mul;
+			break;
+		case '%':
+			// % %=
+			token = selectToken('=', Token::AssignMod, Token::Mod);
+			break;
+		case '/':
+			// /  // /* /=
+			token = scanSlash();
+			break;
+		case '&':
+			// & && &=
+			advance();
+			if (m_char == '&')
+				token = selectToken(Token::And);
+			else if (m_char == '=')
+				token = selectToken(Token::AssignBitAnd);
+			else
+				token = Token::BitAnd;
+			break;
+		case '|':
+			// | || |=
+			advance();
+			if (m_char == '|')
+				token = selectToken(Token::Or);
+			else if (m_char == '=')
+				token = selectToken(Token::AssignBitOr);
+			else
+				token = Token::BitOr;
+			break;
+		case '^':
+			// ^ ^=
+			token = selectToken('=', Token::AssignBitXor, Token::BitXor);
+			break;
+		case '.':
+			// . Number
+			advance();
+			if (isDecimalDigit(m_char))
+				token = scanNumber('.');
+			else
+				token = Token::Period;
+			break;
+		case ':':
+			token = selectToken(Token::Colon);
+			break;
+		case ';':
+			token = selectToken(Token::Semicolon);
+			break;
+		case ',':
+			token = selectToken(Token::Comma);
+			break;
+		case '(':
+			token = selectToken(Token::LParen);
+			break;
+		case ')':
+			token = selectToken(Token::RParen);
+			break;
+		case '[':
+			token = selectToken(Token::LBrack);
+			break;
+		case ']':
+			token = selectToken(Token::RBrack);
+			break;
+		case '{':
+			token = selectToken(Token::LBrace);
+			break;
+		case '}':
+			token = selectToken(Token::RBrace);
+			break;
+		case '?':
+			token = selectToken(Token::Conditional);
+			break;
+		case '~':
+			token = selectToken(Token::BitNot);
+			break;
+		default:
+			if (isIdentifierStart(m_char))
+			{
+				tie(token, m, n) = scanIdentifierOrKeyword();
+
+				// Special case for hexadecimal literals
+				if (token == Token::Hex)
+				{
+					// reset
+					m = 0;
+					n = 0;
+
+					// Special quoted hex string must follow
+					if (m_char == '"' || m_char == '\'')
+						token = scanHexString();
+					else
+						token = setError(ScannerError::IllegalToken);
+				}
+			}
+			else if (isDecimalDigit(m_char))
+				token = scanNumber();
+			else if (skipWhitespace())
+				token = Token::Whitespace;
+			else if (isSourcePastEndOfInput())
+				token = Token::EOS;
+			else
+				token = selectErrorToken(ScannerError::IllegalToken);
+			break;
+		}
+		// Continue scanning for tokens as long as we're just skipping
+		// whitespace.
+	}
+	while (token == Token::Whitespace);
+	m_nextToken.location.end = sourcePos();
+	m_nextToken.token = token;
+	m_nextToken.extendedTokenInfo = make_tuple(m, n);
+}
+
+bool Scanner::scanEscape()
+{
+	char c = m_char;
+	advance();
+	// Skip escaped newlines.
+	if (isLineTerminator(c))
+		return true;
+	switch (c)
+	{
+	case '\'':  // fall through
+	case '"':  // fall through
+	case '\\':
+		break;
+	case 'b':
+		c = '\b';
+		break;
+	case 'f':
+		c = '\f';
+		break;
+	case 'n':
+		c = '\n';
+		break;
+	case 'r':
+		c = '\r';
+		break;
+	case 't':
+		c = '\t';
+		break;
+	case 'v':
+		c = '\v';
+		break;
+	case 'u':
+	{
+		unsigned codepoint;
+		if (!scanUnicode(codepoint))
+			return false;
+		addUnicodeAsUTF8(codepoint);
+		return true;
+	}
+	case 'x':
+		if (!scanHexByte(c))
+			return false;
+		break;
+	default:
+		return false;
+	}
+
+	addLiteralChar(c);
+	return true;
+}
+
+bool Scanner::isUnicodeLinebreak()
+{
+	if (0x0a <= m_char && m_char <= 0x0d)
+		// line feed, vertical tab, form feed, carriage return
+		return true;
+	else if (!m_source->isPastEndOfInput(1) && uint8_t(m_source->get(0)) == 0xc2 && uint8_t(m_source->get(1)) == 0x85)
+		// NEL - U+0085, C2 85 in utf8
+		return true;
+	else if (!m_source->isPastEndOfInput(2) && uint8_t(m_source->get(0)) == 0xe2 && uint8_t(m_source->get(1)) == 0x80 && (
+		uint8_t(m_source->get(2)) == 0xa8 || uint8_t(m_source->get(2)) == 0xa9
+	))
+		// LS - U+2028, E2 80 A8  in utf8
+		// PS - U+2029, E2 80 A9  in utf8
+		return true;
+	else
+		return false;
+}
+
+Token Scanner::scanString()
+{
+	char const quote = m_char;
+	advance();  // consume quote
+	LiteralScope literal(this, LITERAL_TYPE_STRING);
+	while (m_char != quote && !isSourcePastEndOfInput() && !isUnicodeLinebreak())
+	{
+		char c = m_char;
+		advance();
+		if (c == '\\')
+		{
+			if (isSourcePastEndOfInput() || !scanEscape())
+				return setError(ScannerError::IllegalEscapeSequence);
+		}
+		else
+			addLiteralChar(c);
+	}
+	if (m_char != quote)
+		return setError(ScannerError::IllegalStringEndQuote);
+	literal.complete();
+	advance();  // consume quote
+	return Token::StringLiteral;
+}
+
+Token Scanner::scanHexString()
+{
+	char const quote = m_char;
+	advance();  // consume quote
+	LiteralScope literal(this, LITERAL_TYPE_STRING);
+	while (m_char != quote && !isSourcePastEndOfInput())
+	{
+		char c = m_char;
+		if (!scanHexByte(c))
+			// can only return false if hex-byte is incomplete (only one hex digit instead of two)
+			return setError(ScannerError::IllegalHexString);
+		addLiteralChar(c);
+	}
+
+	if (m_char != quote)
+		return setError(ScannerError::IllegalStringEndQuote);
+
+	literal.complete();
+	advance();  // consume quote
+	return Token::StringLiteral;
+}
+
+// Parse for regex [:digit:]+(_[:digit:]+)*
+void Scanner::scanDecimalDigits()
+{
+	// MUST begin with a decimal digit.
+	if (!isDecimalDigit(m_char))
+		return;
+
+	// May continue with decimal digit or underscore for grouping.
+	do addLiteralCharAndAdvance();
+	while (!m_source->isPastEndOfInput() && (isDecimalDigit(m_char) || m_char == '_'));
+
+	// Defer further validation of underscore to SyntaxChecker.
+}
+
+Token Scanner::scanNumber(char _charSeen)
+{
+	enum { DECIMAL, HEX, BINARY } kind = DECIMAL;
+	LiteralScope literal(this, LITERAL_TYPE_NUMBER);
+	if (_charSeen == '.')
+	{
+		// we have already seen a decimal point of the float
+		addLiteralChar('.');
+		if (m_char == '_')
+			return setError(ScannerError::IllegalToken);
+		scanDecimalDigits();  // we know we have at least one digit
+	}
+	else
+	{
+		solAssert(_charSeen == 0, "");
+		// if the first character is '0' we must check for octals and hex
+		if (m_char == '0')
+		{
+			addLiteralCharAndAdvance();
+			// either 0, 0exxx, 0Exxx, 0.xxx or a hex number
+			if (m_char == 'x')
+			{
+				// hex number
+				kind = HEX;
+				addLiteralCharAndAdvance();
+				if (!isHexDigit(m_char))
+					return setError(ScannerError::IllegalHexDigit); // we must have at least one hex digit after 'x'
+
+				while (isHexDigit(m_char) || m_char == '_') // We keep the underscores for later validation
+					addLiteralCharAndAdvance();
+			}
+			else if (isDecimalDigit(m_char))
+				// We do not allow octal numbers
+				return setError(ScannerError::OctalNotAllowed);
+		}
+		// Parse decimal digits and allow trailing fractional part.
+		if (kind == DECIMAL)
+		{
+			scanDecimalDigits();  // optional
+			if (m_char == '.')
+			{
+				if (!m_source->isPastEndOfInput(1) && m_source->get(1) == '_')
+				{
+					// Assume the input may be a floating point number with leading '_' in fraction part.
+					// Recover by consuming it all but returning `Illegal` right away.
+					addLiteralCharAndAdvance(); // '.'
+					addLiteralCharAndAdvance(); // '_'
+					scanDecimalDigits();
+				}
+				if (m_source->isPastEndOfInput() || !isDecimalDigit(m_source->get(1)))
+				{
+					// A '.' has to be followed by a number.
+					literal.complete();
+					return Token::Number;
+				}
+				addLiteralCharAndAdvance();
+				scanDecimalDigits();
+			}
+		}
+	}
+	// scan exponent, if any
+	if (m_char == 'e' || m_char == 'E')
+	{
+		solAssert(kind != HEX, "'e'/'E' must be scanned as part of the hex number");
+		if (kind != DECIMAL)
+			return setError(ScannerError::IllegalExponent);
+		else if (!m_source->isPastEndOfInput(1) && m_source->get(1) == '_')
+		{
+			// Recover from wrongly placed underscore as delimiter in literal with scientific
+			// notation by consuming until the end.
+			addLiteralCharAndAdvance(); // 'e'
+			addLiteralCharAndAdvance(); // '_'
+			scanDecimalDigits();
+			literal.complete();
+			return Token::Number;
+		}
+		// scan exponent
+		addLiteralCharAndAdvance(); // 'e' | 'E'
+		if (m_char == '+' || m_char == '-')
+			addLiteralCharAndAdvance();
+		if (!isDecimalDigit(m_char)) // we must have at least one decimal digit after 'e'/'E'
+			return setError(ScannerError::IllegalExponent);
+		scanDecimalDigits();
+	}
+	// The source character immediately following a numeric literal must
+	// not be an identifier start or a decimal digit; see ECMA-262
+	// section 7.8.3, page 17 (note that we read only one decimal digit
+	// if the value is 0).
+	if (isDecimalDigit(m_char) || isIdentifierStart(m_char))
+		return setError(ScannerError::IllegalNumberEnd);
+	literal.complete();
+	return Token::Number;
+}
+
+tuple<Token, unsigned, unsigned> Scanner::scanIdentifierOrKeyword()
+{
+	solAssert(isIdentifierStart(m_char), "");
+	LiteralScope literal(this, LITERAL_TYPE_STRING);
+	addLiteralCharAndAdvance();
+	// Scan the rest of the identifier characters.
+	while (isIdentifierPart(m_char)) //get full literal
+		addLiteralCharAndAdvance();
+	literal.complete();
+	return TokenTraits::fromIdentifierOrKeyword(m_nextToken.literal);
+}
+
+
+}
author	chriseth <chris@ethereum.org>	2018-12-03 22:48:03 +0800
committer	GitHub <noreply@github.com>	2018-12-03 22:48:03 +0800
commit	c8a2cb62832afb2dc09ccee6fd42c1516dfdb981 (patch)
tree	7977e9dcbbc215088c05b847f849871ef5d4ae66 /liblangutil/Scanner.cpp
parent	1d4f565a64988a3400847d2655ca24f73f234bc6 (diff)
parent	590be1d84cea9850ce69b68be3dc5294b39041e5 (diff)
download	dexon-solidity-c8a2cb62832afb2dc09ccee6fd42c1516dfdb981.tar dexon-solidity-c8a2cb62832afb2dc09ccee6fd42c1516dfdb981.tar.gz dexon-solidity-c8a2cb62832afb2dc09ccee6fd42c1516dfdb981.tar.bz2 dexon-solidity-c8a2cb62832afb2dc09ccee6fd42c1516dfdb981.tar.lz dexon-solidity-c8a2cb62832afb2dc09ccee6fd42c1516dfdb981.tar.xz dexon-solidity-c8a2cb62832afb2dc09ccee6fd42c1516dfdb981.tar.zst dexon-solidity-c8a2cb62832afb2dc09ccee6fd42c1516dfdb981.zip