From 87821c53c3a73d3e35a0e50a7c159d9aa5d6b253 Mon Sep 17 00:00:00 2001
From: Christian Parpart <christian@ethereum.org>
Date: Wed, 14 Nov 2018 14:59:30 +0100
Subject: Isolating files shared between Yul- and Solidity language frontend.

---
 liblangutil/Scanner.cpp | 920 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 920 insertions(+)
 create mode 100644 liblangutil/Scanner.cpp

(limited to 'liblangutil/Scanner.cpp')
diff --git a/liblangutil/Scanner.cpp b/liblangutil/Scanner.cpp
new file mode 100644
index 00000000..246f5ea6
--- /dev/null
+++ b/liblangutil/Scanner.cpp
@@ -0,0 +1,920 @@
+/*
+	This file is part of solidity.
+
+	solidity is free software: you can redistribute it and/or modify
+	it under the terms of the GNU General Public License as published by
+	the Free Software Foundation, either version 3 of the License, or
+	(at your option) any later version.
+
+	solidity is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+	GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public License
+	along with solidity.  If not, see <http://www.gnu.org/licenses/>.
+
+	This file is derived from the file "scanner.cc", which was part of the
+	V8 project. The original copyright header follows:
+
+	Copyright 2006-2012, the V8 project authors. All rights reserved.
+	Redistribution and use in source and binary forms, with or without
+	modification, are permitted provided that the following conditions are
+	met:
+
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above
+	  copyright notice, this list of conditions and the following
+	  disclaimer in the documentation and/or other materials provided
+	  with the distribution.
+	* Neither the name of Google Inc. nor the names of its
+	  contributors may be used to endorse or promote products derived
+	  from this software without specific prior written permission.
+
+	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+	"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+	LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+	A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+	OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+	SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+	LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+	DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+	THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+	(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+	OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+/**
+ * @author Christian <c@ethdev.com>
+ * @date 2014
+ * Solidity scanner.
+ */
+
+#include <algorithm>
+#include <tuple>
+#include <liblangutil/Exceptions.h>
+#include <liblangutil/Scanner.h>
+
+using namespace std;
+
+namespace dev
+{
+namespace solidity
+{
+
+namespace
+{
+bool isDecimalDigit(char c)
+{
+	return '0' <= c && c <= '9';
+}
+bool isHexDigit(char c)
+{
+	return isDecimalDigit(c)
+		   || ('a' <= c && c <= 'f')
+		   || ('A' <= c && c <= 'F');
+}
+bool isLineTerminator(char c)
+{
+	return c == '\n';
+}
+bool isWhiteSpace(char c)
+{
+	return c == ' ' || c == '\n' || c == '\t' || c == '\r';
+}
+bool isIdentifierStart(char c)
+{
+	return c == '_' || c == '$' || ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');
+}
+bool isIdentifierPart(char c)
+{
+	return isIdentifierStart(c) || isDecimalDigit(c);
+}
+int hexValue(char c)
+{
+	if (c >= '0' && c <= '9')
+		return c - '0';
+	else if (c >= 'a' && c <= 'f')
+		return c - 'a' + 10;
+	else if (c >= 'A' && c <= 'F')
+		return c - 'A' + 10;
+	else return -1;
+}
+} // end anonymous namespace
+
+
+
+/// Scoped helper for literal recording. Automatically drops the literal
+/// if aborting the scanning before it's complete.
+enum LiteralType {
+	LITERAL_TYPE_STRING,
+	LITERAL_TYPE_NUMBER, // not really different from string type in behaviour
+	LITERAL_TYPE_COMMENT
+};
+
+class LiteralScope
+{
+public:
+	explicit LiteralScope(Scanner* _self, enum LiteralType _type): m_type(_type)
+	, m_scanner(_self)
+	, m_complete(false)
+	{
+		if (_type == LITERAL_TYPE_COMMENT)
+			m_scanner->m_nextSkippedComment.literal.clear();
+		else
+			m_scanner->m_nextToken.literal.clear();
+	}
+	~LiteralScope()
+	{
+		if (!m_complete)
+		{
+			if (m_type == LITERAL_TYPE_COMMENT)
+				m_scanner->m_nextSkippedComment.literal.clear();
+			else
+				m_scanner->m_nextToken.literal.clear();
+		}
+	}
+	void complete() { m_complete = true; }
+
+private:
+	enum LiteralType m_type;
+	Scanner* m_scanner;
+	bool m_complete;
+}; // end of LiteralScope class
+
+
+void Scanner::reset(CharStream const& _source, string const& _sourceName)
+{
+	m_source = _source;
+	m_sourceName = make_shared<string const>(_sourceName);
+	reset();
+}
+
+void Scanner::reset()
+{
+	m_source.reset();
+	m_char = m_source.get();
+	skipWhitespace();
+	scanToken();
+	next();
+}
+
+bool Scanner::scanHexByte(char& o_scannedByte)
+{
+	char x = 0;
+	for (int i = 0; i < 2; i++)
+	{
+		int d = hexValue(m_char);
+		if (d < 0)
+		{
+			rollback(i);
+			return false;
+		}
+		x = x * 16 + d;
+		advance();
+	}
+	o_scannedByte = x;
+	return true;
+}
+
+bool Scanner::scanUnicode(unsigned & o_codepoint)
+{
+	unsigned x = 0;
+	for (int i = 0; i < 4; i++)
+	{
+		int d = hexValue(m_char);
+		if (d < 0)
+		{
+			rollback(i);
+			return false;
+		}
+		x = x * 16 + d;
+		advance();
+	}
+	o_codepoint = x;
+	return true;
+}
+
+// This supports codepoints between 0000 and FFFF.
+void Scanner::addUnicodeAsUTF8(unsigned codepoint)
+{
+	if (codepoint <= 0x7f)
+		addLiteralChar(codepoint);
+	else if (codepoint <= 0x7ff)
+	{
+		addLiteralChar(0xc0 | (codepoint >> 6));
+		addLiteralChar(0x80 | (codepoint & 0x3f));
+	}
+	else
+	{
+		addLiteralChar(0xe0 | (codepoint >> 12));
+		addLiteralChar(0x80 | ((codepoint >> 6) & 0x3f));
+		addLiteralChar(0x80 | (codepoint & 0x3f));
+	}
+}
+
+// Ensure that tokens can be stored in a byte.
+BOOST_STATIC_ASSERT(TokenTraits::count() <= 0x100);
+
+Token Scanner::next()
+{
+	m_currentToken = m_nextToken;
+	m_skippedComment = m_nextSkippedComment;
+	scanToken();
+
+	return m_currentToken.token;
+}
+
+Token Scanner::selectToken(char _next, Token _then, Token _else)
+{
+	advance();
+	if (m_char == _next)
+		return selectToken(_then);
+	else
+		return _else;
+}
+
+bool Scanner::skipWhitespace()
+{
+	int const startPosition = sourcePos();
+	while (isWhiteSpace(m_char))
+		advance();
+	// Return whether or not we skipped any characters.
+	return sourcePos() != startPosition;
+}
+
+void Scanner::skipWhitespaceExceptUnicodeLinebreak()
+{
+	while (isWhiteSpace(m_char) && !isUnicodeLinebreak())
+		advance();
+}
+
+Token Scanner::skipSingleLineComment()
+{
+	// Line terminator is not part of the comment. If it is a
+	// non-ascii line terminator, it will result in a parser error.
+	while (!isUnicodeLinebreak())
+		if (!advance()) break;
+
+	return Token::Whitespace;
+}
+
+Token Scanner::scanSingleLineDocComment()
+{
+	LiteralScope literal(this, LITERAL_TYPE_COMMENT);
+	advance(); //consume the last '/' at ///
+
+	skipWhitespaceExceptUnicodeLinebreak();
+
+	while (!isSourcePastEndOfInput())
+	{
+		if (isLineTerminator(m_char))
+		{
+			// check if next line is also a documentation comment
+			skipWhitespace();
+			if (!m_source.isPastEndOfInput(3) &&
+				m_source.get(0) == '/' &&
+				m_source.get(1) == '/' &&
+				m_source.get(2) == '/')
+			{
+				addCommentLiteralChar('\n');
+				m_char = m_source.advanceAndGet(3);
+			}
+			else
+				break; // next line is not a documentation comment, we are done
+
+		}
+		else if (isUnicodeLinebreak())
+			// Any line terminator that is not '\n' is considered to end the
+			// comment.
+			break;
+		addCommentLiteralChar(m_char);
+		advance();
+	}
+	literal.complete();
+	return Token::CommentLiteral;
+}
+
+Token Scanner::skipMultiLineComment()
+{
+	advance();
+	while (!isSourcePastEndOfInput())
+	{
+		char ch = m_char;
+		advance();
+
+		// If we have reached the end of the multi-line comment, we
+		// consume the '/' and insert a whitespace. This way all
+		// multi-line comments are treated as whitespace.
+		if (ch == '*' && m_char == '/')
+		{
+			m_char = ' ';
+			return Token::Whitespace;
+		}
+	}
+	// Unterminated multi-line comment.
+	return Token::Illegal;
+}
+
+Token Scanner::scanMultiLineDocComment()
+{
+	LiteralScope literal(this, LITERAL_TYPE_COMMENT);
+	bool endFound = false;
+	bool charsAdded = false;
+
+	while (isWhiteSpace(m_char) && !isLineTerminator(m_char))
+		advance();
+
+	while (!isSourcePastEndOfInput())
+	{
+		//handle newlines in multline comments
+		if (isLineTerminator(m_char))
+		{
+			skipWhitespace();
+			if (!m_source.isPastEndOfInput(1) && m_source.get(0) == '*' && m_source.get(1) == '*')
+			{ // it is unknown if this leads to the end of the comment
+				addCommentLiteralChar('*');
+				advance();
+			}
+			else if (!m_source.isPastEndOfInput(1) && m_source.get(0) == '*' && m_source.get(1) != '/')
+			{ // skip first '*' in subsequent lines
+				if (charsAdded)
+					addCommentLiteralChar('\n');
+				m_char = m_source.advanceAndGet(2);
+			}
+			else if (!m_source.isPastEndOfInput(1) && m_source.get(0) == '*' && m_source.get(1) == '/')
+			{ // if after newline the comment ends, don't insert the newline
+				m_char = m_source.advanceAndGet(2);
+				endFound = true;
+				break;
+			}
+			else if (charsAdded)
+				addCommentLiteralChar('\n');
+		}
+
+		if (!m_source.isPastEndOfInput(1) && m_source.get(0) == '*' && m_source.get(1) == '/')
+		{
+			m_char = m_source.advanceAndGet(2);
+			endFound = true;
+			break;
+		}
+		addCommentLiteralChar(m_char);
+		charsAdded = true;
+		advance();
+	}
+	literal.complete();
+	if (!endFound)
+		return Token::Illegal;
+	else
+		return Token::CommentLiteral;
+}
+
+Token Scanner::scanSlash()
+{
+	int firstSlashPosition = sourcePos();
+	advance();
+	if (m_char == '/')
+	{
+		if (!advance()) /* double slash comment directly before EOS */
+			return Token::Whitespace;
+		else if (m_char == '/')
+		{
+			// doxygen style /// comment
+			Token comment;
+			m_nextSkippedComment.location.start = firstSlashPosition;
+			comment = scanSingleLineDocComment();
+			m_nextSkippedComment.location.end = sourcePos();
+			m_nextSkippedComment.token = comment;
+			return Token::Whitespace;
+		}
+		else
+			return skipSingleLineComment();
+	}
+	else if (m_char == '*')
+	{
+		// doxygen style /** natspec comment
+		if (!advance()) /* slash star comment before EOS */
+			return Token::Illegal;
+		else if (m_char == '*')
+		{
+			advance(); //consume the last '*' at /**
+
+			// "/**/"
+			if (m_char == '/')
+			{
+				advance(); //skip the closing slash
+				return Token::Whitespace;
+			}
+			// we actually have a multiline documentation comment
+			Token comment;
+			m_nextSkippedComment.location.start = firstSlashPosition;
+			comment = scanMultiLineDocComment();
+			m_nextSkippedComment.location.end = sourcePos();
+			m_nextSkippedComment.token = comment;
+			if (comment == Token::Illegal)
+				return Token::Illegal;
+			else
+				return Token::Whitespace;
+		}
+		else
+			return skipMultiLineComment();
+	}
+	else if (m_char == '=')
+		return selectToken(Token::AssignDiv);
+	else
+		return Token::Div;
+}
+
+void Scanner::scanToken()
+{
+	m_nextToken.literal.clear();
+	m_nextToken.extendedTokenInfo = make_tuple(0, 0);
+	m_nextSkippedComment.literal.clear();
+	m_nextSkippedComment.extendedTokenInfo = make_tuple(0, 0);
+
+	Token token;
+	// M and N are for the purposes of grabbing different type sizes
+	unsigned m;
+	unsigned n;
+	do
+	{
+		// Remember the position of the next token
+		m_nextToken.location.start = sourcePos();
+		switch (m_char)
+		{
+		case '"':
+		case '\'':
+			token = scanString();
+			break;
+		case '<':
+			// < <= << <<=
+			advance();
+			if (m_char == '=')
+				token = selectToken(Token::LessThanOrEqual);
+			else if (m_char == '<')
+				token = selectToken('=', Token::AssignShl, Token::SHL);
+			else
+				token = Token::LessThan;
+			break;
+		case '>':
+			// > >= >> >>= >>> >>>=
+			advance();
+			if (m_char == '=')
+				token = selectToken(Token::GreaterThanOrEqual);
+			else if (m_char == '>')
+			{
+				// >> >>= >>> >>>=
+				advance();
+				if (m_char == '=')
+					token = selectToken(Token::AssignSar);
+				else if (m_char == '>')
+					token = selectToken('=', Token::AssignShr, Token::SHR);
+				else
+					token = Token::SAR;
+			}
+			else
+				token = Token::GreaterThan;
+			break;
+		case '=':
+			// = == =>
+			advance();
+			if (m_char == '=')
+				token = selectToken(Token::Equal);
+			else if (m_char == '>')
+				token = selectToken(Token::Arrow);
+			else
+				token = Token::Assign;
+			break;
+		case '!':
+			// ! !=
+			advance();
+			if (m_char == '=')
+				token = selectToken(Token::NotEqual);
+			else
+				token = Token::Not;
+			break;
+		case '+':
+			// + ++ +=
+			advance();
+			if (m_char == '+')
+				token = selectToken(Token::Inc);
+			else if (m_char == '=')
+				token = selectToken(Token::AssignAdd);
+			else
+				token = Token::Add;
+			break;
+		case '-':
+			// - -- -=
+			advance();
+			if (m_char == '-')
+				token = selectToken(Token::Dec);
+			else if (m_char == '=')
+				token = selectToken(Token::AssignSub);
+			else
+				token = Token::Sub;
+			break;
+		case '*':
+			// * ** *=
+			advance();
+			if (m_char == '*')
+				token = selectToken(Token::Exp);
+			else if (m_char == '=')
+				token = selectToken(Token::AssignMul);
+			else
+				token = Token::Mul;
+			break;
+		case '%':
+			// % %=
+			token = selectToken('=', Token::AssignMod, Token::Mod);
+			break;
+		case '/':
+			// /  // /* /=
+			token = scanSlash();
+			break;
+		case '&':
+			// & && &=
+			advance();
+			if (m_char == '&')
+				token = selectToken(Token::And);
+			else if (m_char == '=')
+				token = selectToken(Token::AssignBitAnd);
+			else
+				token = Token::BitAnd;
+			break;
+		case '|':
+			// | || |=
+			advance();
+			if (m_char == '|')
+				token = selectToken(Token::Or);
+			else if (m_char == '=')
+				token = selectToken(Token::AssignBitOr);
+			else
+				token = Token::BitOr;
+			break;
+		case '^':
+			// ^ ^=
+			token = selectToken('=', Token::AssignBitXor, Token::BitXor);
+			break;
+		case '.':
+			// . Number
+			advance();
+			if (isDecimalDigit(m_char))
+				token = scanNumber('.');
+			else
+				token = Token::Period;
+			break;
+		case ':':
+			token = selectToken(Token::Colon);
+			break;
+		case ';':
+			token = selectToken(Token::Semicolon);
+			break;
+		case ',':
+			token = selectToken(Token::Comma);
+			break;
+		case '(':
+			token = selectToken(Token::LParen);
+			break;
+		case ')':
+			token = selectToken(Token::RParen);
+			break;
+		case '[':
+			token = selectToken(Token::LBrack);
+			break;
+		case ']':
+			token = selectToken(Token::RBrack);
+			break;
+		case '{':
+			token = selectToken(Token::LBrace);
+			break;
+		case '}':
+			token = selectToken(Token::RBrace);
+			break;
+		case '?':
+			token = selectToken(Token::Conditional);
+			break;
+		case '~':
+			token = selectToken(Token::BitNot);
+			break;
+		default:
+			if (isIdentifierStart(m_char))
+			{
+				tie(token, m, n) = scanIdentifierOrKeyword();
+
+				// Special case for hexadecimal literals
+				if (token == Token::Hex)
+				{
+					// reset
+					m = 0;
+					n = 0;
+
+					// Special quoted hex string must follow
+					if (m_char == '"' || m_char == '\'')
+						token = scanHexString();
+					else
+						token = Token::IllegalHex;
+				}
+			}
+			else if (isDecimalDigit(m_char))
+				token = scanNumber();
+			else if (skipWhitespace())
+				token = Token::Whitespace;
+			else if (isSourcePastEndOfInput())
+				token = Token::EOS;
+			else
+				token = selectToken(Token::Illegal);
+			break;
+		}
+		// Continue scanning for tokens as long as we're just skipping
+		// whitespace.
+	}
+	while (token == Token::Whitespace);
+	m_nextToken.location.end = sourcePos();
+	m_nextToken.token = token;
+	m_nextToken.extendedTokenInfo = make_tuple(m, n);
+}
+
+bool Scanner::scanEscape()
+{
+	char c = m_char;
+	advance();
+	// Skip escaped newlines.
+	if (isLineTerminator(c))
+		return true;
+	switch (c)
+	{
+	case '\'':  // fall through
+	case '"':  // fall through
+	case '\\':
+		break;
+	case 'b':
+		c = '\b';
+		break;
+	case 'f':
+		c = '\f';
+		break;
+	case 'n':
+		c = '\n';
+		break;
+	case 'r':
+		c = '\r';
+		break;
+	case 't':
+		c = '\t';
+		break;
+	case 'v':
+		c = '\v';
+		break;
+	case 'u':
+	{
+		unsigned codepoint;
+		if (!scanUnicode(codepoint))
+			return false;
+		addUnicodeAsUTF8(codepoint);
+		return true;
+	}
+	case 'x':
+		if (!scanHexByte(c))
+			return false;
+		break;
+	default:
+		return false;
+	}
+
+	addLiteralChar(c);
+	return true;
+}
+
+bool Scanner::isUnicodeLinebreak()
+{
+	if (0x0a <= m_char && m_char <= 0x0d)
+		// line feed, vertical tab, form feed, carriage return
+		return true;
+	else if (!m_source.isPastEndOfInput(1) && uint8_t(m_source.get(0)) == 0xc2 && uint8_t(m_source.get(1)) == 0x85)
+		// NEL - U+0085, C2 85 in utf8
+		return true;
+	else if (!m_source.isPastEndOfInput(2) && uint8_t(m_source.get(0)) == 0xe2 && uint8_t(m_source.get(1)) == 0x80 && (
+		uint8_t(m_source.get(2)) == 0xa8 || uint8_t(m_source.get(2)) == 0xa9
+	))
+		// LS - U+2028, E2 80 A8  in utf8
+		// PS - U+2029, E2 80 A9  in utf8
+		return true;
+	else
+		return false;
+}
+
+Token Scanner::scanString()
+{
+	char const quote = m_char;
+	advance();  // consume quote
+	LiteralScope literal(this, LITERAL_TYPE_STRING);
+	while (m_char != quote && !isSourcePastEndOfInput() && !isUnicodeLinebreak())
+	{
+		char c = m_char;
+		advance();
+		if (c == '\\')
+		{
+			if (isSourcePastEndOfInput() || !scanEscape())
+				return Token::Illegal;
+		}
+		else
+			addLiteralChar(c);
+	}
+	if (m_char != quote)
+		return Token::Illegal;
+	literal.complete();
+	advance();  // consume quote
+	return Token::StringLiteral;
+}
+
+Token Scanner::scanHexString()
+{
+	char const quote = m_char;
+	advance();  // consume quote
+	LiteralScope literal(this, LITERAL_TYPE_STRING);
+	while (m_char != quote && !isSourcePastEndOfInput())
+	{
+		char c = m_char;
+		if (!scanHexByte(c))
+			return Token::IllegalHex;
+		addLiteralChar(c);
+	}
+	if (m_char != quote)
+		return Token::IllegalHex;
+	literal.complete();
+	advance();  // consume quote
+	return Token::StringLiteral;
+}
+
+// Parse for regex [:digit:]+(_[:digit:]+)*
+void Scanner::scanDecimalDigits()
+{
+	// MUST begin with a decimal digit.
+	if (!isDecimalDigit(m_char))
+		return;
+
+	// May continue with decimal digit or underscore for grouping.
+	do addLiteralCharAndAdvance();
+	while (!m_source.isPastEndOfInput() && (isDecimalDigit(m_char) || m_char == '_'));
+
+	// Defer further validation of underscore to SyntaxChecker.
+}
+
+Token Scanner::scanNumber(char _charSeen)
+{
+	enum { DECIMAL, HEX, BINARY } kind = DECIMAL;
+	LiteralScope literal(this, LITERAL_TYPE_NUMBER);
+	if (_charSeen == '.')
+	{
+		// we have already seen a decimal point of the float
+		addLiteralChar('.');
+		if (m_char == '_')
+			return Token::Illegal;
+		scanDecimalDigits();  // we know we have at least one digit
+	}
+	else
+	{
+		solAssert(_charSeen == 0, "");
+		// if the first character is '0' we must check for octals and hex
+		if (m_char == '0')
+		{
+			addLiteralCharAndAdvance();
+			// either 0, 0exxx, 0Exxx, 0.xxx or a hex number
+			if (m_char == 'x')
+			{
+				// hex number
+				kind = HEX;
+				addLiteralCharAndAdvance();
+				if (!isHexDigit(m_char))
+					return Token::Illegal; // we must have at least one hex digit after 'x'
+
+				while (isHexDigit(m_char) || m_char == '_') // We keep the underscores for later validation
+					addLiteralCharAndAdvance();
+			}
+			else if (isDecimalDigit(m_char))
+				// We do not allow octal numbers
+				return Token::Illegal;
+		}
+		// Parse decimal digits and allow trailing fractional part.
+		if (kind == DECIMAL)
+		{
+			scanDecimalDigits();  // optional
+			if (m_char == '.')
+			{
+				if (!m_source.isPastEndOfInput(1) && m_source.get(1) == '_')
+				{
+					// Assume the input may be a floating point number with leading '_' in fraction part.
+					// Recover by consuming it all but returning `Illegal` right away.
+					addLiteralCharAndAdvance(); // '.'
+					addLiteralCharAndAdvance(); // '_'
+					scanDecimalDigits();
+				}
+				if (m_source.isPastEndOfInput() || !isDecimalDigit(m_source.get(1)))
+				{
+					// A '.' has to be followed by a number.
+					literal.complete();
+					return Token::Number;
+				}
+				addLiteralCharAndAdvance();
+				scanDecimalDigits();
+			}
+		}
+	}
+	// scan exponent, if any
+	if (m_char == 'e' || m_char == 'E')
+	{
+		solAssert(kind != HEX, "'e'/'E' must be scanned as part of the hex number");
+		if (kind != DECIMAL)
+			return Token::Illegal;
+		else if (!m_source.isPastEndOfInput(1) && m_source.get(1) == '_')
+		{
+			// Recover from wrongly placed underscore as delimiter in literal with scientific
+			// notation by consuming until the end.
+			addLiteralCharAndAdvance(); // 'e'
+			addLiteralCharAndAdvance(); // '_'
+			scanDecimalDigits();
+			literal.complete();
+			return Token::Number;
+		}
+		// scan exponent
+		addLiteralCharAndAdvance(); // 'e' | 'E'
+		if (m_char == '+' || m_char == '-')
+			addLiteralCharAndAdvance();
+		if (!isDecimalDigit(m_char))
+			return Token::Illegal; // we must have at least one decimal digit after 'e'/'E'
+		scanDecimalDigits();
+	}
+	// The source character immediately following a numeric literal must
+	// not be an identifier start or a decimal digit; see ECMA-262
+	// section 7.8.3, page 17 (note that we read only one decimal digit
+	// if the value is 0).
+	if (isDecimalDigit(m_char) || isIdentifierStart(m_char))
+		return Token::Illegal;
+	literal.complete();
+	return Token::Number;
+}
+
+tuple<Token, unsigned, unsigned> Scanner::scanIdentifierOrKeyword()
+{
+	solAssert(isIdentifierStart(m_char), "");
+	LiteralScope literal(this, LITERAL_TYPE_STRING);
+	addLiteralCharAndAdvance();
+	// Scan the rest of the identifier characters.
+	while (isIdentifierPart(m_char)) //get full literal
+		addLiteralCharAndAdvance();
+	literal.complete();
+	return TokenTraits::fromIdentifierOrKeyword(m_nextToken.literal);
+}
+
+char CharStream::advanceAndGet(size_t _chars)
+{
+	if (isPastEndOfInput())
+		return 0;
+	m_position += _chars;
+	if (isPastEndOfInput())
+		return 0;
+	return m_source[m_position];
+}
+
+char CharStream::rollback(size_t _amount)
+{
+	solAssert(m_position >= _amount, "");
+	m_position -= _amount;
+	return get();
+}
+
+string CharStream::lineAtPosition(int _position) const
+{
+	// if _position points to \n, it returns the line before the \n
+	using size_type = string::size_type;
+	size_type searchStart = min<size_type>(m_source.size(), _position);
+	if (searchStart > 0)
+		searchStart--;
+	size_type lineStart = m_source.rfind('\n', searchStart);
+	if (lineStart == string::npos)
+		lineStart = 0;
+	else
+		lineStart++;
+	return m_source.substr(lineStart, min(m_source.find('\n', lineStart),
+										  m_source.size()) - lineStart);
+}
+
+tuple<int, int> CharStream::translatePositionToLineColumn(int _position) const
+{
+	using size_type = string::size_type;
+	size_type searchPosition = min<size_type>(m_source.size(), _position);
+	int lineNumber = count(m_source.begin(), m_source.begin() + searchPosition, '\n');
+	size_type lineStart;
+	if (searchPosition == 0)
+		lineStart = 0;
+	else
+	{
+		lineStart = m_source.rfind('\n', searchPosition - 1);
+		lineStart = lineStart == string::npos ? 0 : lineStart + 1;
+	}
+	return tuple<int, int>(lineNumber, searchPosition - lineStart);
+}
+
+
+}
+}
-- 
cgit v1.2.3


From 2518b5314c0c16dcdbbc0b093f5b716e9e85dbc2 Mon Sep 17 00:00:00 2001
From: Christian Parpart <christian@ethereum.org>
Date: Wed, 14 Nov 2018 15:18:55 +0100
Subject: Splitting out CharStream from Scanner.

---
 liblangutil/Scanner.cpp | 50 -------------------------------------------------
 1 file changed, 50 deletions(-)

(limited to 'liblangutil/Scanner.cpp')

diff --git a/liblangutil/Scanner.cpp b/liblangutil/Scanner.cpp
index 246f5ea6..beb39a4f 100644
--- a/liblangutil/Scanner.cpp
+++ b/liblangutil/Scanner.cpp
@@ -866,55 +866,5 @@ tuple<Token, unsigned, unsigned> Scanner::scanIdentifierOrKeyword()
 	return TokenTraits::fromIdentifierOrKeyword(m_nextToken.literal);
 }
 
-char CharStream::advanceAndGet(size_t _chars)
-{
-	if (isPastEndOfInput())
-		return 0;
-	m_position += _chars;
-	if (isPastEndOfInput())
-		return 0;
-	return m_source[m_position];
-}
-
-char CharStream::rollback(size_t _amount)
-{
-	solAssert(m_position >= _amount, "");
-	m_position -= _amount;
-	return get();
-}
-
-string CharStream::lineAtPosition(int _position) const
-{
-	// if _position points to \n, it returns the line before the \n
-	using size_type = string::size_type;
-	size_type searchStart = min<size_type>(m_source.size(), _position);
-	if (searchStart > 0)
-		searchStart--;
-	size_type lineStart = m_source.rfind('\n', searchStart);
-	if (lineStart == string::npos)
-		lineStart = 0;
-	else
-		lineStart++;
-	return m_source.substr(lineStart, min(m_source.find('\n', lineStart),
-										  m_source.size()) - lineStart);
-}
-
-tuple<int, int> CharStream::translatePositionToLineColumn(int _position) const
-{
-	using size_type = string::size_type;
-	size_type searchPosition = min<size_type>(m_source.size(), _position);
-	int lineNumber = count(m_source.begin(), m_source.begin() + searchPosition, '\n');
-	size_type lineStart;
-	if (searchPosition == 0)
-		lineStart = 0;
-	else
-	{
-		lineStart = m_source.rfind('\n', searchPosition - 1);
-		lineStart = lineStart == string::npos ? 0 : lineStart + 1;
-	}
-	return tuple<int, int>(lineNumber, searchPosition - lineStart);
-}
-
-
 }
 }
-- 
cgit v1.2.3


From d67322a1861d60a88151f7c25d6c3478a9a39acf Mon Sep 17 00:00:00 2001
From: Christian Parpart <christian@ethereum.org>
Date: Wed, 14 Nov 2018 17:11:55 +0100
Subject: Introduce namespace `langutil` in liblangutil directory.

Also:
- Use {}-style list initialisation for SourceLocation construction
- Introduce new system includes
- Change the API of the Scanner to take the source by value (with move) instead of by reference
---
 liblangutil/Scanner.cpp | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

(limited to 'liblangutil/Scanner.cpp')

diff --git a/liblangutil/Scanner.cpp b/liblangutil/Scanner.cpp
index beb39a4f..3d7527d4 100644
--- a/liblangutil/Scanner.cpp
+++ b/liblangutil/Scanner.cpp
@@ -50,16 +50,14 @@
  * Solidity scanner.
  */
 
-#include <algorithm>
-#include <tuple>
 #include <liblangutil/Exceptions.h>
 #include <liblangutil/Scanner.h>
+#include <algorithm>
+#include <tuple>
 
 using namespace std;
 
-namespace dev
-{
-namespace solidity
+namespace langutil
 {
 
 namespace
@@ -143,10 +141,10 @@ private:
 }; // end of LiteralScope class
 
 
-void Scanner::reset(CharStream const& _source, string const& _sourceName)
+void Scanner::reset(CharStream _source, string _sourceName)
 {
-	m_source = _source;
-	m_sourceName = make_shared<string const>(_sourceName);
+	m_source = std::move(_source);
+	m_sourceName = make_shared<string const>(std::move(_sourceName));
 	reset();
 }
 
@@ -866,5 +864,5 @@ tuple<Token, unsigned, unsigned> Scanner::scanIdentifierOrKeyword()
 	return TokenTraits::fromIdentifierOrKeyword(m_nextToken.literal);
 }
 
-}
+
 }
-- 
cgit v1.2.3


From e454737a3cf389ee400a9ef1d9f252c579a2ceea Mon Sep 17 00:00:00 2001
From: Lazaridis <info@lazaridis.com>
Date: Thu, 22 Nov 2018 18:37:19 +0200
Subject: adapt to latest code changes

---
 liblangutil/Scanner.cpp | 30 +++++++++++++++++-------------
 1 file changed, 17 insertions(+), 13 deletions(-)

(limited to 'liblangutil/Scanner.cpp')

diff --git a/liblangutil/Scanner.cpp b/liblangutil/Scanner.cpp
index 3d7527d4..091e9b89 100644
--- a/liblangutil/Scanner.cpp
+++ b/liblangutil/Scanner.cpp
@@ -311,7 +311,7 @@ Token Scanner::skipMultiLineComment()
 		}
 	}
 	// Unterminated multi-line comment.
-	return Token::Illegal;
+	return Token::IllegalCommentTerminator;
 }
 
 Token Scanner::scanMultiLineDocComment()
@@ -362,7 +362,7 @@ Token Scanner::scanMultiLineDocComment()
 	}
 	literal.complete();
 	if (!endFound)
-		return Token::Illegal;
+		return Token::IllegalCommentTerminator;
 	else
 		return Token::CommentLiteral;
 }
@@ -392,7 +392,7 @@ Token Scanner::scanSlash()
 	{
 		// doxygen style /** natspec comment
 		if (!advance()) /* slash star comment before EOS */
-			return Token::Illegal;
+			return Token::IllegalCommentTerminator;
 		else if (m_char == '*')
 		{
 			advance(); //consume the last '*' at /**
@@ -409,8 +409,9 @@ Token Scanner::scanSlash()
 			comment = scanMultiLineDocComment();
 			m_nextSkippedComment.location.end = sourcePos();
 			m_nextSkippedComment.token = comment;
-			if (comment == Token::Illegal)
-				return Token::Illegal;
+			// @todo possibly: if (comment.isIllegal) return comment; to pass all errors
+			if (comment == Token::IllegalCommentTerminator)
+				return Token::IllegalCommentTerminator;
 			else
 				return Token::Whitespace;
 		}
@@ -620,6 +621,7 @@ void Scanner::scanToken()
 			else if (isSourcePastEndOfInput())
 				token = Token::EOS;
 			else
+				// @todo verfiy if this is actually an "IllegalUnknown" case
 				token = selectToken(Token::Illegal);
 			break;
 		}
@@ -713,13 +715,13 @@ Token Scanner::scanString()
 		if (c == '\\')
 		{
 			if (isSourcePastEndOfInput() || !scanEscape())
-				return Token::Illegal;
+				return Token::IllegalStringEscape;
 		}
 		else
 			addLiteralChar(c);
 	}
 	if (m_char != quote)
-		return Token::Illegal;
+		return Token::IllegalStringEndQuote;
 	literal.complete();
 	advance();  // consume quote
 	return Token::StringLiteral;
@@ -767,7 +769,8 @@ Token Scanner::scanNumber(char _charSeen)
 		// we have already seen a decimal point of the float
 		addLiteralChar('.');
 		if (m_char == '_')
-			return Token::Illegal;
+			// @todo add test-case (change of return value did not break test)
+			return Token::IllegalNumberSeparator;
 		scanDecimalDigits();  // we know we have at least one digit
 	}
 	else
@@ -784,14 +787,14 @@ Token Scanner::scanNumber(char _charSeen)
 				kind = HEX;
 				addLiteralCharAndAdvance();
 				if (!isHexDigit(m_char))
-					return Token::Illegal; // we must have at least one hex digit after 'x'
+					return Token::IllegalHexDigit; // we must have at least one hex digit after 'x'
 
 				while (isHexDigit(m_char) || m_char == '_') // We keep the underscores for later validation
 					addLiteralCharAndAdvance();
 			}
 			else if (isDecimalDigit(m_char))
 				// We do not allow octal numbers
-				return Token::Illegal;
+				return Token::IllegalOctalNotAllowed;
 		}
 		// Parse decimal digits and allow trailing fractional part.
 		if (kind == DECIMAL)
@@ -823,7 +826,8 @@ Token Scanner::scanNumber(char _charSeen)
 	{
 		solAssert(kind != HEX, "'e'/'E' must be scanned as part of the hex number");
 		if (kind != DECIMAL)
-			return Token::Illegal;
+			// @todo add test (change introduced no failing)
+			return Token::IllegalExponent;
 		else if (!m_source.isPastEndOfInput(1) && m_source.get(1) == '_')
 		{
 			// Recover from wrongly placed underscore as delimiter in literal with scientific
@@ -839,7 +843,7 @@ Token Scanner::scanNumber(char _charSeen)
 		if (m_char == '+' || m_char == '-')
 			addLiteralCharAndAdvance();
 		if (!isDecimalDigit(m_char))
-			return Token::Illegal; // we must have at least one decimal digit after 'e'/'E'
+			return Token::IllegalExponent; // we must have at least one decimal digit after 'e'/'E'
 		scanDecimalDigits();
 	}
 	// The source character immediately following a numeric literal must
@@ -847,7 +851,7 @@ Token Scanner::scanNumber(char _charSeen)
 	// section 7.8.3, page 17 (note that we read only one decimal digit
 	// if the value is 0).
 	if (isDecimalDigit(m_char) || isIdentifierStart(m_char))
-		return Token::Illegal;
+		return Token::IllegalNumberEnd;
 	literal.complete();
 	return Token::Number;
 }
-- 
cgit v1.2.3


From e4106bd06eebce9e17d51858a37bf82566b7f640 Mon Sep 17 00:00:00 2001
From: Christian Parpart <christian@ethereum.org>
Date: Fri, 23 Nov 2018 16:47:34 +0100
Subject: Change scanner error diagnostics to be non-intrusive to the token
 API.

This also implicitly eliminates the magic token Token::IllegalHex and
streamlines error diagnostics by reporting them through a custom enum class.
---
 liblangutil/Scanner.cpp | 70 +++++++++++++++++++++++++++++++++----------------
 1 file changed, 48 insertions(+), 22 deletions(-)

(limited to 'liblangutil/Scanner.cpp')

diff --git a/liblangutil/Scanner.cpp b/liblangutil/Scanner.cpp
index 091e9b89..215171b3 100644
--- a/liblangutil/Scanner.cpp
+++ b/liblangutil/Scanner.cpp
@@ -53,6 +53,7 @@
 #include <liblangutil/Exceptions.h>
 #include <liblangutil/Scanner.h>
 #include <algorithm>
+#include <ostream>
 #include <tuple>
 
 using namespace std;
@@ -100,7 +101,32 @@ int hexValue(char c)
 }
 } // end anonymous namespace
 
+std::string to_string(ScannerError _errorCode) // maps a scanner error code to its human-readable diagnostic text
+{
+	switch (_errorCode)
+	{
+		case ScannerError::NoError: return "No error.";
+		case ScannerError::IllegalToken: return "Invalid token.";
+		case ScannerError::IllegalHexString: return "Expected even number of hex-nibbles within double-quotes.";
+		case ScannerError::IllegalHexDigit: return "Hexadecimal digit missing or invalid.";
+		case ScannerError::IllegalCommentTerminator: return "Expected multi-line comment-terminator.";
+		case ScannerError::IllegalEscapeSequence: return "Invalid escape sequence.";
+		case ScannerError::IllegalStringEndQuote: return "Expected string end-quote.";
+		case ScannerError::IllegalNumberSeparator: return "Invalid use of number separator '_'.";
+		case ScannerError::IllegalExponent: return "Invalid exponent.";
+		case ScannerError::IllegalNumberEnd: return "Identifier-start is not allowed at end of a number.";
+		case ScannerError::OctalNotAllowed: return "Octal numbers not allowed.";
+		default: // reached only for a value that has no case above
+			solAssert(false, "Unhandled case in to_string(ScannerError)");
+			return ""; // fallback in case the assertion above returns; solAssert is defined outside this patch
+	}
+}
 
+std::ostream& operator<<(std::ostream& os, ScannerError _errorCode) // prints the message from to_string(), not the numeric enum value
+{
+	os << to_string(_errorCode);
+	return os; // hand the stream back so that << calls can be chained
+}
 
 /// Scoped helper for literal recording. Automatically drops the literal
 /// if aborting the scanning before it's complete.
@@ -311,7 +337,7 @@ Token Scanner::skipMultiLineComment()
 		}
 	}
 	// Unterminated multi-line comment.
-	return Token::IllegalCommentTerminator;
+	return setError(ScannerError::IllegalCommentTerminator);
 }
 
 Token Scanner::scanMultiLineDocComment()
@@ -362,7 +388,7 @@ Token Scanner::scanMultiLineDocComment()
 	}
 	literal.complete();
 	if (!endFound)
-		return Token::IllegalCommentTerminator;
+		return setError(ScannerError::IllegalCommentTerminator);
 	else
 		return Token::CommentLiteral;
 }
@@ -392,7 +418,7 @@ Token Scanner::scanSlash()
 	{
 		// doxygen style /** natspec comment
 		if (!advance()) /* slash star comment before EOS */
-			return Token::IllegalCommentTerminator;
+			return setError(ScannerError::IllegalCommentTerminator);
 		else if (m_char == '*')
 		{
 			advance(); //consume the last '*' at /**
@@ -409,9 +435,8 @@ Token Scanner::scanSlash()
 			comment = scanMultiLineDocComment();
 			m_nextSkippedComment.location.end = sourcePos();
 			m_nextSkippedComment.token = comment;
-			// @todo possibly: if (comment.isIllegal) return comment; to pass all errors
-			if (comment == Token::IllegalCommentTerminator)
-				return Token::IllegalCommentTerminator;
+			if (comment == Token::Illegal)
+				return Token::Illegal; // error already set
 			else
 				return Token::Whitespace;
 		}
@@ -426,6 +451,7 @@ Token Scanner::scanSlash()
 
 void Scanner::scanToken()
 {
+	m_nextToken.error = ScannerError::NoError;
 	m_nextToken.literal.clear();
 	m_nextToken.extendedTokenInfo = make_tuple(0, 0);
 	m_nextSkippedComment.literal.clear();
@@ -611,7 +637,7 @@ void Scanner::scanToken()
 					if (m_char == '"' || m_char == '\'')
 						token = scanHexString();
 					else
-						token = Token::IllegalHex;
+						token = setError(ScannerError::IllegalToken);
 				}
 			}
 			else if (isDecimalDigit(m_char))
@@ -621,8 +647,7 @@ void Scanner::scanToken()
 			else if (isSourcePastEndOfInput())
 				token = Token::EOS;
 			else
-				// @todo verfiy if this is actually an "IllegalUnknown" case
-				token = selectToken(Token::Illegal);
+				token = selectErrorToken(ScannerError::IllegalToken);
 			break;
 		}
 		// Continue scanning for tokens as long as we're just skipping
@@ -715,13 +740,13 @@ Token Scanner::scanString()
 		if (c == '\\')
 		{
 			if (isSourcePastEndOfInput() || !scanEscape())
-				return Token::IllegalStringEscape;
+				return setError(ScannerError::IllegalEscapeSequence);
 		}
 		else
 			addLiteralChar(c);
 	}
 	if (m_char != quote)
-		return Token::IllegalStringEndQuote;
+		return setError(ScannerError::IllegalStringEndQuote);
 	literal.complete();
 	advance();  // consume quote
 	return Token::StringLiteral;
@@ -736,11 +761,14 @@ Token Scanner::scanHexString()
 	{
 		char c = m_char;
 		if (!scanHexByte(c))
-			return Token::IllegalHex;
+			// can only return false if hex-byte is incomplete (only one hex digit instead of two)
+			return setError(ScannerError::IllegalHexString);
 		addLiteralChar(c);
 	}
+
 	if (m_char != quote)
-		return Token::IllegalHex;
+		return setError(ScannerError::IllegalStringEndQuote);
+
 	literal.complete();
 	advance();  // consume quote
 	return Token::StringLiteral;
@@ -769,8 +797,7 @@ Token Scanner::scanNumber(char _charSeen)
 		// we have already seen a decimal point of the float
 		addLiteralChar('.');
 		if (m_char == '_')
-			// @todo add test-case (change of return value did not break test)
-			return Token::IllegalNumberSeparator;
+			return setError(ScannerError::IllegalToken);
 		scanDecimalDigits();  // we know we have at least one digit
 	}
 	else
@@ -787,14 +814,14 @@ Token Scanner::scanNumber(char _charSeen)
 				kind = HEX;
 				addLiteralCharAndAdvance();
 				if (!isHexDigit(m_char))
-					return Token::IllegalHexDigit; // we must have at least one hex digit after 'x'
+					return setError(ScannerError::IllegalHexDigit); // we must have at least one hex digit after 'x'
 
 				while (isHexDigit(m_char) || m_char == '_') // We keep the underscores for later validation
 					addLiteralCharAndAdvance();
 			}
 			else if (isDecimalDigit(m_char))
 				// We do not allow octal numbers
-				return Token::IllegalOctalNotAllowed;
+				return setError(ScannerError::OctalNotAllowed);
 		}
 		// Parse decimal digits and allow trailing fractional part.
 		if (kind == DECIMAL)
@@ -826,8 +853,7 @@ Token Scanner::scanNumber(char _charSeen)
 	{
 		solAssert(kind != HEX, "'e'/'E' must be scanned as part of the hex number");
 		if (kind != DECIMAL)
-			// @todo add test (change introduced no failing)
-			return Token::IllegalExponent;
+			return setError(ScannerError::IllegalExponent);
 		else if (!m_source.isPastEndOfInput(1) && m_source.get(1) == '_')
 		{
 			// Recover from wrongly placed underscore as delimiter in literal with scientific
@@ -842,8 +868,8 @@ Token Scanner::scanNumber(char _charSeen)
 		addLiteralCharAndAdvance(); // 'e' | 'E'
 		if (m_char == '+' || m_char == '-')
 			addLiteralCharAndAdvance();
-		if (!isDecimalDigit(m_char))
-			return Token::IllegalExponent; // we must have at least one decimal digit after 'e'/'E'
+		if (!isDecimalDigit(m_char)) // we must have at least one decimal digit after 'e'/'E'
+			return setError(ScannerError::IllegalExponent);
 		scanDecimalDigits();
 	}
 	// The source character immediately following a numeric literal must
@@ -851,7 +877,7 @@ Token Scanner::scanNumber(char _charSeen)
 	// section 7.8.3, page 17 (note that we read only one decimal digit
 	// if the value is 0).
 	if (isDecimalDigit(m_char) || isIdentifierStart(m_char))
-		return Token::IllegalNumberEnd;
+		return setError(ScannerError::IllegalNumberEnd);
 	literal.complete();
 	return Token::Number;
 }
-- 
cgit v1.2.3


From c48a5264be4221873fe02cac57f6a41a32010fea Mon Sep 17 00:00:00 2001
From: Christian Parpart <christian@ethereum.org>
Date: Wed, 28 Nov 2018 16:19:22 +0100
Subject: liblangutil: SourceLocation: adds (shared) pointer to underlying
 CharStream source, eliminating sourceName

Also adapted the affected code to these changes.
---
 liblangutil/Scanner.cpp | 51 ++++++++++++++++++++++++++++---------------------
 1 file changed, 29 insertions(+), 22 deletions(-)

(limited to 'liblangutil/Scanner.cpp')

diff --git a/liblangutil/Scanner.cpp b/liblangutil/Scanner.cpp
index 215171b3..5c0f356e 100644
--- a/liblangutil/Scanner.cpp
+++ b/liblangutil/Scanner.cpp
@@ -169,15 +169,22 @@ private:
 
 void Scanner::reset(CharStream _source, string _sourceName)
 {
-	m_source = std::move(_source);
+	m_source = make_shared<CharStream>(std::move(_source));
 	m_sourceName = make_shared<string const>(std::move(_sourceName));
 	reset();
 }
 
+void Scanner::reset(std::shared_ptr<CharStream> _source) // switches the scanner to an already shared CharStream and restarts scanning
+{
+	solAssert(_source.get() != nullptr, "You MUST provide a CharStream when resetting.");
+	m_source = std::move(_source); // _source is a by-value sink: moving avoids an extra atomic ref-count increment/decrement
+	reset(); // the parameterless overload does the actual restart on the new m_source
+}
+
 void Scanner::reset()
 {
-	m_source.reset();
-	m_char = m_source.get();
+	m_source->reset();
+	m_char = m_source->get();
 	skipWhitespace();
 	scanToken();
 	next();
@@ -296,13 +303,13 @@ Token Scanner::scanSingleLineDocComment()
 		{
 			// check if next line is also a documentation comment
 			skipWhitespace();
-			if (!m_source.isPastEndOfInput(3) &&
-				m_source.get(0) == '/' &&
-				m_source.get(1) == '/' &&
-				m_source.get(2) == '/')
+			if (!m_source->isPastEndOfInput(3) &&
+				m_source->get(0) == '/' &&
+				m_source->get(1) == '/' &&
+				m_source->get(2) == '/')
 			{
 				addCommentLiteralChar('\n');
-				m_char = m_source.advanceAndGet(3);
+				m_char = m_source->advanceAndGet(3);
 			}
 			else
 				break; // next line is not a documentation comment, we are done
@@ -355,20 +362,20 @@ Token Scanner::scanMultiLineDocComment()
 		if (isLineTerminator(m_char))
 		{
 			skipWhitespace();
-			if (!m_source.isPastEndOfInput(1) && m_source.get(0) == '*' && m_source.get(1) == '*')
+			if (!m_source->isPastEndOfInput(1) && m_source->get(0) == '*' && m_source->get(1) == '*')
 			{ // it is unknown if this leads to the end of the comment
 				addCommentLiteralChar('*');
 				advance();
 			}
-			else if (!m_source.isPastEndOfInput(1) && m_source.get(0) == '*' && m_source.get(1) != '/')
+			else if (!m_source->isPastEndOfInput(1) && m_source->get(0) == '*' && m_source->get(1) != '/')
 			{ // skip first '*' in subsequent lines
 				if (charsAdded)
 					addCommentLiteralChar('\n');
-				m_char = m_source.advanceAndGet(2);
+				m_char = m_source->advanceAndGet(2);
 			}
-			else if (!m_source.isPastEndOfInput(1) && m_source.get(0) == '*' && m_source.get(1) == '/')
+			else if (!m_source->isPastEndOfInput(1) && m_source->get(0) == '*' && m_source->get(1) == '/')
 			{ // if after newline the comment ends, don't insert the newline
-				m_char = m_source.advanceAndGet(2);
+				m_char = m_source->advanceAndGet(2);
 				endFound = true;
 				break;
 			}
@@ -376,9 +383,9 @@ Token Scanner::scanMultiLineDocComment()
 				addCommentLiteralChar('\n');
 		}
 
-		if (!m_source.isPastEndOfInput(1) && m_source.get(0) == '*' && m_source.get(1) == '/')
+		if (!m_source->isPastEndOfInput(1) && m_source->get(0) == '*' && m_source->get(1) == '/')
 		{
-			m_char = m_source.advanceAndGet(2);
+			m_char = m_source->advanceAndGet(2);
 			endFound = true;
 			break;
 		}
@@ -715,11 +722,11 @@ bool Scanner::isUnicodeLinebreak()
 	if (0x0a <= m_char && m_char <= 0x0d)
 		// line feed, vertical tab, form feed, carriage return
 		return true;
-	else if (!m_source.isPastEndOfInput(1) && uint8_t(m_source.get(0)) == 0xc2 && uint8_t(m_source.get(1)) == 0x85)
+	else if (!m_source->isPastEndOfInput(1) && uint8_t(m_source->get(0)) == 0xc2 && uint8_t(m_source->get(1)) == 0x85)
 		// NEL - U+0085, C2 85 in utf8
 		return true;
-	else if (!m_source.isPastEndOfInput(2) && uint8_t(m_source.get(0)) == 0xe2 && uint8_t(m_source.get(1)) == 0x80 && (
-		uint8_t(m_source.get(2)) == 0xa8 || uint8_t(m_source.get(2)) == 0xa9
+	else if (!m_source->isPastEndOfInput(2) && uint8_t(m_source->get(0)) == 0xe2 && uint8_t(m_source->get(1)) == 0x80 && (
+		uint8_t(m_source->get(2)) == 0xa8 || uint8_t(m_source->get(2)) == 0xa9
 	))
 		// LS - U+2028, E2 80 A8  in utf8
 		// PS - U+2029, E2 80 A9  in utf8
@@ -783,7 +790,7 @@ void Scanner::scanDecimalDigits()
 
 	// May continue with decimal digit or underscore for grouping.
 	do addLiteralCharAndAdvance();
-	while (!m_source.isPastEndOfInput() && (isDecimalDigit(m_char) || m_char == '_'));
+	while (!m_source->isPastEndOfInput() && (isDecimalDigit(m_char) || m_char == '_'));
 
 	// Defer further validation of underscore to SyntaxChecker.
 }
@@ -829,7 +836,7 @@ Token Scanner::scanNumber(char _charSeen)
 			scanDecimalDigits();  // optional
 			if (m_char == '.')
 			{
-				if (!m_source.isPastEndOfInput(1) && m_source.get(1) == '_')
+				if (!m_source->isPastEndOfInput(1) && m_source->get(1) == '_')
 				{
 					// Assume the input may be a floating point number with leading '_' in fraction part.
 					// Recover by consuming it all but returning `Illegal` right away.
@@ -837,7 +844,7 @@ Token Scanner::scanNumber(char _charSeen)
 					addLiteralCharAndAdvance(); // '_'
 					scanDecimalDigits();
 				}
-				if (m_source.isPastEndOfInput() || !isDecimalDigit(m_source.get(1)))
+				if (m_source->isPastEndOfInput() || !isDecimalDigit(m_source->get(1)))
 				{
 					// A '.' has to be followed by a number.
 					literal.complete();
@@ -854,7 +861,7 @@ Token Scanner::scanNumber(char _charSeen)
 		solAssert(kind != HEX, "'e'/'E' must be scanned as part of the hex number");
 		if (kind != DECIMAL)
 			return setError(ScannerError::IllegalExponent);
-		else if (!m_source.isPastEndOfInput(1) && m_source.get(1) == '_')
+		else if (!m_source->isPastEndOfInput(1) && m_source->get(1) == '_')
 		{
 			// Recover from wrongly placed underscore as delimiter in literal with scientific
 			// notation by consuming until the end.
-- 
cgit v1.2.3


From 435f7b3b72157e884344adbc7b62033bd08bb51c Mon Sep 17 00:00:00 2001
From: Christian Parpart <christian@ethereum.org>
Date: Thu, 29 Nov 2018 01:58:15 +0100
Subject: liblangutil: Scanner: remove superfluous sourceName field (it's in
 CharStream already)

Also, ParserBase::sourceName() was dead code. Eliminating it should
increase test coverage (how sneaky) :-)
---
 liblangutil/Scanner.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'liblangutil/Scanner.cpp')

diff --git a/liblangutil/Scanner.cpp b/liblangutil/Scanner.cpp
index 5c0f356e..ac298bd5 100644
--- a/liblangutil/Scanner.cpp
+++ b/liblangutil/Scanner.cpp
@@ -167,10 +167,9 @@ private:
 }; // end of LiteralScope class
 
 
-void Scanner::reset(CharStream _source, string _sourceName)
+void Scanner::reset(CharStream _source)
 {
 	m_source = make_shared<CharStream>(std::move(_source));
-	m_sourceName = make_shared<string const>(std::move(_sourceName));
 	reset();
 }
 
-- 
cgit v1.2.3