aboutsummaryrefslogtreecommitdiffstats
path: root/libsolidity/parsing/Scanner.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'libsolidity/parsing/Scanner.cpp')
-rw-r--r--libsolidity/parsing/Scanner.cpp771
1 files changed, 771 insertions, 0 deletions
diff --git a/libsolidity/parsing/Scanner.cpp b/libsolidity/parsing/Scanner.cpp
new file mode 100644
index 00000000..fe0807d5
--- /dev/null
+++ b/libsolidity/parsing/Scanner.cpp
@@ -0,0 +1,771 @@
+/*
+ This file is part of cpp-ethereum.
+
+ cpp-ethereum is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ cpp-ethereum is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
+
+ This file is derived from the file "scanner.cc", which was part of the
+ V8 project. The original copyright header follows:
+
+ Copyright 2006-2012, the V8 project authors. All rights reserved.
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following
+ disclaimer in the documentation and/or other materials provided
+ with the distribution.
+ * Neither the name of Google Inc. nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+/**
+ * @author Christian <c@ethdev.com>
+ * @date 2014
+ * Solidity scanner.
+ */
+
+#include <algorithm>
+#include <tuple>
+#include <libsolidity/interface/Utils.h>
+#include <libsolidity/parsing/Scanner.h>
+
+using namespace std;
+
+namespace dev
+{
+namespace solidity
+{
+
+namespace
+{
+/// @returns true iff @a c is one of the ASCII digits '0' to '9'.
+bool isDecimalDigit(char c)
+{
+    return c >= '0' && c <= '9';
+}
+/// @returns true iff @a c is a decimal digit or a letter a-f / A-F.
+bool isHexDigit(char c)
+{
+    bool const lowerHexLetter = c >= 'a' && c <= 'f';
+    bool const upperHexLetter = c >= 'A' && c <= 'F';
+    return isDecimalDigit(c) || lowerHexLetter || upperHexLetter;
+}
+/// @returns true iff @a c is the line feed character; '\r' is not a line terminator here.
+bool isLineTerminator(char c)
+{
+    return '\n' == c;
+}
+/// @returns true iff @a c is a blank, line feed, tab or carriage return.
+bool isWhiteSpace(char c)
+{
+    switch (c)
+    {
+    case ' ':
+    case '\n':
+    case '\t':
+    case '\r':
+        return true;
+    default:
+        return false;
+    }
+}
+/// @returns true iff @a c may start an identifier: '_', '$' or an ASCII letter.
+bool isIdentifierStart(char c)
+{
+    if (c == '_' || c == '$')
+        return true;
+    if (c >= 'a' && c <= 'z')
+        return true;
+    return c >= 'A' && c <= 'Z';
+}
+/// @returns true iff @a c may appear inside an identifier after its first character.
+bool isIdentifierPart(char c)
+{
+    return isDecimalDigit(c) || isIdentifierStart(c);
+}
+
+/// @returns the numeric value (0-15) of the hexadecimal digit @a c, or -1 if @a c is
+/// not a hexadecimal digit.
+int hexValue(char c)
+{
+    if ('0' <= c && c <= '9')
+        return c - '0';
+    if ('a' <= c && c <= 'f')
+        return 10 + (c - 'a');
+    if ('A' <= c && c <= 'F')
+        return 10 + (c - 'A');
+    return -1;
+}
+} // end anonymous namespace
+
+
+
+/// Selects which literal buffer of the scanner a LiteralScope guards.
+enum LiteralType {
+    LITERAL_TYPE_STRING,  ///< guards m_nextToken.literal (used for strings and identifiers)
+    LITERAL_TYPE_NUMBER,  ///< guards m_nextToken.literal; not different from the string type in behaviour
+    LITERAL_TYPE_COMMENT  ///< guards m_nextSkippedComment.literal
+};
+
+/// Scoped helper for literal recording. Clears the guarded literal on construction and
+/// automatically drops it again on destruction if scanning was aborted before
+/// complete() was called.
+class LiteralScope
+{
+public:
+    /// @param _self the scanner whose literal buffer is guarded
+    /// @param _type selects the buffer, see LiteralType
+    explicit LiteralScope(Scanner* _self, enum LiteralType _type):
+        m_type(_type),
+        m_scanner(_self),
+        m_complete(false)
+    {
+        clearLiteral();
+    }
+    // The destructor has a side effect on the scanner; a copy would apply it a second
+    // time with a possibly stale completion flag, so copying is disabled.
+    LiteralScope(LiteralScope const&) = delete;
+    LiteralScope& operator=(LiteralScope const&) = delete;
+    ~LiteralScope()
+    {
+        if (!m_complete)
+            clearLiteral();
+    }
+    /// Marks the literal as fully scanned so that the destructor keeps it.
+    void complete() { m_complete = true; }
+
+private:
+    /// Empties the literal buffer selected by m_type.
+    void clearLiteral()
+    {
+        if (m_type == LITERAL_TYPE_COMMENT)
+            m_scanner->m_nextSkippedComment.literal.clear();
+        else
+            m_scanner->m_nextToken.literal.clear();
+    }
+
+    enum LiteralType m_type;
+    Scanner* m_scanner;
+    bool m_complete;
+}; // end of LiteralScope class
+
+
+/// Replaces the input by @a _source, remembers @a _sourceName as the name of the
+/// source and restarts scanning from the beginning.
+void Scanner::reset(CharStream const& _source, string const& _sourceName)
+{
+    m_source = _source;
+    // The name is stored behind a shared pointer to an immutable string.
+    m_sourceName = make_shared<string const>(_sourceName);
+    // Re-prime the scanner state for the new input.
+    reset();
+}
+
+/// Rewinds the character stream and primes the scanner: leading whitespace is skipped,
+/// one token is scanned and then next() is called once.
+void Scanner::reset()
+{
+    m_source.reset();
+    m_char = m_source.get();
+    skipWhitespace();
+    // Fill the look-ahead slot with the first token ...
+    scanToken();
+    // ... and shift it into the current slot, which also scans the following token.
+    next();
+}
+
+/// Reads exactly two hexadecimal digits starting at the current character and combines
+/// them into one byte (first digit is the high nibble).
+/// @param o_scannedByte receives the byte; left untouched on failure
+/// @returns false if one of the two characters is not a hexadecimal digit. In that case
+/// rollback() is called with the number of digits consumed so far.
+bool Scanner::scanHexByte(char& o_scannedByte)
+{
+    char value = 0;
+    int consumed = 0;
+    while (consumed < 2)
+    {
+        int const digit = hexValue(m_char);
+        if (digit < 0)
+        {
+            rollback(consumed);
+            return false;
+        }
+        value = value * 16 + digit;
+        advance();
+        ++consumed;
+    }
+    o_scannedByte = value;
+    return true;
+}
+
+
+// Ensure that tokens can be stored in a byte. Checked with the language-level
+// static_assert so that this file does not depend on a Boost header for it.
+static_assert(Token::NUM_TOKENS <= 0x100, "Token values must fit into a single byte.");
+
+/// Advances the scanner by one token: the look-ahead token and the look-ahead skipped
+/// comment become the current ones and a new look-ahead token is scanned.
+/// @returns the token that is current after the call.
+Token::Value Scanner::next()
+{
+    // Shift the look-ahead state into the current state.
+    m_currentToken = m_nextToken;
+    m_skippedComment = m_nextSkippedComment;
+    // Refill the look-ahead slot.
+    scanToken();
+
+    return m_currentToken.token;
+}
+
+/// Consumes the current character and then decides between two tokens based on the
+/// character that follows: if it equals @a _next, the result of selectToken(_then) is
+/// returned, otherwise @a _else is returned without consuming anything further.
+Token::Value Scanner::selectToken(char _next, Token::Value _then, Token::Value _else)
+{
+    advance();
+    return m_char == _next ? selectToken(_then) : _else;
+}
+
+/// Advances past all consecutive whitespace characters (including line feeds).
+/// @returns true iff at least one character was skipped.
+bool Scanner::skipWhitespace()
+{
+    int const positionBefore = sourcePos();
+    for (; isWhiteSpace(m_char); advance())
+    {
+    }
+    return positionBefore != sourcePos();
+}
+
+/// Advances past consecutive whitespace characters but stops in front of a line feed.
+/// @returns true iff at least one character was skipped.
+bool Scanner::skipWhitespaceExceptLF()
+{
+    int const positionBefore = sourcePos();
+    for (; isWhiteSpace(m_char) && !isLineTerminator(m_char); advance())
+    {
+    }
+    return positionBefore != sourcePos();
+}
+
+/// Skips the remainder of a single-line comment and reports it as whitespace.
+Token::Value Scanner::skipSingleLineComment()
+{
+    // Stop in front of the line terminator: it does not belong to the comment but is
+    // recognized separately by the lexical grammar. Also stop as soon as advance()
+    // reports that there is nothing left to read.
+    while (!isLineTerminator(m_char) && advance())
+    {
+    }
+
+    return Token::Whitespace;
+}
+
+/// Scans a `///` documentation comment into the skipped-comment literal. The caller has
+/// consumed the first two slashes; m_char is the third one on entry. Directly following
+/// lines that also start with `///` (after optional whitespace) are merged into the
+/// same literal, separated by '\n'.
+/// @returns Token::CommentLiteral
+Token::Value Scanner::scanSingleLineDocComment()
+{
+    LiteralScope literal(this, LITERAL_TYPE_COMMENT);
+    advance(); //consume the last '/' at ///
+    skipWhitespaceExceptLF();
+    while (!isSourcePastEndOfInput())
+    {
+        if (isLineTerminator(m_char))
+        {
+            // check if next line is also a documentation comment
+            skipWhitespace();
+            if (!m_source.isPastEndOfInput(3) &&
+                m_source.get(0) == '/' &&
+                m_source.get(1) == '/' &&
+                m_source.get(2) == '/')
+            {
+                addCommentLiteralChar('\n');
+                m_char = m_source.advanceAndGet(3);
+                // The continuation line may be empty (a bare "///"), in which case
+                // m_char is a line terminator again. Re-run the check above instead of
+                // recording that character as content; otherwise the line after the
+                // empty one would be swallowed into the comment even if it is code.
+                continue;
+            }
+            else
+                break; // next line is not a documentation comment, we are done
+        }
+        addCommentLiteralChar(m_char);
+        advance();
+    }
+    literal.complete();
+    return Token::CommentLiteral;
+}
+
+/// Skips a non-documentation multi-line comment.
+/// @returns Token::Whitespace if the closing "*/" was found, Token::Illegal if the input
+/// ended first.
+Token::Value Scanner::skipMultiLineComment()
+{
+    advance();
+    while (!isSourcePastEndOfInput())
+    {
+        char const previous = m_char;
+        advance();
+        if (previous != '*' || m_char != '/')
+            continue;
+
+        // End of the comment reached. The '/' is not consumed but replaced by a blank
+        // in m_char, so that the whole comment is treated as whitespace.
+        m_char = ' ';
+        return Token::Whitespace;
+    }
+    // Unterminated multi-line comment.
+    return Token::Illegal;
+}
+/**
+ * Scans the body of a multi-line documentation comment into the skipped-comment literal.
+ * Scanning starts at the current character; scanSlash calls this after consuming the
+ * opening slash-star-star and the blanks following it.
+ * After each line feed the following whitespace is skipped. If the next line then starts
+ * with a '*' that does not close the comment, advanceAndGet(2) moves past that '*' and
+ * the character after it.
+ * The literal is marked complete even if the terminator is missing.
+ * @returns Token::CommentLiteral if the closing star-slash was found, otherwise
+ * Token::Illegal.
+ */
+Token::Value Scanner::scanMultiLineDocComment()
+{
+ LiteralScope literal(this, LITERAL_TYPE_COMMENT);
+ bool endFound = false;
+ bool charsAdded = false; // becomes true once any source character was recorded; suppresses leading '\n'
+
+ while (!isSourcePastEndOfInput())
+ {
+ // handle newlines in multi-line comments
+ if (isLineTerminator(m_char))
+ {
+ skipWhitespace(); // also consumes the line feed itself and any blank lines
+ if (!m_source.isPastEndOfInput(1) && m_source.get(0) == '*' && m_source.get(1) != '/')
+ { // skip first '*' in subsequent lines
+ if (charsAdded)
+ addCommentLiteralChar('\n');
+ m_char = m_source.advanceAndGet(2); // NOTE(review): skips the character after '*' whatever it is; presumably a blank is expected - confirm
+ }
+ else if (!m_source.isPastEndOfInput(1) && m_source.get(0) == '*' && m_source.get(1) == '/')
+ { // if after newline the comment ends, don't insert the newline
+ m_char = m_source.advanceAndGet(2);
+ endFound = true;
+ break;
+ }
+ else if (charsAdded)
+ addCommentLiteralChar('\n');
+ }
+
+ // terminator found in the middle of a line (or directly at the start of the comment)
+ if (!m_source.isPastEndOfInput(1) && m_source.get(0) == '*' && m_source.get(1) == '/')
+ {
+ m_char = m_source.advanceAndGet(2);
+ endFound = true;
+ break;
+ }
+ addCommentLiteralChar(m_char);
+ charsAdded = true;
+ advance();
+ }
+ literal.complete();
+ if (!endFound)
+ return Token::Illegal;
+ else
+ return Token::CommentLiteral;
+}
+
+/// Scans a token that starts with '/': a division operator, a division assignment, or
+/// one of the four comment forms. Documentation comments ("///" and "/**") are recorded
+/// in m_nextSkippedComment (token, literal and location); all comments are reported to
+/// the caller as whitespace.
+/// @returns Token::Div, the token selected for "/=", Token::Whitespace for a comment, or
+/// Token::Illegal for an unterminated non-documentation multi-line comment.
+Token::Value Scanner::scanSlash()
+{
+    int const firstSlashPosition = sourcePos();
+    advance();
+    if (m_char == '/')
+    {
+        if (!advance()) /* double slash comment directly before EOS */
+            return Token::Whitespace;
+        else if (m_char == '/')
+        {
+            // doxygen style /// comment
+            m_nextSkippedComment.location.start = firstSlashPosition;
+            Token::Value const comment = scanSingleLineDocComment();
+            m_nextSkippedComment.location.end = sourcePos();
+            m_nextSkippedComment.token = comment;
+            return Token::Whitespace;
+        }
+        else
+            return skipSingleLineComment();
+    }
+    else if (m_char == '*')
+    {
+        if (!advance())
+            // "/*" directly before EOS can never be closed. Report it like every other
+            // unterminated multi-line comment instead of silently accepting it.
+            return Token::Illegal;
+        else if (m_char == '*')
+        {
+            // doxygen style /** natspec comment
+            advance(); //consume the last '*' at /**
+
+            // Special case of a closed normal multiline comment: "/**/". This has to be
+            // checked before any whitespace is skipped, otherwise "/** /" would be
+            // mistaken for a complete comment.
+            if (!m_source.isPastEndOfInput() && m_source.get(0) == '/')
+            {
+                advance(); //skip the closing slash
+                return Token::Whitespace;
+            }
+
+            // we actually have a multiline documentation comment
+            skipWhitespaceExceptLF();
+            m_nextSkippedComment.location.start = firstSlashPosition;
+            Token::Value const comment = scanMultiLineDocComment();
+            m_nextSkippedComment.location.end = sourcePos();
+            m_nextSkippedComment.token = comment;
+            return Token::Whitespace;
+        }
+        else
+            return skipMultiLineComment();
+    }
+    else if (m_char == '=')
+        return selectToken(Token::AssignDiv);
+    else
+        return Token::Div;
+}
+/**
+ * Scans the next token from the source into m_nextToken (token, literal and location).
+ * The token literal and the skipped-comment literal are both cleared first. Scanning
+ * repeats as long as the result is Token::Whitespace, so whitespace and comments are
+ * never stored as a token; the start location is re-recorded on every iteration.
+ */
+void Scanner::scanToken()
+{
+ m_nextToken.literal.clear();
+ m_nextSkippedComment.literal.clear();
+ Token::Value token;
+ do
+ {
+ // Remember the position of the next token
+ m_nextToken.location.start = sourcePos();
+ switch (m_char)
+ {
+ case '\n': // fall-through
+ case ' ':
+ case '\t':
+ token = selectToken(Token::Whitespace);
+ break;
+ case '"':
+ case '\'':
+ token = scanString();
+ break;
+ case '<':
+ // < <= << <<=
+ advance();
+ if (m_char == '=')
+ token = selectToken(Token::LessThanOrEqual);
+ else if (m_char == '<')
+ token = selectToken('=', Token::AssignShl, Token::SHL);
+ else
+ token = Token::LessThan;
+ break;
+ case '>':
+ // > >= >> >>= >>> >>>=
+ advance();
+ if (m_char == '=')
+ token = selectToken(Token::GreaterThanOrEqual);
+ else if (m_char == '>')
+ {
+ // >> >>= >>> >>>=
+ advance();
+ if (m_char == '=')
+ token = selectToken(Token::AssignSar);
+ else if (m_char == '>')
+ token = selectToken('=', Token::AssignShr, Token::SHR);
+ else
+ token = Token::SAR;
+ }
+ else
+ token = Token::GreaterThan;
+ break;
+ case '=':
+ // = == =>
+ advance();
+ if (m_char == '=')
+ token = selectToken(Token::Equal);
+ else if (m_char == '>')
+ token = selectToken(Token::Arrow);
+ else
+ token = Token::Assign;
+ break;
+ case '!':
+ // ! !=
+ advance();
+ if (m_char == '=')
+ token = selectToken(Token::NotEqual);
+ else
+ token = Token::Not;
+ break;
+ case '+':
+ // + ++ +=
+ advance();
+ if (m_char == '+')
+ token = selectToken(Token::Inc);
+ else if (m_char == '=')
+ token = selectToken(Token::AssignAdd);
+ else
+ token = Token::Add;
+ break;
+ case '-':
+ // - -- -=
+ advance();
+ if (m_char == '-')
+ token = selectToken(Token::Dec);
+ else if (m_char == '=')
+ token = selectToken(Token::AssignSub);
+ else
+ token = Token::Sub;
+ break;
+ case '*':
+ // * ** *=
+ advance();
+ if (m_char == '*')
+ token = selectToken(Token::Exp);
+ else if (m_char == '=')
+ token = selectToken(Token::AssignMul);
+ else
+ token = Token::Mul;
+ break;
+ case '%':
+ // % %=
+ token = selectToken('=', Token::AssignMod, Token::Mod);
+ break;
+ case '/':
+ // / // /* /= (comments come back as Token::Whitespace and are skipped by the loop)
+ token = scanSlash();
+ break;
+ case '&':
+ // & && &=
+ advance();
+ if (m_char == '&')
+ token = selectToken(Token::And);
+ else if (m_char == '=')
+ token = selectToken(Token::AssignBitAnd);
+ else
+ token = Token::BitAnd;
+ break;
+ case '|':
+ // | || |=
+ advance();
+ if (m_char == '|')
+ token = selectToken(Token::Or);
+ else if (m_char == '=')
+ token = selectToken(Token::AssignBitOr);
+ else
+ token = Token::BitOr;
+ break;
+ case '^':
+ // ^ ^=
+ token = selectToken('=', Token::AssignBitXor, Token::BitXor);
+ break;
+ case '.':
+ // . Number
+ advance();
+ if (isDecimalDigit(m_char))
+ token = scanNumber('.'); // the '.' itself has already been consumed
+ else
+ token = Token::Period;
+ break;
+ case ':':
+ token = selectToken(Token::Colon);
+ break;
+ case ';':
+ token = selectToken(Token::Semicolon);
+ break;
+ case ',':
+ token = selectToken(Token::Comma);
+ break;
+ case '(':
+ token = selectToken(Token::LParen);
+ break;
+ case ')':
+ token = selectToken(Token::RParen);
+ break;
+ case '[':
+ token = selectToken(Token::LBrack);
+ break;
+ case ']':
+ token = selectToken(Token::RBrack);
+ break;
+ case '{':
+ token = selectToken(Token::LBrace);
+ break;
+ case '}':
+ token = selectToken(Token::RBrace);
+ break;
+ case '?':
+ token = selectToken(Token::Conditional);
+ break;
+ case '~':
+ token = selectToken(Token::BitNot);
+ break;
+ default: // identifiers, numbers, remaining whitespace such as '\r', end of input, anything else
+ if (isIdentifierStart(m_char))
+ token = scanIdentifierOrKeyword();
+ else if (isDecimalDigit(m_char))
+ token = scanNumber();
+ else if (skipWhitespace())
+ token = Token::Whitespace;
+ else if (isSourcePastEndOfInput())
+ token = Token::EOS;
+ else
+ token = selectToken(Token::Illegal);
+ break;
+ }
+ // Continue scanning for tokens as long as we're just skipping
+ // whitespace.
+ }
+ while (token == Token::Whitespace);
+ m_nextToken.location.end = sourcePos();
+ m_nextToken.token = token;
+}
+
+/// Scans the character following a backslash inside a string literal and appends the
+/// character it denotes to the token literal. An escaped line feed adds nothing; a
+/// character without special meaning is added as it is.
+/// @returns false iff a "\x" escape is not followed by two hexadecimal digits.
+bool Scanner::scanEscape()
+{
+    char escaped = m_char;
+    advance();
+    // Skip escaped newlines.
+    if (isLineTerminator(escaped))
+        return true;
+
+    if (escaped == 'b')
+        escaped = '\b';
+    else if (escaped == 'f')
+        escaped = '\f';
+    else if (escaped == 'n')
+        escaped = '\n';
+    else if (escaped == 'r')
+        escaped = '\r';
+    else if (escaped == 't')
+        escaped = '\t';
+    else if (escaped == 'v')
+        escaped = '\v';
+    else if (escaped == 'x' && !scanHexByte(escaped))
+        return false;
+    // Everything else, including '\'', '"' and '\\', stands for itself.
+
+    addLiteralChar(escaped);
+    return true;
+}
+
+/// Scans a string literal delimited by the quote character found at the current
+/// position and records its content, with escapes resolved, in the token literal.
+/// @returns Token::StringLiteral, or Token::Illegal if an escape is invalid or the
+/// closing quote is missing before the end of the line or of the input.
+Token::Value Scanner::scanString()
+{
+    char const quote = m_char;
+    advance(); // consume quote
+    LiteralScope literal(this, LITERAL_TYPE_STRING);
+    for (;;)
+    {
+        if (m_char == quote || isSourcePastEndOfInput() || isLineTerminator(m_char))
+            break;
+
+        char const current = m_char;
+        advance();
+        if (current != '\\')
+        {
+            addLiteralChar(current);
+            continue;
+        }
+        if (isSourcePastEndOfInput() || !scanEscape())
+            return Token::Illegal;
+    }
+    if (m_char != quote)
+        return Token::Illegal;
+    literal.complete();
+    advance(); // consume quote
+    return Token::StringLiteral;
+}
+
+/// Appends all consecutive decimal digits at the current position to the literal and
+/// advances past them; does nothing if the current character is not a digit.
+void Scanner::scanDecimalDigits()
+{
+    for (; isDecimalDigit(m_char); addLiteralCharAndAdvance())
+    {
+    }
+}
+
+/// Scans a number literal: decimal with optional fraction and exponent, or hexadecimal
+/// with a "0x"/"0X" prefix.
+/// @param _charSeen '.' if the caller has already consumed a leading decimal point,
+/// otherwise 0
+/// @returns Token::Number, or Token::Illegal for a malformed literal or one that is
+/// directly followed by a digit or an identifier start.
+Token::Value Scanner::scanNumber(char _charSeen)
+{
+    bool isHex = false;
+    LiteralScope literal(this, LITERAL_TYPE_NUMBER);
+    if (_charSeen != '.')
+    {
+        solAssert(_charSeen == 0, "");
+        // A leading '0' may introduce a hex number: 0, 0exxx, 0Exxx, 0.xxx or 0x...
+        if (m_char == '0')
+        {
+            addLiteralCharAndAdvance();
+            isHex = (m_char == 'x' || m_char == 'X');
+        }
+        if (isHex)
+        {
+            addLiteralCharAndAdvance();
+            if (!isHexDigit(m_char))
+                return Token::Illegal; // we must have at least one hex digit after 'x'/'X'
+            do
+                addLiteralCharAndAdvance();
+            while (isHexDigit(m_char));
+        }
+        else
+        {
+            // Decimal digits with an optional trailing fractional part.
+            scanDecimalDigits(); // optional
+            if (m_char == '.')
+            {
+                addLiteralCharAndAdvance();
+                scanDecimalDigits(); // optional
+            }
+        }
+    }
+    else
+    {
+        // The decimal point of the float has been consumed by the caller.
+        addLiteralChar('.');
+        scanDecimalDigits(); // we know we have at least one digit
+    }
+
+    bool const hasExponent = (m_char == 'e' || m_char == 'E');
+    if (hasExponent)
+    {
+        solAssert(!isHex, "'e'/'E' must be scanned as part of the hex number");
+        if (isHex)
+            return Token::Illegal;
+        addLiteralCharAndAdvance();
+        if (m_char == '+' || m_char == '-')
+            addLiteralCharAndAdvance();
+        if (!isDecimalDigit(m_char))
+            return Token::Illegal; // we must have at least one decimal digit after 'e'/'E'
+        scanDecimalDigits();
+    }
+    // The source character immediately following a numeric literal must
+    // not be an identifier start or a decimal digit; see ECMA-262
+    // section 7.8.3, page 17 (note that we read only one decimal digit
+    // if the value is 0).
+    if (isDecimalDigit(m_char) || isIdentifierStart(m_char))
+        return Token::Illegal;
+    literal.complete();
+    return Token::Number;
+}
+
+/// Scans an identifier or keyword starting at the current character, which has to be
+/// an identifier start. The text is recorded in the token literal.
+/// @returns whatever Token::fromIdentifierOrKeyword yields for the scanned text.
+Token::Value Scanner::scanIdentifierOrKeyword()
+{
+    solAssert(isIdentifierStart(m_char), "");
+    LiteralScope literal(this, LITERAL_TYPE_STRING);
+    // The first character is known to be valid, so consume it before testing the rest.
+    do
+        addLiteralCharAndAdvance();
+    while (isIdentifierPart(m_char));
+    literal.complete();
+    return Token::fromIdentifierOrKeyword(m_nextToken.literal);
+}
+
+/// Moves the read position forward by @a _chars characters.
+/// @returns the character at the new position, or 0 if the position was already past
+/// the end of the input (then it is not moved) or is past the end after moving.
+char CharStream::advanceAndGet(size_t _chars)
+{
+    if (!isPastEndOfInput())
+    {
+        m_position += _chars;
+        if (!isPastEndOfInput())
+            return m_source[m_position];
+    }
+    return 0;
+}
+
+/// Moves the read position back by @a _amount characters; asserts that this does not
+/// move in front of the start of the input.
+/// @returns the result of get() at the new position.
+char CharStream::rollback(size_t _amount)
+{
+    solAssert(m_position >= _amount, "");
+    m_position -= _amount;
+    // Report the character that is current again.
+    return get();
+}
+
+/// @returns the text of the line containing @a _position, without its line feed.
+/// If @a _position points to a '\n', the line before that '\n' is returned. Positions
+/// beyond the end of the source are clamped to the end.
+string CharStream::lineAtPosition(int _position) const
+{
+    using size_type = string::size_type;
+    size_type const position = min<size_type>(m_source.size(), _position);
+
+    // The line starts right after the last '\n' strictly before the position. At
+    // position 0 there is no such character, so the line starts at 0 - even if the
+    // source begins with '\n'. This matches translatePositionToLineColumn.
+    size_type lineStart = 0;
+    if (position > 0)
+    {
+        size_type const previousBreak = m_source.rfind('\n', position - 1);
+        if (previousBreak != string::npos)
+            lineStart = previousBreak + 1;
+    }
+
+    // The line ends in front of the next '\n' or at the end of the source.
+    size_type const lineEnd = min(m_source.find('\n', lineStart), m_source.size());
+    return m_source.substr(lineStart, lineEnd - lineStart);
+}
+
+/// Converts a character offset into a (line, column) pair, both counted from zero.
+/// Offsets beyond the end of the source are clamped to the end.
+tuple<int, int> CharStream::translatePositionToLineColumn(int _position) const
+{
+    using size_type = string::size_type;
+    size_type const position = min<size_type>(m_source.size(), _position);
+    // The line number is the number of line feeds in front of the position.
+    int const line = count(m_source.begin(), m_source.begin() + position, '\n');
+
+    // The column is the distance to the character following the previous line feed.
+    size_type lineStart = 0;
+    if (position != 0)
+    {
+        size_type const previousBreak = m_source.rfind('\n', position - 1);
+        if (previousBreak != string::npos)
+            lineStart = previousBreak + 1;
+    }
+    return tuple<int, int>(line, position - lineStart);
+}
+
+
+}
+}