diff options
author | Christian <c@ethdev.com> | 2014-10-06 23:13:52 +0800 |
---|---|---|
committer | Christian <c@ethdev.com> | 2014-10-08 17:11:50 +0800 |
commit | ef59373871528ac72c447e5f014aa18a1f3776e5 (patch) | |
tree | c4979d32856a3a32621d70600b0ce21ad783b9e8 | |
download | dexon-solidity-ef59373871528ac72c447e5f014aa18a1f3776e5.tar dexon-solidity-ef59373871528ac72c447e5f014aa18a1f3776e5.tar.gz dexon-solidity-ef59373871528ac72c447e5f014aa18a1f3776e5.tar.bz2 dexon-solidity-ef59373871528ac72c447e5f014aa18a1f3776e5.tar.lz dexon-solidity-ef59373871528ac72c447e5f014aa18a1f3776e5.tar.xz dexon-solidity-ef59373871528ac72c447e5f014aa18a1f3776e5.tar.zst dexon-solidity-ef59373871528ac72c447e5f014aa18a1f3776e5.zip |
Solidity scanner and some unit tests.
The scanner is a modified version of the v8 javascript scanner.
-rw-r--r-- | CMakeLists.txt | 49 | ||||
-rw-r--r-- | Scanner.cpp | 653 | ||||
-rw-r--r-- | Scanner.h | 252 | ||||
-rw-r--r-- | Token.cpp | 77 | ||||
-rw-r--r-- | Token.h | 339 |
5 files changed, 1370 insertions, 0 deletions
# diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 00000000..59aa7836 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,49 @@
#
# Build script for the "solidity" library: every source file in this
# directory is compiled into one library target, which is then linked
# against evmface, devcore and the platform specific boost/thread libraries.

cmake_policy(SET CMP0015 NEW)

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DSTATICLIB")

# Collect all sources of the current directory.
aux_source_directory(. SRC_LIST)

# Name of the library target (the variable name is historical).
set(EXECUTABLE solidity)

# ETH_STATIC selects between a static and a shared library.
if(ETH_STATIC)
	add_library(${EXECUTABLE} STATIC ${SRC_LIST})
else()
	add_library(${EXECUTABLE} SHARED ${SRC_LIST})
endif()

# Headers are only globbed so that they can be installed below.
file(GLOB HEADERS "*.h")

include_directories(..)

target_link_libraries(${EXECUTABLE} evmface)
target_link_libraries(${EXECUTABLE} devcore)


if("${TARGET_PLATFORM}" STREQUAL "w64")
	target_link_libraries(${EXECUTABLE} boost_system-mt-s)
	target_link_libraries(${EXECUTABLE} boost_thread_win32-mt-s)
	target_link_libraries(${EXECUTABLE} iphlpapi)
	target_link_libraries(${EXECUTABLE} ws2_32)
	target_link_libraries(${EXECUTABLE} mswsock)
	target_link_libraries(${EXECUTABLE} shlwapi)
elseif (APPLE)
	# Latest mavericks boost libraries only come with -mt
	target_link_libraries(${EXECUTABLE} boost_system-mt)
	target_link_libraries(${EXECUTABLE} boost_thread-mt)
	find_package(Threads REQUIRED)
	target_link_libraries(${EXECUTABLE} ${CMAKE_THREAD_LIBS_INIT})
elseif (UNIX)
	target_link_libraries(${EXECUTABLE} ${Boost_SYSTEM_LIBRARY})
	target_link_libraries(${EXECUTABLE} ${Boost_THREAD_LIBRARY})
	# CMAKE_THREAD_LIBS_INIT is only defined once the Threads package has been
	# located; look it up here as the other branches do instead of relying on
	# a parent script having done so.
	find_package(Threads REQUIRED)
	target_link_libraries(${EXECUTABLE} ${CMAKE_THREAD_LIBS_INIT})
else ()
	target_link_libraries(${EXECUTABLE} boost_system)
	target_link_libraries(${EXECUTABLE} boost_thread)
	find_package(Threads REQUIRED)
	target_link_libraries(${EXECUTABLE} ${CMAKE_THREAD_LIBS_INIT})
endif ()

install( TARGETS ${EXECUTABLE} ARCHIVE DESTINATION lib LIBRARY DESTINATION lib )
install( FILES ${HEADERS} DESTINATION include/${EXECUTABLE} )

# diff --git a/Scanner.cpp b/Scanner.cpp new file mode 100644 index 00000000..101b4a1a --- /dev/null +++ b/Scanner.cpp @@ -0,0 +1,653 @@
# // Copyright 2006-2012, the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Modifications as part of cpp-ethereum under the following license: +// +// cpp-ethereum is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. 
+// +// cpp-ethereum is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. + +#include <libsolidity/Scanner.h> + +namespace dev { +namespace solidity { + +namespace { + bool IsDecimalDigit(char c) { + return '0' <= c && c <= '9'; + } + bool IsHexDigit(char c) { + return IsDecimalDigit(c) + || ('a' <= c && c <= 'f') + || ('A' <= c && c <= 'F'); + } + bool IsLineTerminator(char c) { return c == '\n'; } + bool IsWhiteSpace(char c) { + return c == ' ' || c == '\n' || c == '\t'; + } + bool IsIdentifierStart(char c) { + return c == '_' || c == '$' || ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'); + } + bool IsIdentifierPart(char c) { + return IsIdentifierStart(c) || IsDecimalDigit(c); + } + + int HexValue(char c) { + if (c >= '0' && c <= '9') return c - '0'; + else if (c >= 'a' && c <= 'f') return c - 'a' + 10; + else if (c >= 'A' && c <= 'F') return c - 'A' + 10; + else return -1; + } +} + +Scanner::Scanner(const CharStream& _source) +{ + reset(_source); +} + +void Scanner::reset(const CharStream& _source) +{ + m_source = _source; + + // Initialize current_ to not refer to a literal. 
+ m_current_token.token = Token::ILLEGAL; + m_current_token.literal.clear(); + + m_hasLineTerminatorBeforeNext = true; + m_hasMultilineCommentBeforeNext = false; + + m_char = m_source.get(); + skipWhitespace(); + scanToken(); +} + + +bool Scanner::scanHexNumber(char& scanned_number, int expected_length) +{ + BOOST_ASSERT(expected_length <= 4); // prevent overflow + + char x = 0; + for (int i = 0; i < expected_length; i++) { + int d = HexValue(m_char); + if (d < 0) { + rollback(i); + return false; + } + x = x * 16 + d; + advance(); + } + + scanned_number = x; + return true; +} + + +// Ensure that tokens can be stored in a byte. +BOOST_STATIC_ASSERT(Token::NUM_TOKENS <= 0x100); + +Token::Value Scanner::next() +{ + m_current_token = m_next_token; + m_hasLineTerminatorBeforeNext = false; + m_hasMultilineCommentBeforeNext = false; + scanToken(); + return m_current_token.token; +} + +bool Scanner::skipWhitespace() +{ + const int start_position = getSourcePos(); + + while (true) { + if (IsLineTerminator(m_char)) { + m_hasLineTerminatorBeforeNext = true; + } else if (!IsWhiteSpace(m_char)) { + break; + } + advance(); + } + + // Return whether or not we skipped any characters. + return getSourcePos() != start_position; +} + + +Token::Value Scanner::skipSingleLineComment() +{ + // The line terminator at the end of the line is not considered + // to be part of the single-line comment; it is recognized + // separately by the lexical grammar and becomes part of the + // stream of input elements for the syntactic grammar + while (advance() && !IsLineTerminator(m_char)) { }; + + return Token::WHITESPACE; +} + +Token::Value Scanner::skipMultiLineComment() +{ + BOOST_ASSERT(m_char == '*'); + advance(); + + while (!isSourcePastEndOfInput()) { + char ch = m_char; + advance(); + if (IsLineTerminator(ch)) { + // Following ECMA-262, section 7.4, a comment containing + // a newline will make the comment count as a line-terminator. 
+ m_hasMultilineCommentBeforeNext = true; + } + // If we have reached the end of the multi-line comment, we + // consume the '/' and insert a whitespace. This way all + // multi-line comments are treated as whitespace. + if (ch == '*' && m_char == '/') { + m_char = ' '; + return Token::WHITESPACE; + } + } + + // Unterminated multi-line comment. + return Token::ILLEGAL; +} + +void Scanner::scanToken() +{ + m_next_token.literal.clear(); + Token::Value token; + do { + // Remember the position of the next token + m_next_token.location.beg_pos = getSourcePos(); + + switch (m_char) { + case '\n': + m_hasLineTerminatorBeforeNext = true; // fall-through + case ' ': + case '\t': + token = selectToken(Token::WHITESPACE); + break; + + case '"': case '\'': + token = scanString(); + break; + + case '<': + // < <= << <<= + advance(); + if (m_char == '=') { + token = selectToken(Token::LTE); + } else if (m_char == '<') { + token = selectToken('=', Token::ASSIGN_SHL, Token::SHL); + } else { + token = Token::LT; + } + break; + + case '>': + // > >= >> >>= >>> >>>= + advance(); + if (m_char == '=') { + token = selectToken(Token::GTE); + } else if (m_char == '>') { + // >> >>= >>> >>>= + advance(); + if (m_char == '=') { + token = selectToken(Token::ASSIGN_SAR); + } else if (m_char == '>') { + token = selectToken('=', Token::ASSIGN_SHR, Token::SHR); + } else { + token = Token::SAR; + } + } else { + token = Token::GT; + } + break; + + case '=': + // = == => + advance(); + if (m_char == '=') { + token = selectToken(Token::EQ); + } else if (m_char == '>') { + token = selectToken(Token::ARROW); + } else { + token = Token::ASSIGN; + } + break; + + case '!': + // ! 
!= !== + advance(); + if (m_char == '=') { + token = selectToken(Token::NE); + } else { + token = Token::NOT; + } + break; + + case '+': + // + ++ += + advance(); + if (m_char == '+') { + token = selectToken(Token::INC); + } else if (m_char == '=') { + token = selectToken(Token::ASSIGN_ADD); + } else { + token = Token::ADD; + } + break; + + case '-': + // - -- -= + advance(); + if (m_char == '-') { + advance(); + token = Token::DEC; + } else if (m_char == '=') { + token = selectToken(Token::ASSIGN_SUB); + } else { + token = Token::SUB; + } + break; + + case '*': + // * *= + token = selectToken('=', Token::ASSIGN_MUL, Token::MUL); + break; + + case '%': + // % %= + token = selectToken('=', Token::ASSIGN_MOD, Token::MOD); + break; + + case '/': + // / // /* /= + advance(); + if (m_char == '/') { + token = skipSingleLineComment(); + } else if (m_char == '*') { + token = skipMultiLineComment(); + } else if (m_char == '=') { + token = selectToken(Token::ASSIGN_DIV); + } else { + token = Token::DIV; + } + break; + + case '&': + // & && &= + advance(); + if (m_char == '&') { + token = selectToken(Token::AND); + } else if (m_char == '=') { + token = selectToken(Token::ASSIGN_BIT_AND); + } else { + token = Token::BIT_AND; + } + break; + + case '|': + // | || |= + advance(); + if (m_char == '|') { + token = selectToken(Token::OR); + } else if (m_char == '=') { + token = selectToken(Token::ASSIGN_BIT_OR); + } else { + token = Token::BIT_OR; + } + break; + + case '^': + // ^ ^= + token = selectToken('=', Token::ASSIGN_BIT_XOR, Token::BIT_XOR); + break; + + case '.': + // . 
Number + advance(); + if (IsDecimalDigit(m_char)) { + token = scanNumber(true); + } else { + token = Token::PERIOD; + } + break; + + case ':': + token = selectToken(Token::COLON); + break; + + case ';': + token = selectToken(Token::SEMICOLON); + break; + + case ',': + token = selectToken(Token::COMMA); + break; + + case '(': + token = selectToken(Token::LPAREN); + break; + + case ')': + token = selectToken(Token::RPAREN); + break; + + case '[': + token = selectToken(Token::LBRACK); + break; + + case ']': + token = selectToken(Token::RBRACK); + break; + + case '{': + token = selectToken(Token::LBRACE); + break; + + case '}': + token = selectToken(Token::RBRACE); + break; + + case '?': + token = selectToken(Token::CONDITIONAL); + break; + + case '~': + token = selectToken(Token::BIT_NOT); + break; + + default: + if (IsIdentifierStart(m_char)) { + token = scanIdentifierOrKeyword(); + } else if (IsDecimalDigit(m_char)) { + token = scanNumber(false); + } else if (skipWhitespace()) { + token = Token::WHITESPACE; + } else if (isSourcePastEndOfInput()) { + token = Token::EOS; + } else { + token = selectToken(Token::ILLEGAL); + } + break; + } + + // Continue scanning for tokens as long as we're just skipping + // whitespace. + } while (token == Token::WHITESPACE); + + m_next_token.location.end_pos = getSourcePos(); + m_next_token.token = token; +} + +bool Scanner::scanEscape() +{ + char c = m_char; + advance(); + + // Skip escaped newlines. 
+ if (IsLineTerminator(c)) + return true; + + switch (c) { + case '\'': // fall through + case '"' : // fall through + case '\\': break; + case 'b' : c = '\b'; break; + case 'f' : c = '\f'; break; + case 'n' : c = '\n'; break; + case 'r' : c = '\r'; break; + case 't' : c = '\t'; break; + case 'u' : { + if (!scanHexNumber(c, 4)) return false; + break; + } + case 'v' : c = '\v'; break; + case 'x' : { + if (!scanHexNumber(c, 2)) return false; + break; + } + } + + // According to ECMA-262, section 7.8.4, characters not covered by the + // above cases should be illegal, but they are commonly handled as + // non-escaped characters by JS VMs. + addLiteralChar(c); + return true; +} + +Token::Value Scanner::scanString() +{ + const char quote = m_char; + advance(); // consume quote + + LiteralScope literal(this); + while (m_char != quote && !isSourcePastEndOfInput() && !IsLineTerminator(m_char)) { + char c = m_char; + advance(); + if (c == '\\') { + if (isSourcePastEndOfInput() || !scanEscape()) return Token::ILLEGAL; + } else { + addLiteralChar(c); + } + } + if (m_char != quote) return Token::ILLEGAL; + literal.Complete(); + + advance(); // consume quote + return Token::STRING; +} + + +void Scanner::scanDecimalDigits() +{ + while (IsDecimalDigit(m_char)) + addLiteralCharAndAdvance(); +} + + +Token::Value Scanner::scanNumber(bool _periodSeen) +{ + BOOST_ASSERT(IsDecimalDigit(m_char)); // the first digit of the number or the fraction + + enum { DECIMAL, HEX, OCTAL, IMPLICIT_OCTAL, BINARY } kind = DECIMAL; + + LiteralScope literal(this); + if (_periodSeen) { + // we have already seen a decimal point of the float + addLiteralChar('.'); + scanDecimalDigits(); // we know we have at least one digit + } else { + // if the first character is '0' we must check for octals and hex + if (m_char == '0') { + addLiteralCharAndAdvance(); + + // either 0, 0exxx, 0Exxx, 0.xxx, a hex number, a binary number or + // an octal number. 
+ if (m_char == 'x' || m_char == 'X') { + // hex number + kind = HEX; + addLiteralCharAndAdvance(); + if (!IsHexDigit(m_char)) { + // we must have at least one hex digit after 'x'/'X' + return Token::ILLEGAL; + } + while (IsHexDigit(m_char)) { + addLiteralCharAndAdvance(); + } + } + } + + // Parse decimal digits and allow trailing fractional part. + if (kind == DECIMAL) { + scanDecimalDigits(); // optional + if (m_char == '.') { + addLiteralCharAndAdvance(); + scanDecimalDigits(); // optional + } + } + } + + // scan exponent, if any + if (m_char == 'e' || m_char == 'E') { + BOOST_ASSERT(kind != HEX); // 'e'/'E' must be scanned as part of the hex number + if (kind != DECIMAL) return Token::ILLEGAL; + // scan exponent + addLiteralCharAndAdvance(); + if (m_char == '+' || m_char == '-') + addLiteralCharAndAdvance(); + if (!IsDecimalDigit(m_char)) { + // we must have at least one decimal digit after 'e'/'E' + return Token::ILLEGAL; + } + scanDecimalDigits(); + } + + // The source character immediately following a numeric literal must + // not be an identifier start or a decimal digit; see ECMA-262 + // section 7.8.3, page 17 (note that we read only one decimal digit + // if the value is 0). 
+ if (IsDecimalDigit(m_char) || IsIdentifierStart(m_char)) + return Token::ILLEGAL; + + literal.Complete(); + + return Token::NUMBER; +} + + +// ---------------------------------------------------------------------------- +// Keyword Matcher + +#define KEYWORDS(KEYWORD_GROUP, KEYWORD) \ + KEYWORD_GROUP('b') \ + KEYWORD("break", Token::BREAK) \ + KEYWORD_GROUP('c') \ + KEYWORD("case", Token::CASE) \ + KEYWORD("catch", Token::CATCH) \ + KEYWORD("const", Token::CONST) \ + KEYWORD("continue", Token::CONTINUE) \ + KEYWORD_GROUP('d') \ + KEYWORD("debugger", Token::DEBUGGER) \ + KEYWORD("default", Token::DEFAULT) \ + KEYWORD("delete", Token::DELETE) \ + KEYWORD("do", Token::DO) \ + KEYWORD_GROUP('e') \ + KEYWORD("else", Token::ELSE) \ + KEYWORD("enum", Token::FUTURE_RESERVED_WORD) \ + KEYWORD_GROUP('f') \ + KEYWORD("false", Token::FALSE_LITERAL) \ + KEYWORD("finally", Token::FINALLY) \ + KEYWORD("for", Token::FOR) \ + KEYWORD("function", Token::FUNCTION) \ + KEYWORD_GROUP('i') \ + KEYWORD("if", Token::IF) \ + KEYWORD("implements", Token::FUTURE_STRICT_RESERVED_WORD) \ + KEYWORD("in", Token::IN) \ + KEYWORD("instanceof", Token::INSTANCEOF) \ + KEYWORD("interface", Token::FUTURE_STRICT_RESERVED_WORD) \ + KEYWORD_GROUP('l') \ + KEYWORD_GROUP('n') \ + KEYWORD("new", Token::NEW) \ + KEYWORD("null", Token::NULL_LITERAL) \ + KEYWORD_GROUP('p') \ + KEYWORD("package", Token::FUTURE_STRICT_RESERVED_WORD) \ + KEYWORD("private", Token::FUTURE_STRICT_RESERVED_WORD) \ + KEYWORD("protected", Token::FUTURE_STRICT_RESERVED_WORD) \ + KEYWORD("public", Token::FUTURE_STRICT_RESERVED_WORD) \ + KEYWORD_GROUP('r') \ + KEYWORD("return", Token::RETURN) \ + KEYWORD_GROUP('s') \ + KEYWORD("switch", Token::SWITCH) \ + KEYWORD_GROUP('t') \ + KEYWORD("this", Token::THIS) \ + KEYWORD("throw", Token::THROW) \ + KEYWORD("true", Token::TRUE_LITERAL) \ + KEYWORD("try", Token::TRY) \ + KEYWORD("typeof", Token::TYPEOF) \ + KEYWORD_GROUP('v') \ + KEYWORD("var", Token::VAR) \ + KEYWORD("void", Token::VOID) \ + 
KEYWORD_GROUP('w') \ + KEYWORD("while", Token::WHILE) \ + KEYWORD("with", Token::WITH) + + +static Token::Value KeywordOrIdentifierToken(const std::string& input) +{ + BOOST_ASSERT(!input.empty()); + const int kMinLength = 2; + const int kMaxLength = 10; + if (input.size() < kMinLength || input.size() > kMaxLength) { + return Token::IDENTIFIER; + } + switch (input[0]) { + default: +#define KEYWORD_GROUP_CASE(ch) \ + break; \ + case ch: +#define KEYWORD(keyword, token) \ + { \ + /* 'keyword' is a char array, so sizeof(keyword) is */ \ + /* strlen(keyword) plus 1 for the NUL char. */ \ + const int keyword_length = sizeof(keyword) - 1; \ + BOOST_STATIC_ASSERT(keyword_length >= kMinLength); \ + BOOST_STATIC_ASSERT(keyword_length <= kMaxLength); \ + if (input == keyword) { \ + return token; \ + } \ + } + KEYWORDS(KEYWORD_GROUP_CASE, KEYWORD) + } + return Token::IDENTIFIER; +} + +Token::Value Scanner::scanIdentifierOrKeyword() +{ + BOOST_ASSERT(IsIdentifierStart(m_char)); + LiteralScope literal(this); + + addLiteralCharAndAdvance(); + + // Scan the rest of the identifier characters. + while (IsIdentifierPart(m_char)) + addLiteralCharAndAdvance(); + + literal.Complete(); + + return KeywordOrIdentifierToken(m_next_token.literal); +} + + +} } diff --git a/Scanner.h b/Scanner.h new file mode 100644 index 00000000..3cf52fbc --- /dev/null +++ b/Scanner.h @@ -0,0 +1,252 @@ +// Copyright 2006-2012, the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. 
+// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Modifications as part of cpp-ethereum under the following license: +// +// cpp-ethereum is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// cpp-ethereum is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. 
+ +#pragma once + +#include <boost/assert.hpp> + +#include <libdevcore/Common.h> +#include <libdevcore/Log.h> +#include <libdevcore/CommonData.h> +#include <libsolidity/Token.h> + +namespace dev { +namespace solidity { + + +class AstRawString; +class AstValueFactory; +class ParserRecorder; + +class CharStream { +public: + CharStream() + : m_pos(0) + {} + + explicit CharStream(const std::string& _source) + : m_source(_source), m_pos(0) + {} + int getPos() const { return m_pos; } + bool isPastEndOfInput() const { return m_pos >= m_source.size(); } + char get() const { return m_source[m_pos]; } + char advanceAndGet() { + if (isPastEndOfInput()) return 0; + ++m_pos; + if (isPastEndOfInput()) return 0; + return get(); + } + char rollback(size_t _amount) { + BOOST_ASSERT(m_pos >= _amount); + m_pos -= _amount; + return get(); + } + +private: + std::string m_source; + size_t m_pos; +}; + +// ---------------------------------------------------------------------------- +// JavaScript Scanner. + +class Scanner { +public: + // Scoped helper for literal recording. Automatically drops the literal + // if aborting the scanning before it's complete. + class LiteralScope { + public: + explicit LiteralScope(Scanner* self) + : scanner_(self), complete_(false) { + scanner_->startNewLiteral(); + } + ~LiteralScope() { + if (!complete_) scanner_->dropLiteral(); + } + void Complete() { + complete_ = true; + } + + private: + Scanner* scanner_; + bool complete_; + }; + + // Representation of an interval of source positions. + struct Location { + Location(int b, int e) : beg_pos(b), end_pos(e) { } + Location() : beg_pos(0), end_pos(0) { } + + bool IsValid() const { + return beg_pos >= 0 && end_pos >= beg_pos; + } + + static Location invalid() { return Location(-1, -1); } + + int beg_pos; + int end_pos; + }; + + explicit Scanner(const CharStream& _source); + + // Resets the scanner as if newly constructed with _input as input. 
+ void reset(const CharStream& _source); + + // Returns the next token and advances input. + Token::Value next(); + // Returns the current token again. + Token::Value getCurrentToken() { return m_current_token.token; } + // Returns the location information for the current token + // (the token last returned by Next()). + Location getCurrentLocation() const { return m_current_token.location; } + const std::string& getCurrentLiteral() const { return m_current_token.literal; } + + // Similar functions for the upcoming token. + + // One token look-ahead (past the token returned by Next()). + Token::Value peek() const { return m_next_token.token; } + + Location peekLocation() const { return m_next_token.location; } + const std::string& peekLiteral() const { return m_next_token.literal; } + + // Returns true if there was a line terminator before the peek'ed token, + // possibly inside a multi-line comment. + bool hasAnyLineTerminatorBeforeNext() const { + return m_hasLineTerminatorBeforeNext || + m_hasMultilineCommentBeforeNext; + } + +private: + // Used for the current and look-ahead token. + struct TokenDesc { + Token::Value token; + Location location; + std::string literal; + }; + + static const int kCharacterLookaheadBufferSize = 1; + + // Literal buffer support + inline void startNewLiteral() { + m_next_token.literal.clear(); + } + + inline void addLiteralChar(char c) { + m_next_token.literal.push_back(c); + } + + inline void dropLiteral() { + m_next_token.literal.clear(); + } + + inline void addLiteralCharAndAdvance() { + addLiteralChar(m_char); + advance(); + } + + // Low-level scanning support. 
+ bool advance() { m_char = m_source.advanceAndGet(); return !m_source.isPastEndOfInput(); } + void rollback(int amount) { + m_char = m_source.rollback(amount); + } + + inline Token::Value selectToken(Token::Value tok) { + advance(); + return tok; + } + + inline Token::Value selectToken(char next, Token::Value then, Token::Value else_) { + advance(); + if (m_char == next) { + advance(); + return then; + } else { + return else_; + } + } + + bool scanHexNumber(char& scanned_number, int expected_length); + + // Scans a single JavaScript token. + void scanToken(); + + bool skipWhitespace(); + Token::Value skipSingleLineComment(); + Token::Value skipMultiLineComment(); + + void scanDecimalDigits(); + Token::Value scanNumber(bool _periodSeen); + Token::Value scanIdentifierOrKeyword(); + + Token::Value scanString(); + + // Scans an escape-sequence which is part of a string and adds the + // decoded character to the current literal. Returns true if a pattern + // is scanned. + bool scanEscape(); + + // Return the current source position. + int getSourcePos() { + return m_source.getPos(); + } + bool isSourcePastEndOfInput() { + return m_source.isPastEndOfInput(); + } + + TokenDesc m_current_token; // desc for current token (as returned by Next()) + TokenDesc m_next_token; // desc for next token (one token look-ahead) + + CharStream m_source; + + // one character look-ahead, equals 0 at end of input + char m_char; + + // Whether there is a line terminator whitespace character after + // the current token, and before the next. Does not count newlines + // inside multiline comments. + bool m_hasLineTerminatorBeforeNext; + // Whether there is a multi-line comment that contains a + // line-terminator after the current token, and before the next. + bool m_hasMultilineCommentBeforeNext; +}; + +} } diff --git a/Token.cpp b/Token.cpp new file mode 100644 index 00000000..6ae6456a --- /dev/null +++ b/Token.cpp @@ -0,0 +1,77 @@ +// Copyright 2006-2012, the V8 project authors. 
All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Modifications as part of cpp-ethereum under the following license: +// +// cpp-ethereum is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. 
+// +// cpp-ethereum is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. + +#include <libsolidity/Token.h> + +namespace dev { +namespace solidity { + +#define T(name, string, precedence) #name, +const char* const Token::m_name[NUM_TOKENS] = { + TOKEN_LIST(T, T) +}; +#undef T + + +#define T(name, string, precedence) string, +const char* const Token::m_string[NUM_TOKENS] = { + TOKEN_LIST(T, T) +}; +#undef T + + +#define T(name, string, precedence) precedence, +const int8_t Token::m_precedence[NUM_TOKENS] = { + TOKEN_LIST(T, T) +}; +#undef T + + +#define KT(a, b, c) 'T', +#define KK(a, b, c) 'K', +const char Token::m_tokenType[] = { + TOKEN_LIST(KT, KK) +}; +#undef KT +#undef KK + +} } diff --git a/Token.h b/Token.h new file mode 100644 index 00000000..4f5ec194 --- /dev/null +++ b/Token.h @@ -0,0 +1,339 @@ +// Copyright 2006-2012, the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Modifications as part of cpp-ethereum under the following license: +// +// cpp-ethereum is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// cpp-ethereum is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. + +#pragma once + +#include <boost/assert.hpp> + +#include <libdevcore/Common.h> +#include <libdevcore/Log.h> + +namespace dev { +namespace solidity { + +// TOKEN_LIST takes a list of 3 macros M, all of which satisfy the +// same signature M(name, string, precedence), where name is the +// symbolic token name, string is the corresponding syntactic symbol +// (or NULL, for literals), and precedence is the precedence (or 0). 
+// The parameters are invoked for token categories as follows: +// +// T: Non-keyword tokens +// K: Keyword tokens + +// IGNORE_TOKEN is a convenience macro that can be supplied as +// an argument (at any position) for a TOKEN_LIST call. It does +// nothing with tokens belonging to the respective category. + +#define IGNORE_TOKEN(name, string, precedence) + +#define TOKEN_LIST(T, K) \ + /* End of source indicator. */ \ + T(EOS, "EOS", 0) \ + \ + /* Punctuators (ECMA-262, section 7.7, page 15). */ \ + T(LPAREN, "(", 0) \ + T(RPAREN, ")", 0) \ + T(LBRACK, "[", 0) \ + T(RBRACK, "]", 0) \ + T(LBRACE, "{", 0) \ + T(RBRACE, "}", 0) \ + T(COLON, ":", 0) \ + T(SEMICOLON, ";", 0) \ + T(PERIOD, ".", 0) \ + T(CONDITIONAL, "?", 3) \ + T(INC, "++", 0) \ + T(DEC, "--", 0) \ + T(ARROW, "=>", 0) \ + \ + /* Assignment operators. */ \ + /* IsAssignmentOp() and Assignment::is_compound() rely on */ \ + /* this block of enum values being contiguous and sorted in the */ \ + /* same order! */ \ + T(INIT_VAR, "=init_var", 2) /* AST-use only. */ \ + T(INIT_LET, "=init_let", 2) /* AST-use only. */ \ + T(INIT_CONST, "=init_const", 2) /* AST-use only. */ \ + T(INIT_CONST_LEGACY, "=init_const_legacy", 2) /* AST-use only. */ \ + T(ASSIGN, "=", 2) \ + T(ASSIGN_BIT_OR, "|=", 2) \ + T(ASSIGN_BIT_XOR, "^=", 2) \ + T(ASSIGN_BIT_AND, "&=", 2) \ + T(ASSIGN_SHL, "<<=", 2) \ + T(ASSIGN_SAR, ">>=", 2) \ + T(ASSIGN_SHR, ">>>=", 2) \ + T(ASSIGN_ADD, "+=", 2) \ + T(ASSIGN_SUB, "-=", 2) \ + T(ASSIGN_MUL, "*=", 2) \ + T(ASSIGN_DIV, "/=", 2) \ + T(ASSIGN_MOD, "%=", 2) \ + \ + /* Binary operators sorted by precedence. */ \ + /* IsBinaryOp() relies on this block of enum values */ \ + /* being contiguous and sorted in the same order!
*/ \ + T(COMMA, ",", 1) \ + T(OR, "||", 4) \ + T(AND, "&&", 5) \ + T(BIT_OR, "|", 6) \ + T(BIT_XOR, "^", 7) \ + T(BIT_AND, "&", 8) \ + T(SHL, "<<", 11) \ + T(SAR, ">>", 11) \ + T(SHR, ">>>", 11) \ + T(ROR, "rotate right", 11) /* only used by Crankshaft */ \ + T(ADD, "+", 12) \ + T(SUB, "-", 12) \ + T(MUL, "*", 13) \ + T(DIV, "/", 13) \ + T(MOD, "%", 13) \ + \ + /* Compare operators sorted by precedence. */ \ + /* IsCompareOp() relies on this block of enum values */ \ + /* being contiguous and sorted in the same order! */ \ + T(EQ, "==", 9) \ + T(NE, "!=", 9) \ + T(EQ_STRICT, "===", 9) \ + T(NE_STRICT, "!==", 9) \ + T(LT, "<", 10) \ + T(GT, ">", 10) \ + T(LTE, "<=", 10) \ + T(GTE, ">=", 10) \ + K(INSTANCEOF, "instanceof", 10) \ + K(IN, "in", 10) \ + \ + /* Unary operators. */ \ + /* IsUnaryOp() relies on this block of enum values */ \ + /* being contiguous and sorted in the same order! */ \ + T(NOT, "!", 0) \ + T(BIT_NOT, "~", 0) \ + K(DELETE, "delete", 0) \ + K(TYPEOF, "typeof", 0) \ + K(VOID, "void", 0) \ + \ + /* Keywords (ECMA-262, section 7.5.2, page 13). */ \ + K(BREAK, "break", 0) \ + K(CASE, "case", 0) \ + K(CATCH, "catch", 0) \ + K(CONTINUE, "continue", 0) \ + K(DEBUGGER, "debugger", 0) \ + K(DEFAULT, "default", 0) \ + /* DELETE */ \ + K(DO, "do", 0) \ + K(ELSE, "else", 0) \ + K(FINALLY, "finally", 0) \ + K(FOR, "for", 0) \ + K(FUNCTION, "function", 0) \ + K(IF, "if", 0) \ + /* IN */ \ + /* INSTANCEOF */ \ + K(NEW, "new", 0) \ + K(RETURN, "return", 0) \ + K(SWITCH, "switch", 0) \ + K(THIS, "this", 0) \ + K(THROW, "throw", 0) \ + K(TRY, "try", 0) \ + /* TYPEOF */ \ + K(VAR, "var", 0) \ + /* VOID */ \ + K(WHILE, "while", 0) \ + K(WITH, "with", 0) \ + \ + /* Literals (ECMA-262, section 7.8, page 16). */ \ + K(NULL_LITERAL, "null", 0) \ + K(TRUE_LITERAL, "true", 0) \ + K(FALSE_LITERAL, "false", 0) \ + T(NUMBER, NULL, 0) \ + T(STRING, NULL, 0) \ + \ + /* Identifiers (not keywords or future reserved words).
*/ \ + T(IDENTIFIER, NULL, 0) \ + \ + /* Future reserved words (ECMA-262, section 7.6.1.2). */ \ + T(FUTURE_RESERVED_WORD, NULL, 0) \ + T(FUTURE_STRICT_RESERVED_WORD, NULL, 0) \ + K(CLASS, "class", 0) \ + K(CONST, "const", 0) \ + K(EXPORT, "export", 0) \ + K(EXTENDS, "extends", 0) \ + K(IMPORT, "import", 0) \ + K(LET, "let", 0) \ + K(STATIC, "static", 0) \ +/* K(YIELD, "yield", 0) */ \ + K(SUPER, "super", 0) \ + \ + /* Illegal token - not able to scan. */ \ + T(ILLEGAL, "ILLEGAL", 0) \ + \ + /* Scanner-internal use only. */ \ + T(WHITESPACE, NULL, 0) + + +class Token { + public: + // All token values, in TOKEN_LIST order; NUM_TOKENS is the number of tokens. +#define T(name, string, precedence) name, + enum Value { + TOKEN_LIST(T, T) + NUM_TOKENS + }; +#undef T + + // Returns a string corresponding to the C++ token name + // (e.g. "LT" for the token LT). + static const char* Name(Value tok) { + BOOST_ASSERT(tok < NUM_TOKENS); // tok is unsigned + return m_name[tok]; + } + + // Predicates. The range checks rely on the enum order produced by TOKEN_LIST. + static bool IsKeyword(Value tok) { + return m_tokenType[tok] == 'K'; // NOTE(review): tok is not range-checked here. + } + + static bool IsIdentifier(Value tok) { + return tok == IDENTIFIER; + } + + static bool IsAssignmentOp(Value tok) { + return INIT_VAR <= tok && tok <= ASSIGN_MOD; + } + + static bool IsBinaryOp(Value op) { + return COMMA <= op && op <= MOD; + } + + static bool IsTruncatingBinaryOp(Value op) { + return BIT_OR <= op && op <= ROR; + } + + static bool IsCompareOp(Value op) { + return EQ <= op && op <= IN; + } + + static bool IsOrderedRelationalCompareOp(Value op) { + return op == LT || op == LTE || op == GT || op == GTE; + } + + static bool IsEqualityOp(Value op) { + return op == EQ || op == EQ_STRICT; + } + + static bool IsInequalityOp(Value op) { + return op == NE || op == NE_STRICT; + } + + static bool IsArithmeticCompareOp(Value op) { + return IsOrderedRelationalCompareOp(op) || + IsEqualityOp(op) || IsInequalityOp(op); + } + // Maps op to its logical negation, e.g. LT -> GTE, EQ -> NE. + static Value NegateCompareOp(Value op) { + BOOST_ASSERT(IsArithmeticCompareOp(op)); + switch (op) { + case EQ: return NE; + case NE:
return EQ; + case EQ_STRICT: return NE_STRICT; + case NE_STRICT: return EQ_STRICT; + case LT: return GTE; + case GT: return LTE; + case LTE: return GT; + case GTE: return LT; + default: + BOOST_ASSERT(false); // should not get here + return op; + } + } + // Maps op to its operand-swapped equivalent, e.g. LT -> GT, EQ -> EQ. + static Value ReverseCompareOp(Value op) { + BOOST_ASSERT(IsArithmeticCompareOp(op)); + switch (op) { + case EQ: return EQ; + case NE: return NE; + case EQ_STRICT: return EQ_STRICT; + case NE_STRICT: return NE_STRICT; + case LT: return GT; + case GT: return LT; + case LTE: return GTE; + case GTE: return LTE; + default: + BOOST_ASSERT(false); // should not get here + return op; + } + } + + static bool IsBitOp(Value op) { + return (BIT_OR <= op && op <= SHR) || op == BIT_NOT; + } + + static bool IsUnaryOp(Value op) { + return (NOT <= op && op <= VOID) || op == ADD || op == SUB; + } + + static bool IsCountOp(Value op) { + return op == INC || op == DEC; + } + + static bool IsShiftOp(Value op) { + return (SHL <= op) && (op <= SHR); + } + + // Returns a string corresponding to the JS token string + // (i.e., "<" for the token LT) or NULL if the token doesn't + // have a (unique) string (e.g. an IDENTIFIER). + static const char* String(Value tok) { + BOOST_ASSERT(tok < NUM_TOKENS); // tok is unsigned. + return m_string[tok]; + } + + // Returns the precedence from TOKEN_LIST: > 0 for binary, compare, + // assignment and conditional operators; returns 0 otherwise. + static int Precedence(Value tok) { + BOOST_ASSERT(tok < NUM_TOKENS); // tok is unsigned. + return m_precedence[tok]; + } + + private: + static const char* const m_name[NUM_TOKENS]; + static const char* const m_string[NUM_TOKENS]; + static const int8_t m_precedence[NUM_TOKENS]; + static const char m_tokenType[NUM_TOKENS]; +}; + +} } |