aboutsummaryrefslogtreecommitdiffstats
path: root/liblangutil/Scanner.h
diff options
context:
space:
mode:
authorChristian Parpart <christian@ethereum.org>2018-11-14 21:59:30 +0800
committerAlex Beregszaszi <alex@rtfs.hu>2018-11-22 02:58:12 +0800
commit87821c53c3a73d3e35a0e50a7c159d9aa5d6b253 (patch)
treed0335db1bfe77f91168e14599e943105b7c9a2c3 /liblangutil/Scanner.h
parentd47707abaddfd6820d8ff1f9ec5ec0d2a61ee622 (diff)
downloaddexon-solidity-87821c53c3a73d3e35a0e50a7c159d9aa5d6b253.tar
dexon-solidity-87821c53c3a73d3e35a0e50a7c159d9aa5d6b253.tar.gz
dexon-solidity-87821c53c3a73d3e35a0e50a7c159d9aa5d6b253.tar.bz2
dexon-solidity-87821c53c3a73d3e35a0e50a7c159d9aa5d6b253.tar.lz
dexon-solidity-87821c53c3a73d3e35a0e50a7c159d9aa5d6b253.tar.xz
dexon-solidity-87821c53c3a73d3e35a0e50a7c159d9aa5d6b253.tar.zst
dexon-solidity-87821c53c3a73d3e35a0e50a7c159d9aa5d6b253.zip
Isolating files shared between Yul- and Solidity language frontend.
Diffstat (limited to 'liblangutil/Scanner.h')
-rw-r--r--liblangutil/Scanner.h249
1 files changed, 249 insertions, 0 deletions
diff --git a/liblangutil/Scanner.h b/liblangutil/Scanner.h
new file mode 100644
index 00000000..d89f0c84
--- /dev/null
+++ b/liblangutil/Scanner.h
@@ -0,0 +1,249 @@
+/*
+ This file is part of solidity.
+
+ solidity is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ solidity is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with solidity. If not, see <http://www.gnu.org/licenses/>.
+
+ This file is derived from the file "scanner.h", which was part of the
+ V8 project. The original copyright header follows:
+
+ Copyright 2006-2012, the V8 project authors. All rights reserved.
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following
+ disclaimer in the documentation and/or other materials provided
+ with the distribution.
+ * Neither the name of Google Inc. nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+/**
+ * @author Christian <c@ethdev.com>
+ * @date 2014
+ * Solidity scanner.
+ */
+
+#pragma once
+
+#include <libdevcore/Common.h>
+#include <libdevcore/CommonData.h>
+#include <liblangutil/SourceLocation.h>
+#include <liblangutil/Token.h>
+
+namespace dev
+{
+namespace solidity
+{
+
+
+class AstRawString;
+class AstValueFactory;
+class ParserRecorder;
+
+class CharStream
+{
+public:
+ CharStream(): m_position(0) {}
+ explicit CharStream(std::string const& _source): m_source(_source), m_position(0) {}
+ int position() const { return m_position; }
+ bool isPastEndOfInput(size_t _charsForward = 0) const { return (m_position + _charsForward) >= m_source.size(); }
+ char get(size_t _charsForward = 0) const { return m_source[m_position + _charsForward]; }
+ char advanceAndGet(size_t _chars = 1);
+ char rollback(size_t _amount);
+
+ void reset() { m_position = 0; }
+
+ std::string const& source() const { return m_source; }
+
+ ///@{
+ ///@name Error printing helper functions
+ /// Functions that help pretty-printing parse errors
+ /// Do only use in error cases, they are quite expensive.
+ std::string lineAtPosition(int _position) const;
+ std::tuple<int, int> translatePositionToLineColumn(int _position) const;
+ ///@}
+
+private:
+ std::string m_source;
+ size_t m_position;
+};
+
+
+
+class Scanner
+{
+ friend class LiteralScope;
+public:
+
+ explicit Scanner(CharStream const& _source = CharStream(), std::string const& _sourceName = "") { reset(_source, _sourceName); }
+
+ std::string source() const { return m_source.source(); }
+
+ /// Resets the scanner as if newly constructed with _source and _sourceName as input.
+ void reset(CharStream const& _source, std::string const& _sourceName);
+ /// Resets scanner to the start of input.
+ void reset();
+
+ /// @returns the next token and advances input
+ Token next();
+
+ ///@{
+ ///@name Information about the current token
+
+ /// @returns the current token
+ Token currentToken() const
+ {
+ return m_currentToken.token;
+ }
+ ElementaryTypeNameToken currentElementaryTypeNameToken() const
+ {
+ unsigned firstSize;
+ unsigned secondSize;
+ std::tie(firstSize, secondSize) = m_currentToken.extendedTokenInfo;
+ return ElementaryTypeNameToken(m_currentToken.token, firstSize, secondSize);
+ }
+
+ SourceLocation currentLocation() const { return m_currentToken.location; }
+ std::string const& currentLiteral() const { return m_currentToken.literal; }
+ std::tuple<unsigned, unsigned> const& currentTokenInfo() const { return m_currentToken.extendedTokenInfo; }
+ ///@}
+
+ ///@{
+ ///@name Information about the current comment token
+
+ SourceLocation currentCommentLocation() const { return m_skippedComment.location; }
+ std::string const& currentCommentLiteral() const { return m_skippedComment.literal; }
+ /// Called by the parser during FunctionDefinition parsing to clear the current comment
+ void clearCurrentCommentLiteral() { m_skippedComment.literal.clear(); }
+
+ ///@}
+
+ ///@{
+ ///@name Information about the next token
+
+ /// @returns the next token without advancing input.
+ Token peekNextToken() const { return m_nextToken.token; }
+ SourceLocation peekLocation() const { return m_nextToken.location; }
+ std::string const& peekLiteral() const { return m_nextToken.literal; }
+ ///@}
+
+ std::shared_ptr<std::string const> const& sourceName() const { return m_sourceName; }
+
+ ///@{
+ ///@name Error printing helper functions
+ /// Functions that help pretty-printing parse errors
+ /// Do only use in error cases, they are quite expensive.
+ std::string lineAtPosition(int _position) const { return m_source.lineAtPosition(_position); }
+ std::tuple<int, int> translatePositionToLineColumn(int _position) const { return m_source.translatePositionToLineColumn(_position); }
+ std::string sourceAt(SourceLocation const& _location) const
+ {
+ solAssert(!_location.isEmpty(), "");
+ solAssert(m_sourceName && _location.sourceName, "");
+ solAssert(*m_sourceName == *_location.sourceName, "");
+ return m_source.source().substr(_location.start, _location.end - _location.start);
+ }
+ ///@}
+
+private:
+ /// Used for the current and look-ahead token and comments
+ struct TokenDesc
+ {
+ Token token;
+ SourceLocation location;
+ std::string literal;
+ std::tuple<unsigned, unsigned> extendedTokenInfo;
+ };
+
+ ///@{
+ ///@name Literal buffer support
+ inline void addLiteralChar(char c) { m_nextToken.literal.push_back(c); }
+ inline void addCommentLiteralChar(char c) { m_nextSkippedComment.literal.push_back(c); }
+ inline void addLiteralCharAndAdvance() { addLiteralChar(m_char); advance(); }
+ void addUnicodeAsUTF8(unsigned codepoint);
+ ///@}
+
+ bool advance() { m_char = m_source.advanceAndGet(); return !m_source.isPastEndOfInput(); }
+ void rollback(int _amount) { m_char = m_source.rollback(_amount); }
+
+ inline Token selectToken(Token _tok) { advance(); return _tok; }
+ /// If the next character is _next, advance and return _then, otherwise return _else.
+ inline Token selectToken(char _next, Token _then, Token _else);
+
+ bool scanHexByte(char& o_scannedByte);
+ bool scanUnicode(unsigned& o_codepoint);
+
+ /// Scans a single Solidity token.
+ void scanToken();
+
+ /// Skips all whitespace and @returns true if something was skipped.
+ bool skipWhitespace();
+ /// Skips all whitespace that are neither '\r' nor '\n'.
+ void skipWhitespaceExceptUnicodeLinebreak();
+ Token skipSingleLineComment();
+ Token skipMultiLineComment();
+
+ void scanDecimalDigits();
+ Token scanNumber(char _charSeen = 0);
+ std::tuple<Token, unsigned, unsigned> scanIdentifierOrKeyword();
+
+ Token scanString();
+ Token scanHexString();
+ Token scanSingleLineDocComment();
+ Token scanMultiLineDocComment();
+ /// Scans a slash '/' and depending on the characters returns the appropriate token
+ Token scanSlash();
+
+ /// Scans an escape-sequence which is part of a string and adds the
+ /// decoded character to the current literal. Returns true if a pattern
+ /// is scanned.
+ bool scanEscape();
+
+ /// @returns true iff we are currently positioned at a unicode line break.
+ bool isUnicodeLinebreak();
+
+ /// Return the current source position.
+ int sourcePos() const { return m_source.position(); }
+ bool isSourcePastEndOfInput() const { return m_source.isPastEndOfInput(); }
+
+ TokenDesc m_skippedComment; // desc for current skipped comment
+ TokenDesc m_nextSkippedComment; // desc for next skipped comment
+
+ TokenDesc m_currentToken; // desc for current token (as returned by Next())
+ TokenDesc m_nextToken; // desc for next token (one token look-ahead)
+
+ CharStream m_source;
+ std::shared_ptr<std::string const> m_sourceName;
+
+ /// one character look-ahead, equals 0 at end of input
+ char m_char;
+};
+
+}
+}