aboutsummaryrefslogtreecommitdiffstats
path: root/liblangutil
diff options
context:
space:
mode:
Diffstat (limited to 'liblangutil')
-rw-r--r--liblangutil/CMakeLists.txt13
-rw-r--r--liblangutil/Scanner.cpp64
-rw-r--r--liblangutil/Scanner.h33
-rw-r--r--liblangutil/SourceReferenceFormatter.cpp129
-rw-r--r--liblangutil/SourceReferenceFormatter.h76
-rw-r--r--liblangutil/Token.h2
6 files changed, 295 insertions, 22 deletions
diff --git a/liblangutil/CMakeLists.txt b/liblangutil/CMakeLists.txt
index 722ca840..dfcccfce 100644
--- a/liblangutil/CMakeLists.txt
+++ b/liblangutil/CMakeLists.txt
@@ -1,6 +1,13 @@
# Solidity Commons Library (Solidity related sharing bits between libsolidity and libyul)
-file(GLOB sources "*.cpp")
-file(GLOB headers "*.h")
+# Translation units of the langutil library. The list is explicit, so a new
+# .cpp file has to be added here before it becomes part of the library.
+set(sources
+ CharStream.cpp
+ ErrorReporter.cpp
+ Exceptions.cpp
+ ParserBase.cpp
+ Scanner.cpp
+ SourceReferenceFormatter.cpp
+ Token.cpp
+)
-add_library(langutil ${sources} ${headers})
+add_library(langutil ${sources})
target_link_libraries(langutil PUBLIC devcore)
diff --git a/liblangutil/Scanner.cpp b/liblangutil/Scanner.cpp
index 3d7527d4..215171b3 100644
--- a/liblangutil/Scanner.cpp
+++ b/liblangutil/Scanner.cpp
@@ -53,6 +53,7 @@
#include <liblangutil/Exceptions.h>
#include <liblangutil/Scanner.h>
#include <algorithm>
+#include <ostream>
#include <tuple>
using namespace std;
@@ -100,7 +101,32 @@ int hexValue(char c)
}
} // end anonymous namespace
+/// Maps a scanner error code to its human-readable message.
+/// Any value that is not one of the handled enumerators triggers solAssert.
+std::string to_string(ScannerError _errorCode)
+{
+    switch (_errorCode)
+    {
+    case ScannerError::NoError:
+        return "No error.";
+    case ScannerError::IllegalToken:
+        return "Invalid token.";
+    case ScannerError::IllegalHexString:
+        return "Expected even number of hex-nibbles within double-quotes.";
+    case ScannerError::IllegalHexDigit:
+        return "Hexadecimal digit missing or invalid.";
+    case ScannerError::IllegalCommentTerminator:
+        return "Expected multi-line comment-terminator.";
+    case ScannerError::IllegalEscapeSequence:
+        return "Invalid escape sequence.";
+    case ScannerError::IllegalStringEndQuote:
+        return "Expected string end-quote.";
+    case ScannerError::IllegalNumberSeparator:
+        return "Invalid use of number separator '_'.";
+    case ScannerError::IllegalExponent:
+        return "Invalid exponent.";
+    case ScannerError::IllegalNumberEnd:
+        return "Identifier-start is not allowed at end of a number.";
+    case ScannerError::OctalNotAllowed:
+        return "Octal numbers not allowed.";
+    default:
+        break;
+    }
+    // Only reached for values outside the enumerators listed above.
+    solAssert(false, "Unhandled case in to_string(ScannerError)");
+    return "";
+}
+/// Writes the message produced by to_string(_errorCode) to os and returns os.
+std::ostream& operator<<(std::ostream& os, ScannerError _errorCode)
+{
+    return os << to_string(_errorCode);
+}
/// Scoped helper for literal recording. Automatically drops the literal
/// if aborting the scanning before it's complete.
@@ -311,7 +337,7 @@ Token Scanner::skipMultiLineComment()
}
}
// Unterminated multi-line comment.
- return Token::Illegal;
+ return setError(ScannerError::IllegalCommentTerminator);
}
Token Scanner::scanMultiLineDocComment()
@@ -362,7 +388,7 @@ Token Scanner::scanMultiLineDocComment()
}
literal.complete();
if (!endFound)
- return Token::Illegal;
+ return setError(ScannerError::IllegalCommentTerminator);
else
return Token::CommentLiteral;
}
@@ -392,7 +418,7 @@ Token Scanner::scanSlash()
{
// doxygen style /** natspec comment
if (!advance()) /* slash star comment before EOS */
- return Token::Illegal;
+ return setError(ScannerError::IllegalCommentTerminator);
else if (m_char == '*')
{
advance(); //consume the last '*' at /**
@@ -410,7 +436,7 @@ Token Scanner::scanSlash()
m_nextSkippedComment.location.end = sourcePos();
m_nextSkippedComment.token = comment;
if (comment == Token::Illegal)
- return Token::Illegal;
+ return Token::Illegal; // error already set
else
return Token::Whitespace;
}
@@ -425,6 +451,7 @@ Token Scanner::scanSlash()
void Scanner::scanToken()
{
+ m_nextToken.error = ScannerError::NoError;
m_nextToken.literal.clear();
m_nextToken.extendedTokenInfo = make_tuple(0, 0);
m_nextSkippedComment.literal.clear();
@@ -610,7 +637,7 @@ void Scanner::scanToken()
if (m_char == '"' || m_char == '\'')
token = scanHexString();
else
- token = Token::IllegalHex;
+ token = setError(ScannerError::IllegalToken);
}
}
else if (isDecimalDigit(m_char))
@@ -620,7 +647,7 @@ void Scanner::scanToken()
else if (isSourcePastEndOfInput())
token = Token::EOS;
else
- token = selectToken(Token::Illegal);
+ token = selectErrorToken(ScannerError::IllegalToken);
break;
}
// Continue scanning for tokens as long as we're just skipping
@@ -713,13 +740,13 @@ Token Scanner::scanString()
if (c == '\\')
{
if (isSourcePastEndOfInput() || !scanEscape())
- return Token::Illegal;
+ return setError(ScannerError::IllegalEscapeSequence);
}
else
addLiteralChar(c);
}
if (m_char != quote)
- return Token::Illegal;
+ return setError(ScannerError::IllegalStringEndQuote);
literal.complete();
advance(); // consume quote
return Token::StringLiteral;
@@ -734,11 +761,14 @@ Token Scanner::scanHexString()
{
char c = m_char;
if (!scanHexByte(c))
- return Token::IllegalHex;
+ // can only return false if hex-byte is incomplete (only one hex digit instead of two)
+ return setError(ScannerError::IllegalHexString);
addLiteralChar(c);
}
+
if (m_char != quote)
- return Token::IllegalHex;
+ return setError(ScannerError::IllegalStringEndQuote);
+
literal.complete();
advance(); // consume quote
return Token::StringLiteral;
@@ -767,7 +797,7 @@ Token Scanner::scanNumber(char _charSeen)
// we have already seen a decimal point of the float
addLiteralChar('.');
if (m_char == '_')
- return Token::Illegal;
+ return setError(ScannerError::IllegalToken);
scanDecimalDigits(); // we know we have at least one digit
}
else
@@ -784,14 +814,14 @@ Token Scanner::scanNumber(char _charSeen)
kind = HEX;
addLiteralCharAndAdvance();
if (!isHexDigit(m_char))
- return Token::Illegal; // we must have at least one hex digit after 'x'
+ return setError(ScannerError::IllegalHexDigit); // we must have at least one hex digit after 'x'
while (isHexDigit(m_char) || m_char == '_') // We keep the underscores for later validation
addLiteralCharAndAdvance();
}
else if (isDecimalDigit(m_char))
// We do not allow octal numbers
- return Token::Illegal;
+ return setError(ScannerError::OctalNotAllowed);
}
// Parse decimal digits and allow trailing fractional part.
if (kind == DECIMAL)
@@ -823,7 +853,7 @@ Token Scanner::scanNumber(char _charSeen)
{
solAssert(kind != HEX, "'e'/'E' must be scanned as part of the hex number");
if (kind != DECIMAL)
- return Token::Illegal;
+ return setError(ScannerError::IllegalExponent);
else if (!m_source.isPastEndOfInput(1) && m_source.get(1) == '_')
{
// Recover from wrongly placed underscore as delimiter in literal with scientific
@@ -838,8 +868,8 @@ Token Scanner::scanNumber(char _charSeen)
addLiteralCharAndAdvance(); // 'e' | 'E'
if (m_char == '+' || m_char == '-')
addLiteralCharAndAdvance();
- if (!isDecimalDigit(m_char))
- return Token::Illegal; // we must have at least one decimal digit after 'e'/'E'
+ if (!isDecimalDigit(m_char)) // we must have at least one decimal digit after 'e'/'E'
+ return setError(ScannerError::IllegalExponent);
scanDecimalDigits();
}
// The source character immediately following a numeric literal must
@@ -847,7 +877,7 @@ Token Scanner::scanNumber(char _charSeen)
// section 7.8.3, page 17 (note that we read only one decimal digit
// if the value is 0).
if (isDecimalDigit(m_char) || isIdentifierStart(m_char))
- return Token::Illegal;
+ return setError(ScannerError::IllegalNumberEnd);
literal.complete();
return Token::Number;
}
diff --git a/liblangutil/Scanner.h b/liblangutil/Scanner.h
index da5e3dfb..d01e71e2 100644
--- a/liblangutil/Scanner.h
+++ b/liblangutil/Scanner.h
@@ -57,6 +57,7 @@
#include <liblangutil/SourceLocation.h>
#include <libdevcore/Common.h>
#include <libdevcore/CommonData.h>
+#include <iosfwd>
namespace langutil
{
@@ -65,6 +66,26 @@ class AstRawString;
class AstValueFactory;
class ParserRecorder;
+/// Error codes the scanner attaches to a token; to_string() provides the
+/// corresponding human-readable message for each of them.
+enum class ScannerError
+{
+ /// No error; the default value of a token's error field.
+ NoError,
+
+ /// Invalid token.
+ IllegalToken,
+ /// A hex string literal does not consist of an even number of hex nibbles.
+ IllegalHexString,
+ /// A hexadecimal digit is missing or invalid.
+ IllegalHexDigit,
+ /// The terminator of a multi-line comment is missing.
+ IllegalCommentTerminator,
+ /// Invalid escape sequence.
+ IllegalEscapeSequence,
+ /// The closing quote of a string is missing.
+ IllegalStringEndQuote,
+ /// Invalid use of the number separator '_'.
+ IllegalNumberSeparator,
+ /// Invalid exponent.
+ IllegalExponent,
+ /// A number is directly followed by a decimal digit or an identifier start.
+ IllegalNumberEnd,
+
+ /// Octal numbers are not allowed.
+ OctalNotAllowed,
+};
+
+/// Returns the human-readable message for the given scanner error code.
+std::string to_string(ScannerError _errorCode);
+/// Writes the message of to_string(_errorCode) to os and returns os.
+std::ostream& operator<<(std::ostream& os, ScannerError _errorCode);
+
class Scanner
{
friend class LiteralScope;
@@ -100,6 +121,10 @@ public:
SourceLocation currentLocation() const { return m_currentToken.location; }
std::string const& currentLiteral() const { return m_currentToken.literal; }
std::tuple<unsigned, unsigned> const& currentTokenInfo() const { return m_currentToken.extendedTokenInfo; }
+
+ /// Retrieves the error recorded for the current token during lexical analysis.
+ /// @note TokenDesc::error defaults to ScannerError::NoError and scanToken() resets it before
+ /// scanning a token, so this is expected to be ScannerError::NoError if no error occurred.
+ ScannerError currentError() const noexcept { return m_currentToken.error; }
///@}
///@{
@@ -139,12 +164,19 @@ public:
///@}
private:
+    /// Stores _error as the error of the token being scanned (m_nextToken).
+    /// @returns Token::Illegal, so that callers can write `return setError(...)`.
+    inline Token setError(ScannerError _error) noexcept
+    {
+        m_nextToken.error = _error;
+        return Token::Illegal;
+    }
+
/// Used for the current and look-ahead token and comments
struct TokenDesc
{
+ /// Kind of the token.
Token token;
+ /// Source location; exposed for the current token via currentLocation().
SourceLocation location;
+ /// Literal text; exposed for the current token via currentLiteral().
std::string literal;
+ /// Error set through setError(); scanToken() resets it to NoError before scanning.
+ ScannerError error = ScannerError::NoError;
+ /// Exposed for the current token via currentTokenInfo(); scanToken() resets it to (0, 0).
std::tuple<unsigned, unsigned> extendedTokenInfo;
};
@@ -159,6 +191,7 @@ private:
bool advance() { m_char = m_source.advanceAndGet(); return !m_source.isPastEndOfInput(); }
void rollback(int _amount) { m_char = m_source.rollback(_amount); }
+    /// Advances by one character, then records _err through setError()
+    /// and returns the token it yields (Token::Illegal).
+    inline Token selectErrorToken(ScannerError _err)
+    {
+        advance();
+        return setError(_err);
+    }
inline Token selectToken(Token _tok) { advance(); return _tok; }
/// If the next character is _next, advance and return _then, otherwise return _else.
inline Token selectToken(char _next, Token _then, Token _else);
diff --git a/liblangutil/SourceReferenceFormatter.cpp b/liblangutil/SourceReferenceFormatter.cpp
new file mode 100644
index 00000000..58a65521
--- /dev/null
+++ b/liblangutil/SourceReferenceFormatter.cpp
@@ -0,0 +1,129 @@
+/*
+ This file is part of solidity.
+
+ solidity is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ solidity is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with solidity. If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * @author Christian <c@ethdev.com>
+ * @date 2014
+ * Formatting functions for errors referencing positions and locations in the source.
+ */
+
+#include <liblangutil/SourceReferenceFormatter.h>
+#include <liblangutil/Scanner.h>
+#include <liblangutil/Exceptions.h>
+
+using namespace std;
+using namespace dev;
+using namespace langutil;
+
+/// Prints the source line in which _location starts, followed by a marker line
+/// that points at the referenced range. If the range ends on a different line,
+/// only its start is marked. Prints nothing if _location or its source name is
+/// missing.
+void SourceReferenceFormatter::printSourceLocation(SourceLocation const* _location)
+{
+ if (!_location || !_location->sourceName)
+ return; // Nothing we can print here
+ auto const& scanner = m_scannerFromSourceName(*_location->sourceName);
+ int startLine;
+ int startColumn;
+ tie(startLine, startColumn) = scanner.translatePositionToLineColumn(_location->start);
+ int endLine;
+ int endColumn;
+ tie(endLine, endColumn) = scanner.translatePositionToLineColumn(_location->end);
+ if (startLine == endLine)
+ {
+ string line = scanner.lineAtPosition(_location->start);
+
+ int locationLength = endColumn - startColumn;
+ // Range longer than 150 columns: keep its first and last 35 characters and
+ // replace the middle by " ... ", which leaves 35 + 5 + 35 = 75 columns.
+ if (locationLength > 150)
+ {
+ line = line.substr(0, startColumn + 35) + " ... " + line.substr(endColumn - 35);
+ endColumn = startColumn + 75;
+ locationLength = 75;
+ }
+ // Line still longer than 150 characters: keep at most 35 characters before
+ // and after the range and mark the cut-off parts with " ...".
+ if (line.length() > 150)
+ {
+ int len = line.length();
+ line = line.substr(max(0, startColumn - 35), min(startColumn, 35) + min(locationLength + 35, len - startColumn));
+ if (startColumn + locationLength + 35 < len)
+ line += " ...";
+ if (startColumn > 35)
+ {
+ line = " ... " + line;
+ // 35 characters of kept context plus the 5 characters of " ... ".
+ startColumn = 40;
+ }
+ endColumn = startColumn + locationLength;
+ }
+
+ m_stream << line << endl;
+
+ // Indent the marker line up to the start column; tabs of the printed line
+ // are repeated as tabs, every other character becomes a space.
+ for_each(
+ line.cbegin(),
+ line.cbegin() + startColumn,
+ [this](char const& ch) { m_stream << (ch == '\t' ? '\t' : ' '); }
+ );
+ // Marker: "^" for ranges of at most one column, "^^" for two columns,
+ // otherwise "^", dashes in between and a closing "^".
+ m_stream << "^";
+ if (endColumn > startColumn + 2)
+ m_stream << string(endColumn - startColumn - 2, '-');
+ if (endColumn > startColumn + 1)
+ m_stream << "^";
+ m_stream << endl;
+ }
+ else
+ // Range spans several lines: print only the first line and mark its start column.
+ m_stream <<
+ scanner.lineAtPosition(_location->start) <<
+ endl <<
+ string(startColumn, ' ') <<
+ "^ (Relevant source part starts here and spans across multiple lines)." <<
+ endl;
+}
+
+/// Writes "<source name>:<line>:<column>: " for the start of _location, where
+/// line and column are the values reported by the scanner plus one.
+/// Prints nothing if _location or its source name is missing.
+void SourceReferenceFormatter::printSourceName(SourceLocation const* _location)
+{
+    if (_location && _location->sourceName)
+    {
+        auto const& sourceName = *_location->sourceName;
+        auto const& sourceScanner = m_scannerFromSourceName(sourceName);
+        int line;
+        int column;
+        tie(line, column) = sourceScanner.translatePositionToLineColumn(_location->start);
+        m_stream << sourceName << ":" << (line + 1) << ":" << (column + 1) << ": ";
+    }
+}
+
+/// Prints a description of _exception to the stream: the source name and
+/// position (if a source location is attached), _name, the attached comment
+/// (if any) and the referenced source line. Every secondary source location
+/// attached to the exception is printed the same way, using its own message.
+/// @param _exception exception whose attached error infos are printed.
+/// @param _name label printed in front of the description.
+void SourceReferenceFormatter::printExceptionInformation(
+    dev::Exception const& _exception,
+    string const& _name
+)
+{
+    SourceLocation const* location = boost::get_error_info<errinfo_sourceLocation>(_exception);
+    auto secondarylocation = boost::get_error_info<errinfo_secondarySourceLocation>(_exception);
+
+    printSourceName(location);
+
+    m_stream << _name;
+    if (string const* description = boost::get_error_info<errinfo_comment>(_exception))
+        m_stream << ": " << *description << endl;
+    else
+        m_stream << endl;
+
+    printSourceLocation(location);
+
+    if (secondarylocation && !secondarylocation->infos.empty())
+    {
+        // Iterate by const reference: each entry holds a message and a source
+        // location, which would otherwise be copied on every iteration.
+        for (auto const& info: secondarylocation->infos)
+        {
+            printSourceName(&info.second);
+            m_stream << info.first << endl;
+            printSourceLocation(&info.second);
+        }
+        m_stream << endl;
+    }
+}
diff --git a/liblangutil/SourceReferenceFormatter.h b/liblangutil/SourceReferenceFormatter.h
new file mode 100644
index 00000000..0ef3ca00
--- /dev/null
+++ b/liblangutil/SourceReferenceFormatter.h
@@ -0,0 +1,76 @@
+/*
+ This file is part of solidity.
+
+ solidity is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ solidity is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with solidity. If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * @author Christian <c@ethdev.com>
+ * @date 2014
+ * Formatting functions for errors referencing positions and locations in the source.
+ */
+
+#pragma once
+
+#include <ostream>
+#include <sstream>
+#include <functional>
+
+namespace dev
+{
+struct Exception; // forward
+}
+
+namespace langutil
+{
+struct SourceLocation;
+class Scanner;
+
+/// Writes descriptions of exceptions, together with the source locations they
+/// refer to, to an output stream. The scanner belonging to a source name is
+/// obtained through a callback supplied by the caller.
+class SourceReferenceFormatter
+{
+public:
+    /// Callback that returns the scanner for a given source name.
+    using ScannerFromSourceNameFun = std::function<langutil::Scanner const&(std::string const&)>;
+
+    /// @param _stream stream that receives all output; it is stored by reference.
+    /// @param _scannerFromSourceName callback used to look up scanners by source name.
+    explicit SourceReferenceFormatter(
+        std::ostream& _stream,
+        ScannerFromSourceNameFun _scannerFromSourceName
+    ):
+        m_stream(_stream),
+        m_scannerFromSourceName(std::move(_scannerFromSourceName))
+    {}
+
+    /// Prints source location if it is given.
+    void printSourceLocation(langutil::SourceLocation const* _location);
+    /// Prints the information attached to _exception, labelled with _name.
+    void printExceptionInformation(dev::Exception const& _exception, std::string const& _name);
+
+    /// Convenience wrapper around printExceptionInformation() that collects
+    /// the output in a string instead of writing it to a stream.
+    static std::string formatExceptionInformation(
+        dev::Exception const& _exception,
+        std::string const& _name,
+        ScannerFromSourceNameFun const& _scannerFromSourceName
+    )
+    {
+        std::ostringstream buffer;
+        SourceReferenceFormatter printer(buffer, _scannerFromSourceName);
+        printer.printExceptionInformation(_exception, _name);
+        return buffer.str();
+    }
+
+private:
+    /// Prints source name if location is given.
+    void printSourceName(langutil::SourceLocation const* _location);
+
+    std::ostream& m_stream;
+    ScannerFromSourceNameFun m_scannerFromSourceName;
+};
+
+}
diff --git a/liblangutil/Token.h b/liblangutil/Token.h
index d997b138..f832fdf7 100644
--- a/liblangutil/Token.h
+++ b/liblangutil/Token.h
@@ -263,8 +263,6 @@ namespace langutil
\
/* Illegal token - not able to scan. */ \
T(Illegal, "ILLEGAL", 0) \
- /* Illegal hex token */ \
- T(IllegalHex, "ILLEGAL_HEX", 0) \
\
/* Scanner-internal use only. */ \
T(Whitespace, nullptr, 0)