diff options
Diffstat (limited to 'libsolidity/parsing/Scanner.cpp')
-rw-r--r-- | libsolidity/parsing/Scanner.cpp | 78 |
1 files changed, 78 insertions, 0 deletions
diff --git a/libsolidity/parsing/Scanner.cpp b/libsolidity/parsing/Scanner.cpp index d630d0ab..603f3e42 100644 --- a/libsolidity/parsing/Scanner.cpp +++ b/libsolidity/parsing/Scanner.cpp @@ -177,6 +177,41 @@ bool Scanner::scanHexByte(char& o_scannedByte) return true; } +bool Scanner::scanUnicode(unsigned & o_codepoint) +{ + unsigned x = 0; + for (int i = 0; i < 4; i++) + { + int d = hexValue(m_char); + if (d < 0) + { + rollback(i); + return false; + } + x = x * 16 + d; + advance(); + } + o_codepoint = x; + return true; +} + +// This supports codepoints between 0000 and FFFF. +void Scanner::addUnicodeAsUTF8(unsigned codepoint) +{ + if (codepoint <= 0x7f) + addLiteralChar(codepoint); + else if (codepoint <= 0x7ff) + { + addLiteralChar(0xc0 | (codepoint >> 6)); + addLiteralChar(0x80 | (codepoint & 0x3f)); + } + else + { + addLiteralChar(0xe0 | (codepoint >> 12)); + addLiteralChar(0x80 | ((codepoint >> 6) & 0x3f)); + addLiteralChar(0x80 | (codepoint & 0x3f)); + } +} // Ensure that tokens can be stored in a byte. BOOST_STATIC_ASSERT(Token::NUM_TOKENS <= 0x100); @@ -556,7 +591,23 @@ void Scanner::scanToken() break; default: if (isIdentifierStart(m_char)) + { tie(token, m, n) = scanIdentifierOrKeyword(); + + // Special case for hexademical literals + if (token == Token::Hex) + { + // reset + m = 0; + n = 0; + + // Special quoted hex string must follow + if (m_char == '"' || m_char == '\'') + token = scanHexString(); + else + token = Token::Illegal; + } + } else if (isDecimalDigit(m_char)) token = scanNumber(); else if (skipWhitespace()) @@ -607,6 +658,14 @@ bool Scanner::scanEscape() case 'v': c = '\v'; break; + case 'u': + { + unsigned codepoint; + if (!scanUnicode(codepoint)) + return false; + addUnicodeAsUTF8(codepoint); + return true; + } case 'x': if (!scanHexByte(c)) return false; @@ -641,6 +700,25 @@ Token::Value Scanner::scanString() return Token::StringLiteral; } +Token::Value Scanner::scanHexString() +{ + char const quote = m_char; + advance(); // consume quote + LiteralScope literal(this, LITERAL_TYPE_STRING); + while (m_char != quote && !isSourcePastEndOfInput() && !isLineTerminator(m_char)) + { + char c = m_char; + if (!scanHexByte(c)) + return Token::Illegal; + addLiteralChar(c); + } + if (m_char != quote) + return Token::Illegal; + literal.complete(); + advance(); // consume quote + return Token::StringLiteral; +} + void Scanner::scanDecimalDigits() { while (isDecimalDigit(m_char)) |