diff options
45 files changed, 6441 insertions, 1 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index a02b779e..18566bc7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,10 +26,13 @@ include(EthUtils) include(EthOptions) configure_project(TESTS) +add_subdirectory(libevmasm) add_subdirectory(libsolidity) add_subdirectory(solc) if (NOT EMSCRIPTEN) + add_subdirectory(liblll) add_subdirectory(test) + add_subdirectory(lllc) endif() # TODO installation and packaging rules diff --git a/docs/frequently-asked-questions.rst b/docs/frequently-asked-questions.rst index db37a272..f960a0d6 100644 --- a/docs/frequently-asked-questions.rst +++ b/docs/frequently-asked-questions.rst @@ -613,6 +613,7 @@ Note that the full code of the created contract has to be included in the creato This also means that cyclic creations are not possible (because the contract would have to contain its own code) - at least not in a general way. + How do you create 2-dimensional arrays? ======================================= @@ -647,6 +648,22 @@ gas and return your 20 Wei). In the above example, the low-level function `call` is used to invoke another contract with `p.data` as payload and `p.amount` Wei is sent with that call. +How do I initialize a contract with only a specific amount of wei? +================================================================== + +Currently the approach is a little ugly, but there is little that can be done to improve it. +In the case of a `contract A` calling a new instance of `contract B`, parentheses have to be used around +`new B` because `B.value` would refer to a member of `B` called `value`. +You will need to make sure that you have both contracts aware of each other's presence. +In this example:: + contract B {} + contract A { + address child; + function test() { + child = (new B).value(10)(); //construct a new B with 10 wei + } + } + Can a contract function accept a two-dimensional array? ======================================================= diff --git a/libevmasm/Assembly.cpp b/libevmasm/Assembly.cpp new file mode 100644 index 00000000..7277865e --- /dev/null +++ b/libevmasm/Assembly.cpp @@ -0,0 +1,545 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** @file Assembly.cpp + * @author Gav Wood <i@gavwood.com> + * @date 2014 + */ + +#include "Assembly.h" +#include <fstream> +#include <libdevcore/Log.h> +#include <libevmasm/CommonSubexpressionEliminator.h> +#include <libevmasm/ControlFlowGraph.h> +#include <libevmasm/BlockDeduplicator.h> +#include <libevmasm/ConstantOptimiser.h> +#include <libevmasm/GasMeter.h> +#include <json/json.h> +using namespace std; +using namespace dev; +using namespace dev::eth; + +void Assembly::append(Assembly const& _a) +{ + auto newDeposit = m_deposit + _a.deposit(); + for (AssemblyItem i: _a.m_items) + { + if (i.type() == Tag || i.type() == PushTag) + i.setData(i.data() + m_usedTags); + else if (i.type() == PushSub || i.type() == PushSubSize) + i.setData(i.data() + m_subs.size()); + append(i); + } + m_deposit = newDeposit; + m_usedTags += _a.m_usedTags; + for (auto const& i: _a.m_data) + m_data.insert(i); + for (auto const& i: _a.m_strings) + m_strings.insert(i); + m_subs += _a.m_subs; + for (auto const& lib: _a.m_libraries) + m_libraries.insert(lib); + + assert(!_a.m_baseDeposit); + assert(!_a.m_totalDeposit); +} + +void Assembly::append(Assembly const& _a, int _deposit) +{ + if (_deposit > _a.m_deposit) + BOOST_THROW_EXCEPTION(InvalidDeposit()); + else + { + append(_a); + while (_deposit++ < _a.m_deposit) + append(Instruction::POP); + } +} + +string Assembly::out() const +{ + stringstream ret; + stream(ret); + return ret.str(); +} + +unsigned Assembly::bytesRequired() const +{ + for (unsigned br = 1;; ++br) + { + unsigned ret = 1; + for (auto const& i: m_data) + ret += i.second.size(); + + for (AssemblyItem const& i: m_items) + ret += i.bytesRequired(br); + if (dev::bytesRequired(ret) <= br) + return ret; + } +} + +string Assembly::locationFromSources(StringMap const& _sourceCodes, SourceLocation const& _location) const +{ + if (_location.isEmpty() || _sourceCodes.empty() || _location.start >= _location.end || _location.start < 0) + return ""; + + auto it = _sourceCodes.find(*_location.sourceName); + if (it == _sourceCodes.end()) + return ""; + + string const& source = it->second; + if (size_t(_location.start) >= source.size()) + return ""; + + string cut = source.substr(_location.start, _location.end - _location.start); + auto newLinePos = cut.find_first_of("\n"); + if (newLinePos != string::npos) + cut = cut.substr(0, newLinePos) + "..."; + + return cut; +} + +ostream& Assembly::streamAsm(ostream& _out, string const& _prefix, StringMap const& _sourceCodes) const +{ + _out << _prefix << ".code:" << endl; + for (AssemblyItem const& i: m_items) + { + _out << _prefix; + switch (i.type()) + { + case Operation: + _out << " " << instructionInfo(i.instruction()).name << "\t" << i.getJumpTypeAsString(); + break; + case Push: + _out << " PUSH " << hex << i.data(); + break; + case PushString: + _out << " PUSH \"" << m_strings.at((h256)i.data()) << "\""; + break; + case PushTag: + if (i.data() == 0) + _out << " PUSH [ErrorTag]"; + else + _out << " PUSH [tag" << dec << i.data() << "]"; + break; + case PushSub: + _out << " PUSH [$" << h256(i.data()).abridgedMiddle() << "]"; + break; + case PushSubSize: + _out << " PUSH #[$" << h256(i.data()).abridgedMiddle() << "]"; + break; + case PushProgramSize: + _out << " PUSHSIZE"; + break; + case PushLibraryAddress: + _out << " PUSHLIB \"" << m_libraries.at(h256(i.data())) << "\""; + break; + case Tag: + _out << "tag" << dec << i.data() << ": " << endl << _prefix << " JUMPDEST"; + break; + case PushData: + _out << " PUSH [" << hex << (unsigned)i.data() << "]"; + break; + default: + BOOST_THROW_EXCEPTION(InvalidOpcode()); + } + _out << "\t\t" << locationFromSources(_sourceCodes, i.location()) << endl; + } + + if (!m_data.empty() || !m_subs.empty()) + { + _out << _prefix << ".data:" << endl; + for (auto const& i: m_data) + if (u256(i.first) >= m_subs.size()) + _out << _prefix << " " << hex << (unsigned)(u256)i.first << ": " << dev::toHex(i.second) << endl; + for (size_t i = 0; i < m_subs.size(); ++i) + { + _out << _prefix << " " << hex << i << ": " << endl; + m_subs[i].stream(_out, _prefix + " ", _sourceCodes); + } + } + return _out; +} + +Json::Value Assembly::createJsonValue(string _name, int _begin, int _end, string _value, string _jumpType) const +{ + Json::Value value; + value["name"] = _name; + value["begin"] = _begin; + value["end"] = _end; + if (!_value.empty()) + value["value"] = _value; + if (!_jumpType.empty()) + value["jumpType"] = _jumpType; + return value; +} + +string toStringInHex(u256 _value) +{ + std::stringstream hexStr; + hexStr << hex << _value; + return hexStr.str(); +} + +Json::Value Assembly::streamAsmJson(ostream& _out, StringMap const& _sourceCodes) const +{ + Json::Value root; + + Json::Value collection(Json::arrayValue); + for (AssemblyItem const& i: m_items) + { + switch (i.type()) + { + case Operation: + collection.append( + createJsonValue(instructionInfo(i.instruction()).name, i.location().start, i.location().end, i.getJumpTypeAsString())); + break; + case Push: + collection.append( + createJsonValue("PUSH", i.location().start, i.location().end, toStringInHex(i.data()), i.getJumpTypeAsString())); + break; + case PushString: + collection.append( + createJsonValue("PUSH tag", i.location().start, i.location().end, m_strings.at((h256)i.data()))); + break; + case PushTag: + if (i.data() == 0) + collection.append( + createJsonValue("PUSH [ErrorTag]", i.location().start, i.location().end, "")); + else + collection.append( + createJsonValue("PUSH [tag]", i.location().start, i.location().end, string(i.data()))); + break; + case PushSub: + collection.append( + createJsonValue("PUSH [$]", i.location().start, i.location().end, dev::toString(h256(i.data())))); + break; + case PushSubSize: + collection.append( + createJsonValue("PUSH #[$]", i.location().start, i.location().end, dev::toString(h256(i.data())))); + break; + case PushProgramSize: + collection.append( + createJsonValue("PUSHSIZE", i.location().start, i.location().end)); + break; + case PushLibraryAddress: + collection.append( + createJsonValue("PUSHLIB", i.location().start, i.location().end, m_libraries.at(h256(i.data()))) + ); + break; + case Tag: + collection.append( + createJsonValue("tag", i.location().start, i.location().end, string(i.data()))); + collection.append( + createJsonValue("JUMPDEST", i.location().start, i.location().end)); + break; + case PushData: + collection.append(createJsonValue("PUSH data", i.location().start, i.location().end, toStringInHex(i.data()))); + break; + default: + BOOST_THROW_EXCEPTION(InvalidOpcode()); + } + } + + root[".code"] = collection; + + if (!m_data.empty() || !m_subs.empty()) + { + Json::Value data; + for (auto const& i: m_data) + if (u256(i.first) >= m_subs.size()) + data[toStringInHex((u256)i.first)] = toHex(i.second); + + for (size_t i = 0; i < m_subs.size(); ++i) + { + std::stringstream hexStr; + hexStr << hex << i; + data[hexStr.str()] = m_subs[i].stream(_out, "", _sourceCodes, true); + } + root[".data"] = data; + _out << root; + } + return root; +} + +Json::Value Assembly::stream(ostream& _out, string const& _prefix, StringMap const& _sourceCodes, bool _inJsonFormat) const +{ + if (_inJsonFormat) + return streamAsmJson(_out, _sourceCodes); + else + { + streamAsm(_out, _prefix, _sourceCodes); + return Json::Value(); + } +} + +AssemblyItem const& Assembly::append(AssemblyItem const& _i) +{ + m_deposit += _i.deposit(); + m_items.push_back(_i); + if (m_items.back().location().isEmpty() && !m_currentSourceLocation.isEmpty()) + m_items.back().setLocation(m_currentSourceLocation); + return back(); +} + +AssemblyItem Assembly::newPushLibraryAddress(string const& _identifier) +{ + h256 h(dev::sha3(_identifier)); + m_libraries[h] = _identifier; + return AssemblyItem(PushLibraryAddress, h); +} + +void Assembly::injectStart(AssemblyItem const& _i) +{ + m_items.insert(m_items.begin(), _i); +} + +struct OptimiserChannel: public LogChannel { static const char* name() { return "OPT"; } static const int verbosity = 12; }; +#define copt dev::LogOutputStream<OptimiserChannel, true>() + +Assembly& Assembly::optimise(bool _enable, bool _isCreation, size_t _runs) +{ + if (!_enable) + return *this; + + unsigned total = 0; + for (unsigned count = 1; count > 0; total += count) + { + copt << toString(*this); + count = 0; + + copt << "Performing optimisation..."; + // This only modifies PushTags, we have to run again to actually remove code. + BlockDeduplicator dedup(m_items); + if (dedup.deduplicate()) + count++; + + { + ControlFlowGraph cfg(m_items); + AssemblyItems optimisedItems; + for (BasicBlock const& block: cfg.optimisedBlocks()) + { + assertThrow(!!block.startState, OptimizerException, ""); + CommonSubexpressionEliminator eliminator(*block.startState); + auto iter = m_items.begin() + block.begin; + auto const end = m_items.begin() + block.end; + while (iter < end) + { + auto orig = iter; + iter = eliminator.feedItems(iter, end); + bool shouldReplace = false; + AssemblyItems optimisedChunk; + try + { + optimisedChunk = eliminator.getOptimizedItems(); + shouldReplace = (optimisedChunk.size() < size_t(iter - orig)); + } + catch (StackTooDeepException const&) + { + // This might happen if the opcode reconstruction is not as efficient + // as the hand-crafted code. + } + catch (ItemNotAvailableException const&) + { + // This might happen if e.g. associativity and commutativity rules + // reorganise the expression tree, but not all leaves are available. + } + + if (shouldReplace) + { + copt << "Old size: " << (iter - orig) << ", new size: " << optimisedChunk.size(); + count++; + optimisedItems += optimisedChunk; + } + else + copy(orig, iter, back_inserter(optimisedItems)); + } + } + + if (optimisedItems.size() < m_items.size()) + { + m_items = move(optimisedItems); + count++; + } + } + } + + total += ConstantOptimisationMethod::optimiseConstants( + _isCreation, + _isCreation ? 1 : _runs, + *this, + m_items + ); + + copt << total << " optimisations done."; + + for (auto& sub: m_subs) + sub.optimise(true, false, _runs); + + return *this; +} + +LinkerObject const& Assembly::assemble() const +{ + if (!m_assembledObject.bytecode.empty()) + return m_assembledObject; + + LinkerObject& ret = m_assembledObject; + + unsigned totalBytes = bytesRequired(); + vector<unsigned> tagPos(m_usedTags); + map<unsigned, unsigned> tagRef; + multimap<h256, unsigned> dataRef; + multimap<size_t, size_t> subRef; + vector<unsigned> sizeRef; ///< Pointers to code locations where the size of the program is inserted + unsigned bytesPerTag = dev::bytesRequired(totalBytes); + byte tagPush = (byte)Instruction::PUSH1 - 1 + bytesPerTag; + + unsigned bytesRequiredIncludingData = bytesRequired(); + for (auto const& sub: m_subs) + bytesRequiredIncludingData += sub.assemble().bytecode.size(); + + unsigned bytesPerDataRef = dev::bytesRequired(bytesRequiredIncludingData); + byte dataRefPush = (byte)Instruction::PUSH1 - 1 + bytesPerDataRef; + ret.bytecode.reserve(bytesRequiredIncludingData); + + for (AssemblyItem const& i: m_items) + { + // store position of the invalid jump destination + if (i.type() != Tag && tagPos[0] == 0) + tagPos[0] = ret.bytecode.size(); + + switch (i.type()) + { + case Operation: + ret.bytecode.push_back((byte)i.data()); + break; + case PushString: + { + ret.bytecode.push_back((byte)Instruction::PUSH32); + unsigned ii = 0; + for (auto j: m_strings.at((h256)i.data())) + if (++ii > 32) + break; + else + ret.bytecode.push_back((byte)j); + while (ii++ < 32) + ret.bytecode.push_back(0); + break; + } + case Push: + { + byte b = max<unsigned>(1, dev::bytesRequired(i.data())); + ret.bytecode.push_back((byte)Instruction::PUSH1 - 1 + b); + ret.bytecode.resize(ret.bytecode.size() + b); + bytesRef byr(&ret.bytecode.back() + 1 - b, b); + toBigEndian(i.data(), byr); + break; + } + case PushTag: + { + ret.bytecode.push_back(tagPush); + tagRef[ret.bytecode.size()] = (unsigned)i.data(); + ret.bytecode.resize(ret.bytecode.size() + bytesPerTag); + break; + } + case PushData: + ret.bytecode.push_back(dataRefPush); + dataRef.insert(make_pair((h256)i.data(), ret.bytecode.size())); + ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef); + break; + case PushSub: + ret.bytecode.push_back(dataRefPush); + subRef.insert(make_pair(size_t(i.data()), ret.bytecode.size())); + ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef); + break; + case PushSubSize: + { + auto s = m_subs.at(size_t(i.data())).assemble().bytecode.size(); + i.setPushedValue(u256(s)); + byte b = max<unsigned>(1, dev::bytesRequired(s)); + ret.bytecode.push_back((byte)Instruction::PUSH1 - 1 + b); + ret.bytecode.resize(ret.bytecode.size() + b); + bytesRef byr(&ret.bytecode.back() + 1 - b, b); + toBigEndian(s, byr); + break; + } + case PushProgramSize: + { + ret.bytecode.push_back(dataRefPush); + sizeRef.push_back(ret.bytecode.size()); + ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef); + break; + } + case PushLibraryAddress: + ret.bytecode.push_back(byte(Instruction::PUSH20)); + ret.linkReferences[ret.bytecode.size()] = m_libraries.at(i.data()); + ret.bytecode.resize(ret.bytecode.size() + 20); + break; + case Tag: + tagPos[(unsigned)i.data()] = ret.bytecode.size(); + assertThrow(i.data() != 0, AssemblyException, ""); + ret.bytecode.push_back((byte)Instruction::JUMPDEST); + break; + default: + BOOST_THROW_EXCEPTION(InvalidOpcode()); + } + } + for (auto const& i: tagRef) + { + bytesRef r(ret.bytecode.data() + i.first, bytesPerTag); + auto tag = i.second; + if (tag >= tagPos.size()) + tag = 0; + if (tag == 0) + assertThrow(tagPos[tag] != 0, AssemblyException, ""); + + toBigEndian(tagPos[tag], r); + } + + if (!dataRef.empty() && !subRef.empty()) + ret.bytecode.push_back(0); + for (size_t i = 0; i < m_subs.size(); ++i) + { + auto references = subRef.equal_range(i); + if (references.first == references.second) + continue; + for (auto ref = references.first; ref != references.second; ++ref) + { + bytesRef r(ret.bytecode.data() + ref->second, bytesPerDataRef); + toBigEndian(ret.bytecode.size(), r); + } + ret.append(m_subs[i].assemble()); + } + for (auto const& dataItem: m_data) + { + auto references = dataRef.equal_range(dataItem.first); + if (references.first == references.second) + continue; + for (auto ref = references.first; ref != references.second; ++ref) + { + bytesRef r(ret.bytecode.data() + ref->second, bytesPerDataRef); + toBigEndian(ret.bytecode.size(), r); + } + ret.bytecode += dataItem.second; + } + for (unsigned pos: sizeRef) + { + bytesRef r(ret.bytecode.data() + pos, bytesPerDataRef); + toBigEndian(ret.bytecode.size(), r); + } + return ret; +} diff --git a/libevmasm/Assembly.h b/libevmasm/Assembly.h new file mode 100644 index 00000000..28328277 --- /dev/null +++ b/libevmasm/Assembly.h @@ -0,0 +1,151 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** @file Assembly.h + * @author Gav Wood <i@gavwood.com> + * @date 2014 + */ + +#pragma once + +#include <iostream> +#include <sstream> +#include <libdevcore/Common.h> +#include <libdevcore/Assertions.h> +#include <libdevcore/SHA3.h> +#include <libevmcore/Instruction.h> +#include <libevmasm/SourceLocation.h> +#include <libevmasm/AssemblyItem.h> +#include <libevmasm/LinkerObject.h> +#include "Exceptions.h" +#include <json/json.h> + +namespace Json +{ +class Value; +} +namespace dev +{ +namespace eth +{ + +class Assembly +{ +public: + Assembly() {} + + AssemblyItem newTag() { return AssemblyItem(Tag, m_usedTags++); } + AssemblyItem newPushTag() { return AssemblyItem(PushTag, m_usedTags++); } + AssemblyItem newData(bytes const& _data) { h256 h(sha3(asString(_data))); m_data[h] = _data; return AssemblyItem(PushData, h); } + AssemblyItem newSub(Assembly const& _sub) { m_subs.push_back(_sub); return AssemblyItem(PushSub, m_subs.size() - 1); } + Assembly const& sub(size_t _sub) const { return m_subs.at(_sub); } + Assembly& sub(size_t _sub) { return m_subs.at(_sub); } + AssemblyItem newPushString(std::string const& _data) { h256 h(sha3(_data)); m_strings[h] = _data; return AssemblyItem(PushString, h); } + AssemblyItem newPushSubSize(u256 const& _subId) { return AssemblyItem(PushSubSize, _subId); } + AssemblyItem newPushLibraryAddress(std::string const& _identifier); + + AssemblyItem append() { return append(newTag()); } + void append(Assembly const& _a); + void append(Assembly const& _a, int _deposit); + AssemblyItem const& append(AssemblyItem const& _i); + AssemblyItem const& append(std::string const& _data) { return append(newPushString(_data)); } + AssemblyItem const& append(bytes const& _data) { return append(newData(_data)); } + AssemblyItem appendSubSize(Assembly const& _a) { auto ret = newSub(_a); append(newPushSubSize(ret.data())); return ret; } + /// Pushes the final size of the current assembly itself. Use this when the code is modified + /// after compilation and CODESIZE is not an option. + void appendProgramSize() { append(AssemblyItem(PushProgramSize)); } + void appendLibraryAddress(std::string const& _identifier) { append(newPushLibraryAddress(_identifier)); } + + AssemblyItem appendJump() { auto ret = append(newPushTag()); append(Instruction::JUMP); return ret; } + AssemblyItem appendJumpI() { auto ret = append(newPushTag()); append(Instruction::JUMPI); return ret; } + AssemblyItem appendJump(AssemblyItem const& _tag) { auto ret = append(_tag.pushTag()); append(Instruction::JUMP); return ret; } + AssemblyItem appendJumpI(AssemblyItem const& _tag) { auto ret = append(_tag.pushTag()); append(Instruction::JUMPI); return ret; } + AssemblyItem errorTag() { return AssemblyItem(PushTag, 0); } + + template <class T> Assembly& operator<<(T const& _d) { append(_d); return *this; } + AssemblyItems const& items() const { return m_items; } + AssemblyItem const& back() const { return m_items.back(); } + std::string backString() const { return m_items.size() && m_items.back().type() == PushString ? m_strings.at((h256)m_items.back().data()) : std::string(); } + + void onePath() { if (asserts(!m_totalDeposit && !m_baseDeposit)) BOOST_THROW_EXCEPTION(InvalidDeposit()); m_baseDeposit = m_deposit; m_totalDeposit = INT_MAX; } + void otherPath() { donePath(); m_totalDeposit = m_deposit; m_deposit = m_baseDeposit; } + void donePaths() { donePath(); m_totalDeposit = m_baseDeposit = 0; } + void ignored() { m_baseDeposit = m_deposit; } + void endIgnored() { m_deposit = m_baseDeposit; m_baseDeposit = 0; } + + void popTo(int _deposit) { while (m_deposit > _deposit) append(Instruction::POP); } + + void injectStart(AssemblyItem const& _i); + std::string out() const; + int deposit() const { return m_deposit; } + void adjustDeposit(int _adjustment) { m_deposit += _adjustment; if (asserts(m_deposit >= 0)) BOOST_THROW_EXCEPTION(InvalidDeposit()); } + void setDeposit(int _deposit) { m_deposit = _deposit; if (asserts(m_deposit >= 0)) BOOST_THROW_EXCEPTION(InvalidDeposit()); } + + /// Changes the source location used for each appended item. + void setSourceLocation(SourceLocation const& _location) { m_currentSourceLocation = _location; } + + /// Assembles the assembly into bytecode. The assembly should not be modified after this call. + LinkerObject const& assemble() const; + bytes const& data(h256 const& _i) const { return m_data.at(_i); } + + /// Modify (if @a _enable is set) and return the current assembly such that creation and + /// execution gas usage is optimised. @a _isCreation should be true for the top-level assembly. + /// @a _runs specifes an estimate on how often each opcode in this assembly will be executed, + /// i.e. use a small value to optimise for size and a large value to optimise for runtime. + Assembly& optimise(bool _enable, bool _isCreation = true, size_t _runs = 200); + Json::Value stream( + std::ostream& _out, + std::string const& _prefix = "", + const StringMap &_sourceCodes = StringMap(), + bool _inJsonFormat = false + ) const; + +protected: + std::string locationFromSources(StringMap const& _sourceCodes, SourceLocation const& _location) const; + void donePath() { if (m_totalDeposit != INT_MAX && m_totalDeposit != m_deposit) BOOST_THROW_EXCEPTION(InvalidDeposit()); } + unsigned bytesRequired() const; + +private: + Json::Value streamAsmJson(std::ostream& _out, StringMap const& _sourceCodes) const; + std::ostream& streamAsm(std::ostream& _out, std::string const& _prefix, StringMap const& _sourceCodes) const; + Json::Value createJsonValue(std::string _name, int _begin, int _end, std::string _value = std::string(), std::string _jumpType = std::string()) const; + +protected: + // 0 is reserved for exception + unsigned m_usedTags = 1; + AssemblyItems m_items; + std::map<h256, bytes> m_data; + std::vector<Assembly> m_subs; + std::map<h256, std::string> m_strings; + std::map<h256, std::string> m_libraries; ///< Identifiers of libraries to be linked. + + mutable LinkerObject m_assembledObject; + + int m_deposit = 0; + int m_baseDeposit = 0; + int m_totalDeposit = 0; + + SourceLocation m_currentSourceLocation; +}; + +inline std::ostream& operator<<(std::ostream& _out, Assembly const& _a) +{ + _a.stream(_out); + return _out; +} + +} +} diff --git a/libevmasm/AssemblyItem.cpp b/libevmasm/AssemblyItem.cpp new file mode 100644 index 00000000..d7051064 --- /dev/null +++ b/libevmasm/AssemblyItem.cpp @@ -0,0 +1,134 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** @file Assembly.cpp + * @author Gav Wood <i@gavwood.com> + * @date 2014 + */ + +#include "AssemblyItem.h" +#include <fstream> + +using namespace std; +using namespace dev; +using namespace dev::eth; + +unsigned AssemblyItem::bytesRequired(unsigned _addressLength) const +{ + switch (m_type) + { + case Operation: + case Tag: // 1 byte for the JUMPDEST + return 1; + case PushString: + return 33; + case Push: + return 1 + max<unsigned>(1, dev::bytesRequired(m_data)); + case PushSubSize: + case PushProgramSize: + return 4; // worst case: a 16MB program + case PushTag: + case PushData: + case PushSub: + return 1 + _addressLength; + case PushLibraryAddress: + return 21; + default: + break; + } + BOOST_THROW_EXCEPTION(InvalidOpcode()); +} + +int AssemblyItem::deposit() const +{ + switch (m_type) + { + case Operation: + return instructionInfo(instruction()).ret - instructionInfo(instruction()).args; + case Push: + case PushString: + case PushTag: + case PushData: + case PushSub: + case PushSubSize: + case PushProgramSize: + case PushLibraryAddress: + return 1; + case Tag: + return 0; + default:; + } + return 0; +} + +string AssemblyItem::getJumpTypeAsString() const +{ + switch (m_jumpType) + { + case JumpType::IntoFunction: + return "[in]"; + case JumpType::OutOfFunction: + return "[out]"; + case JumpType::Ordinary: + default: + return ""; + } +} + +ostream& dev::eth::operator<<(ostream& _out, AssemblyItem const& _item) +{ + switch (_item.type()) + { + case Operation: + _out << " " << instructionInfo(_item.instruction()).name; + if (_item.instruction() == eth::Instruction::JUMP || _item.instruction() == eth::Instruction::JUMPI) + _out << "\t" << _item.getJumpTypeAsString(); + break; + case Push: + _out << " PUSH " << hex << _item.data(); + break; + case PushString: + _out << " PushString" << hex << (unsigned)_item.data(); + break; + case PushTag: + _out << " PushTag " << _item.data(); + break; + case Tag: + _out << " Tag " << _item.data(); + break; + case PushData: + _out << " PushData " << hex << (unsigned)_item.data(); + break; + case PushSub: + _out << " PushSub " << hex << h256(_item.data()).abridgedMiddle(); + break; + case PushSubSize: + _out << " PushSubSize " << hex << h256(_item.data()).abridgedMiddle(); + break; + case PushProgramSize: + _out << " PushProgramSize"; + break; + case PushLibraryAddress: + _out << " PushLibraryAddress " << hex << h256(_item.data()).abridgedMiddle(); + break; + case UndefinedItem: + _out << " ???"; + break; + default: + BOOST_THROW_EXCEPTION(InvalidOpcode()); + } + return _out; +} diff --git a/libevmasm/AssemblyItem.h b/libevmasm/AssemblyItem.h new file mode 100644 index 00000000..795b5a8a --- /dev/null +++ b/libevmasm/AssemblyItem.h @@ -0,0 +1,123 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** @file Assembly.h + * @author Gav Wood <i@gavwood.com> + * @date 2014 + */ + +#pragma once + +#include <iostream> +#include <sstream> +#include <libdevcore/Common.h> +#include <libdevcore/Assertions.h> +#include <libevmcore/Instruction.h> +#include <libevmasm/SourceLocation.h> +#include "Exceptions.h" + +namespace dev +{ +namespace eth +{ + +enum AssemblyItemType { + UndefinedItem, + Operation, + Push, + PushString, + PushTag, + PushSub, + PushSubSize, + PushProgramSize, + Tag, + PushData, + PushLibraryAddress ///< Push a currently unknown address of another (library) contract. +}; + +class Assembly; + +class AssemblyItem +{ +public: + enum class JumpType { Ordinary, IntoFunction, OutOfFunction }; + + AssemblyItem(u256 _push, SourceLocation const& _location = SourceLocation()): + AssemblyItem(Push, _push, _location) { } + AssemblyItem(Instruction _i, SourceLocation const& _location = SourceLocation()): + AssemblyItem(Operation, byte(_i), _location) { } + AssemblyItem(AssemblyItemType _type, u256 _data = 0, SourceLocation const& _location = SourceLocation()): + m_type(_type), + m_data(_data), + m_location(_location) + { + } + + AssemblyItem tag() const { assertThrow(m_type == PushTag || m_type == Tag, Exception, ""); return AssemblyItem(Tag, m_data); } + AssemblyItem pushTag() const { assertThrow(m_type == PushTag || m_type == Tag, Exception, ""); return AssemblyItem(PushTag, m_data); } + + AssemblyItemType type() const { return m_type; } + u256 const& data() const { return m_data; } + void setType(AssemblyItemType const _type) { m_type = _type; } + void setData(u256 const& _data) { m_data = _data; } + + /// @returns the instruction of this item (only valid if type() == Operation) + Instruction instruction() const { return Instruction(byte(m_data)); } + + /// @returns true if the type and data of the items are equal. + bool operator==(AssemblyItem const& _other) const { return m_type == _other.m_type && m_data == _other.m_data; } + bool operator!=(AssemblyItem const& _other) const { return !operator==(_other); } + /// Less-than operator compatible with operator==. + bool operator<(AssemblyItem const& _other) const { return std::tie(m_type, m_data) < std::tie(_other.m_type, _other.m_data); } + + /// @returns an upper bound for the number of bytes required by this item, assuming that + /// the value of a jump tag takes @a _addressLength bytes. + unsigned bytesRequired(unsigned _addressLength) const; + int deposit() const; + + bool match(AssemblyItem const& _i) const { return _i.m_type == UndefinedItem || (m_type == _i.m_type && (m_type != Operation || m_data == _i.m_data)); } + void setLocation(SourceLocation const& _location) { m_location = _location; } + SourceLocation const& location() const { return m_location; } + + void setJumpType(JumpType _jumpType) { m_jumpType = _jumpType; } + JumpType getJumpType() const { return m_jumpType; } + std::string getJumpTypeAsString() const; + + void setPushedValue(u256 const& _value) const { m_pushedValue = std::make_shared<u256>(_value); } + u256 const* pushedValue() const { return m_pushedValue.get(); } + +private: + AssemblyItemType m_type; + u256 m_data; + SourceLocation m_location; + JumpType m_jumpType = JumpType::Ordinary; + /// Pushed value for operations with data to be determined during assembly stage, + /// e.g. PushSubSize, PushTag, PushSub, etc. + mutable std::shared_ptr<u256> m_pushedValue; +}; + +using AssemblyItems = std::vector<AssemblyItem>; + +std::ostream& operator<<(std::ostream& _out, AssemblyItem const& _item); +inline std::ostream& operator<<(std::ostream& _out, AssemblyItems const& _items) +{ + for (AssemblyItem const& item: _items) + _out << item; + return _out; +} + +} +} diff --git a/libevmasm/BlockDeduplicator.cpp b/libevmasm/BlockDeduplicator.cpp new file mode 100644 index 00000000..d930ea22 --- /dev/null +++ b/libevmasm/BlockDeduplicator.cpp @@ -0,0 +1,126 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @file BlockDeduplicator.cpp + * @author Christian <c@ethdev.com> + * @date 2015 + * Unifies basic blocks that share content. + */ + +#include <libevmasm/BlockDeduplicator.h> +#include <functional> +#include <libevmasm/AssemblyItem.h> +#include <libevmasm/SemanticInformation.h> + +using namespace std; +using namespace dev; +using namespace dev::eth; + + +bool BlockDeduplicator::deduplicate() +{ + // Compares indices based on the suffix that starts there, ignoring tags and stopping at + // opcodes that stop the control flow. + + // Virtual tag that signifies "the current block" and which is used to optimise loops. + // We abort if this virtual tag actually exists. + AssemblyItem pushSelf(PushTag, u256(-4)); + if ( + std::count(m_items.cbegin(), m_items.cend(), pushSelf.tag()) || + std::count(m_items.cbegin(), m_items.cend(), pushSelf.pushTag()) + ) + return false; + + function<bool(size_t, size_t)> comparator = [&](size_t _i, size_t _j) + { + if (_i == _j) + return false; + + // To compare recursive loops, we have to already unify PushTag opcodes of the + // block's own tag. + AssemblyItem pushFirstTag(pushSelf); + AssemblyItem pushSecondTag(pushSelf); + + if (_i < m_items.size() && m_items.at(_i).type() == Tag) + pushFirstTag = m_items.at(_i).pushTag(); + if (_j < m_items.size() && m_items.at(_j).type() == Tag) + pushSecondTag = m_items.at(_j).pushTag(); + + BlockIterator first(m_items.begin() + _i, m_items.end(), &pushFirstTag, &pushSelf); + BlockIterator second(m_items.begin() + _j, m_items.end(), &pushSecondTag, &pushSelf); + BlockIterator end(m_items.end(), m_items.end()); + + if (first != end && (*first).type() == Tag) + ++first; + if (second != end && (*second).type() == Tag) + ++second; + + return std::lexicographical_compare(first, end, second, end); + }; + + size_t iterations = 0; + for (; ; ++iterations) + { + //@todo this should probably be optimized. + set<size_t, function<bool(size_t, size_t)>> blocksSeen(comparator); + map<u256, u256> tagReplacement; + for (size_t i = 0; i < m_items.size(); ++i) + { + if (m_items.at(i).type() != Tag) + continue; + auto it = blocksSeen.find(i); + if (it == blocksSeen.end()) + blocksSeen.insert(i); + else + tagReplacement[m_items.at(i).data()] = m_items.at(*it).data(); + } + + bool changed = false; + for (AssemblyItem& item: m_items) + if (item.type() == PushTag && tagReplacement.count(item.data())) + { + changed = true; + item.setData(tagReplacement.at(item.data())); + } + if (!changed) + break; + } + return iterations > 0; +} + +BlockDeduplicator::BlockIterator& BlockDeduplicator::BlockIterator::operator++() +{ + if (it == end) + return *this; + if (SemanticInformation::altersControlFlow(*it) && *it != AssemblyItem(eth::Instruction::JUMPI)) + it = end; + else + { + ++it; + while (it != end && it->type() == Tag) + ++it; + } + return *this; +} + +AssemblyItem const& BlockDeduplicator::BlockIterator::operator*() const +{ + if (replaceItem && replaceWith && *it == *replaceItem) + return *replaceWith; + else + return *it; +} diff --git a/libevmasm/BlockDeduplicator.h b/libevmasm/BlockDeduplicator.h new file mode 100644 index 00000000..c48835fd --- /dev/null +++ b/libevmasm/BlockDeduplicator.h @@ -0,0 +1,77 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @file BlockDeduplicator.h + * @author Christian <c@ethdev.com> + * @date 2015 + * Unifies basic blocks that share content. + */ + +#pragma once + +#include <cstddef> +#include <vector> +#include <functional> + +namespace dev +{ +namespace eth +{ + +class AssemblyItem; +using AssemblyItems = std::vector<AssemblyItem>; + +/** + * Optimizer class to be used to unify blocks that share content. + * Modifies the passed vector in place. + */ +class BlockDeduplicator +{ +public: + BlockDeduplicator(AssemblyItems& _items): m_items(_items) {} + /// @returns true if something was changed + bool deduplicate(); + +private: + /// Iterator that skips tags and skips to the end if (all branches of) the control + /// flow does not continue to the next instruction. + /// If the arguments are supplied to the constructor, replaces items on the fly. + struct BlockIterator: std::iterator<std::forward_iterator_tag, AssemblyItem const> + { + public: + BlockIterator( + AssemblyItems::const_iterator _it, + AssemblyItems::const_iterator _end, + AssemblyItem const* _replaceItem = nullptr, + AssemblyItem const* _replaceWith = nullptr + ): + it(_it), end(_end), replaceItem(_replaceItem), replaceWith(_replaceWith) {} + BlockIterator& operator++(); + bool operator==(BlockIterator const& _other) const { return it == _other.it; } + bool operator!=(BlockIterator const& _other) const { return it != _other.it; } + AssemblyItem const& operator*() const; + AssemblyItems::const_iterator it; + AssemblyItems::const_iterator end; + AssemblyItem const* replaceItem; + AssemblyItem const* replaceWith; + }; + + AssemblyItems& m_items; +}; + +} +} diff --git a/libevmasm/CMakeLists.txt b/libevmasm/CMakeLists.txt new file mode 100644 index 00000000..424644ca --- /dev/null +++ b/libevmasm/CMakeLists.txt @@ -0,0 +1,15 @@ +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DSTATICLIB") + +aux_source_directory(. SRC_LIST) + +set(EXECUTABLE evmasm) + +file(GLOB HEADERS "*.h") + +include_directories(BEFORE ..) +add_library(${EXECUTABLE} ${SRC_LIST} ${HEADERS}) +eth_use(${EXECUTABLE} REQUIRED Eth::evmcore) + +install( TARGETS ${EXECUTABLE} RUNTIME DESTINATION bin ARCHIVE DESTINATION lib LIBRARY DESTINATION lib ) +install( FILES ${HEADERS} DESTINATION include/${EXECUTABLE} ) + diff --git a/libevmasm/CommonSubexpressionEliminator.cpp b/libevmasm/CommonSubexpressionEliminator.cpp new file mode 100644 index 00000000..0797dd29 --- /dev/null +++ b/libevmasm/CommonSubexpressionEliminator.cpp @@ -0,0 +1,506 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @file CommonSubexpressionEliminator.cpp + * @author Christian <c@ethdev.com> + * @date 2015 + * Optimizer step for common subexpression elimination and stack reorganisation. + */ + +#include <functional> +#include <boost/range/adaptor/reversed.hpp> +#include <libdevcore/SHA3.h> +#include <libevmasm/CommonSubexpressionEliminator.h> +#include <libevmasm/AssemblyItem.h> + +using namespace std; +using namespace dev; +using namespace dev::eth; + +vector<AssemblyItem> CommonSubexpressionEliminator::getOptimizedItems() +{ + optimizeBreakingItem(); + + KnownState nextInitialState = m_state; + if (m_breakingItem) + nextInitialState.feedItem(*m_breakingItem); + KnownState nextState = nextInitialState; + + ScopeGuard reset([&]() + { + m_breakingItem = nullptr; + m_storeOperations.clear(); + m_initialState = move(nextInitialState); + m_state = move(nextState); + }); + + map<int, Id> initialStackContents; + map<int, Id> targetStackContents; + int minHeight = m_state.stackHeight() + 1; + if (!m_state.stackElements().empty()) + minHeight = min(minHeight, m_state.stackElements().begin()->first); + for (int height = minHeight; height <= m_initialState.stackHeight(); ++height) + initialStackContents[height] = m_initialState.stackElement(height, SourceLocation()); + for (int height = minHeight; height <= m_state.stackHeight(); ++height) + targetStackContents[height] = m_state.stackElement(height, SourceLocation()); + + AssemblyItems items = CSECodeGenerator(m_state.expressionClasses(), m_storeOperations).generateCode( + m_initialState.sequenceNumber(), + m_initialState.stackHeight(), + initialStackContents, + targetStackContents + ); + if (m_breakingItem) + items.push_back(*m_breakingItem); + + return items; +} + +void CommonSubexpressionEliminator::feedItem(AssemblyItem const& _item, bool _copyItem) +{ + StoreOperation op = m_state.feedItem(_item, _copyItem); + if (op.isValid()) + m_storeOperations.push_back(op); +} + +void CommonSubexpressionEliminator::optimizeBreakingItem() +{ + if (!m_breakingItem) + return; + + ExpressionClasses& classes = m_state.expressionClasses(); + SourceLocation const& itemLocation = m_breakingItem->location(); + if (*m_breakingItem == AssemblyItem(Instruction::JUMPI)) + { + AssemblyItem::JumpType jumpType = m_breakingItem->getJumpType(); + + Id condition = m_state.stackElement(m_state.stackHeight() - 1, itemLocation); + if (classes.knownNonZero(condition)) + { + feedItem(AssemblyItem(Instruction::SWAP1, itemLocation), true); + feedItem(AssemblyItem(Instruction::POP, itemLocation), true); + + AssemblyItem item(Instruction::JUMP, itemLocation); + item.setJumpType(jumpType); + m_breakingItem = classes.storeItem(item); + } + else if (classes.knownZero(condition)) + { + AssemblyItem it(Instruction::POP, itemLocation); + feedItem(it, true); + feedItem(it, true); + m_breakingItem = nullptr; + } + } + else if (*m_breakingItem == AssemblyItem(Instruction::RETURN)) + { + Id size = m_state.stackElement(m_state.stackHeight() - 1, itemLocation); + if (classes.knownZero(size)) + { + feedItem(AssemblyItem(Instruction::POP, itemLocation), true); + feedItem(AssemblyItem(Instruction::POP, itemLocation), true); + AssemblyItem item(Instruction::STOP, itemLocation); + m_breakingItem = classes.storeItem(item); + } + } +} + +CSECodeGenerator::CSECodeGenerator( + ExpressionClasses& _expressionClasses, + vector<CSECodeGenerator::StoreOperation> const& _storeOperations +): + m_expressionClasses(_expressionClasses) +{ + for (auto const& store: _storeOperations) + m_storeOperations[make_pair(store.target, store.slot)].push_back(store); +} + +AssemblyItems CSECodeGenerator::generateCode( + unsigned _initialSequenceNumber, + int _initialStackHeight, + map<int, Id> const& _initialStack, + map<int, Id> const& _targetStackContents +) +{ + m_stackHeight = _initialStackHeight; + m_stack = _initialStack; + m_targetStack = _targetStackContents; + for (auto const& item: m_stack) + m_classPositions[item.second].insert(item.first); + + // generate the dependency graph starting from final storage and memory writes and target stack contents + for (auto const& p: m_storeOperations) + addDependencies(p.second.back().expression); + for (auto const& targetItem: m_targetStack) + { + m_finalClasses.insert(targetItem.second); + addDependencies(targetItem.second); + } + + // store all needed sequenced expressions + set<pair<unsigned, Id>> sequencedExpressions; + for (auto const& p: m_neededBy) + for (auto id: {p.first, p.second}) + if (unsigned seqNr = m_expressionClasses.representative(id).sequenceNumber) + { + if (seqNr < _initialSequenceNumber) + // Invalid sequenced operation. + // @todo quick fix for now. Proper fix needs to choose representative with higher + // sequence number during dependency analyis. + BOOST_THROW_EXCEPTION(StackTooDeepException()); + sequencedExpressions.insert(make_pair(seqNr, id)); + } + + // Perform all operations on storage and memory in order, if they are needed. + for (auto const& seqAndId: sequencedExpressions) + if (!m_classPositions.count(seqAndId.second)) + generateClassElement(seqAndId.second, true); + + // generate the target stack elements + for (auto const& targetItem: m_targetStack) + { + if (m_stack.count(targetItem.first) && m_stack.at(targetItem.first) == targetItem.second) + continue; // already there + generateClassElement(targetItem.second); + assertThrow(!m_classPositions[targetItem.second].empty(), OptimizerException, ""); + if (m_classPositions[targetItem.second].count(targetItem.first)) + continue; + SourceLocation sourceLocation; + if (m_expressionClasses.representative(targetItem.second).item) + sourceLocation = m_expressionClasses.representative(targetItem.second).item->location(); + int position = classElementPosition(targetItem.second); + if (position < targetItem.first) + // it is already at its target, we need another copy + appendDup(position, sourceLocation); + else + appendOrRemoveSwap(position, sourceLocation); + appendOrRemoveSwap(targetItem.first, sourceLocation); + } + + // remove surplus elements + while (removeStackTopIfPossible()) + { + // no-op + } + + // check validity + int finalHeight = 0; + if (!m_targetStack.empty()) + // have target stack, so its height should be the final height + finalHeight = (--m_targetStack.end())->first; + else if (!_initialStack.empty()) + // no target stack, only erase the initial stack + finalHeight = _initialStack.begin()->first - 1; + else + // neither initial no target stack, no change in height + finalHeight = _initialStackHeight; + assertThrow(finalHeight == m_stackHeight, OptimizerException, "Incorrect final stack height."); + + return m_generatedItems; +} + +void CSECodeGenerator::addDependencies(Id _c) +{ + if (m_classPositions.count(_c)) + return; // it is already on the stack + if (m_neededBy.count(_c)) + return; // we already computed the dependencies for _c + ExpressionClasses::Expression expr = m_expressionClasses.representative(_c); + if (expr.item->type() == UndefinedItem) + BOOST_THROW_EXCEPTION( + // If this exception happens, we need to find a different way to generate the + // compound expression. + ItemNotAvailableException() << errinfo_comment("Undefined item requested but not available.") + ); + for (Id argument: expr.arguments) + { + addDependencies(argument); + m_neededBy.insert(make_pair(argument, _c)); + } + if (expr.item && expr.item->type() == Operation && ( + expr.item->instruction() == Instruction::SLOAD || + expr.item->instruction() == Instruction::MLOAD || + expr.item->instruction() == Instruction::SHA3 + )) + { + // this loads an unknown value from storage or memory and thus, in addition to its + // arguments, depends on all store operations to addresses where we do not know that + // they are different that occur before this load + StoreOperation::Target target = expr.item->instruction() == Instruction::SLOAD ? + StoreOperation::Storage : StoreOperation::Memory; + Id slotToLoadFrom = expr.arguments.at(0); + for (auto const& p: m_storeOperations) + { + if (p.first.first != target) + continue; + Id slot = p.first.second; + StoreOperations const& storeOps = p.second; + if (storeOps.front().sequenceNumber > expr.sequenceNumber) + continue; + bool knownToBeIndependent = false; + switch (expr.item->instruction()) + { + case Instruction::SLOAD: + knownToBeIndependent = m_expressionClasses.knownToBeDifferent(slot, slotToLoadFrom); + break; + case Instruction::MLOAD: + knownToBeIndependent = m_expressionClasses.knownToBeDifferentBy32(slot, slotToLoadFrom); + break; + case Instruction::SHA3: + { + Id length = expr.arguments.at(1); + AssemblyItem offsetInstr(Instruction::SUB, expr.item->location()); + Id offsetToStart = m_expressionClasses.find(offsetInstr, {slot, slotToLoadFrom}); + u256 const* o = m_expressionClasses.knownConstant(offsetToStart); + u256 const* l = m_expressionClasses.knownConstant(length); + if (l && *l == 0) + knownToBeIndependent = true; + else if (o) + { + // We could get problems here if both *o and *l are larger than 2**254 + // but it is probably ok for the optimizer to produce wrong code for such cases + // which cannot be executed anyway because of the non-payable price. + if (u2s(*o) <= -32) + knownToBeIndependent = true; + else if (l && u2s(*o) >= 0 && *o >= *l) + knownToBeIndependent = true; + } + break; + } + default: + break; + } + if (knownToBeIndependent) + continue; + + // note that store and load never have the same sequence number + Id latestStore = storeOps.front().expression; + for (auto it = ++storeOps.begin(); it != storeOps.end(); ++it) + if (it->sequenceNumber < expr.sequenceNumber) + latestStore = it->expression; + addDependencies(latestStore); + m_neededBy.insert(make_pair(latestStore, _c)); + } + } +} + +void CSECodeGenerator::generateClassElement(Id _c, bool _allowSequenced) +{ + for (auto it: m_classPositions) + for (auto p: it.second) + if (p > m_stackHeight) + assertThrow(false, OptimizerException, ""); + // do some cleanup + removeStackTopIfPossible(); + + if (m_classPositions.count(_c)) + { + assertThrow( + !m_classPositions[_c].empty(), + OptimizerException, + "Element already removed but still needed." + ); + return; + } + ExpressionClasses::Expression const& expr = m_expressionClasses.representative(_c); + assertThrow( + _allowSequenced || expr.sequenceNumber == 0, + OptimizerException, + "Sequence constrained operation requested out of sequence." + ); + assertThrow(expr.item, OptimizerException, "Non-generated expression without item."); + assertThrow( + expr.item->type() != UndefinedItem, + OptimizerException, + "Undefined item requested but not available." + ); + vector<Id> const& arguments = expr.arguments; + for (Id arg: boost::adaptors::reverse(arguments)) + generateClassElement(arg); + + SourceLocation const& itemLocation = expr.item->location(); + // The arguments are somewhere on the stack now, so it remains to move them at the correct place. + // This is quite difficult as sometimes, the values also have to removed in this process + // (if canBeRemoved() returns true) and the two arguments can be equal. For now, this is + // implemented for every single case for combinations of up to two arguments manually. + if (arguments.size() == 1) + { + if (canBeRemoved(arguments[0], _c)) + appendOrRemoveSwap(classElementPosition(arguments[0]), itemLocation); + else + appendDup(classElementPosition(arguments[0]), itemLocation); + } + else if (arguments.size() == 2) + { + if (canBeRemoved(arguments[1], _c)) + { + appendOrRemoveSwap(classElementPosition(arguments[1]), itemLocation); + if (arguments[0] == arguments[1]) + appendDup(m_stackHeight, itemLocation); + else if (canBeRemoved(arguments[0], _c)) + { + appendOrRemoveSwap(m_stackHeight - 1, itemLocation); + appendOrRemoveSwap(classElementPosition(arguments[0]), itemLocation); + } + else + appendDup(classElementPosition(arguments[0]), itemLocation); + } + else + { + if (arguments[0] == arguments[1]) + { + appendDup(classElementPosition(arguments[0]), itemLocation); + appendDup(m_stackHeight, itemLocation); + } + else if (canBeRemoved(arguments[0], _c)) + { + appendOrRemoveSwap(classElementPosition(arguments[0]), itemLocation); + appendDup(classElementPosition(arguments[1]), itemLocation); + appendOrRemoveSwap(m_stackHeight - 1, itemLocation); + } + else + { + appendDup(classElementPosition(arguments[1]), itemLocation); + appendDup(classElementPosition(arguments[0]), itemLocation); + } + } + } + else + assertThrow( + arguments.size() <= 2, + OptimizerException, + "Opcodes with more than two arguments not implemented yet." + ); + for (size_t i = 0; i < arguments.size(); ++i) + assertThrow(m_stack[m_stackHeight - i] == arguments[i], OptimizerException, "Expected arguments not present." ); + + while (SemanticInformation::isCommutativeOperation(*expr.item) && + !m_generatedItems.empty() && + m_generatedItems.back() == AssemblyItem(Instruction::SWAP1)) + // this will not append a swap but remove the one that is already there + appendOrRemoveSwap(m_stackHeight - 1, itemLocation); + for (size_t i = 0; i < arguments.size(); ++i) + { + m_classPositions[m_stack[m_stackHeight - i]].erase(m_stackHeight - i); + m_stack.erase(m_stackHeight - i); + } + appendItem(*expr.item); + if (expr.item->type() != Operation || instructionInfo(expr.item->instruction()).ret == 1) + { + m_stack[m_stackHeight] = _c; + m_classPositions[_c].insert(m_stackHeight); + } + else + { + assertThrow( + instructionInfo(expr.item->instruction()).ret == 0, + OptimizerException, + "Invalid number of return values." + ); + m_classPositions[_c]; // ensure it is created to mark the expression as generated + } +} + +int CSECodeGenerator::classElementPosition(Id _id) const +{ + assertThrow( + m_classPositions.count(_id) && !m_classPositions.at(_id).empty(), + OptimizerException, + "Element requested but is not present." + ); + return *max_element(m_classPositions.at(_id).begin(), m_classPositions.at(_id).end()); +} + +bool CSECodeGenerator::canBeRemoved(Id _element, Id _result, int _fromPosition) +{ + // Default for _fromPosition is the canonical position of the element. + if (_fromPosition == c_invalidPosition) + _fromPosition = classElementPosition(_element); + + bool haveCopy = m_classPositions.at(_element).size() > 1; + if (m_finalClasses.count(_element)) + // It is part of the target stack. It can be removed if it is a copy that is not in the target position. + return haveCopy && (!m_targetStack.count(_fromPosition) || m_targetStack[_fromPosition] != _element); + else if (!haveCopy) + { + // Can be removed unless it is needed by a class that has not been computed yet. + // Note that m_classPositions also includes classes that were deleted in the meantime. + auto range = m_neededBy.equal_range(_element); + for (auto it = range.first; it != range.second; ++it) + if (it->second != _result && !m_classPositions.count(it->second)) + return false; + } + return true; +} + +bool CSECodeGenerator::removeStackTopIfPossible() +{ + if (m_stack.empty()) + return false; + assertThrow(m_stack.count(m_stackHeight) > 0, OptimizerException, ""); + Id top = m_stack[m_stackHeight]; + if (!canBeRemoved(top, Id(-1), m_stackHeight)) + return false; + m_classPositions[m_stack[m_stackHeight]].erase(m_stackHeight); + m_stack.erase(m_stackHeight); + appendItem(AssemblyItem(Instruction::POP)); + return true; +} + +void CSECodeGenerator::appendDup(int _fromPosition, SourceLocation const& _location) +{ + assertThrow(_fromPosition != c_invalidPosition, OptimizerException, ""); + int instructionNum = 1 + m_stackHeight - _fromPosition; + assertThrow(instructionNum <= 16, StackTooDeepException, "Stack too deep, try removing local variables."); + assertThrow(1 <= instructionNum, OptimizerException, "Invalid stack access."); + appendItem(AssemblyItem(dupInstruction(instructionNum), _location)); + m_stack[m_stackHeight] = m_stack[_fromPosition]; + m_classPositions[m_stack[m_stackHeight]].insert(m_stackHeight); +} + +void CSECodeGenerator::appendOrRemoveSwap(int _fromPosition, SourceLocation const& _location) +{ + assertThrow(_fromPosition != c_invalidPosition, OptimizerException, ""); + if (_fromPosition == m_stackHeight) + return; + int instructionNum = m_stackHeight - _fromPosition; + assertThrow(instructionNum <= 16, StackTooDeepException, "Stack too deep, try removing local variables."); + assertThrow(1 <= instructionNum, OptimizerException, "Invalid stack access."); + appendItem(AssemblyItem(swapInstruction(instructionNum), _location)); + + if (m_stack[m_stackHeight] != m_stack[_fromPosition]) + { + m_classPositions[m_stack[m_stackHeight]].erase(m_stackHeight); + m_classPositions[m_stack[m_stackHeight]].insert(_fromPosition); + m_classPositions[m_stack[_fromPosition]].erase(_fromPosition); + m_classPositions[m_stack[_fromPosition]].insert(m_stackHeight); + swap(m_stack[m_stackHeight], m_stack[_fromPosition]); + } + if (m_generatedItems.size() >= 2 && + SemanticInformation::isSwapInstruction(m_generatedItems.back()) && + *(m_generatedItems.end() - 2) == m_generatedItems.back()) + { + m_generatedItems.pop_back(); + m_generatedItems.pop_back(); + } +} + +void CSECodeGenerator::appendItem(AssemblyItem const& _item) +{ + m_generatedItems.push_back(_item); + m_stackHeight += _item.deposit(); +} diff --git a/libevmasm/CommonSubexpressionEliminator.h b/libevmasm/CommonSubexpressionEliminator.h new file mode 100644 index 00000000..f6c43c57 --- /dev/null +++ b/libevmasm/CommonSubexpressionEliminator.h @@ -0,0 +1,183 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @file CommonSubexpressionEliminator.h + * @author Christian <c@ethdev.com> + * @date 2015 + * Optimizer step for common subexpression elimination and stack reorganisation. + */ + +#pragma once + +#include <vector> +#include <map> +#include <set> +#include <tuple> +#include <ostream> +#include <libdevcore/CommonIO.h> +#include <libdevcore/Exceptions.h> +#include <libevmasm/ExpressionClasses.h> +#include <libevmasm/SemanticInformation.h> +#include <libevmasm/KnownState.h> + +namespace dev +{ +namespace eth +{ + +class AssemblyItem; +using AssemblyItems = std::vector<AssemblyItem>; + +/** + * Optimizer step that performs common subexpression elimination and stack reorganisation, + * i.e. it tries to infer equality among expressions and compute the values of two expressions + * known to be equal only once. + * + * The general workings are that for each assembly item that is fed into the eliminator, an + * equivalence class is derived from the operation and the equivalence class of its arguments. + * DUPi, SWAPi and some arithmetic instructions are used to infer equivalences while these + * classes are determined. + * + * When the list of optimized items is requested, they are generated in a bottom-up fashion, + * adding code for equivalence classes that were not yet computed. + */ +class CommonSubexpressionEliminator +{ +public: + using Id = ExpressionClasses::Id; + using StoreOperation = KnownState::StoreOperation; + + CommonSubexpressionEliminator(KnownState const& _state): m_initialState(_state), m_state(_state) {} + + /// Feeds AssemblyItems into the eliminator and @returns the iterator pointing at the first + /// item that must be fed into a new instance of the eliminator. + template <class _AssemblyItemIterator> + _AssemblyItemIterator feedItems(_AssemblyItemIterator _iterator, _AssemblyItemIterator _end); + + /// @returns the resulting items after optimization. + AssemblyItems getOptimizedItems(); + +private: + /// Feeds the item into the system for analysis. + void feedItem(AssemblyItem const& _item, bool _copyItem = false); + + /// Tries to optimize the item that breaks the basic block at the end. + void optimizeBreakingItem(); + + KnownState m_initialState; + KnownState m_state; + /// Keeps information about which storage or memory slots were written to at which sequence + /// number with what instruction. + std::vector<StoreOperation> m_storeOperations; + + /// The item that breaks the basic block, can be nullptr. + /// It is usually appended to the block but can be optimized in some cases. + AssemblyItem const* m_breakingItem = nullptr; +}; + +/** + * Unit that generates code from current stack layout, target stack layout and information about + * the equivalence classes. + */ +class CSECodeGenerator +{ +public: + using StoreOperation = CommonSubexpressionEliminator::StoreOperation; + using StoreOperations = std::vector<StoreOperation>; + using Id = ExpressionClasses::Id; + + /// Initializes the code generator with the given classes and store operations. + /// The store operations have to be sorted by sequence number in ascending order. + CSECodeGenerator(ExpressionClasses& _expressionClasses, StoreOperations const& _storeOperations); + + /// @returns the assembly items generated from the given requirements + /// @param _initialSequenceNumber starting sequence number, do not generate sequenced operations + /// before this number. + /// @param _initialStack current contents of the stack (up to stack height of zero) + /// @param _targetStackContents final contents of the stack, by stack height relative to initial + /// @note should only be called once on each object. + AssemblyItems generateCode( + unsigned _initialSequenceNumber, + int _initialStackHeight, + std::map<int, Id> const& _initialStack, + std::map<int, Id> const& _targetStackContents + ); + +private: + /// Recursively discovers all dependencies to @a m_requests. + void addDependencies(Id _c); + + /// Produce code that generates the given element if it is not yet present. + /// @param _allowSequenced indicates that sequence-constrained operations are allowed + void generateClassElement(Id _c, bool _allowSequenced = false); + /// @returns the position of the representative of the given id on the stack. + /// @note throws an exception if it is not on the stack. + int classElementPosition(Id _id) const; + + /// @returns true if the copy of @a _element can be removed from stack position _fromPosition + /// - in general or, if given, while computing @a _result. + bool canBeRemoved(Id _element, Id _result = Id(-1), int _fromPosition = c_invalidPosition); + + /// Appends code to remove the topmost stack element if it can be removed. + bool removeStackTopIfPossible(); + + /// Appends a dup instruction to m_generatedItems to retrieve the element at the given stack position. + void appendDup(int _fromPosition, SourceLocation const& _location); + /// Appends a swap instruction to m_generatedItems to retrieve the element at the given stack position. + /// @note this might also remove the last item if it exactly the same swap instruction. + void appendOrRemoveSwap(int _fromPosition, SourceLocation const& _location); + /// Appends the given assembly item. + void appendItem(AssemblyItem const& _item); + + static const int c_invalidPosition = -0x7fffffff; + + AssemblyItems m_generatedItems; + /// Current height of the stack relative to the start. + int m_stackHeight; + /// If (b, a) is in m_requests then b is needed to compute a. + std::multimap<Id, Id> m_neededBy; + /// Current content of the stack. + std::map<int, Id> m_stack; + /// Current positions of equivalence classes, equal to the empty set if already deleted. + std::map<Id, std::set<int>> m_classPositions; + + /// The actual eqivalence class items and how to compute them. + ExpressionClasses& m_expressionClasses; + /// Keeps information about which storage or memory slots were written to by which operations. + /// The operations are sorted ascendingly by sequence number. + std::map<std::pair<StoreOperation::Target, Id>, StoreOperations> m_storeOperations; + /// The set of equivalence classes that should be present on the stack at the end. + std::set<Id> m_finalClasses; + std::map<int, Id> m_targetStack; +}; + +template <class _AssemblyItemIterator> +_AssemblyItemIterator CommonSubexpressionEliminator::feedItems( + _AssemblyItemIterator _iterator, + _AssemblyItemIterator _end +) +{ + assertThrow(!m_breakingItem, OptimizerException, "Invalid use of CommonSubexpressionEliminator."); + for (; _iterator != _end && !SemanticInformation::breaksCSEAnalysisBlock(*_iterator); ++_iterator) + feedItem(*_iterator); + if (_iterator != _end) + m_breakingItem = &(*_iterator++); + return _iterator; +} + +} +} diff --git a/libevmasm/ConstantOptimiser.cpp b/libevmasm/ConstantOptimiser.cpp new file mode 100644 index 00000000..766371b0 --- /dev/null +++ b/libevmasm/ConstantOptimiser.cpp @@ -0,0 +1,225 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** @file ConstantOptimiser.cpp + * @author Christian <c@ethdev.com> + * @date 2015 + */ + +#include <libevmasm/ConstantOptimiser.h> +#include <libevmasm/Assembly.h> +#include <libevmasm/GasMeter.h> +using namespace std; +using namespace dev; +using namespace dev::eth; + +unsigned ConstantOptimisationMethod::optimiseConstants( + bool _isCreation, + size_t _runs, + Assembly& _assembly, + AssemblyItems& _items +) +{ + unsigned optimisations = 0; + map<AssemblyItem, size_t> pushes; + for (AssemblyItem const& item: _items) + if (item.type() == Push) + pushes[item]++; + for (auto it: pushes) + { + AssemblyItem const& item = it.first; + if (item.data() < 0x100) + continue; + Params params; + params.multiplicity = it.second; + params.isCreation = _isCreation; + params.runs = _runs; + LiteralMethod lit(params, item.data()); + bigint literalGas = lit.gasNeeded(); + CodeCopyMethod copy(params, item.data()); + bigint copyGas = copy.gasNeeded(); + ComputeMethod compute(params, item.data()); + bigint computeGas = compute.gasNeeded(); + if (copyGas < literalGas && copyGas < computeGas) + { + copy.execute(_assembly, _items); + optimisations++; + } + else if (computeGas < literalGas && computeGas < copyGas) + { + compute.execute(_assembly, _items); + optimisations++; + } + } + return optimisations; +} + +bigint ConstantOptimisationMethod::simpleRunGas(AssemblyItems const& _items) +{ + EVMSchedule schedule; // TODO: make relevant to context. + bigint gas = 0; + for (AssemblyItem const& item: _items) + if (item.type() == Push) + gas += GasMeter::runGas(Instruction::PUSH1, schedule); + else if (item.type() == Operation) + gas += GasMeter::runGas(item.instruction(), schedule); + return gas; +} + +bigint ConstantOptimisationMethod::dataGas(bytes const& _data) const +{ + if (m_params.isCreation) + { + bigint gas; + for (auto b: _data) + gas += b ? m_schedule.txDataNonZeroGas : m_schedule.txDataZeroGas; + return gas; + } + else + return m_schedule.createDataGas * dataSize(); +} + +size_t ConstantOptimisationMethod::bytesRequired(AssemblyItems const& _items) +{ + size_t size = 0; + for (AssemblyItem const& item: _items) + size += item.bytesRequired(3); // assume 3 byte addresses + return size; +} + +void ConstantOptimisationMethod::replaceConstants( + AssemblyItems& _items, + AssemblyItems const& _replacement +) const +{ + assertThrow(_items.size() > 0, OptimizerException, ""); + for (size_t i = 0; i < _items.size(); ++i) + { + if (_items.at(i) != AssemblyItem(m_value)) + continue; + _items[i] = _replacement[0]; + _items.insert(_items.begin() + i + 1, _replacement.begin() + 1, _replacement.end()); + i += _replacement.size() - 1; + } +} + +bigint LiteralMethod::gasNeeded() +{ + return combineGas( + simpleRunGas({Instruction::PUSH1}), + // PUSHX plus data + (m_params.isCreation ? m_schedule.txDataNonZeroGas : m_schedule.createDataGas) + dataGas(), + 0 + ); +} + +CodeCopyMethod::CodeCopyMethod(Params const& _params, u256 const& _value): + ConstantOptimisationMethod(_params, _value) +{ + m_copyRoutine = AssemblyItems{ + u256(0), + Instruction::DUP1, + Instruction::MLOAD, // back up memory + u256(32), + AssemblyItem(PushData, u256(1) << 16), // has to be replaced + Instruction::DUP4, + Instruction::CODECOPY, + Instruction::DUP2, + Instruction::MLOAD, + Instruction::SWAP2, + Instruction::MSTORE + }; +} + +bigint CodeCopyMethod::gasNeeded() +{ + return combineGas( + // Run gas: we ignore memory increase costs + simpleRunGas(m_copyRoutine) + m_schedule.copyGas, + // Data gas for copy routines: Some bytes are zero, but we ignore them. + bytesRequired(m_copyRoutine) * (m_params.isCreation ? m_schedule.txDataNonZeroGas : m_schedule.createDataGas), + // Data gas for data itself + dataGas(toBigEndian(m_value)) + ); +} + +void CodeCopyMethod::execute(Assembly& _assembly, AssemblyItems& _items) +{ + bytes data = toBigEndian(m_value); + m_copyRoutine[4] = _assembly.newData(data); + replaceConstants(_items, m_copyRoutine); +} + +AssemblyItems ComputeMethod::findRepresentation(u256 const& _value) +{ + if (_value < 0x10000) + // Very small value, not worth computing + return AssemblyItems{_value}; + else if (dev::bytesRequired(~_value) < dev::bytesRequired(_value)) + // Negated is shorter to represent + return findRepresentation(~_value) + AssemblyItems{Instruction::NOT}; + else + { + // Decompose value into a * 2**k + b where abs(b) << 2**k + // Is not always better, try literal and decomposition method. + AssemblyItems routine{u256(_value)}; + bigint bestGas = gasNeeded(routine); + for (unsigned bits = 255; bits > 8; --bits) + { + unsigned gapDetector = unsigned(_value >> (bits - 8)) & 0x1ff; + if (gapDetector != 0xff && gapDetector != 0x100) + continue; + + u256 powerOfTwo = u256(1) << bits; + u256 upperPart = _value >> bits; + bigint lowerPart = _value & (powerOfTwo - 1); + if (abs(powerOfTwo - lowerPart) < lowerPart) + lowerPart = lowerPart - powerOfTwo; // make it negative + if (abs(lowerPart) >= (powerOfTwo >> 8)) + continue; + + AssemblyItems newRoutine; + if (lowerPart != 0) + newRoutine += findRepresentation(u256(abs(lowerPart))); + newRoutine += AssemblyItems{u256(bits), u256(2), Instruction::EXP}; + if (upperPart != 1 && upperPart != 0) + newRoutine += findRepresentation(upperPart) + AssemblyItems{Instruction::MUL}; + if (lowerPart > 0) + newRoutine += AssemblyItems{Instruction::ADD}; + else if (lowerPart < 0) + newRoutine.push_back(Instruction::SUB); + + bigint newGas = gasNeeded(newRoutine); + if (newGas < bestGas) + { + bestGas = move(newGas); + routine = move(newRoutine); + } + } + return routine; + } +} + +bigint ComputeMethod::gasNeeded(AssemblyItems const& _routine) +{ + size_t numExps = count(_routine.begin(), _routine.end(), Instruction::EXP); + return combineGas( + simpleRunGas(_routine) + numExps * (m_schedule.expGas + m_schedule.expByteGas), + // Data gas for routine: Some bytes are zero, but we ignore them. + bytesRequired(_routine) * (m_params.isCreation ? m_schedule.txDataNonZeroGas : m_schedule.createDataGas), + 0 + ); +} diff --git a/libevmasm/ConstantOptimiser.h b/libevmasm/ConstantOptimiser.h new file mode 100644 index 00000000..64cb66bb --- /dev/null +++ b/libevmasm/ConstantOptimiser.h @@ -0,0 +1,155 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** @file ConstantOptimiser.cpp + * @author Christian <c@ethdev.com> + * @date 2015 + */ + +#pragma once + +#include <vector> +#include <libdevcore/CommonData.h> +#include <libdevcore/CommonIO.h> +#include <libethcore/ChainOperationParams.h> + +namespace dev +{ +namespace eth +{ + +class AssemblyItem; +using AssemblyItems = std::vector<AssemblyItem>; +class Assembly; + +// TODO: FIXME: HOMESTEAD: XXX: @chfast populate m_schedule from an ExtVMFace instance via ExtVMFace::evmSchedule. + +/** + * Abstract base class for one way to change how constants are represented in the code. + */ +class ConstantOptimisationMethod +{ +public: + /// Tries to optimised how constants are represented in the source code and modifies + /// @a _assembly and its @a _items. + /// @returns zero if no optimisations could be performed. + static unsigned optimiseConstants( + bool _isCreation, + size_t _runs, + Assembly& _assembly, + AssemblyItems& _items + ); + + struct Params + { + bool isCreation; ///< Whether this is called during contract creation or runtime. + size_t runs; ///< Estimated number of calls per opcode oven the lifetime of the contract. + size_t multiplicity; ///< Number of times the constant appears in the code. + }; + + explicit ConstantOptimisationMethod(Params const& _params, u256 const& _value): + m_params(_params), m_value(_value) {} + virtual bigint gasNeeded() = 0; + virtual void execute(Assembly& _assembly, AssemblyItems& _items) = 0; + +protected: + size_t dataSize() const { return std::max<size_t>(1, dev::bytesRequired(m_value)); } + + /// @returns the run gas for the given items ignoring special gas costs + static bigint simpleRunGas(AssemblyItems const& _items); + /// @returns the gas needed to store the given data literally + bigint dataGas(bytes const& _data) const; + /// @returns the gas needed to store the value literally + bigint dataGas() const { return dataGas(toCompactBigEndian(m_value, 1)); } + static size_t bytesRequired(AssemblyItems const& _items); + /// @returns the combined estimated gas usage taking @a m_params into account. + bigint combineGas( + bigint const& _runGas, + bigint const& _repeatedDataGas, + bigint const& _uniqueDataGas + ) + { + // _runGas is not multiplied by _multiplicity because the runs are "per opcode" + return m_params.runs * _runGas + m_params.multiplicity * _repeatedDataGas + _uniqueDataGas; + } + + /// Replaces the constant by the code given in @a _replacement. + void replaceConstants(AssemblyItems& _items, AssemblyItems const& _replacement) const; + + Params m_params; + u256 const& m_value; + EVMSchedule m_schedule; +}; + +/** + * Optimisation method that pushes the constant to the stack literally. This is the default method, + * i.e. executing it does not alter the Assembly. + */ +class LiteralMethod: public ConstantOptimisationMethod +{ +public: + explicit LiteralMethod(Params const& _params, u256 const& _value): + ConstantOptimisationMethod(_params, _value) {} + virtual bigint gasNeeded() override; + virtual void execute(Assembly&, AssemblyItems&) override {} + + EVMSchedule m_schedule; +}; + +/** + * Method that stores the data in the .data section of the code and copies it to the stack. + */ +class CodeCopyMethod: public ConstantOptimisationMethod +{ +public: + explicit CodeCopyMethod(Params const& _params, u256 const& _value); + virtual bigint gasNeeded() override; + virtual void execute(Assembly& _assembly, AssemblyItems& _items) override; + +protected: + AssemblyItems m_copyRoutine; + EVMSchedule m_schedule; +}; + +/** + * Method that tries to compute the constant. + */ +class ComputeMethod: public ConstantOptimisationMethod +{ +public: + explicit ComputeMethod(Params const& _params, u256 const& _value): + ConstantOptimisationMethod(_params, _value) + { + m_routine = findRepresentation(m_value); + } + + virtual bigint gasNeeded() override { return gasNeeded(m_routine); } + virtual void execute(Assembly&, AssemblyItems& _items) override + { + replaceConstants(_items, m_routine); + } + +protected: + /// Tries to recursively find a way to compute @a _value. + AssemblyItems findRepresentation(u256 const& _value); + bigint gasNeeded(AssemblyItems const& _routine); + + AssemblyItems m_routine; + EVMSchedule m_schedule; +}; + +} +} diff --git a/libevmasm/ControlFlowGraph.cpp b/libevmasm/ControlFlowGraph.cpp new file mode 100644 index 00000000..fc2144c7 --- /dev/null +++ b/libevmasm/ControlFlowGraph.cpp @@ -0,0 +1,369 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @file ControlFlowGraph.cpp + * @author Christian <c@ethdev.com> + * @date 2015 + * Control flow analysis for the optimizer. + */ + +#include <libevmasm/ControlFlowGraph.h> +#include <map> +#include <memory> +#include <algorithm> +#include <libevmasm/Exceptions.h> +#include <libevmasm/AssemblyItem.h> +#include <libevmasm/SemanticInformation.h> +#include <libevmasm/KnownState.h> + +using namespace std; +using namespace dev; +using namespace dev::eth; + +BlockId::BlockId(u256 const& _id): + m_id(unsigned(_id)) +{ + assertThrow( _id < initial().m_id, OptimizerException, "Tag number too large."); +} + +BasicBlocks ControlFlowGraph::optimisedBlocks() +{ + if (m_items.empty()) + return BasicBlocks(); + + findLargestTag(); + splitBlocks(); + resolveNextLinks(); + removeUnusedBlocks(); + setPrevLinks(); + gatherKnowledge(); + + return rebuildCode(); +} + +void ControlFlowGraph::findLargestTag() +{ + m_lastUsedId = 0; + for (auto const& item: m_items) + if (item.type() == Tag || item.type() == PushTag) + { + // Assert that it can be converted. + BlockId(item.data()); + m_lastUsedId = max(unsigned(item.data()), m_lastUsedId); + } +} + +void ControlFlowGraph::splitBlocks() +{ + m_blocks.clear(); + BlockId id = BlockId::initial(); + m_blocks[id].begin = 0; + for (size_t index = 0; index < m_items.size(); ++index) + { + AssemblyItem const& item = m_items.at(index); + if (item.type() == Tag) + { + if (id) + m_blocks[id].end = index; + id = BlockId::invalid(); + } + if (!id) + { + id = item.type() == Tag ? BlockId(item.data()) : generateNewId(); + m_blocks[id].begin = index; + } + if (item.type() == PushTag) + m_blocks[id].pushedTags.push_back(BlockId(item.data())); + if (SemanticInformation::altersControlFlow(item)) + { + m_blocks[id].end = index + 1; + if (item == Instruction::JUMP) + m_blocks[id].endType = BasicBlock::EndType::JUMP; + else if (item == Instruction::JUMPI) + m_blocks[id].endType = BasicBlock::EndType::JUMPI; + else + m_blocks[id].endType = BasicBlock::EndType::STOP; + id = BlockId::invalid(); + } + } + if (id) + { + m_blocks[id].end = m_items.size(); + if (m_blocks[id].endType == BasicBlock::EndType::HANDOVER) + m_blocks[id].endType = BasicBlock::EndType::STOP; + } +} + +void ControlFlowGraph::resolveNextLinks() +{ + map<unsigned, BlockId> blockByBeginPos; + for (auto const& idAndBlock: m_blocks) + if (idAndBlock.second.begin != idAndBlock.second.end) + blockByBeginPos[idAndBlock.second.begin] = idAndBlock.first; + + for (auto& idAndBlock: m_blocks) + { + BasicBlock& block = idAndBlock.second; + switch (block.endType) + { + case BasicBlock::EndType::JUMPI: + case BasicBlock::EndType::HANDOVER: + assertThrow( + blockByBeginPos.count(block.end), + OptimizerException, + "Successor block not found." + ); + block.next = blockByBeginPos.at(block.end); + break; + default: + break; + } + } +} + +void ControlFlowGraph::removeUnusedBlocks() +{ + vector<BlockId> blocksToProcess{BlockId::initial()}; + set<BlockId> neededBlocks{BlockId::initial()}; + while (!blocksToProcess.empty()) + { + BasicBlock const& block = m_blocks.at(blocksToProcess.back()); + blocksToProcess.pop_back(); + for (BlockId tag: block.pushedTags) + if (!neededBlocks.count(tag) && m_blocks.count(tag)) + { + neededBlocks.insert(tag); + blocksToProcess.push_back(tag); + } + if (block.next && !neededBlocks.count(block.next)) + { + neededBlocks.insert(block.next); + blocksToProcess.push_back(block.next); + } + } + for (auto it = m_blocks.begin(); it != m_blocks.end();) + if (neededBlocks.count(it->first)) + ++it; + else + m_blocks.erase(it++); +} + +void ControlFlowGraph::setPrevLinks() +{ + for (auto& idAndBlock: m_blocks) + { + BasicBlock& block = idAndBlock.second; + switch (block.endType) + { + case BasicBlock::EndType::JUMPI: + case BasicBlock::EndType::HANDOVER: + assertThrow( + !m_blocks.at(block.next).prev, + OptimizerException, + "Successor already has predecessor." + ); + m_blocks[block.next].prev = idAndBlock.first; + break; + default: + break; + } + } + // If block ends with jump to not yet linked block, link them removing the jump + for (auto& idAndBlock: m_blocks) + { + BlockId blockId = idAndBlock.first; + BasicBlock& block = idAndBlock.second; + if (block.endType != BasicBlock::EndType::JUMP || block.end - block.begin < 2) + continue; + AssemblyItem const& push = m_items.at(block.end - 2); + if (push.type() != PushTag) + continue; + BlockId nextId(push.data()); + if (m_blocks.count(nextId) && m_blocks.at(nextId).prev) + continue; + bool hasLoop = false; + for (BlockId id = nextId; id && m_blocks.count(id) && !hasLoop; id = m_blocks.at(id).next) + hasLoop = (id == blockId); + if (hasLoop || !m_blocks.count(nextId)) + continue; + + m_blocks[nextId].prev = blockId; + block.next = nextId; + block.end -= 2; + assertThrow( + !block.pushedTags.empty() && block.pushedTags.back() == nextId, + OptimizerException, + "Last pushed tag not at end of pushed list." + ); + block.pushedTags.pop_back(); + block.endType = BasicBlock::EndType::HANDOVER; + } +} + +void ControlFlowGraph::gatherKnowledge() +{ + // @todo actually we know that memory is filled with zeros at the beginning, + // we could make use of that. + KnownStatePointer emptyState = make_shared<KnownState>(); + bool unknownJumpEncountered = false; + + struct WorkQueueItem { + BlockId blockId; + KnownStatePointer state; + set<BlockId> blocksSeen; + }; + + vector<WorkQueueItem> workQueue{WorkQueueItem{BlockId::initial(), emptyState->copy(), set<BlockId>()}}; + auto addWorkQueueItem = [&](WorkQueueItem const& _currentItem, BlockId _to, KnownStatePointer const& _state) + { + WorkQueueItem item; + item.blockId = _to; + item.state = _state->copy(); + item.blocksSeen = _currentItem.blocksSeen; + item.blocksSeen.insert(_currentItem.blockId); + workQueue.push_back(move(item)); + }; + + while (!workQueue.empty()) + { + WorkQueueItem item = move(workQueue.back()); + workQueue.pop_back(); + //@todo we might have to do something like incrementing the sequence number for each JUMPDEST + assertThrow(!!item.blockId, OptimizerException, ""); + if (!m_blocks.count(item.blockId)) + continue; // too bad, we do not know the tag, probably an invalid jump + BasicBlock& block = m_blocks.at(item.blockId); + KnownStatePointer state = item.state; + if (block.startState) + { + state->reduceToCommonKnowledge(*block.startState, !item.blocksSeen.count(item.blockId)); + if (*state == *block.startState) + continue; + } + + block.startState = state->copy(); + + // Feed all items except for the final jump yet because it will erase the target tag. + unsigned pc = block.begin; + while (pc < block.end && !SemanticInformation::altersControlFlow(m_items.at(pc))) + state->feedItem(m_items.at(pc++)); + + if ( + block.endType == BasicBlock::EndType::JUMP || + block.endType == BasicBlock::EndType::JUMPI + ) + { + assertThrow(block.begin <= pc && pc == block.end - 1, OptimizerException, ""); + //@todo in the case of JUMPI, add knowledge about the condition to the state + // (for both values of the condition) + set<u256> tags = state->tagsInExpression( + state->stackElement(state->stackHeight(), SourceLocation()) + ); + state->feedItem(m_items.at(pc++)); + + if (tags.empty()) + { + if (!unknownJumpEncountered) + { + // We do not know the target of this jump, so we have to reset the states of all + // JUMPDESTs. + unknownJumpEncountered = true; + for (auto const& it: m_blocks) + if (it.second.begin < it.second.end && m_items[it.second.begin].type() == Tag) + workQueue.push_back(WorkQueueItem{it.first, emptyState->copy(), set<BlockId>()}); + } + } + else + for (auto tag: tags) + addWorkQueueItem(item, BlockId(tag), state); + } + else if (block.begin <= pc && pc < block.end) + state->feedItem(m_items.at(pc++)); + assertThrow(block.end <= block.begin || pc == block.end, OptimizerException, ""); + + block.endState = state; + + if ( + block.endType == BasicBlock::EndType::HANDOVER || + block.endType == BasicBlock::EndType::JUMPI + ) + addWorkQueueItem(item, block.next, state); + } + + // Remove all blocks we never visited here. This might happen because a tag is pushed but + // never used for a JUMP. + // Note that this invalidates some contents of pushedTags + for (auto it = m_blocks.begin(); it != m_blocks.end();) + if (!it->second.startState) + it = m_blocks.erase(it); + else + it++; +} + +BasicBlocks ControlFlowGraph::rebuildCode() +{ + map<BlockId, unsigned> pushes; + for (auto& idAndBlock: m_blocks) + for (BlockId ref: idAndBlock.second.pushedTags) + if (m_blocks.count(ref)) + pushes[ref]++; + + set<BlockId> blocksToAdd; + for (auto it: m_blocks) + blocksToAdd.insert(it.first); + set<BlockId> blocksAdded; + BasicBlocks blocks; + + for ( + BlockId blockId = BlockId::initial(); + blockId; + blockId = blocksToAdd.empty() ? BlockId::invalid() : *blocksToAdd.begin() + ) + { + bool previousHandedOver = (blockId == BlockId::initial()); + while (m_blocks.at(blockId).prev) + blockId = m_blocks.at(blockId).prev; + for (; blockId; blockId = m_blocks.at(blockId).next) + { + BasicBlock& block = m_blocks.at(blockId); + blocksToAdd.erase(blockId); + blocksAdded.insert(blockId); + + if (block.begin == block.end) + continue; + // If block starts with unused tag, skip it. + if (previousHandedOver && !pushes[blockId] && m_items[block.begin].type() == Tag) + ++block.begin; + if (block.begin < block.end) + { + blocks.push_back(block); + blocks.back().startState->clearTagUnions(); + blocks.back().endState->clearTagUnions(); + } + previousHandedOver = (block.endType == BasicBlock::EndType::HANDOVER); + } + } + + return blocks; +} + +BlockId ControlFlowGraph::generateNewId() +{ + BlockId id = BlockId(++m_lastUsedId); + assertThrow(id < BlockId::initial(), OptimizerException, "Out of block IDs."); + return id; +} diff --git a/libevmasm/ControlFlowGraph.h b/libevmasm/ControlFlowGraph.h new file mode 100644 index 00000000..4480ba49 --- /dev/null +++ b/libevmasm/ControlFlowGraph.h @@ -0,0 +1,120 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @file ControlFlowGraph.h + * @author Christian <c@ethdev.com> + * @date 2015 + * Control flow analysis for the optimizer. + */ + +#pragma once + +#include <vector> +#include <memory> +#include <libdevcore/Common.h> +#include <libdevcore/Assertions.h> +#include <libevmasm/ExpressionClasses.h> + +namespace dev +{ +namespace eth +{ + +class KnownState; +using KnownStatePointer = std::shared_ptr<KnownState>; + +/** + * Identifier for a block, coincides with the tag number of an AssemblyItem but adds a special + * ID for the inital block. + */ +class BlockId +{ +public: + BlockId() { *this = invalid(); } + explicit BlockId(unsigned _id): m_id(_id) {} + explicit BlockId(u256 const& _id); + static BlockId initial() { return BlockId(-2); } + static BlockId invalid() { return BlockId(-1); } + + bool operator==(BlockId const& _other) const { return m_id == _other.m_id; } + bool operator!=(BlockId const& _other) const { return m_id != _other.m_id; } + bool operator<(BlockId const& _other) const { return m_id < _other.m_id; } + explicit operator bool() const { return *this != invalid(); } + +private: + unsigned m_id; +}; + +/** + * Control flow block inside which instruction counter is always incremented by one + * (except for possibly the last instruction). + */ +struct BasicBlock +{ + /// Start index into assembly item list. + unsigned begin = 0; + /// End index (excluded) inte assembly item list. + unsigned end = 0; + /// Tags pushed inside this block, with multiplicity. + std::vector<BlockId> pushedTags; + /// ID of the block that always follows this one (either non-branching part of JUMPI or flow + /// into new block), or BlockId::invalid() otherwise + BlockId next = BlockId::invalid(); + /// ID of the block that has to precede this one (because control flows into it). + BlockId prev = BlockId::invalid(); + + enum class EndType { JUMP, JUMPI, STOP, HANDOVER }; + EndType endType = EndType::HANDOVER; + + /// Knowledge about the state when this block is entered. Intersection of all possible ways + /// to enter this block. + KnownStatePointer startState; + /// Knowledge about the state at the end of this block. + KnownStatePointer endState; +}; + +using BasicBlocks = std::vector<BasicBlock>; + +class ControlFlowGraph +{ +public: + /// Initializes the control flow graph. + /// @a _items has to persist across the usage of this class. + ControlFlowGraph(AssemblyItems const& _items): m_items(_items) {} + /// @returns vector of basic blocks in the order they should be used in the final code. + /// Should be called only once. + BasicBlocks optimisedBlocks(); + +private: + void findLargestTag(); + void splitBlocks(); + void resolveNextLinks(); + void removeUnusedBlocks(); + void gatherKnowledge(); + void setPrevLinks(); + BasicBlocks rebuildCode(); + + BlockId generateNewId(); + + unsigned m_lastUsedId = 0; + AssemblyItems const& m_items; + std::map<BlockId, BasicBlock> m_blocks; +}; + + +} +} diff --git a/libevmasm/Exceptions.h b/libevmasm/Exceptions.h new file mode 100644 index 00000000..03b8afde --- /dev/null +++ b/libevmasm/Exceptions.h @@ -0,0 +1,37 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** @file Exceptions.h + * @author Christian <c@ethdev.com> + * @date 2014 + */ + +#pragma once + +#include <libdevcore/Exceptions.h> + +namespace dev +{ +namespace eth +{ + +struct AssemblyException: virtual Exception {}; +struct OptimizerException: virtual AssemblyException {}; +struct StackTooDeepException: virtual OptimizerException {}; +struct ItemNotAvailableException: virtual OptimizerException {}; + +} +} diff --git a/libevmasm/ExpressionClasses.cpp b/libevmasm/ExpressionClasses.cpp new file mode 100644 index 00000000..9d13a57a --- /dev/null +++ b/libevmasm/ExpressionClasses.cpp @@ -0,0 +1,511 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @file ExpressionClasses.cpp + * @author Christian <c@ethdev.com> + * @date 2015 + * Container for equivalence classes of expressions for use in common subexpression elimination. + */ + +#include <libevmasm/ExpressionClasses.h> +#include <utility> +#include <tuple> +#include <functional> +#include <boost/range/adaptor/reversed.hpp> +#include <boost/noncopyable.hpp> +#include <libevmasm/Assembly.h> +#include <libevmasm/CommonSubexpressionEliminator.h> + +using namespace std; +using namespace dev; +using namespace dev::eth; + + +bool ExpressionClasses::Expression::operator<(ExpressionClasses::Expression const& _other) const +{ + assertThrow(!!item && !!_other.item, OptimizerException, ""); + auto type = item->type(); + auto otherType = _other.item->type(); + return std::tie(type, item->data(), arguments, sequenceNumber) < + std::tie(otherType, _other.item->data(), _other.arguments, _other.sequenceNumber); +} + +ExpressionClasses::Id ExpressionClasses::find( + AssemblyItem const& _item, + Ids const& _arguments, + bool _copyItem, + unsigned _sequenceNumber +) +{ + Expression exp; + exp.id = Id(-1); + exp.item = &_item; + exp.arguments = _arguments; + exp.sequenceNumber = _sequenceNumber; + + if (SemanticInformation::isCommutativeOperation(_item)) + sort(exp.arguments.begin(), exp.arguments.end()); + + if (SemanticInformation::isDeterministic(_item)) + { + auto it = m_expressions.find(exp); + if (it != m_expressions.end()) + return it->id; + } + + if (_copyItem) + exp.item = storeItem(_item); + + ExpressionClasses::Id id = tryToSimplify(exp); + if (id < m_representatives.size()) + exp.id = id; + else + { + exp.id = m_representatives.size(); + m_representatives.push_back(exp); + } + m_expressions.insert(exp); + return exp.id; +} + +void ExpressionClasses::forceEqual( + ExpressionClasses::Id _id, + AssemblyItem const& _item, + ExpressionClasses::Ids const& _arguments, + bool _copyItem +) +{ + Expression exp; + exp.id = _id; + exp.item = &_item; + exp.arguments = _arguments; + + if (SemanticInformation::isCommutativeOperation(_item)) + sort(exp.arguments.begin(), exp.arguments.end()); + + if (_copyItem) + exp.item = storeItem(_item); + + m_expressions.insert(exp); +} + +ExpressionClasses::Id ExpressionClasses::newClass(SourceLocation const& _location) +{ + Expression exp; + exp.id = m_representatives.size(); + exp.item = storeItem(AssemblyItem(UndefinedItem, (u256(1) << 255) + exp.id, _location)); + m_representatives.push_back(exp); + m_expressions.insert(exp); + return exp.id; +} + +bool ExpressionClasses::knownToBeDifferent(ExpressionClasses::Id _a, ExpressionClasses::Id _b) +{ + // Try to simplify "_a - _b" and return true iff the value is a non-zero constant. + return knownNonZero(find(Instruction::SUB, {_a, _b})); +} + +bool ExpressionClasses::knownToBeDifferentBy32(ExpressionClasses::Id _a, ExpressionClasses::Id _b) +{ + // Try to simplify "_a - _b" and return true iff the value is at least 32 away from zero. + u256 const* v = knownConstant(find(Instruction::SUB, {_a, _b})); + // forbidden interval is ["-31", 31] + return v && *v + 31 > u256(62); +} + +bool ExpressionClasses::knownZero(Id _c) +{ + return Pattern(u256(0)).matches(representative(_c), *this); +} + +bool ExpressionClasses::knownNonZero(Id _c) +{ + return Pattern(u256(0)).matches(representative(find(Instruction::ISZERO, {_c})), *this); +} + +u256 const* ExpressionClasses::knownConstant(Id _c) +{ + map<unsigned, Expression const*> matchGroups; + Pattern constant(Push); + constant.setMatchGroup(1, matchGroups); + if (!constant.matches(representative(_c), *this)) + return nullptr; + return &constant.d(); +} + +AssemblyItem const* ExpressionClasses::storeItem(AssemblyItem const& _item) +{ + m_spareAssemblyItems.push_back(make_shared<AssemblyItem>(_item)); + return m_spareAssemblyItems.back().get(); +} + +string ExpressionClasses::fullDAGToString(ExpressionClasses::Id _id) const +{ + Expression const& expr = representative(_id); + stringstream str; + str << dec << expr.id << ":"; + if (expr.item) + { + str << *expr.item << "("; + for (Id arg: expr.arguments) + str << fullDAGToString(arg) << ","; + str << ")"; + } + else + str << " UNIQUE"; + return str.str(); +} + +class Rules: public boost::noncopyable +{ +public: + Rules(); + void resetMatchGroups() { m_matchGroups.clear(); } + vector<pair<Pattern, function<Pattern()>>> rules() const { return m_rules; } + +private: + using Expression = ExpressionClasses::Expression; + map<unsigned, Expression const*> m_matchGroups; + vector<pair<Pattern, function<Pattern()>>> m_rules; +}; + +template <class S> S divWorkaround(S const& _a, S const& _b) +{ + return (S)(bigint(_a) / bigint(_b)); +} + +template <class S> S modWorkaround(S const& _a, S const& _b) +{ + return (S)(bigint(_a) % bigint(_b)); +} + +Rules::Rules() +{ + // Multiple occurences of one of these inside one rule must match the same equivalence class. + // Constants. + Pattern A(Push); + Pattern B(Push); + Pattern C(Push); + // Anything. + Pattern X; + Pattern Y; + Pattern Z; + A.setMatchGroup(1, m_matchGroups); + B.setMatchGroup(2, m_matchGroups); + C.setMatchGroup(3, m_matchGroups); + X.setMatchGroup(4, m_matchGroups); + Y.setMatchGroup(5, m_matchGroups); + Z.setMatchGroup(6, m_matchGroups); + + m_rules = vector<pair<Pattern, function<Pattern()>>>{ + // arithmetics on constants + {{Instruction::ADD, {A, B}}, [=]{ return A.d() + B.d(); }}, + {{Instruction::MUL, {A, B}}, [=]{ return A.d() * B.d(); }}, + {{Instruction::SUB, {A, B}}, [=]{ return A.d() - B.d(); }}, + {{Instruction::DIV, {A, B}}, [=]{ return B.d() == 0 ? 0 : divWorkaround(A.d(), B.d()); }}, + {{Instruction::SDIV, {A, B}}, [=]{ return B.d() == 0 ? 0 : s2u(divWorkaround(u2s(A.d()), u2s(B.d()))); }}, + {{Instruction::MOD, {A, B}}, [=]{ return B.d() == 0 ? 0 : modWorkaround(A.d(), B.d()); }}, + {{Instruction::SMOD, {A, B}}, [=]{ return B.d() == 0 ? 0 : s2u(modWorkaround(u2s(A.d()), u2s(B.d()))); }}, + {{Instruction::EXP, {A, B}}, [=]{ return u256(boost::multiprecision::powm(bigint(A.d()), bigint(B.d()), bigint(1) << 256)); }}, + {{Instruction::NOT, {A}}, [=]{ return ~A.d(); }}, + {{Instruction::LT, {A, B}}, [=]() { return A.d() < B.d() ? u256(1) : 0; }}, + {{Instruction::GT, {A, B}}, [=]() -> u256 { return A.d() > B.d() ? 1 : 0; }}, + {{Instruction::SLT, {A, B}}, [=]() -> u256 { return u2s(A.d()) < u2s(B.d()) ? 1 : 0; }}, + {{Instruction::SGT, {A, B}}, [=]() -> u256 { return u2s(A.d()) > u2s(B.d()) ? 1 : 0; }}, + {{Instruction::EQ, {A, B}}, [=]() -> u256 { return A.d() == B.d() ? 1 : 0; }}, + {{Instruction::ISZERO, {A}}, [=]() -> u256 { return A.d() == 0 ? 1 : 0; }}, + {{Instruction::AND, {A, B}}, [=]{ return A.d() & B.d(); }}, + {{Instruction::OR, {A, B}}, [=]{ return A.d() | B.d(); }}, + {{Instruction::XOR, {A, B}}, [=]{ return A.d() ^ B.d(); }}, + {{Instruction::BYTE, {A, B}}, [=]{ return A.d() >= 32 ? 0 : (B.d() >> unsigned(8 * (31 - A.d()))) & 0xff; }}, + {{Instruction::ADDMOD, {A, B, C}}, [=]{ return C.d() == 0 ? 0 : u256((bigint(A.d()) + bigint(B.d())) % C.d()); }}, + {{Instruction::MULMOD, {A, B, C}}, [=]{ return C.d() == 0 ? 0 : u256((bigint(A.d()) * bigint(B.d())) % C.d()); }}, + {{Instruction::MULMOD, {A, B, C}}, [=]{ return A.d() * B.d(); }}, + {{Instruction::SIGNEXTEND, {A, B}}, [=]() -> u256 { + if (A.d() >= 31) + return B.d(); + unsigned testBit = unsigned(A.d()) * 8 + 7; + u256 mask = (u256(1) << testBit) - 1; + return u256(boost::multiprecision::bit_test(B.d(), testBit) ? B.d() | ~mask : B.d() & mask); + }}, + + // invariants involving known constants + {{Instruction::ADD, {X, 0}}, [=]{ return X; }}, + {{Instruction::MUL, {X, 1}}, [=]{ return X; }}, + {{Instruction::DIV, {X, 1}}, [=]{ return X; }}, + {{Instruction::SDIV, {X, 1}}, [=]{ return X; }}, + {{Instruction::OR, {X, 0}}, [=]{ return X; }}, + {{Instruction::XOR, {X, 0}}, [=]{ return X; }}, + {{Instruction::AND, {X, ~u256(0)}}, [=]{ return X; }}, + {{Instruction::MUL, {X, 0}}, [=]{ return u256(0); }}, + {{Instruction::DIV, {X, 0}}, [=]{ return u256(0); }}, + {{Instruction::MOD, {X, 0}}, [=]{ return u256(0); }}, + {{Instruction::MOD, {0, X}}, [=]{ return u256(0); }}, + {{Instruction::AND, {X, 0}}, [=]{ return u256(0); }}, + {{Instruction::OR, {X, ~u256(0)}}, [=]{ return ~u256(0); }}, + // operations involving an expression and itself + {{Instruction::AND, {X, X}}, [=]{ return X; }}, + {{Instruction::OR, {X, X}}, [=]{ return X; }}, + {{Instruction::SUB, {X, X}}, [=]{ return u256(0); }}, + {{Instruction::EQ, {X, X}}, [=]{ return u256(1); }}, + {{Instruction::LT, {X, X}}, [=]{ return u256(0); }}, + {{Instruction::SLT, {X, X}}, [=]{ return u256(0); }}, + {{Instruction::GT, {X, X}}, [=]{ return u256(0); }}, + {{Instruction::SGT, {X, X}}, [=]{ return u256(0); }}, + {{Instruction::MOD, {X, X}}, [=]{ return u256(0); }}, + + {{Instruction::NOT, {{Instruction::NOT, {X}}}}, [=]{ return X; }}, + }; + // Double negation of opcodes with binary result + for (auto const& op: vector<Instruction>{ + Instruction::EQ, + Instruction::LT, + Instruction::SLT, + Instruction::GT, + Instruction::SGT + }) + m_rules.push_back({ + {Instruction::ISZERO, {{Instruction::ISZERO, {{op, {X, Y}}}}}}, + [=]() -> Pattern { return {op, {X, Y}}; } + }); + m_rules.push_back({ + {Instruction::ISZERO, {{Instruction::ISZERO, {{Instruction::ISZERO, {X}}}}}}, + [=]() -> Pattern { return {Instruction::ISZERO, {X}}; } + }); + // Associative operations + for (auto const& opFun: vector<pair<Instruction,function<u256(u256 const&,u256 const&)>>>{ + {Instruction::ADD, plus<u256>()}, + {Instruction::MUL, multiplies<u256>()}, + {Instruction::AND, bit_and<u256>()}, + {Instruction::OR, bit_or<u256>()}, + {Instruction::XOR, bit_xor<u256>()} + }) + { + auto op = opFun.first; + auto fun = opFun.second; + // Moving constants to the outside, order matters here! + // we need actions that return expressions (or patterns?) here, and we need also reversed rules + // (X+A)+B -> X+(A+B) + m_rules += vector<pair<Pattern, function<Pattern()>>>{{ + {op, {{op, {X, A}}, B}}, + [=]() -> Pattern { return {op, {X, fun(A.d(), B.d())}}; } + }, { + // X+(Y+A) -> (X+Y)+A + {op, {{op, {X, A}}, Y}}, + [=]() -> Pattern { return {op, {{op, {X, Y}}, A}}; } + }, { + // For now, we still need explicit commutativity for the inner pattern + {op, {{op, {A, X}}, B}}, + [=]() -> Pattern { return {op, {X, fun(A.d(), B.d())}}; } + }, { + {op, {{op, {A, X}}, Y}}, + [=]() -> Pattern { return {op, {{op, {X, Y}}, A}}; } + }}; + } + // move constants across subtractions + m_rules += vector<pair<Pattern, function<Pattern()>>>{ + { + // X - A -> X + (-A) + {Instruction::SUB, {X, A}}, + [=]() -> Pattern { return {Instruction::ADD, {X, 0 - A.d()}}; } + }, { + // (X + A) - Y -> (X - Y) + A + {Instruction::SUB, {{Instruction::ADD, {X, A}}, Y}}, + [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, A}}; } + }, { + // (A + X) - Y -> (X - Y) + A + {Instruction::SUB, {{Instruction::ADD, {A, X}}, Y}}, + [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, A}}; } + }, { + // X - (Y + A) -> (X - Y) + (-A) + {Instruction::SUB, {X, {Instruction::ADD, {Y, A}}}}, + [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, 0 - A.d()}}; } + }, { + // X - (A + Y) -> (X - Y) + (-A) + {Instruction::SUB, {X, {Instruction::ADD, {A, Y}}}}, + [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, 0 - A.d()}}; } + } + }; +} + +ExpressionClasses::Id ExpressionClasses::tryToSimplify(Expression const& _expr, bool _secondRun) +{ + static Rules rules; + + if ( + !_expr.item || + _expr.item->type() != Operation || + !SemanticInformation::isDeterministic(*_expr.item) + ) + return -1; + + for (auto const& rule: rules.rules()) + { + rules.resetMatchGroups(); + if (rule.first.matches(_expr, *this)) + { + // Debug info + //cout << "Simplifying " << *_expr.item << "("; + //for (Id arg: _expr.arguments) + // cout << fullDAGToString(arg) << ", "; + //cout << ")" << endl; + //cout << "with rule " << rule.first.toString() << endl; + //ExpressionTemplate t(rule.second()); + //cout << "to " << rule.second().toString() << endl; + return rebuildExpression(ExpressionTemplate(rule.second(), _expr.item->location())); + } + } + + if (!_secondRun && _expr.arguments.size() == 2 && SemanticInformation::isCommutativeOperation(*_expr.item)) + { + Expression expr = _expr; + swap(expr.arguments[0], expr.arguments[1]); + return tryToSimplify(expr, true); + } + + return -1; +} + +ExpressionClasses::Id ExpressionClasses::rebuildExpression(ExpressionTemplate const& _template) +{ + if (_template.hasId) + return _template.id; + + Ids arguments; + for (ExpressionTemplate const& t: _template.arguments) + arguments.push_back(rebuildExpression(t)); + return find(_template.item, arguments); +} + + +Pattern::Pattern(Instruction _instruction, std::vector<Pattern> const& _arguments): + m_type(Operation), + m_requireDataMatch(true), + m_data(_instruction), + m_arguments(_arguments) +{ +} + +void Pattern::setMatchGroup(unsigned _group, map<unsigned, Expression const*>& _matchGroups) +{ + m_matchGroup = _group; + m_matchGroups = &_matchGroups; +} + +bool Pattern::matches(Expression const& _expr, ExpressionClasses const& _classes) const +{ + if (!matchesBaseItem(_expr.item)) + return false; + if (m_matchGroup) + { + if (!m_matchGroups->count(m_matchGroup)) + (*m_matchGroups)[m_matchGroup] = &_expr; + else if ((*m_matchGroups)[m_matchGroup]->id != _expr.id) + return false; + } + assertThrow(m_arguments.size() == 0 || _expr.arguments.size() == m_arguments.size(), OptimizerException, ""); + for (size_t i = 0; i < m_arguments.size(); ++i) + if (!m_arguments[i].matches(_classes.representative(_expr.arguments[i]), _classes)) + return false; + return true; +} + +AssemblyItem Pattern::toAssemblyItem(SourceLocation const& _location) const +{ + return AssemblyItem(m_type, m_data, _location); +} + +string Pattern::toString() const +{ + stringstream s; + switch (m_type) + { + case Operation: + s << instructionInfo(Instruction(unsigned(m_data))).name; + break; + case Push: + s << "PUSH " << hex << m_data; + break; + case UndefinedItem: + s << "ANY"; + break; + default: + s << "t=" << dec << m_type << " d=" << hex << m_data; + break; + } + if (!m_requireDataMatch) + s << " ~"; + if (m_matchGroup) + s << "[" << dec << m_matchGroup << "]"; + s << "("; + for (Pattern const& p: m_arguments) + s << p.toString() << ", "; + s << ")"; + return s.str(); +} + +bool Pattern::matchesBaseItem(AssemblyItem const* _item) const +{ + if (m_type == UndefinedItem) + return true; + if (!_item) + return false; + if (m_type != _item->type()) + return false; + if (m_requireDataMatch && m_data != _item->data()) + return false; + return true; +} + +Pattern::Expression const& Pattern::matchGroupValue() const +{ + assertThrow(m_matchGroup > 0, OptimizerException, ""); + assertThrow(!!m_matchGroups, OptimizerException, ""); + assertThrow((*m_matchGroups)[m_matchGroup], OptimizerException, ""); + return *(*m_matchGroups)[m_matchGroup]; +} + + +ExpressionTemplate::ExpressionTemplate(Pattern const& _pattern, SourceLocation const& _location) +{ + if (_pattern.matchGroup()) + { + hasId = true; + id = _pattern.id(); + } + else + { + hasId = false; + item = _pattern.toAssemblyItem(_location); + } + for (auto const& arg: _pattern.arguments()) + arguments.push_back(ExpressionTemplate(arg, _location)); +} + +string ExpressionTemplate::toString() const +{ + stringstream s; + if (hasId) + s << id; + else + s << item; + s << "("; + for (auto const& arg: arguments) + s << arg.toString(); + s << ")"; + return s.str(); +} diff --git a/libevmasm/ExpressionClasses.h b/libevmasm/ExpressionClasses.h new file mode 100644 index 00000000..4bfd7d24 --- /dev/null +++ b/libevmasm/ExpressionClasses.h @@ -0,0 +1,190 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @file ExpressionClasses.h + * @author Christian <c@ethdev.com> + * @date 2015 + * Container for equivalence classes of expressions for use in common subexpression elimination. + */ + +#pragma once + +#include <vector> +#include <map> +#include <memory> +#include <libdevcore/Common.h> +#include <libevmasm/AssemblyItem.h> + +namespace dev +{ +namespace eth +{ + +class Pattern; +struct ExpressionTemplate; + +/** + * Collection of classes of equivalent expressions that can also determine the class of an expression. + * Identifiers are contiguously assigned to new classes starting from zero. + */ +class ExpressionClasses +{ +public: + using Id = unsigned; + using Ids = std::vector<Id>; + + struct Expression + { + Id id; + AssemblyItem const* item = nullptr; + Ids arguments; + /// Storage modification sequence, only used for storage and memory operations. + unsigned sequenceNumber = 0; + /// Behaves as if this was a tuple of (item->type(), item->data(), arguments, sequenceNumber). + bool operator<(Expression const& _other) const; + }; + + /// Retrieves the id of the expression equivalence class resulting from the given item applied to the + /// given classes, might also create a new one. + /// @param _copyItem if true, copies the assembly item to an internal storage instead of just + /// keeping a pointer. + /// The @a _sequenceNumber indicates the current storage or memory access sequence. + Id find( + AssemblyItem const& _item, + Ids const& _arguments = {}, + bool _copyItem = true, + unsigned _sequenceNumber = 0 + ); + /// @returns the canonical representative of an expression class. + Expression const& representative(Id _id) const { return m_representatives.at(_id); } + /// @returns the number of classes. + Id size() const { return m_representatives.size(); } + + /// Forces the given @a _item with @a _arguments to the class @a _id. This can be used to + /// add prior knowledge e.g. about CALLDATA, but has to be used with caution. Will not work as + /// expected if @a _item applied to @a _arguments already exists. + void forceEqual(Id _id, AssemblyItem const& _item, Ids const& _arguments, bool _copyItem = true); + + /// @returns the id of a new class which is different to all other classes. + Id newClass(SourceLocation const& _location); + + /// @returns true if the values of the given classes are known to be different (on every input). + /// @note that this function might still return false for some different inputs. + bool knownToBeDifferent(Id _a, Id _b); + /// Similar to @a knownToBeDifferent but require that abs(_a - b) >= 32. + bool knownToBeDifferentBy32(Id _a, Id _b); + /// @returns true if the value of the given class is known to be zero. + /// @note that this is not the negation of knownNonZero + bool knownZero(Id _c); + /// @returns true if the value of the given class is known to be nonzero. + /// @note that this is not the negation of knownZero + bool knownNonZero(Id _c); + /// @returns a pointer to the value if the given class is known to be a constant, + /// and a nullptr otherwise. + u256 const* knownConstant(Id _c); + + /// Stores a copy of the given AssemblyItem and returns a pointer to the copy that is valid for + /// the lifetime of the ExpressionClasses object. + AssemblyItem const* storeItem(AssemblyItem const& _item); + + std::string fullDAGToString(Id _id) const; + +private: + /// Tries to simplify the given expression. + /// @returns its class if it possible or Id(-1) otherwise. + /// @param _secondRun is set to true for the second run where arguments of commutative expressions are reversed + Id tryToSimplify(Expression const& _expr, bool _secondRun = false); + + /// Rebuilds an expression from a (matched) pattern. + Id rebuildExpression(ExpressionTemplate const& _template); + + std::vector<std::pair<Pattern, std::function<Pattern()>>> createRules() const; + + /// Expression equivalence class representatives - we only store one item of an equivalence. + std::vector<Expression> m_representatives; + /// All expression ever encountered. + std::set<Expression> m_expressions; + std::vector<std::shared_ptr<AssemblyItem>> m_spareAssemblyItems; +}; + +/** + * Pattern to match against an expression. + * Also stores matched expressions to retrieve them later, for constructing new expressions using + * ExpressionTemplate. + */ +class Pattern +{ +public: + using Expression = ExpressionClasses::Expression; + using Id = ExpressionClasses::Id; + + // Matches a specific constant value. + Pattern(unsigned _value): Pattern(u256(_value)) {} + // Matches a specific constant value. + Pattern(u256 const& _value): m_type(Push), m_requireDataMatch(true), m_data(_value) {} + // Matches a specific assembly item type or anything if not given. + Pattern(AssemblyItemType _type = UndefinedItem): m_type(_type) {} + // Matches a given instruction with given arguments + Pattern(Instruction _instruction, std::vector<Pattern> const& _arguments = {}); + /// Sets this pattern to be part of the match group with the identifier @a _group. + /// Inside one rule, all patterns in the same match group have to match expressions from the + /// same expression equivalence class. + void setMatchGroup(unsigned _group, std::map<unsigned, Expression const*>& _matchGroups); + unsigned matchGroup() const { return m_matchGroup; } + bool matches(Expression const& _expr, ExpressionClasses const& _classes) const; + + AssemblyItem toAssemblyItem(SourceLocation const& _location) const; + std::vector<Pattern> arguments() const { return m_arguments; } + + /// @returns the id of the matched expression if this pattern is part of a match group. + Id id() const { return matchGroupValue().id; } + /// @returns the data of the matched expression if this pattern is part of a match group. + u256 const& d() const { return matchGroupValue().item->data(); } + + std::string toString() const; + +private: + bool matchesBaseItem(AssemblyItem const* _item) const; + Expression const& matchGroupValue() const; + + AssemblyItemType m_type; + bool m_requireDataMatch = false; + u256 m_data = 0; + std::vector<Pattern> m_arguments; + unsigned m_matchGroup = 0; + std::map<unsigned, Expression const*>* m_matchGroups = nullptr; +}; + +/** + * Template for a new expression that can be built from matched patterns. + */ +struct ExpressionTemplate +{ + using Expression = ExpressionClasses::Expression; + using Id = ExpressionClasses::Id; + explicit ExpressionTemplate(Pattern const& _pattern, SourceLocation const& _location); + std::string toString() const; + bool hasId = false; + /// Id of the matched expression, if available. + Id id = Id(-1); + // Otherwise, assembly item. + AssemblyItem item = UndefinedItem; + std::vector<ExpressionTemplate> arguments; +}; + +} +} diff --git a/libevmasm/GasMeter.cpp b/libevmasm/GasMeter.cpp new file mode 100644 index 00000000..93583169 --- /dev/null +++ b/libevmasm/GasMeter.cpp @@ -0,0 +1,217 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** @file GasMeter.cpp + * @author Christian <c@ethdev.com> + * @date 2015 + */ + +#include "GasMeter.h" +#include <libevmasm/KnownState.h> +using namespace std; +using namespace dev; +using namespace dev::eth; + +GasMeter::GasConsumption& GasMeter::GasConsumption::operator+=(GasConsumption const& _other) +{ + if (_other.isInfinite && !isInfinite) + *this = infinite(); + if (isInfinite) + return *this; + bigint v = bigint(value) + _other.value; + if (v > numeric_limits<u256>::max()) + *this = infinite(); + else + value = u256(v); + return *this; +} + +GasMeter::GasConsumption GasMeter::estimateMax(AssemblyItem const& _item) +{ + GasConsumption gas; + switch (_item.type()) + { + case Push: + case PushTag: + case PushData: + case PushString: + case PushSub: + case PushSubSize: + case PushProgramSize: + case PushLibraryAddress: + gas = runGas(Instruction::PUSH1); + break; + case Tag: + gas = runGas(Instruction::JUMPDEST); + break; + case Operation: + { + ExpressionClasses& classes = m_state->expressionClasses(); + gas = runGas(_item.instruction()); + switch (_item.instruction()) + { + case Instruction::SSTORE: + { + ExpressionClasses::Id slot = m_state->relativeStackElement(0); + ExpressionClasses::Id value = m_state->relativeStackElement(-1); + if (classes.knownZero(value) || ( + m_state->storageContent().count(slot) && + classes.knownNonZero(m_state->storageContent().at(slot)) + )) + gas += m_schedule.sstoreResetGas; //@todo take refunds into account + else + gas += m_schedule.sstoreSetGas; + break; + } + case Instruction::SLOAD: + gas += m_schedule.sloadGas; + break; + case Instruction::RETURN: + gas += memoryGas(0, -1); + break; + case Instruction::MLOAD: + case Instruction::MSTORE: + gas += memoryGas(classes.find(eth::Instruction::ADD, { + m_state->relativeStackElement(0), + classes.find(AssemblyItem(32)) + })); + break; + case Instruction::MSTORE8: + gas += memoryGas(classes.find(eth::Instruction::ADD, { + m_state->relativeStackElement(0), + classes.find(AssemblyItem(1)) + })); + break; + case Instruction::SHA3: + gas = m_schedule.sha3Gas; + gas += wordGas(m_schedule.sha3WordGas, m_state->relativeStackElement(-1)); + gas += memoryGas(0, -1); + break; + case Instruction::CALLDATACOPY: + case Instruction::CODECOPY: + gas += memoryGas(0, -2); + gas += wordGas(m_schedule.copyGas, m_state->relativeStackElement(-2)); + break; + case Instruction::EXTCODECOPY: + gas += memoryGas(-1, -3); + gas += wordGas(m_schedule.copyGas, m_state->relativeStackElement(-3)); + break; + case Instruction::LOG0: + case Instruction::LOG1: + case Instruction::LOG2: + case Instruction::LOG3: + case Instruction::LOG4: + { + unsigned n = unsigned(_item.instruction()) - unsigned(Instruction::LOG0); + gas = m_schedule.logGas + m_schedule.logTopicGas * n; + gas += memoryGas(0, -1); + if (u256 const* value = classes.knownConstant(m_state->relativeStackElement(-1))) + gas += m_schedule.logDataGas * (*value); + else + gas = GasConsumption::infinite(); + break; + } + case Instruction::CALL: + case Instruction::CALLCODE: + case Instruction::DELEGATECALL: + { + gas = m_schedule.callGas; + if (u256 const* value = classes.knownConstant(m_state->relativeStackElement(0))) + gas += (*value); + else + gas = GasConsumption::infinite(); + if (_item.instruction() == Instruction::CALL) + gas += m_schedule.callNewAccountGas; // We very rarely know whether the address exists. + int valueSize = _item.instruction() == Instruction::DELEGATECALL ? 0 : 1; + if (!classes.knownZero(m_state->relativeStackElement(-1 - valueSize))) + gas += m_schedule.callValueTransferGas; + gas += memoryGas(-2 - valueSize, -3 - valueSize); + gas += memoryGas(-4 - valueSize, -5 - valueSize); + break; + } + case Instruction::CREATE: + gas = m_schedule.createGas; + gas += memoryGas(-1, -2); + break; + case Instruction::EXP: + gas = m_schedule.expGas; + if (u256 const* value = classes.knownConstant(m_state->relativeStackElement(-1))) + gas += m_schedule.expByteGas * (32 - (h256(*value).firstBitSet() / 8)); + else + gas += m_schedule.expByteGas * 32; + break; + default: + break; + } + break; + } + default: + gas = GasConsumption::infinite(); + break; + } + + m_state->feedItem(_item); + return gas; +} + +GasMeter::GasConsumption GasMeter::wordGas(u256 const& _multiplier, ExpressionClasses::Id _position) +{ + u256 const* value = m_state->expressionClasses().knownConstant(_position); + if (!value) + return GasConsumption::infinite(); + return GasConsumption(_multiplier * ((*value + 31) / 32)); +} + +GasMeter::GasConsumption GasMeter::memoryGas(ExpressionClasses::Id _position) +{ + u256 const* value = m_state->expressionClasses().knownConstant(_position); + if (!value) + return GasConsumption::infinite(); + if (*value < m_largestMemoryAccess) + return GasConsumption(u256(0)); + u256 previous = m_largestMemoryAccess; + m_largestMemoryAccess = *value; + auto memGas = [=](u256 const& pos) -> u256 + { + u256 size = (pos + 31) / 32; + return m_schedule.memoryGas * size + size * size / m_schedule.quadCoeffDiv; + }; + return memGas(*value) - memGas(previous); +} + +GasMeter::GasConsumption GasMeter::memoryGas(int _stackPosOffset, int _stackPosSize) +{ + ExpressionClasses& classes = m_state->expressionClasses(); + if (classes.knownZero(m_state->relativeStackElement(_stackPosSize))) + return GasConsumption(0); + else + return memoryGas(classes.find(eth::Instruction::ADD, { + m_state->relativeStackElement(_stackPosOffset), + m_state->relativeStackElement(_stackPosSize) + })); +} + +u256 GasMeter::runGas(Instruction _instruction, EVMSchedule const& _es) +{ + if (_instruction == Instruction::JUMPDEST) + return 1; + + int tier = instructionInfo(_instruction).gasPriceTier; + assertThrow(tier != InvalidTier, OptimizerException, "Invalid gas tier."); + return _es.tierStepGas[tier]; +} + + diff --git a/libevmasm/GasMeter.h b/libevmasm/GasMeter.h new file mode 100644 index 00000000..b11a63a5 --- /dev/null +++ b/libevmasm/GasMeter.h @@ -0,0 +1,102 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** @file GasMeter.cpp + * @author Christian <c@ethdev.com> + * @date 2015 + */ + +#pragma once + +#include <ostream> +#include <tuple> +#include <libevmasm/ExpressionClasses.h> +#include <libevmasm/AssemblyItem.h> +#include <libethcore/ChainOperationParams.h> + +namespace dev +{ +namespace eth +{ + +class KnownState; + +// TODO: FIXME: HOMESTEAD: XXX: @chfast populate m_schedule from an ExtVMFace instance via ExtVMFace::evmSchedule. + +/** + * Class that helps computing the maximum gas consumption for instructions. + * Has to be initialized with a certain known state that will be automatically updated for + * each call to estimateMax. These calls have to supply strictly subsequent AssemblyItems. + * A new gas meter has to be constructed (with a new state) for control flow changes. + */ +class GasMeter +{ +public: + struct GasConsumption + { + GasConsumption(u256 _value = 0, bool _infinite = false): value(_value), isInfinite(_infinite) {} + static GasConsumption infinite() { return GasConsumption(0, true); } + + GasConsumption& operator+=(GasConsumption const& _other); + bool operator<(GasConsumption const& _other) const { return this->tuple() < _other.tuple(); } + + std::tuple<bool const&, u256 const&> tuple() const { return std::tie(isInfinite, value); } + + u256 value; + bool isInfinite; + }; + + /// Constructs a new gas meter given the current state. + explicit GasMeter(std::shared_ptr<KnownState> const& _state, u256 const& _largestMemoryAccess = 0): + m_state(_state), m_largestMemoryAccess(_largestMemoryAccess) {} + + /// @returns an upper bound on the gas consumed by the given instruction and updates + /// the state. + GasConsumption estimateMax(AssemblyItem const& _item); + + u256 const& largestMemoryAccess() const { return m_largestMemoryAccess; } + + u256 runGas(Instruction _instruction) const { return runGas(_instruction, m_schedule); } + static u256 runGas(Instruction _instruction, EVMSchedule const& _es); + +private: + /// @returns _multiplier * (_value + 31) / 32, if _value is a known constant and infinite otherwise. + GasConsumption wordGas(u256 const& _multiplier, ExpressionClasses::Id _value); + /// @returns the gas needed to access the given memory position. + /// @todo this assumes that memory was never accessed before and thus over-estimates gas usage. + GasConsumption memoryGas(ExpressionClasses::Id _position); + /// @returns the memory gas for accessing the memory at a specific offset for a number of bytes + /// given as values on the stack at the given relative positions. + GasConsumption memoryGas(int _stackPosOffset, int _stackPosSize); + + std::shared_ptr<KnownState> m_state; + /// Largest point where memory was accessed since the creation of this object. + u256 m_largestMemoryAccess; + + EVMSchedule m_schedule; +}; + +inline std::ostream& operator<<(std::ostream& _str, GasMeter::GasConsumption const& _consumption) +{ + if (_consumption.isInfinite) + return _str << "[???]"; + else + return _str << std::dec << _consumption.value; +} + + +} +} diff --git a/libevmasm/KnownState.cpp b/libevmasm/KnownState.cpp new file mode 100644 index 00000000..dd269ff4 --- /dev/null +++ b/libevmasm/KnownState.cpp @@ -0,0 +1,411 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @file KnownState.cpp + * @author Christian <c@ethdev.com> + * @date 2015 + * Contains knowledge about the state of the virtual machine at a specific instruction. + */ + +#include "KnownState.h" +#include <functional> +#include <libdevcore/SHA3.h> +#include <libevmasm/AssemblyItem.h> + +using namespace std; +using namespace dev; +using namespace dev::eth; + +ostream& KnownState::stream(ostream& _out) const +{ + auto streamExpressionClass = [this](ostream& _out, Id _id) + { + auto const& expr = m_expressionClasses->representative(_id); + _out << " " << dec << _id << ": "; + if (!expr.item) + _out << " no item"; + else if (expr.item->type() == UndefinedItem) + _out << " unknown " << int(expr.item->data()); + else + _out << *expr.item; + if (expr.sequenceNumber) + _out << "@" << dec << expr.sequenceNumber; + _out << "("; + for (Id arg: expr.arguments) + _out << dec << arg << ","; + _out << ")" << endl; + }; + + _out << "=== State ===" << endl; + _out << "Stack height: " << dec << m_stackHeight << endl; + _out << "Equivalence classes: " << endl; + for (Id eqClass = 0; eqClass < m_expressionClasses->size(); ++eqClass) + streamExpressionClass(_out, eqClass); + + _out << "Stack: " << endl; + for (auto const& it: m_stackElements) + { + _out << " " << dec << it.first << ": "; + streamExpressionClass(_out, it.second); + } + _out << "Storage: " << endl; + for (auto const& it: m_storageContent) + { + _out << " "; + streamExpressionClass(_out, it.first); + _out << ": "; + streamExpressionClass(_out, it.second); + } + _out << "Memory: " << endl; + for (auto const& it: m_memoryContent) + { + _out << " "; + streamExpressionClass(_out, it.first); + _out << ": "; + streamExpressionClass(_out, it.second); + } + + return _out; +} + +KnownState::StoreOperation KnownState::feedItem(AssemblyItem const& _item, bool _copyItem) +{ + StoreOperation op; + if (_item.type() == Tag) + { + // can be ignored + } + else if (_item.type() != Operation) + { + assertThrow(_item.deposit() == 1, InvalidDeposit, ""); + if (_item.pushedValue()) + // only available after assembly stage, should not be used for optimisation + setStackElement(++m_stackHeight, m_expressionClasses->find(*_item.pushedValue())); + else + setStackElement(++m_stackHeight, m_expressionClasses->find(_item, {}, _copyItem)); + } + else + { + Instruction instruction = _item.instruction(); + InstructionInfo info = instructionInfo(instruction); + if (SemanticInformation::isDupInstruction(_item)) + setStackElement( + m_stackHeight + 1, + stackElement( + m_stackHeight - int(instruction) + int(Instruction::DUP1), + _item.location() + ) + ); + else if (SemanticInformation::isSwapInstruction(_item)) + swapStackElements( + m_stackHeight, + m_stackHeight - 1 - int(instruction) + int(Instruction::SWAP1), + _item.location() + ); + else if (instruction != Instruction::POP) + { + vector<Id> arguments(info.args); + for (int i = 0; i < info.args; ++i) + arguments[i] = stackElement(m_stackHeight - i, _item.location()); + + if (_item.instruction() == Instruction::SSTORE) + op = storeInStorage(arguments[0], arguments[1], _item.location()); + else if (_item.instruction() == Instruction::SLOAD) + setStackElement( + m_stackHeight + _item.deposit(), + loadFromStorage(arguments[0], _item.location()) + ); + else if (_item.instruction() == Instruction::MSTORE) + op = storeInMemory(arguments[0], arguments[1], _item.location()); + else if (_item.instruction() == Instruction::MLOAD) + setStackElement( + m_stackHeight + _item.deposit(), + loadFromMemory(arguments[0], _item.location()) + ); + else if (_item.instruction() == Instruction::SHA3) + setStackElement( + m_stackHeight + _item.deposit(), + applySha3(arguments.at(0), arguments.at(1), _item.location()) + ); + else + { + bool invMem = SemanticInformation::invalidatesMemory(_item.instruction()); + bool invStor = SemanticInformation::invalidatesStorage(_item.instruction()); + // We could be a bit more fine-grained here (CALL only invalidates part of + // memory, etc), but we do not for now. + if (invMem) + resetMemory(); + if (invStor) + resetStorage(); + if (invMem || invStor) + m_sequenceNumber += 2; // Increment by two because it can read and write + assertThrow(info.ret <= 1, InvalidDeposit, ""); + if (info.ret == 1) + setStackElement( + m_stackHeight + _item.deposit(), + m_expressionClasses->find(_item, arguments, _copyItem) + ); + } + } + m_stackElements.erase( + m_stackElements.upper_bound(m_stackHeight + _item.deposit()), + m_stackElements.end() + ); + m_stackHeight += _item.deposit(); + } + return op; +} + +/// Helper function for KnownState::reduceToCommonKnowledge, removes everything from +/// _this which is not in or not equal to the value in _other. +template <class _Mapping> void intersect(_Mapping& _this, _Mapping const& _other) +{ + for (auto it = _this.begin(); it != _this.end();) + if (_other.count(it->first) && _other.at(it->first) == it->second) + ++it; + else + it = _this.erase(it); +} + +void KnownState::reduceToCommonKnowledge(KnownState const& _other, bool _combineSequenceNumbers) +{ + int stackDiff = m_stackHeight - _other.m_stackHeight; + for (auto it = m_stackElements.begin(); it != m_stackElements.end();) + if (_other.m_stackElements.count(it->first - stackDiff)) + { + Id other = _other.m_stackElements.at(it->first - stackDiff); + if (it->second == other) + ++it; + else + { + set<u256> theseTags = tagsInExpression(it->second); + set<u256> otherTags = tagsInExpression(other); + if (!theseTags.empty() && !otherTags.empty()) + { + theseTags.insert(otherTags.begin(), otherTags.end()); + it->second = tagUnion(theseTags); + ++it; + } + else + it = m_stackElements.erase(it); + } + } + else + it = m_stackElements.erase(it); + + // Use the smaller stack height. Essential to terminate in case of loops. + if (m_stackHeight > _other.m_stackHeight) + { + map<int, Id> shiftedStack; + for (auto const& stackElement: m_stackElements) + shiftedStack[stackElement.first - stackDiff] = stackElement.second; + m_stackElements = move(shiftedStack); + m_stackHeight = _other.m_stackHeight; + } + + intersect(m_storageContent, _other.m_storageContent); + intersect(m_memoryContent, _other.m_memoryContent); + if (_combineSequenceNumbers) + m_sequenceNumber = max(m_sequenceNumber, _other.m_sequenceNumber); +} + +bool KnownState::operator==(KnownState const& _other) const +{ + if (m_storageContent != _other.m_storageContent || m_memoryContent != _other.m_memoryContent) + return false; + int stackDiff = m_stackHeight - _other.m_stackHeight; + auto thisIt = m_stackElements.cbegin(); + auto otherIt = _other.m_stackElements.cbegin(); + for (; thisIt != m_stackElements.cend() && otherIt != _other.m_stackElements.cend(); ++thisIt, ++otherIt) + if (thisIt->first - stackDiff != otherIt->first || thisIt->second != otherIt->second) + return false; + return (thisIt == m_stackElements.cend() && otherIt == _other.m_stackElements.cend()); +} + +ExpressionClasses::Id KnownState::stackElement(int _stackHeight, SourceLocation const& _location) +{ + if (m_stackElements.count(_stackHeight)) + return m_stackElements.at(_stackHeight); + // Stack element not found (not assigned yet), create new unknown equivalence class. + return m_stackElements[_stackHeight] = + m_expressionClasses->find(AssemblyItem(UndefinedItem, _stackHeight, _location)); +} + +KnownState::Id KnownState::relativeStackElement(int _stackOffset, SourceLocation const& _location) +{ + return stackElement(m_stackHeight + _stackOffset, _location); +} + +void KnownState::clearTagUnions() +{ + for (auto it = m_stackElements.begin(); it != m_stackElements.end();) + if (m_tagUnions.left.count(it->second)) + it = m_stackElements.erase(it); + else + ++it; +} + +void KnownState::setStackElement(int _stackHeight, Id _class) +{ + m_stackElements[_stackHeight] = _class; +} + +void KnownState::swapStackElements( + int _stackHeightA, + int _stackHeightB, + SourceLocation const& _location +) +{ + assertThrow(_stackHeightA != _stackHeightB, OptimizerException, "Swap on same stack elements."); + // ensure they are created + stackElement(_stackHeightA, _location); + stackElement(_stackHeightB, _location); + + swap(m_stackElements[_stackHeightA], m_stackElements[_stackHeightB]); +} + +KnownState::StoreOperation KnownState::storeInStorage( + Id _slot, + Id _value, + SourceLocation const& _location) +{ + if (m_storageContent.count(_slot) && m_storageContent[_slot] == _value) + // do not execute the storage if we know that the value is already there + return StoreOperation(); + m_sequenceNumber++; + decltype(m_storageContent) storageContents; + // Copy over all values (i.e. retain knowledge about them) where we know that this store + // operation will not destroy the knowledge. Specifically, we copy storage locations we know + // are different from _slot or locations where we know that the stored value is equal to _value. + for (auto const& storageItem: m_storageContent) + if (m_expressionClasses->knownToBeDifferent(storageItem.first, _slot) || storageItem.second == _value) + storageContents.insert(storageItem); + m_storageContent = move(storageContents); + + AssemblyItem item(Instruction::SSTORE, _location); + Id id = m_expressionClasses->find(item, {_slot, _value}, true, m_sequenceNumber); + StoreOperation operation(StoreOperation::Storage, _slot, m_sequenceNumber, id); + m_storageContent[_slot] = _value; + // increment a second time so that we get unique sequence numbers for writes + m_sequenceNumber++; + + return operation; +} + +ExpressionClasses::Id KnownState::loadFromStorage(Id _slot, SourceLocation const& _location) +{ + if (m_storageContent.count(_slot)) + return m_storageContent.at(_slot); + + AssemblyItem item(Instruction::SLOAD, _location); + return m_storageContent[_slot] = m_expressionClasses->find(item, {_slot}, true, m_sequenceNumber); +} + +KnownState::StoreOperation KnownState::storeInMemory(Id _slot, Id _value, SourceLocation const& _location) +{ + if (m_memoryContent.count(_slot) && m_memoryContent[_slot] == _value) + // do not execute the store if we know that the value is already there + return StoreOperation(); + m_sequenceNumber++; + decltype(m_memoryContent) memoryContents; + // copy over values at points where we know that they are different from _slot by at least 32 + for (auto const& memoryItem: m_memoryContent) + if (m_expressionClasses->knownToBeDifferentBy32(memoryItem.first, _slot)) + memoryContents.insert(memoryItem); + m_memoryContent = move(memoryContents); + + AssemblyItem item(Instruction::MSTORE, _location); + Id id = m_expressionClasses->find(item, {_slot, _value}, true, m_sequenceNumber); + StoreOperation operation(StoreOperation(StoreOperation::Memory, _slot, m_sequenceNumber, id)); + m_memoryContent[_slot] = _value; + // increment a second time so that we get unique sequence numbers for writes + m_sequenceNumber++; + return operation; +} + +ExpressionClasses::Id KnownState::loadFromMemory(Id _slot, SourceLocation const& _location) +{ + if (m_memoryContent.count(_slot)) + return m_memoryContent.at(_slot); + + AssemblyItem item(Instruction::MLOAD, _location); + return m_memoryContent[_slot] = m_expressionClasses->find(item, {_slot}, true, m_sequenceNumber); +} + +KnownState::Id KnownState::applySha3( + Id _start, + Id _length, + SourceLocation const& _location +) +{ + AssemblyItem sha3Item(Instruction::SHA3, _location); + // Special logic if length is a short constant, otherwise we cannot tell. + u256 const* l = m_expressionClasses->knownConstant(_length); + // unknown or too large length + if (!l || *l > 128) + return m_expressionClasses->find(sha3Item, {_start, _length}, true, m_sequenceNumber); + + vector<Id> arguments; + for (u256 i = 0; i < *l; i += 32) + { + Id slot = m_expressionClasses->find( + AssemblyItem(Instruction::ADD, _location), + {_start, m_expressionClasses->find(i)} + ); + arguments.push_back(loadFromMemory(slot, _location)); + } + if (m_knownSha3Hashes.count(arguments)) + return m_knownSha3Hashes.at(arguments); + Id v; + // If all arguments are known constants, compute the sha3 here + if (all_of(arguments.begin(), arguments.end(), [this](Id _a) { return !!m_expressionClasses->knownConstant(_a); })) + { + bytes data; + for (Id a: arguments) + data += toBigEndian(*m_expressionClasses->knownConstant(a)); + data.resize(size_t(*l)); + v = m_expressionClasses->find(AssemblyItem(u256(sha3(data)), _location)); + } + else + v = m_expressionClasses->find(sha3Item, {_start, _length}, true, m_sequenceNumber); + return m_knownSha3Hashes[arguments] = v; +} + +set<u256> KnownState::tagsInExpression(KnownState::Id _expressionId) +{ + if (m_tagUnions.left.count(_expressionId)) + return m_tagUnions.left.at(_expressionId); + // Might be a tag, then return the set of itself. + ExpressionClasses::Expression expr = m_expressionClasses->representative(_expressionId); + if (expr.item && expr.item->type() == PushTag) + return set<u256>({expr.item->data()}); + else + return set<u256>(); +} + +KnownState::Id KnownState::tagUnion(set<u256> _tags) +{ + if (m_tagUnions.right.count(_tags)) + return m_tagUnions.right.at(_tags); + else + { + Id id = m_expressionClasses->newClass(SourceLocation()); + m_tagUnions.right.insert(make_pair(_tags, id)); + return id; + } +} + diff --git a/libevmasm/KnownState.h b/libevmasm/KnownState.h new file mode 100644 index 00000000..c1c602dc --- /dev/null +++ b/libevmasm/KnownState.h @@ -0,0 +1,179 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @file KnownState.h + * @author Christian <c@ethdev.com> + * @date 2015 + * Contains knowledge about the state of the virtual machine at a specific instruction. + */ + +#pragma once + +#include <vector> +#include <map> +#include <set> +#include <tuple> +#include <memory> +#include <ostream> +#pragma warning(push) +#pragma GCC diagnostic push +#pragma clang diagnostic ignored "-Wredeclared-class-member" +#include <boost/bimap.hpp> +#pragma warning(pop) +#pragma GCC diagnostic pop +#include <libdevcore/CommonIO.h> +#include <libdevcore/Exceptions.h> +#include <libevmasm/ExpressionClasses.h> +#include <libevmasm/SemanticInformation.h> + +namespace dev +{ +namespace eth +{ + +class AssemblyItem; +using AssemblyItems = std::vector<AssemblyItem>; + +/** + * Class to infer and store knowledge about the state of the virtual machine at a specific + * instruction. + * + * The general workings are that for each assembly item that is fed, an equivalence class is + * derived from the operation and the equivalence class of its arguments. DUPi, SWAPi and some + * arithmetic instructions are used to infer equivalences while these classes are determined. + */ +class KnownState +{ +public: + using Id = ExpressionClasses::Id; + struct StoreOperation + { + enum Target { Invalid, Memory, Storage }; + StoreOperation(): target(Invalid), sequenceNumber(-1) {} + StoreOperation( + Target _target, + Id _slot, + unsigned _sequenceNumber, + Id _expression + ): target(_target), slot(_slot), sequenceNumber(_sequenceNumber), expression(_expression) {} + bool isValid() const { return target != Invalid; } + Target target; + Id slot; + unsigned sequenceNumber; + Id expression; + }; + + explicit KnownState( + std::shared_ptr<ExpressionClasses> _expressionClasses = std::make_shared<ExpressionClasses>() + ): m_expressionClasses(_expressionClasses) + { + } + + /// Streams debugging information to @a _out. + std::ostream& stream(std::ostream& _out) const; + + /// Feeds the item into the system for analysis. + /// @returns a possible store operation + StoreOperation feedItem(AssemblyItem const& _item, bool _copyItem = false); + + /// Resets any knowledge about storage. + void resetStorage() { m_storageContent.clear(); } + /// Resets any knowledge about storage. + void resetMemory() { m_memoryContent.clear(); } + /// Resets any knowledge about the current stack. + void resetStack() { m_stackElements.clear(); m_stackHeight = 0; } + /// Resets any knowledge. + void reset() { resetStorage(); resetMemory(); resetStack(); } + + unsigned sequenceNumber() const { return m_sequenceNumber; } + + /// Replaces the state by the intersection with _other, i.e. only equal knowledge is retained. + /// If the stack heighht is different, the smaller one is used and the stack is compared + /// relatively. + /// @param _combineSequenceNumbers if true, sets the sequence number to the maximum of both + void reduceToCommonKnowledge(KnownState const& _other, bool _combineSequenceNumbers); + + /// @returns a shared pointer to a copy of this state. + std::shared_ptr<KnownState> copy() const { return std::make_shared<KnownState>(*this); } + + /// @returns true if the knowledge about the state of both objects is (known to be) equal. + bool operator==(KnownState const& _other) const; + + /// Retrieves the current equivalence class fo the given stack element (or generates a new + /// one if it does not exist yet). + Id stackElement(int _stackHeight, SourceLocation const& _location); + /// @returns the stackElement relative to the current stack height. + Id relativeStackElement(int _stackOffset, SourceLocation const& _location = SourceLocation()); + + /// @returns its set of tags if the given expression class is a known tag union; returns a set + /// containing the tag if it is a PushTag expression and the empty set otherwise. + std::set<u256> tagsInExpression(Id _expressionId); + /// During analysis, different tags on the stack are partially treated as the same class. + /// This removes such classes not to confuse later analyzers. + void clearTagUnions(); + + int stackHeight() const { return m_stackHeight; } + std::map<int, Id> const& stackElements() const { return m_stackElements; } + ExpressionClasses& expressionClasses() const { return *m_expressionClasses; } + + std::map<Id, Id> const& storageContent() const { return m_storageContent; } + +private: + /// Assigns a new equivalence class to the next sequence number of the given stack element. + void setStackElement(int _stackHeight, Id _class); + /// Swaps the given stack elements in their next sequence number. + void swapStackElements(int _stackHeightA, int _stackHeightB, SourceLocation const& _location); + + /// Increments the sequence number, deletes all storage information that might be overwritten + /// and stores the new value at the given slot. + /// @returns the store operation, which might be invalid if storage was not modified + StoreOperation storeInStorage(Id _slot, Id _value, SourceLocation const& _location); + /// Retrieves the current value at the given slot in storage or creates a new special sload class. + Id loadFromStorage(Id _slot, SourceLocation const& _location); + /// Increments the sequence number, deletes all memory information that might be overwritten + /// and stores the new value at the given slot. + /// @returns the store operation, which might be invalid if memory was not modified + StoreOperation storeInMemory(Id _slot, Id _value, SourceLocation const& _location); + /// Retrieves the current value at the given slot in memory or creates a new special mload class. + Id loadFromMemory(Id _slot, SourceLocation const& _location); + /// Finds or creates a new expression that applies the sha3 hash function to the contents in memory. + Id applySha3(Id _start, Id _length, SourceLocation const& _location); + + /// @returns a new or already used Id representing the given set of tags. + Id tagUnion(std::set<u256> _tags); + + /// Current stack height, can be negative. + int m_stackHeight = 0; + /// Current stack layout, mapping stack height -> equivalence class + std::map<int, Id> m_stackElements; + /// Current sequence number, this is incremented with each modification to storage or memory. + unsigned m_sequenceNumber = 1; + /// Knowledge about storage content. + std::map<Id, Id> m_storageContent; + /// Knowledge about memory content. Keys are memory addresses, note that the values overlap + /// and are not contained here if they are not completely known. + std::map<Id, Id> m_memoryContent; + /// Keeps record of all sha3 hashes that are computed. + std::map<std::vector<Id>, Id> m_knownSha3Hashes; + /// Structure containing the classes of equivalent expressions. + std::shared_ptr<ExpressionClasses> m_expressionClasses; + /// Container for unions of tags stored on the stack. + boost::bimap<Id, std::set<u256>> m_tagUnions; +}; + +} +} diff --git a/libevmasm/LinkerObject.cpp b/libevmasm/LinkerObject.cpp new file mode 100644 index 00000000..ceb864a1 --- /dev/null +++ b/libevmasm/LinkerObject.cpp @@ -0,0 +1,62 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** @file LinkerObject.cpp + * @author Christian R <c@ethdev.com> + * @date 2015 + */ + +#include <libevmasm/LinkerObject.h> +#include <libdevcore/CommonData.h> + +using namespace dev; +using namespace dev::eth; +using namespace std; + +void LinkerObject::append(LinkerObject const& _other) +{ + for (auto const& ref: _other.linkReferences) + linkReferences[ref.first + bytecode.size()] = ref.second; + bytecode += _other.bytecode; +} + +void LinkerObject::link(map<string, h160> const& _libraryAddresses) +{ + std::map<size_t, std::string> remainingRefs; + for (auto const& linkRef: linkReferences) + { + auto it = _libraryAddresses.find(linkRef.second); + if (it == _libraryAddresses.end()) + remainingRefs.insert(linkRef); + else + it->second.ref().copyTo(ref(bytecode).cropped(linkRef.first, 20)); + } + linkReferences.swap(remainingRefs); +} + +string LinkerObject::toHex() const +{ + string hex = dev::toHex(bytecode); + for (auto const& ref: linkReferences) + { + size_t pos = ref.first * 2; + string const& name = ref.second; + hex[pos] = hex[pos + 1] = hex[pos + 38] = hex[pos + 39] = '_'; + for (size_t i = 0; i < 36; ++i) + hex[pos + 2 + i] = i < name.size() ? name[i] : '_'; + } + return hex; +} diff --git a/libevmasm/LinkerObject.h b/libevmasm/LinkerObject.h new file mode 100644 index 00000000..83d2bd7e --- /dev/null +++ b/libevmasm/LinkerObject.h @@ -0,0 +1,55 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** @file Assembly.h + * @author Gav Wood <i@gavwood.com> + * @date 2014 + */ + +#pragma once + +#include <libdevcore/Common.h> +#include <libdevcore/FixedHash.h> + +namespace dev +{ +namespace eth +{ + +/** + * Binary object that potentially still needs to be linked (i.e. addresses of other contracts + * need to be filled in). + */ +struct LinkerObject +{ + bytes bytecode; + /// Map from offsets in bytecode to library identifiers. The addresses starting at those offsets + /// need to be replaced by the actual addresses by the linker. + std::map<size_t, std::string> linkReferences; + + /// Appends the bytecode of @a _other and incorporates its link references. + void append(LinkerObject const& _other); + + /// Links the given libraries by replacing their uses in the code and removes them from the references. + void link(std::map<std::string, h160> const& _libraryAddresses); + + /// @returns a hex representation of the bytecode of the given object, replacing unlinked + /// addresses by placeholders. + std::string toHex() const; +}; + +} +} diff --git a/libevmasm/PathGasMeter.cpp b/libevmasm/PathGasMeter.cpp new file mode 100644 index 00000000..8f7314f8 --- /dev/null +++ b/libevmasm/PathGasMeter.cpp @@ -0,0 +1,128 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** @file PathGasMeter.cpp + * @author Christian <c@ethdev.com> + * @date 2015 + */ + +#include "PathGasMeter.h" +#include <libevmasm/KnownState.h> +#include <libevmasm/SemanticInformation.h> + +using namespace std; +using namespace dev; +using namespace dev::eth; + +PathGasMeter::PathGasMeter(AssemblyItems const& _items): + m_items(_items) +{ + for (size_t i = 0; i < m_items.size(); ++i) + if (m_items[i].type() == Tag) + m_tagPositions[m_items[i].data()] = i; +} + +GasMeter::GasConsumption PathGasMeter::estimateMax( + size_t _startIndex, + shared_ptr<KnownState> const& _state +) +{ + auto path = unique_ptr<GasPath>(new GasPath()); + path->index = _startIndex; + path->state = _state->copy(); + m_queue.push_back(move(path)); + + GasMeter::GasConsumption gas; + while (!m_queue.empty() && !gas.isInfinite) + gas = max(gas, handleQueueItem()); + return gas; +} + +GasMeter::GasConsumption PathGasMeter::handleQueueItem() +{ + assertThrow(!m_queue.empty(), OptimizerException, ""); + + unique_ptr<GasPath> path = move(m_queue.back()); + m_queue.pop_back(); + + shared_ptr<KnownState> state = path->state; + GasMeter meter(state, path->largestMemoryAccess); + ExpressionClasses& classes = state->expressionClasses(); + GasMeter::GasConsumption gas = path->gas; + size_t index = path->index; + + if (index >= m_items.size() || (index > 0 && m_items.at(index).type() != Tag)) + // Invalid jump usually provokes an out-of-gas exception, but we want to give an upper + // bound on the gas that is needed without changing the behaviour, so it is fine to + // return the current gas value. + return gas; + + set<u256> jumpTags; + for (; index < m_items.size() && !gas.isInfinite; ++index) + { + bool branchStops = false; + jumpTags.clear(); + AssemblyItem const& item = m_items.at(index); + if (item.type() == Tag || item == AssemblyItem(eth::Instruction::JUMPDEST)) + { + // Do not allow any backwards jump. This is quite restrictive but should work for + // the simplest things. + if (path->visitedJumpdests.count(index)) + return GasMeter::GasConsumption::infinite(); + path->visitedJumpdests.insert(index); + } + else if (item == AssemblyItem(eth::Instruction::JUMP)) + { + branchStops = true; + jumpTags = state->tagsInExpression(state->relativeStackElement(0)); + if (jumpTags.empty()) // unknown jump destination + return GasMeter::GasConsumption::infinite(); + } + else if (item == AssemblyItem(eth::Instruction::JUMPI)) + { + ExpressionClasses::Id condition = state->relativeStackElement(-1); + if (classes.knownNonZero(condition) || !classes.knownZero(condition)) + { + jumpTags = state->tagsInExpression(state->relativeStackElement(0)); + if (jumpTags.empty()) // unknown jump destination + return GasMeter::GasConsumption::infinite(); + } + branchStops = classes.knownNonZero(condition); + } + else if (SemanticInformation::altersControlFlow(item)) + branchStops = true; + + gas += meter.estimateMax(item); + + for (u256 const& tag: jumpTags) + { + auto newPath = unique_ptr<GasPath>(new GasPath()); + newPath->index = m_items.size(); + if (m_tagPositions.count(tag)) + newPath->index = m_tagPositions.at(tag); + newPath->gas = gas; + newPath->largestMemoryAccess = meter.largestMemoryAccess(); + newPath->state = state->copy(); + newPath->visitedJumpdests = path->visitedJumpdests; + m_queue.push_back(move(newPath)); + } + + if (branchStops) + break; + } + + return gas; +} diff --git a/libevmasm/PathGasMeter.h b/libevmasm/PathGasMeter.h new file mode 100644 index 00000000..1ada460a --- /dev/null +++ b/libevmasm/PathGasMeter.h @@ -0,0 +1,66 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** @file PathGasMeter.cpp + * @author Christian <c@ethdev.com> + * @date 2015 + */ + +#pragma once + +#include <set> +#include <vector> +#include <memory> +#include <libevmasm/GasMeter.h> + +namespace dev +{ +namespace eth +{ + +class KnownState; + +struct GasPath +{ + size_t index = 0; + std::shared_ptr<KnownState> state; + u256 largestMemoryAccess; + GasMeter::GasConsumption gas; + std::set<size_t> visitedJumpdests; +}; + +/** + * Computes an upper bound on the gas usage of a computation starting at a certain position in + * a list of AssemblyItems in a given state until the computation stops. + * Can be used to estimate the gas usage of functions on any given input. + */ +class PathGasMeter +{ +public: + PathGasMeter(AssemblyItems const& _items); + + GasMeter::GasConsumption estimateMax(size_t _startIndex, std::shared_ptr<KnownState> const& _state); + +private: + GasMeter::GasConsumption handleQueueItem(); + + std::vector<std::unique_ptr<GasPath>> m_queue; + std::map<u256, size_t> m_tagPositions; + AssemblyItems const& m_items; +}; + +} +} diff --git a/libevmasm/SemanticInformation.cpp b/libevmasm/SemanticInformation.cpp new file mode 100644 index 00000000..ea579b83 --- /dev/null +++ b/libevmasm/SemanticInformation.cpp @@ -0,0 +1,181 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @file SemanticInformation.cpp + * @author Christian <c@ethdev.com> + * @date 2015 + * Helper to provide semantic information about assembly items. + */ + +#include <libevmasm/SemanticInformation.h> +#include <libevmasm/AssemblyItem.h> + +using namespace std; +using namespace dev; +using namespace dev::eth; + +bool SemanticInformation::breaksCSEAnalysisBlock(AssemblyItem const& _item) +{ + switch (_item.type()) + { + default: + case UndefinedItem: + case Tag: + return true; + case Push: + case PushString: + case PushTag: + case PushSub: + case PushSubSize: + case PushProgramSize: + case PushData: + case PushLibraryAddress: + return false; + case Operation: + { + if (isSwapInstruction(_item) || isDupInstruction(_item)) + return false; + if (_item.instruction() == Instruction::GAS || _item.instruction() == Instruction::PC) + return true; // GAS and PC assume a specific order of opcodes + if (_item.instruction() == Instruction::MSIZE) + return true; // msize is modified already by memory access, avoid that for now + InstructionInfo info = instructionInfo(_item.instruction()); + if (_item.instruction() == Instruction::SSTORE) + return false; + if (_item.instruction() == Instruction::MSTORE) + return false; + //@todo: We do not handle the following memory instructions for now: + // calldatacopy, codecopy, extcodecopy, mstore8, + // msize (note that msize also depends on memory read access) + + // the second requirement will be lifted once it is implemented + return info.sideEffects || info.args > 2; + } + } +} + +bool SemanticInformation::isCommutativeOperation(AssemblyItem const& _item) +{ + if (_item.type() != Operation) + return false; + switch (_item.instruction()) + { + case Instruction::ADD: + case Instruction::MUL: + case Instruction::EQ: + case Instruction::AND: + case Instruction::OR: + case Instruction::XOR: + return true; + default: + return false; + } +} + +bool SemanticInformation::isDupInstruction(AssemblyItem const& _item) +{ + if (_item.type() != Operation) + return false; + return Instruction::DUP1 <= _item.instruction() && _item.instruction() <= Instruction::DUP16; +} + +bool SemanticInformation::isSwapInstruction(AssemblyItem const& _item) +{ + if (_item.type() != Operation) + return false; + return Instruction::SWAP1 <= _item.instruction() && _item.instruction() <= Instruction::SWAP16; +} + +bool SemanticInformation::isJumpInstruction(AssemblyItem const& _item) +{ + return _item == AssemblyItem(Instruction::JUMP) || _item == AssemblyItem(Instruction::JUMPI); +} + +bool SemanticInformation::altersControlFlow(AssemblyItem const& _item) +{ + if (_item.type() != Operation) + return false; + switch (_item.instruction()) + { + // note that CALL, CALLCODE and CREATE do not really alter the control flow, because we + // continue on the next instruction + case Instruction::JUMP: + case Instruction::JUMPI: + case Instruction::RETURN: + case Instruction::SUICIDE: + case Instruction::STOP: + return true; + default: + return false; + } +} + + +bool SemanticInformation::isDeterministic(AssemblyItem const& _item) +{ + if (_item.type() != Operation) + return true; + + switch (_item.instruction()) + { + case Instruction::CALL: + case Instruction::CALLCODE: + case Instruction::DELEGATECALL: + case Instruction::CREATE: + case Instruction::GAS: + case Instruction::PC: + case Instruction::MSIZE: // depends on previous writes and reads, not only on content + case Instruction::BALANCE: // depends on previous calls + case Instruction::EXTCODESIZE: + return false; + default: + return true; + } +} + +bool SemanticInformation::invalidatesMemory(Instruction _instruction) +{ + switch (_instruction) + { + case Instruction::CALLDATACOPY: + case Instruction::CODECOPY: + case Instruction::EXTCODECOPY: + case Instruction::MSTORE: + case Instruction::MSTORE8: + case Instruction::CALL: + case Instruction::CALLCODE: + case Instruction::DELEGATECALL: + return true; + default: + return false; + } +} + +bool SemanticInformation::invalidatesStorage(Instruction _instruction) +{ + switch (_instruction) + { + case Instruction::CALL: + case Instruction::CALLCODE: + case Instruction::DELEGATECALL: + case Instruction::CREATE: + case Instruction::SSTORE: + return true; + default: + return false; + } +} diff --git a/libevmasm/SemanticInformation.h b/libevmasm/SemanticInformation.h new file mode 100644 index 00000000..094f4591 --- /dev/null +++ b/libevmasm/SemanticInformation.h @@ -0,0 +1,59 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @file SemanticInformation.h + * @author Christian <c@ethdev.com> + * @date 2015 + * Helper to provide semantic information about assembly items. + */ + +#pragma once + +#include <libevmcore/Instruction.h> + +namespace dev +{ +namespace eth +{ + +class AssemblyItem; + +/** + * Helper functions to provide context-independent information about assembly items. + */ +struct SemanticInformation +{ + /// @returns true if the given items starts a new block for common subexpression analysis. + static bool breaksCSEAnalysisBlock(AssemblyItem const& _item); + /// @returns true if the item is a two-argument operation whose value does not depend on the + /// order of its arguments. + static bool isCommutativeOperation(AssemblyItem const& _item); + static bool isDupInstruction(AssemblyItem const& _item); + static bool isSwapInstruction(AssemblyItem const& _item); + static bool isJumpInstruction(AssemblyItem const& _item); + static bool altersControlFlow(AssemblyItem const& _item); + /// @returns false if the value put on the stack by _item depends on anything else than + /// the information in the current block header, memory, storage or stack. + static bool isDeterministic(AssemblyItem const& _item); + /// @returns true if the given instruction modifies memory. + static bool invalidatesMemory(Instruction _instruction); + /// @returns true if the given instruction modifies storage (even indirectly). + static bool invalidatesStorage(Instruction _instruction); +}; + +} +} diff --git a/libevmasm/SourceLocation.h b/libevmasm/SourceLocation.h new file mode 100644 index 00000000..b8b57b60 --- /dev/null +++ b/libevmasm/SourceLocation.h @@ -0,0 +1,102 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @author Lefteris Karapetsas <lefteris@ethdev.com> + * @date 2015 + * Represents a location in a source file + */ + +#pragma once + +#include <memory> +#include <string> +#include <ostream> +#include <tuple> + +namespace dev +{ + +/** + * Representation of an interval of source positions. + * The interval includes start and excludes end. + */ +struct SourceLocation +{ + SourceLocation(int _start, int _end, std::shared_ptr<std::string const> _sourceName): + start(_start), end(_end), sourceName(_sourceName) { } + SourceLocation(): start(-1), end(-1) { } + + SourceLocation(SourceLocation const& _other): + start(_other.start), + end(_other.end), + sourceName(_other.sourceName) + {} + + SourceLocation& operator=(SourceLocation const& _other) + { + if (&_other == this) + return *this; + + start = _other.start; + end = _other.end; + sourceName = _other.sourceName; + return *this; + } + + bool operator==(SourceLocation const& _other) const { return start == _other.start && end == _other.end;} + bool operator!=(SourceLocation const& _other) const { return !operator==(_other); } + inline bool operator<(SourceLocation const& _other) const; + inline bool contains(SourceLocation const& _other) const; + inline bool intersects(SourceLocation const& _other) const; + + bool isEmpty() const { return start == -1 && end == -1; } + + int start; + int end; + std::shared_ptr<std::string const> sourceName; +}; + +/// Stream output for Location (used e.g. in boost exceptions). +inline std::ostream& operator<<(std::ostream& _out, SourceLocation const& _location) +{ + if (_location.isEmpty()) + return _out << "NO_LOCATION_SPECIFIED"; + return _out << *_location.sourceName << "[" << _location.start << "," << _location.end << ")"; +} + +bool SourceLocation::operator<(SourceLocation const& _other) const +{ + if (!sourceName || !_other.sourceName) + return int(!!sourceName) < int(!!_other.sourceName); + return make_tuple(*sourceName, start, end) < make_tuple(*_other.sourceName, _other.start, _other.end); +} + +bool SourceLocation::contains(SourceLocation const& _other) const +{ + if (isEmpty() || _other.isEmpty() || !sourceName || !_other.sourceName || *sourceName != *_other.sourceName) + return false; + return start <= _other.start && _other.end <= end; +} + +bool SourceLocation::intersects(SourceLocation const& _other) const +{ + if (isEmpty() || _other.isEmpty() || !sourceName || !_other.sourceName || *sourceName != *_other.sourceName) + return false; + return _other.start < end && start < _other.end; +} + +} diff --git a/libevmasm/Version.cpp b/libevmasm/Version.cpp new file mode 100644 index 00000000..c1379848 --- /dev/null +++ b/libevmasm/Version.cpp @@ -0,0 +1,38 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @author Christian <c@ethdev.com> + * @date 2015 + * Versioning. + */ + +#include <string> +#include <ethereum/BuildInfo.h> +#include <libdevcore/Common.h> +#include <libevmasm/Version.h> + +using namespace dev; +using namespace std; + +char const* dev::eth::VersionNumberLibEvmAsm = ETH_PROJECT_VERSION; +extern string const dev::eth::VersionStringLibEvmAsm = + string(dev::eth::VersionNumberLibEvmAsm) + + "-" + + string(DEV_QUOTED(ETH_COMMIT_HASH)).substr(0, 8) + + (ETH_CLEAN_REPO ? "" : "*") + + "/" DEV_QUOTED(ETH_BUILD_TYPE) "-" DEV_QUOTED(ETH_BUILD_PLATFORM); + diff --git a/libevmasm/Version.h b/libevmasm/Version.h new file mode 100644 index 00000000..8cba6e83 --- /dev/null +++ b/libevmasm/Version.h @@ -0,0 +1,36 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @author Christian <c@ethdev.com> + * @date 2015 + * Versioning. + */ + +#pragma once + +#include <string> + +namespace dev +{ +namespace eth +{ + +extern char const* VersionNumberLibEvmAsm; +extern std::string const VersionStringLibEvmAsm; + +} +} diff --git a/liblll/All.h b/liblll/All.h new file mode 100644 index 00000000..7c4192f6 --- /dev/null +++ b/liblll/All.h @@ -0,0 +1,6 @@ +#pragma once + +#include "CodeFragment.h" +#include "Compiler.h" +#include "CompilerState.h" +#include "Parser.h" diff --git a/liblll/CMakeLists.txt b/liblll/CMakeLists.txt new file mode 100644 index 00000000..97a1a186 --- /dev/null +++ b/liblll/CMakeLists.txt @@ -0,0 +1,23 @@ +cmake_policy(SET CMP0015 NEW) +set(CMAKE_AUTOMOC OFF) + +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DSTATICLIB") + +aux_source_directory(. SRC_LIST) + +#include_directories(BEFORE ${JSONCPP_INCLUDE_DIRS}) +#include_directories(${Boost_INCLUDE_DIRS}) + +set(EXECUTABLE lll) + +file(GLOB HEADERS "*.h") + +include_directories(BEFORE ..) +add_library(${EXECUTABLE} ${SRC_LIST} ${HEADERS}) + +eth_use(${EXECUTABLE} REQUIRED Solidity::evmasm) +#target_link_libraries(${EXECUTABLE} evmasm) + +install( TARGETS ${EXECUTABLE} RUNTIME DESTINATION bin ARCHIVE DESTINATION lib LIBRARY DESTINATION lib ) +install( FILES ${HEADERS} DESTINATION include/${EXECUTABLE} ) + diff --git a/liblll/CodeFragment.cpp b/liblll/CodeFragment.cpp new file mode 100644 index 00000000..64680d5a --- /dev/null +++ b/liblll/CodeFragment.cpp @@ -0,0 +1,586 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** @file CodeFragment.cpp + * @author Gav Wood <i@gavwood.com> + * @date 2014 + */ + +#include "CodeFragment.h" + +#include <boost/algorithm/string.hpp> +#pragma warning(push) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-parameter" +#include <boost/spirit/include/support_utree.hpp> +#pragma warning(pop) +#pragma GCC diagnostic pop +#include <libdevcore/Log.h> +#include <libdevcore/CommonIO.h> +#include <libevmcore/Instruction.h> +#include "CompilerState.h" +#include "Parser.h" +using namespace std; +using namespace dev; +using namespace dev::eth; +namespace qi = boost::spirit::qi; +namespace px = boost::phoenix; +namespace sp = boost::spirit; + +void CodeFragment::finalise(CompilerState const& _cs) +{ + if (_cs.usedAlloc && _cs.vars.size() && !m_finalised) + { + m_finalised = true; + m_asm.injectStart(Instruction::MSTORE8); + m_asm.injectStart((u256)((_cs.vars.size() + 2) * 32) - 1); + m_asm.injectStart((u256)1); + } +} + +CodeFragment::CodeFragment(sp::utree const& _t, CompilerState& _s, bool _allowASM) +{ +/* cdebug << "CodeFragment. Locals:"; + for (auto const& i: _s.defs) + cdebug << i.first << ":" << toHex(i.second.m_code); + cdebug << "Args:"; + for (auto const& i: _s.args) + cdebug << i.first << ":" << toHex(i.second.m_code); + cdebug << "Outers:"; + for (auto const& i: _s.outers) + cdebug << i.first << ":" << toHex(i.second.m_code); + debugOutAST(cout, _t); + cout << endl << flush; +*/ + switch (_t.which()) + { + case sp::utree_type::list_type: + constructOperation(_t, _s); + break; + case sp::utree_type::string_type: + { + auto sr = _t.get<sp::basic_string<boost::iterator_range<char const*>, sp::utree_type::string_type>>(); + string s(sr.begin(), sr.end()); + m_asm.append(s); + break; + } + case sp::utree_type::symbol_type: + { + auto sr = _t.get<sp::basic_string<boost::iterator_range<char const*>, sp::utree_type::symbol_type>>(); + string s(sr.begin(), sr.end()); + string us = boost::algorithm::to_upper_copy(s); + if (_allowASM && c_instructions.count(us)) + m_asm.append(c_instructions.at(us)); + else if (_s.defs.count(s)) + m_asm.append(_s.defs.at(s).m_asm); + else if (_s.args.count(s)) + m_asm.append(_s.args.at(s).m_asm); + else if (_s.outers.count(s)) + m_asm.append(_s.outers.at(s).m_asm); + else if (us.find_first_of("1234567890") != 0 && us.find_first_not_of("QWERTYUIOPASDFGHJKLZXCVBNM1234567890_") == string::npos) + { + auto it = _s.vars.find(s); + if (it == _s.vars.end()) + { + bool ok; + tie(it, ok) = _s.vars.insert(make_pair(s, make_pair(_s.stackSize, 32))); + _s.stackSize += 32; + } + m_asm.append((u256)it->second.first); + } + else + error<BareSymbol>(); + + break; + } + case sp::utree_type::any_type: + { + bigint i = *_t.get<bigint*>(); + if (i < 0 || i > bigint(u256(0) - 1)) + error<IntegerOutOfRange>(); + m_asm.append((u256)i); + break; + } + default: break; + } +} + +void CodeFragment::constructOperation(sp::utree const& _t, CompilerState& _s) +{ + if (_t.tag() == 0 && _t.empty()) + error<EmptyList>(); + else if (_t.tag() == 0 && _t.front().which() != sp::utree_type::symbol_type) + error<DataNotExecutable>(); + else + { + string s; + string us; + switch (_t.tag()) + { + case 0: + { + auto sr = _t.front().get<sp::basic_string<boost::iterator_range<char const*>, sp::utree_type::symbol_type>>(); + s = string(sr.begin(), sr.end()); + us = boost::algorithm::to_upper_copy(s); + break; + } + case 1: + us = "MLOAD"; + break; + case 2: + us = "SLOAD"; + break; + case 3: + us = "MSTORE"; + break; + case 4: + us = "SSTORE"; + break; + case 5: + us = "SEQ"; + break; + case 6: + us = "CALLDATALOAD"; + break; + default:; + } + + auto firstAsString = [&]() + { + auto i = *++_t.begin(); + if (i.tag()) + error<InvalidName>(); + if (i.which() == sp::utree_type::string_type) + { + auto sr = i.get<sp::basic_string<boost::iterator_range<char const*>, sp::utree_type::string_type>>(); + return string(sr.begin(), sr.end()); + } + else if (i.which() == sp::utree_type::symbol_type) + { + auto sr = i.get<sp::basic_string<boost::iterator_range<char const*>, sp::utree_type::symbol_type>>(); + return _s.getDef(string(sr.begin(), sr.end())).m_asm.backString(); + } + return string(); + }; + + auto varAddress = [&](string const& n) + { + auto it = _s.vars.find(n); + if (it == _s.vars.end()) + { + bool ok; + tie(it, ok) = _s.vars.insert(make_pair(n, make_pair(_s.stackSize, 32))); + _s.stackSize += 32; + } + return it->second.first; + }; + + // Operations who args are not standard stack-pushers. + bool nonStandard = true; + if (us == "ASM") + { + int c = 0; + for (auto const& i: _t) + if (c++) + m_asm.append(CodeFragment(i, _s, true).m_asm); + } + else if (us == "INCLUDE") + { + if (_t.size() != 2) + error<IncorrectParameterCount>(); + m_asm.append(CodeFragment::compile(contentsString(firstAsString()), _s).m_asm); + } + else if (us == "SET") + { + if (_t.size() != 3) + error<IncorrectParameterCount>(); + int c = 0; + for (auto const& i: _t) + if (c++ == 2) + m_asm.append(CodeFragment(i, _s, false).m_asm); + m_asm.append((u256)varAddress(firstAsString())); + m_asm.append(Instruction::MSTORE); + } + else if (us == "GET") + { + if (_t.size() != 2) + error<IncorrectParameterCount>(); + m_asm.append((u256)varAddress(firstAsString())); + m_asm.append(Instruction::MLOAD); + } + else if (us == "REF") + m_asm.append((u256)varAddress(firstAsString())); + else if (us == "DEF") + { + string n; + unsigned ii = 0; + if (_t.size() != 3 && _t.size() != 4) + error<IncorrectParameterCount>(); + vector<string> args; + for (auto const& i: _t) + { + if (ii == 1) + { + if (i.tag()) + error<InvalidName>(); + if (i.which() == sp::utree_type::string_type) + { + auto sr = i.get<sp::basic_string<boost::iterator_range<char const*>, sp::utree_type::string_type>>(); + n = string(sr.begin(), sr.end()); + } + else if (i.which() == sp::utree_type::symbol_type) + { + auto sr = i.get<sp::basic_string<boost::iterator_range<char const*>, sp::utree_type::symbol_type>>(); + n = _s.getDef(string(sr.begin(), sr.end())).m_asm.backString(); + } + } + else if (ii == 2) + if (_t.size() == 3) + _s.defs[n] = CodeFragment(i, _s); + else + for (auto const& j: i) + { + if (j.tag() || j.which() != sp::utree_type::symbol_type) + error<InvalidMacroArgs>(); + auto sr = j.get<sp::basic_string<boost::iterator_range<char const*>, sp::utree_type::symbol_type>>(); + args.push_back(string(sr.begin(), sr.end())); + } + else if (ii == 3) + { + auto k = make_pair(n, args.size()); + _s.macros[k].code = i; + _s.macros[k].env = _s.outers; + _s.macros[k].args = args; + for (auto const& i: _s.args) + _s.macros[k].env[i.first] = i.second; + for (auto const& i: _s.defs) + _s.macros[k].env[i.first] = i.second; + } + ++ii; + } + } + else if (us == "LIT") + { + if (_t.size() < 3) + error<IncorrectParameterCount>(); + unsigned ii = 0; + CodeFragment pos; + bytes data; + for (auto const& i: _t) + { + if (ii == 1) + { + pos = CodeFragment(i, _s); + if (pos.m_asm.deposit() != 1) + error<InvalidDeposit>(); + } + else if (ii == 2 && !i.tag() && i.which() == sp::utree_type::string_type) + { + auto sr = i.get<sp::basic_string<boost::iterator_range<char const*>, sp::utree_type::string_type>>(); + data = bytes((byte const*)sr.begin(), (byte const*)sr.end()); + } + else if (ii >= 2 && !i.tag() && i.which() == sp::utree_type::any_type) + { + bigint bi = *i.get<bigint*>(); + if (bi < 0) + error<IntegerOutOfRange>(); + else if (bi > bigint(u256(0) - 1)) + { + if (ii == 2 && _t.size() == 3) + { + // One big int - allow it as hex. + data.resize(bytesRequired(bi)); + toBigEndian(bi, data); + } + else + error<IntegerOutOfRange>(); + } + else + { + data.resize(data.size() + 32); + *(h256*)(&data.back() - 31) = (u256)bi; + } + } + else if (ii) + error<InvalidLiteral>(); + ++ii; + } + m_asm.append((u256)data.size()); + m_asm.append(Instruction::DUP1); + m_asm.append(data); + m_asm.append(pos.m_asm, 1); + m_asm.append(Instruction::CODECOPY); + } + else + nonStandard = false; + + if (nonStandard) + return; + + std::map<std::string, Instruction> const c_arith = { { "+", Instruction::ADD }, { "-", Instruction::SUB }, { "*", Instruction::MUL }, { "/", Instruction::DIV }, { "%", Instruction::MOD }, { "&", Instruction::AND }, { "|", Instruction::OR }, { "^", Instruction::XOR } }; + std::map<std::string, pair<Instruction, bool>> const c_binary = { { "<", { Instruction::LT, false } }, { "<=", { Instruction::GT, true } }, { ">", { Instruction::GT, false } }, { ">=", { Instruction::LT, true } }, { "S<", { Instruction::SLT, false } }, { "S<=", { Instruction::SGT, true } }, { "S>", { Instruction::SGT, false } }, { "S>=", { Instruction::SLT, true } }, { "=", { Instruction::EQ, false } }, { "!=", { Instruction::EQ, true } } }; + std::map<std::string, Instruction> const c_unary = { { "!", Instruction::ISZERO } }; + + vector<CodeFragment> code; + CompilerState ns = _s; + ns.vars.clear(); + ns.usedAlloc = false; + int c = _t.tag() ? 1 : 0; + for (auto const& i: _t) + if (c++) + { + if (us == "LLL" && c == 1) + code.push_back(CodeFragment(i, ns)); + else + code.push_back(CodeFragment(i, _s)); + } + auto requireSize = [&](unsigned s) { if (code.size() != s) error<IncorrectParameterCount>(); }; + auto requireMinSize = [&](unsigned s) { if (code.size() < s) error<IncorrectParameterCount>(); }; + auto requireMaxSize = [&](unsigned s) { if (code.size() > s) error<IncorrectParameterCount>(); }; + auto requireDeposit = [&](unsigned i, int s) { if (code[i].m_asm.deposit() != s) error<InvalidDeposit>(); }; + + if (_s.macros.count(make_pair(s, code.size()))) + { + Macro const& m = _s.macros.at(make_pair(s, code.size())); + CompilerState cs = _s; + for (auto const& i: m.env) + cs.outers[i.first] = i.second; + for (auto const& i: cs.defs) + cs.outers[i.first] = i.second; + cs.defs.clear(); + for (unsigned i = 0; i < m.args.size(); ++i) + { + //requireDeposit(i, 1); + cs.args[m.args[i]] = code[i]; + } + m_asm.append(CodeFragment(m.code, cs).m_asm); + for (auto const& i: cs.defs) + _s.defs[i.first] = i.second; + for (auto const& i: cs.macros) + _s.macros.insert(i); + } + else if (c_instructions.count(us)) + { + auto it = c_instructions.find(us); + int ea = instructionInfo(it->second).args; + if (ea >= 0) + requireSize(ea); + else + requireMinSize(-ea); + + for (unsigned i = code.size(); i; --i) + m_asm.append(code[i - 1].m_asm, 1); + m_asm.append(it->second); + } + else if (c_arith.count(us)) + { + auto it = c_arith.find(us); + requireMinSize(1); + for (unsigned i = code.size(); i; --i) + { + requireDeposit(i - 1, 1); + m_asm.append(code[i - 1].m_asm, 1); + } + for (unsigned i = 1; i < code.size(); ++i) + m_asm.append(it->second); + } + else if (c_binary.count(us)) + { + auto it = c_binary.find(us); + requireSize(2); + requireDeposit(0, 1); + requireDeposit(1, 1); + m_asm.append(code[1].m_asm, 1); + m_asm.append(code[0].m_asm, 1); + m_asm.append(it->second.first); + if (it->second.second) + m_asm.append(Instruction::ISZERO); + } + else if (c_unary.count(us)) + { + auto it = c_unary.find(us); + requireSize(1); + requireDeposit(0, 1); + m_asm.append(code[0].m_asm, 1); + m_asm.append(it->second); + } + else if (us == "IF") + { + requireSize(3); + requireDeposit(0, 1); + int minDep = min(code[1].m_asm.deposit(), code[2].m_asm.deposit()); + + m_asm.append(code[0].m_asm); + auto pos = m_asm.appendJumpI(); + m_asm.onePath(); + m_asm.append(code[2].m_asm, minDep); + auto end = m_asm.appendJump(); + m_asm.otherPath(); + m_asm << pos.tag(); + m_asm.append(code[1].m_asm, minDep); + m_asm << end.tag(); + m_asm.donePaths(); + } + else if (us == "WHEN" || us == "UNLESS") + { + requireSize(2); + requireDeposit(0, 1); + + m_asm.append(code[0].m_asm); + if (us == "WHEN") + m_asm.append(Instruction::ISZERO); + auto end = m_asm.appendJumpI(); + m_asm.onePath(); + m_asm.otherPath(); + m_asm.append(code[1].m_asm, 0); + m_asm << end.tag(); + m_asm.donePaths(); + } + else if (us == "WHILE") + { + requireSize(2); + requireDeposit(0, 1); + + auto begin = m_asm.append(); + m_asm.append(code[0].m_asm); + m_asm.append(Instruction::ISZERO); + auto end = m_asm.appendJumpI(); + m_asm.append(code[1].m_asm, 0); + m_asm.appendJump(begin); + m_asm << end.tag(); + } + else if (us == "FOR") + { + requireSize(4); + requireDeposit(1, 1); + + m_asm.append(code[0].m_asm, 0); + auto begin = m_asm.append(); + m_asm.append(code[1].m_asm); + m_asm.append(Instruction::ISZERO); + auto end = m_asm.appendJumpI(); + m_asm.append(code[3].m_asm, 0); + m_asm.append(code[2].m_asm, 0); + m_asm.appendJump(begin); + m_asm << end.tag(); + } + else if (us == "ALLOC") + { + requireSize(1); + requireDeposit(0, 1); + + m_asm.append(Instruction::MSIZE); + m_asm.append(u256(0)); + m_asm.append(u256(1)); + m_asm.append(code[0].m_asm, 1); + m_asm.append(Instruction::MSIZE); + m_asm.append(Instruction::ADD); + m_asm.append(Instruction::SUB); + m_asm.append(Instruction::MSTORE8); + + _s.usedAlloc = true; + } + else if (us == "LLL") + { + requireMinSize(2); + requireMaxSize(3); + requireDeposit(1, 1); + + auto subPush = m_asm.appendSubSize(code[0].assembly(ns)); + m_asm.append(Instruction::DUP1); + if (code.size() == 3) + { + requireDeposit(2, 1); + m_asm.append(code[2].m_asm, 1); + m_asm.append(Instruction::LT); + m_asm.append(Instruction::ISZERO); + m_asm.append(Instruction::MUL); + m_asm.append(Instruction::DUP1); + } + m_asm.append(subPush); + m_asm.append(code[1].m_asm, 1); + m_asm.append(Instruction::CODECOPY); + } + else if (us == "&&" || us == "||") + { + requireMinSize(1); + for (unsigned i = 0; i < code.size(); ++i) + requireDeposit(i, 1); + + auto end = m_asm.newTag(); + if (code.size() > 1) + { + m_asm.append((u256)(us == "||" ? 1 : 0)); + for (unsigned i = 1; i < code.size(); ++i) + { + // Check if true - predicate + m_asm.append(code[i - 1].m_asm, 1); + if (us == "&&") + m_asm.append(Instruction::ISZERO); + m_asm.appendJumpI(end); + } + m_asm.append(Instruction::POP); + } + + // Check if true - predicate + m_asm.append(code.back().m_asm, 1); + + // At end now. + m_asm.append(end); + } + else if (us == "~") + { + requireSize(1); + requireDeposit(0, 1); + + m_asm.append(code[0].m_asm, 1); + m_asm.append((u256)1); + m_asm.append((u256)0); + m_asm.append(Instruction::SUB); + m_asm.append(Instruction::SUB); + } + else if (us == "SEQ") + { + unsigned ii = 0; + for (auto const& i: code) + if (++ii < code.size()) + m_asm.append(i.m_asm, 0); + else + m_asm.append(i.m_asm); + } + else if (us == "RAW") + { + for (auto const& i: code) + m_asm.append(i.m_asm); + m_asm.popTo(1); + } + else if (us.find_first_of("1234567890") != 0 && us.find_first_not_of("QWERTYUIOPASDFGHJKLZXCVBNM1234567890_") == string::npos) + m_asm.append((u256)varAddress(s)); + else + error<InvalidOperation>(); + } +} + +CodeFragment CodeFragment::compile(string const& _src, CompilerState& _s) +{ + CodeFragment ret; + sp::utree o; + parseTreeLLL(_src, o); + if (!o.empty()) + ret = CodeFragment(o, _s); + _s.treesToKill.push_back(o); + return ret; +} diff --git a/liblll/CodeFragment.h b/liblll/CodeFragment.h new file mode 100644 index 00000000..03f812b6 --- /dev/null +++ b/liblll/CodeFragment.h @@ -0,0 +1,64 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** @file CodeFragment.h + * @author Gav Wood <i@gavwood.com> + * @date 2014 + */ + +#pragma once + +#include <libdevcore/Common.h> +#include <libevmcore/Instruction.h> +#include <libevmasm/Assembly.h> +#include "Exceptions.h" + +namespace boost { namespace spirit { class utree; } } +namespace sp = boost::spirit; + +namespace dev +{ +namespace eth +{ + +struct CompilerState; + +class CodeFragment +{ +public: + CodeFragment() {} + CodeFragment(sp::utree const& _t, CompilerState& _s, bool _allowASM = false); + + static CodeFragment compile(std::string const& _src, CompilerState& _s); + + /// Consolidates data and compiles code. + Assembly& assembly(CompilerState const& _cs) { finalise(_cs); return m_asm; } + +private: + void finalise(CompilerState const& _cs); + + template <class T> void error() const { BOOST_THROW_EXCEPTION(T() ); } + void constructOperation(sp::utree const& _t, CompilerState& _s); + + bool m_finalised = false; + Assembly m_asm; +}; + +static const CodeFragment NullCodeFragment; + +} +} + diff --git a/liblll/Compiler.cpp b/liblll/Compiler.cpp new file mode 100644 index 00000000..f1538789 --- /dev/null +++ b/liblll/Compiler.cpp @@ -0,0 +1,95 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** @file Compiler.cpp + * @author Gav Wood <i@gavwood.com> + * @date 2014 + */ + +#include "Compiler.h" +#include "Parser.h" +#include "CompilerState.h" +#include "CodeFragment.h" + +using namespace std; +using namespace dev; +using namespace dev::eth; + +bytes dev::eth::compileLLL(string const& _src, bool _opt, vector<string>* _errors) +{ + try + { + CompilerState cs; + cs.populateStandard(); + auto f = CodeFragment::compile(_src, cs); + bytes ret = f.assembly(cs).optimise(_opt).assemble().bytecode; + for (auto i: cs.treesToKill) + killBigints(i); + return ret; + } + catch (Exception const& _e) + { + if (_errors) + { + _errors->push_back("Parse error."); + _errors->push_back(diagnostic_information(_e)); + } + } + catch (std::exception) + { + if (_errors) + _errors->push_back("Parse error."); + } + return bytes(); +} + +std::string dev::eth::compileLLLToAsm(std::string const& _src, bool _opt, std::vector<std::string>* _errors) +{ + try + { + CompilerState cs; + cs.populateStandard(); + string ret = CodeFragment::compile(_src, cs).assembly(cs).optimise(_opt).out(); + for (auto i: cs.treesToKill) + killBigints(i); + return ret; + } + catch (Exception const& _e) + { + if (_errors) + _errors->push_back(diagnostic_information(_e)); + } + catch (std::exception) + { + if (_errors) + _errors->push_back("Parse error."); + } + return string(); +} + +string dev::eth::parseLLL(string const& _src) +{ + sp::utree o; + try + { + parseTreeLLL(_src, o); + } + catch (...) {} + ostringstream ret; + debugOutAST(ret, o); + killBigints(o); + return ret.str(); +} diff --git a/liblll/Compiler.h b/liblll/Compiler.h new file mode 100644 index 00000000..0fadd278 --- /dev/null +++ b/liblll/Compiler.h @@ -0,0 +1,38 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** @file Compiler.h + * @author Gav Wood <i@gavwood.com> + * @date 2014 + */ + +#pragma once + +#include <string> +#include <vector> +#include <libdevcore/Common.h> + +namespace dev +{ +namespace eth +{ + +std::string parseLLL(std::string const& _src); +std::string compileLLLToAsm(std::string const& _src, bool _opt = true, std::vector<std::string>* _errors = nullptr); +bytes compileLLL(std::string const& _src, bool _opt = true, std::vector<std::string>* _errors = nullptr); + +} +} diff --git a/liblll/CompilerState.cpp b/liblll/CompilerState.cpp new file mode 100644 index 00000000..63351bc4 --- /dev/null +++ b/liblll/CompilerState.cpp @@ -0,0 +1,82 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** @file CompilerState.cpp + * @author Gav Wood <i@gavwood.com> + * @date 2014 + */ + +#include "CompilerState.h" +#include "CodeFragment.h" + +using namespace std; +using namespace dev; +using namespace dev::eth; + +CompilerState::CompilerState() +{ +} + +CodeFragment const& CompilerState::getDef(std::string const& _s) +{ + if (defs.count(_s)) + return defs.at(_s); + else if (args.count(_s)) + return args.at(_s); + else if (outers.count(_s)) + return outers.at(_s); + else + return NullCodeFragment; +} + +void CompilerState::populateStandard() +{ + static const string s = "{" + "(def 'gav 0x51ba59315b3a95761d0863b05ccc7a7f54703d99)" + "(def 'config 0x661005d2720d855f1d9976f88bb10c1a3398c77f)" + "(def 'allgas (- (gas) 21))" + "(def 'send (to value) (call allgas to value 0 0 0 0))" + "(def 'send (gaslimit to value) (call gaslimit to value 0 0 0 0))" + "(def 'msg (gaslimit to value data datasize outsize) { (set x outsize) (set y (alloc @32)) (call gaslimit to value data datasize @0 @32) @0 })" + "(def 'msg (gaslimit to value data datasize) { (call gaslimit to value data datasize 0 32) @0 })" + "(def 'msg (gaslimit to value data) { [0]:data (msg gaslimit to value 0 32) })" + "(def 'msg (to value data) { [0]:data (msg allgas to value 0 32) })" + "(def 'msg (to data) { [0]:data (msg allgas to 0 0 32) })" + "(def 'create (value code) { [0]:(msize) (create value @0 (lll code @0)) })" + "(def 'create (code) { [0]:(msize) (create 0 @0 (lll code @0)) })" + "(def 'sha3 (val) { [0]:val (sha3 0 32) })" + "(def 'sha3pair (a b) { [0]:a [32]:b (sha3 0 64) })" + "(def 'sha3trip (a b c) { [0]:a [32]:b [64]:c (sha3 0 96) })" + "(def 'return (val) { [0]:val (return 0 32) })" + "(def 'returnlll (code) (return 0 (lll code 0)) )" + "(def 'makeperm (name pos) { (def name (sload pos)) (def name (v) (sstore pos v)) } )" + "(def 'permcount 0)" + "(def 'perm (name) { (makeperm name permcount) (def 'permcount (+ permcount 1)) } )" + "(def 'namereg (msg config 0))" + "(def 'coinreg (msg config 1))" + "(def 'gavcoin (msg config 2))" + "(def 'sendgavcoin (to value) { [32]'send [64]:to [96]:value (call allgas gavcoin 0 32 96 0 0) })" + "(def 'regname (name) { [32]'register [64]name (call allgas namereg 0 32 64 0 0) })" + "(def 'regcoin (name) { [32]name (call allgas coinreg 0 32 32 0 0) })" + "(def 'regcoin (name denom) { [32]name [64]denom (call allgas coinreg 0 32 64 0 0) })" + "(def 'ecrecover (r s v hash) { [0] r [32] s [64] v [96] hash (msg allgas 1 0 0 128) })" + "(def 'sha256 (data datasize) (msg allgas 2 0 data datasize))" + "(def 'ripemd160 (data datasize) (msg allgas 3 0 data datasize))" + "(def 'sha256 (val) { [0]:val (sha256 0 32) })" + "(def 'ripemd160 (val) { [0]:val (ripemd160 0 32) })" + "}"; + CodeFragment::compile(s, *this); +} diff --git a/liblll/CompilerState.h b/liblll/CompilerState.h new file mode 100644 index 00000000..bfe56f92 --- /dev/null +++ b/liblll/CompilerState.h @@ -0,0 +1,57 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** @file CompilerState.h + * @author Gav Wood <i@gavwood.com> + * @date 2014 + */ + +#pragma once + +#include <boost/spirit/include/support_utree.hpp> +#include "CodeFragment.h" + +namespace dev +{ +namespace eth +{ + +struct Macro +{ + std::vector<std::string> args; + boost::spirit::utree code; + std::map<std::string, CodeFragment> env; +}; + +struct CompilerState +{ + CompilerState(); + + CodeFragment const& getDef(std::string const& _s); + void populateStandard(); + + unsigned stackSize = 128; + std::map<std::string, std::pair<unsigned, unsigned>> vars; ///< maps name to stack offset & size. + std::map<std::string, CodeFragment> defs; + std::map<std::string, CodeFragment> args; + std::map<std::string, CodeFragment> outers; + std::map<std::pair<std::string, unsigned>, Macro> macros; + std::vector<boost::spirit::utree> treesToKill; + bool usedAlloc = false; +}; + +} +} diff --git a/liblll/Exceptions.h b/liblll/Exceptions.h new file mode 100644 index 00000000..1e9671b3 --- /dev/null +++ b/liblll/Exceptions.h @@ -0,0 +1,44 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** @file Exceptions.h + * @author Gav Wood <i@gavwood.com> + * @date 2014 + */ + +#pragma once + +#include <libdevcore/Exceptions.h> + +namespace dev +{ +namespace eth +{ + +/// Compile a Low-level Lisp-like Language program into EVM-code. +class CompilerException: public dev::Exception {}; +class InvalidOperation: public CompilerException {}; +class IntegerOutOfRange: public CompilerException {}; +class EmptyList: public CompilerException {}; +class DataNotExecutable: public CompilerException {}; +class IncorrectParameterCount: public CompilerException {}; +class InvalidName: public CompilerException {}; +class InvalidMacroArgs: public CompilerException {}; +class InvalidLiteral: public CompilerException {}; +class BareSymbol: public CompilerException {}; + +} +} diff --git a/liblll/Parser.cpp b/liblll/Parser.cpp new file mode 100644 index 00000000..df30f389 --- /dev/null +++ b/liblll/Parser.cpp @@ -0,0 +1,145 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** @file Parser.cpp + * @author Gav Wood <i@gavwood.com> + * @date 2014 + */ + +#include "Parser.h" + +#define BOOST_RESULT_OF_USE_DECLTYPE +#define BOOST_SPIRIT_USE_PHOENIX_V3 +#include <boost/spirit/include/qi.hpp> +#include <boost/spirit/include/phoenix.hpp> +#include <boost/spirit/include/support_utree.hpp> + +using namespace std; +using namespace dev; +using namespace dev::eth; +namespace qi = boost::spirit::qi; +namespace px = boost::phoenix; +namespace sp = boost::spirit; + +void dev::eth::killBigints(sp::utree const& _this) +{ + switch (_this.which()) + { + case sp::utree_type::list_type: for (auto const& i: _this) killBigints(i); break; + case sp::utree_type::any_type: delete _this.get<bigint*>(); break; + default:; + } +} + +void dev::eth::debugOutAST(ostream& _out, sp::utree const& _this) +{ + switch (_this.which()) + { + case sp::utree_type::list_type: + switch (_this.tag()) + { + case 0: _out << "( "; for (auto const& i: _this) { debugOutAST(_out, i); _out << " "; } _out << ")"; break; + case 1: _out << "@ "; debugOutAST(_out, _this.front()); break; + case 2: _out << "@@ "; debugOutAST(_out, _this.front()); break; + case 3: _out << "[ "; debugOutAST(_out, _this.front()); _out << " ] "; debugOutAST(_out, _this.back()); break; + case 4: _out << "[[ "; debugOutAST(_out, _this.front()); _out << " ]] "; debugOutAST(_out, _this.back()); break; + case 5: _out << "{ "; for (auto const& i: _this) { debugOutAST(_out, i); _out << " "; } _out << "}"; break; + case 6: _out << "$ "; debugOutAST(_out, _this.front()); break; + default:; + } + + break; + case sp::utree_type::int_type: _out << _this.get<int>(); break; + case sp::utree_type::string_type: _out << "\"" << _this.get<sp::basic_string<boost::iterator_range<char const*>, sp::utree_type::string_type>>() << "\""; break; + case sp::utree_type::symbol_type: _out << _this.get<sp::basic_string<boost::iterator_range<char const*>, sp::utree_type::symbol_type>>(); break; + case sp::utree_type::any_type: _out << *_this.get<bigint*>(); break; + default: _out << "nil"; + } +} + +namespace dev { namespace eth { +namespace parseTreeLLL_ { + +template<unsigned N> +struct tagNode +{ + void operator()(sp::utree& n, qi::rule<string::const_iterator, qi::ascii::space_type, sp::utree()>::context_type& c) const + { + (boost::fusion::at_c<0>(c.attributes) = n).tag(N); + } +}; + +}}} + +void dev::eth::parseTreeLLL(string const& _s, sp::utree& o_out) +{ + using qi::standard::space; + using qi::standard::space_type; + using dev::eth::parseTreeLLL_::tagNode; + using symbol_type = sp::basic_string<std::string, sp::utree_type::symbol_type>; + using it = string::const_iterator; + + static const u256 ether = u256(1000000000) * 1000000000; + static const u256 finney = u256(1000000000) * 1000000; + static const u256 szabo = u256(1000000000) * 1000; + + qi::rule<it, space_type, sp::utree()> element; + qi::rule<it, string()> str = '"' > qi::lexeme[+(~qi::char_(std::string("\"") + '\0'))] > '"'; + qi::rule<it, string()> strsh = '\'' > qi::lexeme[+(~qi::char_(std::string(" ;$@()[]{}:\n\t") + '\0'))]; + qi::rule<it, symbol_type()> symbol = qi::lexeme[+(~qi::char_(std::string(" $@[]{}:();\"\x01-\x1f\x7f") + '\0'))]; + qi::rule<it, string()> intstr = qi::lexeme[ qi::no_case["0x"][qi::_val = "0x"] >> *qi::char_("0-9a-fA-F")[qi::_val += qi::_1]] | qi::lexeme[+qi::char_("0-9")[qi::_val += qi::_1]]; + qi::rule<it, bigint()> integer = intstr; + qi::rule<it, bigint()> multiplier = qi::lit("wei")[qi::_val = 1] | qi::lit("szabo")[qi::_val = szabo] | qi::lit("finney")[qi::_val = finney] | qi::lit("ether")[qi::_val = ether]; + qi::rule<it, space_type, bigint()> quantity = integer[qi::_val = qi::_1] >> -multiplier[qi::_val *= qi::_1]; + qi::rule<it, space_type, sp::utree()> atom = quantity[qi::_val = px::construct<sp::any_ptr>(px::new_<bigint>(qi::_1))] | (str | strsh)[qi::_val = qi::_1] | symbol[qi::_val = qi::_1]; + qi::rule<it, space_type, sp::utree::list_type()> seq = '{' > *element > '}'; + qi::rule<it, space_type, sp::utree::list_type()> mload = '@' > element; + qi::rule<it, space_type, sp::utree::list_type()> sload = qi::lit("@@") > element; + qi::rule<it, space_type, sp::utree::list_type()> mstore = '[' > element > ']' > -qi::lit(":") > element; + qi::rule<it, space_type, sp::utree::list_type()> sstore = qi::lit("[[") > element > qi::lit("]]") > -qi::lit(":") > element; + qi::rule<it, space_type, sp::utree::list_type()> calldataload = qi::lit("$") > element; + qi::rule<it, space_type, sp::utree::list_type()> list = '(' > *element > ')'; + + qi::rule<it, space_type, sp::utree()> extra = sload[tagNode<2>()] | mload[tagNode<1>()] | sstore[tagNode<4>()] | mstore[tagNode<3>()] | seq[tagNode<5>()] | calldataload[tagNode<6>()]; + element = atom | list | extra; + + string s; + s.reserve(_s.size()); + bool incomment = false; + bool instring = false; + bool insstring = false; + for (auto i: _s) + { + if (i == ';' && !instring && !insstring) + incomment = true; + else if (i == '\n') + incomment = instring = insstring = false; + else if (i == '"' && !insstring) + instring = !instring; + else if (i == '\'') + insstring = true; + else if (i == ' ') + insstring = false; + if (!incomment) + s.push_back(i); + } + auto ret = s.cbegin(); + qi::phrase_parse(ret, s.cend(), element, space, qi::skip_flag::dont_postskip, o_out); + for (auto i = ret; i != s.cend(); ++i) + if (!isspace(*i)) + BOOST_THROW_EXCEPTION(std::exception()); +} + diff --git a/liblll/Parser.h b/liblll/Parser.h new file mode 100644 index 00000000..b21989f0 --- /dev/null +++ b/liblll/Parser.h @@ -0,0 +1,41 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** @file Parser.h + * @author Gav Wood <i@gavwood.com> + * @date 2014 + */ + +#pragma once + +#include <string> +#include <vector> +#include <libdevcore/Common.h> + +namespace boost { namespace spirit { class utree; } } +namespace sp = boost::spirit; + +namespace dev +{ +namespace eth +{ + +void killBigints(sp::utree const& _this); +void parseTreeLLL(std::string const& _s, sp::utree& o_out); +void debugOutAST(std::ostream& _out, sp::utree const& _this); + +} +} diff --git a/libsolidity/CMakeLists.txt b/libsolidity/CMakeLists.txt index fa2f4e9c..ab9ee1e2 100644 --- a/libsolidity/CMakeLists.txt +++ b/libsolidity/CMakeLists.txt @@ -14,7 +14,7 @@ file(GLOB HEADERS "*/*.h") include_directories(BEFORE ..) add_library(${EXECUTABLE} ${SRC_LIST} ${HEADERS}) -eth_use(${EXECUTABLE} REQUIRED Dev::devcore Eth::evmcore Eth::evmasm) +eth_use(${EXECUTABLE} REQUIRED Dev::devcore Eth::evmcore Solidity::evmasm) install( TARGETS ${EXECUTABLE} RUNTIME DESTINATION bin ARCHIVE DESTINATION lib LIBRARY DESTINATION lib ) install( FILES ${HEADERS} DESTINATION include/${EXECUTABLE} ) diff --git a/lllc/CMakeLists.txt b/lllc/CMakeLists.txt new file mode 100644 index 00000000..989b8cbf --- /dev/null +++ b/lllc/CMakeLists.txt @@ -0,0 +1,12 @@ +aux_source_directory(. SRC_LIST) + +set(EXECUTABLE lllc) + +file(GLOB HEADERS "*.h") +include_directories(BEFORE ..) +add_executable(${EXECUTABLE} ${SRC_LIST} ${HEADERS}) + +eth_use(${EXECUTABLE} REQUIRED Solidity::lll Dev::buildinfo) + +install( TARGETS ${EXECUTABLE} DESTINATION bin ) + diff --git a/lllc/main.cpp b/lllc/main.cpp new file mode 100644 index 00000000..5f3078c2 --- /dev/null +++ b/lllc/main.cpp @@ -0,0 +1,124 @@ +/* + This file is part of cpp-ethereum. + + cpp-ethereum is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + cpp-ethereum is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>. +*/ +/** @file main.cpp + * @author Gav Wood <i@gavwood.com> + * @date 2014 + * Ethereum client. + */ + +#include <fstream> +#include <iostream> +#include <liblll/Compiler.h> +#include <libdevcore/CommonIO.h> +#include <libdevcore/CommonData.h> +#include <libevmcore/Instruction.h> +#include "ethereum/BuildInfo.h" +using namespace std; +using namespace dev; +using namespace dev::eth; + +void help() +{ + cout + << "Usage lllc [OPTIONS] <file>" << endl + << "Options:" << endl + << " -b,--binary Parse, compile and assemble; output byte code in binary." << endl + << " -x,--hex Parse, compile and assemble; output byte code in hex." << endl + << " -a,--assembly Only parse and compile; show assembly." << endl + << " -t,--parse-tree Only parse; show parse tree." << endl + << " -h,--help Show this help message and exit." << endl + << " -V,--version Show the version and exit." << endl; + exit(0); +} + +void version() +{ + cout << "LLLC, the Lovely Little Language Compiler " << dev::Version << endl; + cout << " By Gav Wood, (c) 2014." << endl; + cout << "Build: " << DEV_QUOTED(ETH_BUILD_PLATFORM) << "/" << DEV_QUOTED(ETH_BUILD_TYPE) << endl; + exit(0); +} + +enum Mode { Binary, Hex, Assembly, ParseTree, Disassemble }; + +int main(int argc, char** argv) +{ + unsigned optimise = 1; + string infile; + Mode mode = Hex; + + for (int i = 1; i < argc; ++i) + { + string arg = argv[i]; + if (arg == "-h" || arg == "--help") + help(); + else if (arg == "-b" || arg == "--binary") + mode = Binary; + else if (arg == "-x" || arg == "--hex") + mode = Hex; + else if (arg == "-a" || arg == "--assembly") + mode = Assembly; + else if (arg == "-t" || arg == "--parse-tree") + mode = ParseTree; + else if ((arg == "-o" || arg == "--optimise") && argc > i + 1) + optimise = atoi(argv[++i]); + else if (arg == "-d" || arg == "--disassemble") + mode = Disassemble; + else if (arg == "-V" || arg == "--version") + version(); + else + infile = argv[i]; + } + + string src; + if (infile.empty()) + { + string s; + while (!cin.eof()) + { + getline(cin, s); + src.append(s); + } + } + else + src = contentsString(infile); + + vector<string> errors; + if (src.empty()) + errors.push_back("Empty file."); + else if (mode == Disassemble) + { + cout << disassemble(fromHex(src)) << endl; + } + else if (mode == Binary || mode == Hex) + { + auto bs = compileLLL(src, optimise ? true : false, &errors); + if (mode == Hex) + cout << toHex(bs) << endl; + else if (mode == Binary) + cout.write((char const*)bs.data(), bs.size()); + } + else if (mode == ParseTree) + cout << parseLLL(src) << endl; + else if (mode == Assembly) + cout << compileLLLToAsm(src, optimise ? true : false, &errors) << endl; + for (auto const& i: errors) + cerr << i << endl; + if ( errors.size() ) + return 1; + return 0; +} |