diff options
Diffstat (limited to 'libevmasm')
-rw-r--r-- | libevmasm/Assembly.cpp | 75 | ||||
-rw-r--r-- | libevmasm/Assembly.h | 29 | ||||
-rw-r--r-- | libevmasm/AssemblyItem.h | 2 | ||||
-rw-r--r-- | libevmasm/CommonSubexpressionEliminator.cpp | 4 | ||||
-rw-r--r-- | libevmasm/ConstantOptimiser.cpp | 11 | ||||
-rw-r--r-- | libevmasm/ConstantOptimiser.h | 2 | ||||
-rw-r--r-- | libevmasm/ControlFlowGraph.h | 2 | ||||
-rw-r--r-- | libevmasm/ExpressionClasses.cpp | 1 | ||||
-rw-r--r-- | libevmasm/GasMeter.cpp | 24 | ||||
-rw-r--r-- | libevmasm/GasMeter.h | 12 | ||||
-rw-r--r-- | libevmasm/Instruction.cpp | 16 | ||||
-rw-r--r-- | libevmasm/Instruction.h | 21 | ||||
-rw-r--r-- | libevmasm/KnownState.cpp | 23 | ||||
-rw-r--r-- | libevmasm/KnownState.h | 14 | ||||
-rw-r--r-- | libevmasm/LinkerObject.cpp | 12 | ||||
-rw-r--r-- | libevmasm/LinkerObject.h | 5 | ||||
-rw-r--r-- | libevmasm/PathGasMeter.h | 10 | ||||
-rw-r--r-- | libevmasm/PeepholeOptimiser.cpp | 19 | ||||
-rw-r--r-- | libevmasm/RuleList.h | 49 | ||||
-rw-r--r-- | libevmasm/SemanticInformation.cpp | 3 | ||||
-rw-r--r-- | libevmasm/SimplificationRules.cpp | 27 | ||||
-rw-r--r-- | libevmasm/SimplificationRules.h | 6 |
22 files changed, 260 insertions, 107 deletions
diff --git a/libevmasm/Assembly.cpp b/libevmasm/Assembly.cpp index b71bc80c..e63194a0 100644 --- a/libevmasm/Assembly.cpp +++ b/libevmasm/Assembly.cpp @@ -41,10 +41,19 @@ void Assembly::append(Assembly const& _a) auto newDeposit = m_deposit + _a.deposit(); for (AssemblyItem i: _a.m_items) { - if (i.type() == Tag || i.type() == PushTag) + switch (i.type()) + { + case Tag: + case PushTag: i.setData(i.data() + m_usedTags); - else if (i.type() == PushSub || i.type() == PushSubSize) + break; + case PushSub: + case PushSubSize: i.setData(i.data() + m_subs.size()); + break; + default: + break; + } append(i); } m_deposit = newDeposit; @@ -69,6 +78,21 @@ void Assembly::append(Assembly const& _a, int _deposit) append(Instruction::POP); } +AssemblyItem const& Assembly::append(AssemblyItem const& _i) +{ + assertThrow(m_deposit >= 0, AssemblyException, "Stack underflow."); + m_deposit += _i.deposit(); + m_items.push_back(_i); + if (m_items.back().location().isEmpty() && !m_currentSourceLocation.isEmpty()) + m_items.back().setLocation(m_currentSourceLocation); + return back(); +} + +void Assembly::injectStart(AssemblyItem const& _i) +{ + m_items.insert(m_items.begin(), _i); +} + unsigned Assembly::bytesRequired(unsigned subTagSize) const { for (unsigned tagSize = subTagSize; true; ++tagSize) @@ -264,7 +288,7 @@ Json::Value Assembly::assemblyJSON(StringMap const& _sourceCodes) const createJsonValue("PUSH [ErrorTag]", i.location().start, i.location().end, "")); else collection.append( - createJsonValue("PUSH [tag]", i.location().start, i.location().end, string(i.data()))); + createJsonValue("PUSH [tag]", i.location().start, i.location().end, dev::toString(i.data()))); break; case PushSub: collection.append( @@ -290,7 +314,7 @@ Json::Value Assembly::assemblyJSON(StringMap const& _sourceCodes) const break; case Tag: collection.append( - createJsonValue("tag", i.location().start, i.location().end, string(i.data()))); + createJsonValue("tag", i.location().start, i.location().end, dev::toString(i.data()))); collection.append( createJsonValue("JUMPDEST", i.location().start, i.location().end)); break; @@ -323,16 +347,6 @@ Json::Value Assembly::assemblyJSON(StringMap const& _sourceCodes) const return root; } -AssemblyItem const& Assembly::append(AssemblyItem const& _i) -{ - assertThrow(m_deposit >= 0, AssemblyException, "Stack underflow."); - m_deposit += _i.deposit(); - m_items.push_back(_i); - if (m_items.back().location().isEmpty() && !m_currentSourceLocation.isEmpty()) - m_items.back().setLocation(m_currentSourceLocation); - return back(); -} - AssemblyItem Assembly::namedTag(string const& _name) { assertThrow(!_name.empty(), AssemblyException, "Empty named tag."); @@ -348,11 +362,6 @@ AssemblyItem Assembly::newPushLibraryAddress(string const& _identifier) return AssemblyItem(PushLibraryAddress, h); } -void Assembly::injectStart(AssemblyItem const& _i) -{ - m_items.insert(m_items.begin(), _i); -} - Assembly& Assembly::optimise(bool _enable, EVMVersion _evmVersion, bool _isCreation, size_t _runs) { OptimiserSettings settings; @@ -516,14 +525,14 @@ LinkerObject const& Assembly::assemble() const multimap<size_t, size_t> subRef; vector<unsigned> sizeRef; ///< Pointers to code locations where the size of the program is inserted unsigned bytesPerTag = dev::bytesRequired(bytesRequiredForCode); - byte tagPush = (byte)Instruction::PUSH1 - 1 + bytesPerTag; + uint8_t tagPush = (uint8_t)Instruction::PUSH1 - 1 + bytesPerTag; unsigned bytesRequiredIncludingData = bytesRequiredForCode + 1 + m_auxiliaryData.size(); for (auto const& sub: m_subs) bytesRequiredIncludingData += sub->assemble().bytecode.size(); unsigned bytesPerDataRef = dev::bytesRequired(bytesRequiredIncludingData); - byte dataRefPush = (byte)Instruction::PUSH1 - 1 + bytesPerDataRef; + uint8_t dataRefPush = (uint8_t)Instruction::PUSH1 - 1 + bytesPerDataRef; ret.bytecode.reserve(bytesRequiredIncludingData); for (AssemblyItem const& i: m_items) @@ -535,25 +544,25 @@ LinkerObject const& Assembly::assemble() const switch (i.type()) { case Operation: - ret.bytecode.push_back((byte)i.instruction()); + ret.bytecode.push_back((uint8_t)i.instruction()); break; case PushString: { - ret.bytecode.push_back((byte)Instruction::PUSH32); + ret.bytecode.push_back((uint8_t)Instruction::PUSH32); unsigned ii = 0; for (auto j: m_strings.at((h256)i.data())) if (++ii > 32) break; else - ret.bytecode.push_back((byte)j); + ret.bytecode.push_back((uint8_t)j); while (ii++ < 32) ret.bytecode.push_back(0); break; } case Push: { - byte b = max<unsigned>(1, dev::bytesRequired(i.data())); - ret.bytecode.push_back((byte)Instruction::PUSH1 - 1 + b); + uint8_t b = max<unsigned>(1, dev::bytesRequired(i.data())); + ret.bytecode.push_back((uint8_t)Instruction::PUSH1 - 1 + b); ret.bytecode.resize(ret.bytecode.size() + b); bytesRef byr(&ret.bytecode.back() + 1 - b, b); toBigEndian(i.data(), byr); @@ -580,8 +589,8 @@ LinkerObject const& Assembly::assemble() const { auto s = m_subs.at(size_t(i.data()))->assemble().bytecode.size(); i.setPushedValue(u256(s)); - byte b = max<unsigned>(1, dev::bytesRequired(s)); - ret.bytecode.push_back((byte)Instruction::PUSH1 - 1 + b); + uint8_t b = max<unsigned>(1, dev::bytesRequired(s)); + ret.bytecode.push_back((uint8_t)Instruction::PUSH1 - 1 + b); ret.bytecode.resize(ret.bytecode.size() + b); bytesRef byr(&ret.bytecode.back() + 1 - b, b); toBigEndian(s, byr); @@ -595,12 +604,12 @@ LinkerObject const& Assembly::assemble() const break; } case PushLibraryAddress: - ret.bytecode.push_back(byte(Instruction::PUSH20)); + ret.bytecode.push_back(uint8_t(Instruction::PUSH20)); ret.linkReferences[ret.bytecode.size()] = m_libraries.at(i.data()); ret.bytecode.resize(ret.bytecode.size() + 20); break; case PushDeployTimeAddress: - ret.bytecode.push_back(byte(Instruction::PUSH20)); + ret.bytecode.push_back(uint8_t(Instruction::PUSH20)); ret.bytecode.resize(ret.bytecode.size() + 20); break; case Tag: @@ -609,7 +618,7 @@ LinkerObject const& Assembly::assemble() const assertThrow(ret.bytecode.size() < 0xffffffffL, AssemblyException, "Tag too large."); assertThrow(m_tagPositionsInBytecode[size_t(i.data())] == size_t(-1), AssemblyException, "Duplicate tag position."); m_tagPositionsInBytecode[size_t(i.data())] = ret.bytecode.size(); - ret.bytecode.push_back((byte)Instruction::JUMPDEST); + ret.bytecode.push_back((uint8_t)Instruction::JUMPDEST); break; default: BOOST_THROW_EXCEPTION(InvalidOpcode()); @@ -617,8 +626,8 @@ LinkerObject const& Assembly::assemble() const } if (!m_subs.empty() || !m_data.empty() || !m_auxiliaryData.empty()) - // Append a STOP just to be sure. - ret.bytecode.push_back(0); + // Append an INVALID here to help tests find miscompilation. + ret.bytecode.push_back(uint8_t(Instruction::INVALID)); for (size_t i = 0; i < m_subs.size(); ++i) { diff --git a/libevmasm/Assembly.h b/libevmasm/Assembly.h index 1ed9b859..8ef36923 100644 --- a/libevmasm/Assembly.h +++ b/libevmasm/Assembly.h @@ -27,7 +27,7 @@ #include <libdevcore/Common.h> #include <libdevcore/Assertions.h> -#include <libdevcore/SHA3.h> +#include <libdevcore/Keccak256.h> #include <json/json.h> @@ -52,18 +52,19 @@ public: /// Returns a tag identified by the given name. Creates it if it does not yet exist. AssemblyItem namedTag(std::string const& _name); AssemblyItem newData(bytes const& _data) { h256 h(dev::keccak256(asString(_data))); m_data[h] = _data; return AssemblyItem(PushData, h); } + bytes const& data(h256 const& _i) const { return m_data.at(_i); } AssemblyItem newSub(AssemblyPointer const& _sub) { m_subs.push_back(_sub); return AssemblyItem(PushSub, m_subs.size() - 1); } Assembly const& sub(size_t _sub) const { return *m_subs.at(_sub); } Assembly& sub(size_t _sub) { return *m_subs.at(_sub); } - AssemblyItem newPushString(std::string const& _data) { h256 h(dev::keccak256(_data)); m_strings[h] = _data; return AssemblyItem(PushString, h); } AssemblyItem newPushSubSize(u256 const& _subId) { return AssemblyItem(PushSubSize, _subId); } AssemblyItem newPushLibraryAddress(std::string const& _identifier); - void append(Assembly const& _a); - void append(Assembly const& _a, int _deposit); AssemblyItem const& append(AssemblyItem const& _i); AssemblyItem const& append(std::string const& _data) { return append(newPushString(_data)); } AssemblyItem const& append(bytes const& _data) { return append(newData(_data)); } + + template <class T> Assembly& operator<<(T const& _d) { append(_d); return *this; } + /// Pushes the final size of the current assembly itself. Use this when the code is modified /// after compilation and CODESIZE is not an option. void appendProgramSize() { append(AssemblyItem(PushProgramSize)); } @@ -84,12 +85,9 @@ public: /// Appends @a _data literally to the very end of the bytecode. void appendAuxiliaryDataToEnd(bytes const& _data) { m_auxiliaryData += _data; } - template <class T> Assembly& operator<<(T const& _d) { append(_d); return *this; } + /// Returns the assembly items. AssemblyItems const& items() const { return m_items; } - AssemblyItem const& back() const { return m_items.back(); } - std::string backString() const { return m_items.size() && m_items.back().type() == PushString ? m_strings.at((h256)m_items.back().data()) : std::string(); } - void injectStart(AssemblyItem const& _i); int deposit() const { return m_deposit; } void adjustDeposit(int _adjustment) { m_deposit += _adjustment; assertThrow(m_deposit >= 0, InvalidDeposit, ""); } void setDeposit(int _deposit) { m_deposit = _deposit; assertThrow(m_deposit >= 0, InvalidDeposit, ""); } @@ -97,9 +95,8 @@ public: /// Changes the source location used for each appended item. void setSourceLocation(SourceLocation const& _location) { m_currentSourceLocation = _location; } - /// Assembles the assembly into bytecode. The assembly should not be modified after this call. + /// Assembles the assembly into bytecode. The assembly should not be modified after this call, since the assembled version is cached. LinkerObject const& assemble() const; - bytes const& data(h256 const& _i) const { return m_data.at(_i); } struct OptimiserSettings { @@ -140,6 +137,18 @@ public: StringMap const& _sourceCodes = StringMap() ) const; +public: + // These features are only used by LLL + AssemblyItem newPushString(std::string const& _data) { h256 h(dev::keccak256(_data)); m_strings[h] = _data; return AssemblyItem(PushString, h); } + + void append(Assembly const& _a); + void append(Assembly const& _a, int _deposit); + + void injectStart(AssemblyItem const& _i); + + AssemblyItem const& back() const { return m_items.back(); } + std::string backString() const { return m_items.size() && m_items.back().type() == PushString ? m_strings.at((h256)m_items.back().data()) : std::string(); } + protected: /// Does the same operations as @a optimise, but should only be applied to a sub and /// returns the replaced tags. Also takes an argument containing the tags of this assembly diff --git a/libevmasm/AssemblyItem.h b/libevmasm/AssemblyItem.h index 5319a2b6..6187e18f 100644 --- a/libevmasm/AssemblyItem.h +++ b/libevmasm/AssemblyItem.h @@ -69,7 +69,7 @@ public: m_location(_location) { if (m_type == Operation) - m_instruction = Instruction(byte(_data)); + m_instruction = Instruction(uint8_t(_data)); else m_data = std::make_shared<u256>(_data); } diff --git a/libevmasm/CommonSubexpressionEliminator.cpp b/libevmasm/CommonSubexpressionEliminator.cpp index 293cb02c..04926986 100644 --- a/libevmasm/CommonSubexpressionEliminator.cpp +++ b/libevmasm/CommonSubexpressionEliminator.cpp @@ -23,7 +23,7 @@ #include <functional> #include <boost/range/adaptor/reversed.hpp> -#include <libdevcore/SHA3.h> +#include <libdevcore/Keccak256.h> #include <libevmasm/CommonSubexpressionEliminator.h> #include <libevmasm/AssemblyItem.h> @@ -160,7 +160,7 @@ AssemblyItems CSECodeGenerator::generateCode( if (seqNr < _initialSequenceNumber) // Invalid sequenced operation. // @todo quick fix for now. Proper fix needs to choose representative with higher - // sequence number during dependency analyis. + // sequence number during dependency analysis. BOOST_THROW_EXCEPTION(StackTooDeepException()); sequencedExpressions.insert(make_pair(seqNr, id)); } diff --git a/libevmasm/ConstantOptimiser.cpp b/libevmasm/ConstantOptimiser.cpp index d0b6843c..9844ba3a 100644 --- a/libevmasm/ConstantOptimiser.cpp +++ b/libevmasm/ConstantOptimiser.cpp @@ -93,15 +93,8 @@ bigint ConstantOptimisationMethod::simpleRunGas(AssemblyItems const& _items) bigint ConstantOptimisationMethod::dataGas(bytes const& _data) const { - if (m_params.isCreation) - { - bigint gas; - for (auto b: _data) - gas += b ? GasCosts::txDataNonZeroGas : GasCosts::txDataZeroGas; - return gas; - } - else - return GasCosts::createDataGas * dataSize(); + assertThrow(_data.size() > 0, OptimizerException, "Empty bytecode generated."); + return bigint(GasMeter::dataGas(_data, m_params.isCreation)); } size_t ConstantOptimisationMethod::bytesRequired(AssemblyItems const& _items) diff --git a/libevmasm/ConstantOptimiser.h b/libevmasm/ConstantOptimiser.h index f0deb387..2c753fa8 100644 --- a/libevmasm/ConstantOptimiser.h +++ b/libevmasm/ConstantOptimiser.h @@ -75,8 +75,6 @@ public: virtual AssemblyItems execute(Assembly& _assembly) const = 0; protected: - size_t dataSize() const { return std::max<size_t>(1, dev::bytesRequired(m_value)); } - /// @returns the run gas for the given items ignoring special gas costs static bigint simpleRunGas(AssemblyItems const& _items); /// @returns the gas needed to store the given data literally diff --git a/libevmasm/ControlFlowGraph.h b/libevmasm/ControlFlowGraph.h index ebef543f..f0c9356c 100644 --- a/libevmasm/ControlFlowGraph.h +++ b/libevmasm/ControlFlowGraph.h @@ -39,7 +39,7 @@ using KnownStatePointer = std::shared_ptr<KnownState>; /** * Identifier for a block, coincides with the tag number of an AssemblyItem but adds a special - * ID for the inital block. + * ID for the initial block. */ class BlockId { diff --git a/libevmasm/ExpressionClasses.cpp b/libevmasm/ExpressionClasses.cpp index 69b33ec5..42a1819a 100644 --- a/libevmasm/ExpressionClasses.cpp +++ b/libevmasm/ExpressionClasses.cpp @@ -184,6 +184,7 @@ string ExpressionClasses::fullDAGToString(ExpressionClasses::Id _id) const ExpressionClasses::Id ExpressionClasses::tryToSimplify(Expression const& _expr) { static Rules rules; + assertThrow(rules.isInitialized(), OptimizerException, "Rule list not properly initialized."); if ( !_expr.item || diff --git a/libevmasm/GasMeter.cpp b/libevmasm/GasMeter.cpp index caa06fc0..d98b3efa 100644 --- a/libevmasm/GasMeter.cpp +++ b/libevmasm/GasMeter.cpp @@ -101,8 +101,8 @@ GasMeter::GasConsumption GasMeter::estimateMax(AssemblyItem const& _item, bool _ break; case Instruction::KECCAK256: gas = GasCosts::keccak256Gas; - gas += wordGas(GasCosts::keccak256WordGas, m_state->relativeStackElement(-1)); gas += memoryGas(0, -1); + gas += wordGas(GasCosts::keccak256WordGas, m_state->relativeStackElement(-1)); break; case Instruction::CALLDATACOPY: case Instruction::CODECOPY: @@ -114,6 +114,9 @@ GasMeter::GasConsumption GasMeter::estimateMax(AssemblyItem const& _item, bool _ case Instruction::EXTCODESIZE: gas = GasCosts::extCodeGas(m_evmVersion); break; + case Instruction::EXTCODEHASH: + gas = GasCosts::balanceGas(m_evmVersion); + break; case Instruction::EXTCODECOPY: gas = GasCosts::extCodeGas(m_evmVersion); gas += memoryGas(-1, -3); @@ -125,8 +128,7 @@ GasMeter::GasConsumption GasMeter::estimateMax(AssemblyItem const& _item, bool _ case Instruction::LOG3: case Instruction::LOG4: { - unsigned n = unsigned(_item.instruction()) - unsigned(Instruction::LOG0); - gas = GasCosts::logGas + GasCosts::logTopicGas * n; + gas = GasCosts::logGas + GasCosts::logTopicGas * getLogNumber(_item.instruction()); gas += memoryGas(0, -1); if (u256 const* value = classes.knownConstant(m_state->relativeStackElement(-1))) gas += GasCosts::logDataGas * (*value); @@ -215,7 +217,7 @@ GasMeter::GasConsumption GasMeter::memoryGas(ExpressionClasses::Id _position) if (!value) return GasConsumption::infinite(); if (*value < m_largestMemoryAccess) - return GasConsumption(u256(0)); + return GasConsumption(0); u256 previous = m_largestMemoryAccess; m_largestMemoryAccess = *value; auto memGas = [=](u256 const& pos) -> u256 @@ -258,4 +260,16 @@ unsigned GasMeter::runGas(Instruction _instruction) return 0; } - +u256 GasMeter::dataGas(bytes const& _data, bool _inCreation) +{ + bigint gas = 0; + if (_inCreation) + { + for (auto b: _data) + gas += (b != 0) ? GasCosts::txDataNonZeroGas : GasCosts::txDataZeroGas; + } + else + gas = bigint(GasCosts::createDataGas) * _data.size(); + assertThrow(gas < bigint(u256(-1)), OptimizerException, "Gas cost exceeds 256 bits."); + return u256(gas); +} diff --git a/libevmasm/GasMeter.h b/libevmasm/GasMeter.h index b131802f..da90b028 100644 --- a/libevmasm/GasMeter.h +++ b/libevmasm/GasMeter.h @@ -112,9 +112,10 @@ public: static GasConsumption infinite() { return GasConsumption(0, true); } GasConsumption& operator+=(GasConsumption const& _other); - bool operator<(GasConsumption const& _other) const { return this->tuple() < _other.tuple(); } - - std::tuple<bool const&, u256 const&> tuple() const { return std::tie(isInfinite, value); } + bool operator<(GasConsumption const& _other) const + { + return std::make_pair(isInfinite, value) < std::make_pair(_other.isInfinite, _other.value); + } u256 value; bool isInfinite; @@ -135,6 +136,11 @@ public: /// change with EVM versions) static unsigned runGas(Instruction _instruction); + /// @returns the gas cost of the supplied data, depending whether it is in creation code, or not. + /// In case of @a _inCreation, the data is only sent as a transaction and is not stored, whereas + /// otherwise code will be stored and have to pay "createDataGas" cost. + static u256 dataGas(bytes const& _data, bool _inCreation); + private: /// @returns _multiplier * (_value + 31) / 32, if _value is a known constant and infinite otherwise. GasConsumption wordGas(u256 const& _multiplier, ExpressionClasses::Id _value); diff --git a/libevmasm/Instruction.cpp b/libevmasm/Instruction.cpp index f9bbad2c..cf98c938 100644 --- a/libevmasm/Instruction.cpp +++ b/libevmasm/Instruction.cpp @@ -21,6 +21,7 @@ #include "./Instruction.h" +#include <algorithm> #include <functional> #include <libdevcore/Common.h> #include <libdevcore/CommonIO.h> @@ -72,6 +73,7 @@ const std::map<std::string, Instruction> dev::solidity::c_instructions = { "EXTCODECOPY", Instruction::EXTCODECOPY }, { "RETURNDATASIZE", Instruction::RETURNDATASIZE }, { "RETURNDATACOPY", Instruction::RETURNDATACOPY }, + { "EXTCODEHASH", Instruction::EXTCODEHASH }, { "BLOCKHASH", Instruction::BLOCKHASH }, { "COINBASE", Instruction::COINBASE }, { "TIMESTAMP", Instruction::TIMESTAMP }, @@ -215,6 +217,7 @@ static const std::map<Instruction, InstructionInfo> c_instructionInfo = { Instruction::EXTCODECOPY, { "EXTCODECOPY", 0, 4, 0, true, Tier::ExtCode } }, { Instruction::RETURNDATASIZE, {"RETURNDATASIZE", 0, 0, 1, false, Tier::Base } }, { Instruction::RETURNDATACOPY, {"RETURNDATACOPY", 0, 3, 0, true, Tier::VeryLow } }, + { Instruction::EXTCODEHASH, { "EXTCODEHASH", 0, 1, 1, false, Tier::Balance } }, { Instruction::BLOCKHASH, { "BLOCKHASH", 0, 1, 1, false, Tier::Ext } }, { Instruction::COINBASE, { "COINBASE", 0, 0, 1, false, Tier::Base } }, { Instruction::TIMESTAMP, { "TIMESTAMP", 0, 0, 1, false, Tier::Base } }, @@ -325,13 +328,20 @@ void dev::solidity::eachInstruction( size_t additional = 0; if (isValidInstruction(instr)) additional = instructionInfo(instr).additional; + u256 data; - for (size_t i = 0; i < additional; ++i) + + // fill the data with the additional data bytes from the instruction stream + while (additional > 0 && std::next(it) < _mem.end()) { data <<= 8; - if (++it < _mem.end()) - data |= *it; + data |= *++it; + --additional; } + + // pad the remaining number of additional octets with zeros + data <<= 8 * additional; + _onInstruction(instr, data); } } diff --git a/libevmasm/Instruction.h b/libevmasm/Instruction.h index dc116f88..539a83b0 100644 --- a/libevmasm/Instruction.h +++ b/libevmasm/Instruction.h @@ -82,6 +82,7 @@ enum class Instruction: uint8_t EXTCODECOPY, ///< copy external code (from another contract) RETURNDATASIZE = 0x3d, ///< get size of return data buffer RETURNDATACOPY = 0x3e, ///< copy return data in current environment to memory + EXTCODEHASH = 0x3f, ///< get external code hash (from another contract) BLOCKHASH = 0x40, ///< get hash of most recent complete block COINBASE, ///< get the block's coinbase address @@ -192,8 +193,8 @@ enum class Instruction: uint8_t CALLCODE, ///< message-call with another account's code only RETURN, ///< halt execution returning output data DELEGATECALL, ///< like CALLCODE but keeps caller's value and sender + CREATE2 = 0xf5, ///< create new account with associated code at address `sha3(0xff + sender + salt + init code) % 2**160` STATICCALL = 0xfa, ///< like CALL but disallow state modifications - CREATE2 = 0xfb, ///< create new account with associated code at address `sha3(sender + salt + sha3(init code)) % 2**160` REVERT = 0xfd, ///< halt execution, revert state and return output data INVALID = 0xfe, ///< invalid instruction for expressing runtime errors (e.g., division-by-zero) @@ -218,22 +219,34 @@ inline bool isSwapInstruction(Instruction _inst) return Instruction::SWAP1 <= _inst && _inst <= Instruction::SWAP16; } +/// @returns true if the instruction is a LOG +inline bool isLogInstruction(Instruction _inst) +{ + return Instruction::LOG0 <= _inst && _inst <= Instruction::LOG4; +} + /// @returns the number of PUSH Instruction _inst inline unsigned getPushNumber(Instruction _inst) { - return (byte)_inst - unsigned(Instruction::PUSH1) + 1; + return (uint8_t)_inst - unsigned(Instruction::PUSH1) + 1; } /// @returns the number of DUP Instruction _inst inline unsigned getDupNumber(Instruction _inst) { - return (byte)_inst - unsigned(Instruction::DUP1) + 1; + return (uint8_t)_inst - unsigned(Instruction::DUP1) + 1; } /// @returns the number of SWAP Instruction _inst inline unsigned getSwapNumber(Instruction _inst) { - return (byte)_inst - unsigned(Instruction::SWAP1) + 1; + return (uint8_t)_inst - unsigned(Instruction::SWAP1) + 1; +} + +/// @returns the number of LOG Instruction _inst +inline unsigned getLogNumber(Instruction _inst) +{ + return (uint8_t)_inst - unsigned(Instruction::LOG0); } /// @returns the PUSH<_number> instruction diff --git a/libevmasm/KnownState.cpp b/libevmasm/KnownState.cpp index e2f10f22..b6c1bcc9 100644 --- a/libevmasm/KnownState.cpp +++ b/libevmasm/KnownState.cpp @@ -23,7 +23,7 @@ #include "KnownState.h" #include <functional> -#include <libdevcore/SHA3.h> +#include <libdevcore/Keccak256.h> #include <libevmasm/AssemblyItem.h> using namespace std; @@ -121,28 +121,33 @@ KnownState::StoreOperation KnownState::feedItem(AssemblyItem const& _item, bool vector<Id> arguments(info.args); for (int i = 0; i < info.args; ++i) arguments[i] = stackElement(m_stackHeight - i, _item.location()); - - if (_item.instruction() == Instruction::SSTORE) + switch (_item.instruction()) + { + case Instruction::SSTORE: op = storeInStorage(arguments[0], arguments[1], _item.location()); - else if (_item.instruction() == Instruction::SLOAD) + break; + case Instruction::SLOAD: setStackElement( m_stackHeight + _item.deposit(), loadFromStorage(arguments[0], _item.location()) ); - else if (_item.instruction() == Instruction::MSTORE) + break; + case Instruction::MSTORE: op = storeInMemory(arguments[0], arguments[1], _item.location()); - else if (_item.instruction() == Instruction::MLOAD) + break; + case Instruction::MLOAD: setStackElement( m_stackHeight + _item.deposit(), loadFromMemory(arguments[0], _item.location()) ); - else if (_item.instruction() == Instruction::KECCAK256) + break; + case Instruction::KECCAK256: setStackElement( m_stackHeight + _item.deposit(), applyKeccak256(arguments.at(0), arguments.at(1), _item.location()) ); - else - { + break; + default: bool invMem = SemanticInformation::invalidatesMemory(_item.instruction()); bool invStor = SemanticInformation::invalidatesStorage(_item.instruction()); // We could be a bit more fine-grained here (CALL only invalidates part of diff --git a/libevmasm/KnownState.h b/libevmasm/KnownState.h index 8568b163..cd50550e 100644 --- a/libevmasm/KnownState.h +++ b/libevmasm/KnownState.h @@ -29,12 +29,18 @@ #include <tuple> #include <memory> #include <ostream> -#pragma warning(push) -#pragma GCC diagnostic push + +#if defined(__clang__) +#pragma clang diagnostic push #pragma clang diagnostic ignored "-Wredeclared-class-member" +#endif // defined(__clang__) + #include <boost/bimap.hpp> -#pragma warning(pop) -#pragma GCC diagnostic pop + +#if defined(__clang__) +#pragma clang diagnostic pop +#endif // defined(__clang__) + #include <libdevcore/CommonIO.h> #include <libdevcore/Exceptions.h> #include <libevmasm/ExpressionClasses.h> diff --git a/libevmasm/LinkerObject.cpp b/libevmasm/LinkerObject.cpp index 8b7d9e06..6eec54ea 100644 --- a/libevmasm/LinkerObject.cpp +++ b/libevmasm/LinkerObject.cpp @@ -21,6 +21,7 @@ #include <libevmasm/LinkerObject.h> #include <libdevcore/CommonData.h> +#include <libdevcore/Keccak256.h> using namespace dev; using namespace dev::eth; @@ -50,14 +51,19 @@ string LinkerObject::toHex() const for (auto const& ref: linkReferences) { size_t pos = ref.first * 2; - string const& name = ref.second; + string hash = libraryPlaceholder(ref.second); hex[pos] = hex[pos + 1] = hex[pos + 38] = hex[pos + 39] = '_'; for (size_t i = 0; i < 36; ++i) - hex[pos + 2 + i] = i < name.size() ? name[i] : '_'; + hex[pos + 2 + i] = hash.at(i); } return hex; } +string LinkerObject::libraryPlaceholder(string const& _libraryName) +{ + return "$" + keccak256(_libraryName).hex().substr(0, 34) + "$"; +} + h160 const* LinkerObject::matchLibrary( string const& _linkRefName, @@ -68,7 +74,7 @@ LinkerObject::matchLibrary( if (it != _libraryAddresses.end()) return &it->second; // If the user did not supply a fully qualified library name, - // try to match only the simple libary name + // try to match only the simple library name size_t colon = _linkRefName.find(':'); if (colon == string::npos) return nullptr; diff --git a/libevmasm/LinkerObject.h b/libevmasm/LinkerObject.h index 152487b4..92890803 100644 --- a/libevmasm/LinkerObject.h +++ b/libevmasm/LinkerObject.h @@ -50,6 +50,11 @@ struct LinkerObject /// addresses by placeholders. std::string toHex() const; + /// @returns a 36 character string that is used as a placeholder for the library + /// address (enclosed by `__` on both sides). The placeholder is the hex representation + /// of the first 18 bytes of the keccak-256 hash of @a _libraryName. + static std::string libraryPlaceholder(std::string const& _libraryName); + private: static h160 const* matchLibrary( std::string const& _linkRefName, diff --git a/libevmasm/PathGasMeter.h b/libevmasm/PathGasMeter.h index 9537b176..fb821684 100644 --- a/libevmasm/PathGasMeter.h +++ b/libevmasm/PathGasMeter.h @@ -57,6 +57,16 @@ public: GasMeter::GasConsumption estimateMax(size_t _startIndex, std::shared_ptr<KnownState> const& _state); + static GasMeter::GasConsumption estimateMax( + AssemblyItems const& _items, + solidity::EVMVersion _evmVersion, + size_t _startIndex, + std::shared_ptr<KnownState> const& _state + ) + { + return PathGasMeter(_items, _evmVersion).estimateMax(_startIndex, _state); + } + private: /// Adds a new path item to the queue, but only if we do not already have /// a higher gas usage at that point. diff --git a/libevmasm/PeepholeOptimiser.cpp b/libevmasm/PeepholeOptimiser.cpp index 8a39de24..6d8e1df6 100644 --- a/libevmasm/PeepholeOptimiser.cpp +++ b/libevmasm/PeepholeOptimiser.cpp @@ -249,6 +249,23 @@ struct TagConjunctions: SimplePeepholeOptimizerMethod<TagConjunctions, 3> } }; +struct TruthyAnd: SimplePeepholeOptimizerMethod<TruthyAnd, 3> +{ + static bool applySimple( + AssemblyItem const& _push, + AssemblyItem const& _not, + AssemblyItem const& _and, + std::back_insert_iterator<AssemblyItems> + ) + { + return ( + _push.type() == Push && _push.data() == 0 && + _not == Instruction::NOT && + _and == Instruction::AND + ); + } +}; + /// Removes everything after a JUMP (or similar) until the next JUMPDEST. struct UnreachableCode { @@ -305,7 +322,7 @@ bool PeepholeOptimiser::optimise() { OptimiserState state {m_items, 0, std::back_inserter(m_optimisedItems)}; while (state.i < m_items.size()) - applyMethods(state, PushPop(), OpPop(), DoublePush(), DoubleSwap(), CommutativeSwap(), SwapComparison(), JumpToNext(), UnreachableCode(), TagConjunctions(), Identity()); + applyMethods(state, PushPop(), OpPop(), DoublePush(), DoubleSwap(), CommutativeSwap(), SwapComparison(), JumpToNext(), UnreachableCode(), TagConjunctions(), TruthyAnd(), Identity()); if (m_optimisedItems.size() < m_items.size() || ( m_optimisedItems.size() == m_items.size() && ( eth::bytesRequired(m_optimisedItems, 3) < eth::bytesRequired(m_items, 3) || diff --git a/libevmasm/RuleList.h b/libevmasm/RuleList.h index 2b7da01b..874a8929 100644 --- a/libevmasm/RuleList.h +++ b/libevmasm/RuleList.h @@ -44,12 +44,11 @@ template <class S> S modWorkaround(S const& _a, S const& _b) return (S)(bigint(_a) % bigint(_b)); } -/// @returns a list of simplification rules given certain match placeholders. -/// A, B and C should represent constants, X and Y arbitrary expressions. -/// The simplifications should neven change the order of evaluation of -/// arbitrary operations. +// This part of simplificationRuleList below was split out to prevent +// stack overflows in the JavaScript optimizer for emscripten builds +// that affected certain browser versions. template <class Pattern> -std::vector<SimplificationRule<Pattern>> simplificationRuleList( +std::vector<SimplificationRule<Pattern>> simplificationRuleListPart1( Pattern A, Pattern B, Pattern C, @@ -57,9 +56,8 @@ std::vector<SimplificationRule<Pattern>> simplificationRuleList( Pattern Y ) { - std::vector<SimplificationRule<Pattern>> rules; - rules += std::vector<SimplificationRule<Pattern>>{ - // arithmetics on constants + return std::vector<SimplificationRule<Pattern>> { + // arithmetic on constants {{Instruction::ADD, {A, B}}, [=]{ return A.d() + B.d(); }, false}, {{Instruction::MUL, {A, B}}, [=]{ return A.d() * B.d(); }, false}, {{Instruction::SUB, {A, B}}, [=]{ return A.d() - B.d(); }, false}, @@ -162,6 +160,22 @@ std::vector<SimplificationRule<Pattern>> simplificationRuleList( {{Instruction::OR, {X, {Instruction::NOT, {X}}}}, [=]{ return ~u256(0); }, true}, {{Instruction::OR, {{Instruction::NOT, {X}}, X}}, [=]{ return ~u256(0); }, true}, }; +} + + +// This part of simplificationRuleList below was split out to prevent +// stack overflows in the JavaScript optimizer for emscripten builds +// that affected certain browser versions. +template <class Pattern> +std::vector<SimplificationRule<Pattern>> simplificationRuleListPart2( + Pattern A, + Pattern B, + Pattern, + Pattern X, + Pattern Y +) +{ + std::vector<SimplificationRule<Pattern>> rules; // Replace MOD X, <power-of-two> with AND X, <power-of-two> - 1 for (size_t i = 0; i < 256; ++i) @@ -292,5 +306,24 @@ std::vector<SimplificationRule<Pattern>> simplificationRuleList( return rules; } +/// @returns a list of simplification rules given certain match placeholders. +/// A, B and C should represent constants, X and Y arbitrary expressions. +/// The simplifications should never change the order of evaluation of +/// arbitrary operations. +template <class Pattern> +std::vector<SimplificationRule<Pattern>> simplificationRuleList( + Pattern A, + Pattern B, + Pattern C, + Pattern X, + Pattern Y +) +{ + std::vector<SimplificationRule<Pattern>> rules; + rules += simplificationRuleListPart1(A, B, C, X, Y); + rules += simplificationRuleListPart2(A, B, C, X, Y); + return rules; +} + } } diff --git a/libevmasm/SemanticInformation.cpp b/libevmasm/SemanticInformation.cpp index 71267ee8..78f3c9c7 100644 --- a/libevmasm/SemanticInformation.cpp +++ b/libevmasm/SemanticInformation.cpp @@ -151,6 +151,7 @@ bool SemanticInformation::isDeterministic(AssemblyItem const& _item) case Instruction::MSIZE: // depends on previous writes and reads, not only on content case Instruction::BALANCE: // depends on previous calls case Instruction::EXTCODESIZE: + case Instruction::EXTCODEHASH: case Instruction::RETURNDATACOPY: // depends on previous calls case Instruction::RETURNDATASIZE: return false; @@ -172,6 +173,7 @@ bool SemanticInformation::movable(Instruction _instruction) case Instruction::KECCAK256: case Instruction::BALANCE: case Instruction::EXTCODESIZE: + case Instruction::EXTCODEHASH: case Instruction::RETURNDATASIZE: case Instruction::SLOAD: case Instruction::PC: @@ -233,6 +235,7 @@ bool SemanticInformation::invalidInPureFunctions(Instruction _instruction) case Instruction::GASPRICE: case Instruction::EXTCODESIZE: case Instruction::EXTCODECOPY: + case Instruction::EXTCODEHASH: case Instruction::BLOCKHASH: case Instruction::COINBASE: case Instruction::TIMESTAMP: diff --git a/libevmasm/SimplificationRules.cpp b/libevmasm/SimplificationRules.cpp index 53a5f9fc..120d1787 100644 --- a/libevmasm/SimplificationRules.cpp +++ b/libevmasm/SimplificationRules.cpp @@ -21,16 +21,19 @@ * Container for equivalence classes of expressions for use in common subexpression elimination. */ +#include <libevmasm/SimplificationRules.h> + #include <libevmasm/ExpressionClasses.h> -#include <utility> -#include <functional> -#include <boost/range/adaptor/reversed.hpp> -#include <boost/noncopyable.hpp> #include <libevmasm/Assembly.h> #include <libevmasm/CommonSubexpressionEliminator.h> -#include <libevmasm/SimplificationRules.h> - #include <libevmasm/RuleList.h> +#include <libdevcore/Assertions.h> + +#include <boost/range/adaptor/reversed.hpp> +#include <boost/noncopyable.hpp> + +#include <utility> +#include <functional> using namespace std; using namespace dev; @@ -45,7 +48,7 @@ SimplificationRule<Pattern> const* Rules::findFirstMatch( resetMatchGroups(); assertThrow(_expr.item, OptimizerException, ""); - for (auto const& rule: m_rules[byte(_expr.item->instruction())]) + for (auto const& rule: m_rules[uint8_t(_expr.item->instruction())]) { if (rule.pattern.matches(_expr, _classes)) return &rule; @@ -54,6 +57,11 @@ SimplificationRule<Pattern> const* Rules::findFirstMatch( return nullptr; } +bool Rules::isInitialized() const +{ + return !m_rules[uint8_t(Instruction::ADD)].empty(); +} + void Rules::addRules(std::vector<SimplificationRule<Pattern>> const& _rules) { for (auto const& r: _rules) @@ -62,12 +70,12 @@ void Rules::addRules(std::vector<SimplificationRule<Pattern>> const& _rules) void Rules::addRule(SimplificationRule<Pattern> const& _rule) { - m_rules[byte(_rule.pattern.instruction())].push_back(_rule); + m_rules[uint8_t(_rule.pattern.instruction())].push_back(_rule); } Rules::Rules() { - // Multiple occurences of one of these inside one rule must match the same equivalence class. + // Multiple occurrences of one of these inside one rule must match the same equivalence class. // Constants. Pattern A(Push); Pattern B(Push); @@ -82,6 +90,7 @@ Rules::Rules() Y.setMatchGroup(5, m_matchGroups); addRules(simplificationRuleList(A, B, C, X, Y)); + assertThrow(isInitialized(), OptimizerException, "Rule list not properly initialized."); } Pattern::Pattern(Instruction _instruction, std::vector<Pattern> const& _arguments): diff --git a/libevmasm/SimplificationRules.h b/libevmasm/SimplificationRules.h index 53f7e595..fbe5a2b0 100644 --- a/libevmasm/SimplificationRules.h +++ b/libevmasm/SimplificationRules.h @@ -26,6 +26,8 @@ #include <libevmasm/ExpressionClasses.h> #include <libevmasm/SimplificationRule.h> +#include <boost/noncopyable.hpp> + #include <functional> #include <vector> @@ -53,6 +55,10 @@ public: ExpressionClasses const& _classes ); + /// Checks whether the rulelist is non-empty. This is usually enforced + /// by the constructor, but we had some issues with static initialization. + bool isInitialized() const; + private: void addRules(std::vector<SimplificationRule<Pattern>> const& _rules); void addRule(SimplificationRule<Pattern> const& _rule); |