diff options
author | chriseth <chris@ethereum.org> | 2017-01-13 20:05:02 +0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-01-13 20:05:02 +0800 |
commit | 60cc1668517f56ce6ca8225555472e7a27eab8b0 (patch) | |
tree | 8eac35131efc4beeee921356052375233edd7102 /libevmasm | |
parent | 822622cf5bf23e79a6e2292cb837d1a39ca1c419 (diff) | |
parent | e22672b7c739dde9f37a919e63245abda4b1fc89 (diff) | |
download | dexon-solidity-60cc1668517f56ce6ca8225555472e7a27eab8b0.tar dexon-solidity-60cc1668517f56ce6ca8225555472e7a27eab8b0.tar.gz dexon-solidity-60cc1668517f56ce6ca8225555472e7a27eab8b0.tar.bz2 dexon-solidity-60cc1668517f56ce6ca8225555472e7a27eab8b0.tar.lz dexon-solidity-60cc1668517f56ce6ca8225555472e7a27eab8b0.tar.xz dexon-solidity-60cc1668517f56ce6ca8225555472e7a27eab8b0.tar.zst dexon-solidity-60cc1668517f56ce6ca8225555472e7a27eab8b0.zip |
Merge pull request #1561 from ethereum/develop
Merge develop into release for 0.4.8
Diffstat (limited to 'libevmasm')
-rw-r--r-- | libevmasm/Assembly.cpp | 71 | ||||
-rw-r--r-- | libevmasm/AssemblyItem.cpp | 90 | ||||
-rw-r--r-- | libevmasm/AssemblyItem.h | 50 | ||||
-rw-r--r-- | libevmasm/CommonSubexpressionEliminator.cpp | 2 | ||||
-rw-r--r-- | libevmasm/ConstantOptimiser.cpp | 78 | ||||
-rw-r--r-- | libevmasm/ConstantOptimiser.h | 19 | ||||
-rw-r--r-- | libevmasm/ExpressionClasses.cpp | 343 | ||||
-rw-r--r-- | libevmasm/ExpressionClasses.h | 65 | ||||
-rw-r--r-- | libevmasm/PeepholeOptimiser.cpp | 2 | ||||
-rw-r--r-- | libevmasm/SimplificationRules.cpp | 370 | ||||
-rw-r--r-- | libevmasm/SimplificationRules.h | 140 |
11 files changed, 738 insertions, 492 deletions
diff --git a/libevmasm/Assembly.cpp b/libevmasm/Assembly.cpp index a9ca24dc..845abfd4 100644 --- a/libevmasm/Assembly.cpp +++ b/libevmasm/Assembly.cpp @@ -117,69 +117,36 @@ string Assembly::locationFromSources(StringMap const& _sourceCodes, SourceLocati ostream& Assembly::streamAsm(ostream& _out, string const& _prefix, StringMap const& _sourceCodes) const { - _out << _prefix << ".code:" << endl; - for (AssemblyItem const& i: m_items) + for (size_t i = 0; i < m_items.size(); ++i) { - _out << _prefix; - switch (i.type()) + AssemblyItem const& item = m_items[i]; + if (!item.location().isEmpty() && (i == 0 || m_items[i - 1].location() != item.location())) { - case Operation: - _out << " " << instructionInfo(i.instruction()).name << "\t" << i.getJumpTypeAsString(); - break; - case Push: - _out << " PUSH" << dec << max<unsigned>(1, dev::bytesRequired(i.data())) << " 0x" << hex << i.data(); - break; - case PushString: - _out << " PUSH \"" << m_strings.at((h256)i.data()) << "\""; - break; - case PushTag: - if (i.data() == 0) - _out << " PUSH [ErrorTag]"; - else - { - size_t subId = i.splitForeignPushTag().first; - if (subId == size_t(-1)) - _out << " PUSH [tag" << dec << i.splitForeignPushTag().second << "]"; - else - _out << " PUSH [tag" << dec << subId << ":" << i.splitForeignPushTag().second << "]"; - } - break; - case PushSub: - _out << " PUSH [$" << size_t(i.data()) << "]"; - break; - case PushSubSize: - _out << " PUSH #[$" << size_t(i.data()) << "]"; - break; - case PushProgramSize: - _out << " PUSHSIZE"; - break; - case PushLibraryAddress: - _out << " PUSHLIB \"" << m_libraries.at(h256(i.data())) << "\""; - break; - case Tag: - _out << "tag" << dec << i.data() << ": " << endl << _prefix << " JUMPDEST"; - break; - case PushData: - _out << " PUSH [" << hex << (unsigned)i.data() << "]"; - break; - default: - BOOST_THROW_EXCEPTION(InvalidOpcode()); + _out << _prefix << " /*"; + if (item.location().sourceName) + _out << " \"" + *item.location().sourceName + "\""; + if (!item.location().isEmpty()) + _out << ":" << to_string(item.location().start) + ":" + to_string(item.location().end); + _out << " */" << endl; } - _out << "\t\t" << locationFromSources(_sourceCodes, i.location()) << endl; + _out << _prefix << (item.type() == Tag ? "" : " ") << item.toAssemblyText() << endl; } if (!m_data.empty() || !m_subs.empty()) { - _out << _prefix << ".data:" << endl; + _out << _prefix << "stop" << endl; + Json::Value data; for (auto const& i: m_data) - if (u256(i.first) >= m_subs.size()) - _out << _prefix << " " << hex << (unsigned)(u256)i.first << ": " << dev::toHex(i.second) << endl; + assertThrow(u256(i.first) < m_subs.size(), AssemblyException, "Data not yet implemented."); + for (size_t i = 0; i < m_subs.size(); ++i) { - _out << _prefix << " " << hex << i << ": " << endl; - m_subs[i]->stream(_out, _prefix + " ", _sourceCodes); + _out << endl << _prefix << "sub_" << i << ": assembly {\n"; + m_subs[i]->streamAsm(_out, _prefix + " ", _sourceCodes); + _out << _prefix << "}" << endl; } } + return _out; } @@ -449,7 +416,7 @@ LinkerObject const& Assembly::assemble() const switch (i.type()) { case Operation: - ret.bytecode.push_back((byte)i.data()); + ret.bytecode.push_back((byte)i.instruction()); break; case PushString: { diff --git a/libevmasm/AssemblyItem.cpp b/libevmasm/AssemblyItem.cpp index 54e38de8..6c7d5425 100644 --- a/libevmasm/AssemblyItem.cpp +++ b/libevmasm/AssemblyItem.cpp @@ -20,6 +20,7 @@ */ #include "AssemblyItem.h" +#include <libevmasm/SemanticInformation.h> #include <fstream> using namespace std; @@ -28,19 +29,19 @@ using namespace dev::eth; AssemblyItem AssemblyItem::toSubAssemblyTag(size_t _subId) const { - assertThrow(m_data < (u256(1) << 64), Exception, "Tag already has subassembly set."); + assertThrow(data() < (u256(1) << 64), Exception, "Tag already has subassembly set."); assertThrow(m_type == PushTag || m_type == Tag, Exception, ""); AssemblyItem r = *this; r.m_type = PushTag; - r.setPushTagSubIdAndTag(_subId, size_t(m_data)); + r.setPushTagSubIdAndTag(_subId, size_t(data())); return r; } pair<size_t, size_t> AssemblyItem::splitForeignPushTag() const { assertThrow(m_type == PushTag || m_type == Tag, Exception, ""); - return make_pair(size_t(m_data / (u256(1) << 64)) - 1, size_t(m_data)); + return make_pair(size_t((data()) / (u256(1) << 64)) - 1, size_t(data())); } void AssemblyItem::setPushTagSubIdAndTag(size_t _subId, size_t _tag) @@ -59,7 +60,7 @@ unsigned AssemblyItem::bytesRequired(unsigned _addressLength) const case PushString: return 33; case Push: - return 1 + max<unsigned>(1, dev::bytesRequired(m_data)); + return 1 + max<unsigned>(1, dev::bytesRequired(data())); case PushSubSize: case PushProgramSize: return 4; // worst case: a 16MB program @@ -97,6 +98,28 @@ int AssemblyItem::deposit() const return 0; } +bool AssemblyItem::canBeFunctional() const +{ + switch (m_type) + { + case Operation: + return !SemanticInformation::isDupInstruction(*this) && !SemanticInformation::isSwapInstruction(*this); + case Push: + case PushString: + case PushTag: + case PushData: + case PushSub: + case PushSubSize: + case PushProgramSize: + case PushLibraryAddress: + return true; + case Tag: + return false; + default:; + } + return 0; +} + string AssemblyItem::getJumpTypeAsString() const { switch (m_jumpType) @@ -111,6 +134,65 @@ string AssemblyItem::getJumpTypeAsString() const } } +string AssemblyItem::toAssemblyText() const +{ + string text; + switch (type()) + { + case Operation: + { + assertThrow(isValidInstruction(instruction()), AssemblyException, "Invalid instruction."); + string name = instructionInfo(instruction()).name; + transform(name.begin(), name.end(), name.begin(), [](unsigned char _c) { return tolower(_c); }); + text = name; + break; + } + case Push: + text = toHex(toCompactBigEndian(data(), 1), 1, HexPrefix::Add); + break; + case PushString: + assertThrow(false, AssemblyException, "Push string assembly output not implemented."); + break; + case PushTag: + assertThrow(data() < 0x10000, AssemblyException, "Sub-assembly tags not yet implemented."); + text = string("tag_") + to_string(size_t(data())); + break; + case Tag: + assertThrow(data() < 0x10000, AssemblyException, "Sub-assembly tags not yet implemented."); + text = string("tag_") + to_string(size_t(data())) + ":"; + break; + case PushData: + assertThrow(false, AssemblyException, "Push data not implemented."); + break; + case PushSub: + text = string("dataOffset(sub_") + to_string(size_t(data())) + ")"; + break; + case PushSubSize: + text = string("dataSize(sub_") + to_string(size_t(data())) + ")"; + break; + case PushProgramSize: + text = string("bytecodeSize"); + break; + case PushLibraryAddress: + text = string("linkerSymbol(\"") + toHex(data()) + string("\")"); + break; + case UndefinedItem: + assertThrow(false, AssemblyException, "Invalid assembly item."); + break; + default: + BOOST_THROW_EXCEPTION(InvalidOpcode()); + } + if (m_jumpType == JumpType::IntoFunction || m_jumpType == JumpType::OutOfFunction) + { + text += "\t//"; + if (m_jumpType == JumpType::IntoFunction) + text += " in"; + else + text += " out"; + } + return text; +} + ostream& dev::eth::operator<<(ostream& _out, AssemblyItem const& _item) { switch (_item.type()) diff --git a/libevmasm/AssemblyItem.h b/libevmasm/AssemblyItem.h index b5bd3ed8..002b3c87 100644 --- a/libevmasm/AssemblyItem.h +++ b/libevmasm/AssemblyItem.h @@ -59,16 +59,22 @@ public: AssemblyItem(u256 _push, SourceLocation const& _location = SourceLocation()): AssemblyItem(Push, _push, _location) { } AssemblyItem(solidity::Instruction _i, SourceLocation const& _location = SourceLocation()): - AssemblyItem(Operation, byte(_i), _location) { } + m_type(Operation), + m_instruction(_i), + m_location(_location) + {} AssemblyItem(AssemblyItemType _type, u256 _data = 0, SourceLocation const& _location = SourceLocation()): m_type(_type), - m_data(_data), m_location(_location) { + if (m_type == Operation) + m_instruction = Instruction(byte(_data)); + else + m_data = std::make_shared<u256>(_data); } - AssemblyItem tag() const { assertThrow(m_type == PushTag || m_type == Tag, Exception, ""); return AssemblyItem(Tag, m_data); } - AssemblyItem pushTag() const { assertThrow(m_type == PushTag || m_type == Tag, Exception, ""); return AssemblyItem(PushTag, m_data); } + AssemblyItem tag() const { assertThrow(m_type == PushTag || m_type == Tag, Exception, ""); return AssemblyItem(Tag, data()); } + AssemblyItem pushTag() const { assertThrow(m_type == PushTag || m_type == Tag, Exception, ""); return AssemblyItem(PushTag, data()); } /// Converts the tag to a subassembly tag. This has to be called in order to move a tag across assemblies. /// @param _subId the identifier of the subassembly the tag is taken from. AssemblyItem toSubAssemblyTag(size_t _subId) const; @@ -79,25 +85,42 @@ public: void setPushTagSubIdAndTag(size_t _subId, size_t _tag); AssemblyItemType type() const { return m_type; } - u256 const& data() const { return m_data; } - void setType(AssemblyItemType const _type) { m_type = _type; } - void setData(u256 const& _data) { m_data = _data; } + u256 const& data() const { assertThrow(m_type != Operation, Exception, ""); return *m_data; } + void setData(u256 const& _data) { assertThrow(m_type != Operation, Exception, ""); m_data = std::make_shared<u256>(_data); } /// @returns the instruction of this item (only valid if type() == Operation) - Instruction instruction() const { return Instruction(byte(m_data)); } + Instruction instruction() const { assertThrow(m_type == Operation, Exception, ""); return m_instruction; } /// @returns true if the type and data of the items are equal. - bool operator==(AssemblyItem const& _other) const { return m_type == _other.m_type && m_data == _other.m_data; } + bool operator==(AssemblyItem const& _other) const + { + if (type() != _other.type()) + return false; + if (type() == Operation) + return instruction() == _other.instruction(); + else + return data() == _other.data(); + } bool operator!=(AssemblyItem const& _other) const { return !operator==(_other); } /// Less-than operator compatible with operator==. - bool operator<(AssemblyItem const& _other) const { return std::tie(m_type, m_data) < std::tie(_other.m_type, _other.m_data); } + bool operator<(AssemblyItem const& _other) const + { + if (type() != _other.type()) + return type() < _other.type(); + else if (type() == Operation) + return instruction() < _other.instruction(); + else + return data() < _other.data(); + } /// @returns an upper bound for the number of bytes required by this item, assuming that /// the value of a jump tag takes @a _addressLength bytes. unsigned bytesRequired(unsigned _addressLength) const; int deposit() const; - bool match(AssemblyItem const& _i) const { return _i.m_type == UndefinedItem || (m_type == _i.m_type && (m_type != Operation || m_data == _i.m_data)); } + /// @returns true if the assembly item can be used in a functional context. + bool canBeFunctional() const; + void setLocation(SourceLocation const& _location) { m_location = _location; } SourceLocation const& location() const { return m_location; } @@ -108,9 +131,12 @@ public: void setPushedValue(u256 const& _value) const { m_pushedValue = std::make_shared<u256>(_value); } u256 const* pushedValue() const { return m_pushedValue.get(); } + std::string toAssemblyText() const; + private: AssemblyItemType m_type; - u256 m_data; + Instruction m_instruction; ///< Only valid if m_type == Operation + std::shared_ptr<u256> m_data; ///< Only valid if m_type != Operation SourceLocation m_location; JumpType m_jumpType = JumpType::Ordinary; /// Pushed value for operations with data to be determined during assembly stage, diff --git a/libevmasm/CommonSubexpressionEliminator.cpp b/libevmasm/CommonSubexpressionEliminator.cpp index 6294e579..fd4fffa6 100644 --- a/libevmasm/CommonSubexpressionEliminator.cpp +++ b/libevmasm/CommonSubexpressionEliminator.cpp @@ -303,7 +303,9 @@ void CSECodeGenerator::generateClassElement(Id _c, bool _allowSequenced) for (auto it: m_classPositions) for (auto p: it.second) if (p > m_stackHeight) + { assertThrow(false, OptimizerException, ""); + } // do some cleanup removeStackTopIfPossible(); diff --git a/libevmasm/ConstantOptimiser.cpp b/libevmasm/ConstantOptimiser.cpp index f4a50c2d..86244e17 100644 --- a/libevmasm/ConstantOptimiser.cpp +++ b/libevmasm/ConstantOptimiser.cpp @@ -38,6 +38,7 @@ unsigned ConstantOptimisationMethod::optimiseConstants( for (AssemblyItem const& item: _items) if (item.type() == Push) pushes[item]++; + map<u256, AssemblyItems> pendingReplacements; for (auto it: pushes) { AssemblyItem const& item = it.first; @@ -53,17 +54,22 @@ unsigned ConstantOptimisationMethod::optimiseConstants( bigint copyGas = copy.gasNeeded(); ComputeMethod compute(params, item.data()); bigint computeGas = compute.gasNeeded(); + AssemblyItems replacement; if (copyGas < literalGas && copyGas < computeGas) { - copy.execute(_assembly, _items); + replacement = copy.execute(_assembly); optimisations++; } - else if (computeGas < literalGas && computeGas < copyGas) + else if (computeGas < literalGas && computeGas <= copyGas) { - compute.execute(_assembly, _items); + replacement = compute.execute(_assembly); optimisations++; } + if (!replacement.empty()) + pendingReplacements[item.data()] = replacement; } + if (!pendingReplacements.empty()) + replaceConstants(_items, pendingReplacements); return optimisations; } @@ -101,18 +107,24 @@ size_t ConstantOptimisationMethod::bytesRequired(AssemblyItems const& _items) void ConstantOptimisationMethod::replaceConstants( AssemblyItems& _items, - AssemblyItems const& _replacement -) const + map<u256, AssemblyItems> const& _replacements +) { - assertThrow(_items.size() > 0, OptimizerException, ""); - for (size_t i = 0; i < _items.size(); ++i) + AssemblyItems replaced; + for (AssemblyItem const& item: _items) { - if (_items.at(i) != AssemblyItem(m_value)) - continue; - _items[i] = _replacement[0]; - _items.insert(_items.begin() + i + 1, _replacement.begin() + 1, _replacement.end()); - i += _replacement.size() - 1; + if (item.type() == Push) + { + auto it = _replacements.find(item.data()); + if (it != _replacements.end()) + { + replaced += it->second; + continue; + } + } + replaced.push_back(item); } + _items = std::move(replaced); } bigint LiteralMethod::gasNeeded() @@ -128,38 +140,44 @@ bigint LiteralMethod::gasNeeded() CodeCopyMethod::CodeCopyMethod(Params const& _params, u256 const& _value): ConstantOptimisationMethod(_params, _value) { - m_copyRoutine = AssemblyItems{ - u256(0), - Instruction::DUP1, - Instruction::MLOAD, // back up memory - u256(32), - AssemblyItem(PushData, u256(1) << 16), // has to be replaced - Instruction::DUP4, - Instruction::CODECOPY, - Instruction::DUP2, - Instruction::MLOAD, - Instruction::SWAP2, - Instruction::MSTORE - }; } bigint CodeCopyMethod::gasNeeded() { return combineGas( // Run gas: we ignore memory increase costs - simpleRunGas(m_copyRoutine) + GasCosts::copyGas, + simpleRunGas(copyRoutine()) + GasCosts::copyGas, // Data gas for copy routines: Some bytes are zero, but we ignore them. - bytesRequired(m_copyRoutine) * (m_params.isCreation ? GasCosts::txDataNonZeroGas : GasCosts::createDataGas), + bytesRequired(copyRoutine()) * (m_params.isCreation ? GasCosts::txDataNonZeroGas : GasCosts::createDataGas), // Data gas for data itself dataGas(toBigEndian(m_value)) ); } -void CodeCopyMethod::execute(Assembly& _assembly, AssemblyItems& _items) +AssemblyItems CodeCopyMethod::execute(Assembly& _assembly) { bytes data = toBigEndian(m_value); - m_copyRoutine[4] = _assembly.newData(data); - replaceConstants(_items, m_copyRoutine); + AssemblyItems actualCopyRoutine = copyRoutine(); + actualCopyRoutine[4] = _assembly.newData(data); + return actualCopyRoutine; +} + +AssemblyItems const& CodeCopyMethod::copyRoutine() const +{ + AssemblyItems static copyRoutine{ + u256(0), + Instruction::DUP1, + Instruction::MLOAD, // back up memory + u256(32), + AssemblyItem(PushData, u256(1) << 16), // has to be replaced + Instruction::DUP4, + Instruction::CODECOPY, + Instruction::DUP2, + Instruction::MLOAD, + Instruction::SWAP2, + Instruction::MSTORE + }; + return copyRoutine; } AssemblyItems ComputeMethod::findRepresentation(u256 const& _value) diff --git a/libevmasm/ConstantOptimiser.h b/libevmasm/ConstantOptimiser.h index b35b2a69..dfa2fbf8 100644 --- a/libevmasm/ConstantOptimiser.h +++ b/libevmasm/ConstantOptimiser.h @@ -60,7 +60,10 @@ public: explicit ConstantOptimisationMethod(Params const& _params, u256 const& _value): m_params(_params), m_value(_value) {} virtual bigint gasNeeded() = 0; - virtual void execute(Assembly& _assembly, AssemblyItems& _items) = 0; + /// Executes the method, potentially appending to the assembly and returns a vector of + /// assembly items the constant should be relpaced with in one sweep. + /// If the vector is empty, the constants will not be deleted. + virtual AssemblyItems execute(Assembly& _assembly) = 0; protected: size_t dataSize() const { return std::max<size_t>(1, dev::bytesRequired(m_value)); } @@ -83,8 +86,8 @@ protected: return m_params.runs * _runGas + m_params.multiplicity * _repeatedDataGas + _uniqueDataGas; } - /// Replaces the constant by the code given in @a _replacement. - void replaceConstants(AssemblyItems& _items, AssemblyItems const& _replacement) const; + /// Replaces all constants i by the code given in @a _replacement[i]. + static void replaceConstants(AssemblyItems& _items, std::map<u256, AssemblyItems> const& _replacement); Params m_params; u256 const& m_value; @@ -100,7 +103,7 @@ public: explicit LiteralMethod(Params const& _params, u256 const& _value): ConstantOptimisationMethod(_params, _value) {} virtual bigint gasNeeded() override; - virtual void execute(Assembly&, AssemblyItems&) override {} + virtual AssemblyItems execute(Assembly&) override { return AssemblyItems{}; } }; /** @@ -111,10 +114,10 @@ class CodeCopyMethod: public ConstantOptimisationMethod public: explicit CodeCopyMethod(Params const& _params, u256 const& _value); virtual bigint gasNeeded() override; - virtual void execute(Assembly& _assembly, AssemblyItems& _items) override; + virtual AssemblyItems execute(Assembly& _assembly) override; protected: - AssemblyItems m_copyRoutine; + AssemblyItems const& copyRoutine() const; }; /** @@ -130,9 +133,9 @@ public: } virtual bigint gasNeeded() override { return gasNeeded(m_routine); } - virtual void execute(Assembly&, AssemblyItems& _items) override + virtual AssemblyItems execute(Assembly&) override { - replaceConstants(_items, m_routine); + return m_routine; } protected: diff --git a/libevmasm/ExpressionClasses.cpp b/libevmasm/ExpressionClasses.cpp index d5ccd7e3..fc283b0b 100644 --- a/libevmasm/ExpressionClasses.cpp +++ b/libevmasm/ExpressionClasses.cpp @@ -29,6 +29,7 @@ #include <boost/noncopyable.hpp> #include <libevmasm/Assembly.h> #include <libevmasm/CommonSubexpressionEliminator.h> +#include <libevmasm/SimplificationRules.h> using namespace std; using namespace dev; @@ -40,8 +41,18 @@ bool ExpressionClasses::Expression::operator<(ExpressionClasses::Expression cons assertThrow(!!item && !!_other.item, OptimizerException, ""); auto type = item->type(); auto otherType = _other.item->type(); - return std::tie(type, item->data(), arguments, sequenceNumber) < - std::tie(otherType, _other.item->data(), _other.arguments, _other.sequenceNumber); + if (type != otherType) + return type < otherType; + else if (type == Operation) + { + auto instr = item->instruction(); + auto otherInstr = _other.item->instruction(); + return std::tie(instr, arguments, sequenceNumber) < + std::tie(otherInstr, _other.arguments, _other.sequenceNumber); + } + else + return std::tie(item->data(), arguments, sequenceNumber) < + std::tie(_other.item->data(), _other.arguments, _other.sequenceNumber); } ExpressionClasses::Id ExpressionClasses::find( @@ -170,191 +181,6 @@ string ExpressionClasses::fullDAGToString(ExpressionClasses::Id _id) const return str.str(); } -class Rules: public boost::noncopyable -{ -public: - Rules(); - void resetMatchGroups() { m_matchGroups.clear(); } - vector<pair<Pattern, function<Pattern()>>> rules() const { return m_rules; } - -private: - using Expression = ExpressionClasses::Expression; - map<unsigned, Expression const*> m_matchGroups; - vector<pair<Pattern, function<Pattern()>>> m_rules; -}; - -template <class S> S divWorkaround(S const& _a, S const& _b) -{ - return (S)(bigint(_a) / bigint(_b)); -} - -template <class S> S modWorkaround(S const& _a, S const& _b) -{ - return (S)(bigint(_a) % bigint(_b)); -} - -Rules::Rules() -{ - // Multiple occurences of one of these inside one rule must match the same equivalence class. - // Constants. - Pattern A(Push); - Pattern B(Push); - Pattern C(Push); - // Anything. - Pattern X; - Pattern Y; - Pattern Z; - A.setMatchGroup(1, m_matchGroups); - B.setMatchGroup(2, m_matchGroups); - C.setMatchGroup(3, m_matchGroups); - X.setMatchGroup(4, m_matchGroups); - Y.setMatchGroup(5, m_matchGroups); - Z.setMatchGroup(6, m_matchGroups); - - m_rules = vector<pair<Pattern, function<Pattern()>>>{ - // arithmetics on constants - {{Instruction::ADD, {A, B}}, [=]{ return A.d() + B.d(); }}, - {{Instruction::MUL, {A, B}}, [=]{ return A.d() * B.d(); }}, - {{Instruction::SUB, {A, B}}, [=]{ return A.d() - B.d(); }}, - {{Instruction::DIV, {A, B}}, [=]{ return B.d() == 0 ? 0 : divWorkaround(A.d(), B.d()); }}, - {{Instruction::SDIV, {A, B}}, [=]{ return B.d() == 0 ? 0 : s2u(divWorkaround(u2s(A.d()), u2s(B.d()))); }}, - {{Instruction::MOD, {A, B}}, [=]{ return B.d() == 0 ? 0 : modWorkaround(A.d(), B.d()); }}, - {{Instruction::SMOD, {A, B}}, [=]{ return B.d() == 0 ? 0 : s2u(modWorkaround(u2s(A.d()), u2s(B.d()))); }}, - {{Instruction::EXP, {A, B}}, [=]{ return u256(boost::multiprecision::powm(bigint(A.d()), bigint(B.d()), bigint(1) << 256)); }}, - {{Instruction::NOT, {A}}, [=]{ return ~A.d(); }}, - {{Instruction::LT, {A, B}}, [=]() { return A.d() < B.d() ? u256(1) : 0; }}, - {{Instruction::GT, {A, B}}, [=]() -> u256 { return A.d() > B.d() ? 1 : 0; }}, - {{Instruction::SLT, {A, B}}, [=]() -> u256 { return u2s(A.d()) < u2s(B.d()) ? 1 : 0; }}, - {{Instruction::SGT, {A, B}}, [=]() -> u256 { return u2s(A.d()) > u2s(B.d()) ? 1 : 0; }}, - {{Instruction::EQ, {A, B}}, [=]() -> u256 { return A.d() == B.d() ? 1 : 0; }}, - {{Instruction::ISZERO, {A}}, [=]() -> u256 { return A.d() == 0 ? 1 : 0; }}, - {{Instruction::AND, {A, B}}, [=]{ return A.d() & B.d(); }}, - {{Instruction::OR, {A, B}}, [=]{ return A.d() | B.d(); }}, - {{Instruction::XOR, {A, B}}, [=]{ return A.d() ^ B.d(); }}, - {{Instruction::BYTE, {A, B}}, [=]{ return A.d() >= 32 ? 0 : (B.d() >> unsigned(8 * (31 - A.d()))) & 0xff; }}, - {{Instruction::ADDMOD, {A, B, C}}, [=]{ return C.d() == 0 ? 0 : u256((bigint(A.d()) + bigint(B.d())) % C.d()); }}, - {{Instruction::MULMOD, {A, B, C}}, [=]{ return C.d() == 0 ? 0 : u256((bigint(A.d()) * bigint(B.d())) % C.d()); }}, - {{Instruction::MULMOD, {A, B, C}}, [=]{ return A.d() * B.d(); }}, - {{Instruction::SIGNEXTEND, {A, B}}, [=]() -> u256 { - if (A.d() >= 31) - return B.d(); - unsigned testBit = unsigned(A.d()) * 8 + 7; - u256 mask = (u256(1) << testBit) - 1; - return u256(boost::multiprecision::bit_test(B.d(), testBit) ? B.d() | ~mask : B.d() & mask); - }}, - - // invariants involving known constants - {{Instruction::ADD, {X, 0}}, [=]{ return X; }}, - {{Instruction::SUB, {X, 0}}, [=]{ return X; }}, - {{Instruction::MUL, {X, 1}}, [=]{ return X; }}, - {{Instruction::DIV, {X, 1}}, [=]{ return X; }}, - {{Instruction::SDIV, {X, 1}}, [=]{ return X; }}, - {{Instruction::OR, {X, 0}}, [=]{ return X; }}, - {{Instruction::XOR, {X, 0}}, [=]{ return X; }}, - {{Instruction::AND, {X, ~u256(0)}}, [=]{ return X; }}, - {{Instruction::AND, {X, 0}}, [=]{ return u256(0); }}, - {{Instruction::MUL, {X, 0}}, [=]{ return u256(0); }}, - {{Instruction::DIV, {X, 0}}, [=]{ return u256(0); }}, - {{Instruction::DIV, {0, X}}, [=]{ return u256(0); }}, - {{Instruction::MOD, {X, 0}}, [=]{ return u256(0); }}, - {{Instruction::MOD, {0, X}}, [=]{ return u256(0); }}, - {{Instruction::OR, {X, ~u256(0)}}, [=]{ return ~u256(0); }}, - {{Instruction::EQ, {X, 0}}, [=]() -> Pattern { return {Instruction::ISZERO, {X}}; } }, - // operations involving an expression and itself - {{Instruction::AND, {X, X}}, [=]{ return X; }}, - {{Instruction::OR, {X, X}}, [=]{ return X; }}, - {{Instruction::XOR, {X, X}}, [=]{ return u256(0); }}, - {{Instruction::SUB, {X, X}}, [=]{ return u256(0); }}, - {{Instruction::EQ, {X, X}}, [=]{ return u256(1); }}, - {{Instruction::LT, {X, X}}, [=]{ return u256(0); }}, - {{Instruction::SLT, {X, X}}, [=]{ return u256(0); }}, - {{Instruction::GT, {X, X}}, [=]{ return u256(0); }}, - {{Instruction::SGT, {X, X}}, [=]{ return u256(0); }}, - {{Instruction::MOD, {X, X}}, [=]{ return u256(0); }}, - - {{Instruction::NOT, {{Instruction::NOT, {X}}}}, [=]{ return X; }}, - {{Instruction::XOR, {{{X}, {Instruction::XOR, {X, Y}}}}}, [=]{ return Y; }}, - {{Instruction::OR, {{{X}, {Instruction::AND, {X, Y}}}}}, [=]{ return X; }}, - {{Instruction::AND, {{{X}, {Instruction::OR, {X, Y}}}}}, [=]{ return X; }}, - {{Instruction::AND, {{{X}, {Instruction::NOT, {X}}}}}, [=]{ return u256(0); }}, - {{Instruction::OR, {{{X}, {Instruction::NOT, {X}}}}}, [=]{ return ~u256(0); }}, - }; - // Double negation of opcodes with binary result - for (auto const& op: vector<Instruction>{ - Instruction::EQ, - Instruction::LT, - Instruction::SLT, - Instruction::GT, - Instruction::SGT - }) - m_rules.push_back({ - {Instruction::ISZERO, {{Instruction::ISZERO, {{op, {X, Y}}}}}}, - [=]() -> Pattern { return {op, {X, Y}}; } - }); - m_rules.push_back({ - {Instruction::ISZERO, {{Instruction::ISZERO, {{Instruction::ISZERO, {X}}}}}}, - [=]() -> Pattern { return {Instruction::ISZERO, {X}}; } - }); - m_rules.push_back({ - {Instruction::ISZERO, {{Instruction::XOR, {X, Y}}}}, - [=]() -> Pattern { return { Instruction::EQ, {X, Y} }; } - }); - // Associative operations - for (auto const& opFun: vector<pair<Instruction,function<u256(u256 const&,u256 const&)>>>{ - {Instruction::ADD, plus<u256>()}, - {Instruction::MUL, multiplies<u256>()}, - {Instruction::AND, bit_and<u256>()}, - {Instruction::OR, bit_or<u256>()}, - {Instruction::XOR, bit_xor<u256>()} - }) - { - auto op = opFun.first; - auto fun = opFun.second; - // Moving constants to the outside, order matters here! - // we need actions that return expressions (or patterns?) here, and we need also reversed rules - // (X+A)+B -> X+(A+B) - m_rules += vector<pair<Pattern, function<Pattern()>>>{{ - {op, {{op, {X, A}}, B}}, - [=]() -> Pattern { return {op, {X, fun(A.d(), B.d())}}; } - }, { - // X+(Y+A) -> (X+Y)+A - {op, {{op, {X, A}}, Y}}, - [=]() -> Pattern { return {op, {{op, {X, Y}}, A}}; } - }, { - // For now, we still need explicit commutativity for the inner pattern - {op, {{op, {A, X}}, B}}, - [=]() -> Pattern { return {op, {X, fun(A.d(), B.d())}}; } - }, { - {op, {{op, {A, X}}, Y}}, - [=]() -> Pattern { return {op, {{op, {X, Y}}, A}}; } - }}; - } - // move constants across subtractions - m_rules += vector<pair<Pattern, function<Pattern()>>>{ - { - // X - A -> X + (-A) - {Instruction::SUB, {X, A}}, - [=]() -> Pattern { return {Instruction::ADD, {X, 0 - A.d()}}; } - }, { - // (X + A) - Y -> (X - Y) + A - {Instruction::SUB, {{Instruction::ADD, {X, A}}, Y}}, - [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, A}}; } - }, { - // (A + X) - Y -> (X - Y) + A - {Instruction::SUB, {{Instruction::ADD, {A, X}}, Y}}, - [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, A}}; } - }, { - // X - (Y + A) -> (X - Y) + (-A) - {Instruction::SUB, {X, {Instruction::ADD, {Y, A}}}}, - [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, 0 - A.d()}}; } - }, { - // X - (A + Y) -> (X - Y) + (-A) - {Instruction::SUB, {X, {Instruction::ADD, {A, Y}}}}, - [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, 0 - A.d()}}; } - } - }; -} - ExpressionClasses::Id ExpressionClasses::tryToSimplify(Expression const& _expr, bool _secondRun) { static Rules rules; @@ -366,21 +192,17 @@ ExpressionClasses::Id ExpressionClasses::tryToSimplify(Expression const& _expr, ) return -1; - for (auto const& rule: rules.rules()) + if (auto match = rules.findFirstMatch(_expr, *this)) { - rules.resetMatchGroups(); - if (rule.first.matches(_expr, *this)) - { - // Debug info - //cout << "Simplifying " << *_expr.item << "("; - //for (Id arg: _expr.arguments) - // cout << fullDAGToString(arg) << ", "; - //cout << ")" << endl; - //cout << "with rule " << rule.first.toString() << endl; - //ExpressionTemplate t(rule.second()); - //cout << "to " << rule.second().toString() << endl; - return rebuildExpression(ExpressionTemplate(rule.second(), _expr.item->location())); - } + // Debug info + //cout << "Simplifying " << *_expr.item << "("; + //for (Id arg: _expr.arguments) + // cout << fullDAGToString(arg) << ", "; + //cout << ")" << endl; + //cout << "with rule " << match->first.toString() << endl; + //ExpressionTemplate t(match->second()); + //cout << "to " << match->second().toString() << endl; + return rebuildExpression(ExpressionTemplate(match->second(), _expr.item->location())); } if (!_secondRun && _expr.arguments.size() == 2 && SemanticInformation::isCommutativeOperation(*_expr.item)) @@ -403,122 +225,3 @@ ExpressionClasses::Id ExpressionClasses::rebuildExpression(ExpressionTemplate co arguments.push_back(rebuildExpression(t)); return find(_template.item, arguments); } - - -Pattern::Pattern(Instruction _instruction, std::vector<Pattern> const& _arguments): - m_type(Operation), - m_requireDataMatch(true), - m_data(_instruction), - m_arguments(_arguments) -{ -} - -void Pattern::setMatchGroup(unsigned _group, map<unsigned, Expression const*>& _matchGroups) -{ - m_matchGroup = _group; - m_matchGroups = &_matchGroups; -} - -bool Pattern::matches(Expression const& _expr, ExpressionClasses const& _classes) const -{ - if (!matchesBaseItem(_expr.item)) - return false; - if (m_matchGroup) - { - if (!m_matchGroups->count(m_matchGroup)) - (*m_matchGroups)[m_matchGroup] = &_expr; - else if ((*m_matchGroups)[m_matchGroup]->id != _expr.id) - return false; - } - assertThrow(m_arguments.size() == 0 || _expr.arguments.size() == m_arguments.size(), OptimizerException, ""); - for (size_t i = 0; i < m_arguments.size(); ++i) - if (!m_arguments[i].matches(_classes.representative(_expr.arguments[i]), _classes)) - return false; - return true; -} - -AssemblyItem Pattern::toAssemblyItem(SourceLocation const& _location) const -{ - return AssemblyItem(m_type, m_data, _location); -} - -string Pattern::toString() const -{ - stringstream s; - switch (m_type) - { - case Operation: - s << instructionInfo(Instruction(unsigned(m_data))).name; - break; - case Push: - s << "PUSH " << hex << m_data; - break; - case UndefinedItem: - s << "ANY"; - break; - default: - s << "t=" << dec << m_type << " d=" << hex << m_data; - break; - } - if (!m_requireDataMatch) - s << " ~"; - if (m_matchGroup) - s << "[" << dec << m_matchGroup << "]"; - s << "("; - for (Pattern const& p: m_arguments) - s << p.toString() << ", "; - s << ")"; - return s.str(); -} - -bool Pattern::matchesBaseItem(AssemblyItem const* _item) const -{ - if (m_type == UndefinedItem) - return true; - if (!_item) - return false; - if (m_type != _item->type()) - return false; - if (m_requireDataMatch && m_data != _item->data()) - return false; - return true; -} - -Pattern::Expression const& Pattern::matchGroupValue() const -{ - assertThrow(m_matchGroup > 0, OptimizerException, ""); - assertThrow(!!m_matchGroups, OptimizerException, ""); - assertThrow((*m_matchGroups)[m_matchGroup], OptimizerException, ""); - return *(*m_matchGroups)[m_matchGroup]; -} - - -ExpressionTemplate::ExpressionTemplate(Pattern const& _pattern, SourceLocation const& _location) -{ - if (_pattern.matchGroup()) - { - hasId = true; - id = _pattern.id(); - } - else - { - hasId = false; - item = _pattern.toAssemblyItem(_location); - } - for (auto const& arg: _pattern.arguments()) - arguments.push_back(ExpressionTemplate(arg, _location)); -} - -string ExpressionTemplate::toString() const -{ - stringstream s; - if (hasId) - s << id; - else - s << item; - s << "("; - for (auto const& arg: arguments) - s << arg.toString(); - s << ")"; - return s.str(); -} diff --git a/libevmasm/ExpressionClasses.h b/libevmasm/ExpressionClasses.h index 11a698dd..5d53b292 100644 --- a/libevmasm/ExpressionClasses.h +++ b/libevmasm/ExpressionClasses.h @@ -121,70 +121,5 @@ private: std::vector<std::shared_ptr<AssemblyItem>> m_spareAssemblyItems; }; -/** - * Pattern to match against an expression. - * Also stores matched expressions to retrieve them later, for constructing new expressions using - * ExpressionTemplate. - */ -class Pattern -{ -public: - using Expression = ExpressionClasses::Expression; - using Id = ExpressionClasses::Id; - - // Matches a specific constant value. - Pattern(unsigned _value): Pattern(u256(_value)) {} - // Matches a specific constant value. - Pattern(u256 const& _value): m_type(Push), m_requireDataMatch(true), m_data(_value) {} - // Matches a specific assembly item type or anything if not given. - Pattern(AssemblyItemType _type = UndefinedItem): m_type(_type) {} - // Matches a given instruction with given arguments - Pattern(Instruction _instruction, std::vector<Pattern> const& _arguments = {}); - /// Sets this pattern to be part of the match group with the identifier @a _group. - /// Inside one rule, all patterns in the same match group have to match expressions from the - /// same expression equivalence class. - void setMatchGroup(unsigned _group, std::map<unsigned, Expression const*>& _matchGroups); - unsigned matchGroup() const { return m_matchGroup; } - bool matches(Expression const& _expr, ExpressionClasses const& _classes) const; - - AssemblyItem toAssemblyItem(SourceLocation const& _location) const; - std::vector<Pattern> arguments() const { return m_arguments; } - - /// @returns the id of the matched expression if this pattern is part of a match group. - Id id() const { return matchGroupValue().id; } - /// @returns the data of the matched expression if this pattern is part of a match group. - u256 const& d() const { return matchGroupValue().item->data(); } - - std::string toString() const; - -private: - bool matchesBaseItem(AssemblyItem const* _item) const; - Expression const& matchGroupValue() const; - - AssemblyItemType m_type; - bool m_requireDataMatch = false; - u256 m_data = 0; - std::vector<Pattern> m_arguments; - unsigned m_matchGroup = 0; - std::map<unsigned, Expression const*>* m_matchGroups = nullptr; -}; - -/** - * Template for a new expression that can be built from matched patterns. - */ -struct ExpressionTemplate -{ - using Expression = ExpressionClasses::Expression; - using Id = ExpressionClasses::Id; - explicit ExpressionTemplate(Pattern const& _pattern, SourceLocation const& _location); - std::string toString() const; - bool hasId = false; - /// Id of the matched expression, if available. - Id id = Id(-1); - // Otherwise, assembly item. - AssemblyItem item = UndefinedItem; - std::vector<ExpressionTemplate> arguments; -}; - } } diff --git a/libevmasm/PeepholeOptimiser.cpp b/libevmasm/PeepholeOptimiser.cpp index b96b0295..923ffa67 100644 --- a/libevmasm/PeepholeOptimiser.cpp +++ b/libevmasm/PeepholeOptimiser.cpp @@ -120,7 +120,7 @@ struct OpPop: SimplePeepholeOptimizerMethod<OpPop, 2> if (instructionInfo(instr).ret == 1 && !instructionInfo(instr).sideEffects) { for (int j = 0; j < instructionInfo(instr).args; j++) - *_out = Instruction::POP; + *_out = {Instruction::POP, _op.location()}; return true; } } diff --git a/libevmasm/SimplificationRules.cpp b/libevmasm/SimplificationRules.cpp new file mode 100644 index 00000000..2976d95f --- /dev/null +++ b/libevmasm/SimplificationRules.cpp @@ -0,0 +1,370 @@ +/* + This file is part of solidity. + + solidity is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + solidity is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with solidity. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @file ExpressionClasses.cpp + * @author Christian <c@ethdev.com> + * @date 2015 + * Container for equivalence classes of expressions for use in common subexpression elimination. + */ + +#include <libevmasm/ExpressionClasses.h> +#include <utility> +#include <tuple> +#include <functional> +#include <boost/range/adaptor/reversed.hpp> +#include <boost/noncopyable.hpp> +#include <libevmasm/Assembly.h> +#include <libevmasm/CommonSubexpressionEliminator.h> +#include <libevmasm/SimplificationRules.h> + +using namespace std; +using namespace dev; +using namespace dev::eth; + + +pair<Pattern, function<Pattern()> > const* Rules::findFirstMatch( + Expression const& _expr, + ExpressionClasses const& _classes +) +{ + resetMatchGroups(); + + assertThrow(_expr.item, OptimizerException, ""); + for (auto const& rule: m_rules[byte(_expr.item->instruction())]) + { + if (rule.first.matches(_expr, _classes)) + return &rule; + resetMatchGroups(); + } + return nullptr; +} + +void Rules::addRules(std::vector<std::pair<Pattern, std::function<Pattern ()> > > const& _rules) +{ + for (auto const& r: _rules) + addRule(r); +} + +void Rules::addRule(std::pair<Pattern, std::function<Pattern()> > const& _rule) +{ + m_rules[byte(_rule.first.instruction())].push_back(_rule); +} + +template <class S> S divWorkaround(S const& _a, S const& _b) +{ + return (S)(bigint(_a) / bigint(_b)); +} + +template <class S> S modWorkaround(S const& _a, S const& _b) +{ + return (S)(bigint(_a) % bigint(_b)); +} + +Rules::Rules() +{ + // Multiple occurences of one of these inside one rule must match the same equivalence class. + // Constants. + Pattern A(Push); + Pattern B(Push); + Pattern C(Push); + // Anything. + Pattern X; + Pattern Y; + Pattern Z; + A.setMatchGroup(1, m_matchGroups); + B.setMatchGroup(2, m_matchGroups); + C.setMatchGroup(3, m_matchGroups); + X.setMatchGroup(4, m_matchGroups); + Y.setMatchGroup(5, m_matchGroups); + Z.setMatchGroup(6, m_matchGroups); + + addRules(vector<pair<Pattern, function<Pattern()>>>{ + // arithmetics on constants + {{Instruction::ADD, {A, B}}, [=]{ return A.d() + B.d(); }}, + {{Instruction::MUL, {A, B}}, [=]{ return A.d() * B.d(); }}, + {{Instruction::SUB, {A, B}}, [=]{ return A.d() - B.d(); }}, + {{Instruction::DIV, {A, B}}, [=]{ return B.d() == 0 ? 0 : divWorkaround(A.d(), B.d()); }}, + {{Instruction::SDIV, {A, B}}, [=]{ return B.d() == 0 ? 0 : s2u(divWorkaround(u2s(A.d()), u2s(B.d()))); }}, + {{Instruction::MOD, {A, B}}, [=]{ return B.d() == 0 ? 0 : modWorkaround(A.d(), B.d()); }}, + {{Instruction::SMOD, {A, B}}, [=]{ return B.d() == 0 ? 0 : s2u(modWorkaround(u2s(A.d()), u2s(B.d()))); }}, + {{Instruction::EXP, {A, B}}, [=]{ return u256(boost::multiprecision::powm(bigint(A.d()), bigint(B.d()), bigint(1) << 256)); }}, + {{Instruction::NOT, {A}}, [=]{ return ~A.d(); }}, + {{Instruction::LT, {A, B}}, [=]() { return A.d() < B.d() ? u256(1) : 0; }}, + {{Instruction::GT, {A, B}}, [=]() -> u256 { return A.d() > B.d() ? 1 : 0; }}, + {{Instruction::SLT, {A, B}}, [=]() -> u256 { return u2s(A.d()) < u2s(B.d()) ? 1 : 0; }}, + {{Instruction::SGT, {A, B}}, [=]() -> u256 { return u2s(A.d()) > u2s(B.d()) ? 1 : 0; }}, + {{Instruction::EQ, {A, B}}, [=]() -> u256 { return A.d() == B.d() ? 1 : 0; }}, + {{Instruction::ISZERO, {A}}, [=]() -> u256 { return A.d() == 0 ? 1 : 0; }}, + {{Instruction::AND, {A, B}}, [=]{ return A.d() & B.d(); }}, + {{Instruction::OR, {A, B}}, [=]{ return A.d() | B.d(); }}, + {{Instruction::XOR, {A, B}}, [=]{ return A.d() ^ B.d(); }}, + {{Instruction::BYTE, {A, B}}, [=]{ return A.d() >= 32 ? 0 : (B.d() >> unsigned(8 * (31 - A.d()))) & 0xff; }}, + {{Instruction::ADDMOD, {A, B, C}}, [=]{ return C.d() == 0 ? 0 : u256((bigint(A.d()) + bigint(B.d())) % C.d()); }}, + {{Instruction::MULMOD, {A, B, C}}, [=]{ return C.d() == 0 ? 0 : u256((bigint(A.d()) * bigint(B.d())) % C.d()); }}, + {{Instruction::MULMOD, {A, B, C}}, [=]{ return A.d() * B.d(); }}, + {{Instruction::SIGNEXTEND, {A, B}}, [=]() -> u256 { + if (A.d() >= 31) + return B.d(); + unsigned testBit = unsigned(A.d()) * 8 + 7; + u256 mask = (u256(1) << testBit) - 1; + return u256(boost::multiprecision::bit_test(B.d(), testBit) ? B.d() | ~mask : B.d() & mask); + }}, + + // invariants involving known constants + {{Instruction::ADD, {X, 0}}, [=]{ return X; }}, + {{Instruction::SUB, {X, 0}}, [=]{ return X; }}, + {{Instruction::MUL, {X, 1}}, [=]{ return X; }}, + {{Instruction::DIV, {X, 1}}, [=]{ return X; }}, + {{Instruction::SDIV, {X, 1}}, [=]{ return X; }}, + {{Instruction::OR, {X, 0}}, [=]{ return X; }}, + {{Instruction::XOR, {X, 0}}, [=]{ return X; }}, + {{Instruction::AND, {X, ~u256(0)}}, [=]{ return X; }}, + {{Instruction::AND, {X, 0}}, [=]{ return u256(0); }}, + {{Instruction::MUL, {X, 0}}, [=]{ return u256(0); }}, + {{Instruction::DIV, {X, 0}}, [=]{ return u256(0); }}, + {{Instruction::DIV, {0, X}}, [=]{ return u256(0); }}, + {{Instruction::MOD, {X, 0}}, [=]{ return u256(0); }}, + {{Instruction::MOD, {0, X}}, [=]{ return u256(0); }}, + {{Instruction::OR, {X, ~u256(0)}}, [=]{ return ~u256(0); }}, + {{Instruction::EQ, {X, 0}}, [=]() -> Pattern { return {Instruction::ISZERO, {X}}; } }, + // operations involving an expression and itself + {{Instruction::AND, {X, X}}, [=]{ return X; }}, + {{Instruction::OR, {X, X}}, [=]{ return X; }}, + {{Instruction::XOR, {X, X}}, [=]{ return u256(0); }}, + {{Instruction::SUB, {X, X}}, [=]{ return u256(0); }}, + {{Instruction::EQ, {X, X}}, [=]{ return u256(1); }}, + {{Instruction::LT, {X, X}}, [=]{ return u256(0); }}, + {{Instruction::SLT, {X, X}}, [=]{ return u256(0); }}, + {{Instruction::GT, {X, X}}, [=]{ return u256(0); }}, + {{Instruction::SGT, {X, X}}, [=]{ return u256(0); }}, + {{Instruction::MOD, {X, X}}, [=]{ return u256(0); }}, + + {{Instruction::NOT, {{Instruction::NOT, {X}}}}, [=]{ return X; }}, + {{Instruction::XOR, {{{X}, {Instruction::XOR, {X, Y}}}}}, [=]{ return Y; }}, + {{Instruction::OR, {{{X}, {Instruction::AND, {X, Y}}}}}, [=]{ return X; }}, + {{Instruction::AND, {{{X}, {Instruction::OR, {X, Y}}}}}, [=]{ return X; }}, + {{Instruction::AND, {{{X}, {Instruction::NOT, {X}}}}}, [=]{ return u256(0); }}, + {{Instruction::OR, {{{X}, {Instruction::NOT, {X}}}}}, [=]{ return ~u256(0); }}, + }); + // Double negation of opcodes with binary result + for (auto const& op: vector<Instruction>{ + Instruction::EQ, + Instruction::LT, + Instruction::SLT, + Instruction::GT, + Instruction::SGT + }) + addRule({ + {Instruction::ISZERO, {{Instruction::ISZERO, {{op, {X, Y}}}}}}, + [=]() -> Pattern { return {op, {X, Y}}; } + }); + addRule({ + {Instruction::ISZERO, {{Instruction::ISZERO, {{Instruction::ISZERO, {X}}}}}}, + [=]() -> Pattern { return {Instruction::ISZERO, {X}}; } + }); + addRule({ + {Instruction::ISZERO, {{Instruction::XOR, {X, Y}}}}, + [=]() -> Pattern { return { Instruction::EQ, {X, Y} }; } + }); + // Associative operations + for (auto const& opFun: vector<pair<Instruction,function<u256(u256 const&,u256 const&)>>>{ + {Instruction::ADD, plus<u256>()}, + {Instruction::MUL, multiplies<u256>()}, + {Instruction::AND, bit_and<u256>()}, + {Instruction::OR, bit_or<u256>()}, + {Instruction::XOR, bit_xor<u256>()} + }) + { + auto op = opFun.first; + auto fun = opFun.second; + // Moving constants to the outside, order matters here! + // we need actions that return expressions (or patterns?) here, and we need also reversed rules + // (X+A)+B -> X+(A+B) + addRules(vector<pair<Pattern, function<Pattern()>>>{{ + {op, {{op, {X, A}}, B}}, + [=]() -> Pattern { return {op, {X, fun(A.d(), B.d())}}; } + }, { + // X+(Y+A) -> (X+Y)+A + {op, {{op, {X, A}}, Y}}, + [=]() -> Pattern { return {op, {{op, {X, Y}}, A}}; } + }, { + // For now, we still need explicit commutativity for the inner pattern + {op, {{op, {A, X}}, B}}, + [=]() -> Pattern { return {op, {X, fun(A.d(), B.d())}}; } + }, { + {op, {{op, {A, X}}, Y}}, + [=]() -> Pattern { return {op, {{op, {X, Y}}, A}}; } + }}); + } + // move constants across subtractions + addRules(vector<pair<Pattern, function<Pattern()>>>{ + { + // X - A -> X + (-A) + {Instruction::SUB, {X, A}}, + [=]() -> Pattern { return {Instruction::ADD, {X, 0 - A.d()}}; } + }, { + // (X + A) - Y -> (X - Y) + A + {Instruction::SUB, {{Instruction::ADD, {X, A}}, Y}}, + [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, A}}; } + }, { + // (A + X) - Y -> (X - Y) + A + {Instruction::SUB, {{Instruction::ADD, {A, X}}, Y}}, + [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, A}}; } + }, { + // X - (Y + A) -> (X - Y) + (-A) + {Instruction::SUB, {X, {Instruction::ADD, {Y, A}}}}, + [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, 0 - A.d()}}; } + }, { + // X - (A + Y) -> (X - Y) + (-A) + {Instruction::SUB, {X, {Instruction::ADD, {A, Y}}}}, + [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, 0 - A.d()}}; } + } + }); +} + +Pattern::Pattern(Instruction _instruction, std::vector<Pattern> const& _arguments): + m_type(Operation), + m_instruction(_instruction), + m_arguments(_arguments) +{ +} + +void Pattern::setMatchGroup(unsigned _group, map<unsigned, Expression const*>& _matchGroups) +{ + m_matchGroup = _group; + m_matchGroups = &_matchGroups; +} + +bool Pattern::matches(Expression const& _expr, ExpressionClasses const& _classes) const +{ + if (!matchesBaseItem(_expr.item)) + return false; + if (m_matchGroup) + { + if (!m_matchGroups->count(m_matchGroup)) + (*m_matchGroups)[m_matchGroup] = &_expr; + else if ((*m_matchGroups)[m_matchGroup]->id != _expr.id) + return false; + } + assertThrow(m_arguments.size() == 0 || _expr.arguments.size() == m_arguments.size(), OptimizerException, ""); + for (size_t i = 0; i < m_arguments.size(); ++i) + if (!m_arguments[i].matches(_classes.representative(_expr.arguments[i]), _classes)) + return false; + return true; +} + +AssemblyItem Pattern::toAssemblyItem(SourceLocation const& _location) const +{ + if (m_type == Operation) + return AssemblyItem(m_instruction, _location); + else + return AssemblyItem(m_type, data(), _location); +} + +string Pattern::toString() const +{ + stringstream s; + switch (m_type) + { + case Operation: + s << instructionInfo(m_instruction).name; + break; + case Push: + if (m_data) + s << "PUSH " << hex << data(); + else + s << "PUSH "; + break; + case UndefinedItem: + s << "ANY"; + break; + default: + if (m_data) + s << "t=" << dec << m_type << " d=" << hex << data(); + else + s << "t=" << dec << m_type << " d: nullptr"; + break; + } + if (!m_requireDataMatch) + s << " ~"; + if (m_matchGroup) + s << "[" << dec << m_matchGroup << "]"; + s << "("; + for (Pattern const& p: m_arguments) + s << p.toString() << ", "; + s << ")"; + return s.str(); +} + +bool Pattern::matchesBaseItem(AssemblyItem const* _item) const +{ + if (m_type == UndefinedItem) + return true; + if (!_item) + return false; + if (m_type != _item->type()) + return false; + else if (m_type == Operation) + return m_instruction == _item->instruction(); + else if (m_requireDataMatch) + return data() == _item->data(); + return true; +} + +Pattern::Expression const& Pattern::matchGroupValue() const +{ + assertThrow(m_matchGroup > 0, OptimizerException, ""); + assertThrow(!!m_matchGroups, OptimizerException, ""); + assertThrow((*m_matchGroups)[m_matchGroup], OptimizerException, ""); + return *(*m_matchGroups)[m_matchGroup]; +} + +u256 const& Pattern::data() const +{ + assertThrow(m_data, OptimizerException, ""); + return *m_data; +} + +ExpressionTemplate::ExpressionTemplate(Pattern const& _pattern, SourceLocation const& _location) +{ + if (_pattern.matchGroup()) + { + hasId = true; + id = _pattern.id(); + } + else + { + hasId = false; + item = _pattern.toAssemblyItem(_location); + } + for (auto const& arg: _pattern.arguments()) + arguments.push_back(ExpressionTemplate(arg, _location)); +} + +string ExpressionTemplate::toString() const +{ + stringstream s; + if (hasId) + s << id; + else + s << item; + s << "("; + for (auto const& arg: arguments) + s << arg.toString(); + s << ")"; + return s.str(); +} diff --git a/libevmasm/SimplificationRules.h b/libevmasm/SimplificationRules.h new file mode 100644 index 00000000..a4da5849 --- /dev/null +++ b/libevmasm/SimplificationRules.h @@ -0,0 +1,140 @@ +/* + This file is part of solidity. + + solidity is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + solidity is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with solidity. If not, see <http://www.gnu.org/licenses/>. +*/ +/** + * @file SimplificationRules + * @author Christian <chris@ethereum.org> + * @date 2017 + * Module for applying replacement rules against Expressions. + */ + +#pragma once + +#include <libevmasm/ExpressionClasses.h> + +#include <functional> +#include <vector> + +namespace dev +{ +namespace eth +{ + +class Pattern; + +/** + * Container for all simplification rules. + */ +class Rules: public boost::noncopyable +{ +public: + using Expression = ExpressionClasses::Expression; + + Rules(); + + /// @returns a pointer to the first matching pattern and sets the match + /// groups accordingly. + std::pair<Pattern, std::function<Pattern()>> const* findFirstMatch( + Expression const& _expr, + ExpressionClasses const& _classes + ); + +private: + void addRules(std::vector<std::pair<Pattern, std::function<Pattern()>>> const& _rules); + void addRule(std::pair<Pattern, std::function<Pattern()>> const& _rule); + + void resetMatchGroups() { m_matchGroups.clear(); } + + std::map<unsigned, Expression const*> m_matchGroups; + std::vector<std::pair<Pattern, std::function<Pattern()>>> m_rules[256]; +}; + +/** + * Pattern to match against an expression. + * Also stores matched expressions to retrieve them later, for constructing new expressions using + * ExpressionTemplate. + */ +class Pattern +{ +public: + using Expression = ExpressionClasses::Expression; + using Id = ExpressionClasses::Id; + + // Matches a specific constant value. + Pattern(unsigned _value): Pattern(u256(_value)) {} + // Matches a specific constant value. + Pattern(u256 const& _value): m_type(Push), m_requireDataMatch(true), m_data(std::make_shared<u256>(_value)) {} + // Matches a specific assembly item type or anything if not given. + Pattern(AssemblyItemType _type = UndefinedItem): m_type(_type) {} + // Matches a given instruction with given arguments + Pattern(Instruction _instruction, std::vector<Pattern> const& _arguments = {}); + /// Sets this pattern to be part of the match group with the identifier @a _group. + /// Inside one rule, all patterns in the same match group have to match expressions from the + /// same expression equivalence class. + void setMatchGroup(unsigned _group, std::map<unsigned, Expression const*>& _matchGroups); + unsigned matchGroup() const { return m_matchGroup; } + bool matches(Expression const& _expr, ExpressionClasses const& _classes) const; + + AssemblyItem toAssemblyItem(SourceLocation const& _location) const; + std::vector<Pattern> arguments() const { return m_arguments; } + + /// @returns the id of the matched expression if this pattern is part of a match group. + Id id() const { return matchGroupValue().id; } + /// @returns the data of the matched expression if this pattern is part of a match group. + u256 const& d() const { return matchGroupValue().item->data(); } + + std::string toString() const; + + AssemblyItemType type() const { return m_type; } + Instruction instruction() const + { + assertThrow(type() == Operation, OptimizerException, ""); + return m_instruction; + } + +private: + bool matchesBaseItem(AssemblyItem const* _item) const; + Expression const& matchGroupValue() const; + u256 const& data() const; + + AssemblyItemType m_type; + bool m_requireDataMatch = false; + Instruction m_instruction; ///< Only valid if m_type is Operation + std::shared_ptr<u256> m_data; ///< Only valid if m_type is not Operation + std::vector<Pattern> m_arguments; + unsigned m_matchGroup = 0; + std::map<unsigned, Expression const*>* m_matchGroups = nullptr; +}; + +/** + * Template for a new expression that can be built from matched patterns. + */ +struct ExpressionTemplate +{ + using Expression = ExpressionClasses::Expression; + using Id = ExpressionClasses::Id; + explicit ExpressionTemplate(Pattern const& _pattern, SourceLocation const& _location); + std::string toString() const; + bool hasId = false; + /// Id of the matched expression, if available. + Id id = Id(-1); + // Otherwise, assembly item. + AssemblyItem item = UndefinedItem; + std::vector<ExpressionTemplate> arguments; +}; + +} +} |