diff options
Optimise simplification rules (sort into bins).
Diffstat (limited to 'libevmasm/ExpressionClasses.cpp')
-rw-r--r-- | libevmasm/ExpressionClasses.cpp | 338 |
1 files changed, 11 insertions, 327 deletions
diff --git a/libevmasm/ExpressionClasses.cpp b/libevmasm/ExpressionClasses.cpp index fccac1b0..fc283b0b 100644 --- a/libevmasm/ExpressionClasses.cpp +++ b/libevmasm/ExpressionClasses.cpp @@ -29,6 +29,7 @@ #include <boost/noncopyable.hpp> #include <libevmasm/Assembly.h> #include <libevmasm/CommonSubexpressionEliminator.h> +#include <libevmasm/SimplificationRules.h> using namespace std; using namespace dev; @@ -180,191 +181,6 @@ string ExpressionClasses::fullDAGToString(ExpressionClasses::Id _id) const return str.str(); } -class Rules: public boost::noncopyable -{ -public: - Rules(); - void resetMatchGroups() { m_matchGroups.clear(); } - vector<pair<Pattern, function<Pattern()>>> const& rules() const { return m_rules; } - -private: - using Expression = ExpressionClasses::Expression; - map<unsigned, Expression const*> m_matchGroups; - vector<pair<Pattern, function<Pattern()>>> m_rules; -}; - -template <class S> S divWorkaround(S const& _a, S const& _b) -{ - return (S)(bigint(_a) / bigint(_b)); -} - -template <class S> S modWorkaround(S const& _a, S const& _b) -{ - return (S)(bigint(_a) % bigint(_b)); -} - -Rules::Rules() -{ - // Multiple occurences of one of these inside one rule must match the same equivalence class. - // Constants. - Pattern A(Push); - Pattern B(Push); - Pattern C(Push); - // Anything. - Pattern X; - Pattern Y; - Pattern Z; - A.setMatchGroup(1, m_matchGroups); - B.setMatchGroup(2, m_matchGroups); - C.setMatchGroup(3, m_matchGroups); - X.setMatchGroup(4, m_matchGroups); - Y.setMatchGroup(5, m_matchGroups); - Z.setMatchGroup(6, m_matchGroups); - - m_rules = vector<pair<Pattern, function<Pattern()>>>{ - // arithmetics on constants - {{Instruction::ADD, {A, B}}, [=]{ return A.d() + B.d(); }}, - {{Instruction::MUL, {A, B}}, [=]{ return A.d() * B.d(); }}, - {{Instruction::SUB, {A, B}}, [=]{ return A.d() - B.d(); }}, - {{Instruction::DIV, {A, B}}, [=]{ return B.d() == 0 ? 0 : divWorkaround(A.d(), B.d()); }}, - {{Instruction::SDIV, {A, B}}, [=]{ return B.d() == 0 ? 0 : s2u(divWorkaround(u2s(A.d()), u2s(B.d()))); }}, - {{Instruction::MOD, {A, B}}, [=]{ return B.d() == 0 ? 0 : modWorkaround(A.d(), B.d()); }}, - {{Instruction::SMOD, {A, B}}, [=]{ return B.d() == 0 ? 0 : s2u(modWorkaround(u2s(A.d()), u2s(B.d()))); }}, - {{Instruction::EXP, {A, B}}, [=]{ return u256(boost::multiprecision::powm(bigint(A.d()), bigint(B.d()), bigint(1) << 256)); }}, - {{Instruction::NOT, {A}}, [=]{ return ~A.d(); }}, - {{Instruction::LT, {A, B}}, [=]() { return A.d() < B.d() ? u256(1) : 0; }}, - {{Instruction::GT, {A, B}}, [=]() -> u256 { return A.d() > B.d() ? 1 : 0; }}, - {{Instruction::SLT, {A, B}}, [=]() -> u256 { return u2s(A.d()) < u2s(B.d()) ? 1 : 0; }}, - {{Instruction::SGT, {A, B}}, [=]() -> u256 { return u2s(A.d()) > u2s(B.d()) ? 1 : 0; }}, - {{Instruction::EQ, {A, B}}, [=]() -> u256 { return A.d() == B.d() ? 1 : 0; }}, - {{Instruction::ISZERO, {A}}, [=]() -> u256 { return A.d() == 0 ? 1 : 0; }}, - {{Instruction::AND, {A, B}}, [=]{ return A.d() & B.d(); }}, - {{Instruction::OR, {A, B}}, [=]{ return A.d() | B.d(); }}, - {{Instruction::XOR, {A, B}}, [=]{ return A.d() ^ B.d(); }}, - {{Instruction::BYTE, {A, B}}, [=]{ return A.d() >= 32 ? 0 : (B.d() >> unsigned(8 * (31 - A.d()))) & 0xff; }}, - {{Instruction::ADDMOD, {A, B, C}}, [=]{ return C.d() == 0 ? 0 : u256((bigint(A.d()) + bigint(B.d())) % C.d()); }}, - {{Instruction::MULMOD, {A, B, C}}, [=]{ return C.d() == 0 ? 0 : u256((bigint(A.d()) * bigint(B.d())) % C.d()); }}, - {{Instruction::MULMOD, {A, B, C}}, [=]{ return A.d() * B.d(); }}, - {{Instruction::SIGNEXTEND, {A, B}}, [=]() -> u256 { - if (A.d() >= 31) - return B.d(); - unsigned testBit = unsigned(A.d()) * 8 + 7; - u256 mask = (u256(1) << testBit) - 1; - return u256(boost::multiprecision::bit_test(B.d(), testBit) ? B.d() | ~mask : B.d() & mask); - }}, - - // invariants involving known constants - {{Instruction::ADD, {X, 0}}, [=]{ return X; }}, - {{Instruction::SUB, {X, 0}}, [=]{ return X; }}, - {{Instruction::MUL, {X, 1}}, [=]{ return X; }}, - {{Instruction::DIV, {X, 1}}, [=]{ return X; }}, - {{Instruction::SDIV, {X, 1}}, [=]{ return X; }}, - {{Instruction::OR, {X, 0}}, [=]{ return X; }}, - {{Instruction::XOR, {X, 0}}, [=]{ return X; }}, - {{Instruction::AND, {X, ~u256(0)}}, [=]{ return X; }}, - {{Instruction::AND, {X, 0}}, [=]{ return u256(0); }}, - {{Instruction::MUL, {X, 0}}, [=]{ return u256(0); }}, - {{Instruction::DIV, {X, 0}}, [=]{ return u256(0); }}, - {{Instruction::DIV, {0, X}}, [=]{ return u256(0); }}, - {{Instruction::MOD, {X, 0}}, [=]{ return u256(0); }}, - {{Instruction::MOD, {0, X}}, [=]{ return u256(0); }}, - {{Instruction::OR, {X, ~u256(0)}}, [=]{ return ~u256(0); }}, - {{Instruction::EQ, {X, 0}}, [=]() -> Pattern { return {Instruction::ISZERO, {X}}; } }, - // operations involving an expression and itself - {{Instruction::AND, {X, X}}, [=]{ return X; }}, - {{Instruction::OR, {X, X}}, [=]{ return X; }}, - {{Instruction::XOR, {X, X}}, [=]{ return u256(0); }}, - {{Instruction::SUB, {X, X}}, [=]{ return u256(0); }}, - {{Instruction::EQ, {X, X}}, [=]{ return u256(1); }}, - {{Instruction::LT, {X, X}}, [=]{ return u256(0); }}, - {{Instruction::SLT, {X, X}}, [=]{ return u256(0); }}, - {{Instruction::GT, {X, X}}, [=]{ return u256(0); }}, - {{Instruction::SGT, {X, X}}, [=]{ return u256(0); }}, - {{Instruction::MOD, {X, X}}, [=]{ return u256(0); }}, - - {{Instruction::NOT, {{Instruction::NOT, {X}}}}, [=]{ return X; }}, - {{Instruction::XOR, {{{X}, {Instruction::XOR, {X, Y}}}}}, [=]{ return Y; }}, - {{Instruction::OR, {{{X}, {Instruction::AND, {X, Y}}}}}, [=]{ return X; }}, - {{Instruction::AND, {{{X}, {Instruction::OR, {X, Y}}}}}, [=]{ return X; }}, - {{Instruction::AND, {{{X}, {Instruction::NOT, {X}}}}}, [=]{ return u256(0); }}, - {{Instruction::OR, {{{X}, {Instruction::NOT, {X}}}}}, [=]{ return ~u256(0); }}, - }; - // Double negation of opcodes with binary result - for (auto const& op: vector<Instruction>{ - Instruction::EQ, - Instruction::LT, - Instruction::SLT, - Instruction::GT, - Instruction::SGT - }) - m_rules.push_back({ - {Instruction::ISZERO, {{Instruction::ISZERO, {{op, {X, Y}}}}}}, - [=]() -> Pattern { return {op, {X, Y}}; } - }); - m_rules.push_back({ - {Instruction::ISZERO, {{Instruction::ISZERO, {{Instruction::ISZERO, {X}}}}}}, - [=]() -> Pattern { return {Instruction::ISZERO, {X}}; } - }); - m_rules.push_back({ - {Instruction::ISZERO, {{Instruction::XOR, {X, Y}}}}, - [=]() -> Pattern { return { Instruction::EQ, {X, Y} }; } - }); - // Associative operations - for (auto const& opFun: vector<pair<Instruction,function<u256(u256 const&,u256 const&)>>>{ - {Instruction::ADD, plus<u256>()}, - {Instruction::MUL, multiplies<u256>()}, - {Instruction::AND, bit_and<u256>()}, - {Instruction::OR, bit_or<u256>()}, - {Instruction::XOR, bit_xor<u256>()} - }) - { - auto op = opFun.first; - auto fun = opFun.second; - // Moving constants to the outside, order matters here! - // we need actions that return expressions (or patterns?) here, and we need also reversed rules - // (X+A)+B -> X+(A+B) - m_rules += vector<pair<Pattern, function<Pattern()>>>{{ - {op, {{op, {X, A}}, B}}, - [=]() -> Pattern { return {op, {X, fun(A.d(), B.d())}}; } - }, { - // X+(Y+A) -> (X+Y)+A - {op, {{op, {X, A}}, Y}}, - [=]() -> Pattern { return {op, {{op, {X, Y}}, A}}; } - }, { - // For now, we still need explicit commutativity for the inner pattern - {op, {{op, {A, X}}, B}}, - [=]() -> Pattern { return {op, {X, fun(A.d(), B.d())}}; } - }, { - {op, {{op, {A, X}}, Y}}, - [=]() -> Pattern { return {op, {{op, {X, Y}}, A}}; } - }}; - } - // move constants across subtractions - m_rules += vector<pair<Pattern, function<Pattern()>>>{ - { - // X - A -> X + (-A) - {Instruction::SUB, {X, A}}, - [=]() -> Pattern { return {Instruction::ADD, {X, 0 - A.d()}}; } - }, { - // (X + A) - Y -> (X - Y) + A - {Instruction::SUB, {{Instruction::ADD, {X, A}}, Y}}, - [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, A}}; } - }, { - // (A + X) - Y -> (X - Y) + A - {Instruction::SUB, {{Instruction::ADD, {A, X}}, Y}}, - [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, A}}; } - }, { - // X - (Y + A) -> (X - Y) + (-A) - {Instruction::SUB, {X, {Instruction::ADD, {Y, A}}}}, - [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, 0 - A.d()}}; } - }, { - // X - (A + Y) -> (X - Y) + (-A) - {Instruction::SUB, {X, {Instruction::ADD, {A, Y}}}}, - [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, 0 - A.d()}}; } - } - }; -} - ExpressionClasses::Id ExpressionClasses::tryToSimplify(Expression const& _expr, bool _secondRun) { static Rules rules; @@ -376,21 +192,17 @@ ExpressionClasses::Id ExpressionClasses::tryToSimplify(Expression const& _expr, ) return -1; - for (auto const& rule: rules.rules()) + if (auto match = rules.findFirstMatch(_expr, *this)) { - rules.resetMatchGroups(); - if (rule.first.matches(_expr, *this)) - { - // Debug info - //cout << "Simplifying " << *_expr.item << "("; - //for (Id arg: _expr.arguments) - // cout << fullDAGToString(arg) << ", "; - //cout << ")" << endl; - //cout << "with rule " << rule.first.toString() << endl; - //ExpressionTemplate t(rule.second()); - //cout << "to " << rule.second().toString() << endl; - return rebuildExpression(ExpressionTemplate(rule.second(), _expr.item->location())); - } + // Debug info + //cout << "Simplifying " << *_expr.item << "("; + //for (Id arg: _expr.arguments) + // cout << fullDAGToString(arg) << ", "; + //cout << ")" << endl; + //cout << "with rule " << match->first.toString() << endl; + //ExpressionTemplate t(match->second()); + //cout << "to " << match->second().toString() << endl; + return rebuildExpression(ExpressionTemplate(match->second(), _expr.item->location())); } if (!_secondRun && _expr.arguments.size() == 2 && SemanticInformation::isCommutativeOperation(*_expr.item)) @@ -413,131 +225,3 @@ ExpressionClasses::Id ExpressionClasses::rebuildExpression(ExpressionTemplate co arguments.push_back(rebuildExpression(t)); return find(_template.item, arguments); } - - -Pattern::Pattern(Instruction _instruction, std::vector<Pattern> const& _arguments): - m_type(Operation), - m_instruction(_instruction), - m_arguments(_arguments) -{ -} - -void Pattern::setMatchGroup(unsigned _group, map<unsigned, Expression const*>& _matchGroups) -{ - m_matchGroup = _group; - m_matchGroups = &_matchGroups; -} - -bool Pattern::matches(Expression const& _expr, ExpressionClasses const& _classes) const -{ - if (!matchesBaseItem(_expr.item)) - return false; - if (m_matchGroup) - { - if (!m_matchGroups->count(m_matchGroup)) - (*m_matchGroups)[m_matchGroup] = &_expr; - else if ((*m_matchGroups)[m_matchGroup]->id != _expr.id) - return false; - } - assertThrow(m_arguments.size() == 0 || _expr.arguments.size() == m_arguments.size(), OptimizerException, ""); - for (size_t i = 0; i < m_arguments.size(); ++i) - if (!m_arguments[i].matches(_classes.representative(_expr.arguments[i]), _classes)) - return false; - return true; -} - -AssemblyItem Pattern::toAssemblyItem(SourceLocation const& _location) const -{ - if (m_type == Operation) - return AssemblyItem(m_instruction, _location); - else - return AssemblyItem(m_type, data(), _location); -} - -string Pattern::toString() const -{ - stringstream s; - switch (m_type) - { - case Operation: - s << instructionInfo(m_instruction).name; - break; - case Push: - s << "PUSH " << hex << data(); - break; - case UndefinedItem: - s << "ANY"; - break; - default: - s << "t=" << dec << m_type << " d=" << hex << data(); - break; - } - if (!m_requireDataMatch) - s << " ~"; - if (m_matchGroup) - s << "[" << dec << m_matchGroup << "]"; - s << "("; - for (Pattern const& p: m_arguments) - s << p.toString() << ", "; - s << ")"; - return s.str(); -} - -bool Pattern::matchesBaseItem(AssemblyItem const* _item) const -{ - if (m_type == UndefinedItem) - return true; - if (!_item) - return false; - if (m_type != _item->type()) - return false; - else if (m_type == Operation) - return m_instruction == _item->instruction(); - else if (m_requireDataMatch) - return data() == _item->data(); - return true; -} - -Pattern::Expression const& Pattern::matchGroupValue() const -{ - assertThrow(m_matchGroup > 0, OptimizerException, ""); - assertThrow(!!m_matchGroups, OptimizerException, ""); - assertThrow((*m_matchGroups)[m_matchGroup], OptimizerException, ""); - return *(*m_matchGroups)[m_matchGroup]; -} - -u256 const& Pattern::data() const -{ - assertThrow(m_data, OptimizerException, ""); - return *m_data; -} - -ExpressionTemplate::ExpressionTemplate(Pattern const& _pattern, SourceLocation const& _location) -{ - if (_pattern.matchGroup()) - { - hasId = true; - id = _pattern.id(); - } - else - { - hasId = false; - item = _pattern.toAssemblyItem(_location); - } - for (auto const& arg: _pattern.arguments()) - arguments.push_back(ExpressionTemplate(arg, _location)); -} - -string ExpressionTemplate::toString() const -{ - stringstream s; - if (hasId) - s << id; - else - s << item; - s << "("; - for (auto const& arg: arguments) - s << arg.toString(); - s << ")"; - return s.str(); -} |