aboutsummaryrefslogblamecommitdiffstats
path: root/ExpressionClasses.cpp
blob: cfbeba7fa060e34c79d52502f4d8d5430b6cb3bf (plain) (tree)






































                                                                                                
                                                                     


















                                                                                                  



                                                                         
 



                                                  



























































                                                                                                  









                                                           
























































































































































                                                                                                                                               




                                                                  
























































                                                                                                                   
                                         
















































                                                                                                                     
                                                              


                                    


                                    
                             
                                                          









































                                                                                                
/*
    This file is part of cpp-ethereum.

    cpp-ethereum is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    cpp-ethereum is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with cpp-ethereum.  If not, see <http://www.gnu.org/licenses/>.
*/
/**
 * @file ExpressionClasses.cpp
 * @author Christian <c@ethdev.com>
 * @date 2015
 * Container for equivalence classes of expressions for use in common subexpression elimination.
 */

#include <libevmasm/ExpressionClasses.h>
#include <utility>
#include <tuple>
#include <functional>
#include <boost/range/adaptor/reversed.hpp>
#include <boost/noncopyable.hpp>
#include <libevmasm/Assembly.h>
#include <libevmasm/CommonSubexpressionEliminator.h>

using namespace std;
using namespace dev;
using namespace dev::eth;


bool ExpressionClasses::Expression::operator<(ExpressionClasses::Expression const& _other) const
{
    assertThrow(!!item && !!_other.item, OptimizerException, "");
    auto type = item->type();
    auto otherType = _other.item->type();
    return std::tie(type, item->data(), arguments, sequenceNumber) <
        std::tie(otherType, _other.item->data(), _other.arguments, _other.sequenceNumber);
}

ExpressionClasses::Id ExpressionClasses::find(
    AssemblyItem const& _item,
    Ids const& _arguments,
    bool _copyItem,
    unsigned _sequenceNumber
)
{
    Expression exp;
    exp.id = Id(-1);
    exp.item = &_item;
    exp.arguments = _arguments;
    exp.sequenceNumber = _sequenceNumber;

    if (SemanticInformation::isDeterministic(_item))
    {
        if (SemanticInformation::isCommutativeOperation(_item))
            sort(exp.arguments.begin(), exp.arguments.end());

        auto it = m_expressions.find(exp);
        if (it != m_expressions.end())
            return it->id;
    }

    if (_copyItem)
        exp.item = storeItem(_item);

    ExpressionClasses::Id id = tryToSimplify(exp);
    if (id < m_representatives.size())
        exp.id = id;
    else
    {
        exp.id = m_representatives.size();
        m_representatives.push_back(exp);
    }
    m_expressions.insert(exp);
    return exp.id;
}

bool ExpressionClasses::knownToBeDifferent(ExpressionClasses::Id _a, ExpressionClasses::Id _b)
{
    // Try to simplify "_a - _b" and return true iff the value is a non-zero constant.
    return knownNonZero(find(Instruction::SUB, {_a, _b}));
}

bool ExpressionClasses::knownToBeDifferentBy32(ExpressionClasses::Id _a, ExpressionClasses::Id _b)
{
    // Try to simplify "_a - _b" and return true iff the value is at least 32 away from zero.
    u256 const* v = knownConstant(find(Instruction::SUB, {_a, _b}));
    // forbidden interval is ["-31", 31]
    return v && *v + 31 > u256(62);
}

bool ExpressionClasses::knownZero(Id _c)
{
    return Pattern(u256(0)).matches(representative(_c), *this);
}

bool ExpressionClasses::knownNonZero(Id _c)
{
    return Pattern(u256(0)).matches(representative(find(Instruction::ISZERO, {_c})), *this);
}

u256 const* ExpressionClasses::knownConstant(Id _c)
{
    map<unsigned, Expression const*> matchGroups;
    Pattern constant(Push);
    constant.setMatchGroup(1, matchGroups);
    if (!constant.matches(representative(_c), *this))
        return nullptr;
    return &constant.d();
}

AssemblyItem const* ExpressionClasses::storeItem(AssemblyItem const& _item)
{
    m_spareAssemblyItems.push_back(make_shared<AssemblyItem>(_item));
    return m_spareAssemblyItems.back().get();
}

string ExpressionClasses::fullDAGToString(ExpressionClasses::Id _id) const
{
    Expression const& expr = representative(_id);
    stringstream str;
    str << dec << expr.id << ":";
    if (expr.item)
    {
        str << *expr.item << "(";
        for (Id arg: expr.arguments)
            str << fullDAGToString(arg) << ",";
        str << ")";
    }
    else
        str << " UNIQUE";
    return str.str();
}

class Rules: public boost::noncopyable
{
public:
    Rules();
    void resetMatchGroups() { m_matchGroups.clear(); }
    vector<pair<Pattern, function<Pattern()>>> rules() const { return m_rules; }

private:
    using Expression = ExpressionClasses::Expression;
    map<unsigned, Expression const*> m_matchGroups;
    vector<pair<Pattern, function<Pattern()>>> m_rules;
};

Rules::Rules()
{
    // Multiple occurences of one of these inside one rule must match the same equivalence class.
    // Constants.
    Pattern A(Push);
    Pattern B(Push);
    Pattern C(Push);
    // Anything.
    Pattern X;
    Pattern Y;
    Pattern Z;
    A.setMatchGroup(1, m_matchGroups);
    B.setMatchGroup(2, m_matchGroups);
    C.setMatchGroup(3, m_matchGroups);
    X.setMatchGroup(4, m_matchGroups);
    Y.setMatchGroup(5, m_matchGroups);
    Z.setMatchGroup(6, m_matchGroups);

    m_rules = vector<pair<Pattern, function<Pattern()>>>{
        // arithmetics on constants
        {{Instruction::ADD, {A, B}}, [=]{ return A.d() + B.d(); }},
        {{Instruction::MUL, {A, B}}, [=]{ return A.d() * B.d(); }},
        {{Instruction::SUB, {A, B}}, [=]{ return A.d() - B.d(); }},
        {{Instruction::DIV, {A, B}}, [=]{ return B.d() == 0 ? 0 : A.d() / B.d(); }},
        {{Instruction::SDIV, {A, B}}, [=]{ return B.d() == 0 ? 0 : s2u(u2s(A.d()) / u2s(B.d())); }},
        {{Instruction::MOD, {A, B}}, [=]{ return B.d() == 0 ? 0 : A.d() % B.d(); }},
        {{Instruction::SMOD, {A, B}}, [=]{ return B.d() == 0 ? 0 : s2u(u2s(A.d()) % u2s(B.d())); }},
        {{Instruction::EXP, {A, B}}, [=]{ return u256(boost::multiprecision::powm(bigint(A.d()), bigint(B.d()), bigint(1) << 256)); }},
        {{Instruction::NOT, {A}}, [=]{ return ~A.d(); }},
        {{Instruction::LT, {A, B}}, [=]() { return A.d() < B.d() ? u256(1) : 0; }},
        {{Instruction::GT, {A, B}}, [=]() -> u256 { return A.d() > B.d() ? 1 : 0; }},
        {{Instruction::SLT, {A, B}}, [=]() -> u256 { return u2s(A.d()) < u2s(B.d()) ? 1 : 0; }},
        {{Instruction::SGT, {A, B}}, [=]() -> u256 { return u2s(A.d()) > u2s(B.d()) ? 1 : 0; }},
        {{Instruction::EQ, {A, B}}, [=]() -> u256 { return A.d() == B.d() ? 1 : 0; }},
        {{Instruction::ISZERO, {A}}, [=]() -> u256 { return A.d() == 0 ? 1 : 0; }},
        {{Instruction::AND, {A, B}}, [=]{ return A.d() & B.d(); }},
        {{Instruction::OR, {A, B}}, [=]{ return A.d() | B.d(); }},
        {{Instruction::XOR, {A, B}}, [=]{ return A.d() ^ B.d(); }},
        {{Instruction::BYTE, {A, B}}, [=]{ return A.d() >= 32 ? 0 : (B.d() >> unsigned(8 * (31 - A.d()))) & 0xff; }},
        {{Instruction::ADDMOD, {A, B, C}}, [=]{ return C.d() == 0 ? 0 : u256((bigint(A.d()) + bigint(B.d())) % C.d()); }},
        {{Instruction::MULMOD, {A, B, C}}, [=]{ return C.d() == 0 ? 0 : u256((bigint(A.d()) * bigint(B.d())) % C.d()); }},
        {{Instruction::MULMOD, {A, B, C}}, [=]{ return A.d() * B.d(); }},
        {{Instruction::SIGNEXTEND, {A, B}}, [=]() -> u256 {
            if (A.d() >= 31)
                return B.d();
            unsigned testBit = unsigned(A.d()) * 8 + 7;
            u256 mask = (u256(1) << testBit) - 1;
            return u256(boost::multiprecision::bit_test(B.d(), testBit) ? B.d() | ~mask : B.d() & mask);
        }},

        // invariants involving known constants
        {{Instruction::ADD, {X, 0}}, [=]{ return X; }},
        {{Instruction::MUL, {X, 1}}, [=]{ return X; }},
        {{Instruction::DIV, {X, 1}}, [=]{ return X; }},
        {{Instruction::SDIV, {X, 1}}, [=]{ return X; }},
        {{Instruction::OR, {X, 0}}, [=]{ return X; }},
        {{Instruction::XOR, {X, 0}}, [=]{ return X; }},
        {{Instruction::AND, {X, ~u256(0)}}, [=]{ return X; }},
        {{Instruction::MUL, {X, 0}}, [=]{ return u256(0); }},
        {{Instruction::DIV, {X, 0}}, [=]{ return u256(0); }},
        {{Instruction::MOD, {X, 0}}, [=]{ return u256(0); }},
        {{Instruction::MOD, {0, X}}, [=]{ return u256(0); }},
        {{Instruction::AND, {X, 0}}, [=]{ return u256(0); }},
        {{Instruction::OR, {X, ~u256(0)}}, [=]{ return ~u256(0); }},
        // operations involving an expression and itself
        {{Instruction::AND, {X, X}}, [=]{ return X; }},
        {{Instruction::OR, {X, X}}, [=]{ return X; }},
        {{Instruction::SUB, {X, X}}, [=]{ return u256(0); }},
        {{Instruction::EQ, {X, X}}, [=]{ return u256(1); }},
        {{Instruction::LT, {X, X}}, [=]{ return u256(0); }},
        {{Instruction::SLT, {X, X}}, [=]{ return u256(0); }},
        {{Instruction::GT, {X, X}}, [=]{ return u256(0); }},
        {{Instruction::SGT, {X, X}}, [=]{ return u256(0); }},
        {{Instruction::MOD, {X, X}}, [=]{ return u256(0); }},

        {{Instruction::NOT, {{Instruction::NOT, {X}}}}, [=]{ return X; }},
    };
    // Associative operations
    for (auto const& opFun: vector<pair<Instruction,function<u256(u256 const&,u256 const&)>>>{
        {Instruction::ADD, plus<u256>()},
        {Instruction::MUL, multiplies<u256>()},
        {Instruction::AND, bit_and<u256>()},
        {Instruction::OR, bit_or<u256>()},
        {Instruction::XOR, bit_xor<u256>()}
    })
    {
        auto op = opFun.first;
        auto fun = opFun.second;
        // Moving constants to the outside, order matters here!
        // we need actions that return expressions (or patterns?) here, and we need also reversed rules
        // (X+A)+B -> X+(A+B)
        m_rules += vector<pair<Pattern, function<Pattern()>>>{{
            {op, {{op, {X, A}}, B}},
            [=]() -> Pattern { return {op, {X, fun(A.d(), B.d())}}; }
        }, {
        // X+(Y+A) -> (X+Y)+A
            {op, {{op, {X, A}}, Y}},
            [=]() -> Pattern { return {op, {{op, {X, Y}}, A}}; }
        }, {
        // For now, we still need explicit commutativity for the inner pattern
            {op, {{op, {A, X}}, B}},
            [=]() -> Pattern { return {op, {X, fun(A.d(), B.d())}}; }
        }, {
            {op, {{op, {A, X}}, Y}},
            [=]() -> Pattern { return {op, {{op, {X, Y}}, A}}; }
        }};
    }
    // move constants across subtractions
    m_rules += vector<pair<Pattern, function<Pattern()>>>{
        {
            // X - A -> X + (-A)
            {Instruction::SUB, {X, A}},
            [=]() -> Pattern { return {Instruction::ADD, {X, 0 - A.d()}}; }
        }, {
            // (X + A) - Y -> (X - Y) + A
            {Instruction::SUB, {{Instruction::ADD, {X, A}}, Y}},
            [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, A}}; }
        }, {
            // (A + X) - Y -> (X - Y) + A
            {Instruction::SUB, {{Instruction::ADD, {A, X}}, Y}},
            [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, A}}; }
        }, {
            // X - (Y + A) -> (X - Y) + (-A)
            {Instruction::SUB, {X, {Instruction::ADD, {Y, A}}}},
            [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, 0 - A.d()}}; }
        }, {
            // X - (A + Y) -> (X - Y) + (-A)
            {Instruction::SUB, {X, {Instruction::ADD, {A, Y}}}},
            [=]() -> Pattern { return {Instruction::ADD, {{Instruction::SUB, {X, Y}}, 0 - A.d()}}; }
        }
    };
}

ExpressionClasses::Id ExpressionClasses::tryToSimplify(Expression const& _expr, bool _secondRun)
{
    static Rules rules;

    if (
        !_expr.item ||
        _expr.item->type() != Operation ||
        !SemanticInformation::isDeterministic(*_expr.item)
    )
        return -1;

    for (auto const& rule: rules.rules())
    {
        rules.resetMatchGroups();
        if (rule.first.matches(_expr, *this))
        {
            // Debug info
            //cout << "Simplifying " << *_expr.item << "(";
            //for (Id arg: _expr.arguments)
            //  cout << fullDAGToString(arg) << ", ";
            //cout << ")" << endl;
            //cout << "with rule " << rule.first.toString() << endl;
            //ExpressionTemplate t(rule.second());
            //cout << "to " << rule.second().toString() << endl;
            return rebuildExpression(ExpressionTemplate(rule.second(), _expr.item->getLocation()));
        }
    }

    if (!_secondRun && _expr.arguments.size() == 2 && SemanticInformation::isCommutativeOperation(*_expr.item))
    {
        Expression expr = _expr;
        swap(expr.arguments[0], expr.arguments[1]);
        return tryToSimplify(expr, true);
    }

    return -1;
}

ExpressionClasses::Id ExpressionClasses::rebuildExpression(ExpressionTemplate const& _template)
{
    if (_template.hasId)
        return _template.id;

    Ids arguments;
    for (ExpressionTemplate const& t: _template.arguments)
        arguments.push_back(rebuildExpression(t));
    return find(_template.item, arguments);
}


Pattern::Pattern(Instruction _instruction, std::vector<Pattern> const& _arguments):
    m_type(Operation),
    m_requireDataMatch(true),
    m_data(_instruction),
    m_arguments(_arguments)
{
}

void Pattern::setMatchGroup(unsigned _group, map<unsigned, Expression const*>& _matchGroups)
{
    m_matchGroup = _group;
    m_matchGroups = &_matchGroups;
}

bool Pattern::matches(Expression const& _expr, ExpressionClasses const& _classes) const
{
    if (!matchesBaseItem(_expr.item))
        return false;
    if (m_matchGroup)
    {
        if (!m_matchGroups->count(m_matchGroup))
            (*m_matchGroups)[m_matchGroup] = &_expr;
        else if ((*m_matchGroups)[m_matchGroup]->id != _expr.id)
            return false;
    }
    assertThrow(m_arguments.size() == 0 || _expr.arguments.size() == m_arguments.size(), OptimizerException, "");
    for (size_t i = 0; i < m_arguments.size(); ++i)
        if (!m_arguments[i].matches(_classes.representative(_expr.arguments[i]), _classes))
            return false;
    return true;
}

AssemblyItem Pattern::toAssemblyItem(SourceLocation const& _location) const
{
    return AssemblyItem(m_type, m_data, _location);
}

string Pattern::toString() const
{
    stringstream s;
    switch (m_type)
    {
    case Operation:
        s << instructionInfo(Instruction(unsigned(m_data))).name;
        break;
    case Push:
        s << "PUSH " << hex << m_data;
        break;
    case UndefinedItem:
        s << "ANY";
        break;
    default:
        s << "t=" << dec << m_type << " d=" << hex << m_data;
        break;
    }
    if (!m_requireDataMatch)
        s << " ~";
    if (m_matchGroup)
        s << "[" << dec << m_matchGroup << "]";
    s << "(";
    for (Pattern const& p: m_arguments)
        s << p.toString() << ", ";
    s << ")";
    return s.str();
}

bool Pattern::matchesBaseItem(AssemblyItem const* _item) const
{
    if (m_type == UndefinedItem)
        return true;
    if (!_item)
        return false;
    if (m_type != _item->type())
        return false;
    if (m_requireDataMatch && m_data != _item->data())
        return false;
    return true;
}

Pattern::Expression const& Pattern::matchGroupValue() const
{
    assertThrow(m_matchGroup > 0, OptimizerException, "");
    assertThrow(!!m_matchGroups, OptimizerException, "");
    assertThrow((*m_matchGroups)[m_matchGroup], OptimizerException, "");
    return *(*m_matchGroups)[m_matchGroup];
}


ExpressionTemplate::ExpressionTemplate(Pattern const& _pattern, SourceLocation const& _location)
{
    if (_pattern.matchGroup())
    {
        hasId = true;
        id = _pattern.id();
    }
    else
    {
        hasId = false;
        item = _pattern.toAssemblyItem(_location);
    }
    for (auto const& arg: _pattern.arguments())
        arguments.push_back(ExpressionTemplate(arg, _location));
}

string ExpressionTemplate::toString() const
{
    stringstream s;
    if (hasId)
        s << id;
    else
        s << item;
    s << "(";
    for (auto const& arg: arguments)
        s << arg.toString();
    s << ")";
    return s.str();
}