/*
This file is part of cpp-ethereum.
cpp-ethereum is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
cpp-ethereum is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* @file ExpressionClasses.h
* @author Christian <c@ethdev.com>
* @date 2015
* Container for equivalence classes of expressions for use in common subexpression elimination.
*/
#pragma once
#include <vector>
#include <map>
#include <memory>
#include <libdevcore/Common.h>
#include <libevmasm/AssemblyItem.h>
namespace dev
{
namespace eth
{
class Pattern;
struct ExpressionTemplate;
/**
* Collection of classes of equivalent expressions that can also determine the class of an expression.
* Identifiers are contiguously assigned to new classes starting from zero.
*/
class ExpressionClasses
{
public:
using Id = unsigned;
using Ids = std::vector<Id>;
struct Expression
{
Id id;
AssemblyItem const* item = nullptr;
Ids arguments;
/// Storage modification sequence, only used for storage and memory operations.
unsigned sequenceNumber = 0;
/// Behaves as if this was a tuple of (item->type(), item->data(), arguments, sequenceNumber).
bool operator<(Expression const& _other) const;
};
/// Retrieves the id of the expression equivalence class resulting from the given item applied to the
/// given classes, might also create a new one.
/// @param _copyItem if true, copies the assembly item to an internal storage instead of just
/// keeping a pointer.
/// The @a _sequenceNumber indicates the current storage or memory access sequence.
Id find(
AssemblyItem const& _item,
Ids const& _arguments = {},
bool _copyItem = true,
unsigned _sequenceNumber = 0
);
/// @returns the canonical representative of an expression class.
Expression const& representative(Id _id) const { return m_representatives.at(_id); }
/// @returns the number of classes.
Id size() const { return m_representatives.size(); }
/// Forces the given @a _item with @a _arguments to the class @a _id. This can be used to
/// add prior knowledge e.g. about CALLDATA, but has to be used with caution. Will not work as
/// expected if @a _item applied to @a _arguments already exists.
void forceEqual(Id _id, AssemblyItem const& _item, Ids const& _arguments, bool _copyItem = true);
/// @returns the id of a new class which is different to all other classes.
Id newClass(SourceLocation const& _location);
/// @returns true if the values of the given classes are known to be different (on every input).
/// @note that this function might still return false for some different inputs.
bool knownToBeDifferent(Id _a, Id _b);
/// Similar to @a knownToBeDifferent but require that abs(_a - b) >= 32.
bool knownToBeDifferentBy32(Id _a, Id _b);
/// @returns true if the value of the given class is known to be zero.
/// @note that this is not the negation of knownNonZero
bool knownZero(Id _c);
/// @returns true if the value of the given class is known to be nonzero.
/// @note that this is not the negation of knownZero
bool knownNonZero(Id _c);
/// @returns a pointer to the value if the given class is known to be a constant,
/// and a nullptr otherwise.
u256 const* knownConstant(Id _c);
/// Stores a copy of the given AssemblyItem and returns a pointer to the copy that is valid for
/// the lifetime of the ExpressionClasses object.
AssemblyItem const* storeItem(AssemblyItem const& _item);
std::string fullDAGToString(Id _id) const;
private:
/// Tries to simplify the given expression.
/// @returns its class if it possible or Id(-1) otherwise.
/// @param _secondRun is set to true for the second run where arguments of commutative expressions are reversed
Id tryToSimplify(Expression const& _expr, bool _secondRun = false);
/// Rebuilds an expression from a (matched) pattern.
Id rebuildExpression(ExpressionTemplate const& _template);
std::vector<std::pair<Pattern, std::function<Pattern()>>> createRules() const;
/// Expression equivalence class representatives - we only store one item of an equivalence.
std::vector<Expression> m_representatives;
/// All expression ever encountered.
std::set<Expression> m_expressions;
std::vector<std::shared_ptr<AssemblyItem>> m_spareAssemblyItems;
};
/**
* Pattern to match against an expression.
* Also stores matched expressions to retrieve them later, for constructing new expressions using
* ExpressionTemplate.
*/
class Pattern
{
public:
using Expression = ExpressionClasses::Expression;
using Id = ExpressionClasses::Id;
// Matches a specific constant value.
Pattern(unsigned _value): Pattern(u256(_value)) {}
// Matches a specific constant value.
Pattern(u256 const& _value): m_type(Push), m_requireDataMatch(true), m_data(_value) {}
// Matches a specific assembly item type or anything if not given.
Pattern(AssemblyItemType _type = UndefinedItem): m_type(_type) {}
// Matches a given instruction with given arguments
Pattern(Instruction _instruction, std::vector<Pattern> const& _arguments = {});
/// Sets this pattern to be part of the match group with the identifier @a _group.
/// Inside one rule, all patterns in the same match group have to match expressions from the
/// same expression equivalence class.
void setMatchGroup(unsigned _group, std::map<unsigned, Expression const*>& _matchGroups);
unsigned matchGroup() const { return m_matchGroup; }
bool matches(Expression const& _expr, ExpressionClasses const& _classes) const;
AssemblyItem toAssemblyItem(SourceLocation const& _location) const;
std::vector<Pattern> arguments() const { return m_arguments; }
/// @returns the id of the matched expression if this pattern is part of a match group.
Id id() const { return matchGroupValue().id; }
/// @returns the data of the matched expression if this pattern is part of a match group.
u256 const& d() const { return matchGroupValue().item->data(); }
std::string toString() const;
private:
bool matchesBaseItem(AssemblyItem const* _item) const;
Expression const& matchGroupValue() const;
AssemblyItemType m_type;
bool m_requireDataMatch = false;
u256 m_data = 0;
std::vector<Pattern> m_arguments;
unsigned m_matchGroup = 0;
std::map<unsigned, Expression const*>* m_matchGroups = nullptr;
};
/**
* Template for a new expression that can be built from matched patterns.
*/
struct ExpressionTemplate
{
using Expression = ExpressionClasses::Expression;
using Id = ExpressionClasses::Id;
explicit ExpressionTemplate(Pattern const& _pattern, SourceLocation const& _location);
std::string toString() const;
bool hasId = false;
/// Id of the matched expression, if available.
Id id = Id(-1);
// Otherwise, assembly item.
AssemblyItem item = UndefinedItem;
std::vector<ExpressionTemplate> arguments;
};
}
}