about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- CommonSubexpressionEliminator.cpp 9
-rw-r--r-- ControlFlowGraph.cpp 51
-rw-r--r-- ControlFlowGraph.h 4
-rw-r--r-- ExpressionClasses.cpp 10
-rw-r--r-- ExpressionClasses.h 6
-rw-r--r-- KnownState.cpp 92
-rw-r--r-- KnownState.h 19
7 files changed, 125 insertions, 66 deletions
diff --git a/CommonSubexpressionEliminator.cpp b/CommonSubexpressionEliminator.cpp
index e369c9db..7564fcd9 100644
--- a/CommonSubexpressionEliminator.cpp
+++ b/CommonSubexpressionEliminator.cpp
@@ -153,7 +153,9 @@ AssemblyItems CSECodeGenerator::generateCode(
assertThrow(!m_classPositions[targetItem.second].empty(), OptimizerException, "");
if (m_classPositions[targetItem.second].count(targetItem.first))
continue;
- SourceLocation const& location = m_expressionClasses.representative(targetItem.second).item->getLocation();
+ SourceLocation location;
+ if (m_expressionClasses.representative(targetItem.second).item)
+ location = m_expressionClasses.representative(targetItem.second).item->getLocation();
int position = classElementPosition(targetItem.second);
if (position < targetItem.first)
// it is already at its target, we need another copy
@@ -197,7 +199,9 @@ void CSECodeGenerator::addDependencies(Id _c)
addDependencies(argument);
m_neededBy.insert(make_pair(argument, _c));
}
- if (expr.item->type() == Operation && (
+ if (
+ expr.item &&
+ expr.item->type() == Operation && (
expr.item->instruction() == Instruction::SLOAD ||
expr.item->instruction() == Instruction::MLOAD ||
expr.item->instruction() == Instruction::SHA3
@@ -288,6 +292,7 @@ void CSECodeGenerator::generateClassElement(Id _c, bool _allowSequenced)
OptimizerException,
"Sequence constrained operation requested out of sequence."
);
+ assertThrow(expr.item, OptimizerException, "Non-generated expression without item.");
vector<Id> const& arguments = expr.arguments;
for (Id arg: boost::adaptors::reverse(arguments))
generateClassElement(arg);
diff --git a/ControlFlowGraph.cpp b/ControlFlowGraph.cpp
index cc68b2af..3566bdb1 100644
--- a/ControlFlowGraph.cpp
+++ b/ControlFlowGraph.cpp
@@ -24,6 +24,7 @@
#include <libevmasm/ControlFlowGraph.h>
#include <map>
#include <memory>
+#include <algorithm>
#include <libevmasm/Exceptions.h>
#include <libevmasm/AssemblyItem.h>
#include <libevmasm/SemanticInformation.h>
@@ -217,7 +218,6 @@ void ControlFlowGraph::gatherKnowledge()
// @todo actually we know that memory is filled with zeros at the beginning,
// we could make use of that.
KnownStatePointer emptyState = make_shared<KnownState>();
- ExpressionClasses& expr = emptyState->expressionClasses();
bool unknownJumpEncountered = false;
vector<pair<BlockId, KnownStatePointer>> workQueue({make_pair(BlockId::initial(), emptyState->copy())});
@@ -238,8 +238,6 @@ void ControlFlowGraph::gatherKnowledge()
}
block.startState = state->copy();
- //@todo we might know the return address for the first pass, but not anymore for the second,
- // -> store knowledge about tags as a union.
// Feed all items except for the final jump yet because it will erase the target tag.
unsigned pc = block.begin;
@@ -254,22 +252,29 @@ void ControlFlowGraph::gatherKnowledge()
assertThrow(block.begin <= pc && pc == block.end - 1, OptimizerException, "");
//@todo in the case of JUMPI, add knowledge about the condition to the state
// (for both values of the condition)
- BlockId nextBlock = expressionClassToBlockId(
- state->stackElement(state->stackHeight(), SourceLocation()),
- expr
+ set<u256> tags = state->tagsInExpression(
+ state->stackElement(state->stackHeight(), SourceLocation())
);
state->feedItem(m_items.at(pc++));
- if (nextBlock)
- workQueue.push_back(make_pair(nextBlock, state->copy()));
- else if (!unknownJumpEncountered)
+
+ if (tags.empty() || std::any_of(tags.begin(), tags.end(), [&](u256 const& _tag)
+ {
+ return !m_blocks.count(BlockId(_tag));
+ }))
{
- // We do not know where this jump goes, so we have to reset the states of all
- // JUMPDESTs.
- unknownJumpEncountered = true;
- for (auto const& it: m_blocks)
- if (it.second.begin < it.second.end && m_items[it.second.begin].type() == Tag)
- workQueue.push_back(make_pair(it.first, emptyState->copy()));
+ if (!unknownJumpEncountered)
+ {
+ // We do not know the target of this jump, so we have to reset the states of all
+ // JUMPDESTs.
+ unknownJumpEncountered = true;
+ for (auto const& it: m_blocks)
+ if (it.second.begin < it.second.end && m_items[it.second.begin].type() == Tag)
+ workQueue.push_back(make_pair(it.first, emptyState->copy()));
+ }
}
+ else
+ for (auto tag: tags)
+ workQueue.push_back(make_pair(BlockId(tag), state->copy()));
}
else if (block.begin <= pc && pc < block.end)
state->feedItem(m_items.at(pc++));
@@ -329,7 +334,11 @@ BasicBlocks ControlFlowGraph::rebuildCode()
if (previousHandedOver && !pushes[blockId] && m_items[block.begin].type() == Tag)
++block.begin;
if (block.begin < block.end)
+ {
blocks.push_back(block);
+ blocks.back().startState->clearTagUnions();
+ blocks.back().endState->clearTagUnions();
+ }
previousHandedOver = (block.endType == BasicBlock::EndType::HANDOVER);
}
}
@@ -337,18 +346,6 @@ BasicBlocks ControlFlowGraph::rebuildCode()
return blocks;
}
-BlockId ControlFlowGraph::expressionClassToBlockId(
- ExpressionClasses::Id _id,
- ExpressionClasses& _exprClasses
-)
-{
- ExpressionClasses::Expression expr = _exprClasses.representative(_id);
- if (expr.item && expr.item->type() == PushTag)
- return BlockId(expr.item->data());
- else
- return BlockId::invalid();
-}
-
BlockId ControlFlowGraph::generateNewId()
{
BlockId id = BlockId(++m_lastUsedId);
diff --git a/ControlFlowGraph.h b/ControlFlowGraph.h
index 3366dc45..4480ba49 100644
--- a/ControlFlowGraph.h
+++ b/ControlFlowGraph.h
@@ -108,10 +108,6 @@ private:
void setPrevLinks();
BasicBlocks rebuildCode();
- /// @returns the corresponding BlockId if _id is a pushed jump tag,
- /// and an invalid BlockId otherwise.
- BlockId expressionClassToBlockId(ExpressionClasses::Id _id, ExpressionClasses& _exprClasses);
-
BlockId generateNewId();
unsigned m_lastUsedId = 0;
diff --git a/ExpressionClasses.cpp b/ExpressionClasses.cpp
index cfbeba7f..81adc0db 100644
--- a/ExpressionClasses.cpp
+++ b/ExpressionClasses.cpp
@@ -82,6 +82,16 @@ ExpressionClasses::Id ExpressionClasses::find(
return exp.id;
}
+ExpressionClasses::Id ExpressionClasses::newClass(SourceLocation const& _location)
+{
+ Expression exp;
+ exp.id = m_representatives.size();
+ exp.item = storeItem(AssemblyItem(UndefinedItem, (u256(1) << 255) + exp.id, _location));
+ m_representatives.push_back(exp);
+ m_expressions.insert(exp);
+ return exp.id;
+}
+
bool ExpressionClasses::knownToBeDifferent(ExpressionClasses::Id _a, ExpressionClasses::Id _b)
{
// Try to simplify "_a - _b" and return true iff the value is a non-zero constant.
diff --git a/ExpressionClasses.h b/ExpressionClasses.h
index c8352030..dd94092e 100644
--- a/ExpressionClasses.h
+++ b/ExpressionClasses.h
@@ -52,7 +52,8 @@ public:
Id id;
AssemblyItem const* item = nullptr;
Ids arguments;
- unsigned sequenceNumber; ///< Storage modification sequence, only used for SLOAD/SSTORE instructions.
+ /// Storage modification sequence, only used for storage and memory operations.
+ unsigned sequenceNumber = 0;
/// Behaves as if this was a tuple of (item->type(), item->data(), arguments, sequenceNumber).
bool operator<(Expression const& _other) const;
};
@@ -73,6 +74,9 @@ public:
/// @returns the number of classes.
Id size() const { return m_representatives.size(); }
+ /// @returns the id of a new class which is different to all other classes.
+ Id newClass(SourceLocation const& _location);
+
/// @returns true if the values of the given classes are known to be different (on every input).
/// @note that this function might still return false for some different inputs.
bool knownToBeDifferent(Id _a, Id _b);
diff --git a/KnownState.cpp b/KnownState.cpp
index 5a70a74f..b84e656a 100644
--- a/KnownState.cpp
+++ b/KnownState.cpp
@@ -162,29 +162,41 @@ KnownState::StoreOperation KnownState::feedItem(AssemblyItem const& _item, bool
/// Helper function for KnownState::reduceToCommonKnowledge, removes everything from
/// _this which is not in or not equal to the value in _other.
-template <class _Mapping, class _KeyType> void intersect(
- _Mapping& _this,
- _Mapping const& _other,
- function<_KeyType(_KeyType)> const& _keyTrans = [](_KeyType _k) { return _k; }
-)
+template <class _Mapping> void intersect(_Mapping& _this, _Mapping const& _other)
{
for (auto it = _this.begin(); it != _this.end();)
- if (_other.count(_keyTrans(it->first)) && _other.at(_keyTrans(it->first)) == it->second)
+ if (_other.count(it->first) && _other.at(it->first) == it->second)
++it;
else
it = _this.erase(it);
}
-template <class _Mapping> void intersect(_Mapping& _this, _Mapping const& _other)
-{
- intersect<_Mapping, ExpressionClasses::Id>(_this, _other, [](ExpressionClasses::Id _k) { return _k; });
-}
-
void KnownState::reduceToCommonKnowledge(KnownState const& _other)
{
int stackDiff = m_stackHeight - _other.m_stackHeight;
- function<int(int)> stackKeyTransform = [=](int _key) -> int { return _key - stackDiff; };
- intersect(m_stackElements, _other.m_stackElements, stackKeyTransform);
+ for (auto it = m_stackElements.begin(); it != m_stackElements.end();)
+ if (_other.m_stackElements.count(it->first - stackDiff))
+ {
+ Id other = _other.m_stackElements.at(it->first - stackDiff);
+ if (it->second == other)
+ ++it;
+ else
+ {
+ set<u256> theseTags = tagsInExpression(it->second);
+ set<u256> otherTags = tagsInExpression(other);
+ if (!theseTags.empty() && !otherTags.empty())
+ {
+ theseTags.insert(otherTags.begin(), otherTags.end());
+ it->second = tagUnion(theseTags);
+ ++it;
+ }
+ else
+ it = m_stackElements.erase(it);
+ }
+ }
+ else
+ it = m_stackElements.erase(it);
+
// Use the smaller stack height. Essential to terminate in case of loops.
if (m_stackHeight > _other.m_stackHeight)
{
@@ -201,10 +213,15 @@ void KnownState::reduceToCommonKnowledge(KnownState const& _other)
bool KnownState::operator==(const KnownState& _other) const
{
- return m_storageContent == _other.m_storageContent &&
- m_memoryContent == _other.m_memoryContent &&
- m_stackHeight == _other.m_stackHeight &&
- m_stackElements == _other.m_stackElements;
+ if (m_storageContent != _other.m_storageContent || m_memoryContent != _other.m_memoryContent)
+ return false;
+ int stackDiff = m_stackHeight - _other.m_stackHeight;
+ auto thisIt = m_stackElements.cbegin();
+ auto otherIt = _other.m_stackElements.cbegin();
+ for (; thisIt != m_stackElements.cend() && otherIt != _other.m_stackElements.cend(); ++thisIt, ++otherIt)
+ if (thisIt->first - stackDiff != otherIt->first || thisIt->second != otherIt->second)
+ return false;
+ return (thisIt == m_stackElements.cend() && otherIt == _other.m_stackElements.cend());
}
ExpressionClasses::Id KnownState::stackElement(int _stackHeight, SourceLocation const& _location)
@@ -212,18 +229,17 @@ ExpressionClasses::Id KnownState::stackElement(int _stackHeight, SourceLocation
if (m_stackElements.count(_stackHeight))
return m_stackElements.at(_stackHeight);
// Stack element not found (not assigned yet), create new unknown equivalence class.
- //@todo check that we do not infer incorrect equivalences when the stack is cleared partially
- //in between.
- return m_stackElements[_stackHeight] = initialStackElement(_stackHeight, _location);
+ return m_stackElements[_stackHeight] =
+ m_expressionClasses->find(AssemblyItem(UndefinedItem, _stackHeight, _location));
}
-ExpressionClasses::Id KnownState::initialStackElement(
- int _stackHeight,
- SourceLocation const& _location
-)
+void KnownState::clearTagUnions()
{
- // This is a special assembly item that refers to elements pre-existing on the initial stack.
- return m_expressionClasses->find(AssemblyItem(UndefinedItem, u256(_stackHeight), _location));
+ for (auto it = m_stackElements.begin(); it != m_stackElements.end();)
+ if (m_tagUnions.left.count(it->second))
+ it = m_stackElements.erase(it);
+ else
+ ++it;
}
void KnownState::setStackElement(int _stackHeight, Id _class)
@@ -352,3 +368,27 @@ KnownState::Id KnownState::applySha3(
return m_knownSha3Hashes[arguments] = v;
}
+set<u256> KnownState::tagsInExpression(KnownState::Id _expressionId)
+{
+ if (m_tagUnions.left.count(_expressionId))
+ return m_tagUnions.left.at(_expressionId);
+ // Might be a tag, then return the set of itself.
+ ExpressionClasses::Expression expr = m_expressionClasses->representative(_expressionId);
+ if (expr.item && expr.item->type() == PushTag)
+ return set<u256>({expr.item->data()});
+ else
+ return set<u256>();
+}
+
+KnownState::Id KnownState::tagUnion(set<u256> _tags)
+{
+ if (m_tagUnions.right.count(_tags))
+ return m_tagUnions.right.at(_tags);
+ else
+ {
+ Id id = m_expressionClasses->newClass(SourceLocation());
+ m_tagUnions.right.insert(make_pair(_tags, id));
+ return id;
+ }
+}
+
diff --git a/KnownState.h b/KnownState.h
index f7a3dd67..3505df74 100644
--- a/KnownState.h
+++ b/KnownState.h
@@ -29,6 +29,7 @@
#include <tuple>
#include <memory>
#include <ostream>
+#include <boost/bimap.hpp>
#include <libdevcore/CommonIO.h>
#include <libdevcore/Exceptions.h>
#include <libevmasm/ExpressionClasses.h>
@@ -107,15 +108,16 @@ public:
/// @returns true if the knowledge about the state of both objects is (known to be) equal.
bool operator==(KnownState const& _other) const;
- ///@todo the sequence numbers in two copies of this class should never be the same.
- /// might be doable using two-dimensional sequence numbers, where the first value is incremented
- /// for each copy
-
/// Retrieves the current equivalence class of the given stack element (or generates a new
/// one if it does not exist yet).
Id stackElement(int _stackHeight, SourceLocation const& _location);
- /// @returns the equivalence class id of the special initial stack element at the given height.
- Id initialStackElement(int _stackHeight, SourceLocation const& _location);
+
+ /// @returns its set of tags if the given expression class is a known tag union; returns a set
+ /// containing the tag if it is a PushTag expression and the empty set otherwise.
+ std::set<u256> tagsInExpression(Id _expressionId);
+ /// During analysis, different tags on the stack are partially treated as the same class.
+ /// This removes such classes so as not to confuse later analyzers.
+ void clearTagUnions();
int stackHeight() const { return m_stackHeight; }
std::map<int, Id> const& stackElements() const { return m_stackElements; }
@@ -142,6 +144,9 @@ private:
/// Finds or creates a new expression that applies the sha3 hash function to the contents in memory.
Id applySha3(Id _start, Id _length, SourceLocation const& _location);
+ /// @returns a new or already used Id representing the given set of tags.
+ Id tagUnion(std::set<u256> _tags);
+
/// Current stack height, can be negative.
int m_stackHeight = 0;
/// Current stack layout, mapping stack height -> equivalence class
@@ -157,6 +162,8 @@ private:
std::map<std::vector<Id>, Id> m_knownSha3Hashes;
/// Structure containing the classes of equivalent expressions.
std::shared_ptr<ExpressionClasses> m_expressionClasses;
+ /// Container for unions of tags stored on the stack.
+ boost::bimap<Id, std::set<u256>> m_tagUnions;
};
}