From 9106d72a02aa52b0c48db2eef7e4f9df213500b5 Mon Sep 17 00:00:00 2001
From: chriseth <c@ethdev.com>
Date: Wed, 29 Apr 2015 18:16:05 +0200
Subject: Split known state from common subexpression eliminator.

---
 Assembly.cpp                      |   3 +-
 CommonSubexpressionEliminator.cpp | 267 +++---------------------------------
 CommonSubexpressionEliminator.h   |  59 +-------
 KnownState.cpp                    | 278 ++++++++++++++++++++++++++++++++++++++
 KnownState.h                      | 149 ++++++++++++++++++++
 5 files changed, 451 insertions(+), 305 deletions(-)
 create mode 100644 KnownState.cpp
 create mode 100644 KnownState.h

diff --git a/Assembly.cpp b/Assembly.cpp
index 6cc09a4b..c7253622 100644
--- a/Assembly.cpp
+++ b/Assembly.cpp
@@ -329,7 +329,8 @@ Assembly& Assembly::optimise(bool _enable)
 		copt << "Performing common subexpression elimination...";
 		for (auto iter = m_items.begin(); iter != m_items.end();)
 		{
-			CommonSubexpressionEliminator eliminator;
+			KnownState state;
+			CommonSubexpressionEliminator eliminator(state);
 			auto orig = iter;
 			iter = eliminator.feedItems(iter, m_items.end());
 			AssemblyItems optItems;
diff --git a/CommonSubexpressionEliminator.cpp b/CommonSubexpressionEliminator.cpp
index 63524d6f..645a426d 100644
--- a/CommonSubexpressionEliminator.cpp
+++ b/CommonSubexpressionEliminator.cpp
@@ -37,18 +37,19 @@ vector<AssemblyItem> CommonSubexpressionEliminator::getOptimizedItems()
 
 	map<int, Id> initialStackContents;
 	map<int, Id> targetStackContents;
-	int minHeight = m_stackHeight + 1;
-	if (!m_stackElements.empty())
-		minHeight = min(minHeight, m_stackElements.begin()->first);
+	int minHeight = m_state.stackHeight() + 1;
+	if (!m_state.stackElements().empty())
+		minHeight = min(minHeight, m_state.stackElements().begin()->first);
 	for (int height = minHeight; height <= 0; ++height)
-		initialStackContents[height] = initialStackElement(height, SourceLocation());
-	for (int height = minHeight; height <= m_stackHeight; ++height)
-		targetStackContents[height] = stackElement(height, SourceLocation());
+		//@todo this is not nice as it is here - should be "unknownStackElement" - but is it really unknown?
+		initialStackContents[height] = m_state.initialStackElement(height, SourceLocation());
+	for (int height = minHeight; height <= m_state.stackHeight(); ++height)
+		targetStackContents[height] = m_state.stackElement(height, SourceLocation());
 
 	// Debug info:
 	//stream(cout, initialStackContents, targetStackContents);
 
-	AssemblyItems items = CSECodeGenerator(m_expressionClasses, m_storeOperations).generateCode(
+	AssemblyItems items = CSECodeGenerator(m_state.expressionClasses(), m_storeOperations).generateCode(
 		initialStackContents,
 		targetStackContents
 	);
@@ -57,103 +58,11 @@ vector<AssemblyItem> CommonSubexpressionEliminator::getOptimizedItems()
 	return items;
 }
 
-ostream& CommonSubexpressionEliminator::stream(
-	ostream& _out,
-	map<int, Id> _initialStack,
-	map<int, Id> _targetStack
-) const
-{
-	auto streamExpressionClass = [this](ostream& _out, Id _id)
-	{
-		auto const& expr = m_expressionClasses.representative(_id);
-		_out << "  " << dec << _id << ": " << *expr.item;
-		if (expr.sequenceNumber)
-			_out << "@" << dec << expr.sequenceNumber;
-		_out << "(";
-		for (Id arg: expr.arguments)
-			_out << dec << arg << ",";
-		_out << ")" << endl;
-	};
-
-	_out << "Optimizer analysis:" << endl;
-	_out << "Final stack height: " << dec << m_stackHeight << endl;
-	_out << "Equivalence classes: " << endl;
-	for (Id eqClass = 0; eqClass < m_expressionClasses.size(); ++eqClass)
-		streamExpressionClass(_out, eqClass);
-
-	_out << "Initial stack: " << endl;
-	for (auto const& it: _initialStack)
-	{
-		_out << "  " << dec << it.first << ": ";
-		streamExpressionClass(_out, it.second);
-	}
-	_out << "Target stack: " << endl;
-	for (auto const& it: _targetStack)
-	{
-		_out << "  " << dec << it.first << ": ";
-		streamExpressionClass(_out, it.second);
-	}
-
-	return _out;
-}
-
 void CommonSubexpressionEliminator::feedItem(AssemblyItem const& _item, bool _copyItem)
 {
-	if (_item.type() != Operation)
-	{
-		assertThrow(_item.deposit() == 1, InvalidDeposit, "");
-		setStackElement(++m_stackHeight, m_expressionClasses.find(_item, {}, _copyItem));
-	}
-	else
-	{
-		Instruction instruction = _item.instruction();
-		InstructionInfo info = instructionInfo(instruction);
-		if (SemanticInformation::isDupInstruction(_item))
-			setStackElement(
-				m_stackHeight + 1,
-				stackElement(
-					m_stackHeight - int(instruction) + int(Instruction::DUP1),
-					_item.getLocation()
-				)
-			);
-		else if (SemanticInformation::isSwapInstruction(_item))
-			swapStackElements(
-				m_stackHeight,
-				m_stackHeight - 1 - int(instruction) + int(Instruction::SWAP1),
-				_item.getLocation()
-			);
-		else if (instruction != Instruction::POP)
-		{
-			vector<Id> arguments(info.args);
-			for (int i = 0; i < info.args; ++i)
-				arguments[i] = stackElement(m_stackHeight - i, _item.getLocation());
-			if (_item.instruction() == Instruction::SSTORE)
-				storeInStorage(arguments[0], arguments[1], _item.getLocation());
-			else if (_item.instruction() == Instruction::SLOAD)
-				setStackElement(
-					m_stackHeight + _item.deposit(),
-					loadFromStorage(arguments[0], _item.getLocation())
-				);
-			else if (_item.instruction() == Instruction::MSTORE)
-				storeInMemory(arguments[0], arguments[1], _item.getLocation());
-			else if (_item.instruction() == Instruction::MLOAD)
-				setStackElement(
-					m_stackHeight + _item.deposit(),
-					loadFromMemory(arguments[0], _item.getLocation())
-				);
-			else if (_item.instruction() == Instruction::SHA3)
-				setStackElement(
-					m_stackHeight + _item.deposit(),
-					applySha3(arguments.at(0), arguments.at(1), _item.getLocation())
-				);
-			else
-				setStackElement(
-					m_stackHeight + _item.deposit(),
-					m_expressionClasses.find(_item, arguments, _copyItem)
-				);
-		}
-		m_stackHeight += _item.deposit();
-	}
+	StoreOperation op = m_state.feedItem(_item, _copyItem);
+	if (op.isValid())
+		m_storeOperations.push_back(op);
 }
 
 void CommonSubexpressionEliminator::optimizeBreakingItem()
@@ -164,20 +73,20 @@ void CommonSubexpressionEliminator::optimizeBreakingItem()
 	SourceLocation const& location = m_breakingItem->getLocation();
 	AssemblyItem::JumpType jumpType = m_breakingItem->getJumpType();
 
-	Id condition = stackElement(m_stackHeight - 1, location);
-	Id zero = m_expressionClasses.find(u256(0));
-	if (m_expressionClasses.knownToBeDifferent(condition, zero))
+	Id condition = m_state.stackElement(m_state.stackHeight() - 1, location);
+	Id zero = m_state.expressionClasses().find(u256(0));
+	if (m_state.expressionClasses().knownToBeDifferent(condition, zero))
 	{
 		feedItem(AssemblyItem(Instruction::SWAP1, location), true);
 		feedItem(AssemblyItem(Instruction::POP, location), true);
 
 		AssemblyItem item(Instruction::JUMP, location);
 		item.setJumpType(jumpType);
-		m_breakingItem = m_expressionClasses.storeItem(item);
+		m_breakingItem = m_state.expressionClasses().storeItem(item);
 		return;
 	}
-	Id negatedCondition = m_expressionClasses.find(Instruction::ISZERO, {condition});
-	if (m_expressionClasses.knownToBeDifferent(negatedCondition, zero))
+	Id negatedCondition = m_state.expressionClasses().find(Instruction::ISZERO, {condition});
+	if (m_state.expressionClasses().knownToBeDifferent(negatedCondition, zero))
 	{
 		AssemblyItem it(Instruction::POP, location);
 		feedItem(it, true);
@@ -186,148 +95,6 @@ void CommonSubexpressionEliminator::optimizeBreakingItem()
 	}
 }
 
-void CommonSubexpressionEliminator::setStackElement(int _stackHeight, Id _class)
-{
-	m_stackElements[_stackHeight] = _class;
-}
-
-void CommonSubexpressionEliminator::swapStackElements(
-	int _stackHeightA,
-	int _stackHeightB,
-	SourceLocation const& _location
-)
-{
-	assertThrow(_stackHeightA != _stackHeightB, OptimizerException, "Swap on same stack elements.");
-	// ensure they are created
-	stackElement(_stackHeightA, _location);
-	stackElement(_stackHeightB, _location);
-
-	swap(m_stackElements[_stackHeightA], m_stackElements[_stackHeightB]);
-}
-
-ExpressionClasses::Id CommonSubexpressionEliminator::stackElement(
-	int _stackHeight,
-	SourceLocation const& _location
-)
-{
-	if (m_stackElements.count(_stackHeight))
-		return m_stackElements.at(_stackHeight);
-	// Stack element not found (not assigned yet), create new equivalence class.
-	return m_stackElements[_stackHeight] = initialStackElement(_stackHeight, _location);
-}
-
-ExpressionClasses::Id CommonSubexpressionEliminator::initialStackElement(
-	int _stackHeight,
-	SourceLocation const& _location
-)
-{
-	assertThrow(_stackHeight <= 0, OptimizerException, "Initial stack element of positive height requested.");
-	assertThrow(_stackHeight > -16, StackTooDeepException, "");
-	// This is a special assembly item that refers to elements pre-existing on the initial stack.
-	return m_expressionClasses.find(AssemblyItem(dupInstruction(1 - _stackHeight), _location));
-}
-
-void CommonSubexpressionEliminator::storeInStorage(Id _slot, Id _value, SourceLocation const& _location)
-{
-	if (m_storageContent.count(_slot) && m_storageContent[_slot] == _value)
-		// do not execute the storage if we know that the value is already there
-		return;
-	m_sequenceNumber++;
-	decltype(m_storageContent) storageContents;
-	// Copy over all values (i.e. retain knowledge about them) where we know that this store
-	// operation will not destroy the knowledge. Specifically, we copy storage locations we know
-	// are different from _slot or locations where we know that the stored value is equal to _value.
-	for (auto const& storageItem: m_storageContent)
-		if (m_expressionClasses.knownToBeDifferent(storageItem.first, _slot) || storageItem.second == _value)
-			storageContents.insert(storageItem);
-	m_storageContent = move(storageContents);
-
-	AssemblyItem item(Instruction::SSTORE, _location);
-	Id id = m_expressionClasses.find(item, {_slot, _value}, true, m_sequenceNumber);
-	m_storeOperations.push_back(StoreOperation(StoreOperation::Storage, _slot, m_sequenceNumber, id));
-	m_storageContent[_slot] = _value;
-	// increment a second time so that we get unique sequence numbers for writes
-	m_sequenceNumber++;
-}
-
-ExpressionClasses::Id CommonSubexpressionEliminator::loadFromStorage(Id _slot, SourceLocation const& _location)
-{
-	if (m_storageContent.count(_slot))
-		return m_storageContent.at(_slot);
-
-	AssemblyItem item(Instruction::SLOAD, _location);
-	return m_storageContent[_slot] = m_expressionClasses.find(item, {_slot}, true, m_sequenceNumber);
-}
-
-void CommonSubexpressionEliminator::storeInMemory(Id _slot, Id _value, SourceLocation const& _location)
-{
-	if (m_memoryContent.count(_slot) && m_memoryContent[_slot] == _value)
-		// do not execute the store if we know that the value is already there
-		return;
-	m_sequenceNumber++;
-	decltype(m_memoryContent) memoryContents;
-	// copy over values at points where we know that they are different from _slot by at least 32
-	for (auto const& memoryItem: m_memoryContent)
-		if (m_expressionClasses.knownToBeDifferentBy32(memoryItem.first, _slot))
-			memoryContents.insert(memoryItem);
-	m_memoryContent = move(memoryContents);
-
-	AssemblyItem item(Instruction::MSTORE, _location);
-	Id id = m_expressionClasses.find(item, {_slot, _value}, true, m_sequenceNumber);
-	m_storeOperations.push_back(StoreOperation(StoreOperation::Memory, _slot, m_sequenceNumber, id));
-	m_memoryContent[_slot] = _value;
-	// increment a second time so that we get unique sequence numbers for writes
-	m_sequenceNumber++;
-}
-
-ExpressionClasses::Id CommonSubexpressionEliminator::loadFromMemory(Id _slot, SourceLocation const& _location)
-{
-	if (m_memoryContent.count(_slot))
-		return m_memoryContent.at(_slot);
-
-	AssemblyItem item(Instruction::MLOAD, _location);
-	return m_memoryContent[_slot] = m_expressionClasses.find(item, {_slot}, true, m_sequenceNumber);
-}
-
-CommonSubexpressionEliminator::Id CommonSubexpressionEliminator::applySha3(
-	Id _start,
-	Id _length,
-	SourceLocation const& _location
-)
-{
-	AssemblyItem sha3Item(Instruction::SHA3, _location);
-	// Special logic if length is a short constant, otherwise we cannot tell.
-	u256 const* l = m_expressionClasses.knownConstant(_length);
-	// unknown or too large length
-	if (!l || *l > 128)
-		return m_expressionClasses.find(sha3Item, {_start, _length}, true, m_sequenceNumber);
-
-	vector<Id> arguments;
-	for (u256 i = 0; i < *l; i += 32)
-	{
-		Id slot = m_expressionClasses.find(
-			AssemblyItem(Instruction::ADD, _location),
-			{_start, m_expressionClasses.find(i)}
-		);
-		arguments.push_back(loadFromMemory(slot, _location));
-	}
-	if (m_knownSha3Hashes.count(arguments))
-		return m_knownSha3Hashes.at(arguments);
-	Id v;
-	// If all arguments are known constants, compute the sha3 here
-	if (all_of(arguments.begin(), arguments.end(), [this](Id _a) { return !!m_expressionClasses.knownConstant(_a); }))
-	{
-		bytes data;
-		for (Id a: arguments)
-			data += toBigEndian(*m_expressionClasses.knownConstant(a));
-		data.resize(size_t(*l));
-		v = m_expressionClasses.find(AssemblyItem(u256(sha3(data)), _location));
-	}
-	else
-		v = m_expressionClasses.find(sha3Item, {_start, _length}, true, m_sequenceNumber);
-	return m_knownSha3Hashes[arguments] = v;
-}
-
 CSECodeGenerator::CSECodeGenerator(
 	ExpressionClasses& _expressionClasses,
 	vector<CSECodeGenerator::StoreOperation> const& _storeOperations
diff --git a/CommonSubexpressionEliminator.h b/CommonSubexpressionEliminator.h
index 6156bc81..2ed92640 100644
--- a/CommonSubexpressionEliminator.h
+++ b/CommonSubexpressionEliminator.h
@@ -32,6 +32,7 @@
 #include <libdevcore/Exceptions.h>
 #include <libevmasm/ExpressionClasses.h>
 #include <libevmasm/SemanticInformation.h>
+#include <libevmasm/KnownState.h>
 
 namespace dev
 {
@@ -58,20 +59,9 @@ class CommonSubexpressionEliminator
 {
 public:
 	using Id = ExpressionClasses::Id;
-	struct StoreOperation
-	{
-		enum Target { Memory, Storage };
-		StoreOperation(
-			Target _target,
-			Id _slot,
-			unsigned _sequenceNumber,
-			Id _expression
-		): target(_target), slot(_slot), sequenceNumber(_sequenceNumber), expression(_expression) {}
-		Target target;
-		Id slot;
-		unsigned sequenceNumber;
-		Id expression;
-	};
+	using StoreOperation = KnownState::StoreOperation;
+
+	CommonSubexpressionEliminator(KnownState const& _state): m_state(_state) {}
 
 	/// Feeds AssemblyItems into the eliminator and @returns the iterator pointing at the first
 	/// item that must be fed into a new instance of the eliminator.
@@ -95,49 +85,10 @@ private:
 	/// Tries to optimize the item that breaks the basic block at the end.
 	void optimizeBreakingItem();
 
-	/// Simplifies the given item using
-	/// Assigns a new equivalence class to the next sequence number of the given stack element.
-	void setStackElement(int _stackHeight, Id _class);
-	/// Swaps the given stack elements in their next sequence number.
-	void swapStackElements(int _stackHeightA, int _stackHeightB, SourceLocation const& _location);
-	/// Retrieves the current equivalence class fo the given stack element (or generates a new
-	/// one if it does not exist yet).
-	Id stackElement(int _stackHeight, SourceLocation const& _location);
-	/// @returns the equivalence class id of the special initial stack element at the given height
-	/// (must not be positive).
-	Id initialStackElement(int _stackHeight, SourceLocation const& _location);
-
-	/// Increments the sequence number, deletes all storage information that might be overwritten
-	/// and stores the new value at the given slot.
-	void storeInStorage(Id _slot, Id _value, SourceLocation const& _location);
-	/// Retrieves the current value at the given slot in storage or creates a new special sload class.
-	Id loadFromStorage(Id _slot, SourceLocation const& _location);
-	/// Increments the sequence number, deletes all memory information that might be overwritten
-	/// and stores the new value at the given slot.
-	void storeInMemory(Id _slot, Id _value, SourceLocation const& _location);
-	/// Retrieves the current value at the given slot in memory or creates a new special mload class.
-	Id loadFromMemory(Id _slot, SourceLocation const& _location);
-	/// Finds or creates a new expression that applies the sha3 hash function to the contents in memory.
-	Id applySha3(Id _start, Id _length, SourceLocation const& _location);
-
-	/// Current stack height, can be negative.
-	int m_stackHeight = 0;
-	/// Current stack layout, mapping stack height -> equivalence class
-	std::map<int, Id> m_stackElements;
-	/// Current sequence number, this is incremented with each modification to storage or memory.
-	unsigned m_sequenceNumber = 1;
-	/// Knowledge about storage content.
-	std::map<Id, Id> m_storageContent;
-	/// Knowledge about memory content. Keys are memory addresses, note that the values overlap
-	/// and are not contained here if they are not completely known.
-	std::map<Id, Id> m_memoryContent;
-	/// Keeps record of all sha3 hashes that are computed.
-	std::map<std::vector<Id>, Id> m_knownSha3Hashes;
+	KnownState m_state;
 	/// Keeps information about which storage or memory slots were written to at which sequence
 	/// number with what instruction.
 	std::vector<StoreOperation> m_storeOperations;
-	/// Structure containing the classes of equivalent expressions.
-	ExpressionClasses m_expressionClasses;
 
 	/// The item that breaks the basic block, can be nullptr.
 	/// It is usually appended to the block but can be optimized in some cases.
diff --git a/KnownState.cpp b/KnownState.cpp
new file mode 100644
index 00000000..244270fb
--- /dev/null
+++ b/KnownState.cpp
@@ -0,0 +1,278 @@
+/*
+	This file is part of cpp-ethereum.
+
+	cpp-ethereum is free software: you can redistribute it and/or modify
+	it under the terms of the GNU General Public License as published by
+	the Free Software Foundation, either version 3 of the License, or
+	(at your option) any later version.
+
+	cpp-ethereum is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+	GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public License
+	along with cpp-ethereum.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * @file KnownState.cpp
+ * @author Christian <c@ethdev.com>
+ * @date 2015
+ * Contains knowledge about the state of the virtual machine at a specific instruction.
+ */
+
+#include "KnownState.h"
+#include <functional>
+#include <libdevcrypto/SHA3.h>
+#include <libevmasm/AssemblyItem.h>
+
+using namespace std;
+using namespace dev;
+using namespace dev::eth;
+
+ostream& KnownState::stream(
+	ostream& _out,
+	map<int, Id> _initialStack,
+	map<int, Id> _targetStack
+) const
+{
+	auto streamExpressionClass = [this](ostream& _out, Id _id)
+	{
+		auto const& expr = m_expressionClasses->representative(_id);
+		_out << "  " << dec << _id << ": " << *expr.item;
+		if (expr.sequenceNumber)
+			_out << "@" << dec << expr.sequenceNumber;
+		_out << "(";
+		for (Id arg: expr.arguments)
+			_out << dec << arg << ",";
+		_out << ")" << endl;
+	};
+
+	_out << "Optimizer analysis:" << endl;
+	_out << "Final stack height: " << dec << m_stackHeight << endl;
+	_out << "Equivalence classes: " << endl;
+	for (Id eqClass = 0; eqClass < m_expressionClasses->size(); ++eqClass)
+		streamExpressionClass(_out, eqClass);
+
+	_out << "Initial stack: " << endl;
+	for (auto const& it: _initialStack)
+	{
+		_out << "  " << dec << it.first << ": ";
+		streamExpressionClass(_out, it.second);
+	}
+	_out << "Target stack: " << endl;
+	for (auto const& it: _targetStack)
+	{
+		_out << "  " << dec << it.first << ": ";
+		streamExpressionClass(_out, it.second);
+	}
+
+	return _out;
+}
+
+KnownState::StoreOperation KnownState::feedItem(AssemblyItem const& _item, bool _copyItem)
+{
+	StoreOperation op;
+	if (_item.type() != Operation)
+	{
+		assertThrow(_item.deposit() == 1, InvalidDeposit, "");
+		setStackElement(++m_stackHeight, m_expressionClasses->find(_item, {}, _copyItem));
+	}
+	else
+	{
+		Instruction instruction = _item.instruction();
+		InstructionInfo info = instructionInfo(instruction);
+		if (SemanticInformation::isDupInstruction(_item))
+			setStackElement(
+				m_stackHeight + 1,
+				stackElement(
+					m_stackHeight - int(instruction) + int(Instruction::DUP1),
+					_item.getLocation()
+				)
+			);
+		else if (SemanticInformation::isSwapInstruction(_item))
+			swapStackElements(
+				m_stackHeight,
+				m_stackHeight - 1 - int(instruction) + int(Instruction::SWAP1),
+				_item.getLocation()
+			);
+		else if (instruction != Instruction::POP)
+		{
+			vector<Id> arguments(info.args);
+			for (int i = 0; i < info.args; ++i)
+				arguments[i] = stackElement(m_stackHeight - i, _item.getLocation());
+			if (_item.instruction() == Instruction::SSTORE)
+				op = storeInStorage(arguments[0], arguments[1], _item.getLocation());
+			else if (_item.instruction() == Instruction::SLOAD)
+				setStackElement(
+					m_stackHeight + _item.deposit(),
+					loadFromStorage(arguments[0], _item.getLocation())
+				);
+			else if (_item.instruction() == Instruction::MSTORE)
+				op = storeInMemory(arguments[0], arguments[1], _item.getLocation());
+			else if (_item.instruction() == Instruction::MLOAD)
+				setStackElement(
+					m_stackHeight + _item.deposit(),
+					loadFromMemory(arguments[0], _item.getLocation())
+				);
+			else if (_item.instruction() == Instruction::SHA3)
+				setStackElement(
+					m_stackHeight + _item.deposit(),
+					applySha3(arguments.at(0), arguments.at(1), _item.getLocation())
+				);
+			else
+				setStackElement(
+					m_stackHeight + _item.deposit(),
+					m_expressionClasses->find(_item, arguments, _copyItem)
+				);
+		}
+		m_stackHeight += _item.deposit();
+	}
+	return op;
+}
+
+ExpressionClasses::Id KnownState::stackElement(int _stackHeight, SourceLocation const& _location)
+{
+	if (m_stackElements.count(_stackHeight))
+		return m_stackElements.at(_stackHeight);
+	// Stack element not found (not assigned yet), create new equivalence class.
+	return m_stackElements[_stackHeight] = initialStackElement(_stackHeight, _location);
+}
+
+ExpressionClasses::Id KnownState::initialStackElement(
+	int _stackHeight,
+	SourceLocation const& _location
+)
+{
+	assertThrow(_stackHeight <= 0, OptimizerException, "Initial stack element of positive height requested.");
+	assertThrow(_stackHeight > -16, StackTooDeepException, "");
+	// This is a special assembly item that refers to elements pre-existing on the initial stack.
+	return m_expressionClasses->find(AssemblyItem(dupInstruction(1 - _stackHeight), _location));
+}
+
+void KnownState::setStackElement(int _stackHeight, Id _class)
+{
+	m_stackElements[_stackHeight] = _class;
+}
+
+void KnownState::swapStackElements(
+	int _stackHeightA,
+	int _stackHeightB,
+	SourceLocation const& _location
+)
+{
+	assertThrow(_stackHeightA != _stackHeightB, OptimizerException, "Swap on same stack elements.");
+	// ensure they are created
+	stackElement(_stackHeightA, _location);
+	stackElement(_stackHeightB, _location);
+
+	swap(m_stackElements[_stackHeightA], m_stackElements[_stackHeightB]);
+}
+
+KnownState::StoreOperation KnownState::storeInStorage(
+	Id _slot,
+	Id _value,
+	SourceLocation const& _location)
+{
+	if (m_storageContent.count(_slot) && m_storageContent[_slot] == _value)
+		// do not execute the storage if we know that the value is already there
+		return StoreOperation();
+	m_sequenceNumber++;
+	decltype(m_storageContent) storageContents;
+	// Copy over all values (i.e. retain knowledge about them) where we know that this store
+	// operation will not destroy the knowledge. Specifically, we copy storage locations we know
+	// are different from _slot or locations where we know that the stored value is equal to _value.
+	for (auto const& storageItem: m_storageContent)
+		if (m_expressionClasses->knownToBeDifferent(storageItem.first, _slot) || storageItem.second == _value)
+			storageContents.insert(storageItem);
+	m_storageContent = move(storageContents);
+
+	AssemblyItem item(Instruction::SSTORE, _location);
+	Id id = m_expressionClasses->find(item, {_slot, _value}, true, m_sequenceNumber);
+	StoreOperation operation(StoreOperation::Storage, _slot, m_sequenceNumber, id);
+	m_storageContent[_slot] = _value;
+	// increment a second time so that we get unique sequence numbers for writes
+	m_sequenceNumber++;
+
+	return operation;
+}
+
+ExpressionClasses::Id KnownState::loadFromStorage(Id _slot, SourceLocation const& _location)
+{
+	if (m_storageContent.count(_slot))
+		return m_storageContent.at(_slot);
+
+	AssemblyItem item(Instruction::SLOAD, _location);
+	return m_storageContent[_slot] = m_expressionClasses->find(item, {_slot}, true, m_sequenceNumber);
+}
+
+KnownState::StoreOperation KnownState::storeInMemory(Id _slot, Id _value, SourceLocation const& _location)
+{
+	if (m_memoryContent.count(_slot) && m_memoryContent[_slot] == _value)
+		// do not execute the store if we know that the value is already there
+		return StoreOperation();
+	m_sequenceNumber++;
+	decltype(m_memoryContent) memoryContents;
+	// copy over values at points where we know that they are different from _slot by at least 32
+	for (auto const& memoryItem: m_memoryContent)
+		if (m_expressionClasses->knownToBeDifferentBy32(memoryItem.first, _slot))
+			memoryContents.insert(memoryItem);
+	m_memoryContent = move(memoryContents);
+
+	AssemblyItem item(Instruction::MSTORE, _location);
+	Id id = m_expressionClasses->find(item, {_slot, _value}, true, m_sequenceNumber);
+	StoreOperation operation(StoreOperation(StoreOperation::Memory, _slot, m_sequenceNumber, id));
+	m_memoryContent[_slot] = _value;
+	// increment a second time so that we get unique sequence numbers for writes
+	m_sequenceNumber++;
+	return operation;
+}
+
+ExpressionClasses::Id KnownState::loadFromMemory(Id _slot, SourceLocation const& _location)
+{
+	if (m_memoryContent.count(_slot))
+		return m_memoryContent.at(_slot);
+
+	AssemblyItem item(Instruction::MLOAD, _location);
+	return m_memoryContent[_slot] = m_expressionClasses->find(item, {_slot}, true, m_sequenceNumber);
+}
+
+KnownState::Id KnownState::applySha3(
+	Id _start,
+	Id _length,
+	SourceLocation const& _location
+)
+{
+	AssemblyItem sha3Item(Instruction::SHA3, _location);
+	// Special logic if length is a short constant, otherwise we cannot tell.
+	u256 const* l = m_expressionClasses->knownConstant(_length);
+	// unknown or too large length
+	if (!l || *l > 128)
+		return m_expressionClasses->find(sha3Item, {_start, _length}, true, m_sequenceNumber);
+
+	vector<Id> arguments;
+	for (u256 i = 0; i < *l; i += 32)
+	{
+		Id slot = m_expressionClasses->find(
+			AssemblyItem(Instruction::ADD, _location),
+			{_start, m_expressionClasses->find(i)}
+		);
+		arguments.push_back(loadFromMemory(slot, _location));
+	}
+	if (m_knownSha3Hashes.count(arguments))
+		return m_knownSha3Hashes.at(arguments);
+	Id v;
+	// If all arguments are known constants, compute the sha3 here
+	if (all_of(arguments.begin(), arguments.end(), [this](Id _a) { return !!m_expressionClasses->knownConstant(_a); }))
+	{
+		bytes data;
+		for (Id a: arguments)
+			data += toBigEndian(*m_expressionClasses->knownConstant(a));
+		data.resize(size_t(*l));
+		v = m_expressionClasses->find(AssemblyItem(u256(sha3(data)), _location));
+	}
+	else
+		v = m_expressionClasses->find(sha3Item, {_start, _length}, true, m_sequenceNumber);
+	return m_knownSha3Hashes[arguments] = v;
+}
+
diff --git a/KnownState.h b/KnownState.h
new file mode 100644
index 00000000..c6dfcee6
--- /dev/null
+++ b/KnownState.h
@@ -0,0 +1,149 @@
+/*
+	This file is part of cpp-ethereum.
+
+	cpp-ethereum is free software: you can redistribute it and/or modify
+	it under the terms of the GNU General Public License as published by
+	the Free Software Foundation, either version 3 of the License, or
+	(at your option) any later version.
+
+	cpp-ethereum is distributed in the hope that it will be useful,
+	but WITHOUT ANY WARRANTY; without even the implied warranty of
+	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+	GNU General Public License for more details.
+
+	You should have received a copy of the GNU General Public License
+	along with cpp-ethereum.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/**
+ * @file KnownState.h
+ * @author Christian <c@ethdev.com>
+ * @date 2015
+ * Contains knowledge about the state of the virtual machine at a specific instruction.
+ */
+
+#pragma once
+
+#include <vector>
+#include <map>
+#include <set>
+#include <tuple>
+#include <ostream>
+#include <libdevcore/CommonIO.h>
+#include <libdevcore/Exceptions.h>
+#include <libevmasm/ExpressionClasses.h>
+#include <libevmasm/SemanticInformation.h>
+
+namespace dev
+{
+namespace eth
+{
+
+class AssemblyItem;
+using AssemblyItems = std::vector<AssemblyItem>;
+
+/**
+ * Class to infer and store knowledge about the state of the virtual machine at a specific
+ * instruction.
+ *
+ * The general workings are that for each assembly item that is fed, an equivalence class is
+ * derived from the operation and the equivalence class of its arguments. DUPi, SWAPi and some
+ * arithmetic instructions are used to infer equivalences while these classes are determined.
+ */
+class KnownState
+{
+public:
+	using Id = ExpressionClasses::Id;
+	struct StoreOperation
+	{
+		enum Target { Invalid, Memory, Storage };
+		StoreOperation(): target(Invalid), sequenceNumber(-1) {}
+		StoreOperation(
+			Target _target,
+			Id _slot,
+			unsigned _sequenceNumber,
+			Id _expression
+		): target(_target), slot(_slot), sequenceNumber(_sequenceNumber), expression(_expression) {}
+		bool isValid() const { return target != Invalid; }
+		Target target;
+		Id slot;
+		unsigned sequenceNumber;
+		Id expression;
+	};
+
+	KnownState(): m_expressionClasses(std::make_shared<ExpressionClasses>()) {}
+
+	/// Streams debugging information to @a _out.
+	std::ostream& stream(
+		std::ostream& _out,
+		std::map<int, Id> _initialStack = std::map<int, Id>(),
+		std::map<int, Id> _targetStack = std::map<int, Id>()
+	) const;
+
+	/// Feeds the item into the system for analysis.
+	/// @returns a possible store operation
+	StoreOperation feedItem(AssemblyItem const& _item, bool _copyItem = false);
+
+	/// Resets any knowledge about storage.
+	void resetStorage() { m_storageContent.clear(); }
+	/// Resets any knowledge about storage.
+	void resetMemory() { m_memoryContent.clear(); }
+	/// Resets any knowledge about the current stack.
+	void resetStack() { m_stackElements.clear(); m_stackHeight = 0; }
+	/// Resets any knowledge.
+	void reset() { resetStorage(); resetMemory(); resetStack(); }
+
+	///@todo the sequence numbers in two copies of this class should never be the same.
+	/// might be doable using two-dimensional sequence numbers, where the first value is incremented
+	/// for each copy
+
+	/// Retrieves the current equivalence class fo the given stack element (or generates a new
+	/// one if it does not exist yet).
+	Id stackElement(int _stackHeight, SourceLocation const& _location);
+	/// @returns the equivalence class id of the special initial stack element at the given height
+	/// (must not be positive).
+	Id initialStackElement(int _stackHeight, SourceLocation const& _location);
+
+	int stackHeight() const { return m_stackHeight; }
+	std::map<int, Id> const& stackElements() const { return m_stackElements; }
+	ExpressionClasses& expressionClasses() const { return *m_expressionClasses; }
+
+private:
+	/// Assigns a new equivalence class to the next sequence number of the given stack element.
+	void setStackElement(int _stackHeight, Id _class);
+	/// Swaps the given stack elements in their next sequence number.
+	void swapStackElements(int _stackHeightA, int _stackHeightB, SourceLocation const& _location);
+
+	/// Increments the sequence number, deletes all storage information that might be overwritten
+	/// and stores the new value at the given slot.
+	/// @returns the store operation, which might be invalid if storage was not modified
+	StoreOperation storeInStorage(Id _slot, Id _value, SourceLocation const& _location);
+	/// Retrieves the current value at the given slot in storage or creates a new special sload class.
+	Id loadFromStorage(Id _slot, SourceLocation const& _location);
+	/// Increments the sequence number, deletes all memory information that might be overwritten
+	/// and stores the new value at the given slot.
+	/// @returns the store operation, which might be invalid if memory was not modified
+	StoreOperation storeInMemory(Id _slot, Id _value, SourceLocation const& _location);
+	/// Retrieves the current value at the given slot in memory or creates a new special mload class.
+	Id loadFromMemory(Id _slot, SourceLocation const& _location);
+	/// Finds or creates a new expression that applies the sha3 hash function to the contents in memory.
+	Id applySha3(Id _start, Id _length, SourceLocation const& _location);
+
+	/// Current stack height, can be negative.
+	int m_stackHeight = 0;
+	/// Current stack layout, mapping stack height -> equivalence class
+	std::map<int, Id> m_stackElements;
+	/// Current sequence number, this is incremented with each modification to storage or memory.
+	unsigned m_sequenceNumber = 1;
+	/// Knowledge about storage content.
+	std::map<Id, Id> m_storageContent;
+	/// Knowledge about memory content. Keys are memory addresses, note that the values overlap
+	/// and are not contained here if they are not completely known.
+	std::map<Id, Id> m_memoryContent;
+	/// Keeps record of all sha3 hashes that are computed.
+	std::map<std::vector<Id>, Id> m_knownSha3Hashes;
+	/// Structure containing the classes of equivalent expressions.
+	std::shared_ptr<ExpressionClasses> m_expressionClasses;
+};
+
+}
+}
-- 
cgit v1.2.3