import * as ethUtil from 'ethereumjs-util';
import * as _ from 'lodash';

import { constants } from '../utils/constants';
import { EncodingRules } from '../utils/rules';

import { PointerCalldataBlock } from './blocks/pointer';
import { SetCalldataBlock } from './blocks/set';
import { CalldataBlock } from './calldata_block';
import { CalldataIterator, ReverseCalldataIterator } from './iterator';

/**
 * Builds calldata from a tree of calldata blocks.
 *
 * Usage: construct with a set of encoding rules, attach a root block with `setRoot`,
 * optionally attach a selector with `setSelector`, then call `toString` to render the result.
 */
export class Calldata {
    private readonly _rules: EncodingRules;
    private _selector: string;
    private _root: CalldataBlock | undefined;

    /**
     * @param rules Encoding rules; the `optimize` and `annotate` flags are read by `toString`.
     */
    public constructor(rules: EncodingRules) {
        this._rules = rules;
        this._selector = '';
        this._root = undefined;
    }

    /**
     * Sets the root calldata block. This block usually corresponds to a Method.
     */
    public setRoot(block: CalldataBlock): void {
        this._root = block;
    }

    /**
     * Sets the selector to be prepended onto the calldata.
     * If the root block was created by a Method then a selector will likely be set.
     * @param selector Hex string prefixed with '0x' whose total length (prefix included)
     *                 equals `constants.HEX_SELECTOR_LENGTH_IN_CHARS`.
     * @throws If the '0x' prefix is missing or the length is wrong.
     */
    public setSelector(selector: string): void {
        if (!_.startsWith(selector, '0x')) {
            throw new Error(`Expected selector to be hex. Missing prefix '0x'`);
        } else if (selector.length !== constants.HEX_SELECTOR_LENGTH_IN_CHARS) {
            throw new Error(`Invalid selector '${selector}'`);
        }
        this._selector = selector;
    }

    /**
     * Iterates through the calldata blocks, starting from the root block, to construct calldata as a hex string.
     * The behavior is driven by the encoding rules passed to the constructor:
     * If the `optimize` flag is set then this calldata will be condensed, to save gas.
     * If the `annotate` flag is set then this will return human-readable calldata.
     * If the `annotate` flag is *not* set then this will return EVM-compatible calldata.
     * @returns The rendered calldata.
     * @throws If no root block has been set.
     */
    public toString(): string {
        // Sanity check: root block must be set
        const root = this._getRootOrThrow();
        // Optimize, if flag set
        if (this._rules.optimize) {
            this._optimize();
        }
        // Set offsets. Each block is placed immediately after the blocks the iterator yielded before it.
        const iterator = new CalldataIterator(root);
        let offset = 0;
        for (const block of iterator) {
            block.setOffset(offset);
            offset += block.getSizeInBytes();
        }
        // Generate hex string
        return this._rules.annotate ? this._toHumanReadableCallData() : this._toEvmCompatibeCallDataHex();
    }

    /**
     * There are three types of calldata blocks: Blob, Set and Pointer.
     * Scenarios arise where distinct pointers resolve to identical values.
     * We optimize by keeping only one such instance of the identical value, and redirecting all pointers here.
     * We keep the last such duplicate value because pointers can only be positive (they cannot point backwards).
     *
     * Example #1:
     *  function f(string[], string[])
     *  f(["foo", "bar", "blitz"], ["foo", "bar", "blitz"])
     *  The array ["foo", "bar", "blitz"] will only be included in the calldata once.
     *
     * Example #2:
     *  function f(string[], string)
     *  f(["foo", "bar", "blitz"], "foo")
     *  The string "foo" will only be included in the calldata once.
     *
     * Example #3:
     *  function f((string, uint, bytes), string, uint, bytes)
     *  f(("foo", 5, "0x05"), "foo", 5, "0x05")
     *  The string "foo" and bytes "0x05" will only be included in the calldata once.
     *  The duplicate `uint 5` values cannot be optimized out because they are static values (no pointer points to them).
     *
     * @TODO #1:
     *   This optimization strategy handles blocks that are exact duplicates of one another.
     *   But what if some block is a combination of two other blocks? Or a subset of another block?
     *   This optimization problem is not much different from the current implementation.
     *   Instead of tracking "observed" hashes, at each node we would simply do pattern-matching on the calldata.
     *   This strategy would be applied after assigning offsets to the tree, rather than before (as in this strategy).
     *   Note that one consequence of this strategy is pointers may resolve to offsets that are not word-aligned.
     *   This shouldn't be a problem but further investigation should be done.
     *
     * @TODO #2:
     *   To be done as a follow-up to @TODO #1.
     *   Since we optimize from the bottom-up, we could be affecting the outcome of a later potential optimization.
     *   For example, what if by removing one duplicate value we miss out on optimizing another block higher in the tree.
     *   To handle this case, at each node we can store a candidate optimization in a priority queue (sorted by calldata size).
     *   At the end of traversing the tree, the candidate at the front of the queue will be the most optimal output.
     *
     * @throws If no root block has been set.
     */
    private _optimize(): void {
        // Step 1/2 Create a reverse iterator (starts from the end of the calldata to the beginning)
        const iterator = new ReverseCalldataIterator(this._getRootOrThrow());
        // Step 2/2 Iterate over each block, keeping track of which blocks have been seen and pruning redundant blocks.
        const blocksByHash = new Map<string, CalldataBlock>();
        for (const block of iterator) {
            // If a block is a pointer and its value has already been observed, then update
            // the pointer to resolve to the existing value.
            if (block instanceof PointerCalldataBlock) {
                const dependency = block.getDependency();
                const dependencyBlockHash = ethUtil.bufferToHex(dependency.computeHash());
                const blockWithSameHash = blocksByHash.get(dependencyBlockHash);
                // Only alias when the recorded block is a *different* block with the same hash;
                // a pointer whose own dependency is the recorded block is left untouched.
                if (blockWithSameHash !== undefined && blockWithSameHash !== dependency) {
                    block.setAlias(blockWithSameHash);
                }
                continue;
            }
            // Record this block's hash unless an identical block was already recorded.
            // Because we iterate in reverse, the recorded block is the first one seen from the end.
            const blockHash = ethUtil.bufferToHex(block.computeHash());
            if (!blocksByHash.has(blockHash)) {
                blocksByHash.set(blockHash, block);
            }
        }
    }

    /**
     * Returns the calldata as a single hex string: the selector followed by
     * the encoded bytes of every block yielded by the iterator, in order.
     * @throws If no root block has been set.
     */
    private _toEvmCompatibeCallDataHex(): string {
        // Sanity check: must have a root block.
        const root = this._getRootOrThrow();
        // Construct an array of buffers (one buffer for each block), led by the selector.
        const valueBufs: Buffer[] = [ethUtil.toBuffer(this._selector)];
        const iterator = new CalldataIterator(root);
        for (const block of iterator) {
            valueBufs.push(block.toBuffer());
        }
        // Create hex from buffer array.
        return ethUtil.bufferToHex(Buffer.concat(valueBufs));
    }

    /**
     * Returns human-readable calldata.
     *
     * Example:
     *   simpleFunction(string[], string[])
     *   strings = ["Hello", "World"]
     *   simpleFunction(strings, strings)
     *
     * Output:
     *   0xbb4f12e3
     *                                                                                      ### simpleFunction
     *   0x0       0000000000000000000000000000000000000000000000000000000000000040              ptr (alias for array2)
     *   0x20      0000000000000000000000000000000000000000000000000000000000000040              ptr
     *
     *   0x40      0000000000000000000000000000000000000000000000000000000000000002          ### array2
     *   0x60      0000000000000000000000000000000000000000000000000000000000000040              ptr
     *   0x80      0000000000000000000000000000000000000000000000000000000000000080              ptr
     *   0xa0      0000000000000000000000000000000000000000000000000000000000000005              array2[0]
     *   0xc0      48656c6c6f000000000000000000000000000000000000000000000000000000
     *   0xe0      0000000000000000000000000000000000000000000000000000000000000005              array2[1]
     *   0x100     576f726c64000000000000000000000000000000000000000000000000000000
     *
     * @throws If no root block has been set.
     */
    private _toHumanReadableCallData(): string {
        // Sanity check: must have a root block.
        const root = this._getRootOrThrow();
        // Constants for constructing annotated string
        const offsetPadding = 10;
        const valuePadding = 74;
        const namePadding = 80;
        const emptySize = 0;
        const wordWidth = constants.EVM_WORD_WIDTH_IN_BYTES;
        // Column formatters, hoisted so no closure is created per block.
        const formatOffset = (byteOffset: number): string =>
            `0x${byteOffset.toString(constants.HEX_BASE)}`.padEnd(offsetPadding);
        const formatWord = (encoded: Buffer, start: number): string =>
            ethUtil.stripHexPrefix(ethUtil.bufferToHex(encoded.slice(start, start + wordWidth))).padEnd(valuePadding);
        // Construct annotated calldata
        let hexValue = `${this._selector}`;
        let offset = 0;
        const functionName: string = root.getName();
        const iterator = new CalldataIterator(root);
        for (const block of iterator) {
            // Process each block 1 word at a time
            const size = block.getSizeInBytes();
            const name = block.getName();
            const parentName = block.getParentName();
            // Strip the parent and function prefixes so only the local part of the name is shown.
            const prettyName = name.replace(`${parentName}.`, '').replace(`${functionName}.`, '');
            const paddedName = prettyName.padEnd(namePadding);
            let lineStr: string;
            if (size === emptySize) {
                // This is a Set block with no header.
                // For example, a tuple or an array with a defined length.
                // Only a name line is emitted; the block is never encoded.
                lineStr = `\n${' '.repeat(offsetPadding)}${' '.repeat(valuePadding)}### ${paddedName}`;
            } else {
                // This block has at least one word of value.
                // Encode the block once and slice every word out of the same buffer.
                const encodedBlock = block.toBuffer();
                const firstWordColumns = `${formatOffset(offset)}${formatWord(encodedBlock, 0)}`;
                // Set blocks get a leading blank line and a '### ' marker in front of their name.
                lineStr =
                    block instanceof SetCalldataBlock
                        ? `\n${firstWordColumns}### ${paddedName}`
                        : `${firstWordColumns} ${paddedName}`;
                // This block has a value that is more than 1 word: remaining words get a blank name column.
                for (let j = wordWidth; j < size; j += wordWidth) {
                    lineStr = `${lineStr}\n${formatOffset(offset + j)}${formatWord(encodedBlock, j)}${' '.repeat(
                        namePadding,
                    )}`;
                }
            }
            // Append to hex value
            hexValue = `${hexValue}\n${lineStr}`;
            offset += size;
        }
        return hexValue;
    }

    /**
     * Returns the root block, narrowing away `undefined`.
     * @throws If no root block has been set.
     */
    private _getRootOrThrow(): CalldataBlock {
        if (_.isUndefined(this._root)) {
            throw new Error('expected root');
        }
        return this._root;
    }
}