diff options
author | Alex Browne <stephenalexbrowne@gmail.com> | 2019-01-08 07:04:00 +0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-01-08 07:04:00 +0800 |
commit | 7dda953bc929e218121c331fedb3884b24555855 (patch) | |
tree | cb82f82ce4264e5faac5213936837acfec121e72 /packages/pipeline/src | |
parent | b8f3fa9c8cf215e73a496ea99e1748bad21efc40 (diff) | |
download | dexon-sol-tools-7dda953bc929e218121c331fedb3884b24555855.tar dexon-sol-tools-7dda953bc929e218121c331fedb3884b24555855.tar.gz dexon-sol-tools-7dda953bc929e218121c331fedb3884b24555855.tar.bz2 dexon-sol-tools-7dda953bc929e218121c331fedb3884b24555855.tar.lz dexon-sol-tools-7dda953bc929e218121c331fedb3884b24555855.tar.xz dexon-sol-tools-7dda953bc929e218121c331fedb3884b24555855.tar.zst dexon-sol-tools-7dda953bc929e218121c331fedb3884b24555855.zip |
Optimize SQL queries in pull_missing_blocks (#1458)
* Optimize SQL queries in pull_missing_blocks
* Update comment in pull_missing_blocks
Diffstat (limited to 'packages/pipeline/src')
-rw-r--r-- | packages/pipeline/src/scripts/pull_missing_blocks.ts | 39 |
1 files changed, 20 insertions, 19 deletions
diff --git a/packages/pipeline/src/scripts/pull_missing_blocks.ts b/packages/pipeline/src/scripts/pull_missing_blocks.ts index bb5385126..ced9d99eb 100644 --- a/packages/pipeline/src/scripts/pull_missing_blocks.ts +++ b/packages/pipeline/src/scripts/pull_missing_blocks.ts @@ -14,20 +14,29 @@ import { handleError, INFURA_ROOT_URL } from '../utils'; // Number of blocks to save at once. const BATCH_SAVE_SIZE = 1000; // Maximum number of requests to send at once. -const MAX_CONCURRENCY = 10; +const MAX_CONCURRENCY = 20; // Maximum number of blocks to query for at once. This is also the maximum // number of blocks we will hold in memory prior to being saved to the database. const MAX_BLOCKS_PER_QUERY = 1000; let connection: Connection; +const tablesWithMissingBlocks = [ + 'raw.exchange_fill_events', + 'raw.exchange_cancel_events', + 'raw.exchange_cancel_up_to_events', + 'raw.erc20_approval_events', +]; + (async () => { connection = await createConnection(ormConfig as ConnectionOptions); const provider = web3Factory.getRpcProvider({ rpcUrl: INFURA_ROOT_URL, }); const web3Source = new Web3Source(provider); - await getAllMissingBlocksAsync(web3Source); + for (const tableName of tablesWithMissingBlocks) { + await getAllMissingBlocksAsync(web3Source, tableName); + } process.exit(0); })().catch(handleError); @@ -35,10 +44,11 @@ interface MissingBlocksResponse { block_number: string; } -async function getAllMissingBlocksAsync(web3Source: Web3Source): Promise<void> { +async function getAllMissingBlocksAsync(web3Source: Web3Source, tableName: string): Promise<void> { const blocksRepository = connection.getRepository(Block); while (true) { - const blockNumbers = await getMissingBlockNumbersAsync(); + console.log(`Checking for missing blocks in ${tableName}...`); + const blockNumbers = await getMissingBlockNumbersAsync(tableName); if (blockNumbers.length === 0) { // There are no more missing blocks. We're done. break; @@ -46,24 +56,14 @@ async function getAllMissingBlocksAsync(web3Source: Web3Source): Promise<void> { await getAndSaveBlocksAsync(web3Source, blocksRepository, blockNumbers); } const totalBlocks = await blocksRepository.count(); - console.log(`Done saving blocks. There are now ${totalBlocks} total blocks.`); + console.log(`Done saving blocks for ${tableName}. There are now ${totalBlocks} total blocks.`); } -async function getMissingBlockNumbersAsync(): Promise<number[]> { - // Note(albrow): The easiest way to get all the blocks we need is to - // consider all the events tables together in a single query. If this query - // gets too slow, we should consider re-architecting so that we can work on - // getting the blocks for one type of event at a time. +async function getMissingBlockNumbersAsync(tableName: string): Promise<number[]> { + // This query returns up to `MAX_BLOCKS_PER_QUERY` distinct block numbers + // which are present in `tableName` but not in `raw.blocks`. const response = (await connection.query( - `WITH all_events AS ( - SELECT block_number FROM raw.exchange_fill_events - UNION SELECT block_number FROM raw.exchange_cancel_events - UNION SELECT block_number FROM raw.exchange_cancel_up_to_events - UNION SELECT block_number FROM raw.erc20_approval_events - ) - SELECT DISTINCT(block_number) FROM all_events - WHERE block_number NOT IN (SELECT number FROM raw.blocks) - ORDER BY block_number ASC LIMIT $1`, + `SELECT DISTINCT(block_number) FROM ${tableName} LEFT JOIN raw.blocks ON ${tableName}.block_number = raw.blocks.number WHERE number IS NULL LIMIT $1;`, [MAX_BLOCKS_PER_QUERY], )) as MissingBlocksResponse[]; const blockNumberStrings = R.pluck('block_number', response); @@ -86,4 +86,5 @@ async function getAndSaveBlocksAsync( const blocks = R.map(parseBlock, rawBlocks); console.log(`Saving ${blocks.length} blocks...`); await blocksRepository.save(blocks, { chunk: Math.ceil(blocks.length / BATCH_SAVE_SIZE) }); + console.log('Done saving this batch of blocks'); } |