diff options
Diffstat (limited to 'packages/pipeline/src/scripts')
-rw-r--r-- | packages/pipeline/src/scripts/pull_copper.ts | 129 | ||||
-rw-r--r-- | packages/pipeline/src/scripts/pull_missing_blocks.ts | 39 | ||||
-rw-r--r-- | packages/pipeline/src/scripts/pull_radar_relay_orders.ts | 29 |
3 files changed, 167 insertions, 30 deletions
diff --git a/packages/pipeline/src/scripts/pull_copper.ts b/packages/pipeline/src/scripts/pull_copper.ts new file mode 100644 index 000000000..69814f209 --- /dev/null +++ b/packages/pipeline/src/scripts/pull_copper.ts @@ -0,0 +1,129 @@ +// tslint:disable:no-console +import * as R from 'ramda'; +import { Connection, ConnectionOptions, createConnection, Repository } from 'typeorm'; + +import { CopperEndpoint, CopperSearchParams, CopperSource } from '../data_sources/copper'; +import { CopperActivity, CopperActivityType, CopperCustomField, CopperLead, CopperOpportunity } from '../entities'; +import * as ormConfig from '../ormconfig'; +import { + CopperSearchResponse, + parseActivities, + parseActivityTypes, + parseCustomFields, + parseLeads, + parseOpportunities, +} from '../parsers/copper'; +import { handleError } from '../utils'; +const ONE_SECOND = 1000; +const COPPER_RATE_LIMIT = 10; +let connection: Connection; + +(async () => { + connection = await createConnection(ormConfig as ConnectionOptions); + + const accessToken = process.env.COPPER_ACCESS_TOKEN; + const userEmail = process.env.COPPER_USER_EMAIL; + if (accessToken === undefined || userEmail === undefined) { + throw new Error('Missing required env var: COPPER_ACCESS_TOKEN and/or COPPER_USER_EMAIL'); + } + const source = new CopperSource(COPPER_RATE_LIMIT, accessToken, userEmail); + + const fetchPromises = [ + fetchAndSaveLeadsAsync(source), + fetchAndSaveOpportunitiesAsync(source), + fetchAndSaveActivitiesAsync(source), + fetchAndSaveCustomFieldsAsync(source), + fetchAndSaveActivityTypesAsync(source), + ]; + fetchPromises.forEach(async fn => { + await fn; + }); +})().catch(handleError); + +async function fetchAndSaveLeadsAsync(source: CopperSource): Promise<void> { + const repository = connection.getRepository(CopperLead); + const startTime = await getMaxAsync(connection, 'date_modified', 'raw.copper_leads'); + console.log(`Fetching Copper leads starting from ${startTime}...`); + await fetchAndSaveAsync(CopperEndpoint.Leads, source, startTime, {}, parseLeads, repository); +} + +async function fetchAndSaveOpportunitiesAsync(source: CopperSource): Promise<void> { + const repository = connection.getRepository(CopperOpportunity); + const startTime = await getMaxAsync(connection, 'date_modified', 'raw.copper_opportunities'); + console.log(`Fetching Copper opportunities starting from ${startTime}...`); + await fetchAndSaveAsync( + CopperEndpoint.Opportunities, + source, + startTime, + { sort_by: 'name' }, + parseOpportunities, + repository, + ); +} + +async function fetchAndSaveActivitiesAsync(source: CopperSource): Promise<void> { + const repository = connection.getRepository(CopperActivity); + const startTime = await getMaxAsync(connection, 'date_modified', 'raw.copper_activities'); + const searchParams = { + minimum_activity_date: Math.floor(startTime / ONE_SECOND), + }; + console.log(`Fetching Copper activities starting from ${startTime}...`); + await fetchAndSaveAsync(CopperEndpoint.Activities, source, startTime, searchParams, parseActivities, repository); +} + +async function getMaxAsync(conn: Connection, sortColumn: string, tableName: string): Promise<number> { + const queryResult = await conn.query(`SELECT MAX(${sortColumn}) as _max from ${tableName};`); + if (R.isEmpty(queryResult)) { + return 0; + } else { + return queryResult[0]._max; + } +} + +// (Xianny): Copper API doesn't allow queries to filter by date. To ensure that we are filling in ascending chronological +// order and not missing any records, we are scraping all available pages. If Copper data gets larger, +// it would make sense to search for and start filling from the first page that contains a new record. +// This search would increase our network calls and is not efficient to implement with our current small volume +// of Copper records. +async function fetchAndSaveAsync<T extends CopperSearchResponse, E>( + endpoint: CopperEndpoint, + source: CopperSource, + startTime: number, + searchParams: CopperSearchParams, + parseFn: (recs: T[]) => E[], + repository: Repository<E>, +): Promise<void> { + let saved = 0; + const numPages = await source.fetchNumberOfPagesAsync(endpoint); + try { + for (let i = numPages; i > 0; i--) { + console.log(`Fetching page ${i}/${numPages} of ${endpoint}...`); + const raw = await source.fetchSearchResultsAsync<T>(endpoint, { + ...searchParams, + page_number: i, + }); + const newRecords = raw.filter(rec => rec.date_modified * ONE_SECOND > startTime); + const parsed = parseFn(newRecords); + await repository.save<any>(parsed); + saved += newRecords.length; + } + } catch (err) { + console.log(`Error fetching ${endpoint}, stopping: ${err.stack}`); + } finally { + console.log(`Saved ${saved} items from ${endpoint}, done.`); + } +} + +async function fetchAndSaveActivityTypesAsync(source: CopperSource): Promise<void> { + console.log(`Fetching Copper activity types...`); + const activityTypes = await source.fetchActivityTypesAsync(); + const repository = connection.getRepository(CopperActivityType); + await repository.save(parseActivityTypes(activityTypes)); +} + +async function fetchAndSaveCustomFieldsAsync(source: CopperSource): Promise<void> { + console.log(`Fetching Copper custom fields...`); + const customFields = await source.fetchCustomFieldsAsync(); + const repository = connection.getRepository(CopperCustomField); + await repository.save(parseCustomFields(customFields)); +} diff --git a/packages/pipeline/src/scripts/pull_missing_blocks.ts b/packages/pipeline/src/scripts/pull_missing_blocks.ts index bb5385126..ced9d99eb 100644 --- a/packages/pipeline/src/scripts/pull_missing_blocks.ts +++ b/packages/pipeline/src/scripts/pull_missing_blocks.ts @@ -14,20 +14,29 @@ import { handleError, INFURA_ROOT_URL } from '../utils'; // Number of blocks to save at once. const BATCH_SAVE_SIZE = 1000; // Maximum number of requests to send at once. -const MAX_CONCURRENCY = 10; +const MAX_CONCURRENCY = 20; // Maximum number of blocks to query for at once. This is also the maximum // number of blocks we will hold in memory prior to being saved to the database. const MAX_BLOCKS_PER_QUERY = 1000; let connection: Connection; +const tablesWithMissingBlocks = [ + 'raw.exchange_fill_events', + 'raw.exchange_cancel_events', + 'raw.exchange_cancel_up_to_events', + 'raw.erc20_approval_events', +]; + (async () => { connection = await createConnection(ormConfig as ConnectionOptions); const provider = web3Factory.getRpcProvider({ rpcUrl: INFURA_ROOT_URL, }); const web3Source = new Web3Source(provider); - await getAllMissingBlocksAsync(web3Source); + for (const tableName of tablesWithMissingBlocks) { + await getAllMissingBlocksAsync(web3Source, tableName); + } process.exit(0); })().catch(handleError); @@ -35,10 +44,11 @@ interface MissingBlocksResponse { block_number: string; } -async function getAllMissingBlocksAsync(web3Source: Web3Source): Promise<void> { +async function getAllMissingBlocksAsync(web3Source: Web3Source, tableName: string): Promise<void> { const blocksRepository = connection.getRepository(Block); while (true) { - const blockNumbers = await getMissingBlockNumbersAsync(); + console.log(`Checking for missing blocks in ${tableName}...`); + const blockNumbers = await getMissingBlockNumbersAsync(tableName); if (blockNumbers.length === 0) { // There are no more missing blocks. We're done. break; @@ -46,24 +56,14 @@ async function getAllMissingBlocksAsync(web3Source: Web3Source): Promise<void> { await getAndSaveBlocksAsync(web3Source, blocksRepository, blockNumbers); } const totalBlocks = await blocksRepository.count(); - console.log(`Done saving blocks. There are now ${totalBlocks} total blocks.`); + console.log(`Done saving blocks for ${tableName}. There are now ${totalBlocks} total blocks.`); } -async function getMissingBlockNumbersAsync(): Promise<number[]> { - // Note(albrow): The easiest way to get all the blocks we need is to - // consider all the events tables together in a single query. If this query - // gets too slow, we should consider re-architecting so that we can work on - // getting the blocks for one type of event at a time. +async function getMissingBlockNumbersAsync(tableName: string): Promise<number[]> { + // This query returns up to `MAX_BLOCKS_PER_QUERY` distinct block numbers + // which are present in `tableName` but not in `raw.blocks`. const response = (await connection.query( - `WITH all_events AS ( - SELECT block_number FROM raw.exchange_fill_events - UNION SELECT block_number FROM raw.exchange_cancel_events - UNION SELECT block_number FROM raw.exchange_cancel_up_to_events - UNION SELECT block_number FROM raw.erc20_approval_events - ) - SELECT DISTINCT(block_number) FROM all_events - WHERE block_number NOT IN (SELECT number FROM raw.blocks) - ORDER BY block_number ASC LIMIT $1`, + `SELECT DISTINCT(block_number) FROM ${tableName} LEFT JOIN raw.blocks ON ${tableName}.block_number = raw.blocks.number WHERE number IS NULL LIMIT $1;`, [MAX_BLOCKS_PER_QUERY], )) as MissingBlocksResponse[]; const blockNumberStrings = R.pluck('block_number', response); @@ -86,4 +86,5 @@ async function getAndSaveBlocksAsync( const blocks = R.map(parseBlock, rawBlocks); console.log(`Saving ${blocks.length} blocks...`); await blocksRepository.save(blocks, { chunk: Math.ceil(blocks.length / BATCH_SAVE_SIZE) }); + console.log('Done saving this batch of blocks'); } diff --git a/packages/pipeline/src/scripts/pull_radar_relay_orders.ts b/packages/pipeline/src/scripts/pull_radar_relay_orders.ts index 40bb6fc97..03fc764f2 100644 --- a/packages/pipeline/src/scripts/pull_radar_relay_orders.ts +++ b/packages/pipeline/src/scripts/pull_radar_relay_orders.ts @@ -29,18 +29,23 @@ async function getOrderbookAsync(): Promise<void> { console.log(`Got ${rawOrders.records.length} orders.`); console.log('Parsing orders...'); // Parse the sra orders, then add source url to each. - const orders = R.pipe(parseSraOrders, R.map(setSourceUrl(RADAR_RELAY_URL)))(rawOrders); + const orders = R.pipe( + parseSraOrders, + R.map(setSourceUrl(RADAR_RELAY_URL)), + )(rawOrders); // Save all the orders and update the observed time stamps in a single // transaction. console.log('Saving orders and updating timestamps...'); const observedTimestamp = Date.now(); - await connection.transaction(async (manager: EntityManager): Promise<void> => { - for (const order of orders) { - await manager.save(SraOrder, order); - const orderObservation = createObservedTimestampForOrder(order, observedTimestamp); - await manager.save(orderObservation); - } - }); + await connection.transaction( + async (manager: EntityManager): Promise<void> => { + for (const order of orders) { + await manager.save(SraOrder, order); + const orderObservation = createObservedTimestampForOrder(order, observedTimestamp); + await manager.save(orderObservation); + } + }, + ); } const sourceUrlProp = R.lensProp('sourceUrl'); @@ -49,6 +54,8 @@ const sourceUrlProp = R.lensProp('sourceUrl'); * Sets the source url for a single order. Returns a new order instead of * mutating the given one. */ -const setSourceUrl = R.curry((sourceURL: string, order: SraOrder): SraOrder => { - return R.set(sourceUrlProp, sourceURL, order); -}); +const setSourceUrl = R.curry( + (sourceURL: string, order: SraOrder): SraOrder => { + return R.set(sourceUrlProp, sourceURL, order); + }, +); |