Diffstat (limited to 'packages/pipeline/src/scripts')
-rw-r--r--  packages/pipeline/src/scripts/pull_copper.ts          | 129
-rw-r--r--  packages/pipeline/src/scripts/pull_missing_blocks.ts  |  39
2 files changed, 149 insertions(+), 19 deletions(-)
diff --git a/packages/pipeline/src/scripts/pull_copper.ts b/packages/pipeline/src/scripts/pull_copper.ts
new file mode 100644
index 000000000..69814f209
--- /dev/null
+++ b/packages/pipeline/src/scripts/pull_copper.ts
@@ -0,0 +1,129 @@
+// tslint:disable:no-console
+import * as R from 'ramda';
+import { Connection, ConnectionOptions, createConnection, Repository } from 'typeorm';
+
+import { CopperEndpoint, CopperSearchParams, CopperSource } from '../data_sources/copper';
+import { CopperActivity, CopperActivityType, CopperCustomField, CopperLead, CopperOpportunity } from '../entities';
+import * as ormConfig from '../ormconfig';
+import {
+ CopperSearchResponse,
+ parseActivities,
+ parseActivityTypes,
+ parseCustomFields,
+ parseLeads,
+ parseOpportunities,
+} from '../parsers/copper';
+import { handleError } from '../utils';
+const ONE_SECOND = 1000;
+const COPPER_RATE_LIMIT = 10;
+let connection: Connection;
+
+(async () => {
+ connection = await createConnection(ormConfig as ConnectionOptions);
+
+ const accessToken = process.env.COPPER_ACCESS_TOKEN;
+ const userEmail = process.env.COPPER_USER_EMAIL;
+ if (accessToken === undefined || userEmail === undefined) {
+ throw new Error('Missing required env var: COPPER_ACCESS_TOKEN and/or COPPER_USER_EMAIL');
+ }
+ const source = new CopperSource(COPPER_RATE_LIMIT, accessToken, userEmail);
+
+ const fetchPromises = [
+ fetchAndSaveLeadsAsync(source),
+ fetchAndSaveOpportunitiesAsync(source),
+ fetchAndSaveActivitiesAsync(source),
+ fetchAndSaveCustomFieldsAsync(source),
+ fetchAndSaveActivityTypesAsync(source),
+ ];
+    // Promise.all (rather than a forEach with an async callback) lets a
+    // rejection from any fetch propagate to the outer catch(handleError).
+    await Promise.all(fetchPromises);
+})().catch(handleError);
+
+async function fetchAndSaveLeadsAsync(source: CopperSource): Promise<void> {
+ const repository = connection.getRepository(CopperLead);
+ const startTime = await getMaxAsync(connection, 'date_modified', 'raw.copper_leads');
+ console.log(`Fetching Copper leads starting from ${startTime}...`);
+ await fetchAndSaveAsync(CopperEndpoint.Leads, source, startTime, {}, parseLeads, repository);
+}
+
+async function fetchAndSaveOpportunitiesAsync(source: CopperSource): Promise<void> {
+ const repository = connection.getRepository(CopperOpportunity);
+ const startTime = await getMaxAsync(connection, 'date_modified', 'raw.copper_opportunities');
+ console.log(`Fetching Copper opportunities starting from ${startTime}...`);
+ await fetchAndSaveAsync(
+ CopperEndpoint.Opportunities,
+ source,
+ startTime,
+ { sort_by: 'name' },
+ parseOpportunities,
+ repository,
+ );
+}
+
+async function fetchAndSaveActivitiesAsync(source: CopperSource): Promise<void> {
+ const repository = connection.getRepository(CopperActivity);
+ const startTime = await getMaxAsync(connection, 'date_modified', 'raw.copper_activities');
+ const searchParams = {
+ minimum_activity_date: Math.floor(startTime / ONE_SECOND),
+ };
+ console.log(`Fetching Copper activities starting from ${startTime}...`);
+ await fetchAndSaveAsync(CopperEndpoint.Activities, source, startTime, searchParams, parseActivities, repository);
+}
+
+async function getMaxAsync(conn: Connection, sortColumn: string, tableName: string): Promise<number> {
+ const queryResult = await conn.query(`SELECT MAX(${sortColumn}) as _max from ${tableName};`);
+    if (R.isEmpty(queryResult) || R.isNil(queryResult[0]._max)) {
+        return 0; // MAX() yields a single NULL row when the table is empty
+    } else {
+        return queryResult[0]._max;
+    }
+}
+
+// (Xianny): The Copper API doesn't allow queries to filter by date. To fill in ascending chronological
+// order without missing any records, we scrape all available pages. If the Copper data set grows,
+// it would make sense to search for the first page that contains a new record and start filling from
+// there, but that search would add network calls and isn't worth implementing at our current small
+// volume of Copper records.
+async function fetchAndSaveAsync<T extends CopperSearchResponse, E>(
+ endpoint: CopperEndpoint,
+ source: CopperSource,
+ startTime: number,
+ searchParams: CopperSearchParams,
+ parseFn: (recs: T[]) => E[],
+ repository: Repository<E>,
+): Promise<void> {
+ let saved = 0;
+ const numPages = await source.fetchNumberOfPagesAsync(endpoint);
+ try {
+ for (let i = numPages; i > 0; i--) {
+ console.log(`Fetching page ${i}/${numPages} of ${endpoint}...`);
+ const raw = await source.fetchSearchResultsAsync<T>(endpoint, {
+ ...searchParams,
+ page_number: i,
+ });
+ const newRecords = raw.filter(rec => rec.date_modified * ONE_SECOND > startTime);
+ const parsed = parseFn(newRecords);
+ await repository.save<any>(parsed);
+ saved += newRecords.length;
+ }
+ } catch (err) {
+        console.error(`Error fetching ${endpoint}, stopping: ${err.stack}`);
+ } finally {
+ console.log(`Saved ${saved} items from ${endpoint}, done.`);
+ }
+}
+
+async function fetchAndSaveActivityTypesAsync(source: CopperSource): Promise<void> {
+ console.log(`Fetching Copper activity types...`);
+ const activityTypes = await source.fetchActivityTypesAsync();
+ const repository = connection.getRepository(CopperActivityType);
+ await repository.save(parseActivityTypes(activityTypes));
+}
+
+async function fetchAndSaveCustomFieldsAsync(source: CopperSource): Promise<void> {
+ console.log(`Fetching Copper custom fields...`);
+ const customFields = await source.fetchCustomFieldsAsync();
+ const repository = connection.getRepository(CopperCustomField);
+ await repository.save(parseCustomFields(customFields));
+}
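
The core pattern in pull_copper.ts is an incremental backfill: fetchAndSaveAsync takes the newest date_modified already stored as a watermark, walks every page of the paginated search endpoint from last to first, and keeps only records newer than the watermark. Below is a minimal sketch of that pattern; backfillAsync, fetchPageAsync, and saveAsync are hypothetical names standing in for the CopperSource and TypeORM repository calls, and only the control flow mirrors the script.

    const MS_PER_SECOND = 1000;

    interface Timestamped {
        date_modified: number; // Unix seconds, as reported by the Copper API
    }

    async function backfillAsync<T extends Timestamped>(
        numPages: number,
        startTime: number, // milliseconds: MAX(date_modified) already in the table
        fetchPageAsync: (page: number) => Promise<T[]>,
        saveAsync: (records: T[]) => Promise<void>,
    ): Promise<number> {
        let saved = 0;
        // Scrape every page, last to first, so no record is missed; the
        // watermark filter below discards rows that are already stored.
        for (let page = numPages; page > 0; page--) {
            const records = await fetchPageAsync(page);
            const fresh = records.filter(rec => rec.date_modified * MS_PER_SECOND > startTime);
            await saveAsync(fresh);
            saved += fresh.length;
        }
        return saved;
    }

To run the script, COPPER_ACCESS_TOKEN and COPPER_USER_EMAIL must be set in the environment; COPPER_RATE_LIMIT (10) is handed to the CopperSource constructor, presumably leaving request throttling to the source rather than to this loop.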
diff --git a/packages/pipeline/src/scripts/pull_missing_blocks.ts b/packages/pipeline/src/scripts/pull_missing_blocks.ts
index bb5385126..ced9d99eb 100644
--- a/packages/pipeline/src/scripts/pull_missing_blocks.ts
+++ b/packages/pipeline/src/scripts/pull_missing_blocks.ts
@@ -14,20 +14,29 @@ import { handleError, INFURA_ROOT_URL } from '../utils';
// Number of blocks to save at once.
const BATCH_SAVE_SIZE = 1000;
// Maximum number of requests to send at once.
-const MAX_CONCURRENCY = 10;
+const MAX_CONCURRENCY = 20;
// Maximum number of blocks to query for at once. This is also the maximum
// number of blocks we will hold in memory prior to being saved to the database.
const MAX_BLOCKS_PER_QUERY = 1000;
let connection: Connection;
+const tablesWithMissingBlocks = [
+ 'raw.exchange_fill_events',
+ 'raw.exchange_cancel_events',
+ 'raw.exchange_cancel_up_to_events',
+ 'raw.erc20_approval_events',
+];
+
(async () => {
    connection = await createConnection(ormConfig as ConnectionOptions);
    const provider = web3Factory.getRpcProvider({
        rpcUrl: INFURA_ROOT_URL,
    });
    const web3Source = new Web3Source(provider);
-    await getAllMissingBlocksAsync(web3Source);
+    for (const tableName of tablesWithMissingBlocks) {
+        await getAllMissingBlocksAsync(web3Source, tableName);
+    }
    process.exit(0);
})().catch(handleError);
@@ -35,10 +44,11 @@ interface MissingBlocksResponse {
    block_number: string;
}
-async function getAllMissingBlocksAsync(web3Source: Web3Source): Promise<void> {
+async function getAllMissingBlocksAsync(web3Source: Web3Source, tableName: string): Promise<void> {
    const blocksRepository = connection.getRepository(Block);
    while (true) {
- const blockNumbers = await getMissingBlockNumbersAsync();
+ console.log(`Checking for missing blocks in ${tableName}...`);
+ const blockNumbers = await getMissingBlockNumbersAsync(tableName);
        if (blockNumbers.length === 0) {
            // There are no more missing blocks. We're done.
            break;
@@ -46,24 +56,14 @@ async function getAllMissingBlocksAsync(web3Source: Web3Source): Promise<void> {
        await getAndSaveBlocksAsync(web3Source, blocksRepository, blockNumbers);
    }
    const totalBlocks = await blocksRepository.count();
- console.log(`Done saving blocks. There are now ${totalBlocks} total blocks.`);
+ console.log(`Done saving blocks for ${tableName}. There are now ${totalBlocks} total blocks.`);
}
-async function getMissingBlockNumbersAsync(): Promise<number[]> {
- // Note(albrow): The easiest way to get all the blocks we need is to
- // consider all the events tables together in a single query. If this query
- // gets too slow, we should consider re-architecting so that we can work on
- // getting the blocks for one type of event at a time.
+async function getMissingBlockNumbersAsync(tableName: string): Promise<number[]> {
+ // This query returns up to `MAX_BLOCKS_PER_QUERY` distinct block numbers
+ // which are present in `tableName` but not in `raw.blocks`.
    const response = (await connection.query(
- `WITH all_events AS (
- SELECT block_number FROM raw.exchange_fill_events
- UNION SELECT block_number FROM raw.exchange_cancel_events
- UNION SELECT block_number FROM raw.exchange_cancel_up_to_events
- UNION SELECT block_number FROM raw.erc20_approval_events
- )
- SELECT DISTINCT(block_number) FROM all_events
- WHERE block_number NOT IN (SELECT number FROM raw.blocks)
- ORDER BY block_number ASC LIMIT $1`,
+ `SELECT DISTINCT(block_number) FROM ${tableName} LEFT JOIN raw.blocks ON ${tableName}.block_number = raw.blocks.number WHERE number IS NULL LIMIT $1;`,
        [MAX_BLOCKS_PER_QUERY],
    )) as MissingBlocksResponse[];
    const blockNumberStrings = R.pluck('block_number', response);
@@ -86,4 +86,5 @@ async function getAndSaveBlocksAsync(
    const blocks = R.map(parseBlock, rawBlocks);
    console.log(`Saving ${blocks.length} blocks...`);
    await blocksRepository.save(blocks, { chunk: Math.ceil(blocks.length / BATCH_SAVE_SIZE) });
+ console.log('Done saving this batch of blocks');
}
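
The rewritten getMissingBlockNumbersAsync replaces the old four-table UNION with a per-table anti-join: LEFT JOIN the events table against raw.blocks and keep only the rows with no matching block. Because tableName is interpolated into the SQL string rather than bound as a parameter, callers must pass only names from the hard-coded tablesWithMissingBlocks whitelist. A standalone sketch of the same query shape, assuming a TypeORM Connection and the table and column names shown in the diff (findMissingBlockNumbersAsync is a hypothetical name):

    import { Connection } from 'typeorm';

    const MAX_BLOCKS_PER_QUERY = 1000;

    // Anti-join: event rows whose block_number has no matching raw.blocks.number
    // come back with a NULL join column, i.e. their block is missing.
    // tableName is interpolated, not parameterized, so it must come from a
    // trusted whitelist such as tablesWithMissingBlocks.
    async function findMissingBlockNumbersAsync(conn: Connection, tableName: string): Promise<number[]> {
        const rows = (await conn.query(
            `SELECT DISTINCT(block_number) FROM ${tableName}
             LEFT JOIN raw.blocks ON ${tableName}.block_number = raw.blocks.number
             WHERE raw.blocks.number IS NULL
             LIMIT $1;`,
            [MAX_BLOCKS_PER_QUERY],
        )) as Array<{ block_number: string }>;
        // Postgres returns bigint columns as strings; parse before use.
        return rows.map(row => parseInt(row.block_number, 10));
    }

Unlike the old query there is no ORDER BY, but none is needed for correctness: the surrounding while loop keeps requesting batches until no block numbers are missing. Querying one table at a time also lets the script log progress per table, at the cost of one query per table per pass.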