Skip to content

Commit

Permalink
speed up deduplication query: thanks to Hiemanshu for this improvement!
Browse files Browse the repository at this point in the history
  • Loading branch information
schuyler1d committed Sep 3, 2020
1 parent a547c46 commit 628eb37
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 12 deletions.
11 changes: 9 additions & 2 deletions __test__/extensions/contact-loaders/csv-upload.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,13 @@ const dupeContacts = [
zip: "10025",
custom_fields: '{"custom1": "abc"}'
},
{
first_name: "second",
last_name: "thirdlast",
cell: "+12125550100",
zip: "10025",
custom_fields: '{"custom1": "xyz"}'
},
{
first_name: "fdsa",
last_name: "yyyy",
Expand Down Expand Up @@ -111,7 +118,7 @@ describe("ingest-contact-loader method: csv-upload backend", async () => {
expect(dbContacts[0].last_name).toBe("xxxx");
expect(dbContacts[0].custom_fields).toBe('{"custom1": "abc"}');
});
it("csv-upload:processContactLoad dedupe", async () => {
it("csv-upload:processContactLoad dedupe last wins", async () => {
const job = {
payload: await gzip(JSON.stringify({ contacts: dupeContacts })),
campaign_id: testCampaign.id,
Expand All @@ -127,7 +134,7 @@ describe("ingest-contact-loader method: csv-upload backend", async () => {
.where("campaign_id", testCampaign.id)
.first();
expect(dbContacts.length).toBe(1);
expect(adminResult.duplicate_contacts_count).toBe(1);
expect(adminResult.duplicate_contacts_count).toBe(2);
expect(adminResult.contacts_count).toBe(1);
expect(dbContacts[0].first_name).toBe("fdsa");
expect(dbContacts[0].last_name).toBe("yyyy");
Expand Down
17 changes: 7 additions & 10 deletions src/workers/jobs.js
Original file line number Diff line number Diff line change
Expand Up @@ -291,28 +291,25 @@ export async function completeContactLoad(
console.log("Error deleting opt-outs:", campaignId, err);
});

// delete duplicate cells
// delete duplicate cells (last wins)
await r
.knex("campaign_contact")
.whereIn(
.whereNotIn(
"id",
r
.knex("campaign_contact")
.select("campaign_contact.id")
.leftJoin("campaign_contact AS c2", function joinSelf() {
this.on("c2.campaign_id", "=", "campaign_contact.campaign_id")
.andOn("c2.cell", "=", "campaign_contact.cell")
.andOn("c2.id", ">", "campaign_contact.id");
})
.where("campaign_contact.campaign_id", campaignId)
.whereNotNull("c2.id")
.select(r.knex.raw("max(id) as id"))
.where("campaign_id", campaignId)
.groupBy("cell")
)
.where("campaign_contact.campaign_id", campaignId)
.delete()
.then(result => {
deleteDuplicateCells = result;
console.log("Deduplication result", campaignId, result);
})
.catch(err => {
deleteDuplicateCells = -1;
console.error("Failed deduplication", campaignId, err);
});

Expand Down

0 comments on commit 628eb37

Please sign in to comment.