-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata_transform.Rmd
126 lines (100 loc) · 3.86 KB
/
data_transform.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
---
title: "Legacy Data Transformation"
author: "Felix Chen"
date: "7/26/2022"
output: html_document
---
# Legacy Data Transformation
Migration from i3 + i4 to i5
Import libraries
```{r}
library(plyr)
library(tidyverse)
```
## Users Table
```{r}
old_users = read_csv("data/masmallc_scas_table_i3_Users.csv", na = c("", "NA", "NULL"))
head(old_users)
# summary(old_users)
# initialize empty dataframe
new_users <- data.frame(matrix(ncol = 0, nrow = nrow(old_users)))
# copy over values from old format
new_users$id <- old_users$UserID
new_users$name <- old_users$UserName
new_users$grad_year <- old_users$YOG
new_users$email <- old_users$Email
new_users$email_verified_at <- NA
new_users$password <- NA
new_users$remember_token <- NA
new_users$created_at <- new_users$updated_at <- format(Sys.time(), "%Y-%m-%d %H:%M:%S")
# write to output file
write_csv(new_users,"output/users.csv", na = "NULL", col_names = FALSE)
```
## Contacts Table
```{r}
old_contacts = read_csv("data/masmallc_scas_table_dbi4_Contacts.csv", na = c("", "NA", "NULL"))
head(old_contacts)
# summary(old_contacts)
# initialize empty dataframe
new_contacts <- data.frame(matrix(ncol = 0, nrow = nrow(old_contacts)))
# copy over values from old format
new_contacts$id <- old_contacts$ContactID
new_contacts$client_id <- old_contacts$ClientID
new_contacts$author_id <- old_contacts$UserAddedID
new_contacts$contact_type_id <- old_contacts$ContactTypeID
new_contacts$contact_date <- old_contacts$ContactDate
new_contacts$contact_summary <- old_contacts$ContactSummary
new_contacts$created_at <- old_contacts$ContactDate
new_contacts$updated_at <- old_contacts$ContactEditDate
# write to output file
write_csv(new_contacts,"output/contacts.csv", na = "", col_names = FALSE)
```
## Categories Table
```{r}
categories = read_csv("category_translations.csv")
new_category_ids <- categories$new_category_id
old_category_ids <- categories$old_category_id
```
## Referral Sources Table
```{r}
referral_sources = read_csv("referral_source_translations.csv", na = c("", "NA", "NULL"))
new_referral_source_ids <- referral_sources$new_referral_source_id
old_referral_source_ids <- referral_sources$old_referral_source_id
```
## Clients Table
```{r}
old_clients = read_csv("data/masmallc_scas_table_db_Clients.csv", na = c("", "NA", "NULL", "0000-00-00"))
head(old_clients)
# summary(old_clients)
# initialize empty dataframe
new_clients <- data.frame(matrix(ncol = 0, nrow = nrow(old_clients)))
# copy over values from old format
new_clients$id <- old_clients$ClientID
new_clients$last_name <- old_clients$LastName
new_clients$first_name <- old_clients$FirstName
new_clients$phone_number <- paste0(old_clients$Phone1AreaCode,'-',old_clients$Phone1Number)
new_clients$email <- old_clients$Email
new_clients$language <- old_clients$Language
new_clients$address_line_1 <- old_clients$Address1
new_clients$address_line_2 <- old_clients$Address2
new_clients$city <- old_clients$City
new_clients$state <- old_clients$State
new_clients$postal_code <- old_clients$ZIP
new_clients$country <- old_clients$Country
new_clients$case_type_id <- old_clients$CaseTypeID
new_clients$case_type_id <- ifelse(new_clients$case_type_id == "1", "2", new_clients$case_type_id)
new_clients$case_type_id <- ifelse(new_clients$case_type_id == "0", "1", new_clients$case_type_id)
new_clients$category_id <- NA
new_clients$category_id <- mapvalues(old_clients$CategoryID,
from=old_category_ids,
to=new_category_ids)
new_clients$referral_source_id <- NA
new_clients$referral_source_id <- mapvalues(old_clients$ReferralSource,
from=old_referral_source_ids,
to=new_referral_source_ids)
new_clients$comments <- old_clients$Notes # this needs to be added
new_clients$created_at <- old_clients$LastEditTime
new_clients$updated_at <- old_clients$LastEditTime
# write to output file
write_csv(new_clients,"output/clients.csv", na = "NULL", col_names = FALSE)
```