-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathfunction_read404.R
173 lines (149 loc) · 6.46 KB
/
function_read404.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
## Currently, the cost reports are separated based on year and population
## To have a meaningful master dataset, first have to compile and format each individual dataset
library(tidyverse)
# Function to read section 404 data from .csv format
read404 <- function(path, fy, pop = c("DD", "MIA", "MIC")) {
df <- read_csv(path) %>%
select(1, # CMH Name (column name not always present)
contains("Service"), # Service Descriptoin
contains("Revenue"), # Revenue Code
contains("HCPCS"), # HCPC Code
contains("Modifier"), # Code Modifier
contains("Unit") & contains("Type"), # Unit Type
ends_with("Cases"), # Sum of Cases
ends_with("Units"), # Sum of Units
ends_with("Cost") & !contains("Other") # Sum of Cost
)
colnames(df) <- c("CMHSP","FirstofService.Description","FirstOfRevenue.Code",
"FirstOfHCPCS.Code","FirstOfModifier","UnitType",
"SumOfCases","SumOfUnits","SumOfCost")
df <-
df %>%
# Replace NAs with name CMHSP
fill(CMHSP, .direction = "down") %>%
mutate(
FY = factor(fy),
Population = factor(pop),
FirstOfRevenue.Code = as.character(FirstOfRevenue.Code),
FirstOfHCPCS.Code = as.character(FirstOfHCPCS.Code),
SumOfCost = gsub(",", "", SumOfCost),
SumOfCost = gsub("\\..*", "", SumOfCost),
SumOfCost = as.numeric(gsub("[[:punct:]]", "", SumOfCost)),
SumOfCases = as.numeric(gsub(",", "", SumOfCases)),
SumOfCases = as.numeric(gsub("\\..*", "", SumOfCases)),
SumOfUnits = as.numeric(gsub(",", "", SumOfUnits)),
SumOfUnits = as.numeric(gsub("\\..*", "", SumOfUnits))
) %>%
filter(is.na(FirstofService.Description) == F) %>%
mutate(
CostPerCase = round(SumOfCost/SumOfCases, digits = 2),
CostPerUnit = round(SumOfCost/SumOfUnits, digits = 2),
UnitPerCase = round(SumOfUnits/SumOfCases, digits = 1)
) %>%
select(
CMHSP,FY,Population,FirstofService.Description,
FirstOfRevenue.Code,FirstOfHCPCS.Code,FirstOfModifier,
UnitType,SumOfCases,SumOfUnits,SumOfCost,
CostPerCase,CostPerUnit,UnitPerCase
)
return(df)
}
# Function to read and combine all .csv files in a directory
# Assumes files with naming convention "CMH-DD_FY2006.csv"
combine404 <- function(directory) {
## 'directory' is a char vector of len 1 indicating location of CSV files
files <- list.files(directory, full.names = TRUE) # make list of full file names
n <- length(files)
df <- tibble() #create empty data frame
for (i in 1:n) {
# loop through files, rbinding them together
fy <- str_sub(files[i], start = -8L, end = -5L) # extract 4 chars for FY
pop <- str_sub(files[i], start = -14L, end = -12L) # extract 3 chars for pop
pop <- ifelse(pop == "-DD", yes = "DD", no = pop)
x <- read404(path = files[i], fy = fy, pop = pop)
print(paste0(length(colnames(x)), " columns: ", fy,"_",pop))
## For use in diagnosing issues:
# print(paste0("Column names: ", colnames(x)))
df <- rbind(df, x)
}
return(df)
}
clean404 <- function(df) {
df <-
df %>%
rename_all(list(~str_to_lower(.))) %>%
rename_all(list(~str_replace(.,"firstof",""))) %>%
rename_all(list(~str_replace(.,"sumof",""))) %>%
rename_all(list(~str_replace(.,"\\.","_"))) %>%
rename_all(list(~str_replace(.,"per","_per_")))%>%
select(-unittype) %>%
# Identify IBNR records
mutate(ibnr_record = if_else(cases == 0 & units == 0 & cost > 0, 1, 0)) %>%
filter(
cases > 0 | units > 0 | cost > 0, # Must be used # 99,175
ibnr_record != 1 # Exclude IBNR records (Inpatient Psychiatric) # 98,555
) %>%
mutate(
hcpcs_code = str_replace(hcpcs_code,"[[:punct:]].*",""),
hcpcs_code = case_when(
hcpcs_code == "00104" ~ "104",
hcpcs_code == "ALL" ~ "Jxxxx",
is.na(hcpcs_code) & str_detect(service_description,"^Peer") ~ "prxxx",
is.na(hcpcs_code) & str_detect(service_description,"^Pharmacy") ~ "phxxx",
is.na(hcpcs_code) & service_description == "Other" ~ "xxxxx",
TRUE ~ hcpcs_code
),
modifier = case_when(
# Make services with separate SUD reporting identifiable via modifier
str_detect(tolower(service_description), "^substance abuse") ~ "SUD",
TRUE ~ modifier
),
revenue_code = case_when(
revenue_code == "0114, 0124, 0134, 0154" ~ "01X4", # Psychiatric Inpatient post-FY15
revenue_code == "0250-0254, 0257-0258" ~ "025X", # Pharmacy
revenue_code == "0270-0272" ~ "027X", # Medical/Surgical Supplies and Devices
revenue_code == "0300-0302, 0305-0307" ~ "030X", # Laboratory
revenue_code == "0420-0424" ~ "042X", # Physical Therapy
revenue_code == "0430-0434" ~ "043X", # Occupational Therapy
revenue_code == "0440-0444" ~ "044X", # Speech Therapy – Language Pathology
revenue_code == "0470-0472" ~ "047X", # Audiology
revenue_code == "0610-0611" ~ "061X", # Magnetic Resonance Imaging (MRI)
revenue_code == "0730-0731" ~ "073X", # EKG/ECG (Electrocardiogram)
revenue_code %in% c("0900, 0902-0904, 0911, 0914-0919",
"0900, 0914, 0915, 0916, 0919",
"0900, 0906, 0914, 0915, 0916, 0919") ~ "09XX", # Behavioral Health Treatment
revenue_code == "0940-0942" ~ "094X", # Other Therapeutic Services
TRUE ~ revenue_code
),
revenue_code = str_replace(revenue_code,"[[:punct:]].*",""),
revenue_code = case_when(
# Make 4 digit revenue codes into 3 digits
str_length(revenue_code) == 4 & str_detect(revenue_code,"^0") ~ str_sub(revenue_code,2,4),
TRUE ~ revenue_code
),
code = case_when(
modifier == "PT22" ~ "PT22",
modifier == "PT65" ~ "PT65",
modifier == "PT68" &
fy %in% c("2006","2007","2008","2009","2010","2011","2012","2013","2014","2015") ~ "PT68",
modifier == "PT73" &
fy %in% c("2006","2007","2008","2009","2010","2011","2012","2013","2014","2015") ~ "PT73",
!is.na(hcpcs_code) ~ hcpcs_code,
!is.na(revenue_code) ~ revenue_code,
!is.na(modifier) ~ modifier
),
# Remove pesky carriage returns
code = str_replace_all(code,"\n|\r","")
) %>%
## Standardize CMHSP names
mutate(
cmhsp = recode(
cmhsp,
`LifeWays` = 'Lifeways',
`Manistee-Benzie (Centra Wellness)` = 'Manistee-Benzie',
`Muskegon (HealthWest)` = 'Muskegon'
)
) %>%
select(-ibnr_record)
return(df)
}