-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathSIFI.R
395 lines (323 loc) · 18 KB
/
SIFI.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
# This is the code used to calculate SIFI
# For specific examples, see 'figures' folder
# Author: David Bomze, 2020
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
# Basic SIFI function
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
sifi <- function(sv_data, treatment_arm = NULL, # 'sv_data' should contain three columns: (1) time, (2) event, (3) arm
operation = c("flip","clone"), # Flip or clone the best/worst responder
direction = c("best","worst"), # Use the best responder or the worst responder
cols = c("#0754A0","#F12A29"), # color of KM curves
stat_test = c("logrank","wald"), # test used to calculate p-value in each iteration
agnostic = F, # Agnostic determination of experimental vs reference group (based on the lower HR)
plot_iteration = F, file_iteration = NA){
require(dplyr)
require(survival)
# Evaluate input
operation <- match.arg(operation)
direction <- match.arg(direction)
stat_test <- match.arg(stat_test)
# Prepare data
names(sv_data) <- c("time","event","arm")
sv_data$arm <- as.factor(sv_data$arm)
sv_data$id <- 1:nrow(sv_data)
count <- 0 ; flag <- T
# If the treatment arm wasn't defined, we use the agnostic approach,
# by assigning the group that shows benefit (HR < 1) as the experimental group regardless of signifiance
if((length(treatment_arm) == 0) | agnostic){
sv_cox <- coxph(Surv(time, event, type = "right") ~ arm, data = sv_data)
treatment_arm <- ifelse(sv_cox$coefficients < 0, yes = levels(sv_data$arm)[2], no = levels(sv_data$arm)[1])
}
# Original count
n_arms <- table(sv_data$arm)
#@@@@@@@@@@ WE NOW HAVE 4 OPTIONS (2x2)
# 1) Re-designate the best responder (longest time) from experimental to control group
# 2) Re-designate the worst responder (shortest time) from control to experimental group
# 3) Flip that responder, 4) Clone that responder
#@@@@@@@@@@
# Plot the iteration to PDF if we have filename, otherwise to Viewer
if(plot_iteration & !is.na(file_iteration)){
cairo_pdf(file_iteration, width = 11.69, height = 8.27, onefile = T)
par(mfrow = c(3,4))
}
# Initialize while loop
while(flag){
# Option A: Calculate p-value of log-rank test (default)
if(stat_test == "logrank"){
sdf <- survdiff(Surv(time, event, type = "right") ~ arm, data = sv_data)
pval <- 1 - pchisq(sdf$chisq, length(sdf$n) - 1) # Log-rank p-value directly from survdiff
}
# Option B: Calculate p-value of Wald's test
if(stat_test == "wald"){
cxp <- coxph(Surv(time, event, type = "right") ~ arm, data = sv_data)
pval <- summary(cxp)$coefficients[5]
}
#@@@@@@@@@@ NEGATIVE SIFI, if in the first iteration the p-val is insignificant
# calculate the negative SIFI, i.e. try to get it from non-significant to significant
if(count == 0 & pval > 0.05){
# Dump parameters
count_neg <- neg_sifi(sv_data = sv_data[ , 1:3], treatment_arm = treatment_arm,
operation = operation, direction = direction,
cols = cols, stat_test = stat_test,
agnostic = agnostic,
plot_iteration = plot_iteration, file_iteration = file_iteration)
return(count_neg)
}
#@@@@@@@@@@
if(plot_iteration){
# Build survival model, run COXPH, and calculate HR
sft <- survfit(Surv(time, event, type = "right") ~ arm, data = sv_data)
cxp <- coxph(Surv(time, event, type = "right") ~ arm, data = sv_data)
hr <- summary(cxp)$conf.int[c(1,3,4)]
# Create labels
hr_lab <- paste0("HR = ", sprintf("%1.2f", hr[1]), " (",
sprintf("%1.2f", hr[2]), ",", sprintf("%1.2f", hr[3]), ")")
pval_lab <- ifelse(pval < 0.001, yes = "p < 0.001", no = paste0("p = ", sprintf("%1.3f", pval)))
plot(sft, col = cols, lwd = 1.5,
xlab = "", ylab = "% Survival", cex.lab = 1.0,
main = paste0("Iteration #", count,
"\n", hr_lab,
"\n", pval_lab),
mark.time = T, mark = "|" , cex = 0.7,
xaxt = "n", yaxt = "n", frame.plot = FALSE)
# Plot axes
axis(side = 1, pos = 0) ; abline( h =0)
title(xlab = "Time", line = 2.5, cex.lab = 1.0)
axis(side = 2, at = seq(0, 1, 0.2), labels = seq(0, 1, 0.2)*100, las = 2)
bounds <- par("usr")
# Plot legend
legend("bottomleft", legend = c("Experimental","Control"), col = rev(cols), bty = "n", lwd = 1.5)
# Add strategy (operation-direction)
par(xpd = TRUE) # Prevents clipping
text(bounds[2], bounds[4]-0.05, adj = c(1,1), cex = 1.0,
label = paste0(stringi::stri_trans_totitle(paste0(operation, " ", direction, " responder")),
ifelse(direction == "best", yes ="\nExperimental --> Control", no = "\nControl --> Experimental")))
# If there is already a best/worst responder that was flipped/cloned (from previous iteration), then plot it
if(exists("jd")){
points(jd[1], jd[2], type = "b", pch = 19, col = "red3", cex = 1.5)
text(jd[1], jd[2]-0.05, adj = c(0.5,1), cex = 1.0, col = "red3",
label = paste0("Just\n", ifelse(operation == "flip", "Flipped", "Cloned"), " (", responder$event, ")"))
}
}
# If we reached NON-significance, then we're done and return SIFI, else re-deisgnate/add clone
if(pval > 0.05){
if(plot_iteration & !is.na(file_iteration)) dev.off() # Shut down device we have a filename
return(count)
} else {
count <- count + 1
}
# Two options:
# (1) Re-designate BEST responder from the experimental to the control group (either event or censored)
if(direction == "best") responder <- sv_data %>% filter(arm == treatment_arm) %>% arrange(time) %>% tail(1)
# (2) Re-desginate WORST responder from the control group to the experimental group (must be event)
if(direction == "worst") responder <- sv_data %>% filter(arm != treatment_arm & event == 1) %>% arrange(time) %>% head(1)
if(plot_iteration){
# Calculate time of new responder
s <- summary(sft, times = responder$time, extend = TRUE) # Otherwise we get: 'Error in array(xx, dim = dd) : vector is too large'
# 'sr' some responder, 'jd' just redesignated
if(direction == "best"){
sr <- c(responder$time, s$surv[paste0("arm=", treatment_arm) == s$strata])
jd <- c(responder$time, s$surv[paste0("arm=", treatment_arm) != s$strata])
}
if(direction == "worst"){
sr <- c(responder$time, s$surv[paste0("arm=", treatment_arm) != s$strata])
jd <- c(responder$time, s$surv[paste0("arm=", treatment_arm) == s$strata])
}
# Add the point of the responder (event/censored in parentheses)
points(sr[1], sr[2], type = "b", pch = 19, col = "green4", cex = 1.5)
text(sr[1], sr[2]+0.05, adj = c(0.5,0), cex = 1.0, col = "green4",
label = paste0(ifelse(direction == "best", "Best", "Worst"),"\nResponder", " (", responder$event, ")"))
par(xpd = FALSE) # Turn off
}
# Two options:
# (3) Flip the responder from its original group to the other arm
if(operation == "flip") sv_data[responder$id , "arm"] <- setdiff(levels(responder$arm) , responder$arm)
# (4) Clone the responder and ADD it to the other arm
if(operation == "clone"){
responder$id <- paste0(responder$id,"_clone") # Add a tag
responder$arm <- setdiff(levels(responder$arm) , responder$arm) # Change to the other arm
sv_data <- rbind(sv_data, responder) # Concatenate it to the original cohort
}
#@@@@@@@@@@ KEEP IN MIND THAT IF WE CLONE A *CENSORED* INDIVIDUAL THE HR MAY NOT CHANGE AND WE WILL GET STUCK IN A LOOP
#@@@@@@@@@@ THIS IS A SAFETY MECHANISM FOR THESE EXTREME CASES OR WHERE WE CAN'T REACH NON-SIGNIFICANCE...
if(count > min(n_arms)){
if(plot_iteration & !is.na(file_iteration)) dev.off() # Shut down device if we have a filename
return(NA)
}
}
}
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
# Negative SIFI function, called from 'sifi()'
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
neg_sifi <- function(sv_data, treatment_arm = NULL, # 'sv_data' should contain three columns: (1) time, (2) event, (3) arm
operation = c("flip","clone"), # Flip or clone the best/worst responder
direction = c("best","worst"), # Use the best responder or the worst responder
cols = c("#0754A0","#F12A29"), # color of KM curves
stat_test = c("logrank","wald"), # test used to calculate p-value in each iteration
agnostic = F, # Agnostic determination of experimental vs reference group (based on the lower HR)
plot_iteration = F, file_iteration = NA){
require(dplyr)
require(survival)
# Evaluate input
operation <- match.arg(operation)
direction <- match.arg(direction)
stat_test <- match.arg(stat_test)
# Prepare data
names(sv_data) <- c("time","event","arm")
sv_data$arm <- as.factor(sv_data$arm)
sv_data$id <- 1:nrow(sv_data)
count <- 0 ; flag <- T
# If the treatment arm wasn't defined, we use the agnostic approach,
# by assigning the group that shows benefit (HR < 1) as the experimental group regardless of signifiance
if((length(treatment_arm) == 0) | agnostic){
sv_cox <- coxph(Surv(time, event, type = "right") ~ arm, data = sv_data)
treatment_arm <- ifelse(sv_cox$coefficients < 0, yes = levels(sv_data$arm)[2], no = levels(sv_data$arm)[1])
}
# Original count
n_arms <- table(sv_data$arm)
#@@@@@@@@@@ WE NOW HAVE 4 OPTIONS (2x2)
# 1) Re-designate the best responder (longest time) from experimental to control group
# 2) Re-designate the worst responder (shortest time) from control to experimental group
# 3) Flip that responder, 4) Clone that responder
#@@@@@@@@@@
# Plot to PDF if we have filename, otherwise to viewer
if(plot_iteration & !is.na(file_iteration)){
cairo_pdf(file_iteration, width = 11.69, height = 8.27, onefile = T)
par(mfrow = c(3,4))
}
# Initialize while loop
while(flag){
# Option A: Calculate p-value of log-rank test (default)
if(stat_test == "logrank"){
sdf <- survdiff(Surv(time, event, type = "right") ~ arm, data = sv_data)
pval <- 1 - pchisq(sdf$chisq, length(sdf$n) - 1) # Log-rank p-value directly from survdiff
}
# Option B: Calculate p-value of Wald's test
if(stat_test == "wald"){
cxp <- coxph(Surv(time, event, type = "right") ~ arm, data = sv_data)
pval <- summary(cxp)$coefficients[5]
}
if(plot_iteration){
# Build survival model, run COXPH, and calculate HR
sft <- survfit(Surv(time, event, type = "right") ~ arm, data = sv_data)
cxp <- coxph(Surv(time, event, type = "right") ~ arm, data = sv_data)
hr <- summary(cxp)$conf.int[c(1,3,4)]
# Create labels
hr_lab <- paste0("HR = ", sprintf("%1.2f", hr[1]), " (",
sprintf("%1.2f", hr[2]), ",", sprintf("%1.2f", hr[3]), ")")
pval_lab <- ifelse(pval < 0.001, yes = "p < 0.001", no = paste0("p = ", sprintf("%1.3f", pval)))
plot(sft, col = cols, lwd = 1.5,
xlab = "", ylab = "% Survival", cex.lab = 1.0,
main = paste0("Iteration #", count,
"\n", hr_lab,
"\n", pval_lab),
mark.time = T, mark = "|" , cex = 0.7, # Might be useful to keep for illustration
xaxt = "n", yaxt = "n", frame.plot = FALSE)
# Plot axes
axis(side = 1, pos = 0) ; abline( h =0)
title(xlab = "Time", line = 2.5, cex.lab = 1.0)
axis(side = 2, at = seq(0, 1, 0.2), labels = seq(0, 1, 0.2)*100, las = 2)
bounds <- par("usr")
# Plot legend
legend("bottomleft", legend = c("Experimental","Control"), col = rev(cols), bty = "n", lwd = 1.5)
# Add strategy (operation-direction)
par(xpd = TRUE) # Prevents clipping
text(bounds[2], bounds[4]-0.05, adj = c(1,1), cex = 1.0,
label = paste0(stringi::stri_trans_totitle(paste0(operation, " ", direction, " responder")),
ifelse(direction == "best", yes ="\nExperimental --> Control", no = "\nControl --> Experimental")))
# If there is already a best/worst responder that was flipped/cloned (from previous iteration), then plot it
if(exists("jd")){
points(jd[1], jd[2], type = "b", pch = 19, col = "red3", cex = 1.5)
text(jd[1], jd[2]-0.05, adj = c(0.5,1), cex = 1.0, col = "red3",
label = paste0("Just\n", ifelse(operation == "flip", "Flipped", "Cloned"), " (", responder$event, ")"))
}
}
# If we reached YES-significance, then we're done and return SIFI, else re-designate/add clone (mirror of positive SIFI)
if(pval < 0.05){
if(plot_iteration & !is.na(file_iteration)) dev.off() # Shut down device we have a filename
return(count)
} else {
count <- count - 1 # Negative
}
# WE NOW MIRROR THE SAME APPROACH as positive SIFI
control_arm <- setdiff(levels(sv_data$arm) , treatment_arm)
# Two options:
# (1) Re-designate BEST responder from the CONTROL to the EXPERIMENT group (i.e. the mirror of positive SIFI) (either event or censored)
if(direction == "best") responder <- sv_data %>% filter(arm == control_arm) %>% arrange(time) %>% tail(1)
# (2) Re-desginate WORST responder from the EXPERIMENTAL group to the CONTROL group (i.e. the mirror of positive SIFI) (must be event)
if(direction == "worst") responder <- sv_data %>% filter(arm != control_arm & event == 1) %>% arrange(time) %>% head(1)
if(plot_iteration){
# Calculate time of new responder
s <- summary(sft, times = responder$time, extend = TRUE) # Otherwise we get: 'Error in array(xx, dim = dd) : vector is too large'
# 'sr' some responder, 'jd' just redesignated
if(direction == "best"){
sr <- c(responder$time, s$surv[paste0("arm=", control_arm) == s$strata]) # mirror of positive SIFI
jd <- c(responder$time, s$surv[paste0("arm=", control_arm) != s$strata]) # same...
}
if(direction == "worst"){
sr <- c(responder$time, s$surv[paste0("arm=", control_arm) != s$strata]) # same...
jd <- c(responder$time, s$surv[paste0("arm=", control_arm) == s$strata]) # same...
}
# Add the point of the responder (event/censored in parentheses)
points(sr[1], sr[2], type = "b", pch = 19, col = "green4", cex = 1.5)
text(sr[1], sr[2]+0.05, adj = c(0.5,0), cex = 1.0, col = "green4",
label = paste0(ifelse(direction == "best", "Best", "Worst"),"\nResponder", " (", responder$event, ")"))
par(xpd = FALSE) # Turn off
}
# Two options:
# (3) Flip the responder from its original group to the other arm
if(operation == "flip") sv_data[responder$id , "arm"] <- setdiff(levels(responder$arm) , responder$arm)
# (4) Clone the responder and ADD it to the other arm
if(operation == "clone"){
responder$id <- paste0(responder$id,"_clone") # Add a tag
responder$arm <- setdiff(levels(responder$arm) , responder$arm) # Change to the other arm
sv_data <- rbind(sv_data, responder) # Concatenate it to the original cohort
}
#@@@@@@@@@@ KEEP IN MIND THAT IF WE CLONE A *CENSORED* INDIVIDUAL THE HR MAY NOT CHANGE AND WE WILL GET STUCK IN A LOOP
#@@@@@@@@@@ THIS IS A SAFETY MECHANISM FOR THESE EXTREME CASES OR WHERE WE CAN'T REACH NON-SIGNIFICANCE...
if(abs(count) > min(n_arms)){ # Absolute because 'count' is negative
if(plot_iteration & !is.na(file_iteration)) dev.off() # Shut down device we have a filename
return(NA)
}
}
}
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
# Calculate all SIFI strategies in one run
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
sifi_all <- function(sv_data, treatment_arm = NULL, # 'sv_data' should contain three columns: (1) time, (2) event, (3) arm
cols = c("#0754A0","#F12A29"), # color of KM curves
stat_test = c("logrank","wald"), # test used to calculate p-value in each iteration
agnostic = F, # Agnostic determination of experimental vs reference group (based on the lower HR)
plot_iteration = F, file_prefix = NA){
require(dplyr)
# Evaluate input
stat_test <- match.arg(stat_test)
# Just need a data input, and run it 4 times
#@@@@@@@@@@ WE NOW HAVE 4 OPTIONS (2x2)
# 1) Re-designate the best responder (longest time) from experimental to control group
# 2) Re-designate the worst responder (shortest time) from control to experimental group
# 3) Flip that responder, 4) Clone that responder
#@@@@@@@@@@
# Create list to store SIFI
sifi_all4 <- vector("list", 4)
names(sifi_all4) <- expand.grid(operation = c("flip","clone"), direction = c("best","worst")) %>%
arrange(operation) %>%
tidyr::unite("strategy", 1:2, remove = T) %>% pull(strategy)
# Strategy ID
strtgy <- 1
# We run SIFI using the four different strategies
for(op in c("flip","clone")){
for(dr in c("best","worst")){
sifi_all4[paste0(op, "_", dr)] <- sifi(sv_data = sv_data, treatment_arm = treatment_arm,
direction = dr, operation = op,
cols = cols, stat_test = stat_test,
agnostic = agnostic, # Agnostic determination of experimental vs reference group (based on the lower HR)
plot_iteration = plot_iteration,
file_iteration = ifelse(plot_iteration, yes = paste0(file_prefix, "_strategy", strtgy, "_", paste0(op, "_", dr), ".pdf"), no = NA))
strtgy <- strtgy + 1
}
}
return(sifi_all4)
}