-
Notifications
You must be signed in to change notification settings - Fork 10
/
Getters.R
518 lines (476 loc) · 21 KB
/
Getters.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
#' Downloads Polity IV
#'
#' Downloads the Polity IV data set. It keeps specified variables and creates a
#' standard country ID variable that can be used for merging the data with other
#' data sets.
#'
#' @param url character string. The URL for the Polity IV data set you would
#' like to download. Note: it must be for the SPSS version of the file.
#' @param vars character vector containing the variables to keep. If
#' \code{vars = NULL} then the entire data set is returned. Note that the
#' \code{country} and \code{year} variables are always returned. An error is
#' raised if any requested variable is not found in the downloaded data.
#' @param OutCountryID character string. The type of country ID you would like
#' to include in the output file along with the country name. See
#' \code{\link{countrycode}} for available options.
#' @param standardCountryName logical. Whether or not to standardise the country
#' names variable based on \code{country.name} from \code{\link{countrycode}}.
#' @param na.rm logical. Drop observations where \code{OutCountryID} is
#' \code{NA}.
#' @param duplicates character specifying how to handle duplicated country-year
#' observations. Can be set to \code{none} to do nothing, \code{message} to
#' simply report duplicates, \code{drop} to report and drop duplicates, and
#' \code{return} to return a data frame with only duplicated observations
#' (see also \code{fromLast}).
#' @param fromLast logical indicating if duplication should be considered
#' from the reverse side. Only relevant if \code{duplicates = 'drop'} or
#' \code{duplicates = 'return'}.
#'
#' @return a data frame
#'
#' @examples
#' \dontrun{
#' # Download full data set
#' PolityData <- PolityGet()
#'
#' # Create data frame with only the main Polity democracy variable (polity2)
#' Polity2Data <- PolityGet(vars = 'polity2',
#'                          OutCountryID = 'imf')
#' }
#'
#' @seealso \code{\link{countrycode}}, \code{\link{CountryID}},
#' \code{\link{WinsetCreator}}
#'
#' @importFrom rio import
#'
#' @export
PolityGet <- function(url = 'http://www.systemicpeace.org/inscr/p4v2015.sav',
                      vars = NULL, OutCountryID = 'iso2c',
                      standardCountryName = TRUE,
                      na.rm = TRUE, duplicates = 'message', fromLast = FALSE){
    # Download underlying Polity IV data
    PolityData <- import(url)

    # Keep only the requested variables, after checking that they all exist
    if (!is.null(vars)) {
        if (!all(vars %in% names(PolityData))) {
            stop('Specified variables not found in data.')
        }
        # unique() prevents selecting the same column twice when the caller
        # also lists 'country' or 'year' in vars
        Vars <- unique(c('country', 'year', vars))
        PolityData <- PolityData[, Vars]
    }

    # Include new country ID variable and standardise country names
    PolityData <- CountryID(data = PolityData, OutCountryID = OutCountryID,
                            timeVar = 'year', duplicates = duplicates,
                            standardCountryName = standardCountryName,
                            fromLast = fromLast)

    # Drop NAs for OutCountryID
    if (isTRUE(na.rm)) {
        PolityData <- DropNA.psData(data = PolityData,
                                    timeVar = 'year',
                                    OutCountryID = OutCountryID)
    }
    return(PolityData)
}
#' Downloads the Database of Political Institutions (DPI)
#'
#' Downloads the Database of Political Institutions (DPI) data set. It keeps
#' specified variables and creates a standard country ID variable that can be
#' used for merging the data with other data sets.
#'
#' @param url character string. The URL for the DPI data set you would
#' like to download. Note this is exclusively to download previous, IMF hosted,
#' versions of the data set. If a value is not supplied (or is \code{NULL}),
#' then the 2020 IDB hosted version will be downloaded. If a link is supplied
#' it must be to a Stata formatted file.
#' @param vars character vector containing the variables to keep. If
#' \code{vars = NULL} then the entire data set is returned. Note that
#' \code{countryname} and \code{year} variables are always returned. An error
#' is raised if any requested variable is not found in the downloaded data.
#' @param OutCountryID character string. The type of country ID you would like
#' to include in the output file along with the country name. See
#' \code{\link{countrycode}} for available options.
#' @param standardCountryName logical. Whether or not to standardise the country
#' names variable based on \code{country.name} from \code{\link{countrycode}}.
#' @param na.rm logical. Drop observations where \code{OutCountryID} is
#' \code{NA}.
#' @param duplicates character specifying how to handle duplicated country-year
#' observations. Can be set to \code{none} to do nothing, \code{message} to
#' simply report duplicates, \code{drop} to report and drop duplicates, and
#' \code{return} to return a data frame with only duplicated observations
#' (see also \code{fromLast}).
#' @param fromLast logical indicating if duplication should be considered from
#' the reverse side. Only relevant if \code{duplicates = 'drop'} or
#' \code{duplicates = 'return'}.
#'
#'
#' @return a data frame
#'
#' @examples
#' \dontrun{
#' # Download full data set
#' DpiData <- DpiGet()
#'
#' # Create data frame with only the military variable
#' DpiSub <- DpiGet(vars = 'military',
#'                  OutCountryID = 'imf')
#' }
#'
#' @seealso \code{\link{countrycode}}, \code{\link{CountryID}},
#' \code{\link{WinsetCreator}}
#'
#' @importFrom rio import
#' @importFrom utils download.file unzip
#'
#' @export
DpiGet <- function(url, vars = NULL,
                   OutCountryID = 'iso2c',
                   standardCountryName = TRUE, na.rm = TRUE,
                   duplicates = 'message', fromLast = FALSE){
    # Download underlying DPI data
    if (missing(url) || is.null(url)){
        message('Downloading the 2020 DPI from: http://dx.doi.org/10.18235/0003049\n\n')
        url <- 'https://publications.iadb.org/publications/english/document/The-Database-of-Political-Institutions-2020-DPI2020.zip'
        tmp_file <- tempfile(fileext = ".zip")
        # Extract into a scratch directory instead of the caller's working
        # directory, and remove both the archive and the extracted files when
        # the function exits.
        tmp_dir <- tempfile(pattern = "DPI")
        on.exit(unlink(c(tmp_file, tmp_dir), recursive = TRUE), add = TRUE)
        download.file(url, tmp_file, mode = "wb")
        con <- unzip(tmp_file, files = 'DPI2020/DPI2020_stata13.dta',
                     exdir = tmp_dir)
        # unzip() returns the paths it extracted; nothing extracted means the
        # archive layout is not the expected one
        if (length(con) == 0) {
            stop('DPI2020/DPI2020_stata13.dta not found in the downloaded archive.')
        }
        DpiData <- import(con)
    }
    else {
        # A user-supplied URL must point directly at a Stata file
        DpiData <- import(url, format = 'dta')
        # NOTE(review): labelDataset() is not defined in this part of the
        # file; confirm where it comes from and what it does to the data.
        DpiData <- labelDataset(DpiData)
    }

    # Keep only the requested variables, after checking that they all exist
    if (!is.null(vars)) {
        if (!all(vars %in% names(DpiData))) {
            stop('Specified variables not found in data.')
        }
        # unique() prevents selecting the same column twice when the caller
        # also lists 'countryname' or 'year' in vars
        Vars <- unique(c('countryname', 'year', vars))
        DpiData <- DpiData[, Vars]
    }

    # Include new country ID variable
    DpiData <- CountryID(data = DpiData,
                         OutCountryID = OutCountryID, countryVar = 'countryname',
                         timeVar = 'year', duplicates = duplicates,
                         standardCountryName = standardCountryName,
                         fromLast = fromLast)

    # Drop NAs for OutCountryID
    if (isTRUE(na.rm)) {
        DpiData <- DropNA.psData(data = DpiData,
                                 countryVar = 'countryname', timeVar = 'year',
                                 OutCountryID = OutCountryID)
    }
    return(DpiData)
}
#' Download and combine Reinhart and Rogoff's (2010) crisis dummy variables into
#' one data frame
#'
#' @param urls URLs for each Excel file in the Reinhart and Rogoff data set. See
#' \url{https://carmenreinhart.com/browse-by-topic/}.
#' @param OutCountryID character string. The type of country ID you would like
#' to include in the output file along with the country name. See
#' \code{\link{countrycode}} for available options.
#' @param message logical. Whether or not to notify you which sheets are
#' being cleaned and organised.
#' @param standardCountryName logical. Whether or not to standardise the country
#' names variable based on \code{country.name} from \code{\link{countrycode}}.
#'
#' @return Currently always returns \code{NULL} after emitting a message,
#' because the function exits before any download is attempted. The legacy
#' implementation retained in the function body builds a data frame with the
#' following columns:
#' \itemize{
#'  \item{\code{iso2c}: }{The ISO two letter country code identifying the
#'  country. This can be changed to another country ID system using
#'  \code{OutCountryID}}
#'  \item{\code{country}: }{Country names.}
#'  \item{\code{year}: }{The year.}
#'  \item{\code{RR_Independence}: }{Year of independence.}
#'  \item{\code{RR_CurrencyCrisis}: }{Currency crisis.}
#'  \item{\code{RR_InflationCrisis}: }{Inflation crisis.}
#'  \item{\code{RR_StockMarketCrash}: }{Stock market crash.}
#'  \item{\code{RR_SovDebtCrisisDom}: }{Domestic sovereign debt crisis.}
#'  \item{\code{RR_SovDebtCrisisExt}: }{External sovereign debt crisis.}
#'  \item{\code{RR_BankingCrisis}: }{Banking crisis.}
#'  \item{\code{RR_YearlyCrisisTally}: }{Total number of crises per year.}
#' }
#'
#' @examples
#' \dontrun{
#' RRDummies <- RRCrisisGet()
#' }
#'
#' @source
#' Reinhart, Carmen M. and Kenneth S. Rogoff, ''From Financial Crash to Debt
#' Crisis,'' NBER Working Paper 15795, March 2010. Forthcoming in American
#' Economic Review.
#'
#' @importFrom DataCombine MoveFront DropNA
#' @export
RRCrisisGet <- function(urls = c(
    'http://www.carmenreinhart.com/user_uploads/data/22_data.xls',
    'http://www.carmenreinhart.com/user_uploads/data/35_data.xls',
    'http://www.carmenreinhart.com/user_uploads/data/23_data.xls',
    'http://www.carmenreinhart.com/user_uploads/data/25_data.xls'),
    OutCountryID = 'iso2c', message = TRUE,
    standardCountryName = TRUE){
    # The function short-circuits here: every call reports that the data sets
    # have moved and returns NULL, regardless of the arguments supplied.
    message("These datasets have been moved. Returning NULL")
    return(NULL)

    # ------------------------------------------------------------------------
    # Legacy implementation. Unreachable while the return() above is in place;
    # kept so that it can be re-enabled once working URLs are available.
    # ------------------------------------------------------------------------
    OutData <- data.frame()
    for (i in urls){
        tmpfile <- tempfile()
        download.file(i, tmpfile, mode = "wb")

        # Load workbook and find relevant sheet names
        WBNames <- readxl::excel_sheets(tmpfile)
        Droppers <- c('Contents', 'CrisisDefinition', 'CrisisDefinitions',
                      'Sheet1', 'Sheet3')
        WBNames <- WBNames[!is.element(WBNames, Droppers)]

        for (u in WBNames){
            Temp <- readxl::read_excel(tmpfile, u)

            # Keep only the year and crisis indicators
            Temp <- Temp[13:nrow(Temp), c(1:9)]

            # Extract the country name
            if (u == 'UK'){
                Temp$country <- 'United Kingdom'
            }
            else if (u == 'US'){
                Temp$country <- 'United States'
            }
            else {
                # Take the leading run of characters matched by the pattern
                # from the first column's name; multi-word names are completed
                # in the clean-up step further down.
                # NOTE(review): '[A-z]' also matches the punctuation between
                # 'Z' and 'a' ('[', '\', ']', '^', '_', '`') -- confirm
                # whether '[A-Za-z]' was intended before changing it.
                TempNames <- names(Temp[1])
                CountryName <- gsub('([A-z]+).*', '\\1', TempNames)
                Temp$country <- CountryName
            }
            Temp <- MoveFront(Temp, 'country')

            # Rename variables
            names(Temp) <- c('country', 'year', 'RR_Independence',
                             'RR_CurrencyCrisis', 'RR_InflationCrisis',
                             'RR_StockMarketCrash', 'RR_SovDebtCrisisDom',
                             'RR_SovDebtCrisisExt',
                             'RR_BankingCrisis', 'RR_YearlyCrisisTally')

            if (isTRUE(message)){
                message(paste0('Cleaning up Excel sheet for ', u, '.\n'))
            }
            Temp <- DropNA(Temp, c('year', 'RR_BankingCrisis'), message = FALSE)
            OutData <- rbind(OutData, Temp)
        }
    }

    # Clean up country names and add ID
    OutData$country[OutData$country == 'New'] <- 'New Zealand'
    OutData$country[OutData$country == 'South'] <- 'South Africa'
    OutData$country[OutData$country == 'Sri'] <- 'Sri Lanka'
    OutData$country[OutData$country == 'Central'] <- 'Central African Republic'
    OutData$country[OutData$country == 'Costa'] <- 'Costa Rica'
    OutData$country[OutData$country == 'Cote'] <- 'Cote dIvoire'
    # Fixed: a dangling "... == 'Costa'] <-" used to chain into this
    # assignment, and the replacement value was misspelled 'Domincan Republic'.
    OutData$country[OutData$country == 'Dominican'] <- 'Dominican Republic'
    OutData$country[OutData$country == 'El'] <- 'El Salvador'

    OutData <- CountryID(data = OutData, OutCountryID = OutCountryID,
                         timeVar = 'year',
                         standardCountryName = standardCountryName)

    # Clean year
    OutData$year <- as.character(OutData$year)
    OutData$year <- as.numeric(OutData$year)

    OutData <- OutData[order(OutData[, OutCountryID], OutData[, 'year']), ]
    return(OutData)
}
#' Downloads Dreher's data set of IMF programs and World Bank projects
#' (1970-2011)
#'
#' Downloads Dreher's data set of IMF programs and World Bank projects
#' (1970-2011). It keeps specified variables and creates a standard country ID
#' variable that can be used for merging the data with other data sets.
#'
#' @param url character string. The URL for the Dreher data set you would like
#' to download. Note: it must be for the xls version of the file. Currently only
#' the 1970-2011 version is supported.
#' @param sheets character vector of the Excel sheets (variables) that you would
#' like to return. See Details for more information. An error is raised if any
#' of the requested sheets is not present in the workbook.
#' @param OutCountryID character string. The type of country ID you would like
#' to include in the output file along with the country name. See
#' \code{\link{countrycode}} for available options.
#' @param standardCountryName logical. Whether or not to standardise the country
#' names variable based on \code{country.name} from \code{\link{countrycode}}.
#' @param message logical. Whether or not to notify you which sheets are
#' being cleaned and organised.
#'
#' @details Using the \code{sheets} argument you can select which variables to
#' download from their individual workbook sheets in the original data set.
#' These include:
#' \itemize{
#'  \item{\code{WB other agreed}: }{Number of World Bank projects agreed, other than technical or adjustment.}
#'  \item{\code{WB technical agreed}: }{Number of World Bank technical projects agreed.}
#'  \item{\code{WB adjustment agreed}: }{Number of World Bank adjustment projects agreed.}
#'  \item{\code{WB environment agreed}: }{Number of World Bank environmental projects agreed.}
#'  \item{\code{WB adjustment 5}: }{Number of World Bank adjustment projects in effect for at least 5 months in a particular year.}
#'  \item{\code{IMF SBA}: }{IMF Standby Arrangement agreed, dummy.}
#'  \item{\code{IMF EFF}: }{IMF Extended Fund Facility Arrangement agreed, dummy.}
#'  \item{\code{IMF SAF}: }{IMF Structural Adjustment Facility Arrangement agreed, dummy.}
#'  \item{\code{IMF PRGF}: }{IMF Poverty Reduction and Growth Facility Arrangement agreed, dummy.}
#'  \item{\code{IMF SBA 5}: }{IMF Standby Arrangement in effect for at least 5 months in a particular year, dummy.}
#'  \item{\code{IMF EFF 5}: }{IMF Extended Fund Facility Arrangement in effect for at least 5 months in a particular year, dummy.}
#'  \item{\code{IMF SAF 5}: }{IMF Structural Adjustment Facility Arrangement in effect for at least 5 months in a particular year, dummy.}
#'  \item{\code{IMF PRGF 5}: }{IMF Poverty Reduction and Growth Facility Arrangement in effect for at least 5 months in a particular year, dummy.}
#' }
#' @return a data frame
#'
#' @examples
#' \dontrun{
#' # Download 'WB other agreed', 'WB environment agreed'
#' # These are the default sheets to gather
#' WBPrograms <- IMF_WBGet()
#' }
#'
#' @source Data website: \url{http://www.uni-heidelberg.de/fakultaeten/wiso/awi/professuren/intwipol/datasets_en.html}.
#'
#' When using the IMF data, please cite:
#'
#' Dreher, Axel, 2006, IMF and Economic Growth: The Effects of Programs, Loans,
#' and Compliance with Conditionality, World Development 34, 5: 769-788.
#'
#' When using the World Bank data, please cite:
#'
#' Boockmann, Bernhard and Axel Dreher, 2003, The Contribution of the IMF and
#' the World Bank to Economic Freedom, European Journal of Political Economy
#' 19, 3: 633-649.
#'
#' @importFrom DataCombine VarDrop
#' @importFrom reshape2 melt
#' @export
IMF_WBGet <- function(url = 'http://axel-dreher.de/Dreher%20IMF%20and%20WB.xls',
                      sheets = c('WB other agreed', 'WB environment agreed'),
                      OutCountryID = 'iso2c', message = TRUE,
                      standardCountryName = TRUE){
    # Without at least one sheet there is nothing to build; fail early with a
    # clear message instead of an indexing error in the final sort.
    if (length(sheets) == 0){
        stop('No sheets requested.')
    }

    # Download full Dreher IMF program data set; the temporary copy is removed
    # when the function exits.
    tmpfile <- tempfile(fileext = paste0(".", tools::file_ext(url)))
    on.exit(unlink(tmpfile), add = TRUE)
    download.file(url, tmpfile, mode = "wb")

    # Error if a desired sheet is not in the data set.
    WBNames <- readxl::excel_sheets(tmpfile)
    if (!all(sheets %in% WBNames)){
        stop("Sheets(s) not found in the data set.")
    }

    FullDF <- data.frame()
    for (i in sheets){
        # The sheet name (spaces replaced by dots) becomes the variable name
        VarName <- gsub(' ', '.', i)
        if (isTRUE(message)){
            message(paste0('Cleaning: ', VarName, '.\n'))
        }

        # Extract sheet
        OneSheet <- readxl::read_excel(tmpfile, i)

        if (i != 'WB environment agreed'){
            # Reshaped from one column per year to one row per country-year
            names(OneSheet) <- gsub(" ", ".", names(OneSheet))
            OneSheet <- melt(OneSheet,
                             id.vars = c('Country.Code', 'Country.Name'))

            # Clean: strip any 'X' from the year labels, and remove '.'
            # characters from text values before converting them to numeric
            OneSheet$variable <- gsub('X', '', as.character(OneSheet$variable))
            OneSheet$variable <- as.numeric(OneSheet$variable)
            if (is.character(OneSheet$value)){
                OneSheet$value <- gsub('\\.', '', OneSheet$value)
                OneSheet$value <- as.numeric(OneSheet$value)
            }

            OneSheet <- CountryID(data = OneSheet, OutCountryID = OutCountryID,
                                  countryVar = 'Country.Name',
                                  duplicates = 'none',
                                  standardCountryName = standardCountryName)
            OneSheet <- VarDrop(OneSheet, 'Country.Code')
        }
        else {
            # This sheet is originally in country-year format
            OneSheet <- as.data.frame(OneSheet)
            OneSheet <- CountryID(data = OneSheet, OutCountryID = OutCountryID,
                                  countryVar = 'country', duplicates = 'none',
                                  standardCountryName = standardCountryName)
            OneSheet <- VarDrop(OneSheet, 'code')
        }
        names(OneSheet) <- c(OutCountryID, 'country', 'year', VarName)

        # Merge data frames together
        if (ncol(FullDF) == 0){
            FullDF <- OneSheet
        }
        else {
            FullDF <- merge(FullDF, OneSheet, by = c('country', 'year'))
            # merge() suffixes the ID column shared by both inputs; keep the
            # '.x' copy, move it to the front and restore its original name
            if (paste0(OutCountryID, '.x') %in% names(FullDF)){
                FullDF <- FullDF[, !(names(FullDF) %in%
                                         paste0(OutCountryID, '.y'))]
                FullDF <- MoveFront(FullDF, paste0(OutCountryID, '.x'))
                FinalNames <- names(FullDF)
                FinalNames <- FinalNames[-1]
                names(FullDF) <- c(OutCountryID, FinalNames)
            }
        }
    }

    # Final Clean
    FullDF <- FullDF[order(FullDF[, OutCountryID], FullDF[, 'year']), ]
    return(FullDF)
}
#' Downloads the Democracy and Dictatorship data set
#'
#' @description
#' Downloads the Democracy and Dictatorship data set. It keeps specified
#' variables and creates a standard country ID variable that can be used for
#' merging the data with other data sets.
#' See the codebook at the authors' website
#' \url{https://sites.google.com/site/joseantoniocheibub/datasets/dd?authuser=0}
#'
#'
#' @param url character string. The URL for the Democracy and Dictatorship data
#' set you would like to download. Note: it must be for the Stata version of
#' the file.
#' @param vars character vector containing the variables to keep. If
#' \code{vars = NULL} then the entire data set is returned. Note that the
#' \code{country} and \code{year} variables are always returned. The source
#' column \code{ctryname} is renamed to \code{country} before \code{vars} is
#' checked. An error is raised if any requested variable is not found.
#' @param OutCountryID character string. The type of country ID you would like
#' to include in the output file along with the country name. See
#' \code{\link{countrycode}} for available options.
#' @param standardCountryName logical. Whether or not to standardise the country
#' names variable based on \code{country.name} from \code{\link{countrycode}}.
#' @param na.rm logical. Drop observations where \code{OutCountryID} is
#' \code{NA}. Not applied when \code{duplicates = 'return'}.
#' @param duplicates character specifying how to handle duplicated country-year
#' observations. Can be set to \code{none} to do nothing, \code{message} to
#' simply report duplicates, \code{drop} to report and drop duplicates, and
#' \code{return} to return a data frame with only duplicated observations
#' (see also \code{fromLast}).
#' @param fromLast logical indicating if duplication should be considered from
#' the reverse side. Only relevant if \code{duplicates = 'drop'} or
#' \code{duplicates = 'return'}.
#'
#' @return a data frame
#'
#' @examples
#' \dontrun{
#' # Download full data set
#' DDData <- DDGet()
#' }
#'
#' @seealso \code{\link{countrycode}}, \code{\link{CountryID}}
#'
#' @importFrom rio import
#'
#' @export
DDGet <- function(url = 'http://uofi.box.com/shared/static/bba3968d7c3397c024ec.dta',
                  vars = NULL, OutCountryID = 'iso2c',
                  standardCountryName = TRUE,
                  na.rm = TRUE, duplicates = 'message', fromLast = FALSE){
    # Download underlying Democracy and Dictatorship data
    DDData <- import(url)

    # Clean up: drop the 'order' column and use the package-wide 'country' name
    DDData$order <- NULL
    names(DDData)[names(DDData) == "ctryname"] <- "country"

    # Keep only the requested variables, after checking that they all exist
    if (!is.null(vars)){
        if (!all(vars %in% names(DDData))){
            stop('Specified variables not found in data.')
        }
        # unique() prevents selecting the same column twice when the caller
        # also lists 'country' or 'year' in vars
        Vars <- unique(c('country', 'year', vars))
        DDData <- DDData[, Vars]
    }

    # Include new country ID variable and standardise country names
    DDData <- CountryID(data = DDData, OutCountryID = OutCountryID,
                        timeVar = 'year', duplicates = duplicates,
                        standardCountryName = standardCountryName,
                        fromLast = fromLast)

    # Drop NAs for OutCountryID, except when only the duplicated observations
    # were requested. '&&' is the scalar operator appropriate for if().
    if (isTRUE(na.rm) && duplicates != "return") {
        DDData <- DropNA.psData(data = DDData,
                                timeVar = 'year',
                                OutCountryID = OutCountryID)
    }
    return(DDData)
}