-
Notifications
You must be signed in to change notification settings - Fork 10
/
Getters.R
361 lines (329 loc) · 17 KB
/
Getters.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
#' Downloads Polity IV
#'
#' Downloads the Polity IV data set. It keeps specified variables and creates a standard country ID variable that can be used for merging the data with other data sets.
#'
#' @param url character string. The URL for the Polity IV data set you would like to download. Note: it must be for the SPSS version of the file.
#' @param vars character vector containing the variables to keep. If \code{vars = NULL} then the entire data set is returned. Note that the \code{country} and \code{year} variables are always returned.
#' @param OutCountryID character string. The type of country ID you would like to include in the output file along with the country name. See \code{\link{countrycode}} for available options.
#' @param standardCountryName logical. Whether or not to standardise the country names variable based on \code{country.name} from \code{\link{countrycode}}.
#' @param na.rm logical. Drop observations where \code{OutCountryID} is \code{NA}.
#' @param duplicates character specifying how to handle duplicated country-year observations. Can be set to \code{none} to do nothing, \code{message} to simply report duplicates, \code{drop} to report and drop duplicates, and \code{return} to return a data frame with only duplicated observations (see also \code{fromLast}).
#' @param fromLast logical indicating if duplication should be considered from the reverse side. Only relevant if \code{duplicates = 'drop'} or \code{duplicates = 'return'}.
#'
#' @return a data frame
#'
#' @examples
#' \dontrun{
#' # Download full data set
#' PolityData <- PolityGet()
#'
#' # Create data frame with only the main Polity democracy variable (polity2)
#' Polity2Data <- PolityGet(vars = 'polity2',
#'                          OutCountryID = 'imf')
#' }
#'
#' @seealso \code{\link{countrycode}}, \code{\link{CountryID}}, \code{\link{WinsetCreator}}
#'
#' @importFrom foreign read.spss
#'
#' @export
PolityGet <- function(url = 'http://www.systemicpeace.org/inscr/p4v2012.sav',
                      vars = NULL, OutCountryID = 'iso2c',
                      standardCountryName = TRUE, na.rm = TRUE,
                      duplicates = 'message', fromLast = FALSE){
    # Download underlying Polity IV data
    tmpfile <- tempfile()
    # Remove the temporary file even if the download or the import fails
    on.exit(unlink(tmpfile), add = TRUE)
    # The SPSS file is binary: without mode = 'wb' the default text mode
    # corrupts the download on Windows
    download.file(url, tmpfile, mode = 'wb')
    PolityData <- read.spss(tmpfile, to.data.frame = TRUE)

    # Keep only the requested variables (plus country and year)
    if (!is.null(vars)){
        # Ensure that vars are in the data frame
        if (!all(vars %in% names(PolityData))){
            stop('Specified variables not found in data.')
        }
        # unique() avoids duplicated columns when the caller lists
        # 'country' or 'year' in vars
        Vars <- unique(c('country', 'year', vars))
        PolityData <- PolityData[, Vars]
    }

    # Include new country ID variable and standardise country names
    PolityData <- CountryID(data = PolityData, OutCountryID = OutCountryID,
                            timeVar = 'year', duplicates = duplicates,
                            standardCountryName = standardCountryName,
                            fromLast = fromLast)

    # Drop NAs for OutCountryID
    if (isTRUE(na.rm)){
        PolityData <- DropNA.psData(data = PolityData,
                                    Var = OutCountryID)
    }
    return(PolityData)
}
#' Downloads the Database of Political Institutions (DPI)
#'
#' Downloads the Database of Political Institutions (DPI) data set. It keeps specified variables and creates a standard country ID variable that can be used for merging the data with other data sets.
#'
#' @param url character string. The URL for the DPI data set you would like to download. Note: the link must be to a Stata formatted file.
#' @param vars character vector containing the variables to keep. If \code{vars = NULL} then the entire data set is returned. Note that the \code{countryname} and \code{year} variables are always returned.
#' @param OutCountryID character string. The type of country ID you would like to include in the output file along with the country name. See \code{\link{countrycode}} for available options.
#' @param standardCountryName logical. Whether or not to standardise the country names variable based on \code{country.name} from \code{\link{countrycode}}.
#' @param na.rm logical. Drop observations where \code{OutCountryID} is \code{NA}.
#' @param duplicates character specifying how to handle duplicated country-year observations. Can be set to \code{none} to do nothing, \code{message} to simply report duplicates, \code{drop} to report and drop duplicates, and \code{return} to return a data frame with only duplicated observations (see also \code{fromLast}).
#' @param fromLast logical indicating if duplication should be considered from the reverse side. Only relevant if \code{duplicates = 'drop'} or \code{duplicates = 'return'}.
#'
#' @details Note: a bit.ly URL is used to shorten the Stata formatted data set's URL due to CRAN requirements.
#'
#' @return a data frame
#'
#' @examples
#' \dontrun{
#' # Download full data set
#' DpiData <- DpiGet()
#'
#' # Create data frame with only the military variable
#' DpiSub <- DpiGet(vars = 'military',
#'                  OutCountryID = 'imf')
#' }
#'
#' @seealso \code{\link{countrycode}}, \code{\link{CountryID}}, \code{\link{WinsetCreator}}
#'
#' @importFrom foreign read.dta
#'
#' @export
DpiGet <- function(url = 'http://bit.ly/1jZ3nmM', vars = NULL,
                   OutCountryID = 'iso2c', standardCountryName = TRUE,
                   na.rm = TRUE, duplicates = 'message', fromLast = FALSE){
    # Download underlying DPI data
    tmpfile <- tempfile()
    # Remove the temporary file even if the download or the import fails
    on.exit(unlink(tmpfile), add = TRUE)
    # The Stata file is binary: without mode = 'wb' the default text mode
    # corrupts the download on Windows
    download.file(url, tmpfile, mode = 'wb')
    DpiData <- read.dta(tmpfile)

    # Keep only the requested variables (plus countryname and year)
    if (!is.null(vars)){
        # Ensure that vars are in the data frame
        if (!all(vars %in% names(DpiData))){
            stop('Specified variables not found in data.')
        }
        # unique() avoids duplicated columns when the caller lists
        # 'countryname' or 'year' in vars
        Vars <- unique(c('countryname', 'year', vars))
        DpiData <- DpiData[, Vars]
    }

    # Include new country ID variable
    DpiData <- CountryID(data = DpiData,
                         OutCountryID = OutCountryID,
                         countryVar = 'countryname',
                         timeVar = 'year', duplicates = duplicates,
                         standardCountryName = standardCountryName,
                         fromLast = fromLast)

    # Drop NAs for OutCountryID
    if (isTRUE(na.rm)){
        DpiData <- DropNA.psData(data = DpiData,
                                 Var = OutCountryID)
    }
    return(DpiData)
}
#' Download and combine Reinhart and Rogoff's (2010) crisis dummy variables into one data frame
#'
#' @param urls URLs for each Excel file in the Reinhart and Rogoff data set. See \url{http://www.carmenreinhart.com/data/browse-by-topic/topics/7/}.
#' @param OutCountryID character string. The type of country ID you would like to include in the output file along with the country name. See \code{\link{countrycode}} for available options.
#' @param message logical. Whether or not to notify you which sheets are being cleaned and organised.
#' @param standardCountryName logical. Whether or not to standardise the country names variable based on \code{country.name} from \code{\link{countrycode}}.
#'
#' @return Returns a data frame with the following columns:
#' \itemize{
#' \item{\code{iso2c}: }{The ISO two letter country code identifying the country. This can be changed to another country ID system using \code{OutCountryID}}
#' \item{\code{country}: }{Country names.}
#' \item{\code{year}: }{The year.}
#' \item{\code{RR_Independence}: }{Year of independence.}
#' \item{\code{RR_CurrencyCrisis}: }{Currency crisis.}
#' \item{\code{RR_InflationCrisis}: }{Inflation crisis.}
#' \item{\code{RR_StockMarketCrash}: }{Stock market crash.}
#' \item{\code{RR_SovDebtCrisisDom}: }{Domestic sovereign debt crisis.}
#' \item{\code{RR_SovDebtCrisisExt}: }{External sovereign debt crisis.}
#' \item{\code{RR_BankingCrisis}: }{Banking crisis.}
#' \item{\code{RR_YearlyCrisisTally}: }{Total number of crises per year.}
#' }
#'
#' @examples
#' \dontrun{
#' RRDummies <- RRCrisisGet()
#' }
#'
#' @source
#' Reinhart, Carmen M. and Kenneth S. Rogoff, ''From Financial Crash to Debt Crisis,'' NBER Working Paper 15795, March 2010. Forthcoming in American Economic Review.
#'
#' @importFrom xlsx loadWorkbook
#' @importFrom xlsx getSheets
#' @importFrom xlsx read.xlsx
#' @importFrom DataCombine MoveFront
#' @importFrom DataCombine DropNA
#' @export
RRCrisisGet <- function(urls = c(
                    'http://www.carmenreinhart.com/user_uploads/data/22_data.xls',
                    'http://www.carmenreinhart.com/user_uploads/data/35_data.xls',
                    'http://www.carmenreinhart.com/user_uploads/data/23_data.xls',
                    'http://www.carmenreinhart.com/user_uploads/data/25_data.xls'),
                    OutCountryID = 'iso2c', message = TRUE,
                    standardCountryName = TRUE){
    # Sheets that are skipped rather than read as country data
    Droppers <- c('Contents', 'CrisisDefinition', 'CrisisDefinitions',
                  'Sheet1', 'Sheet3')
    # Column names applied to every cleaned country sheet
    CleanNames <- c('country', 'year', 'RR_Independence',
                    'RR_CurrencyCrisis', 'RR_InflationCrisis',
                    'RR_StockMarketCrash', 'RR_SovDebtCrisisDom',
                    'RR_SovDebtCrisisExt', 'RR_BankingCrisis',
                    'RR_YearlyCrisisTally')

    # Track every downloaded workbook so all of them are removed on exit,
    # including when a download or a sheet import fails part-way through
    tmpfiles <- character(0)
    on.exit(unlink(tmpfiles), add = TRUE)

    # Collect the cleaned sheets and bind once at the end instead of
    # growing a data frame with rbind() inside the loop
    SheetList <- list()

    for (i in urls){
        tmpfile <- tempfile()
        tmpfiles <- c(tmpfiles, tmpfile)
        # Excel workbooks are binary: without mode = 'wb' the default text
        # mode corrupts the download on Windows
        download.file(i, tmpfile, mode = 'wb')

        # Load workbook and find relevant sheet names
        WBNames <- names(getSheets(loadWorkbook(tmpfile)))
        WBNames <- WBNames[!(WBNames %in% Droppers)]

        for (u in WBNames){
            Temp <- read.xlsx(tmpfile, u)

            # Keep only the year and crisis indicators
            Temp <- Temp[13:nrow(Temp), 1:9]

            # Extract the country name
            if (u == 'UK'){
                Temp$country <- 'United Kingdom'
            } else if (u == 'US'){
                Temp$country <- 'United States'
            } else {
                # Take the leading run of letters from the first column
                # header. [A-Za-z] is used because [A-z] also matches the
                # punctuation characters between 'Z' and 'a'.
                Temp$country <- gsub('([A-Za-z]+).*', '\\1', names(Temp)[1])
            }
            Temp <- MoveFront(Temp, 'country')

            # Rename variables
            names(Temp) <- CleanNames

            if (isTRUE(message)){
                message(paste0('Cleaning up Excel sheet for ', u, '.\n'))
            }
            Temp <- DropNA(Temp, c('year', 'RR_BankingCrisis'), message = FALSE)
            SheetList[[length(SheetList) + 1]] <- Temp
        }
    }

    if (length(SheetList) > 0){
        OutData <- do.call(rbind, SheetList)
    } else {
        OutData <- data.frame()
    }

    # Clean up country names and add ID.
    # Only the first word of multi-word names survives the extraction above,
    # so map each truncated name back to the full country name.
    OutData$country[OutData$country == 'New'] <- 'New Zealand'
    OutData$country[OutData$country == 'South'] <- 'South Africa'
    OutData$country[OutData$country == 'Sri'] <- 'Sri Lanka'
    OutData$country[OutData$country == 'Central'] <- 'Central African Republic'
    OutData$country[OutData$country == 'Costa'] <- 'Costa Rica'
    OutData$country[OutData$country == 'Cote'] <- 'Cote dIvoire'
    OutData$country[OutData$country == 'Dominican'] <- 'Dominican Republic'
    OutData$country[OutData$country == 'El'] <- 'El Salvador'

    OutData <- CountryID(data = OutData, OutCountryID = OutCountryID,
                         timeVar = 'year',
                         standardCountryName = standardCountryName)

    # Clean year: go through character first so that a factor is converted
    # by its labels rather than its internal integer codes
    OutData$year <- as.character(OutData$year)
    OutData$year <- as.numeric(OutData$year)

    OutData <- OutData[order(OutData[, OutCountryID], OutData[, 'year']), ]
    return(OutData)
}
#' Downloads Dreher's data set of IMF programs and World Bank projects (1970-2011)
#'
#' Downloads Dreher's data set of IMF programs and World Bank projects (1970-2011). It keeps specified variables and creates a standard country ID variable that can be used for merging the data with other data sets.
#'
#' @param url character string. The URL for the Dreher data set you would like to download. Note: it must be for the xls version of the file. Currently only the 1970-2011 version is supported.
#' @param sheets character vector of the Excel sheets (variables) that you would like to return. See Details for more information.
#' @param OutCountryID character string. The type of country ID you would like to include in the output file along with the country name. See \code{\link{countrycode}} for available options.
#' @param message logical. Whether or not to notify you which sheets are being cleaned and organised.
#' @param standardCountryName logical. Whether or not to standardise the country names variable based on \code{country.name} from \code{\link{countrycode}}.
#'
#' @details Using the \code{sheets} argument you can select which variables to download from their individual workbook sheets in the original data set. These include:
#' \itemize{
#' \item{\code{WB other agreed}: }{Number of World Bank projects agreed, other than technical or adjustment.}
#' \item{\code{WB technical agreed}: }{Number of World Bank technical projects agreed.}
#' \item{\code{WB adjustment agreed}: }{Number of World Bank adjustment projects agreed.}
#' \item{\code{WB environment agreed}: }{Number of World Bank environmental projects agreed.}
#' \item{\code{WB adjustment 5}: }{Number of World Bank adjustment projects in effect for at least 5 months in a particular year.}
#' \item{\code{IMF SBA}: }{IMF Standby Arrangement agreed, dummy.}
#' \item{\code{IMF EFF}: }{IMF Extended Fund Facility Arrangement agreed, dummy.}
#' \item{\code{IMF SAF}: }{IMF Structural Adjustment Facility Arrangement agreed, dummy.}
#' \item{\code{IMF PRGF}: }{IMF Poverty Reduction and Growth Facility Arrangement agreed, dummy.}
#' \item{\code{IMF SBA 5}: }{IMF Standby Arrangement in effect for at least 5 months in a particular year, dummy.}
#' \item{\code{IMF EFF 5}: }{IMF Extended Fund Facility Arrangement in effect for at least 5 months in a particular year, dummy.}
#' \item{\code{IMF SAF 5}: }{IMF Structural Adjustment Facility Arrangement in effect for at least 5 months in a particular year, dummy.}
#' \item{\code{IMF PRGF 5}: }{IMF Poverty Reduction and Growth Facility Arrangement in effect for at least 5 months in a particular year, dummy.}
#' }
#' @return a data frame
#'
#' @examples
#' \dontrun{
#' # Download 'WB other agreed', 'WB environment agreed'
#' # These are the default sheets to gather
#' WBPrograms <- IMF_WBGet()
#' }
#'
#' @source Data website: \url{http://www.uni-heidelberg.de/fakultaeten/wiso/awi/professuren/intwipol/datasets_en.html}.
#'
#' When using the IMF data, please cite:
#'
#' Dreher, Axel, 2006, IMF and Economic Growth: The Effects of Programs, Loans, and Compliance with Conditionality, World Development 34, 5: 769-788.
#'
#' When using the World Bank data, please cite:
#'
#' Boockmann, Bernhard and Axel Dreher, 2003, The Contribution of the IMF and the World Bank to Economic Freedom, European Journal of Political Economy 19, 3: 633-649.
#'
#' @importFrom xlsx loadWorkbook
#' @importFrom xlsx getSheets
#' @importFrom xlsx read.xlsx
#' @importFrom DataCombine VarDrop
#' @importFrom DataCombine MoveFront
#' @importFrom reshape2 melt
#' @export
IMF_WBGet <- function(url = 'http://axel-dreher.de/Dreher%20IMF%20and%20WB.xls',
                      sheets = c('WB other agreed', 'WB environment agreed'),
                      OutCountryID = 'iso2c', message = TRUE,
                      standardCountryName = TRUE){
    # Download full Dreher IMF program data set
    tmpfile <- tempfile()
    # Remove the temporary workbook even if a later step fails
    on.exit(unlink(tmpfile), add = TRUE)
    # Excel workbooks are binary: without mode = 'wb' the default text mode
    # corrupts the download on Windows
    download.file(url, tmpfile, mode = 'wb')

    # Error if a desired sheet is not in the data set
    WBNames <- names(getSheets(loadWorkbook(tmpfile)))
    if (!all(sheets %in% WBNames)){
        stop("Sheets(s) not found in the data set.")
    }

    FullDF <- data.frame()
    for (i in sheets){
        VarName <- gsub(' ', '.', i)
        if (isTRUE(message)){
            message(paste0('Cleaning: ', VarName, '.\n'))
        }

        # Extract sheet
        OneSheet <- read.xlsx(tmpfile, i)

        if (i != 'WB environment agreed'){
            # Wide sheet (one column per year): reshape to country-year rows
            OneSheet <- melt(OneSheet,
                             id.vars = c('Country.Code', 'Country.Name'))

            # Year columns are read in with an 'X' prefix; strip it to
            # recover the numeric year
            OneSheet$variable <- gsub('X', '', as.character(OneSheet$variable))
            OneSheet$variable <- as.numeric(OneSheet$variable)

            # Remove literal dots from character values before converting
            # them to numbers
            if (is.character(OneSheet$value)){
                OneSheet$value <- gsub('\\.', '', OneSheet$value)
                OneSheet$value <- as.numeric(OneSheet$value)
            }

            OneSheet <- CountryID(data = OneSheet,
                                  OutCountryID = OutCountryID,
                                  countryVar = 'Country.Name',
                                  duplicates = 'none',
                                  standardCountryName = standardCountryName)
            OneSheet <- VarDrop(OneSheet, 'Country.Code')
        } else {
            # 'WB environment agreed' is originally in country-year format
            OneSheet <- CountryID(data = OneSheet,
                                  OutCountryID = OutCountryID,
                                  countryVar = 'country',
                                  duplicates = 'none',
                                  standardCountryName = standardCountryName)
            OneSheet <- VarDrop(OneSheet, 'code')
        }
        names(OneSheet) <- c(OutCountryID, 'country', 'year', VarName)

        # Merge data frames together
        if (ncol(FullDF) == 0){
            FullDF <- OneSheet
        } else {
            FullDF <- merge(FullDF, OneSheet, by = c('country', 'year'))
            # Both inputs carry the ID column, so merge() suffixes them with
            # .x/.y; keep the .x copy and restore its original name
            if (paste0(OutCountryID, '.x') %in% names(FullDF)){
                FullDF <- FullDF[, !(names(FullDF) %in%
                                     paste0(OutCountryID, '.y'))]
                FullDF <- MoveFront(FullDF, paste0(OutCountryID, '.x'))
                FinalNames <- names(FullDF)[-1]
                names(FullDF) <- c(OutCountryID, FinalNames)
            }
        }
    }

    # Final Clean
    FullDF <- FullDF[order(FullDF[, OutCountryID], FullDF[, 'year']), ]
    return(FullDF)
}