-
Notifications
You must be signed in to change notification settings - Fork 3
/
loadNZEAYearlyEmbeddedGenData.R
87 lines (80 loc) · 3.91 KB
/
loadNZEAYearlyEmbeddedGenData.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#' Load the pre-downloaded yearly NZ EA embedded generation files into a data.table
#'
#' \code{loadNZEAYearlyEmbeddedGenData} reads every \code{.csv.gz} file in \code{path} whose
#' name starts with a year >= \code{fromYear} (the year is taken to be everything before the
#' first "_" in the file name), stacks them and returns one row per date-time holding the
#' summed inflow energy, with a proper rDateTimeNZT added & set to NZ tzone.
#'
#' The embedded generation data does not have a fuel type, just a Point of Connection
#' (https://www.emi.ea.govt.nz/Search?q=POC), so we cannot calculate carbon intensity: the
#' \code{totalC02e_*} columns are returned as \code{NA}. We also assume that
#' \code{Flow_Direction} "I" means inflow (to the grid - so we add this to the grid
#' generation) and that "X" means outflow (to the LV network) which we don't really care
#' about, so only the "I" rows are kept.
#'
#' Presumably the sum of X is the sum of consumption across the POCs which happen to have
#' embedded generation. Maybe.
#'
#' Any date which does not have exactly 48 distinct times of day is dropped (this seems to
#' happen on the last day in the data, which is cut at 17:00 instead of 23:59).
#'
#' @param path the folder to look in for the data (with or without a trailing slash)
#' @param fromYear the year to start from (needs to be in the data file names - you did name them sensibly, yes?)
#' @param update dummy argument used to force a re-load; its value is never read
#' @return a data.table keyed on \code{rDateTimeNZT} with columns \code{kWh},
#'   \code{GENERATION_MWh}, \code{GWh}, \code{GENERATION_MW}, \code{GW}, \code{hms},
#'   \code{year}, whatever \code{setPeakPeriod} adds, and the all-\code{NA} columns
#'   \code{totalC02e_g}, \code{totalC02e_kg} and \code{totalC02e_T}
#' @import lubridate
#' @import data.table
#' @author Ben Anderson, \email{b.anderson@@soton.ac.uk}
#' @export
#' @family data
#'
loadNZEAYearlyEmbeddedGenData <- function(path, fromYear, update = NULL){
  # update = dummy used to force re-load
  # > find the files ----
  # list.files() takes a regex: escape the dots & anchor it so only names
  # which really end in .csv.gz are picked up
  filesToDateDT <- data.table::data.table(
    file = list.files(path, pattern = "\\.csv\\.gz$")
  )
  # year = everything before the first "_" (so extra underscores in the rest
  # of the name do no harm)
  filesToDateDT[, year := as.numeric(sub("_.*$", "", file))]
  # strip any trailing separator so path works with or without one
  filesToDateDT[, fullPath := file.path(sub("[/\\\\]+$", "", path), file)]
  filesToGet <- filesToDateDT[year >= fromYear, # to reduce files loaded
                              fullPath]
  if (length(filesToGet) == 0) {
    # fail here with a clear message rather than later on an empty table
    stop("No .csv.gz files with year >= ", fromYear, " found in ", path,
         call. = FALSE)
  }

  # > load ----
  message("Loading files >= ", fromYear)
  l <- lapply(filesToGet, data.table::fread) # very fast data loading :-)
  dt <- data.table::rbindlist(l, fill = TRUE) # rbind them

  # > fix dates ----
  dt[, rDateTime := lubridate::as_datetime(rDateTime)] # comes in unformatted in fread()
  # force_tz() keeps the clock time & relabels the time zone
  dt[, rDateTimeNZT := lubridate::force_tz(rDateTime,
                                           tzone = "Pacific/Auckland")] # to be sure to be sure

  # > drop incomplete days ----
  # (this seems to happen on the last day in the data which is cut at 17:00
  # instead of 23:59)
  # NOTE(review): a day with fewer than 48 distinct clock times for any other
  # reason (e.g. a daylight saving change) would be dropped too - confirm this
  # is intended
  dt[, obsDate := lubridate::as_date(rDateTimeNZT)]
  dt[, hms := hms::as_hms(rDateTimeNZT)]
  halfHoursPerDay <- dt[, .(nHalfHours = data.table::uniqueN(hms)),
                        keyby = .(obsDate)]
  completeDays <- halfHoursPerDay[nHalfHours == 48]
  data.table::setkey(dt, obsDate)
  dt <- dt[completeDays] # keyed join: drops the dates without 48 observations

  # > keep inflow only ----
  # embedded gen does not give us a fuel type, just a Point of Connection
  # and flow: X outflow, I inflow (we assume)
  # so we can drop the X as we only care about energy flowing in (for now)
  # (we have no idea what Loss_Code means so it is ignored)
  dt[, kWh_n := as.numeric(kWh)]
  dt <- dt[Flow_Direction == "I"]

  # > sum to one row per dateTime ----
  dtw <- dt[!is.na(kWh_n), # remove NA now so sum works
            .(kWh = sum(kWh_n)),
            keyby = .(rDateTimeNZT)]
  dtw[, GENERATION_MWh := kWh/1000]
  dtw[, GWh := GENERATION_MWh/1000]
  dtw[, GENERATION_MW := (GENERATION_MWh * 2)] # half-hourly energy -> MW to match UK data
  dtw[, GW := GENERATION_MW/1000]
  dtw[, hms := hms::as_hms(rDateTimeNZT)]
  dtw[, year := lubridate::year(rDateTimeNZT)]
  dtw <- setPeakPeriod(dtw, dateTime = "rDateTimeNZT")

  # > carbon intensity ----
  # We have no info on fuel type
  dtw[, totalC02e_g := NA]
  dtw[, totalC02e_kg := NA]
  dtw[, totalC02e_T := NA]
  return(dtw) # large, possibly very large depending on fromYear
}