knitr::opts_chunk$set(echo = TRUE)
library(fpp2)
## Warning: package 'fpp2' was built under R version 4.0.5
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## -- Attaching packages ---------------------------------------------- fpp2 2.4 --
## v ggplot2 3.3.3 v fma 2.4
## v forecast 8.14 v expsmooth 2.3
## Warning: package 'fma' was built under R version 4.0.5
## Warning: package 'expsmooth' was built under R version 4.0.5
##
library(fastDummies)
## Warning: package 'fastDummies' was built under R version 4.0.5
library(kableExtra)
## Warning: package 'kableExtra' was built under R version 4.0.5
library(data.table)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:data.table':
##
## hour, isoweek, mday, minute, month, quarter, second, wday, week,
## yday, year
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(plyr)
##
## Attaching package: 'plyr'
## The following object is masked from 'package:fma':
##
## ozone
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:data.table':
##
## between, first, last
## The following object is masked from 'package:kableExtra':
##
## group_rows
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v tibble 3.0.6 v purrr 0.3.4
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::arrange() masks plyr::arrange()
## x lubridate::as.difftime() masks base::as.difftime()
## x dplyr::between() masks data.table::between()
## x purrr::compact() masks plyr::compact()
## x dplyr::count() masks plyr::count()
## x lubridate::date() masks base::date()
## x dplyr::failwith() masks plyr::failwith()
## x dplyr::filter() masks stats::filter()
## x dplyr::first() masks data.table::first()
## x dplyr::group_rows() masks kableExtra::group_rows()
## x lubridate::hour() masks data.table::hour()
## x dplyr::id() masks plyr::id()
## x lubridate::intersect() masks base::intersect()
## x lubridate::isoweek() masks data.table::isoweek()
## x dplyr::lag() masks stats::lag()
## x dplyr::last() masks data.table::last()
## x lubridate::mday() masks data.table::mday()
## x lubridate::minute() masks data.table::minute()
## x lubridate::month() masks data.table::month()
## x dplyr::mutate() masks plyr::mutate()
## x lubridate::quarter() masks data.table::quarter()
## x dplyr::rename() masks plyr::rename()
## x lubridate::second() masks data.table::second()
## x lubridate::setdiff() masks base::setdiff()
## x dplyr::summarise() masks plyr::summarise()
## x dplyr::summarize() masks plyr::summarize()
## x purrr::transpose() masks data.table::transpose()
## x lubridate::union() masks base::union()
## x lubridate::wday() masks data.table::wday()
## x lubridate::week() masks data.table::week()
## x lubridate::yday() masks data.table::yday()
## x lubridate::year() masks data.table::year()
library(zoo)
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(grid)
library(seasonal)
## Warning: package 'seasonal' was built under R version 4.0.5
##
## Attaching package: 'seasonal'
## The following object is masked from 'package:psych':
##
## outlier
## The following object is masked from 'package:tibble':
##
## view
library(reshape2)
## Warning: package 'reshape2' was built under R version 4.0.5
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
## The following objects are masked from 'package:data.table':
##
## dcast, melt
#Dengue Data
d.all <- fread("dengue_labels_train.csv")
d.all.sj <- d.all[city=="sj"]
d.all.iq <- d.all[city=="iq"]
#Additional Variables
d.vars <- fread("dengue_features_train.csv")
d.vars.sj <- d.vars[city=="sj"]
d.vars.iq <- d.vars[city=="iq"]
#Merge
d.sj <- merge(d.all.sj, d.vars.sj, by.x = c("year","weekofyear","city"),
by.y = c("year","weekofyear","city"), all.x = TRUE, all.y = TRUE)
d.iq <- merge(d.all.iq, d.vars.iq, by.x = c("year","weekofyear","city"),
by.y = c("year","weekofyear","city"), all.x = TRUE, all.y = TRUE)
#SJ
cor.sj <- data.frame(cor(d.sj[,-c("city","week_start_date")], use = "complete.obs"))
cor.sj.sub <- subset(cor.sj, select = "total_cases")
cor.sj.sub
## total_cases
## year -0.207912900
## weekofyear 0.289133825
## total_cases 1.000000000
## ndvi_ne 0.031533393
## ndvi_nw 0.083304916
## ndvi_se -0.004387998
## ndvi_sw -0.002335392
## precipitation_amt_mm 0.101033458
## reanalysis_air_temp_k 0.238730865
## reanalysis_avg_temp_k 0.231786509
## reanalysis_dew_point_temp_k 0.271975113
## reanalysis_max_air_temp_k 0.256103688
## reanalysis_min_air_temp_k 0.243080906
## reanalysis_precip_amt_kg_per_m2 0.154953190
## reanalysis_relative_humidity_percent 0.197329663
## reanalysis_sat_precip_amt_mm 0.101033458
## reanalysis_specific_humidity_g_per_kg 0.280365672
## reanalysis_tdtr_k -0.049046214
## station_avg_temp_c 0.224524237
## station_diur_temp_rng_c 0.010370823
## station_max_temp_c 0.170781332
## station_min_temp_c 0.215872005
## station_precip_mm 0.069941830
cor.sj.sub.sig <- subset(cor.sj.sub, abs(total_cases) >= .15)
cor.sj.sub.sig
## total_cases
## year -0.2079129
## weekofyear 0.2891338
## total_cases 1.0000000
## reanalysis_air_temp_k 0.2387309
## reanalysis_avg_temp_k 0.2317865
## reanalysis_dew_point_temp_k 0.2719751
## reanalysis_max_air_temp_k 0.2561037
## reanalysis_min_air_temp_k 0.2430809
## reanalysis_precip_amt_kg_per_m2 0.1549532
## reanalysis_relative_humidity_percent 0.1973297
## reanalysis_specific_humidity_g_per_kg 0.2803657
## station_avg_temp_c 0.2245242
## station_max_temp_c 0.1707813
## station_min_temp_c 0.2158720
#IQ
cor.iq <- data.frame(cor(d.iq[,-c("city","week_start_date")], use = "complete.obs"))
cor.iq.sub <- subset(cor.iq, select = "total_cases")
cor.iq.sub
## total_cases
## year 0.218685930
## weekofyear -0.017409241
## total_cases 1.000000000
## ndvi_ne -0.015188790
## ndvi_nw -0.056017695
## ndvi_se -0.063522481
## ndvi_sw 0.001880791
## precipitation_amt_mm 0.104963004
## reanalysis_air_temp_k 0.104898299
## reanalysis_avg_temp_k 0.083218569
## reanalysis_dew_point_temp_k 0.250422175
## reanalysis_max_air_temp_k -0.051879135
## reanalysis_min_air_temp_k 0.206488508
## reanalysis_precip_amt_kg_per_m2 0.084730868
## reanalysis_relative_humidity_percent 0.139303890
## reanalysis_sat_precip_amt_mm 0.104963004
## reanalysis_specific_humidity_g_per_kg 0.257292843
## reanalysis_tdtr_k -0.127453682
## station_avg_temp_c 0.112726442
## station_diur_temp_rng_c -0.060938281
## station_max_temp_c 0.069411063
## station_min_temp_c 0.230102160
## station_precip_mm 0.069049546
cor.iq.sub.sig <- subset(cor.iq.sub, abs(total_cases) >= .15)
cor.iq.sub.sig
## total_cases
## year 0.2186859
## total_cases 1.0000000
## reanalysis_dew_point_temp_k 0.2504222
## reanalysis_min_air_temp_k 0.2064885
## reanalysis_specific_humidity_g_per_kg 0.2572928
## station_min_temp_c 0.2301022
#SJ
xreg.sj <- cbind(d.sj[,"year"],d.sj[,"weekofyear"],d.sj[,"reanalysis_air_temp_k"],d.sj[,"reanalysis_avg_temp_k"],
d.sj[,"reanalysis_dew_point_temp_k"],d.sj[,"reanalysis_max_air_temp_k"],d.sj[,"reanalysis_min_air_temp_k"],
d.sj[,"reanalysis_precip_amt_kg_per_m2"],d.sj[,"reanalysis_relative_humidity_percent"],
d.sj[,"reanalysis_specific_humidity_g_per_kg"],d.sj[,"station_avg_temp_c"],d.sj[,"station_max_temp_c"],
d.sj[,"station_min_temp_c"])
xreg.sj.train <- xreg.sj[1:749]
xreg.sj.test <- xreg.sj[750:936]
#IQ
xreg.iq <- cbind(d.iq[,"year"],d.iq[,"reanalysis_dew_point_temp_k"],d.iq[,"reanalysis_min_air_temp_k"],
d.iq[,"reanalysis_specific_humidity_g_per_kg"],d.iq[,"station_min_temp_c"])
xreg.iq.train <- xreg.iq[1:416]
xreg.iq.test <- xreg.iq[417:520]
#SJ Time series
d.sj.ts <- ts(d.sj$total_cases, start = c(1990, 18), frequency = 52)
#IQ Time Series
d.iq.ts <- ts(d.iq$total_cases, start = c(2000, 26), frequency = 52)
#SJ
melted.cor.sj <- melt(cor(d.sj[,-c("city","week_start_date")], use = "complete.obs"))
ggplot(data = melted.cor.sj, aes(x=Var1, y=Var2, fill=value)) + geom_tile()
#IQ
melted.cor.iq <- melt(cor(d.iq[,-c("city","week_start_date")], use = "complete.obs"))
ggplot(data = melted.cor.iq, aes(x=Var1, y=Var2, fill=value)) + geom_tile()
#Plot Time Series
plot(d.sj.ts, main = "Figure 1: SJ Dengue Cases by Week 1990-2008",
ylab = "Daily Cases")
plot(d.iq.ts, main = "Figure 2: IQ Dengue Cases by Week 2000-2010",
ylab = "Daily Cases")
#SJ
decomp.add.sj <- decompose(d.sj.ts, type = "additive", filter = NULL)
autoplot(decomp.add.sj) + ggtitle("Additive Decomp. for SJ")
decomp.mult.sj <- decompose(d.sj.ts, type = "multiplicative", filter = NULL)
autoplot(decomp.mult.sj) + ggtitle("Multip. Decomp. for SJ")
#IQ
decomp.add.iq <- decompose(d.iq.ts, type = "additive", filter = NULL)
autoplot(decomp.add.iq) + ggtitle("Additive Decomp. for IQ")
decomp.mult.iq <- decompose(d.iq.ts, type = "multiplicative", filter = NULL)
autoplot(decomp.mult.iq) + ggtitle("Multip. Decomp. for IQ")
#SJ Data (80:20 split)
d.sj.end <- length(d.sj.ts)
d.sj.train.cutoff <- round(.8*d.sj.end, 0)
d.sj.train <- d.sj.ts[1:d.sj.train.cutoff]
d.sj.test <- d.sj.ts[(d.sj.train.cutoff+1):d.sj.end]
d.sj.train.ts <- ts(d.sj.train, start = c(1990, 18), frequency = 52)
d.sj.test.ts <- ts(d.sj.test, start = c(2004, 39), frequency = 52)
#IQ Data (80:20 split)
d.iq.end <- length(d.iq.ts)
d.iq.train.cutoff <- round(.8*d.iq.end, 0)
d.iq.train <- d.iq.ts[1:d.iq.train.cutoff]
d.iq.test <- d.iq.ts[(d.iq.train.cutoff+1):d.iq.end]
d.iq.train.ts <- ts(d.iq.train, start = c(2000, 26), frequency = 52)
d.iq.test.ts <- ts(d.iq.test, start = c(2008, 27), frequency = 52)
#SJ Model
naive.sj <- naive(d.sj.train.ts,h=187)
autoplot(naive.sj) + autolayer(d.sj.test.ts)
acc.naive.sj <- accuracy(naive.sj$mean,d.sj.test.ts[1:187])
acc.naive.sj
## ME RMSE MAE MPE MAPE
## Test set 11.74866 33.38056 17.86631 -Inf Inf
#IQ Model
naive.iq <- naive(d.iq.train.ts,h=104)
autoplot(naive.iq) + autolayer(d.iq.test.ts)
acc.naive.iq <- accuracy(naive.iq$mean,d.iq.test.ts[1:104])
acc.naive.iq
## ME RMSE MAE MPE MAPE
## Test set 8.519231 14.24713 8.673077 -Inf Inf
#SJ Model
snaive.sj <- snaive(d.sj.train.ts,h=187)
autoplot(snaive.sj) + autolayer(d.sj.test.ts)
acc.snaive.sj <- accuracy(snaive.sj$mean,d.sj.test.ts[1:187])
acc.snaive.sj
## ME RMSE MAE MPE MAPE
## Test set 7.550802 31.57336 17.5615 -Inf Inf
#IQ Model
snaive.iq <- snaive(d.iq.train.ts,h=104)
autoplot(snaive.iq) + autolayer(d.iq.test.ts)
acc.snaive.iq <- accuracy(snaive.iq$mean,d.iq.test.ts[1:104])
acc.snaive.iq
## ME RMSE MAE MPE MAPE
## Test set -1.288462 14.74658 9.288462 -Inf Inf
#SJ Model
stlf.ets.sj <-stlf(d.sj.train.ts,method="ets",h=187)
autoplot(stlf.ets.sj) + autolayer(d.sj.test.ts)
acc.stlf.ets.sj <- accuracy(stlf.ets.sj$mean,d.sj.test.ts[1:187])
acc.stlf.ets.sj
## ME RMSE MAE MPE MAPE
## Test set 17.46665 32.18493 20.76644 Inf Inf
#IQ Model
stlf.ets.iq <-stlf(d.iq.train.ts,method="ets",h=104)
autoplot(stlf.ets.iq) + autolayer(d.iq.test.ts)
acc.stlf.ets.iq <- accuracy(stlf.ets.iq$mean,d.iq.test.ts[1:104])
acc.stlf.ets.iq
## ME RMSE MAE MPE MAPE
## Test set 3.215535 11.72406 6.852651 -Inf Inf
#SJ Model
stlf.naive.sj <-stlf(d.sj.train.ts,method="naive",h=187)
autoplot(stlf.naive.sj) + autolayer(d.sj.test.ts)
acc.stlf.naive.sj <- accuracy(stlf.naive.sj$mean,d.sj.test.ts[1:187])
acc.stlf.naive.sj
## ME RMSE MAE MPE MAPE
## Test set 17.46305 32.18298 20.76423 Inf Inf
#IQ Model
stlf.naive.iq <-stlf(d.iq.train.ts,method="naive",h=104)
autoplot(stlf.naive.iq) + autolayer(d.iq.test.ts)
acc.stlf.naive.iq <- accuracy(stlf.naive.iq$mean,d.iq.test.ts[1:104])
acc.stlf.naive.iq
## ME RMSE MAE MPE MAPE
## Test set 2.481035 11.54424 6.756134 -Inf Inf
#SJ Model
stlf.arima.sj <-stlf(d.sj.train.ts,method="arima",h=187)
autoplot(stlf.arima.sj) + autolayer(d.sj.test.ts)
acc.stlf.arima.sj <- accuracy(stlf.arima.sj$mean,d.sj.test.ts[1:187])
acc.stlf.arima.sj
## ME RMSE MAE MPE MAPE
## Test set 17.44394 32.17409 20.7551 Inf Inf
#IQ Model
stlf.arima.iq <-stlf(d.iq.train.ts,method="arima",h=104)
autoplot(stlf.arima.iq) + autolayer(d.iq.test.ts)
acc.stlf.arima.iq <- accuracy(stlf.arima.iq$mean,d.iq.test.ts[1:104])
acc.stlf.arima.iq
## ME RMSE MAE MPE MAPE
## Test set 3.468611 11.79906 6.917627 -Inf Inf
#SJ Model
stlf.arima.xreg.sj <-stlf(d.sj.train.ts,method="arima",h=187,xreg=as.matrix(xreg.sj.train),newxreg=as.matrix(xreg.sj.test))
## Warning in forecast.forecast_ARIMA(fit, h = h, level = level, xreg = newxreg):
## Upper prediction intervals are not finite.
autoplot(stlf.arima.xreg.sj) + autolayer(d.sj.test.ts)
acc.stlf.arima.xreg.sj <- accuracy(stlf.arima.xreg.sj$mean,d.sj.test.ts[1:187])
acc.stlf.arima.xreg.sj
## ME RMSE MAE MPE MAPE
## Test set 11.68336 28.03475 16.79356 -Inf Inf
#IQ Model
stlf.arima.xreg.iq <-stlf(d.iq.train.ts,method="arima",h=104,xreg=as.matrix(xreg.iq.train),newxreg=as.matrix(xreg.iq.test))
## Warning in forecast.forecast_ARIMA(fit, h = h, level = level, xreg = newxreg):
## Upper prediction intervals are not finite.
autoplot(stlf.arima.xreg.iq) + autolayer(d.iq.test.ts)
acc.stlf.arima.xreg.iq <- accuracy(stlf.arima.xreg.iq$mean,d.iq.test.ts[1:104])
acc.stlf.arima.xreg.iq
## ME RMSE MAE MPE MAPE
## Test set -1.836337 11.92539 8.18647 -Inf Inf
#SJ Model
tbats.sj <- tbats(d.sj.train.ts)
for.tbats.sj <- forecast(tbats.sj,187)
autoplot(for.tbats.sj) + autolayer(d.sj.test.ts)
acc.tbats.sj <- accuracy(for.tbats.sj,d.sj.test.ts[1:187])
acc.tbats.sj
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.5644022 15.74908 8.752217 -Inf Inf 0.9975101 0.006240324
## Test set 2.5404329 27.33667 19.668676 -Inf Inf 2.2416837 NA
#IQ Model
tbats.iq <- tbats(d.iq.train.ts)
for.tbats.iq <- forecast(tbats.iq,104)
autoplot(for.tbats.iq) + autolayer(d.iq.test.ts)
acc.tbats.iq <- accuracy(for.tbats.iq,d.iq.test.ts[1:104])
acc.tbats.iq
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.002721555 7.225633 3.799959 NaN Inf 0.9692581 0.006681934
## Test set 8.838031070 14.440930 8.946828 -Inf Inf 2.2820735 NA
#SJ Model
aarima.sj <- auto.arima(d.sj.train.ts, xreg = as.matrix(xreg.sj.train))
for.aarima.sj <- forecast(aarima.sj,187, xreg = as.matrix(xreg.sj.test))
## Warning in forecast.forecast_ARIMA(aarima.sj, 187, xreg =
## as.matrix(xreg.sj.test)): Upper prediction intervals are not finite.
autoplot(for.aarima.sj) + autolayer(d.sj.test.ts)
acc.aarima.sj <- accuracy(for.aarima.sj,d.sj.test.ts[1:187])
acc.aarima.sj
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.06477151 15.61662 8.747112 -Inf Inf 0.9969282 0.07948093
## Test set 11.43920732 30.50516 17.205513 -Inf Inf 1.9609514 NA
#IQ Model
aarima.iq <- auto.arima(d.iq.train.ts, xreg = as.matrix(xreg.iq.train))
for.aarima.iq <- forecast(aarima.iq,104, xreg = as.matrix(xreg.iq.test))
## Warning in forecast.forecast_ARIMA(aarima.iq, 104, xreg =
## as.matrix(xreg.iq.test)): Upper prediction intervals are not finite.
autoplot(for.aarima.iq) + autolayer(d.iq.test.ts)
acc.aarima.iq <- accuracy(for.aarima.iq,d.iq.test.ts[1:104])
acc.aarima.iq
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.02127012 5.900787 3.334367 NaN Inf 0.8504992 0.04751688
## Test set -1.12419094 11.871375 8.418910 -Inf Inf 2.1474172 NA
#SJ Model
#Random Seed
set.seed(1240932)
#Deal with NAs in Xreg Test
xreg.sj.test.na <- data.table(xreg.sj.test)
xreg.sj.test.na <- na.locf(xreg.sj.test.na)
#Model
nnar.sj <- nnetar(d.sj.train.ts, xreg = as.matrix(xreg.sj.train))
## Warning in nnetar(d.sj.train.ts, xreg = as.matrix(xreg.sj.train)): Missing
## values in xreg, omitting rows
for.nnar.sj <- forecast(nnar.sj,187, xreg = as.matrix(xreg.sj.test.na))
autoplot(for.nnar.sj) + autolayer(d.sj.test.ts)
acc.nnar.sj <- accuracy(for.nnar.sj,d.sj.test.ts[1:187])
acc.nnar.sj
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.01297781 4.081275 2.90118 -Inf Inf 0.3306541 -0.07797094
## Test set 1.15344162 24.882368 15.01318 -Inf Inf 1.7110857 NA
#IQ Model
#Random Seed
set.seed(1234216)
#Deal with NAs in Xreg Test
xreg.iq.test.na <- data.table(xreg.iq.test)
xreg.iq.test.na <- na.locf(xreg.iq.test.na)
#Model
nnar.iq <- nnetar(d.iq.train.ts, xreg = as.matrix(xreg.iq.train))
## Warning in nnetar(d.iq.train.ts, xreg = as.matrix(xreg.iq.train)): Missing
## values in xreg, omitting rows
for.nnar.iq <- forecast(nnar.iq,104, xreg = as.matrix(xreg.iq.test.na))
autoplot(for.nnar.iq) + autolayer(d.iq.test.ts)
acc.nnar.iq <- accuracy(for.nnar.iq,d.iq.test.ts[1:104])
acc.nnar.iq
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.007956413 3.774465 2.484315 -Inf Inf 0.6336759 -0.02258968
## Test set 3.750152741 11.884336 6.759713 -Inf Inf 1.7242046 NA
#Dengue Data
d.sub <- fread("submission_format.csv")
d.sub.sj <- d.sub[city=="sj"]
d.sub.iq <- d.sub[city=="iq"]
#Additional Variables
d.sub.feat <- fread("dengue_features_test.csv")
d.sub.feat.sj <- d.sub.feat[city=="sj"]
d.sub.feat.iq <- d.sub.feat[city=="iq"]
#Merge
d.msub.sj <- merge(d.sub.sj, d.sub.feat.sj, by.x = c("year","weekofyear","city"),
by.y = c("year","weekofyear","city"), all.x = TRUE, all.y = TRUE)
d.msub.iq <- merge(d.sub.iq, d.sub.feat.iq, by.x = c("year","weekofyear","city"),
by.y = c("year","weekofyear","city"), all.x = TRUE, all.y = TRUE)
#SJ
sub.xreg.sj <- cbind(d.msub.sj[,"year"],d.msub.sj[,"weekofyear"],d.msub.sj[,"reanalysis_air_temp_k"],d.msub.sj[,"reanalysis_avg_temp_k"],
d.msub.sj[,"reanalysis_dew_point_temp_k"],d.msub.sj[,"reanalysis_max_air_temp_k"],
d.msub.sj[,"reanalysis_min_air_temp_k"],d.msub.sj[,"reanalysis_precip_amt_kg_per_m2"],
d.msub.sj[,"reanalysis_relative_humidity_percent"],d.msub.sj[,"reanalysis_specific_humidity_g_per_kg"],
d.msub.sj[,"station_avg_temp_c"],d.msub.sj[,"station_max_temp_c"],d.msub.sj[,"station_min_temp_c"])
sub.xreg.sj.test <- sub.xreg.sj
#IQ
sub.xreg.iq <- cbind(d.msub.iq[,"year"],d.msub.iq[,"reanalysis_dew_point_temp_k"],d.msub.iq[,"reanalysis_min_air_temp_k"],
d.msub.iq[,"reanalysis_specific_humidity_g_per_kg"],d.msub.iq[,"station_min_temp_c"])
sub.xreg.iq.test <- sub.xreg.iq
ft.stlf.arima.xreg.sj <-stlf(d.sj.ts,method="arima",h=260,xreg=as.matrix(xreg.sj),newxreg=as.matrix(sub.xreg.sj))
## Warning in forecast.forecast_ARIMA(fit, h = h, level = level, xreg = newxreg):
## Upper prediction intervals are not finite.
#Set negative forecasts to 0, carry forward on NAs
num.stax.sj <- as.numeric(ft.stlf.arima.xreg.sj$mean)
num.stax.sj <- na.locf(num.stax.sj)
num.stax.sj[num.stax.sj<0] = 0
ts.num.stax.sj <- ts(num.stax.sj, start = c(2008, 18), frequency = 52)
#Plot and Train Accuracy
autoplot(ft.stlf.arima.xreg.sj) + autolayer(ts.num.stax.sj)
acc.ft.stlf.arima.xreg.sj <- accuracy(stlf.arima.xreg.sj)
acc.ft.stlf.arima.xreg.sj
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.02384946 14.45873 8.507962 NaN Inf 0.2139962 0.006687136
#SJ Model
#Random Seed
set.seed(1240932)
#Deal with NAs in Xreg Test
sub.xreg.sj.test.na <- data.table(sub.xreg.sj.test)
sub.xreg.sj.test.na <- na.locf(sub.xreg.sj.test.na)
#Model
sub.nnar.sj <- nnetar(d.sj.ts, xreg = as.matrix(xreg.sj))
## Warning in nnetar(d.sj.ts, xreg = as.matrix(xreg.sj)): Missing values in xreg,
## omitting rows
for.sub.nnar.sj <- forecast(sub.nnar.sj,260, xreg = as.matrix(sub.xreg.sj.test.na))
#Set negative forecasts to 0, carry forward on NAs
num.nnar.sj <- as.numeric(for.sub.nnar.sj$mean)
num.nnar.sj <- na.locf(num.nnar.sj)
num.nnar.sj[num.nnar.sj<0] = 0
ts.num.nnar.sj <- ts(num.nnar.sj, start = c(2008, 18), frequency = 52)
#Plot and Train Accuracy
autoplot(for.sub.nnar.sj) + autolayer(ts.num.nnar.sj)
acc.sub.nnar.sj <- accuracy(for.sub.nnar.sj)
acc.sub.nnar.sj
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.002474817 4.916843 3.44649 -Inf Inf 0.09452106 -0.08119953
sub.tbats.sj <- tbats(d.sj.ts)
for.sub.tbats.sj <- forecast(sub.tbats.sj,260)
#Set negative forecasts to 0, carry forward on NAs
num.tbats.sj <- as.numeric(for.sub.tbats.sj$mean)
num.tbats.sj <- na.locf(num.tbats.sj)
num.tbats.sj[num.tbats.sj<0] = 0
ts.num.tbats.sj <- ts(num.tbats.sj, start = c(2008, 18), frequency = 52)
#Plot and Train Accuracy
autoplot(for.sub.tbats.sj) + autolayer(ts.num.tbats.sj)
acc.sub.tbats.sj <- accuracy(for.sub.tbats.sj)
acc.sub.tbats.sj
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.0114407 15.00084 8.312624 -Inf Inf 0.2279763 -0.001365316
sub.stlf.naive.iq <-stlf(d.iq.ts,method="naive",h=156)
#Set negative forecasts to 0, carry forward on NAs
num.stnrw.iq <- as.numeric(sub.stlf.naive.iq$mean)
num.stnrw.iq <- na.locf(num.stnrw.iq)
num.stnrw.iq[num.stnrw.iq<0] = 0
ts.num.stnrw.iq <- ts(num.stnrw.iq, start = c(2010, 26), frequency = 52)
#Plot and Train Accuracy
autoplot(sub.stlf.naive.iq) + autolayer(ts.num.stnrw.iq)
acc.sub.stlf.naive.iq <- accuracy(sub.stlf.naive.iq)
acc.sub.stlf.naive.iq
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.008697743 6.952827 4.180876 NaN Inf 0.4438861 -0.2026068
set.seed(4632905)
#Deal with NAs
sub.xreg.iq.test.na <- data.table(sub.xreg.iq.test)
sub.xreg.iq.test.na <- na.locf(sub.xreg.iq.test.na)
#Model
sub.nnar.iq <- nnetar(d.iq.ts, xreg = as.matrix(xreg.iq))
## Warning in nnetar(d.iq.ts, xreg = as.matrix(xreg.iq)): Missing values in xreg,
## omitting rows
for.sub.nnar.iq <- forecast(sub.nnar.iq,156, xreg = as.matrix(sub.xreg.iq.test.na))
#Set negative forecasts to 0, carry forward on NAs
num.nnar.iq <- as.numeric(for.sub.nnar.iq$mean)
num.nnar.iq <- na.locf(num.nnar.iq)
num.nnar.iq[num.nnar.iq<0] = 0
ts.num.nnar.iq <- ts(num.nnar.iq, start = c(2010, 26), frequency = 52)
#Plot and Train Accuracy
autoplot(for.sub.nnar.iq) + autolayer(ts.num.nnar.iq)
acc.sub.nnar.iq <- accuracy(for.sub.nnar.iq)
acc.sub.nnar.iq
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.009030504 3.333286 2.368186 -Inf Inf 0.2514317 0.0002581162
sub.stlf.ets.iq <-stlf(d.iq.ts,method="ets",h=156)
#Set negative forecasts to 0, carry forward on NAs
num.ets.iq <- as.numeric(sub.stlf.ets.iq$mean)
num.ets.iq <- na.locf(num.ets.iq)
num.ets.iq[num.ets.iq<0] = 0
ts.num.ets.iq <- ts(num.ets.iq, start = c(2010, 26), frequency = 52)
#Plot and Train Accuracy
autoplot(sub.stlf.ets.iq) + autolayer(ts.num.ets.iq)
acc.sub.stlf.ets.iq <- accuracy(sub.stlf.ets.iq)
acc.sub.stlf.ets.iq
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.01465928 6.681039 3.888844 NaN Inf 0.4128809 0.062921
d.sub1 <- d.sub
output1.ts <- append(ts.num.stax.sj, ts.num.stnrw.iq)
d.sub1$total_cases <- round(as.numeric(output1.ts), 0)
write.csv(d.sub1,"submission1.csv", row.names = FALSE)
MAE on unseen test set of 30.4663.
d.sub2 <- d.sub
output2.ts <- append(ts.num.nnar.sj, ts.num.nnar.iq)
d.sub2$total_cases <- round(as.numeric(output2.ts), 0)
write.csv(d.sub2,"submission2.csv", row.names = FALSE)
MAE on unseen test set of 29.4904.
d.sub3 <- d.sub
output3.ts <- append(ts.num.tbats.sj, ts.num.ets.iq)
d.sub3$total_cases <- round(as.numeric(output3.ts), 0)
write.csv(d.sub3,"submission3.csv", row.names = FALSE)
MAE on unseen test set of 25.8654,
d.sub4 <- d.sub
output4.ts <- append(ts.num.tbats.sj, ts.num.stnrw.iq)
d.sub4$total_cases <- round(as.numeric(output4.ts), 0)
write.csv(d.sub4,"submission4.csv", row.names = FALSE)
MAE on unseen test set of 25.7909.