Setup
library(Amelia)
## Loading required package: Rcpp
## ##
## ## Amelia II: Multiple Imputation
## ## (Version 1.7.6, built: 2019-11-24)
## ## Copyright (C) 2005-2020 James Honaker, Gary King and Matthew Blackwell
## ## Refer to http://gking.harvard.edu/amelia/ for more information
## ##
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(psych)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ResourceSelection)
## ResourceSelection 0.3-5 2019-07-22
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(nnet)
Data Manipulation
#add total cases to larger data set
dengue_features_train$total_cases = dengue_labels_train$total_cases
dengue_features_train$week_start_date = as.Date(dengue_features_train$week_start_date)
dengue_features_train$month = month(dengue_features_train$week_start_date)
#create separate data files for San Juan and Iquitos
sj_data = filter(dengue_features_train, dengue_features_train$city == 'sj')
iq_data = filter(dengue_features_train, dengue_features_train$city == 'iq')
Check for missing data
missmap(sj_data)
missmap(iq_data)
Create time series
#create time series
sj_ts = ts(sj_data$total_cases, frequency = 52, start = c(1990,5))
iq_ts = ts(iq_data$total_cases, frequency = 52, start = c(2000,7))
sj_train = head(sj_ts, n=(length(sj_ts)-260))
sj_test = tail(sj_ts, n=260)
iq_train = head(iq_ts, n=(length(iq_ts)-156))
iq_test = tail(iq_ts, n = 156)
EDA
describe(sj_data[,-c(1:4)])
## vars n mean sd median trimmed
## ndvi_ne 1 745 0.06 0.11 0.06 0.06
## ndvi_nw 2 887 0.07 0.09 0.07 0.07
## ndvi_se 3 917 0.18 0.06 0.18 0.18
## ndvi_sw 4 917 0.17 0.06 0.17 0.17
## precipitation_amt_mm 5 927 35.47 44.61 20.80 27.52
## reanalysis_air_temp_k 6 930 299.16 1.24 299.25 299.20
## reanalysis_avg_temp_k 7 930 299.28 1.22 299.38 299.31
## reanalysis_dew_point_temp_k 8 930 295.11 1.57 295.46 295.23
## reanalysis_max_air_temp_k 9 930 301.40 1.26 301.50 301.42
## reanalysis_min_air_temp_k 10 930 297.30 1.29 297.50 297.38
## reanalysis_precip_amt_kg_per_m2 11 930 30.47 35.63 21.30 24.29
## reanalysis_relative_humidity_percent 12 930 78.57 3.39 78.67 78.62
## reanalysis_sat_precip_amt_mm 13 927 35.47 44.61 20.80 27.52
## reanalysis_specific_humidity_g_per_kg 14 930 16.55 1.56 16.85 16.64
## reanalysis_tdtr_k 15 930 2.52 0.50 2.46 2.48
## station_avg_temp_c 16 930 27.01 1.42 27.23 27.06
## station_diur_temp_rng_c 17 930 6.76 0.84 6.76 6.75
## station_max_temp_c 18 930 31.61 1.72 31.70 31.69
## station_min_temp_c 19 930 22.60 1.51 22.80 22.67
## station_precip_mm 20 930 26.79 29.33 17.75 21.66
## total_cases 21 936 34.18 51.38 19.00 23.84
## month 22 936 6.42 3.45 6.50 6.42
## mad min max range skew kurtosis
## ndvi_ne 0.08 -0.41 0.49 0.90 -0.02 2.01
## ndvi_nw 0.07 -0.46 0.44 0.89 -0.09 2.01
## ndvi_se 0.05 -0.02 0.39 0.41 0.21 0.42
## ndvi_sw 0.05 -0.06 0.38 0.44 0.15 0.64
## precipitation_amt_mm 30.84 0.00 390.60 390.60 2.61 11.69
## reanalysis_air_temp_k 1.46 295.94 302.20 6.26 -0.22 -0.88
## reanalysis_avg_temp_k 1.42 296.11 302.16 6.05 -0.22 -0.82
## reanalysis_dew_point_temp_k 1.71 289.64 297.80 8.15 -0.62 -0.40
## reanalysis_max_air_temp_k 1.48 297.80 304.30 6.50 -0.16 -0.79
## reanalysis_min_air_temp_k 1.48 292.60 299.90 7.30 -0.54 -0.31
## reanalysis_precip_amt_kg_per_m2 18.31 0.00 570.50 570.50 5.55 62.25
## reanalysis_relative_humidity_percent 3.49 66.74 87.58 20.84 -0.19 -0.09
## reanalysis_sat_precip_amt_mm 30.84 0.00 390.60 390.60 2.61 11.69
## reanalysis_specific_humidity_g_per_kg 1.79 11.72 19.44 7.72 -0.47 -0.74
## reanalysis_tdtr_k 0.49 1.36 4.43 3.07 0.68 0.36
## station_avg_temp_c 1.67 22.84 30.07 7.23 -0.31 -0.90
## station_diur_temp_rng_c 0.80 4.53 9.91 5.39 0.10 0.46
## station_max_temp_c 1.63 26.70 35.60 8.90 -0.44 -0.53
## station_min_temp_c 1.63 17.80 25.60 7.80 -0.39 -0.48
## station_precip_mm 19.05 0.00 305.90 305.90 2.62 12.68
## total_cases 17.79 0.00 461.00 461.00 4.46 25.17
## month 3.71 1.00 12.00 11.00 0.00 -1.21
## se
## ndvi_ne 0.00
## ndvi_nw 0.00
## ndvi_se 0.00
## ndvi_sw 0.00
## precipitation_amt_mm 1.47
## reanalysis_air_temp_k 0.04
## reanalysis_avg_temp_k 0.04
## reanalysis_dew_point_temp_k 0.05
## reanalysis_max_air_temp_k 0.04
## reanalysis_min_air_temp_k 0.04
## reanalysis_precip_amt_kg_per_m2 1.17
## reanalysis_relative_humidity_percent 0.11
## reanalysis_sat_precip_amt_mm 1.47
## reanalysis_specific_humidity_g_per_kg 0.05
## reanalysis_tdtr_k 0.02
## station_avg_temp_c 0.05
## station_diur_temp_rng_c 0.03
## station_max_temp_c 0.06
## station_min_temp_c 0.05
## station_precip_mm 0.96
## total_cases 1.68
## month 0.11
describe(iq_data[,-c(1:4)])
## vars n mean sd median trimmed
## ndvi_ne 1 517 0.26 0.08 0.26 0.26
## ndvi_nw 2 517 0.24 0.08 0.23 0.24
## ndvi_se 3 517 0.25 0.08 0.25 0.25
## ndvi_sw 4 517 0.27 0.09 0.26 0.26
## precipitation_amt_mm 5 516 64.25 35.22 60.47 62.24
## reanalysis_air_temp_k 6 516 297.87 1.17 297.82 297.86
## reanalysis_avg_temp_k 7 516 299.13 1.33 299.12 299.15
## reanalysis_dew_point_temp_k 8 516 295.49 1.42 295.85 295.63
## reanalysis_max_air_temp_k 9 516 307.08 2.38 307.05 307.03
## reanalysis_min_air_temp_k 10 516 292.87 1.66 293.05 293.03
## reanalysis_precip_amt_kg_per_m2 11 516 57.61 50.29 46.44 49.40
## reanalysis_relative_humidity_percent 12 516 88.64 7.58 90.92 89.58
## reanalysis_sat_precip_amt_mm 13 516 64.25 35.22 60.47 62.24
## reanalysis_specific_humidity_g_per_kg 14 516 17.10 1.45 17.43 17.20
## reanalysis_tdtr_k 15 516 9.21 2.45 8.96 9.12
## station_avg_temp_c 16 483 27.53 0.92 27.60 27.57
## station_diur_temp_rng_c 17 483 10.57 1.54 10.62 10.58
## station_max_temp_c 18 506 34.00 1.33 34.00 33.97
## station_min_temp_c 19 512 21.20 1.26 21.30 21.31
## station_precip_mm 20 504 62.47 63.25 45.30 52.21
## total_cases 21 520 7.57 10.77 5.00 5.38
## month 22 520 6.42 3.45 6.50 6.42
## mad min max range skew kurtosis
## ndvi_ne 0.09 0.06 0.51 0.45 0.22 -0.34
## ndvi_nw 0.08 0.04 0.45 0.42 0.23 -0.50
## ndvi_se 0.08 0.03 0.54 0.51 0.27 0.01
## ndvi_sw 0.09 0.06 0.55 0.48 0.27 -0.19
## precipitation_amt_mm 33.94 0.00 210.83 210.83 0.60 0.40
## reanalysis_air_temp_k 1.13 294.64 301.64 7.00 0.11 -0.04
## reanalysis_avg_temp_k 1.41 294.89 302.93 8.04 -0.11 -0.21
## reanalysis_dew_point_temp_k 1.28 290.09 298.45 8.36 -0.88 0.58
## reanalysis_max_air_temp_k 2.67 300.00 314.00 14.00 0.16 -0.49
## reanalysis_min_air_temp_k 1.70 286.90 296.00 9.10 -0.94 0.95
## reanalysis_precip_amt_kg_per_m2 34.29 0.00 362.03 362.03 1.99 5.58
## reanalysis_relative_humidity_percent 6.32 57.79 98.61 40.82 -1.10 0.78
## reanalysis_sat_precip_amt_mm 33.94 0.00 210.83 210.83 0.60 0.40
## reanalysis_specific_humidity_g_per_kg 1.39 12.11 20.46 8.35 -0.66 0.07
## reanalysis_tdtr_k 2.70 3.71 16.03 12.31 0.29 -0.65
## station_avg_temp_c 0.82 21.40 30.80 9.40 -0.93 4.49
## station_diur_temp_rng_c 1.59 5.20 15.80 10.60 -0.07 0.08
## station_max_temp_c 1.19 30.10 42.20 12.10 0.61 2.54
## station_min_temp_c 1.04 14.70 24.20 9.50 -1.14 2.48
## station_precip_mm 47.74 0.00 543.30 543.30 2.20 8.40
## total_cases 5.93 0.00 116.00 116.00 3.97 26.35
## month 3.71 1.00 12.00 11.00 0.00 -1.21
## se
## ndvi_ne 0.00
## ndvi_nw 0.00
## ndvi_se 0.00
## ndvi_sw 0.00
## precipitation_amt_mm 1.55
## reanalysis_air_temp_k 0.05
## reanalysis_avg_temp_k 0.06
## reanalysis_dew_point_temp_k 0.06
## reanalysis_max_air_temp_k 0.10
## reanalysis_min_air_temp_k 0.07
## reanalysis_precip_amt_kg_per_m2 2.21
## reanalysis_relative_humidity_percent 0.33
## reanalysis_sat_precip_amt_mm 1.55
## reanalysis_specific_humidity_g_per_kg 0.06
## reanalysis_tdtr_k 0.11
## station_avg_temp_c 0.04
## station_diur_temp_rng_c 0.07
## station_max_temp_c 0.06
## station_min_temp_c 0.06
## station_precip_mm 2.82
## total_cases 0.47
## month 0.15
autoplot(sj_ts, main = "San Juan Dengue Fever Cases", ylab = "Number of Cases")
autoplot(iq_ts, main = "Iquitos Dengue Fever Cases", ylab = "Number of Cases")
SJ Histograms
hist(sj_data$ndvi_ne)
hist(sj_data$ndvi_nw)
hist(sj_data$ndvi_se)
hist(sj_data$ndvi_sw)
hist(sj_data$precipitation_amt_mm)
hist(sj_data$reanalysis_air_temp_k)
hist(sj_data$reanalysis_avg_temp_k)
IQ Histograms
EDA2
kdelist = sj_data[,c(5:8,25)]
kdelist = na.omit(kdelist)
kdepairs(kdelist)
kdelist1 = iq_data[,c(5:8,25)]
kdelist1 = na.omit(kdelist1)
kdepairs(kdelist1)
EDA3
kdelist = sj_data[,c(10:14,25)]
kdelist = na.omit(kdelist)
kdepairs(kdelist)
kdelist1 = iq_data[,c(10:14,25)]
kdelist1 = na.omit(kdelist1)
kdepairs(kdelist1)
kdelist = sj_data[,c(15:19,25)]
kdelist = na.omit(kdelist)
kdepairs(kdelist)
kdelist1 = iq_data[,c(15:19,25)]
kdelist1 = na.omit(kdelist1)
kdepairs(kdelist1)
kdelist = sj_data[,c(9,20:25)]
kdelist = na.omit(kdelist)
kdepairs(kdelist)
kdelist1 = iq_data[,c(9,20:25)]
kdelist1 = na.omit(kdelist1)
kdepairs(kdelist1)
EDA Plots
plot(sj_data$weekofyear, sj_data$total_cases, main = "San Juan Cases by Week of Year")
plot(iq_data$weekofyear, iq_data$total_cases, main = "Iquitos Cases by Week of Year")
boxplot(sj_data$total_cases~sj_data$month, main = "San Juan Cases by Month", notch = TRUE, ylab = 'Total Cases', xlab = "Month")
boxplot(iq_data$total_cases~iq_data$month, main = "Iquitos Cases by Month", notch = TRUE, ylab = 'Total Cases', xlab = "Month")
## Warning in bxp(list(stats = structure(c(0, 4, 9, 16, 29, 0, 4, 11, 20, 39, :
## some notches went outside hinges ('box'): maybe set notch=FALSE
Decomposition
decompIQ = stl(iq_train, s.window = 'periodic')
decompSJ = stl(sj_train, s.window = 'periodic' )
autoplot(decompIQ, main = "Iquitos")
autoplot(decompSJ, main = "San Juan")
Arima
logsj = log(sj_train)
logsj[!is.finite(logsj)] = NA
logsj[is.na(logsj)] = mean(logsj, na.rm = TRUE)
bcsj = BoxCox(sj_train, lambda = 'auto')
arimaSJ = auto.arima(sj_train, xreg = head(sj_data$month, n=nrow(sj_data) - length(sj_test)), seasonal = TRUE)
checkresiduals(arimaSJ)
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(2,1,1) errors
## Q* = 98.213, df = 100, p-value = 0.5318
##
## Model df: 4. Total lags used: 104
arimaSJfc = forecast(arimaSJ, h = length(sj_test), xreg = tail(sj_data$month,n= length(sj_test)))
autoplot(arimaSJfc)
accuracy (arimaSJfc, sj_test)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 4.324969e-04 14.3828 8.760602 NaN Inf 0.2035225 -0.0006753483
## Test set 1.923278e+01 33.9337 19.723702 -Inf Inf 0.4582126 0.9324568739
## Theil's U
## Training set NA
## Test set 0
logiq = log(iq_train)
logiq[!is.finite(logiq)] = NA
logiq[is.na(logiq)] = mean(logiq, na.rm = TRUE)
bciq = BoxCox(iq_train, lambda = 'auto')
arimaIQ = auto.arima(iq_train, xreg = head(iq_data$month,n=nrow(iq_data) - length(iq_test)), seasonal = TRUE)
checkresiduals(arimaIQ)
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(0,1,2) errors
## Q* = 61.771, df = 70, p-value = 0.7479
##
## Model df: 3. Total lags used: 73
arimaIQfc = forecast(arimaIQ, h = length(iq_test), xreg = tail(iq_data$month,n= length(iq_test)))
autoplot(arimaIQfc)
accuracy (arimaIQfc, iq_test)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.0314652 7.066612 3.673178 NaN Inf 0.3951833 -0.004366313
## Test set 5.9907750 13.488959 7.935829 -Inf Inf 0.8537857 0.811617022
## Theil's U
## Training set NA
## Test set 0
Neural Net
nnSJ = nnetar(sj_train, lambda='auto')
nnSJfc = forecast(nnSJ,h=length(sj_test))
autoplot(nnSJfc)
#checkresiduals(nnSJfc)
nnIQ = nnetar(iq_train, lambda='auto')
nnIQfc = forecast(nnIQ,h=length(iq_test))
autoplot(nnIQfc)
#checkresiduals(nnIQfc)
Arima w fourier
testAccuracy = data.frame(matrix(ncol = 8, nrow =0))
colnames(testAccuracy)=c("ME","RMSE", "MAE", "MPE", "MAPE", "MASE", "ACF1", "Theil's U")
for (i in 1:8) {
arimaSJ1 = auto.arima(sj_train, xreg = fourier(sj_train, K = i), seasonal = TRUE)
#checkresiduals(arimaSJ1)
arimaSJfc1 = forecast(arimaSJ1, h = length(sj_test), xreg = fourier(sj_test, K = i))
#accuracy(arimaSJfc1, sj_test)
testacc = data.frame(accuracy(arimaSJfc1,sj_test))
testacc = testacc[-1,]
testAccuracy = rbind(testAccuracy, testacc)
}
#autoplot(arimaSJfc1)
testAccuracy1 = data.frame(matrix(ncol = 8, nrow =0))
colnames(testAccuracy1)=c("ME","RMSE", "MAE", "MPE", "MAPE", "MASE", "ACF1", "Theil's U")
for (i in 1:8) {
arimaIQ1 = auto.arima(iq_train, xreg = fourier(iq_train, K = i), seasonal = TRUE)
#checkresiduals(arimaIQ1)
arimaIQfc1 = forecast(arimaIQ1, h = length(iq_test), xreg = fourier(iq_test, K = i))
#accuracy (arimaIQfc, iq_test)
testacc = data.frame(accuracy(arimaIQfc1,iq_test))
testacc = testacc[-1,]
testAccuracy1 = rbind(testAccuracy1, testacc)
}
#autoplot(arimaIQfc)
More neural nets
nnSJ1 = nnetar(logsj, lambda='auto', repeats = 25, size = 1, P=1)
nnSJfc1 = forecast(nnSJ1,h=length(sj_test))
autoplot(nnSJfc1)
accuracy(nnSJfc1, sj_test)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.02185322 0.4147659 0.3123685 -Inf Inf 0.2938665 0.002272656
## Test set 20.71571969 34.2771591 20.7689498 -Inf Inf 19.5387781 0.930118285
## Theil's U
## Training set NA
## Test set 0
nnIQ1 = nnetar(logiq, lambda='auto', repeats = 25, size = 1, P=1)
nnIQfc1 = forecast(nnIQ1,h=length(iq_test))
autoplot(nnIQfc1)
accuracy(nnIQfc1,iq_test)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.1390472 0.6487207 0.5088262 -Inf Inf 0.5032304 -0.03421292
## Test set 8.8314234 14.7834503 9.0167418 -Inf Inf 8.9175805 0.80939346
## Theil's U
## Training set NA
## Test set 0
Neural nets with Fourier
testAccuracy2 = data.frame(matrix(ncol = 8, nrow =0))
colnames(testAccuracy2)=c("ME","RMSE", "MAE", "MPE", "MAPE", "MASE", "ACF1", "Theil's U")
for (j in 1:8) {
nnSJ1 = nnetar(sj_train, lambda = 'auto', xreg = fourier(sj_train, K = j), repeats = 25, size = 1, P=1)
#checkresiduals(arimaSJ1)
nnSJfc1 = forecast(nnSJ1, h = length(sj_test), xreg = fourier(sj_test, K = j))
#accuracy(arimaSJfc1, sj_test)
testacc = data.frame(accuracy(nnSJfc1,sj_test))
testacc = testacc[-1,]
testAccuracy2 = rbind(testAccuracy2, testacc)
}
#autoplot(arimaSJfc1)
testAccuracy3 = data.frame(matrix(ncol = 8, nrow =0))
colnames(testAccuracy3)=c("ME","RMSE", "MAE", "MPE", "MAPE", "MASE", "ACF1", "Theil's U")
for (i in 1:8) {
nnIQ1 = nnetar(iq_train, lambda = 'auto', xreg = fourier(iq_train, K = i), repeats = 25, size = 1, P=1)
#checkresiduals(arimaIQ1)
nnIQfc1 = forecast(nnIQ1, h = length(iq_test), xreg = fourier(iq_test, K = i))
#accuracy (arimaIQfc, iq_test)
testacc = data.frame(accuracy(nnIQfc1,iq_test))
testacc = testacc[-1,]
testAccuracy3 = rbind(testAccuracy3, testacc)
}
nnIQ1 = nnetar(iq_train, lambda = 'auto', xreg = fourier(iq_train, K = 1), repeats = 25, size = 1, P=1)
nnIQfc1 = forecast(nnIQ1, h = length(iq_test), xreg = fourier(iq_test, K = 1))
autoplot(nnIQfc1)
accuracy(nnIQfc1,iq_test)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 2.298459 8.908259 3.863563 -Inf Inf 0.4156660 0.4217168
## Test set 5.129688 12.354544 7.373392 -Inf Inf 0.7932752 0.7902820
## Theil's U
## Training set NA
## Test set 0
Accuracy
accuracy(arimaSJfc, sj_test)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 4.324969e-04 14.3828 8.760602 NaN Inf 0.2035225 -0.0006753483
## Test set 1.923278e+01 33.9337 19.723702 -Inf Inf 0.4582126 0.9324568739
## Theil's U
## Training set NA
## Test set 0
accuracy(arimaIQfc, iq_test)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.0314652 7.066612 3.673178 NaN Inf 0.3951833 -0.004366313
## Test set 5.9907750 13.488959 7.935829 -Inf Inf 0.8537857 0.811617022
## Theil's U
## Training set NA
## Test set 0
accuracy(nnSJfc, sj_test)
## ME RMSE MAE MPE MAPE MASE
## Training set -4.638609e+111 1.158724e+113 4.638609e+111 -Inf Inf 1.077622e+110
## Test set 2.254231e+01 3.541659e+01 2.254231e+01 100 100 5.236932e-01
## ACF1 Theil's U
## Training set -0.001607709 NA
## Test set 0.930111980 0
accuracy(nnIQfc, iq_test)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 1.420918 7.387154 3.401925 -Inf Inf 0.3660002 0.2625136
## Test set 5.151148 12.189364 7.473082 -Inf Inf 0.8040006 0.7818240
## Theil's U
## Training set NA
## Test set 0
Average of existing forecasts
#San Juan ARIMA
arimaSJ1 = auto.arima(sj_train, xreg = fourier(sj_train, K = 2), seasonal = TRUE)
arimaSJfc1 = forecast(arimaSJ1, h = length(sj_test), xreg = fourier(sj_test, K = 2))
#San Juan Neural Net
nnSJ1 = nnetar(logsj, lambda='auto', repeats = 25, size = 1, P=1)
nnSJfc1 = forecast(nnSJ1,h=length(sj_test))
#Combine Forecasts
combSJ = (nnSJfc1[["mean"]] + arimaSJfc1[["mean"]]) / 2
combSJts = ts(combSJ, frequency = 52, start =c(2003,05))
autoplot(sj_train) + autolayer(combSJts)
accuracy(combSJ,sj_test)
## ME RMSE MAE MPE MAPE ACF1 Theil's U
## Test set 6.732091 25.00837 13.23342 -Inf Inf 0.9140913 0
#Iquiots ARIMA
arimaIQ1 = auto.arima(iq_train, xreg = fourier(iq_train, K = 1), seasonal = TRUE)
arimaIQfc1 = forecast(arimaIQ1, h = length(iq_test), xreg = fourier(iq_test, K = 1))
#Iquitos Neural Net
nnIQ1 = nnetar(iq_train, lambda = 'auto', xreg = fourier(iq_train, K = 1), repeats = 25, size = 1, P=1)
nnIQfc1 = forecast(nnIQ1, h = length(iq_test), xreg = fourier(iq_test, K = 1))
#Combine Forecasts
combIQ = (nnIQfc1[["mean"]] + arimaIQfc1[["mean"]]) / 2
combIQts = ts(combIQ, frequency = 52, start =c(2007,07))
autoplot(iq_train) + autolayer(combIQts)
accuracy(combIQ,iq_test)
## ME RMSE MAE MPE MAPE ACF1 Theil's U
## Test set 4.119392 11.73255 6.808306 -Inf Inf 0.7810788 0
Create forecasts for whole data set
x = filter(submission_format, city == 'sj')
y = filter(submission_format, city == 'iq')
xTS = ts(x$total_cases, start = c(2008,4), frequency = 52)
yTS = ts(y$total_cases, start = c(2010,06), frequency = 52)
logsj1 = log(sj_ts)
logsj1[!is.finite(logsj1)] = NA
logsj1[is.na(logsj1)] = mean(logsj1, na.rm = TRUE)
#San Juan ARIMA
arimaSJ2 = auto.arima(sj_ts, xreg = fourier(sj_ts, K = 2), seasonal = TRUE)
arimaSJfc2 = forecast(arimaSJ2, h = length(xTS), xreg = fourier(xTS, K = 2))
#San Juan Neural Net
nnSJ2 = nnetar(logsj1, lambda='auto', repeats = 25, size = 1, P=1)
nnSJfc2 = forecast(nnSJ2,h=length(xTS))
#Combine Forecasts
combSJ2 = (nnSJfc2[["mean"]] + arimaSJfc2[["mean"]]) / 2
combSJts2 = ts(combSJ2, frequency = 52, start =c(2008,04))
autoplot(sj_ts, main = "Final San Juan Forecasts", ylab = "Total Cases") + autolayer(combSJts2)
#Iquitos ARIMA
arimaIQ2 = auto.arima(iq_ts, xreg = fourier(iq_ts, K = 1), seasonal = TRUE)
arimaIQfc2 = forecast(arimaIQ2, h = length(yTS), xreg = fourier(yTS, K = 1))
#Iquitos Neural Net
nnIQ2 = nnetar(iq_ts, lambda = 'auto', xreg = fourier(iq_ts, K = 1), repeats = 25, size = 1, P=1)
nnIQfc2 = forecast(nnIQ2, h = length(yTS), xreg = fourier(yTS, K = 1))
#Combine Forecasts
combIQ2 = (nnIQfc2[["mean"]] + arimaIQfc2[["mean"]]) / 2
combIQts2 = ts(combIQ2, frequency = 52, start =c(2010,06))
autoplot(iq_ts, main = "Final Iquitos Forecasts", ylab = "Total Cases") + autolayer(combIQts2)
Add forecasts to submission file
sjframe = data.frame(as.matrix(combSJ2))
iqframe = data.frame(as.matrix(combIQ2))
colnames(iqframe) = 'as.matrix.combSJ2.'
submitlist = data.frame(matrix(ncol=1,nrow=0))
submitlist = rbind(submitlist, sjframe)
submitlist = rbind(submitlist, iqframe)
submission_format$total_cases = as.integer(submitlist$as.matrix.combSJ2.)
write.csv(submission_format,"~/Desktop/DengueSubmission.csv", row.names = FALSE)