Final

Setup

library(Amelia)

## Loading required package: Rcpp

## ## 
## ## Amelia II: Multiple Imputation
## ## (Version 1.7.6, built: 2019-11-24)
## ## Copyright (C) 2005-2020 James Honaker, Gary King and Matthew Blackwell
## ## Refer to http://gking.harvard.edu/amelia/ for more information
## ##

library(forecast)

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

library(psych)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(ResourceSelection)

## ResourceSelection 0.3-5   2019-07-22

library(lubridate)

## 
## Attaching package: 'lubridate'

## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

library(nnet)

Data Manipulation

#add total cases to larger data set
dengue_features_train$total_cases = dengue_labels_train$total_cases
dengue_features_train$week_start_date = as.Date(dengue_features_train$week_start_date)
dengue_features_train$month = month(dengue_features_train$week_start_date)

#create separate data files for San Juan and Iquitos
sj_data = filter(dengue_features_train, dengue_features_train$city == 'sj')
iq_data = filter(dengue_features_train, dengue_features_train$city == 'iq')

Check for missing data

missmap(sj_data)

missmap(iq_data)

Create time series

#create time series
sj_ts = ts(sj_data$total_cases, frequency = 52, start = c(1990,5))
iq_ts = ts(iq_data$total_cases, frequency = 52, start = c(2000,7))

sj_train = head(sj_ts, n=(length(sj_ts)-260))
sj_test = tail(sj_ts, n=260)
iq_train = head(iq_ts, n=(length(iq_ts)-156))
iq_test = tail(iq_ts, n = 156)

EDA

describe(sj_data[,-c(1:4)])

##                                       vars   n   mean    sd median trimmed
## ndvi_ne                                  1 745   0.06  0.11   0.06    0.06
## ndvi_nw                                  2 887   0.07  0.09   0.07    0.07
## ndvi_se                                  3 917   0.18  0.06   0.18    0.18
## ndvi_sw                                  4 917   0.17  0.06   0.17    0.17
## precipitation_amt_mm                     5 927  35.47 44.61  20.80   27.52
## reanalysis_air_temp_k                    6 930 299.16  1.24 299.25  299.20
## reanalysis_avg_temp_k                    7 930 299.28  1.22 299.38  299.31
## reanalysis_dew_point_temp_k              8 930 295.11  1.57 295.46  295.23
## reanalysis_max_air_temp_k                9 930 301.40  1.26 301.50  301.42
## reanalysis_min_air_temp_k               10 930 297.30  1.29 297.50  297.38
## reanalysis_precip_amt_kg_per_m2         11 930  30.47 35.63  21.30   24.29
## reanalysis_relative_humidity_percent    12 930  78.57  3.39  78.67   78.62
## reanalysis_sat_precip_amt_mm            13 927  35.47 44.61  20.80   27.52
## reanalysis_specific_humidity_g_per_kg   14 930  16.55  1.56  16.85   16.64
## reanalysis_tdtr_k                       15 930   2.52  0.50   2.46    2.48
## station_avg_temp_c                      16 930  27.01  1.42  27.23   27.06
## station_diur_temp_rng_c                 17 930   6.76  0.84   6.76    6.75
## station_max_temp_c                      18 930  31.61  1.72  31.70   31.69
## station_min_temp_c                      19 930  22.60  1.51  22.80   22.67
## station_precip_mm                       20 930  26.79 29.33  17.75   21.66
## total_cases                             21 936  34.18 51.38  19.00   23.84
## month                                   22 936   6.42  3.45   6.50    6.42
##                                         mad    min    max  range  skew kurtosis
## ndvi_ne                                0.08  -0.41   0.49   0.90 -0.02     2.01
## ndvi_nw                                0.07  -0.46   0.44   0.89 -0.09     2.01
## ndvi_se                                0.05  -0.02   0.39   0.41  0.21     0.42
## ndvi_sw                                0.05  -0.06   0.38   0.44  0.15     0.64
## precipitation_amt_mm                  30.84   0.00 390.60 390.60  2.61    11.69
## reanalysis_air_temp_k                  1.46 295.94 302.20   6.26 -0.22    -0.88
## reanalysis_avg_temp_k                  1.42 296.11 302.16   6.05 -0.22    -0.82
## reanalysis_dew_point_temp_k            1.71 289.64 297.80   8.15 -0.62    -0.40
## reanalysis_max_air_temp_k              1.48 297.80 304.30   6.50 -0.16    -0.79
## reanalysis_min_air_temp_k              1.48 292.60 299.90   7.30 -0.54    -0.31
## reanalysis_precip_amt_kg_per_m2       18.31   0.00 570.50 570.50  5.55    62.25
## reanalysis_relative_humidity_percent   3.49  66.74  87.58  20.84 -0.19    -0.09
## reanalysis_sat_precip_amt_mm          30.84   0.00 390.60 390.60  2.61    11.69
## reanalysis_specific_humidity_g_per_kg  1.79  11.72  19.44   7.72 -0.47    -0.74
## reanalysis_tdtr_k                      0.49   1.36   4.43   3.07  0.68     0.36
## station_avg_temp_c                     1.67  22.84  30.07   7.23 -0.31    -0.90
## station_diur_temp_rng_c                0.80   4.53   9.91   5.39  0.10     0.46
## station_max_temp_c                     1.63  26.70  35.60   8.90 -0.44    -0.53
## station_min_temp_c                     1.63  17.80  25.60   7.80 -0.39    -0.48
## station_precip_mm                     19.05   0.00 305.90 305.90  2.62    12.68
## total_cases                           17.79   0.00 461.00 461.00  4.46    25.17
## month                                  3.71   1.00  12.00  11.00  0.00    -1.21
##                                         se
## ndvi_ne                               0.00
## ndvi_nw                               0.00
## ndvi_se                               0.00
## ndvi_sw                               0.00
## precipitation_amt_mm                  1.47
## reanalysis_air_temp_k                 0.04
## reanalysis_avg_temp_k                 0.04
## reanalysis_dew_point_temp_k           0.05
## reanalysis_max_air_temp_k             0.04
## reanalysis_min_air_temp_k             0.04
## reanalysis_precip_amt_kg_per_m2       1.17
## reanalysis_relative_humidity_percent  0.11
## reanalysis_sat_precip_amt_mm          1.47
## reanalysis_specific_humidity_g_per_kg 0.05
## reanalysis_tdtr_k                     0.02
## station_avg_temp_c                    0.05
## station_diur_temp_rng_c               0.03
## station_max_temp_c                    0.06
## station_min_temp_c                    0.05
## station_precip_mm                     0.96
## total_cases                           1.68
## month                                 0.11

describe(iq_data[,-c(1:4)])

##                                       vars   n   mean    sd median trimmed
## ndvi_ne                                  1 517   0.26  0.08   0.26    0.26
## ndvi_nw                                  2 517   0.24  0.08   0.23    0.24
## ndvi_se                                  3 517   0.25  0.08   0.25    0.25
## ndvi_sw                                  4 517   0.27  0.09   0.26    0.26
## precipitation_amt_mm                     5 516  64.25 35.22  60.47   62.24
## reanalysis_air_temp_k                    6 516 297.87  1.17 297.82  297.86
## reanalysis_avg_temp_k                    7 516 299.13  1.33 299.12  299.15
## reanalysis_dew_point_temp_k              8 516 295.49  1.42 295.85  295.63
## reanalysis_max_air_temp_k                9 516 307.08  2.38 307.05  307.03
## reanalysis_min_air_temp_k               10 516 292.87  1.66 293.05  293.03
## reanalysis_precip_amt_kg_per_m2         11 516  57.61 50.29  46.44   49.40
## reanalysis_relative_humidity_percent    12 516  88.64  7.58  90.92   89.58
## reanalysis_sat_precip_amt_mm            13 516  64.25 35.22  60.47   62.24
## reanalysis_specific_humidity_g_per_kg   14 516  17.10  1.45  17.43   17.20
## reanalysis_tdtr_k                       15 516   9.21  2.45   8.96    9.12
## station_avg_temp_c                      16 483  27.53  0.92  27.60   27.57
## station_diur_temp_rng_c                 17 483  10.57  1.54  10.62   10.58
## station_max_temp_c                      18 506  34.00  1.33  34.00   33.97
## station_min_temp_c                      19 512  21.20  1.26  21.30   21.31
## station_precip_mm                       20 504  62.47 63.25  45.30   52.21
## total_cases                             21 520   7.57 10.77   5.00    5.38
## month                                   22 520   6.42  3.45   6.50    6.42
##                                         mad    min    max  range  skew kurtosis
## ndvi_ne                                0.09   0.06   0.51   0.45  0.22    -0.34
## ndvi_nw                                0.08   0.04   0.45   0.42  0.23    -0.50
## ndvi_se                                0.08   0.03   0.54   0.51  0.27     0.01
## ndvi_sw                                0.09   0.06   0.55   0.48  0.27    -0.19
## precipitation_amt_mm                  33.94   0.00 210.83 210.83  0.60     0.40
## reanalysis_air_temp_k                  1.13 294.64 301.64   7.00  0.11    -0.04
## reanalysis_avg_temp_k                  1.41 294.89 302.93   8.04 -0.11    -0.21
## reanalysis_dew_point_temp_k            1.28 290.09 298.45   8.36 -0.88     0.58
## reanalysis_max_air_temp_k              2.67 300.00 314.00  14.00  0.16    -0.49
## reanalysis_min_air_temp_k              1.70 286.90 296.00   9.10 -0.94     0.95
## reanalysis_precip_amt_kg_per_m2       34.29   0.00 362.03 362.03  1.99     5.58
## reanalysis_relative_humidity_percent   6.32  57.79  98.61  40.82 -1.10     0.78
## reanalysis_sat_precip_amt_mm          33.94   0.00 210.83 210.83  0.60     0.40
## reanalysis_specific_humidity_g_per_kg  1.39  12.11  20.46   8.35 -0.66     0.07
## reanalysis_tdtr_k                      2.70   3.71  16.03  12.31  0.29    -0.65
## station_avg_temp_c                     0.82  21.40  30.80   9.40 -0.93     4.49
## station_diur_temp_rng_c                1.59   5.20  15.80  10.60 -0.07     0.08
## station_max_temp_c                     1.19  30.10  42.20  12.10  0.61     2.54
## station_min_temp_c                     1.04  14.70  24.20   9.50 -1.14     2.48
## station_precip_mm                     47.74   0.00 543.30 543.30  2.20     8.40
## total_cases                            5.93   0.00 116.00 116.00  3.97    26.35
## month                                  3.71   1.00  12.00  11.00  0.00    -1.21
##                                         se
## ndvi_ne                               0.00
## ndvi_nw                               0.00
## ndvi_se                               0.00
## ndvi_sw                               0.00
## precipitation_amt_mm                  1.55
## reanalysis_air_temp_k                 0.05
## reanalysis_avg_temp_k                 0.06
## reanalysis_dew_point_temp_k           0.06
## reanalysis_max_air_temp_k             0.10
## reanalysis_min_air_temp_k             0.07
## reanalysis_precip_amt_kg_per_m2       2.21
## reanalysis_relative_humidity_percent  0.33
## reanalysis_sat_precip_amt_mm          1.55
## reanalysis_specific_humidity_g_per_kg 0.06
## reanalysis_tdtr_k                     0.11
## station_avg_temp_c                    0.04
## station_diur_temp_rng_c               0.07
## station_max_temp_c                    0.06
## station_min_temp_c                    0.06
## station_precip_mm                     2.82
## total_cases                           0.47
## month                                 0.15

autoplot(sj_ts, main = "San Juan Dengue Fever Cases", ylab = "Number of Cases")

autoplot(iq_ts, main = "Iquitos Dengue Fever Cases", ylab = "Number of Cases")

SJ Histograms

hist(sj_data$ndvi_ne)

hist(sj_data$ndvi_nw)

hist(sj_data$ndvi_se)

hist(sj_data$ndvi_sw)

hist(sj_data$precipitation_amt_mm)

hist(sj_data$reanalysis_air_temp_k)

hist(sj_data$reanalysis_avg_temp_k)

IQ Histograms

EDA2

kdelist = sj_data[,c(5:8,25)]
kdelist = na.omit(kdelist)
kdepairs(kdelist)

kdelist1 = iq_data[,c(5:8,25)]
kdelist1 = na.omit(kdelist1)
kdepairs(kdelist1)

EDA3

kdelist = sj_data[,c(10:14,25)]
kdelist = na.omit(kdelist)
kdepairs(kdelist)

kdelist1 = iq_data[,c(10:14,25)]
kdelist1 = na.omit(kdelist1)
kdepairs(kdelist1)

kdelist = sj_data[,c(15:19,25)]
kdelist = na.omit(kdelist)
kdepairs(kdelist)

kdelist1 = iq_data[,c(15:19,25)]
kdelist1 = na.omit(kdelist1)
kdepairs(kdelist1)

kdelist = sj_data[,c(9,20:25)]
kdelist = na.omit(kdelist)
kdepairs(kdelist)

kdelist1 = iq_data[,c(9,20:25)]
kdelist1 = na.omit(kdelist1)
kdepairs(kdelist1)

EDA Plots

plot(sj_data$weekofyear, sj_data$total_cases, main = "San Juan Cases by Week of Year")

plot(iq_data$weekofyear, iq_data$total_cases, main = "Iquitos Cases by Week of Year")

boxplot(sj_data$total_cases~sj_data$month, main = "San Juan Cases by Month", notch = TRUE, ylab = 'Total Cases', xlab = "Month")

boxplot(iq_data$total_cases~iq_data$month, main = "Iquitos Cases by Month", notch = TRUE, ylab = 'Total Cases', xlab = "Month")

## Warning in bxp(list(stats = structure(c(0, 4, 9, 16, 29, 0, 4, 11, 20, 39, :
## some notches went outside hinges ('box'): maybe set notch=FALSE

Decomposition

decompIQ = stl(iq_train, s.window = 'periodic')
decompSJ = stl(sj_train, s.window = 'periodic' )
autoplot(decompIQ, main = "Iquitos")

autoplot(decompSJ, main = "San Juan")

Arima

logsj = log(sj_train)
logsj[!is.finite(logsj)] = NA
logsj[is.na(logsj)] = mean(logsj, na.rm = TRUE)

bcsj = BoxCox(sj_train, lambda = 'auto')

arimaSJ = auto.arima(sj_train, xreg = head(sj_data$month, n=nrow(sj_data) - length(sj_test)), seasonal = TRUE)
checkresiduals(arimaSJ)

## 
##  Ljung-Box test
## 
## data:  Residuals from Regression with ARIMA(2,1,1) errors
## Q* = 98.213, df = 100, p-value = 0.5318
## 
## Model df: 4.   Total lags used: 104

arimaSJfc = forecast(arimaSJ, h = length(sj_test), xreg = tail(sj_data$month,n= length(sj_test)))
autoplot(arimaSJfc)

accuracy (arimaSJfc, sj_test)

##                        ME    RMSE       MAE  MPE MAPE      MASE          ACF1
## Training set 4.324969e-04 14.3828  8.760602  NaN  Inf 0.2035225 -0.0006753483
## Test set     1.923278e+01 33.9337 19.723702 -Inf  Inf 0.4582126  0.9324568739
##              Theil's U
## Training set        NA
## Test set             0

logiq = log(iq_train)
logiq[!is.finite(logiq)] = NA
logiq[is.na(logiq)] = mean(logiq, na.rm = TRUE)

bciq = BoxCox(iq_train, lambda = 'auto')


arimaIQ = auto.arima(iq_train, xreg = head(iq_data$month,n=nrow(iq_data) - length(iq_test)), seasonal = TRUE)
checkresiduals(arimaIQ)

## 
##  Ljung-Box test
## 
## data:  Residuals from Regression with ARIMA(0,1,2) errors
## Q* = 61.771, df = 70, p-value = 0.7479
## 
## Model df: 3.   Total lags used: 73

arimaIQfc = forecast(arimaIQ, h = length(iq_test), xreg = tail(iq_data$month,n= length(iq_test)))
autoplot(arimaIQfc)

accuracy (arimaIQfc, iq_test)

##                     ME      RMSE      MAE  MPE MAPE      MASE         ACF1
## Training set 0.0314652  7.066612 3.673178  NaN  Inf 0.3951833 -0.004366313
## Test set     5.9907750 13.488959 7.935829 -Inf  Inf 0.8537857  0.811617022
##              Theil's U
## Training set        NA
## Test set             0

Neural Net

nnSJ = nnetar(sj_train, lambda='auto')
nnSJfc = forecast(nnSJ,h=length(sj_test))
autoplot(nnSJfc)

#checkresiduals(nnSJfc)

nnIQ = nnetar(iq_train, lambda='auto')
nnIQfc = forecast(nnIQ,h=length(iq_test))
autoplot(nnIQfc)

#checkresiduals(nnIQfc)

Arima w fourier

testAccuracy = data.frame(matrix(ncol = 8, nrow =0))
colnames(testAccuracy)=c("ME","RMSE", "MAE", "MPE", "MAPE", "MASE", "ACF1", "Theil's U")

for (i in 1:8) {  
arimaSJ1 = auto.arima(sj_train, xreg = fourier(sj_train, K = i), seasonal = TRUE)
#checkresiduals(arimaSJ1)
arimaSJfc1 = forecast(arimaSJ1, h = length(sj_test), xreg = fourier(sj_test, K = i))
#accuracy(arimaSJfc1, sj_test)

testacc = data.frame(accuracy(arimaSJfc1,sj_test))
testacc = testacc[-1,]
testAccuracy = rbind(testAccuracy, testacc)
}
#autoplot(arimaSJfc1)

testAccuracy1 = data.frame(matrix(ncol = 8, nrow =0))
colnames(testAccuracy1)=c("ME","RMSE", "MAE", "MPE", "MAPE", "MASE", "ACF1", "Theil's U")

for (i in 1:8) {  

arimaIQ1 = auto.arima(iq_train, xreg = fourier(iq_train, K = i), seasonal = TRUE)
#checkresiduals(arimaIQ1)
arimaIQfc1 = forecast(arimaIQ1, h = length(iq_test), xreg = fourier(iq_test, K = i))
#accuracy (arimaIQfc, iq_test)

testacc = data.frame(accuracy(arimaIQfc1,iq_test))
testacc = testacc[-1,]
testAccuracy1 = rbind(testAccuracy1, testacc)

}
#autoplot(arimaIQfc)

More neural nets

  nnSJ1 = nnetar(logsj, lambda='auto', repeats = 25, size = 1, P=1)
  nnSJfc1 = forecast(nnSJ1,h=length(sj_test))
  autoplot(nnSJfc1)

accuracy(nnSJfc1, sj_test)

##                       ME       RMSE        MAE  MPE MAPE       MASE        ACF1
## Training set  0.02185322  0.4147659  0.3123685 -Inf  Inf  0.2938665 0.002272656
## Test set     20.71571969 34.2771591 20.7689498 -Inf  Inf 19.5387781 0.930118285
##              Theil's U
## Training set        NA
## Test set             0

nnIQ1 = nnetar(logiq, lambda='auto', repeats = 25, size = 1, P=1)
nnIQfc1 = forecast(nnIQ1,h=length(iq_test))
autoplot(nnIQfc1)

accuracy(nnIQfc1,iq_test)

##                     ME       RMSE       MAE  MPE MAPE      MASE        ACF1
## Training set 0.1390472  0.6487207 0.5088262 -Inf  Inf 0.5032304 -0.03421292
## Test set     8.8314234 14.7834503 9.0167418 -Inf  Inf 8.9175805  0.80939346
##              Theil's U
## Training set        NA
## Test set             0

Neural nets with Fourier

testAccuracy2 = data.frame(matrix(ncol = 8, nrow =0))
colnames(testAccuracy2)=c("ME","RMSE", "MAE", "MPE", "MAPE", "MASE", "ACF1", "Theil's U")

for (j in 1:8) {  
nnSJ1 = nnetar(sj_train, lambda = 'auto', xreg = fourier(sj_train, K = j), repeats = 25, size = 1, P=1)
#checkresiduals(arimaSJ1)
nnSJfc1 = forecast(nnSJ1, h = length(sj_test), xreg = fourier(sj_test, K = j))
#accuracy(arimaSJfc1, sj_test)

testacc = data.frame(accuracy(nnSJfc1,sj_test))
testacc = testacc[-1,]
testAccuracy2 = rbind(testAccuracy2, testacc)
}
#autoplot(arimaSJfc1)

testAccuracy3 = data.frame(matrix(ncol = 8, nrow =0))
colnames(testAccuracy3)=c("ME","RMSE", "MAE", "MPE", "MAPE", "MASE", "ACF1", "Theil's U")

for (i in 1:8) {  

nnIQ1 = nnetar(iq_train, lambda = 'auto', xreg = fourier(iq_train, K = i), repeats = 25, size = 1, P=1)
#checkresiduals(arimaIQ1)
nnIQfc1 = forecast(nnIQ1, h = length(iq_test), xreg = fourier(iq_test, K = i))
#accuracy (arimaIQfc, iq_test)

testacc = data.frame(accuracy(nnIQfc1,iq_test))
testacc = testacc[-1,]
testAccuracy3 = rbind(testAccuracy3, testacc)

}

nnIQ1 = nnetar(iq_train, lambda = 'auto', xreg = fourier(iq_train, K = 1), repeats = 25, size = 1, P=1)
nnIQfc1 = forecast(nnIQ1, h = length(iq_test), xreg = fourier(iq_test, K = 1))
autoplot(nnIQfc1)

accuracy(nnIQfc1,iq_test)

##                    ME      RMSE      MAE  MPE MAPE      MASE      ACF1
## Training set 2.298459  8.908259 3.863563 -Inf  Inf 0.4156660 0.4217168
## Test set     5.129688 12.354544 7.373392 -Inf  Inf 0.7932752 0.7902820
##              Theil's U
## Training set        NA
## Test set             0

Accuracy

accuracy(arimaSJfc, sj_test)

##                        ME    RMSE       MAE  MPE MAPE      MASE          ACF1
## Training set 4.324969e-04 14.3828  8.760602  NaN  Inf 0.2035225 -0.0006753483
## Test set     1.923278e+01 33.9337 19.723702 -Inf  Inf 0.4582126  0.9324568739
##              Theil's U
## Training set        NA
## Test set             0

accuracy(arimaIQfc, iq_test)

##                     ME      RMSE      MAE  MPE MAPE      MASE         ACF1
## Training set 0.0314652  7.066612 3.673178  NaN  Inf 0.3951833 -0.004366313
## Test set     5.9907750 13.488959 7.935829 -Inf  Inf 0.8537857  0.811617022
##              Theil's U
## Training set        NA
## Test set             0

accuracy(nnSJfc, sj_test)

##                          ME          RMSE           MAE  MPE MAPE          MASE
## Training set -4.638609e+111 1.158724e+113 4.638609e+111 -Inf  Inf 1.077622e+110
## Test set       2.254231e+01  3.541659e+01  2.254231e+01  100  100  5.236932e-01
##                      ACF1 Theil's U
## Training set -0.001607709        NA
## Test set      0.930111980         0

accuracy(nnIQfc, iq_test)

##                    ME      RMSE      MAE  MPE MAPE      MASE      ACF1
## Training set 1.420918  7.387154 3.401925 -Inf  Inf 0.3660002 0.2625136
## Test set     5.151148 12.189364 7.473082 -Inf  Inf 0.8040006 0.7818240
##              Theil's U
## Training set        NA
## Test set             0

Average of existing forecasts

#San Juan ARIMA
arimaSJ1 = auto.arima(sj_train, xreg = fourier(sj_train, K = 2), seasonal = TRUE)
arimaSJfc1 = forecast(arimaSJ1, h = length(sj_test), xreg = fourier(sj_test, K = 2))

#San Juan Neural Net
nnSJ1 = nnetar(logsj, lambda='auto', repeats = 25, size = 1, P=1)
nnSJfc1 = forecast(nnSJ1,h=length(sj_test))
  
#Combine Forecasts
combSJ = (nnSJfc1[["mean"]] + arimaSJfc1[["mean"]]) / 2

combSJts = ts(combSJ, frequency = 52, start =c(2003,05))
autoplot(sj_train) + autolayer(combSJts)

accuracy(combSJ,sj_test)

##                ME     RMSE      MAE  MPE MAPE      ACF1 Theil's U
## Test set 6.732091 25.00837 13.23342 -Inf  Inf 0.9140913         0

#Iquiots ARIMA
arimaIQ1 = auto.arima(iq_train, xreg = fourier(iq_train, K = 1), seasonal = TRUE)
arimaIQfc1 = forecast(arimaIQ1, h = length(iq_test), xreg = fourier(iq_test, K = 1))

#Iquitos Neural Net
nnIQ1 = nnetar(iq_train, lambda = 'auto', xreg = fourier(iq_train, K = 1), repeats = 25, size = 1, P=1)
nnIQfc1 = forecast(nnIQ1, h = length(iq_test), xreg = fourier(iq_test, K = 1))

#Combine Forecasts
combIQ = (nnIQfc1[["mean"]] + arimaIQfc1[["mean"]]) / 2

combIQts = ts(combIQ, frequency = 52, start =c(2007,07))
autoplot(iq_train) + autolayer(combIQts)

accuracy(combIQ,iq_test)

##                ME     RMSE      MAE  MPE MAPE      ACF1 Theil's U
## Test set 4.119392 11.73255 6.808306 -Inf  Inf 0.7810788         0

Create forecasts for whole data set

x = filter(submission_format, city == 'sj')
y = filter(submission_format, city == 'iq')

xTS = ts(x$total_cases, start = c(2008,4), frequency = 52)
yTS = ts(y$total_cases, start = c(2010,06), frequency = 52)

logsj1 = log(sj_ts)
logsj1[!is.finite(logsj1)] = NA
logsj1[is.na(logsj1)] = mean(logsj1, na.rm = TRUE)

#San Juan ARIMA
arimaSJ2 = auto.arima(sj_ts, xreg = fourier(sj_ts, K = 2), seasonal = TRUE)
arimaSJfc2 = forecast(arimaSJ2, h = length(xTS), xreg = fourier(xTS, K = 2))

#San Juan Neural Net
nnSJ2 = nnetar(logsj1, lambda='auto', repeats = 25, size = 1, P=1)
nnSJfc2 = forecast(nnSJ2,h=length(xTS))
  
#Combine Forecasts
combSJ2 = (nnSJfc2[["mean"]] + arimaSJfc2[["mean"]]) / 2

combSJts2 = ts(combSJ2, frequency = 52, start =c(2008,04))
autoplot(sj_ts, main = "Final San Juan Forecasts", ylab = "Total Cases") + autolayer(combSJts2)

#Iquitos ARIMA
arimaIQ2 = auto.arima(iq_ts, xreg = fourier(iq_ts, K = 1), seasonal = TRUE)
arimaIQfc2 = forecast(arimaIQ2, h = length(yTS), xreg = fourier(yTS, K = 1))

#Iquitos Neural Net
nnIQ2 = nnetar(iq_ts, lambda = 'auto', xreg = fourier(iq_ts, K = 1), repeats = 25, size = 1, P=1)
nnIQfc2 = forecast(nnIQ2, h = length(yTS), xreg = fourier(yTS, K = 1))

#Combine Forecasts
combIQ2 = (nnIQfc2[["mean"]] + arimaIQfc2[["mean"]]) / 2

combIQts2 = ts(combIQ2, frequency = 52, start =c(2010,06))
autoplot(iq_ts, main = "Final Iquitos Forecasts", ylab = "Total Cases") + autolayer(combIQts2)

Add forecasts to submission file

sjframe = data.frame(as.matrix(combSJ2))
iqframe = data.frame(as.matrix(combIQ2))
colnames(iqframe) = 'as.matrix.combSJ2.'

submitlist = data.frame(matrix(ncol=1,nrow=0))

submitlist = rbind(submitlist, sjframe)
submitlist = rbind(submitlist, iqframe)

submission_format$total_cases = as.integer(submitlist$as.matrix.combSJ2.)

write.csv(submission_format,"~/Desktop/DengueSubmission.csv", row.names = FALSE)

Final

Justin Lynch

7/23/2020