Load Data

# Work from the project folder; the CSV paths below are relative to it.
setwd("~/Documents/MSAE/Predictive Analytics")

# Raw inputs: test features, training features, and training labels.
dengue_test <- read.csv(file = "dengue_features_test.csv")
dengue_train_prelim <- read.csv(file = "dengue_features_train.csv")
labels_train <- read.csv(file = "dengue_labels_train.csv")

Data Manipulation

# Attach the labels to the training features; both tables share the
# (city, year, weekofyear) key.
dengue_train <- merge(
  x = dengue_train_prelim,
  y = labels_train,
  by = c("city", "year", "weekofyear")
)

# Parse the week start column into Date objects in both tables.
dengue_train[["week_start_date"]] <- as.Date(dengue_train[["week_start_date"]])
dengue_test[["week_start_date"]] <- as.Date(dengue_test[["week_start_date"]])


# One table per city code ("sj" / "iq"), for the labelled data ...
sj_train <- subset(dengue_train, city == "sj")
iq_train <- subset(dengue_train, city == "iq")

# ... and for the unlabelled test data.
sj_test_real <- subset(dengue_test, city == "sj")
iq_test_real <- subset(dengue_test, city == "iq")


# Turn each city's training table into a tsibble indexed by a year-week
# column (Week) derived from week_start_date.
sj_train <- as_tsibble(
  mutate(sj_train, Week = yearweek(week_start_date)),
  index = Week
)

iq_train <- as_tsibble(
  mutate(iq_train, Week = yearweek(week_start_date)),
  index = Week
)

# Hold-out split used to test models: rows with a Week year up to 2004
# (sj) / 2008 (iq) go to the *_training sets, later rows to *_testing.
sj_training <- filter(sj_train, year(Week) <= 2004)
sj_testing <- filter(sj_train, year(Week) >= 2005)

iq_training <- filter(iq_train, year(Week) <= 2008)
iq_testing <- filter(iq_train, year(Week) >= 2009)


# Creating Tsibble for real test data
#
# as_tsibble() is the coercion function for an existing data frame (and is
# what the training tables use); tsibble() is the column-wise constructor
# that expects name-value pairs, so it should not receive a whole data frame.

sj_test_real <- sj_test_real %>%
  mutate(Week = yearweek(week_start_date)) %>%
  as_tsibble(index = Week)

iq_test_real <- iq_test_real %>%
  mutate(Week = yearweek(week_start_date)) %>%
  as_tsibble(index = Week)

# Make implicit gaps explicit: fill_gaps() inserts a row for every week that
# is absent from the index, leaving the other columns NA for a later step.

# San Juan
sj_train <- sj_train %>% fill_gaps(.full = TRUE)
sj_training <- sj_training %>% fill_gaps(.full = TRUE)
sj_testing <- sj_testing %>% fill_gaps(.full = TRUE)
sj_test_real <- sj_test_real %>% fill_gaps(.full = TRUE)

# Iquitos
iq_train <- iq_train %>% fill_gaps(.full = TRUE)
iq_training <- iq_training %>% fill_gaps(.full = TRUE)
iq_testing <- iq_testing %>% fill_gaps(.full = TRUE)
iq_test_real <- iq_test_real %>% fill_gaps(.full = TRUE)

# Per-column tally of missing (TRUE) versus present (FALSE) values.
sj_training %>%
  is.na() %>%
  summary()
##     city            year         weekofyear      week_start_date
##  Mode :logical   Mode :logical   Mode :logical   Mode :logical  
##  FALSE:764       FALSE:764       FALSE:764       FALSE:764      
##  TRUE :2         TRUE :2         TRUE :2         TRUE :2        
##   ndvi_ne         ndvi_nw         ndvi_se         ndvi_sw       
##  Mode :logical   Mode :logical   Mode :logical   Mode :logical  
##  FALSE:605       FALSE:720       FALSE:745       FALSE:745      
##  TRUE :161       TRUE :46        TRUE :21        TRUE :21       
##  precipitation_amt_mm reanalysis_air_temp_k reanalysis_avg_temp_k
##  Mode :logical        Mode :logical         Mode :logical        
##  FALSE:755            FALSE:758             FALSE:758            
##  TRUE :11             TRUE :8               TRUE :8              
##  reanalysis_dew_point_temp_k reanalysis_max_air_temp_k
##  Mode :logical               Mode :logical            
##  FALSE:758                   FALSE:758                
##  TRUE :8                     TRUE :8                  
##  reanalysis_min_air_temp_k reanalysis_precip_amt_kg_per_m2
##  Mode :logical             Mode :logical                  
##  FALSE:758                 FALSE:758                      
##  TRUE :8                   TRUE :8                        
##  reanalysis_relative_humidity_percent reanalysis_sat_precip_amt_mm
##  Mode :logical                        Mode :logical               
##  FALSE:758                            FALSE:755                   
##  TRUE :8                              TRUE :11                    
##  reanalysis_specific_humidity_g_per_kg reanalysis_tdtr_k station_avg_temp_c
##  Mode :logical                         Mode :logical     Mode :logical     
##  FALSE:758                             FALSE:758         FALSE:758         
##  TRUE :8                               TRUE :8           TRUE :8           
##  station_diur_temp_rng_c station_max_temp_c station_min_temp_c
##  Mode :logical           Mode :logical      Mode :logical     
##  FALSE:758               FALSE:758          FALSE:758         
##  TRUE :8                 TRUE :8            TRUE :8           
##  station_precip_mm total_cases        Week        
##  Mode :logical     Mode :logical   Mode :logical  
##  FALSE:758         FALSE:764       FALSE:766      
##  TRUE :8           TRUE :2
# Impute missing values by carrying the last observed value forward
# (.direction = "down"). The column list is defined once and shared by every
# table instead of being repeated in each call.
#
# Note: city, year, weekofyear and week_start_date are not in the list, so
# they stay NA on any rows inserted by fill_gaps(); a value that is missing at
# the very start of a table has nothing above it to carry down.
feature_fill_cols <- c(
  "ndvi_ne", "ndvi_nw", "ndvi_se", "ndvi_sw",
  "precipitation_amt_mm",
  "reanalysis_air_temp_k", "reanalysis_avg_temp_k",
  "reanalysis_dew_point_temp_k",
  "reanalysis_max_air_temp_k", "reanalysis_min_air_temp_k",
  "reanalysis_precip_amt_kg_per_m2",
  "reanalysis_relative_humidity_percent",
  "reanalysis_sat_precip_amt_mm",
  "reanalysis_specific_humidity_g_per_kg",
  "reanalysis_tdtr_k",
  "station_avg_temp_c", "station_diur_temp_rng_c",
  "station_max_temp_c", "station_min_temp_c",
  "station_precip_mm"
)

# Labelled tables additionally carry total_cases.
labelled_fill_cols <- c(feature_fill_cols, "total_cases")

# Hold-out training / testing splits
sj_training <- fill(sj_training, all_of(labelled_fill_cols), .direction = "down")
iq_training <- fill(iq_training, all_of(labelled_fill_cols), .direction = "down")
sj_testing <- fill(sj_testing, all_of(labelled_fill_cols), .direction = "down")
iq_testing <- fill(iq_testing, all_of(labelled_fill_cols), .direction = "down")

# Full labelled series
sj_train <- fill(sj_train, all_of(labelled_fill_cols), .direction = "down")
iq_train <- fill(iq_train, all_of(labelled_fill_cols), .direction = "down")

# Unlabelled test series (no total_cases column to fill)
sj_test_real <- fill(sj_test_real, all_of(feature_fill_cols), .direction = "down")
iq_test_real <- fill(iq_test_real, all_of(feature_fill_cols), .direction = "down")

Time-Series Plot

# Time plot of total_cases for the full labelled series of each city.
autoplot(sj_train, total_cases) +
  labs(title = "", x = "Week", y = "Total Cases")

autoplot(iq_train, total_cases) +
  labs(title = "", x = "Week", y = "Total Cases")

Seasonal Plot

# Seasonal plot of total_cases for each city's hold-out training set.
gg_season(sj_training, total_cases) +
  labs(title = "Seasonality of Dengue Cases", x = "Weeks", y = "Total Cases")

gg_season(iq_training, total_cases) +
  labs(title = "Seasonality of Dengue Cases", x = "Weeks", y = "Total Cases")

Analysis of exogenous variables

The correlation matrices below show which variables are most strongly correlated with total cases.

In San Juan: air temperature, average temperature, dew point temperature, maximum and minimum air temperature, specific humidity (g per kg), station average temperature, and station minimum and maximum temperature.

In Iquitos: dew point temperature, minimum air temperature, precipitation amount, relative humidity, specific humidity (g per kg), tdtr, station average temperature, and station minimum temperature.

# Pearson correlations among the San Juan features and total_cases.
# Identifier and date columns are excluded by name rather than by position, so
# the selection does not silently shift if columns are added or reordered
# upstream. Rows containing any NA are skipped (use = "complete.obs").
sj_train_temp <- sj_train[, setdiff(
  names(sj_train),
  c("city", "year", "weekofyear", "week_start_date", "Week")
)]
cor(sj_train_temp, use = "complete.obs", method = "pearson")
##                                            ndvi_ne      ndvi_nw      ndvi_se
## ndvi_ne                                1.000000000  0.614733087  0.207746336
## ndvi_nw                                0.614733087  1.000000000  0.188984102
## ndvi_se                                0.207746336  0.188984102  1.000000000
## ndvi_sw                                0.156891734  0.219223044  0.797579571
## precipitation_amt_mm                  -0.058262915 -0.040672314 -0.107653149
## reanalysis_air_temp_k                 -0.081611625 -0.077227611 -0.012332081
## reanalysis_avg_temp_k                 -0.080190467 -0.075942802 -0.009134148
## reanalysis_dew_point_temp_k           -0.053284201 -0.027818733 -0.060874637
## reanalysis_max_air_temp_k             -0.055840228 -0.043865010 -0.004200218
## reanalysis_min_air_temp_k             -0.090594139 -0.075627486 -0.045776043
## reanalysis_precip_amt_kg_per_m2       -0.001720521  0.004655376 -0.128896748
## reanalysis_relative_humidity_percent   0.022914359  0.072377586 -0.113298128
## reanalysis_sat_precip_amt_mm          -0.058262915 -0.040672314 -0.107653149
## reanalysis_specific_humidity_g_per_kg -0.050158798 -0.022235334 -0.056001467
## reanalysis_tdtr_k                     -0.016472227 -0.047476579  0.046012169
## station_avg_temp_c                     0.057168024  0.088998612 -0.059144749
## station_diur_temp_rng_c                0.187282196  0.184570751  0.009584318
## station_max_temp_c                     0.104107142  0.137634662 -0.069850980
## station_min_temp_c                     0.003189960  0.018169747 -0.070307017
## station_precip_mm                     -0.084108050 -0.083805979 -0.139890146
## total_cases                            0.004841108  0.059528350 -0.118511522
##                                            ndvi_sw precipitation_amt_mm
## ndvi_ne                                0.156891734          -0.05826291
## ndvi_nw                                0.219223044          -0.04067231
## ndvi_se                                0.797579571          -0.10765315
## ndvi_sw                                1.000000000          -0.10913105
## precipitation_amt_mm                  -0.109131046           1.00000000
## reanalysis_air_temp_k                 -0.035554194           0.23675596
## reanalysis_avg_temp_k                 -0.028252243           0.22541864
## reanalysis_dew_point_temp_k           -0.077835018           0.40537943
## reanalysis_max_air_temp_k             -0.005414366           0.26031506
## reanalysis_min_air_temp_k             -0.064719969           0.24792723
## reanalysis_precip_amt_kg_per_m2       -0.120714621           0.50837162
## reanalysis_relative_humidity_percent  -0.109262240           0.50258955
## reanalysis_sat_precip_amt_mm          -0.109131046           1.00000000
## reanalysis_specific_humidity_g_per_kg -0.070543805           0.41275425
## reanalysis_tdtr_k                      0.060828098          -0.08923607
## station_avg_temp_c                    -0.028237835           0.20046115
## station_diur_temp_rng_c                0.077080185          -0.15569937
## station_max_temp_c                    -0.001627374           0.19496493
## station_min_temp_c                    -0.064866699           0.22907984
## station_precip_mm                     -0.174310630           0.56418258
## total_cases                            0.042338172           0.05770377
##                                       reanalysis_air_temp_k
## ndvi_ne                                         -0.08161162
## ndvi_nw                                         -0.07722761
## ndvi_se                                         -0.01233208
## ndvi_sw                                         -0.03555419
## precipitation_amt_mm                             0.23675596
## reanalysis_air_temp_k                            1.00000000
## reanalysis_avg_temp_k                            0.99749502
## reanalysis_dew_point_temp_k                      0.90357683
## reanalysis_max_air_temp_k                        0.93519189
## reanalysis_min_air_temp_k                        0.94230555
## reanalysis_precip_amt_kg_per_m2                  0.07999179
## reanalysis_relative_humidity_percent             0.29938367
## reanalysis_sat_precip_amt_mm                     0.23675596
## reanalysis_specific_humidity_g_per_kg            0.90503239
## reanalysis_tdtr_k                                0.17905989
## station_avg_temp_c                               0.88026673
## station_diur_temp_rng_c                          0.04319466
## station_max_temp_c                               0.69879140
## station_min_temp_c                               0.83269721
## station_precip_mm                                0.11347483
## total_cases                                      0.17963308
##                                       reanalysis_avg_temp_k
## ndvi_ne                                        -0.080190467
## ndvi_nw                                        -0.075942802
## ndvi_se                                        -0.009134148
## ndvi_sw                                        -0.028252243
## precipitation_amt_mm                            0.225418643
## reanalysis_air_temp_k                           0.997495018
## reanalysis_avg_temp_k                           1.000000000
## reanalysis_dew_point_temp_k                     0.895503839
## reanalysis_max_air_temp_k                       0.938964463
## reanalysis_min_air_temp_k                       0.939255018
## reanalysis_precip_amt_kg_per_m2                 0.062175668
## reanalysis_relative_humidity_percent            0.285620609
## reanalysis_sat_precip_amt_mm                    0.225418643
## reanalysis_specific_humidity_g_per_kg           0.896480340
## reanalysis_tdtr_k                               0.202110951
## station_avg_temp_c                              0.878486073
## station_diur_temp_rng_c                         0.057777082
## station_max_temp_c                              0.704158435
## station_min_temp_c                              0.827026029
## station_precip_mm                               0.097566477
## total_cases                                     0.172814139
##                                       reanalysis_dew_point_temp_k
## ndvi_ne                                               -0.05328420
## ndvi_nw                                               -0.02781873
## ndvi_se                                               -0.06087464
## ndvi_sw                                               -0.07783502
## precipitation_amt_mm                                   0.40537943
## reanalysis_air_temp_k                                  0.90357683
## reanalysis_avg_temp_k                                  0.89550384
## reanalysis_dew_point_temp_k                            1.00000000
## reanalysis_max_air_temp_k                              0.84792804
## reanalysis_min_air_temp_k                              0.89872363
## reanalysis_precip_amt_kg_per_m2                        0.32779046
## reanalysis_relative_humidity_percent                   0.67906262
## reanalysis_sat_precip_amt_mm                           0.40537943
## reanalysis_specific_humidity_g_per_kg                  0.99852785
## reanalysis_tdtr_k                                     -0.03027745
## station_avg_temp_c                                     0.86835771
## station_diur_temp_rng_c                               -0.05271514
## station_max_temp_c                                     0.69039975
## station_min_temp_c                                     0.85004413
## station_precip_mm                                      0.28444602
## total_cases                                            0.20150743
##                                       reanalysis_max_air_temp_k
## ndvi_ne                                            -0.055840228
## ndvi_nw                                            -0.043865010
## ndvi_se                                            -0.004200218
## ndvi_sw                                            -0.005414366
## precipitation_amt_mm                                0.260315063
## reanalysis_air_temp_k                               0.935191890
## reanalysis_avg_temp_k                               0.938964463
## reanalysis_dew_point_temp_k                         0.847928042
## reanalysis_max_air_temp_k                           1.000000000
## reanalysis_min_air_temp_k                           0.828629948
## reanalysis_precip_amt_kg_per_m2                     0.091321852
## reanalysis_relative_humidity_percent                0.289471605
## reanalysis_sat_precip_amt_mm                        0.260315063
## reanalysis_specific_humidity_g_per_kg               0.853896029
## reanalysis_tdtr_k                                   0.353873203
## station_avg_temp_c                                  0.852684422
## station_diur_temp_rng_c                             0.118112005
## station_max_temp_c                                  0.762146719
## station_min_temp_c                                  0.770896000
## station_precip_mm                                   0.104035604
## total_cases                                         0.193177101
##                                       reanalysis_min_air_temp_k
## ndvi_ne                                             -0.09059414
## ndvi_nw                                             -0.07562749
## ndvi_se                                             -0.04577604
## ndvi_sw                                             -0.06471997
## precipitation_amt_mm                                 0.24792723
## reanalysis_air_temp_k                                0.94230555
## reanalysis_avg_temp_k                                0.93925502
## reanalysis_dew_point_temp_k                          0.89872363
## reanalysis_max_air_temp_k                            0.82862995
## reanalysis_min_air_temp_k                            1.00000000
## reanalysis_precip_amt_kg_per_m2                      0.13196312
## reanalysis_relative_humidity_percent                 0.38537661
## reanalysis_sat_precip_amt_mm                         0.24792723
## reanalysis_specific_humidity_g_per_kg                0.89603888
## reanalysis_tdtr_k                                   -0.04818969
## station_avg_temp_c                                   0.84071530
## station_diur_temp_rng_c                             -0.02039504
## station_max_temp_c                                   0.62727871
## station_min_temp_c                                   0.82933875
## station_precip_mm                                    0.15033344
## total_cases                                          0.18562283
##                                       reanalysis_precip_amt_kg_per_m2
## ndvi_ne                                                  -0.001720521
## ndvi_nw                                                   0.004655376
## ndvi_se                                                  -0.128896748
## ndvi_sw                                                  -0.120714621
## precipitation_amt_mm                                      0.508371624
## reanalysis_air_temp_k                                     0.079991788
## reanalysis_avg_temp_k                                     0.062175668
## reanalysis_dew_point_temp_k                               0.327790464
## reanalysis_max_air_temp_k                                 0.091321852
## reanalysis_min_air_temp_k                                 0.131963120
## reanalysis_precip_amt_kg_per_m2                           1.000000000
## reanalysis_relative_humidity_percent                      0.601792833
## reanalysis_sat_precip_amt_mm                              0.508371624
## reanalysis_specific_humidity_g_per_kg                     0.333814940
## reanalysis_tdtr_k                                        -0.306114553
## station_avg_temp_c                                        0.134903828
## station_diur_temp_rng_c                                  -0.251414130
## station_max_temp_c                                        0.080636869
## station_min_temp_c                                        0.198762812
## station_precip_mm                                         0.478204811
## total_cases                                               0.106601325
##                                       reanalysis_relative_humidity_percent
## ndvi_ne                                                         0.02291436
## ndvi_nw                                                         0.07237759
## ndvi_se                                                        -0.11329813
## ndvi_sw                                                        -0.10926224
## precipitation_amt_mm                                            0.50258955
## reanalysis_air_temp_k                                           0.29938367
## reanalysis_avg_temp_k                                           0.28562061
## reanalysis_dew_point_temp_k                                     0.67906262
## reanalysis_max_air_temp_k                                       0.28947160
## reanalysis_min_air_temp_k                                       0.38537661
## reanalysis_precip_amt_kg_per_m2                                 0.60179283
## reanalysis_relative_humidity_percent                            1.00000000
## reanalysis_sat_precip_amt_mm                                    0.50258955
## reanalysis_specific_humidity_g_per_kg                           0.67405600
## reanalysis_tdtr_k                                              -0.36964793
## station_avg_temp_c                                              0.42747479
## station_diur_temp_rng_c                                        -0.19165550
## station_max_temp_c                                              0.34305193
## station_min_temp_c                                              0.46690705
## station_precip_mm                                               0.44349269
## total_cases                                                     0.14286734
##                                       reanalysis_sat_precip_amt_mm
## ndvi_ne                                                -0.05826291
## ndvi_nw                                                -0.04067231
## ndvi_se                                                -0.10765315
## ndvi_sw                                                -0.10913105
## precipitation_amt_mm                                    1.00000000
## reanalysis_air_temp_k                                   0.23675596
## reanalysis_avg_temp_k                                   0.22541864
## reanalysis_dew_point_temp_k                             0.40537943
## reanalysis_max_air_temp_k                               0.26031506
## reanalysis_min_air_temp_k                               0.24792723
## reanalysis_precip_amt_kg_per_m2                         0.50837162
## reanalysis_relative_humidity_percent                    0.50258955
## reanalysis_sat_precip_amt_mm                            1.00000000
## reanalysis_specific_humidity_g_per_kg                   0.41275425
## reanalysis_tdtr_k                                      -0.08923607
## station_avg_temp_c                                      0.20046115
## station_diur_temp_rng_c                                -0.15569937
## station_max_temp_c                                      0.19496493
## station_min_temp_c                                      0.22907984
## station_precip_mm                                       0.56418258
## total_cases                                             0.05770377
##                                       reanalysis_specific_humidity_g_per_kg
## ndvi_ne                                                         -0.05015880
## ndvi_nw                                                         -0.02223533
## ndvi_se                                                         -0.05600147
## ndvi_sw                                                         -0.07054381
## precipitation_amt_mm                                             0.41275425
## reanalysis_air_temp_k                                            0.90503239
## reanalysis_avg_temp_k                                            0.89648034
## reanalysis_dew_point_temp_k                                      0.99852785
## reanalysis_max_air_temp_k                                        0.85389603
## reanalysis_min_air_temp_k                                        0.89603888
## reanalysis_precip_amt_kg_per_m2                                  0.33381494
## reanalysis_relative_humidity_percent                             0.67405600
## reanalysis_sat_precip_amt_mm                                     0.41275425
## reanalysis_specific_humidity_g_per_kg                            1.00000000
## reanalysis_tdtr_k                                               -0.02300644
## station_avg_temp_c                                               0.86963945
## station_diur_temp_rng_c                                         -0.05550745
## station_max_temp_c                                               0.69184011
## station_min_temp_c                                               0.84921777
## station_precip_mm                                                0.28753710
## total_cases                                                      0.20578910
##                                       reanalysis_tdtr_k station_avg_temp_c
## ndvi_ne                                     -0.01647223         0.05716802
## ndvi_nw                                     -0.04747658         0.08899861
## ndvi_se                                      0.04601217        -0.05914475
## ndvi_sw                                      0.06082810        -0.02823783
## precipitation_amt_mm                        -0.08923607         0.20046115
## reanalysis_air_temp_k                        0.17905989         0.88026673
## reanalysis_avg_temp_k                        0.20211095         0.87848607
## reanalysis_dew_point_temp_k                 -0.03027745         0.86835771
## reanalysis_max_air_temp_k                    0.35387320         0.85268442
## reanalysis_min_air_temp_k                   -0.04818969         0.84071530
## reanalysis_precip_amt_kg_per_m2             -0.30611455         0.13490383
## reanalysis_relative_humidity_percent        -0.36964793         0.42747479
## reanalysis_sat_precip_amt_mm                -0.08923607         0.20046115
## reanalysis_specific_humidity_g_per_kg       -0.02300644         0.86963945
## reanalysis_tdtr_k                            1.00000000         0.14197006
## station_avg_temp_c                           0.14197006         1.00000000
## station_diur_temp_rng_c                      0.37548665         0.18810969
## station_max_temp_c                           0.28600118         0.86555956
## station_min_temp_c                           0.01105830         0.89820615
## station_precip_mm                           -0.20862538         0.02949012
## total_cases                                 -0.06621245         0.19475547
##                                       station_diur_temp_rng_c
## ndvi_ne                                           0.187282196
## ndvi_nw                                           0.184570751
## ndvi_se                                           0.009584318
## ndvi_sw                                           0.077080185
## precipitation_amt_mm                             -0.155699369
## reanalysis_air_temp_k                             0.043194664
## reanalysis_avg_temp_k                             0.057777082
## reanalysis_dew_point_temp_k                      -0.052715142
## reanalysis_max_air_temp_k                         0.118112005
## reanalysis_min_air_temp_k                        -0.020395038
## reanalysis_precip_amt_kg_per_m2                  -0.251414130
## reanalysis_relative_humidity_percent             -0.191655500
## reanalysis_sat_precip_amt_mm                     -0.155699369
## reanalysis_specific_humidity_g_per_kg            -0.055507453
## reanalysis_tdtr_k                                 0.375486650
## station_avg_temp_c                                0.188109687
## station_diur_temp_rng_c                           1.000000000
## station_max_temp_c                                0.476740193
## station_min_temp_c                               -0.120433752
## station_precip_mm                                -0.267575182
## total_cases                                       0.035780208
##                                       station_max_temp_c station_min_temp_c
## ndvi_ne                                      0.104107142         0.00318996
## ndvi_nw                                      0.137634662         0.01816975
## ndvi_se                                     -0.069850980        -0.07030702
## ndvi_sw                                     -0.001627374        -0.06486670
## precipitation_amt_mm                         0.194964930         0.22907984
## reanalysis_air_temp_k                        0.698791403         0.83269721
## reanalysis_avg_temp_k                        0.704158435         0.82702603
## reanalysis_dew_point_temp_k                  0.690399752         0.85004413
## reanalysis_max_air_temp_k                    0.762146719         0.77089600
## reanalysis_min_air_temp_k                    0.627278708         0.82933875
## reanalysis_precip_amt_kg_per_m2              0.080636869         0.19876281
## reanalysis_relative_humidity_percent         0.343051933         0.46690705
## reanalysis_sat_precip_amt_mm                 0.194964930         0.22907984
## reanalysis_specific_humidity_g_per_kg        0.691840112         0.84921777
## reanalysis_tdtr_k                            0.286001180         0.01105830
## station_avg_temp_c                           0.865559564         0.89820615
## station_diur_temp_rng_c                      0.476740193        -0.12043375
## station_max_temp_c                           1.000000000         0.67409377
## station_min_temp_c                           0.674093773         1.00000000
## station_precip_mm                            0.004192619         0.08585059
## total_cases                                  0.188226224         0.17456647
##                                       station_precip_mm  total_cases
## ndvi_ne                                    -0.084108050  0.004841108
## ndvi_nw                                    -0.083805979  0.059528350
## ndvi_se                                    -0.139890146 -0.118511522
## ndvi_sw                                    -0.174310630  0.042338172
## precipitation_amt_mm                        0.564182583  0.057703769
## reanalysis_air_temp_k                       0.113474827  0.179633079
## reanalysis_avg_temp_k                       0.097566477  0.172814139
## reanalysis_dew_point_temp_k                 0.284446020  0.201507432
## reanalysis_max_air_temp_k                   0.104035604  0.193177101
## reanalysis_min_air_temp_k                   0.150333441  0.185622835
## reanalysis_precip_amt_kg_per_m2             0.478204811  0.106601325
## reanalysis_relative_humidity_percent        0.443492686  0.142867341
## reanalysis_sat_precip_amt_mm                0.564182583  0.057703769
## reanalysis_specific_humidity_g_per_kg       0.287537099  0.205789101
## reanalysis_tdtr_k                          -0.208625382 -0.066212448
## station_avg_temp_c                          0.029490118  0.194755471
## station_diur_temp_rng_c                    -0.267575182  0.035780208
## station_max_temp_c                          0.004192619  0.188226224
## station_min_temp_c                          0.085850588  0.174566473
## station_precip_mm                           1.000000000  0.050114370
## total_cases                                 0.050114370  1.000000000
# Correlation matrix of the Iquitos predictors and total_cases.
# The identifier/date columns are dropped by name instead of by position so
# the selection does not silently shift if the column order changes. The
# tsibble is converted to a plain tibble first so that its index column
# (Week) can be removed along with the others.
iq_train_temp <- iq_train %>%
  as_tibble() %>%
  dplyr::select(-c(city, year, weekofyear, week_start_date, Week))
# Pearson correlations, using only rows with no missing values.
cor(iq_train_temp, use = "complete.obs", method = "pearson")
##                                            ndvi_ne      ndvi_nw      ndvi_se
## ndvi_ne                                1.000000000  0.764284686  0.769971474
## ndvi_nw                                0.764284686  1.000000000  0.645191376
## ndvi_se                                0.769971474  0.645191376  1.000000000
## ndvi_sw                                0.842399415  0.763961833  0.715004002
## precipitation_amt_mm                  -0.006564765 -0.051242385 -0.032827248
## reanalysis_air_temp_k                  0.152998394  0.147632012  0.192944878
## reanalysis_avg_temp_k                  0.168619268  0.164371843  0.204818339
## reanalysis_dew_point_temp_k           -0.030232191 -0.026705301 -0.056242792
## reanalysis_max_air_temp_k              0.214523087  0.199230502  0.256331377
## reanalysis_min_air_temp_k             -0.003602505  0.003433947 -0.020463601
## reanalysis_precip_amt_kg_per_m2       -0.082748870 -0.074914283 -0.120260729
## reanalysis_relative_humidity_percent  -0.132577393 -0.123318758 -0.181230432
## reanalysis_sat_precip_amt_mm          -0.006564765 -0.051242385 -0.032827248
## reanalysis_specific_humidity_g_per_kg -0.029247368 -0.023175540 -0.054177577
## reanalysis_tdtr_k                      0.167696217  0.161936216  0.216002177
## station_avg_temp_c                     0.122185301  0.123689682  0.130065159
## station_diur_temp_rng_c                0.144287920  0.189865361  0.169897325
## station_max_temp_c                     0.140639793  0.147322203  0.154462057
## station_min_temp_c                    -0.005059250 -0.088344589 -0.045160572
## station_precip_mm                      0.009540696 -0.014912915  0.009424642
## total_cases                            0.018770102 -0.009629011 -0.042713578
##                                            ndvi_sw precipitation_amt_mm
## ndvi_ne                                0.842399415         -0.006564765
## ndvi_nw                                0.763961833         -0.051242385
## ndvi_se                                0.715004002         -0.032827248
## ndvi_sw                                1.000000000         -0.014720695
## precipitation_amt_mm                  -0.014720695          1.000000000
## reanalysis_air_temp_k                  0.162715744         -0.054891852
## reanalysis_avg_temp_k                  0.175894434         -0.060474398
## reanalysis_dew_point_temp_k           -0.030902248          0.479241088
## reanalysis_max_air_temp_k              0.226536913         -0.233402766
## reanalysis_min_air_temp_k              0.002212419          0.323656210
## reanalysis_precip_amt_kg_per_m2       -0.063275096          0.340000777
## reanalysis_relative_humidity_percent  -0.138125664          0.438361652
## reanalysis_sat_precip_amt_mm          -0.014720695          1.000000000
## reanalysis_specific_humidity_g_per_kg -0.027033097          0.475832289
## reanalysis_tdtr_k                      0.170077854         -0.382548005
## station_avg_temp_c                     0.122684649          0.128046164
## station_diur_temp_rng_c                0.172528922         -0.168069921
## station_max_temp_c                     0.170190171         -0.006014397
## station_min_temp_c                    -0.051051077          0.314432363
## station_precip_mm                     -0.006801561          0.365122243
## total_cases                            0.029586470          0.089677318
##                                       reanalysis_air_temp_k
## ndvi_ne                                          0.15299839
## ndvi_nw                                          0.14763201
## ndvi_se                                          0.19294488
## ndvi_sw                                          0.16271574
## precipitation_amt_mm                            -0.05489185
## reanalysis_air_temp_k                            1.00000000
## reanalysis_avg_temp_k                            0.97367824
## reanalysis_dew_point_temp_k                      0.13503944
## reanalysis_max_air_temp_k                        0.75373008
## reanalysis_min_air_temp_k                        0.41095593
## reanalysis_precip_amt_kg_per_m2                 -0.09129756
## reanalysis_relative_humidity_percent            -0.55521583
## reanalysis_sat_precip_amt_mm                    -0.05489185
## reanalysis_specific_humidity_g_per_kg            0.16344479
## reanalysis_tdtr_k                                0.55657225
## station_avg_temp_c                               0.59228408
## station_diur_temp_rng_c                          0.50707719
## station_max_temp_c                               0.64827595
## station_min_temp_c                               0.23701077
## station_precip_mm                               -0.13929213
## total_cases                                      0.09342510
##                                       reanalysis_avg_temp_k
## ndvi_ne                                           0.1686193
## ndvi_nw                                           0.1643718
## ndvi_se                                           0.2048183
## ndvi_sw                                           0.1758944
## precipitation_amt_mm                             -0.0604744
## reanalysis_air_temp_k                             0.9736782
## reanalysis_avg_temp_k                             1.0000000
## reanalysis_dew_point_temp_k                       0.1261382
## reanalysis_max_air_temp_k                         0.7856406
## reanalysis_min_air_temp_k                         0.3945337
## reanalysis_precip_amt_kg_per_m2                  -0.1138834
## reanalysis_relative_humidity_percent             -0.5476834
## reanalysis_sat_precip_amt_mm                     -0.0604744
## reanalysis_specific_humidity_g_per_kg             0.1517424
## reanalysis_tdtr_k                                 0.6058434
## station_avg_temp_c                                0.5592344
## station_diur_temp_rng_c                           0.5056625
## station_max_temp_c                                0.6233760
## station_min_temp_c                                0.2067904
## station_precip_mm                                -0.1429488
## total_cases                                       0.0768732
##                                       reanalysis_dew_point_temp_k
## ndvi_ne                                               -0.03023219
## ndvi_nw                                               -0.02670530
## ndvi_se                                               -0.05624279
## ndvi_sw                                               -0.03090225
## precipitation_amt_mm                                   0.47924109
## reanalysis_air_temp_k                                  0.13503944
## reanalysis_avg_temp_k                                  0.12613820
## reanalysis_dew_point_temp_k                            1.00000000
## reanalysis_max_air_temp_k                             -0.26228023
## reanalysis_min_air_temp_k                              0.74163608
## reanalysis_precip_amt_kg_per_m2                        0.57026646
## reanalysis_relative_humidity_percent                   0.74667392
## reanalysis_sat_precip_amt_mm                           0.47924109
## reanalysis_specific_humidity_g_per_kg                  0.99765996
## reanalysis_tdtr_k                                     -0.60984391
## station_avg_temp_c                                     0.33203064
## station_diur_temp_rng_c                               -0.23321107
## station_max_temp_c                                     0.08785170
## station_min_temp_c                                     0.61573890
## station_precip_mm                                      0.18704879
## total_cases                                            0.22955976
##                                       reanalysis_max_air_temp_k
## ndvi_ne                                              0.21452309
## ndvi_nw                                              0.19923050
## ndvi_se                                              0.25633138
## ndvi_sw                                              0.22653691
## precipitation_amt_mm                                -0.23340277
## reanalysis_air_temp_k                                0.75373008
## reanalysis_avg_temp_k                                0.78564065
## reanalysis_dew_point_temp_k                         -0.26228023
## reanalysis_max_air_temp_k                            1.00000000
## reanalysis_min_air_temp_k                           -0.04893587
## reanalysis_precip_amt_kg_per_m2                     -0.26165155
## reanalysis_relative_humidity_percent                -0.72851550
## reanalysis_sat_precip_amt_mm                        -0.23340277
## reanalysis_specific_humidity_g_per_kg               -0.24375607
## reanalysis_tdtr_k                                    0.80235494
## station_avg_temp_c                                   0.35986272
## station_diur_temp_rng_c                              0.58035607
## station_max_temp_c                                   0.58674341
## station_min_temp_c                                  -0.10013765
## station_precip_mm                                   -0.20090504
## total_cases                                         -0.05510552
##                                       reanalysis_min_air_temp_k
## ndvi_ne                                            -0.003602505
## ndvi_nw                                             0.003433947
## ndvi_se                                            -0.020463601
## ndvi_sw                                             0.002212419
## precipitation_amt_mm                                0.323656210
## reanalysis_air_temp_k                               0.410955927
## reanalysis_avg_temp_k                               0.394533732
## reanalysis_dew_point_temp_k                         0.741636078
## reanalysis_max_air_temp_k                          -0.048935868
## reanalysis_min_air_temp_k                           1.000000000
## reanalysis_precip_amt_kg_per_m2                     0.395602367
## reanalysis_relative_humidity_percent                0.353640861
## reanalysis_sat_precip_amt_mm                        0.323656210
## reanalysis_specific_humidity_g_per_kg               0.747820178
## reanalysis_tdtr_k                                  -0.401231711
## station_avg_temp_c                                  0.415171201
## station_diur_temp_rng_c                            -0.034893208
## station_max_temp_c                                  0.222844715
## station_min_temp_c                                  0.592851513
## station_precip_mm                                   0.091641558
## total_cases                                         0.207925812
##                                       reanalysis_precip_amt_kg_per_m2
## ndvi_ne                                                   -0.08274887
## ndvi_nw                                                   -0.07491428
## ndvi_se                                                   -0.12026073
## ndvi_sw                                                   -0.06327510
## precipitation_amt_mm                                       0.34000078
## reanalysis_air_temp_k                                     -0.09129756
## reanalysis_avg_temp_k                                     -0.11388335
## reanalysis_dew_point_temp_k                                0.57026646
## reanalysis_max_air_temp_k                                 -0.26165155
## reanalysis_min_air_temp_k                                  0.39560237
## reanalysis_precip_amt_kg_per_m2                            1.00000000
## reanalysis_relative_humidity_percent                       0.55048182
## reanalysis_sat_precip_amt_mm                               0.34000078
## reanalysis_specific_humidity_g_per_kg                      0.57667251
## reanalysis_tdtr_k                                         -0.53997521
## station_avg_temp_c                                         0.05753559
## station_diur_temp_rng_c                                   -0.20038884
## station_max_temp_c                                        -0.05092926
## station_min_temp_c                                         0.25490402
## station_precip_mm                                          0.15676558
## total_cases                                                0.10134611
##                                       reanalysis_relative_humidity_percent
## ndvi_ne                                                         -0.1325774
## ndvi_nw                                                         -0.1233188
## ndvi_se                                                         -0.1812304
## ndvi_sw                                                         -0.1381257
## precipitation_amt_mm                                             0.4383617
## reanalysis_air_temp_k                                           -0.5552158
## reanalysis_avg_temp_k                                           -0.5476834
## reanalysis_dew_point_temp_k                                      0.7466739
## reanalysis_max_air_temp_k                                       -0.7285155
## reanalysis_min_air_temp_k                                        0.3536409
## reanalysis_precip_amt_kg_per_m2                                  0.5504818
## reanalysis_relative_humidity_percent                             1.0000000
## reanalysis_sat_precip_amt_mm                                     0.4383617
## reanalysis_specific_humidity_g_per_kg                            0.7269488
## reanalysis_tdtr_k                                               -0.8936120
## station_avg_temp_c                                              -0.1161053
## station_diur_temp_rng_c                                         -0.5350515
## station_max_temp_c                                              -0.3586808
## station_min_temp_c                                               0.3595888
## station_precip_mm                                                0.2517485
## total_cases                                                      0.1306398
##                                       reanalysis_sat_precip_amt_mm
## ndvi_ne                                               -0.006564765
## ndvi_nw                                               -0.051242385
## ndvi_se                                               -0.032827248
## ndvi_sw                                               -0.014720695
## precipitation_amt_mm                                   1.000000000
## reanalysis_air_temp_k                                 -0.054891852
## reanalysis_avg_temp_k                                 -0.060474398
## reanalysis_dew_point_temp_k                            0.479241088
## reanalysis_max_air_temp_k                             -0.233402766
## reanalysis_min_air_temp_k                              0.323656210
## reanalysis_precip_amt_kg_per_m2                        0.340000777
## reanalysis_relative_humidity_percent                   0.438361652
## reanalysis_sat_precip_amt_mm                           1.000000000
## reanalysis_specific_humidity_g_per_kg                  0.475832289
## reanalysis_tdtr_k                                     -0.382548005
## station_avg_temp_c                                     0.128046164
## station_diur_temp_rng_c                               -0.168069921
## station_max_temp_c                                    -0.006014397
## station_min_temp_c                                     0.314432363
## station_precip_mm                                      0.365122243
## total_cases                                            0.089677318
##                                       reanalysis_specific_humidity_g_per_kg
## ndvi_ne                                                         -0.02924737
## ndvi_nw                                                         -0.02317554
## ndvi_se                                                         -0.05417758
## ndvi_sw                                                         -0.02703310
## precipitation_amt_mm                                             0.47583229
## reanalysis_air_temp_k                                            0.16344479
## reanalysis_avg_temp_k                                            0.15174240
## reanalysis_dew_point_temp_k                                      0.99765996
## reanalysis_max_air_temp_k                                       -0.24375607
## reanalysis_min_air_temp_k                                        0.74782018
## reanalysis_precip_amt_kg_per_m2                                  0.57667251
## reanalysis_relative_humidity_percent                             0.72694877
## reanalysis_sat_precip_amt_mm                                     0.47583229
## reanalysis_specific_humidity_g_per_kg                            1.00000000
## reanalysis_tdtr_k                                               -0.59591436
## station_avg_temp_c                                               0.34919584
## station_diur_temp_rng_c                                         -0.21837004
## station_max_temp_c                                               0.10585692
## station_min_temp_c                                               0.61426470
## station_precip_mm                                                0.17865267
## total_cases                                                      0.23552871
##                                       reanalysis_tdtr_k station_avg_temp_c
## ndvi_ne                                       0.1676962         0.12218530
## ndvi_nw                                       0.1619362         0.12368968
## ndvi_se                                       0.2160022         0.13006516
## ndvi_sw                                       0.1700779         0.12268465
## precipitation_amt_mm                         -0.3825480         0.12804616
## reanalysis_air_temp_k                         0.5565723         0.59228408
## reanalysis_avg_temp_k                         0.6058434         0.55923439
## reanalysis_dew_point_temp_k                  -0.6098439         0.33203064
## reanalysis_max_air_temp_k                     0.8023549         0.35986272
## reanalysis_min_air_temp_k                    -0.4012317         0.41517120
## reanalysis_precip_amt_kg_per_m2              -0.5399752         0.05753559
## reanalysis_relative_humidity_percent         -0.8936120        -0.11610532
## reanalysis_sat_precip_amt_mm                 -0.3825480         0.12804616
## reanalysis_specific_humidity_g_per_kg        -0.5959144         0.34919584
## reanalysis_tdtr_k                             1.0000000         0.14091164
## station_avg_temp_c                            0.1409116         1.00000000
## station_diur_temp_rng_c                       0.5416893         0.50763529
## station_max_temp_c                            0.3714817         0.64752956
## station_min_temp_c                           -0.3461632         0.45682712
## station_precip_mm                            -0.2530236        -0.05547461
## total_cases                                  -0.1308094         0.11199096
##                                       station_diur_temp_rng_c
## ndvi_ne                                            0.14428792
## ndvi_nw                                            0.18986536
## ndvi_se                                            0.16989733
## ndvi_sw                                            0.17252892
## precipitation_amt_mm                              -0.16806992
## reanalysis_air_temp_k                              0.50707719
## reanalysis_avg_temp_k                              0.50566252
## reanalysis_dew_point_temp_k                       -0.23321107
## reanalysis_max_air_temp_k                          0.58035607
## reanalysis_min_air_temp_k                         -0.03489321
## reanalysis_precip_amt_kg_per_m2                   -0.20038884
## reanalysis_relative_humidity_percent              -0.53505152
## reanalysis_sat_precip_amt_mm                      -0.16806992
## reanalysis_specific_humidity_g_per_kg             -0.21837004
## reanalysis_tdtr_k                                  0.54168933
## station_avg_temp_c                                 0.50763529
## station_diur_temp_rng_c                            1.00000000
## station_max_temp_c                                 0.67997286
## station_min_temp_c                                -0.23279830
## station_precip_mm                                 -0.24523095
## total_cases                                       -0.02148258
##                                       station_max_temp_c station_min_temp_c
## ndvi_ne                                      0.140639793        -0.00505925
## ndvi_nw                                      0.147322203        -0.08834459
## ndvi_se                                      0.154462057        -0.04516057
## ndvi_sw                                      0.170190171        -0.05105108
## precipitation_amt_mm                        -0.006014397         0.31443236
## reanalysis_air_temp_k                        0.648275947         0.23701077
## reanalysis_avg_temp_k                        0.623375996         0.20679041
## reanalysis_dew_point_temp_k                  0.087851701         0.61573890
## reanalysis_max_air_temp_k                    0.586743407        -0.10013765
## reanalysis_min_air_temp_k                    0.222844715         0.59285151
## reanalysis_precip_amt_kg_per_m2             -0.050929262         0.25490402
## reanalysis_relative_humidity_percent        -0.358680761         0.35958876
## reanalysis_sat_precip_amt_mm                -0.006014397         0.31443236
## reanalysis_specific_humidity_g_per_kg        0.105856919         0.61426470
## reanalysis_tdtr_k                            0.371481658        -0.34616323
## station_avg_temp_c                           0.647529555         0.45682712
## station_diur_temp_rng_c                      0.679972861        -0.23279830
## station_max_temp_c                           1.000000000         0.12331098
## station_min_temp_c                           0.123310980         1.00000000
## station_precip_mm                           -0.137748491         0.18213787
## total_cases                                  0.079671188         0.20013000
##                                       station_precip_mm  total_cases
## ndvi_ne                                     0.009540696  0.018770102
## ndvi_nw                                    -0.014912915 -0.009629011
## ndvi_se                                     0.009424642 -0.042713578
## ndvi_sw                                    -0.006801561  0.029586470
## precipitation_amt_mm                        0.365122243  0.089677318
## reanalysis_air_temp_k                      -0.139292133  0.093425101
## reanalysis_avg_temp_k                      -0.142948788  0.076873199
## reanalysis_dew_point_temp_k                 0.187048793  0.229559761
## reanalysis_max_air_temp_k                  -0.200905036 -0.055105522
## reanalysis_min_air_temp_k                   0.091641558  0.207925812
## reanalysis_precip_amt_kg_per_m2             0.156765575  0.101346114
## reanalysis_relative_humidity_percent        0.251748494  0.130639813
## reanalysis_sat_precip_amt_mm                0.365122243  0.089677318
## reanalysis_specific_humidity_g_per_kg       0.178652668  0.235528708
## reanalysis_tdtr_k                          -0.253023609 -0.130809395
## station_avg_temp_c                         -0.055474607  0.111990959
## station_diur_temp_rng_c                    -0.245230946 -0.021482577
## station_max_temp_c                         -0.137748491  0.079671188
## station_min_temp_c                          0.182137874  0.200129997
## station_precip_mm                           1.000000000  0.047431867
## total_cases                                 0.047431867  1.000000000

Models

ARIMA

ARIMA with external regressors - San Juan

# San Juan: ARIMA with climate regressors plus Fourier terms (K = 10),
# fitted on the training window only.
m1_sj <- sj_training %>%
  model(
    ARIMA(
      total_cases ~ reanalysis_air_temp_k + reanalysis_avg_temp_k +
        reanalysis_dew_point_temp_k + reanalysis_max_air_temp_k +
        reanalysis_min_air_temp_k + reanalysis_specific_humidity_g_per_kg +
        station_avg_temp_c + station_min_temp_c + station_max_temp_c +
        fourier(K = 10)
    )
  )

# Forecast the hold-out weeks; sj_testing supplies the regressor values.
ARIMA_fcst <- m1_sj %>% forecast(sj_testing)

# Title names the city so this plot is distinguishable from the Iquitos one.
ARIMA_fcst %>% autoplot(sj_testing) +
  labs(title = "ARIMA model - San Juan") +
  xlab("Week") +
  ylab("Total Cases")

# Score against the complete series (training + hold-out): the scaled
# measures (MASE, RMSSE) are computed from the training portion and come out
# NaN when only the hold-out rows are supplied.
m1_sj_accuracy <- accuracy(ARIMA_fcst, sj_train)


# simple ARIMA model for comparison (Fourier terms only, no climate regressors)
m2_sj <- sj_training %>% model(ARIMA(total_cases ~ fourier(K = 10)))
fcst_arimabasic <- m2_sj %>% forecast(sj_testing)
m2_sj_accuracy <- accuracy(fcst_arimabasic, sj_train)

ARIMA with external regressors - Iquitos

# Iquitos: ARIMA with climate regressors plus Fourier terms (K = 10),
# fitted on the training window only.
m1_iq <- iq_training %>%
  model(
    ARIMA(
      total_cases ~ reanalysis_air_temp_k + reanalysis_avg_temp_k +
        reanalysis_dew_point_temp_k + reanalysis_max_air_temp_k +
        reanalysis_min_air_temp_k + reanalysis_specific_humidity_g_per_kg +
        station_avg_temp_c + station_min_temp_c + station_max_temp_c +
        fourier(K = 10)
    )
  )

# Forecast the hold-out weeks; iq_testing supplies the regressor values.
ARIMA_fcst <- m1_iq %>% forecast(iq_testing)


# Title names the city so this plot is distinguishable from the San Juan one.
ARIMA_fcst %>% autoplot(iq_testing) +
  labs(title = "ARIMA model - Iquitos") +
  xlab("Week") +
  ylab("Total Cases")
## Warning: Removed 1 row containing missing values (`()`).

# Score against the complete series (training + hold-out): the scaled
# measures (MASE, RMSSE) are computed from the training portion and come out
# NaN when only the hold-out rows are supplied.
m1_iq_accuracy <- accuracy(ARIMA_fcst, iq_train)



# Simple ARIMA model for comparison (Fourier terms only, no climate regressors)
m2_iq <- iq_training %>% model(ARIMA(total_cases ~ fourier(K = 10)))
fcst_arimabasic <- m2_iq %>% forecast(iq_testing)
m2_iq_accuracy <- accuracy(fcst_arimabasic, iq_train)

Neural Net Model - San Juan

# San Juan: neural network autoregression with four climate regressors,
# fitted on the training window only.
m3_sj <- sj_training %>%
  model(
    NNETAR(
      total_cases ~ reanalysis_dew_point_temp_k + reanalysis_min_air_temp_k +
        reanalysis_max_air_temp_k + reanalysis_specific_humidity_g_per_kg
    )
  )

# times = 10 presumably keeps the number of simulated paths small for speed.
# NOTE(review): `scale` does not look like a documented argument of the
# NNETAR forecast method (input scaling is set via `scale_inputs` in
# NNETAR()); confirm it has any effect here.
neuralnet_fcst <- m3_sj %>% forecast(sj_testing, times = 10, scale = TRUE)

# The series is indexed by week, so the x axis is labelled "Week".
neuralnet_fcst %>% autoplot(sj_testing) +
  labs(title = "Neural Net model - San Juan") +
  xlab("Week") +
  ylab("Total Cases")

# Score against the complete series (training + hold-out): the scaled
# measures (MASE, RMSSE) are computed from the training portion and come out
# NaN when only the hold-out rows are supplied.
m3_sj_accuracy <- accuracy(neuralnet_fcst, sj_train)

# Baseline network on total_cases alone, without climate regressors.
m4_sj <- sj_training %>% model(NNETAR(total_cases))

neuralnet_fcstbasic <- m4_sj %>% forecast(sj_testing, times = 10, scale = TRUE)
m4_sj_accuracy <- accuracy(neuralnet_fcstbasic, sj_train)

Neural Net Model - Iquitos

# m3_iq <- iq_training %>% model(NNETAR(total_cases ~ reanalysis_dew_point_temp_k + reanalysis_min_air_temp_k + reanalysis_max_air_temp_k + reanalysis_specific_humidity_g_per_kg))
# 
# neuralnet_fcst <- m3_iq %>% forecast(iq_testing, times=10, scale = TRUE)
# 
# 
# neuralnet_fcst %>% autoplot(iq_testing) +
#   labs(title = "Neural Net model - Iquitos") +
#   xlab("Month") +
#   ylab("Total Cases")
# 
# m3_iq_accuracy <- accuracy(neuralnet_fcst, iq_testing)
# 
# m4_iq <- iq_training %>% model(NNETAR(total_cases))
# 
# neuralnet_fcstbasic <- m4_iq %>% forecast(iq_testing, times=10, scale = TRUE)
# m4_iq_accuracy <- accuracy(neuralnet_fcstbasic, iq_testing)

NAIVE model - San Juan

# San Juan: naive benchmark fitted on the training window.
m5_sj <- sj_training %>%
  model(NAIVE(total_cases))


NAIVE_fcst <- m5_sj %>% forecast(sj_testing)

# The series is indexed by week, so the x axis is labelled "Week".
NAIVE_fcst %>% autoplot(sj_testing) +
  labs(title = "Naive Model - San Juan") +
  xlab("Week") +
  ylab("Total Cases")

# Score against the complete series (training + hold-out): the scaled
# measures (MASE, RMSSE) are computed from the training portion and come out
# NaN when only the hold-out rows are supplied.
m5_sj_accuracy <- accuracy(NAIVE_fcst, sj_train)


# Residual diagnostics for the naive fit, with the ACF limited to 12 lags.
m5_sj %>% gg_tsresiduals(lag_max = 12) + labs(title = "Naive Model - San Juan")
## Warning: Removed 1 row containing missing values (`geom_line()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Warning: Removed 1 rows containing non-finite values (`stat_bin()`).

NAIVE model - Iquitos

# Iquitos: naive benchmark fitted on the training window.
m5_iq <- iq_training %>%
  model(NAIVE(total_cases))


NAIVE_fcst <- m5_iq %>% forecast(iq_testing)

# The series is indexed by week, so the x axis is labelled "Week".
NAIVE_fcst %>% autoplot(iq_testing) +
  labs(title = "Naive Model - Iquitos") +
  xlab("Week") +
  ylab("Total Cases")

# Score against the complete series (training + hold-out): the scaled
# measures (MASE, RMSSE) are computed from the training portion and come out
# NaN when only the hold-out rows are supplied.
m5_iq_accuracy <- accuracy(NAIVE_fcst, iq_train)


# Residual diagnostics for the naive fit, with the ACF limited to 12 lags.
m5_iq %>% gg_tsresiduals(lag_max = 12) + labs(title = "Naive Model - Iquitos")
## Warning: Removed 1 row containing missing values (`geom_line()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Warning: Removed 1 rows containing non-finite values (`stat_bin()`).

# Accuracy Metrics

Accuracy Metrics for San Juan Models

# Stack the hold-out accuracy rows of all five San Juan models into one
# table, then list them in ascending order of mean absolute error.
sj_accuracy_metrics <- rbind(
  m1_sj_accuracy,
  m2_sj_accuracy,
  m3_sj_accuracy,
  m4_sj_accuracy,
  m5_sj_accuracy
)

arrange(sj_accuracy_metrics, MAE)
## # A tibble: 5 × 10
##   .model                  .type     ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>                   <chr>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "ARIMA(total_cases ~ r… Test   -7.78  25.9  18.9  -Inf   Inf   NaN   NaN 0.891
## 2 "NAIVE(total_cases)"    Test   15.6   35.9  19.3  -Inf   Inf   NaN   NaN 0.934
## 3 "ARIMA(total_cases ~ f… Test   16.7   30.0  21.3   Inf   Inf   NaN   NaN 0.892
## 4 "NNETAR(total_cases ~ … Test  -26.2   48.8  34.9  -Inf   Inf   NaN   NaN 0.943
## 5 "NNETAR(total_cases)"   Test  -27.5   51.0  44.1  -Inf   Inf   NaN   NaN 0.949

Accuracy Metrics for Iquitos Models

# Stack the hold-out accuracy rows of the Iquitos models and list them in
# ascending order of mean absolute error. The neural-net rows stay disabled
# because the Iquitos NNETAR models are commented out earlier in the script.
iq_accuracy_metrics <- rbind(
  m1_iq_accuracy,
  m2_iq_accuracy,
  # m3_iq_accuracy, m4_iq_accuracy,
  m5_iq_accuracy
)
arrange(iq_accuracy_metrics, MAE)
## # A tibble: 3 × 10
##   .model                   .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>                    <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "ARIMA(total_cases ~ fo… Test   2.26  6.98  5.09  -Inf   Inf   NaN   NaN 0.777
## 2 "NAIVE(total_cases)"     Test   7.01  9.93  7.01   100   100   NaN   NaN 0.808
## 3 "ARIMA(total_cases ~ re… Test   5.70  9.15  7.18   NaN   Inf   NaN   NaN 0.756

Training ARIMA Model on full training set for submission

# Refit the regressor ARIMA (climate covariates + Fourier terms, K = 10) on
# the complete San Juan training series for the competition submission.
ARIMA_sj_model_final <- model(
  sj_train,
  ARIMA(
    total_cases ~ reanalysis_air_temp_k + reanalysis_avg_temp_k +
      reanalysis_dew_point_temp_k + reanalysis_max_air_temp_k +
      reanalysis_min_air_temp_k + reanalysis_specific_humidity_g_per_kg +
      station_avg_temp_c + station_min_temp_c + station_max_temp_c +
      fourier(K = 10)
  )
)


# Same specification refitted on the complete Iquitos training series.
# NOTE(review): in the Iquitos accuracy table the Fourier-only ARIMA has the
# lowest hold-out MAE (5.09 vs 7.18 for this specification); confirm that the
# regressor model is the intended choice for this city.
ARIMA_iq_model_final <- model(
  iq_train,
  ARIMA(
    total_cases ~ reanalysis_air_temp_k + reanalysis_avg_temp_k +
      reanalysis_dew_point_temp_k + reanalysis_max_air_temp_k +
      reanalysis_min_air_temp_k + reanalysis_specific_humidity_g_per_kg +
      station_avg_temp_c + station_min_temp_c + station_max_temp_c +
      fourier(K = 10)
  )
)

Building Predictions on real test data

# Forecast the competition test weeks with the final per-city models; the
# test tsibbles supply the regressor values.
ARIMA_sj_fcst <- ARIMA_sj_model_final %>% forecast(sj_test_real)
ARIMA_iq_fcst <- ARIMA_iq_model_final %>% forecast(iq_test_real)

# Keep only the identifier columns and the point forecast (.mean).
sj_cases <- subset(
  ARIMA_sj_fcst,
  select = c("city", "year", "weekofyear", ".mean")
)
iq_cases <- subset(
  ARIMA_iq_fcst,
  select = c("city", "year", "weekofyear", ".mean")
)

submission <- rbind(sj_cases, iq_cases)
submission <- rename(submission, total_cases = .mean)
# Case counts are non-negative integers, but an ARIMA point forecast can fall
# below zero: round to whole cases, floor at 0, and store as integer so the
# CSV never contains negative or scientific-notation values.
submission$total_cases <- as.integer(pmax(round(submission$total_cases), 0))

# Drop every row that still contains a missing value.
# NOTE(review): this presumably removes the weeks inserted by fill_gaps() and
# any week whose forecast is NA (e.g. missing regressors); confirm the file
# still has the number of rows the submission format expects.
submission <- na.omit(submission)

write.csv(submission, file = "DengAIsubmission1.csv", row.names = FALSE)