1. Load Packages

2. Import Data

# NOTE: the former `rm(list = ls())` call was removed. Wiping the global
# environment from inside a script destroys the caller's objects and does not
# give a clean session anyway (loaded packages and options persist); restart R
# instead when a clean state is needed.

# Directory that holds the three dengue CSV files. Change this single value to
# run the analysis on another machine.
data_dir <- "C:/Users/schic/OneDrive/Documents/Predictive Analytics and Forecasting"

# Training features, test features, and training labels
df_train <- read.csv(file.path(data_dir, "dengue_features_train.csv"))
df_test <- read.csv(file.path(data_dir, "dengue_features_test.csv"))
df_train_results <- read.csv(file.path(data_dir, "dengue_labels_train.csv"))

Training set contains weekly data for two cities, sj and iq, from 1990 to 2010 (sj = San Juan, iq = Iquitos)

San Juan years span from 1990 to 2008 and Iquitos from 2000 to 2010

The testing set contains data for the same two cities: San Juan (sj) from 2008 to 2013 and Iquitos (iq) from 2010 to 2013.

The rest of the data is ecological data to predict dengue fever

3. Data Exploration/Manipulation

# Attach the target variable (total_cases) from the labels file to the
# training features by joining on the shared city / year / weekofyear keys.
# NOTE: merge() sorts its result on the key columns by default, so row
# positions in df_train after this step differ from those in the raw CSV.
df_train <- merge(
  x = df_train,
  y = df_train_results,
  by = c("city", "year", "weekofyear")
)

# Plot a missingness map for the training set and for the test set
missmap(df_train)

missmap(df_test)

Based on the analysis of missing observations, we can see that the training set is missing 2% of its data and the testing set is missing 1% of its data. It appears that at least one row in each of the training and testing sets is missing most of its values, so I will remove such rows from both data frames. I will analyze the specific variables with NAs in the next code chunk.

# Number of missing values in each column of the test set
df_test |>
  is.na() |>
  colSums() |>
  print()
##                                  city                                  year 
##                                     0                                     0 
##                            weekofyear                       week_start_date 
##                                     0                                     0 
##                               ndvi_ne                               ndvi_nw 
##                                    43                                    11 
##                               ndvi_se                               ndvi_sw 
##                                     1                                     1 
##                  precipitation_amt_mm                 reanalysis_air_temp_k 
##                                     2                                     2 
##                 reanalysis_avg_temp_k           reanalysis_dew_point_temp_k 
##                                     2                                     2 
##             reanalysis_max_air_temp_k             reanalysis_min_air_temp_k 
##                                     2                                     2 
##       reanalysis_precip_amt_kg_per_m2  reanalysis_relative_humidity_percent 
##                                     2                                     2 
##          reanalysis_sat_precip_amt_mm reanalysis_specific_humidity_g_per_kg 
##                                     2                                     2 
##                     reanalysis_tdtr_k                    station_avg_temp_c 
##                                     2                                    12 
##               station_diur_temp_rng_c                    station_max_temp_c 
##                                    12                                     3 
##                    station_min_temp_c                     station_precip_mm 
##                                     9                                     5
# Number of missing values in each row of the test set
missing_per_row2 <- df_test |>
  is.na() |>
  rowSums()

# Show the test rows that have 12 or more missing values
df_test[missing_per_row2 >= 12, ] |>
  print()
##    city year weekofyear week_start_date ndvi_ne  ndvi_nw   ndvi_se ndvi_sw
## 36   sj 2009          1      2009-01-01  0.5004 0.161425 0.1607857 0.18865
## 88   sj 2010         53      2010-01-01      NA       NA        NA      NA
##    precipitation_amt_mm reanalysis_air_temp_k reanalysis_avg_temp_k
## 36                   NA                    NA                    NA
## 88                   NA                    NA                    NA
##    reanalysis_dew_point_temp_k reanalysis_max_air_temp_k
## 36                          NA                        NA
## 88                          NA                        NA
##    reanalysis_min_air_temp_k reanalysis_precip_amt_kg_per_m2
## 36                        NA                              NA
## 88                        NA                              NA
##    reanalysis_relative_humidity_percent reanalysis_sat_precip_amt_mm
## 36                                   NA                           NA
## 88                                   NA                           NA
##    reanalysis_specific_humidity_g_per_kg reanalysis_tdtr_k station_avg_temp_c
## 36                                    NA                NA                 NA
## 88                                    NA                NA                 NA
##    station_diur_temp_rng_c station_max_temp_c station_min_temp_c
## 36                      NA                 NA                 NA
## 88                      NA                 NA                 NA
##    station_precip_mm
## 36                NA
## 88                NA
# Number of missing values in each column of the (merged) training set
df_train |>
  is.na() |>
  colSums() |>
  print()
##                                  city                                  year 
##                                     0                                     0 
##                            weekofyear                       week_start_date 
##                                     0                                     0 
##                               ndvi_ne                               ndvi_nw 
##                                   194                                    52 
##                               ndvi_se                               ndvi_sw 
##                                    22                                    22 
##                  precipitation_amt_mm                 reanalysis_air_temp_k 
##                                    13                                    10 
##                 reanalysis_avg_temp_k           reanalysis_dew_point_temp_k 
##                                    10                                    10 
##             reanalysis_max_air_temp_k             reanalysis_min_air_temp_k 
##                                    10                                    10 
##       reanalysis_precip_amt_kg_per_m2  reanalysis_relative_humidity_percent 
##                                    10                                    10 
##          reanalysis_sat_precip_amt_mm reanalysis_specific_humidity_g_per_kg 
##                                    13                                    10 
##                     reanalysis_tdtr_k                    station_avg_temp_c 
##                                    10                                    43 
##               station_diur_temp_rng_c                    station_max_temp_c 
##                                    43                                    20 
##                    station_min_temp_c                     station_precip_mm 
##                                    14                                    22 
##                           total_cases 
##                                     0
# Number of missing values in each row of the (merged) training set
missing_per_row <- df_train |>
  is.na() |>
  rowSums()

# Print the full vector of per-row NA counts
missing_per_row |>
  print()
##    [1]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0
##   [25]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  3  0  0  0  0  0  2  0  0  0
##   [49]  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##   [73]  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0  2  0  0  0  0  0  0  0  0
##   [97]  0  0  0  0  0  0  0  0  0  2  0  0  0  0  0  0  0  0  0  0  2  0  0  0
##  [121]  0  0  4  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##  [145]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##  [169]  0  0  0  0  0  0  0  0  0  0  0  0  2  0 16  0  0  0  0  0  0  0  0  0
##  [193]  0  0  0  0  2  0  0  0  0  0  0  0  0  0  2  2  2  3  2  0  0  0  0  0
##  [217]  0  0  0  0  0  0  0  0  0  0  0  4  0  0  0  0  0  0  0  0  0  0  0  0
##  [241]  0  0  0  0  0  0  2  0  2  0  0  0  0  0  0  0  0  0  0  0  2  0  0  3
##  [265]  4  0  1  0  0  0  0  0  0  2  0  0  0  0  0  0  0 20  0  0  0  0  2  0
##  [289]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  2  0  0  0
##  [313]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  2  4  3  0  0  0
##  [337]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0
##  [361]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##  [385]  0  0  0  0  0  0  1  0  0  2  0  0  0  0  0  0  0  0  0  0  0  0  3  3
##  [409]  3  3  3  3  0  0  0  0  0  0  0  0  0  0  0  2  2  0  0  0  0  0  0  0
##  [433]  0  0  0  0  0  0  0  0  0  0 16  0  0  0  0  0  0  0  0  0  0  0  0  0
##  [457]  0  1  0  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0
##  [481]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  1  0  0  0  0
##  [505]  0  0  0  0  0  0  0  0  0  0  0 20  0  0  0  0  0  0  0  0  0  1  0  0
##  [529]  0  1  0  0  0  1  1  1  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0
##  [553]  0  0  0  0  0  2  0  0  0  0  0  1  0  0  1  1  0  0  0  0  0  0  1  1
##  [577]  1  1  0  2  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##  [601]  0  1  0  0  0  1  0 16  1  0  0  2  0  0  0  0  0  0  0  0  0  1  0  0
##  [625]  0  0  0  1  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0
##  [649]  0  0  0  1  0  1  0  0  3  3  2  0  0  0  0  0  1  0  0  1  0  0  0  0
##  [673]  0  0  1  1  0  0  0  0  0  1  0  1  0  0  0  0  0  0  0  0  0  0  0  1
##  [697]  0  1  0  0  0  0  2  0  0  0 20  0  0  1  0  0  0  0  0  0  0  0  2  0
##  [721]  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0  1  0  0  0  0  0  2  4  4
##  [745]  0  4  4  4  4  4  4  4  4  4  4  0  4  4  4  0  0  1  1  0  0  0  0  0
##  [769]  1  1  0  0  1  0  1  0  0  1  0  0  0  0  0  1  0  0  1  0  0  0  0  0
##  [793]  0  0  0  0  0  0  0  0  1  0  0  0  0  1  0  1  0  1  0  0  1  0  0  0
##  [817]  1  0  0  1  0  0  0  1  0  0  0  1  0  0  0  0  0  0  1  0  0  0  0  0
##  [841]  1  0  1  0  0  0  1  0  0  0  0  1  0  0  0  0  1  1  0  0  1  1  0  0
##  [865]  0  1  0  0  2  1  0  0  1  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0
##  [889]  0  0  1  0  0  0  1  0  0  0  0  0  0  0  0  0  1  1  1  0  1  0  0  0
##  [913]  1  1  0  0  0  0  1 16  0  1  0  1  0  0  0  0  0  1  0  1  0  0  0  0
##  [937]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0  1  1  0  0  1  0
##  [961]  2  0  0  0  0  0  0  0  0  1  0  1  0  0  0  0  1  0  0  0  0  1  0  1
##  [985]  1  0  0  1  1  1  0  0  0  0  0  0  1  0  0  1  0  0  0  0  0  0  0  1
## [1009]  1  0  0  0  0  0  1  1  0  0 20  1  0  0  0  0  0  0  0  1  0  0  0  0
## [1033]  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0  1
## [1057]  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  1  0  1  0  0  0  0  0  1
## [1081]  0  1  0  1  1  0  0  0  0  0  0  1  0  0  0  0  0  1  0  0  0  0  0  2
## [1105]  0  0  0  0  0  0  0  2  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0
## [1129]  0  0  0  0  0  0  2  0  0  0  0  0  0  0  0  1  1  1  0  0  1  0  0  0
## [1153]  0  0  0  0  0  0  1  0  0  0  0  1  0  0  0  0  1  0  0  4  0  0  0  0
## [1177]  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  0  1  0  1  0  0  0  0  1
## [1201]  0  0  0  0  0  0  0  1  2  0  0  0  0  1  0  0  0  0  1  0  0  0  0  0
## [1225]  0  0  0  0  1  0  0 16  0  0  2  0  1  0  0  1  1  2  0  0  0  0  0  0
## [1249]  0  0  1  0  0  1  0  0  0  0  0  0  0  0  1  1  0  0  0  1  0  0  0  0
## [1273]  0  0  0  0  0  0  0  1  0  0  0  0  1  0  0  1  0  0  0  1  0  0  0  1
## [1297]  0  0  0  0  1  0  0  0  0  0  0  0  1  0  0  1  0  0  0  0  0  0  0  0
## [1321]  0  0  0  0  1  1  0  0  0  0 20  0  1  0  0  0  0  0  1  1  0  0  0  0
## [1345]  0  1  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0
## [1369]  0  2  0  0  0  1  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0
## [1393]  0  0  0  0  0  0  1  0  0  1  0  0  0  0  0  1  0  0  0  0  0  1  0  0
## [1417]  1  1  0  0  0  0  0  0  1  0  0  0  2  0  1  1  0  1  0  0  0  0  1  0
## [1441]  0  0  0  0  0  0  1  0  1  1  0  1  0  0  0  1
# Print the per-row NA counts for the last 456 rows of the training set.
# tail() replaces the manual `(length - 455):length` index arithmetic: it
# gives the same result here and simply returns the whole vector (instead of
# erroring on mixed-sign indices) when there are fewer than 456 rows.
print(tail(missing_per_row, 456))
##   [1]  0  0  0  0  0  0  0  1  1  0  0  0  0  0  1  1  0  0 20  1  0  0  0  0  0
##  [26]  0  0  1  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
##  [51]  0  1  0  0  0  1  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  1  0  1  0
##  [76]  0  0  0  0  1  0  1  0  1  1  0  0  0  0  0  0  1  0  0  0  0  0  1  0  0
## [101]  0  0  0  2  0  0  0  0  0  0  0  2  0  0  0  0  0  0  0  0  0  0  0  1  0
## [126]  0  0  0  0  0  0  0  0  0  2  0  0  0  0  0  0  0  0  1  1  1  0  0  1  0
## [151]  0  0  0  0  0  0  0  0  1  0  0  0  0  1  0  0  0  0  1  0  0  4  0  0  0
## [176]  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  0  1  0  1  0  0  0  0  1
## [201]  0  0  0  0  0  0  0  1  2  0  0  0  0  1  0  0  0  0  1  0  0  0  0  0  0
## [226]  0  0  0  1  0  0 16  0  0  2  0  1  0  0  1  1  2  0  0  0  0  0  0  0  0
## [251]  1  0  0  1  0  0  0  0  0  0  0  0  1  1  0  0  0  1  0  0  0  0  0  0  0
## [276]  0  0  0  0  1  0  0  0  0  1  0  0  1  0  0  0  1  0  0  0  1  0  0  0  0
## [301]  1  0  0  0  0  0  0  0  1  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  1
## [326]  1  0  0  0  0 20  0  1  0  0  0  0  0  1  1  0  0  0  0  0  1  0  0  1  0
## [351]  0  0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0  2  0  0  0  1  0
## [376]  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  0
## [401]  0  1  0  0  0  0  0  1  0  0  0  0  0  1  0  0  1  1  0  0  0  0  0  0  1
## [426]  0  0  0  2  0  1  1  0  1  0  0  0  0  1  0  0  0  0  0  0  0  1  0  1  1
## [451]  0  1  0  0  0  1
# Show the training rows that have 16 or more missing values
df_train[missing_per_row >= 16, ] |>
  print()
##      city year weekofyear week_start_date    ndvi_ne    ndvi_nw   ndvi_se
## 183    iq 2004          1      2004-01-01  0.2025714  0.1989667 0.2445571
## 282    iq 2005         53      2005-01-01         NA         NA        NA
## 443    iq 2009          1      2009-01-01  0.1512857  0.1616857 0.1532714
## 516    iq 2010         53      2010-01-01         NA         NA        NA
## 608    sj 1992          1      1992-01-01  0.1251000  0.1578667 0.1852833
## 707    sj 1993         53      1993-01-01         NA         NA        NA
## 920    sj 1998          1      1998-01-01  0.0455000  0.0478000 0.1239857
## 1019   sj 1999         53      1999-01-01         NA         NA        NA
## 1232   sj 2004          1      2004-01-01 -0.2683000 -0.1237000 0.2237571
## 1331   sj 2005         53      2005-01-01         NA         NA        NA
##         ndvi_sw precipitation_amt_mm reanalysis_air_temp_k
## 183  0.18795710                   NA                    NA
## 282          NA                   NA                    NA
## 443  0.11944290                   NA                    NA
## 516          NA                   NA                    NA
## 608  0.18136670                   NA                    NA
## 707          NA                   NA                    NA
## 920  0.08344286                   NA                    NA
## 1019         NA                   NA                    NA
## 1232 0.17654290                   NA                    NA
## 1331         NA                   NA                    NA
##      reanalysis_avg_temp_k reanalysis_dew_point_temp_k
## 183                     NA                          NA
## 282                     NA                          NA
## 443                     NA                          NA
## 516                     NA                          NA
## 608                     NA                          NA
## 707                     NA                          NA
## 920                     NA                          NA
## 1019                    NA                          NA
## 1232                    NA                          NA
## 1331                    NA                          NA
##      reanalysis_max_air_temp_k reanalysis_min_air_temp_k
## 183                         NA                        NA
## 282                         NA                        NA
## 443                         NA                        NA
## 516                         NA                        NA
## 608                         NA                        NA
## 707                         NA                        NA
## 920                         NA                        NA
## 1019                        NA                        NA
## 1232                        NA                        NA
## 1331                        NA                        NA
##      reanalysis_precip_amt_kg_per_m2 reanalysis_relative_humidity_percent
## 183                               NA                                   NA
## 282                               NA                                   NA
## 443                               NA                                   NA
## 516                               NA                                   NA
## 608                               NA                                   NA
## 707                               NA                                   NA
## 920                               NA                                   NA
## 1019                              NA                                   NA
## 1232                              NA                                   NA
## 1331                              NA                                   NA
##      reanalysis_sat_precip_amt_mm reanalysis_specific_humidity_g_per_kg
## 183                            NA                                    NA
## 282                            NA                                    NA
## 443                            NA                                    NA
## 516                            NA                                    NA
## 608                            NA                                    NA
## 707                            NA                                    NA
## 920                            NA                                    NA
## 1019                           NA                                    NA
## 1232                           NA                                    NA
## 1331                           NA                                    NA
##      reanalysis_tdtr_k station_avg_temp_c station_diur_temp_rng_c
## 183                 NA                 NA                      NA
## 282                 NA                 NA                      NA
## 443                 NA                 NA                      NA
## 516                 NA                 NA                      NA
## 608                 NA                 NA                      NA
## 707                 NA                 NA                      NA
## 920                 NA                 NA                      NA
## 1019                NA                 NA                      NA
## 1232                NA                 NA                      NA
## 1331                NA                 NA                      NA
##      station_max_temp_c station_min_temp_c station_precip_mm total_cases
## 183                  NA                 NA                NA           4
## 282                  NA                 NA                NA           9
## 443                  NA                 NA                NA           1
## 516                  NA                 NA                NA           0
## 608                  NA                 NA                NA          81
## 707                  NA                 NA                NA          30
## 920                  NA                 NA                NA          64
## 1019                 NA                 NA                NA          59
## 1232                 NA                 NA                NA          15
## 1331                 NA                 NA                NA          10

Based on the code above, we can see that the training set is missing

194 observations of ‘ndvi_ne’,

52 obs of ‘ndvi_nw’, 22 obs of ‘ndvi_se’ and ‘ndvi_sw’,

13 obs of ‘precipitation_amt_mm’,

10 obs of ‘reanalysis_air_temp_k’ and ‘reanalysis_avg_temp_k’ and ‘reanalysis_dew_point_temp_k’ and ‘reanalysis_max_air_temp_k’ and ‘reanalysis_min_air_temp_k’ and ‘reanalysis_precip_amt_kg_per_m2’ and ‘reanalysis_relative_humidity_percent’ and ‘reanalysis_specific_humidity_g_per_kg’ and ‘reanalysis_tdtr_k’,

13 obs of ‘reanalysis_sat_precip_amt_mm’,

43 obs of ‘station_avg_temp_c’ and ‘station_diur_temp_rng_c’,

20 obs of ‘station_max_temp_c’,

14 obs of ‘station_min_temp_c’,

and 22 obs of ‘station_precip_mm’.

Based on these observations, I will impute the missing observations of most variables with the column median, and I will remove the rows that consist mostly of NA values.

Based on the output above, the rows with 16 or more missing values (rows 183, 282, 443, 516, 608, 707, 920, 1019, 1232, and 1331 of the merged training data) are missing the majority of their observations for the variables, so I will drop these rows.

# Remove the rows that are missing most of their values.
# Rows are selected by their NA count (the same ">= 16" cut-off used when the
# mostly-empty rows were printed) instead of by hard-coded positions: merge()
# re-sorts df_train on its key columns, so fixed positions such as 88 or 140
# no longer point at the mostly-empty rows and would delete good observations
# while leaving the empty rows in place to be filled with medians.
# `rows_to_drop` is a logical mask; a logical mask is also safe when no row
# qualifies, whereas negative indexing with an empty vector drops every row.
rows_to_drop <- rowSums(is.na(df_train)) >= 16
df_train <- df_train[!rows_to_drop, ]

# Create the date variable for the weekly observations
df_train$week_start_date <- ymd(df_train$week_start_date)

# Impute the remaining missing values with the column median
df_train_imputed <- df_train

# Only the numeric feature columns are imputed: positions 5 to 24
# (ndvi_ne through station_precip_mm); keys, date and total_cases are left
# untouched.
# NOTE(review): the median is taken over both cities pooled together; confirm
# that a per-city median is not what is wanted.
numeric_columns_to_impute <- 5:24

df_train_imputed[, numeric_columns_to_impute] <- na.aggregate(
  df_train_imputed[, numeric_columns_to_impute],
  FUN = median
)

# Create date variables for the monthly observations.
# week_start_date is already a Date here (converted above and copied into
# df_train_imputed), so no second conversion is needed.
df_train_imputed$year_month <- format(df_train_imputed$week_start_date, "%Y-%m")

# First day of the observation's month, as a Date
df_train_imputed$year_month_day <- as.Date(
  paste(df_train_imputed$year_month, "01", sep = "-")
)

# Confirm visually that no missing values remain
missmap(df_train_imputed)

# Number of missing values left in each column after imputation
df_train_imputed |>
  is.na() |>
  colSums() |>
  print()
##                                  city                                  year 
##                                     0                                     0 
##                            weekofyear                       week_start_date 
##                                     0                                     0 
##                               ndvi_ne                               ndvi_nw 
##                                     0                                     0 
##                               ndvi_se                               ndvi_sw 
##                                     0                                     0 
##                  precipitation_amt_mm                 reanalysis_air_temp_k 
##                                     0                                     0 
##                 reanalysis_avg_temp_k           reanalysis_dew_point_temp_k 
##                                     0                                     0 
##             reanalysis_max_air_temp_k             reanalysis_min_air_temp_k 
##                                     0                                     0 
##       reanalysis_precip_amt_kg_per_m2  reanalysis_relative_humidity_percent 
##                                     0                                     0 
##          reanalysis_sat_precip_amt_mm reanalysis_specific_humidity_g_per_kg 
##                                     0                                     0 
##                     reanalysis_tdtr_k                    station_avg_temp_c 
##                                     0                                     0 
##               station_diur_temp_rng_c                    station_max_temp_c 
##                                     0                                     0 
##                    station_min_temp_c                     station_precip_mm 
##                                     0                                     0 
##                           total_cases                            year_month 
##                                     0                                     0 
##                        year_month_day 
##                                     0

Now that I have addressed the missing observations, I will create time series objects, a hierarchical time series object, and visualize the historical data

a. Create Time Series Objects

# Drop the columns that are not needed for the time series objects.
# Positions 2, 3, 26 and 27 are year, weekofyear, year_month and
# year_month_day in df_train_imputed.
columns_to_drop <- c(2, 3, 26, 27)
new_df <- df_train_imputed[, -columns_to_drop]

# Build a tsibble keyed by city and indexed by week_start_date, then floor
# every index value to the start of its week.
full_tsibble <- new_df |>
  tsibble(key = city, index = week_start_date) |>
  mutate(week_start_date = floor_date(week_start_date, unit = "week"))

# Establish the hierarchical structure: total cases per city plus the
# aggregate across cities
full_hts <- aggregate_key(full_tsibble, city, Sum_Cases = sum(total_cases))
print(full_hts)
## # A tsibble: 2,491 x 3 [7D]
## # Key:       city [3]
##    week_start_date city         Sum_Cases
##    <date>          <chr*>           <int>
##  1 1990-04-29      <aggregated>         4
##  2 1990-05-06      <aggregated>         5
##  3 1990-05-13      <aggregated>         4
##  4 1990-05-20      <aggregated>         3
##  5 1990-05-27      <aggregated>         6
##  6 1990-06-03      <aggregated>         2
##  7 1990-06-10      <aggregated>         4
##  8 1990-06-17      <aggregated>         5
##  9 1990-06-24      <aggregated>        10
## 10 1990-07-01      <aggregated>         6
## # ℹ 2,481 more rows
# Make the implicit gaps in each series explicit, setting Sum_Cases to 0 for
# every week that is inserted.
# NOTE(review): this treats weeks with no record as weeks with zero cases;
# confirm that is the intended assumption.
full_hts_filled <- fill_gaps(full_hts, Sum_Cases = 0)

print(full_hts_filled)
## # A tsibble: 2,513 x 3 [7D]
## # Key:       city [3]
##    week_start_date city   Sum_Cases
##    <date>          <chr*>     <dbl>
##  1 2000-06-25      iq             0
##  2 2000-07-02      iq             0
##  3 2000-07-09      iq             0
##  4 2000-07-16      iq             0
##  5 2000-07-23      iq             0
##  6 2000-07-30      iq             0
##  7 2000-08-06      iq             0
##  8 2000-08-13      iq             0
##  9 2000-08-20      iq             0
## 10 2000-08-27      iq             0
## # ℹ 2,503 more rows

b. Historical Data Visualizations

# Plot the weekly case counts, one panel per key (each city and the
# aggregate), with an independent y-axis per panel and no legend.
autoplot(full_hts_filled, Sum_Cases) +
  facet_wrap(vars(city), scales = "free_y", ncol = 6) +
  labs(
    y = "Cases of Dengue",
    title = "Number of Cases of Dengue Fever in each City (aggregated by city)"
  ) +
  theme(legend.position = "none")

I have now created a tsibble for the training set that is sequenced with weekly data for robust short-term predictions (good for 5 weeks of predictions per city).

c. Correlation Matrix

# Keep the San Juan rows only, drop the key and index columns, and coerce any
# remaining character column to numeric so that cor() can be applied.
corr_data <- new_df |>
  filter(city == "sj") |>
  select(!c(city, week_start_date)) |>
  mutate(across(where(is.character), as.numeric))

# Visualize the correlation matrix (lower triangle, square glyphs) and keep
# the object returned by corrplot()
correlation_matrix <- corr_data |>
  cor() |>
  corrplot::corrplot(method = "square", type = "lower")

print(correlation_matrix)
## $corr
##                                            ndvi_ne     ndvi_nw      ndvi_se
## ndvi_ne                                1.000000000  0.59161753  0.218431722
## ndvi_nw                                0.591617528  1.00000000  0.191586923
## ndvi_se                                0.218431722  0.19158692  1.000000000
## ndvi_sw                                0.165680178  0.21116445  0.823663260
## precipitation_amt_mm                  -0.051755502 -0.03165610 -0.118169771
## reanalysis_air_temp_k                 -0.074230506 -0.07167253 -0.016321673
## reanalysis_avg_temp_k                 -0.073766834 -0.07099561 -0.013442511
## reanalysis_dew_point_temp_k           -0.047719848 -0.02453295 -0.062879579
## reanalysis_max_air_temp_k             -0.053488197 -0.04383825 -0.007832885
## reanalysis_min_air_temp_k             -0.089804371 -0.07031282 -0.047762255
## reanalysis_precip_amt_kg_per_m2        0.005164418  0.01289867 -0.128462027
## reanalysis_relative_humidity_percent   0.024789043  0.07027854 -0.111818332
## reanalysis_sat_precip_amt_mm          -0.051755502 -0.03165610 -0.118169771
## reanalysis_specific_humidity_g_per_kg -0.044123263 -0.01950563 -0.058654493
## reanalysis_tdtr_k                     -0.019130717 -0.05427261  0.028611907
## station_avg_temp_c                     0.037032627  0.08688059 -0.056020384
## station_diur_temp_rng_c                0.118277032  0.17814988  0.017219622
## station_max_temp_c                     0.067090419  0.13442621 -0.062418040
## station_min_temp_c                     0.002632542  0.02002469 -0.067181337
## station_precip_mm                     -0.061728736 -0.06717668 -0.139280607
## total_cases                            0.063138488  0.09506206  0.026414203
##                                           ndvi_sw precipitation_amt_mm
## ndvi_ne                                0.16568018          -0.05175550
## ndvi_nw                                0.21116445          -0.03165610
## ndvi_se                                0.82366326          -0.11816977
## ndvi_sw                                1.00000000          -0.11757245
## precipitation_amt_mm                  -0.11757245           1.00000000
## reanalysis_air_temp_k                 -0.04297988           0.23274305
## reanalysis_avg_temp_k                 -0.03591891           0.22147215
## reanalysis_dew_point_temp_k           -0.08731194           0.40154892
## reanalysis_max_air_temp_k             -0.01529890           0.25494172
## reanalysis_min_air_temp_k             -0.07196856           0.24465529
## reanalysis_precip_amt_kg_per_m2       -0.12277851           0.50911835
## reanalysis_relative_humidity_percent  -0.11792226           0.50003342
## reanalysis_sat_precip_amt_mm          -0.11757245           1.00000000
## reanalysis_specific_humidity_g_per_kg -0.08049071           0.40870299
## reanalysis_tdtr_k                      0.04960614          -0.09539445
## station_avg_temp_c                    -0.04072092           0.19445476
## station_diur_temp_rng_c                0.06687045          -0.15819741
## station_max_temp_c                    -0.01698596           0.19231915
## station_min_temp_c                    -0.07081906           0.22251161
## station_precip_mm                     -0.17125985           0.56731810
## total_cases                            0.03340995           0.06061048
##                                       reanalysis_air_temp_k
## ndvi_ne                                         -0.07423051
## ndvi_nw                                         -0.07167253
## ndvi_se                                         -0.01632167
## ndvi_sw                                         -0.04297988
## precipitation_amt_mm                             0.23274305
## reanalysis_air_temp_k                            1.00000000
## reanalysis_avg_temp_k                            0.99691437
## reanalysis_dew_point_temp_k                      0.90180265
## reanalysis_max_air_temp_k                        0.93091675
## reanalysis_min_air_temp_k                        0.94212762
## reanalysis_precip_amt_kg_per_m2                  0.07954136
## reanalysis_relative_humidity_percent             0.29741192
## reanalysis_sat_precip_amt_mm                     0.23274305
## reanalysis_specific_humidity_g_per_kg            0.90337916
## reanalysis_tdtr_k                                0.17402323
## station_avg_temp_c                               0.88080251
## station_diur_temp_rng_c                          0.03955553
## station_max_temp_c                               0.69735080
## station_min_temp_c                               0.83350666
## station_precip_mm                                0.11637226
## total_cases                                      0.18389160
##                                       reanalysis_avg_temp_k
## ndvi_ne                                         -0.07376683
## ndvi_nw                                         -0.07099561
## ndvi_se                                         -0.01344251
## ndvi_sw                                         -0.03591891
## precipitation_amt_mm                             0.22147215
## reanalysis_air_temp_k                            0.99691437
## reanalysis_avg_temp_k                            1.00000000
## reanalysis_dew_point_temp_k                      0.89488569
## reanalysis_max_air_temp_k                        0.93742814
## reanalysis_min_air_temp_k                        0.93701864
## reanalysis_precip_amt_kg_per_m2                  0.06151069
## reanalysis_relative_humidity_percent             0.28459533
## reanalysis_sat_precip_amt_mm                     0.22147215
## reanalysis_specific_humidity_g_per_kg            0.89600300
## reanalysis_tdtr_k                                0.19920148
## station_avg_temp_c                               0.88011545
## station_diur_temp_rng_c                          0.05580107
## station_max_temp_c                               0.70484213
## station_min_temp_c                               0.82735814
## station_precip_mm                                0.10057642
## total_cases                                      0.17785781
##                                       reanalysis_dew_point_temp_k
## ndvi_ne                                               -0.04771985
## ndvi_nw                                               -0.02453295
## ndvi_se                                               -0.06287958
## ndvi_sw                                               -0.08731194
## precipitation_amt_mm                                   0.40154892
## reanalysis_air_temp_k                                  0.90180265
## reanalysis_avg_temp_k                                  0.89488569
## reanalysis_dew_point_temp_k                            1.00000000
## reanalysis_max_air_temp_k                              0.84677079
## reanalysis_min_air_temp_k                              0.89444130
## reanalysis_precip_amt_kg_per_m2                        0.32841934
## reanalysis_relative_humidity_percent                   0.67926782
## reanalysis_sat_precip_amt_mm                           0.40154892
## reanalysis_specific_humidity_g_per_kg                  0.99853159
## reanalysis_tdtr_k                                     -0.03445904
## station_avg_temp_c                                     0.86875303
## station_diur_temp_rng_c                               -0.05447346
## station_max_temp_c                                     0.69095822
## station_min_temp_c                                     0.84865475
## station_precip_mm                                      0.28903154
## total_cases                                            0.20593757
##                                       reanalysis_max_air_temp_k
## ndvi_ne                                            -0.053488197
## ndvi_nw                                            -0.043838245
## ndvi_se                                            -0.007832885
## ndvi_sw                                            -0.015298903
## precipitation_amt_mm                                0.254941716
## reanalysis_air_temp_k                               0.930916751
## reanalysis_avg_temp_k                               0.937428144
## reanalysis_dew_point_temp_k                         0.846770789
## reanalysis_max_air_temp_k                           1.000000000
## reanalysis_min_air_temp_k                           0.821129474
## reanalysis_precip_amt_kg_per_m2                     0.090558979
## reanalysis_relative_humidity_percent                0.288814657
## reanalysis_sat_precip_amt_mm                        0.254941716
## reanalysis_specific_humidity_g_per_kg               0.852748941
## reanalysis_tdtr_k                                   0.352567711
## station_avg_temp_c                                  0.852270844
## station_diur_temp_rng_c                             0.116724207
## station_max_temp_c                                  0.762332975
## station_min_temp_c                                  0.767984439
## station_precip_mm                                   0.107114105
## total_cases                                         0.196406787
##                                       reanalysis_min_air_temp_k
## ndvi_ne                                             -0.08980437
## ndvi_nw                                             -0.07031282
## ndvi_se                                             -0.04776226
## ndvi_sw                                             -0.07196856
## precipitation_amt_mm                                 0.24465529
## reanalysis_air_temp_k                                0.94212762
## reanalysis_avg_temp_k                                0.93701864
## reanalysis_dew_point_temp_k                          0.89444130
## reanalysis_max_air_temp_k                            0.82112947
## reanalysis_min_air_temp_k                            1.00000000
## reanalysis_precip_amt_kg_per_m2                      0.13168131
## reanalysis_relative_humidity_percent                 0.38084364
## reanalysis_sat_precip_amt_mm                         0.24465529
## reanalysis_specific_humidity_g_per_kg                0.89191250
## reanalysis_tdtr_k                                   -0.05468321
## station_avg_temp_c                                   0.83871260
## station_diur_temp_rng_c                             -0.02471044
## station_max_temp_c                                   0.62367502
## station_min_temp_c                                   0.82846509
## station_precip_mm                                    0.15490568
## total_cases                                          0.18974580
##                                       reanalysis_precip_amt_kg_per_m2
## ndvi_ne                                                   0.005164418
## ndvi_nw                                                   0.012898675
## ndvi_se                                                  -0.128462027
## ndvi_sw                                                  -0.122778508
## precipitation_amt_mm                                      0.509118352
## reanalysis_air_temp_k                                     0.079541356
## reanalysis_avg_temp_k                                     0.061510691
## reanalysis_dew_point_temp_k                               0.328419341
## reanalysis_max_air_temp_k                                 0.090558979
## reanalysis_min_air_temp_k                                 0.131681311
## reanalysis_precip_amt_kg_per_m2                           1.000000000
## reanalysis_relative_humidity_percent                      0.604166310
## reanalysis_sat_precip_amt_mm                              0.509118352
## reanalysis_specific_humidity_g_per_kg                     0.334423723
## reanalysis_tdtr_k                                        -0.307134813
## station_avg_temp_c                                        0.133908011
## station_diur_temp_rng_c                                  -0.249982789
## station_max_temp_c                                        0.080654363
## station_min_temp_c                                        0.196450472
## station_precip_mm                                         0.478951364
## total_cases                                               0.108055077
##                                       reanalysis_relative_humidity_percent
## ndvi_ne                                                         0.02478904
## ndvi_nw                                                         0.07027854
## ndvi_se                                                        -0.11181833
## ndvi_sw                                                        -0.11792226
## precipitation_amt_mm                                            0.50003342
## reanalysis_air_temp_k                                           0.29741192
## reanalysis_avg_temp_k                                           0.28459533
## reanalysis_dew_point_temp_k                                     0.67926782
## reanalysis_max_air_temp_k                                       0.28881466
## reanalysis_min_air_temp_k                                       0.38084364
## reanalysis_precip_amt_kg_per_m2                                 0.60416631
## reanalysis_relative_humidity_percent                            1.00000000
## reanalysis_sat_precip_amt_mm                                    0.50003342
## reanalysis_specific_humidity_g_per_kg                           0.67403129
## reanalysis_tdtr_k                                              -0.37446737
## station_avg_temp_c                                              0.42504559
## station_diur_temp_rng_c                                        -0.19330629
## station_max_temp_c                                              0.34198090
## station_min_temp_c                                              0.46348816
## station_precip_mm                                               0.44903058
## total_cases                                                     0.14427084
##                                       reanalysis_sat_precip_amt_mm
## ndvi_ne                                                -0.05175550
## ndvi_nw                                                -0.03165610
## ndvi_se                                                -0.11816977
## ndvi_sw                                                -0.11757245
## precipitation_amt_mm                                    1.00000000
## reanalysis_air_temp_k                                   0.23274305
## reanalysis_avg_temp_k                                   0.22147215
## reanalysis_dew_point_temp_k                             0.40154892
## reanalysis_max_air_temp_k                               0.25494172
## reanalysis_min_air_temp_k                               0.24465529
## reanalysis_precip_amt_kg_per_m2                         0.50911835
## reanalysis_relative_humidity_percent                    0.50003342
## reanalysis_sat_precip_amt_mm                            1.00000000
## reanalysis_specific_humidity_g_per_kg                   0.40870299
## reanalysis_tdtr_k                                      -0.09539445
## station_avg_temp_c                                      0.19445476
## station_diur_temp_rng_c                                -0.15819741
## station_max_temp_c                                      0.19231915
## station_min_temp_c                                      0.22251161
## station_precip_mm                                       0.56731810
## total_cases                                             0.06061048
##                                       reanalysis_specific_humidity_g_per_kg
## ndvi_ne                                                         -0.04412326
## ndvi_nw                                                         -0.01950563
## ndvi_se                                                         -0.05865449
## ndvi_sw                                                         -0.08049071
## precipitation_amt_mm                                             0.40870299
## reanalysis_air_temp_k                                            0.90337916
## reanalysis_avg_temp_k                                            0.89600300
## reanalysis_dew_point_temp_k                                      0.99853159
## reanalysis_max_air_temp_k                                        0.85274894
## reanalysis_min_air_temp_k                                        0.89191250
## reanalysis_precip_amt_kg_per_m2                                  0.33442372
## reanalysis_relative_humidity_percent                             0.67403129
## reanalysis_sat_precip_amt_mm                                     0.40870299
## reanalysis_specific_humidity_g_per_kg                            1.00000000
## reanalysis_tdtr_k                                               -0.02743759
## station_avg_temp_c                                               0.86976974
## station_diur_temp_rng_c                                         -0.05757736
## station_max_temp_c                                               0.69201583
## station_min_temp_c                                               0.84774102
## station_precip_mm                                                0.29227375
## total_cases                                                      0.21002550
##                                       reanalysis_tdtr_k station_avg_temp_c
## ndvi_ne                                    -0.019130717         0.03703263
## ndvi_nw                                    -0.054272607         0.08688059
## ndvi_se                                     0.028611907        -0.05602038
## ndvi_sw                                     0.049606141        -0.04072092
## precipitation_amt_mm                       -0.095394450         0.19445476
## reanalysis_air_temp_k                       0.174023234         0.88080251
## reanalysis_avg_temp_k                       0.199201481         0.88011545
## reanalysis_dew_point_temp_k                -0.034459037         0.86875303
## reanalysis_max_air_temp_k                   0.352567711         0.85227084
## reanalysis_min_air_temp_k                  -0.054683207         0.83871260
## reanalysis_precip_amt_kg_per_m2            -0.307134813         0.13390801
## reanalysis_relative_humidity_percent       -0.374467373         0.42504559
## reanalysis_sat_precip_amt_mm               -0.095394450         0.19445476
## reanalysis_specific_humidity_g_per_kg      -0.027437590         0.86976974
## reanalysis_tdtr_k                           1.000000000         0.13755714
## station_avg_temp_c                          0.137557135         1.00000000
## station_diur_temp_rng_c                     0.371511809         0.18490247
## station_max_temp_c                          0.280621187         0.86479315
## station_min_temp_c                          0.007762314         0.89780787
## station_precip_mm                          -0.206101924         0.03358466
## total_cases                                -0.066769634         0.19747743
##                                       station_diur_temp_rng_c
## ndvi_ne                                            0.11827703
## ndvi_nw                                            0.17814988
## ndvi_se                                            0.01721962
## ndvi_sw                                            0.06687045
## precipitation_amt_mm                              -0.15819741
## reanalysis_air_temp_k                              0.03955553
## reanalysis_avg_temp_k                              0.05580107
## reanalysis_dew_point_temp_k                       -0.05447346
## reanalysis_max_air_temp_k                          0.11672421
## reanalysis_min_air_temp_k                         -0.02471044
## reanalysis_precip_amt_kg_per_m2                   -0.24998279
## reanalysis_relative_humidity_percent              -0.19330629
## reanalysis_sat_precip_amt_mm                      -0.15819741
## reanalysis_specific_humidity_g_per_kg             -0.05757736
## reanalysis_tdtr_k                                  0.37151181
## station_avg_temp_c                                 0.18490247
## station_diur_temp_rng_c                            1.00000000
## station_max_temp_c                                 0.47400018
## station_min_temp_c                                -0.12329033
## station_precip_mm                                 -0.26478423
## total_cases                                        0.03360986
##                                       station_max_temp_c station_min_temp_c
## ndvi_ne                                      0.067090419        0.002632542
## ndvi_nw                                      0.134426207        0.020024690
## ndvi_se                                     -0.062418040       -0.067181337
## ndvi_sw                                     -0.016985960       -0.070819059
## precipitation_amt_mm                         0.192319152        0.222511607
## reanalysis_air_temp_k                        0.697350801        0.833506664
## reanalysis_avg_temp_k                        0.704842133        0.827358137
## reanalysis_dew_point_temp_k                  0.690958217        0.848654748
## reanalysis_max_air_temp_k                    0.762332975        0.767984439
## reanalysis_min_air_temp_k                    0.623675017        0.828465090
## reanalysis_precip_amt_kg_per_m2              0.080654363        0.196450472
## reanalysis_relative_humidity_percent         0.341980900        0.463488162
## reanalysis_sat_precip_amt_mm                 0.192319152        0.222511607
## reanalysis_specific_humidity_g_per_kg        0.692015827        0.847741022
## reanalysis_tdtr_k                            0.280621187        0.007762314
## station_avg_temp_c                           0.864793154        0.897807872
## station_diur_temp_rng_c                      0.474000181       -0.123290335
## station_max_temp_c                           1.000000000        0.672630792
## station_min_temp_c                           0.672630792        1.000000000
## station_precip_mm                            0.008407339        0.087868654
## total_cases                                  0.189673857        0.178650008
##                                       station_precip_mm total_cases
## ndvi_ne                                    -0.061728736  0.06313849
## ndvi_nw                                    -0.067176683  0.09506206
## ndvi_se                                    -0.139280607  0.02641420
## ndvi_sw                                    -0.171259849  0.03340995
## precipitation_amt_mm                        0.567318096  0.06061048
## reanalysis_air_temp_k                       0.116372260  0.18389160
## reanalysis_avg_temp_k                       0.100576419  0.17785781
## reanalysis_dew_point_temp_k                 0.289031541  0.20593757
## reanalysis_max_air_temp_k                   0.107114105  0.19640679
## reanalysis_min_air_temp_k                   0.154905675  0.18974580
## reanalysis_precip_amt_kg_per_m2             0.478951364  0.10805508
## reanalysis_relative_humidity_percent        0.449030575  0.14427084
## reanalysis_sat_precip_amt_mm                0.567318096  0.06061048
## reanalysis_specific_humidity_g_per_kg       0.292273751  0.21002550
## reanalysis_tdtr_k                          -0.206101924 -0.06676963
## station_avg_temp_c                          0.033584661  0.19747743
## station_diur_temp_rng_c                    -0.264784228  0.03360986
## station_max_temp_c                          0.008407339  0.18967386
## station_min_temp_c                          0.087868654  0.17865001
## station_precip_mm                           1.000000000  0.05135153
## total_cases                                 0.051351528  1.00000000
## 
## $corrPos
##                                     xName                                 yName
## 1                                 ndvi_ne                               ndvi_ne
## 2                                 ndvi_ne                               ndvi_nw
## 3                                 ndvi_ne                               ndvi_se
## 4                                 ndvi_ne                               ndvi_sw
## 5                                 ndvi_ne                  precipitation_amt_mm
## 6                                 ndvi_ne                 reanalysis_air_temp_k
## 7                                 ndvi_ne                 reanalysis_avg_temp_k
## 8                                 ndvi_ne           reanalysis_dew_point_temp_k
## 9                                 ndvi_ne             reanalysis_max_air_temp_k
## 10                                ndvi_ne             reanalysis_min_air_temp_k
## 11                                ndvi_ne       reanalysis_precip_amt_kg_per_m2
## 12                                ndvi_ne  reanalysis_relative_humidity_percent
## 13                                ndvi_ne          reanalysis_sat_precip_amt_mm
## 14                                ndvi_ne reanalysis_specific_humidity_g_per_kg
## 15                                ndvi_ne                     reanalysis_tdtr_k
## 16                                ndvi_ne                    station_avg_temp_c
## 17                                ndvi_ne               station_diur_temp_rng_c
## 18                                ndvi_ne                    station_max_temp_c
## 19                                ndvi_ne                    station_min_temp_c
## 20                                ndvi_ne                     station_precip_mm
## 21                                ndvi_ne                           total_cases
## 22                                ndvi_nw                               ndvi_nw
## 23                                ndvi_nw                               ndvi_se
## 24                                ndvi_nw                               ndvi_sw
## 25                                ndvi_nw                  precipitation_amt_mm
## 26                                ndvi_nw                 reanalysis_air_temp_k
## 27                                ndvi_nw                 reanalysis_avg_temp_k
## 28                                ndvi_nw           reanalysis_dew_point_temp_k
## 29                                ndvi_nw             reanalysis_max_air_temp_k
## 30                                ndvi_nw             reanalysis_min_air_temp_k
## 31                                ndvi_nw       reanalysis_precip_amt_kg_per_m2
## 32                                ndvi_nw  reanalysis_relative_humidity_percent
## 33                                ndvi_nw          reanalysis_sat_precip_amt_mm
## 34                                ndvi_nw reanalysis_specific_humidity_g_per_kg
## 35                                ndvi_nw                     reanalysis_tdtr_k
## 36                                ndvi_nw                    station_avg_temp_c
## 37                                ndvi_nw               station_diur_temp_rng_c
## 38                                ndvi_nw                    station_max_temp_c
## 39                                ndvi_nw                    station_min_temp_c
## 40                                ndvi_nw                     station_precip_mm
## 41                                ndvi_nw                           total_cases
## 42                                ndvi_se                               ndvi_se
## 43                                ndvi_se                               ndvi_sw
## 44                                ndvi_se                  precipitation_amt_mm
## 45                                ndvi_se                 reanalysis_air_temp_k
## 46                                ndvi_se                 reanalysis_avg_temp_k
## 47                                ndvi_se           reanalysis_dew_point_temp_k
## 48                                ndvi_se             reanalysis_max_air_temp_k
## 49                                ndvi_se             reanalysis_min_air_temp_k
## 50                                ndvi_se       reanalysis_precip_amt_kg_per_m2
## 51                                ndvi_se  reanalysis_relative_humidity_percent
## 52                                ndvi_se          reanalysis_sat_precip_amt_mm
## 53                                ndvi_se reanalysis_specific_humidity_g_per_kg
## 54                                ndvi_se                     reanalysis_tdtr_k
## 55                                ndvi_se                    station_avg_temp_c
## 56                                ndvi_se               station_diur_temp_rng_c
## 57                                ndvi_se                    station_max_temp_c
## 58                                ndvi_se                    station_min_temp_c
## 59                                ndvi_se                     station_precip_mm
## 60                                ndvi_se                           total_cases
## 61                                ndvi_sw                               ndvi_sw
## 62                                ndvi_sw                  precipitation_amt_mm
## 63                                ndvi_sw                 reanalysis_air_temp_k
## 64                                ndvi_sw                 reanalysis_avg_temp_k
## 65                                ndvi_sw           reanalysis_dew_point_temp_k
## 66                                ndvi_sw             reanalysis_max_air_temp_k
## 67                                ndvi_sw             reanalysis_min_air_temp_k
## 68                                ndvi_sw       reanalysis_precip_amt_kg_per_m2
## 69                                ndvi_sw  reanalysis_relative_humidity_percent
## 70                                ndvi_sw          reanalysis_sat_precip_amt_mm
## 71                                ndvi_sw reanalysis_specific_humidity_g_per_kg
## 72                                ndvi_sw                     reanalysis_tdtr_k
## 73                                ndvi_sw                    station_avg_temp_c
## 74                                ndvi_sw               station_diur_temp_rng_c
## 75                                ndvi_sw                    station_max_temp_c
## 76                                ndvi_sw                    station_min_temp_c
## 77                                ndvi_sw                     station_precip_mm
## 78                                ndvi_sw                           total_cases
## 79                   precipitation_amt_mm                  precipitation_amt_mm
## 80                   precipitation_amt_mm                 reanalysis_air_temp_k
## 81                   precipitation_amt_mm                 reanalysis_avg_temp_k
## 82                   precipitation_amt_mm           reanalysis_dew_point_temp_k
## 83                   precipitation_amt_mm             reanalysis_max_air_temp_k
## 84                   precipitation_amt_mm             reanalysis_min_air_temp_k
## 85                   precipitation_amt_mm       reanalysis_precip_amt_kg_per_m2
## 86                   precipitation_amt_mm  reanalysis_relative_humidity_percent
## 87                   precipitation_amt_mm          reanalysis_sat_precip_amt_mm
## 88                   precipitation_amt_mm reanalysis_specific_humidity_g_per_kg
## 89                   precipitation_amt_mm                     reanalysis_tdtr_k
## 90                   precipitation_amt_mm                    station_avg_temp_c
## 91                   precipitation_amt_mm               station_diur_temp_rng_c
## 92                   precipitation_amt_mm                    station_max_temp_c
## 93                   precipitation_amt_mm                    station_min_temp_c
## 94                   precipitation_amt_mm                     station_precip_mm
## 95                   precipitation_amt_mm                           total_cases
## 96                  reanalysis_air_temp_k                 reanalysis_air_temp_k
## 97                  reanalysis_air_temp_k                 reanalysis_avg_temp_k
## 98                  reanalysis_air_temp_k           reanalysis_dew_point_temp_k
## 99                  reanalysis_air_temp_k             reanalysis_max_air_temp_k
## 100                 reanalysis_air_temp_k             reanalysis_min_air_temp_k
## 101                 reanalysis_air_temp_k       reanalysis_precip_amt_kg_per_m2
## 102                 reanalysis_air_temp_k  reanalysis_relative_humidity_percent
## 103                 reanalysis_air_temp_k          reanalysis_sat_precip_amt_mm
## 104                 reanalysis_air_temp_k reanalysis_specific_humidity_g_per_kg
## 105                 reanalysis_air_temp_k                     reanalysis_tdtr_k
## 106                 reanalysis_air_temp_k                    station_avg_temp_c
## 107                 reanalysis_air_temp_k               station_diur_temp_rng_c
## 108                 reanalysis_air_temp_k                    station_max_temp_c
## 109                 reanalysis_air_temp_k                    station_min_temp_c
## 110                 reanalysis_air_temp_k                     station_precip_mm
## 111                 reanalysis_air_temp_k                           total_cases
## 112                 reanalysis_avg_temp_k                 reanalysis_avg_temp_k
## 113                 reanalysis_avg_temp_k           reanalysis_dew_point_temp_k
## 114                 reanalysis_avg_temp_k             reanalysis_max_air_temp_k
## 115                 reanalysis_avg_temp_k             reanalysis_min_air_temp_k
## 116                 reanalysis_avg_temp_k       reanalysis_precip_amt_kg_per_m2
## 117                 reanalysis_avg_temp_k  reanalysis_relative_humidity_percent
## 118                 reanalysis_avg_temp_k          reanalysis_sat_precip_amt_mm
## 119                 reanalysis_avg_temp_k reanalysis_specific_humidity_g_per_kg
## 120                 reanalysis_avg_temp_k                     reanalysis_tdtr_k
## 121                 reanalysis_avg_temp_k                    station_avg_temp_c
## 122                 reanalysis_avg_temp_k               station_diur_temp_rng_c
## 123                 reanalysis_avg_temp_k                    station_max_temp_c
## 124                 reanalysis_avg_temp_k                    station_min_temp_c
## 125                 reanalysis_avg_temp_k                     station_precip_mm
## 126                 reanalysis_avg_temp_k                           total_cases
## 127           reanalysis_dew_point_temp_k           reanalysis_dew_point_temp_k
## 128           reanalysis_dew_point_temp_k             reanalysis_max_air_temp_k
## 129           reanalysis_dew_point_temp_k             reanalysis_min_air_temp_k
## 130           reanalysis_dew_point_temp_k       reanalysis_precip_amt_kg_per_m2
## 131           reanalysis_dew_point_temp_k  reanalysis_relative_humidity_percent
## 132           reanalysis_dew_point_temp_k          reanalysis_sat_precip_amt_mm
## 133           reanalysis_dew_point_temp_k reanalysis_specific_humidity_g_per_kg
## 134           reanalysis_dew_point_temp_k                     reanalysis_tdtr_k
## 135           reanalysis_dew_point_temp_k                    station_avg_temp_c
## 136           reanalysis_dew_point_temp_k               station_diur_temp_rng_c
## 137           reanalysis_dew_point_temp_k                    station_max_temp_c
## 138           reanalysis_dew_point_temp_k                    station_min_temp_c
## 139           reanalysis_dew_point_temp_k                     station_precip_mm
## 140           reanalysis_dew_point_temp_k                           total_cases
## 141             reanalysis_max_air_temp_k             reanalysis_max_air_temp_k
## 142             reanalysis_max_air_temp_k             reanalysis_min_air_temp_k
## 143             reanalysis_max_air_temp_k       reanalysis_precip_amt_kg_per_m2
## 144             reanalysis_max_air_temp_k  reanalysis_relative_humidity_percent
## 145             reanalysis_max_air_temp_k          reanalysis_sat_precip_amt_mm
## 146             reanalysis_max_air_temp_k reanalysis_specific_humidity_g_per_kg
## 147             reanalysis_max_air_temp_k                     reanalysis_tdtr_k
## 148             reanalysis_max_air_temp_k                    station_avg_temp_c
## 149             reanalysis_max_air_temp_k               station_diur_temp_rng_c
## 150             reanalysis_max_air_temp_k                    station_max_temp_c
## 151             reanalysis_max_air_temp_k                    station_min_temp_c
## 152             reanalysis_max_air_temp_k                     station_precip_mm
## 153             reanalysis_max_air_temp_k                           total_cases
## 154             reanalysis_min_air_temp_k             reanalysis_min_air_temp_k
## 155             reanalysis_min_air_temp_k       reanalysis_precip_amt_kg_per_m2
## 156             reanalysis_min_air_temp_k  reanalysis_relative_humidity_percent
## 157             reanalysis_min_air_temp_k          reanalysis_sat_precip_amt_mm
## 158             reanalysis_min_air_temp_k reanalysis_specific_humidity_g_per_kg
## 159             reanalysis_min_air_temp_k                     reanalysis_tdtr_k
## 160             reanalysis_min_air_temp_k                    station_avg_temp_c
## 161             reanalysis_min_air_temp_k               station_diur_temp_rng_c
## 162             reanalysis_min_air_temp_k                    station_max_temp_c
## 163             reanalysis_min_air_temp_k                    station_min_temp_c
## 164             reanalysis_min_air_temp_k                     station_precip_mm
## 165             reanalysis_min_air_temp_k                           total_cases
## 166       reanalysis_precip_amt_kg_per_m2       reanalysis_precip_amt_kg_per_m2
## 167       reanalysis_precip_amt_kg_per_m2  reanalysis_relative_humidity_percent
## 168       reanalysis_precip_amt_kg_per_m2          reanalysis_sat_precip_amt_mm
## 169       reanalysis_precip_amt_kg_per_m2 reanalysis_specific_humidity_g_per_kg
## 170       reanalysis_precip_amt_kg_per_m2                     reanalysis_tdtr_k
## 171       reanalysis_precip_amt_kg_per_m2                    station_avg_temp_c
## 172       reanalysis_precip_amt_kg_per_m2               station_diur_temp_rng_c
## 173       reanalysis_precip_amt_kg_per_m2                    station_max_temp_c
## 174       reanalysis_precip_amt_kg_per_m2                    station_min_temp_c
## 175       reanalysis_precip_amt_kg_per_m2                     station_precip_mm
## 176       reanalysis_precip_amt_kg_per_m2                           total_cases
## 177  reanalysis_relative_humidity_percent  reanalysis_relative_humidity_percent
## 178  reanalysis_relative_humidity_percent          reanalysis_sat_precip_amt_mm
## 179  reanalysis_relative_humidity_percent reanalysis_specific_humidity_g_per_kg
## 180  reanalysis_relative_humidity_percent                     reanalysis_tdtr_k
## 181  reanalysis_relative_humidity_percent                    station_avg_temp_c
## 182  reanalysis_relative_humidity_percent               station_diur_temp_rng_c
## 183  reanalysis_relative_humidity_percent                    station_max_temp_c
## 184  reanalysis_relative_humidity_percent                    station_min_temp_c
## 185  reanalysis_relative_humidity_percent                     station_precip_mm
## 186  reanalysis_relative_humidity_percent                           total_cases
## 187          reanalysis_sat_precip_amt_mm          reanalysis_sat_precip_amt_mm
## 188          reanalysis_sat_precip_amt_mm reanalysis_specific_humidity_g_per_kg
## 189          reanalysis_sat_precip_amt_mm                     reanalysis_tdtr_k
## 190          reanalysis_sat_precip_amt_mm                    station_avg_temp_c
## 191          reanalysis_sat_precip_amt_mm               station_diur_temp_rng_c
## 192          reanalysis_sat_precip_amt_mm                    station_max_temp_c
## 193          reanalysis_sat_precip_amt_mm                    station_min_temp_c
## 194          reanalysis_sat_precip_amt_mm                     station_precip_mm
## 195          reanalysis_sat_precip_amt_mm                           total_cases
## 196 reanalysis_specific_humidity_g_per_kg reanalysis_specific_humidity_g_per_kg
## 197 reanalysis_specific_humidity_g_per_kg                     reanalysis_tdtr_k
## 198 reanalysis_specific_humidity_g_per_kg                    station_avg_temp_c
## 199 reanalysis_specific_humidity_g_per_kg               station_diur_temp_rng_c
## 200 reanalysis_specific_humidity_g_per_kg                    station_max_temp_c
## 201 reanalysis_specific_humidity_g_per_kg                    station_min_temp_c
## 202 reanalysis_specific_humidity_g_per_kg                     station_precip_mm
## 203 reanalysis_specific_humidity_g_per_kg                           total_cases
## 204                     reanalysis_tdtr_k                     reanalysis_tdtr_k
## 205                     reanalysis_tdtr_k                    station_avg_temp_c
## 206                     reanalysis_tdtr_k               station_diur_temp_rng_c
## 207                     reanalysis_tdtr_k                    station_max_temp_c
## 208                     reanalysis_tdtr_k                    station_min_temp_c
## 209                     reanalysis_tdtr_k                     station_precip_mm
## 210                     reanalysis_tdtr_k                           total_cases
## 211                    station_avg_temp_c                    station_avg_temp_c
## 212                    station_avg_temp_c               station_diur_temp_rng_c
## 213                    station_avg_temp_c                    station_max_temp_c
## 214                    station_avg_temp_c                    station_min_temp_c
## 215                    station_avg_temp_c                     station_precip_mm
## 216                    station_avg_temp_c                           total_cases
## 217               station_diur_temp_rng_c               station_diur_temp_rng_c
## 218               station_diur_temp_rng_c                    station_max_temp_c
## 219               station_diur_temp_rng_c                    station_min_temp_c
## 220               station_diur_temp_rng_c                     station_precip_mm
## 221               station_diur_temp_rng_c                           total_cases
## 222                    station_max_temp_c                    station_max_temp_c
## 223                    station_max_temp_c                    station_min_temp_c
## 224                    station_max_temp_c                     station_precip_mm
## 225                    station_max_temp_c                           total_cases
## 226                    station_min_temp_c                    station_min_temp_c
## 227                    station_min_temp_c                     station_precip_mm
## 228                    station_min_temp_c                           total_cases
## 229                     station_precip_mm                     station_precip_mm
## 230                     station_precip_mm                           total_cases
## 231                           total_cases                           total_cases
##      x  y         corr
## 1    1 21  1.000000000
## 2    1 20  0.591617528
## 3    1 19  0.218431722
## 4    1 18  0.165680178
## 5    1 17 -0.051755502
## 6    1 16 -0.074230506
## 7    1 15 -0.073766834
## 8    1 14 -0.047719848
## 9    1 13 -0.053488197
## 10   1 12 -0.089804371
## 11   1 11  0.005164418
## 12   1 10  0.024789043
## 13   1  9 -0.051755502
## 14   1  8 -0.044123263
## 15   1  7 -0.019130717
## 16   1  6  0.037032627
## 17   1  5  0.118277032
## 18   1  4  0.067090419
## 19   1  3  0.002632542
## 20   1  2 -0.061728736
## 21   1  1  0.063138488
## 22   2 20  1.000000000
## 23   2 19  0.191586923
## 24   2 18  0.211164451
## 25   2 17 -0.031656102
## 26   2 16 -0.071672525
## 27   2 15 -0.070995613
## 28   2 14 -0.024532951
## 29   2 13 -0.043838245
## 30   2 12 -0.070312817
## 31   2 11  0.012898675
## 32   2 10  0.070278544
## 33   2  9 -0.031656102
## 34   2  8 -0.019505633
## 35   2  7 -0.054272607
## 36   2  6  0.086880589
## 37   2  5  0.178149879
## 38   2  4  0.134426207
## 39   2  3  0.020024690
## 40   2  2 -0.067176683
## 41   2  1  0.095062058
## 42   3 19  1.000000000
## 43   3 18  0.823663260
## 44   3 17 -0.118169771
## 45   3 16 -0.016321673
## 46   3 15 -0.013442511
## 47   3 14 -0.062879579
## 48   3 13 -0.007832885
## 49   3 12 -0.047762255
## 50   3 11 -0.128462027
## 51   3 10 -0.111818332
## 52   3  9 -0.118169771
## 53   3  8 -0.058654493
## 54   3  7  0.028611907
## 55   3  6 -0.056020384
## 56   3  5  0.017219622
## 57   3  4 -0.062418040
## 58   3  3 -0.067181337
## 59   3  2 -0.139280607
## 60   3  1  0.026414203
## 61   4 18  1.000000000
## 62   4 17 -0.117572449
## 63   4 16 -0.042979876
## 64   4 15 -0.035918913
## 65   4 14 -0.087311942
## 66   4 13 -0.015298903
## 67   4 12 -0.071968563
## 68   4 11 -0.122778508
## 69   4 10 -0.117922258
## 70   4  9 -0.117572449
## 71   4  8 -0.080490715
## 72   4  7  0.049606141
## 73   4  6 -0.040720923
## 74   4  5  0.066870448
## 75   4  4 -0.016985960
## 76   4  3 -0.070819059
## 77   4  2 -0.171259849
## 78   4  1  0.033409948
## 79   5 17  1.000000000
## 80   5 16  0.232743047
## 81   5 15  0.221472152
## 82   5 14  0.401548921
## 83   5 13  0.254941716
## 84   5 12  0.244655293
## 85   5 11  0.509118352
## 86   5 10  0.500033420
## 87   5  9  1.000000000
## 88   5  8  0.408702991
## 89   5  7 -0.095394450
## 90   5  6  0.194454755
## 91   5  5 -0.158197413
## 92   5  4  0.192319152
## 93   5  3  0.222511607
## 94   5  2  0.567318096
## 95   5  1  0.060610483
## 96   6 16  1.000000000
## 97   6 15  0.996914374
## 98   6 14  0.901802647
## 99   6 13  0.930916751
## 100  6 12  0.942127617
## 101  6 11  0.079541356
## 102  6 10  0.297411920
## 103  6  9  0.232743047
## 104  6  8  0.903379156
## 105  6  7  0.174023234
## 106  6  6  0.880802506
## 107  6  5  0.039555526
## 108  6  4  0.697350801
## 109  6  3  0.833506664
## 110  6  2  0.116372260
## 111  6  1  0.183891599
## 112  7 15  1.000000000
## 113  7 14  0.894885689
## 114  7 13  0.937428144
## 115  7 12  0.937018637
## 116  7 11  0.061510691
## 117  7 10  0.284595332
## 118  7  9  0.221472152
## 119  7  8  0.896002997
## 120  7  7  0.199201481
## 121  7  6  0.880115454
## 122  7  5  0.055801071
## 123  7  4  0.704842133
## 124  7  3  0.827358137
## 125  7  2  0.100576419
## 126  7  1  0.177857811
## 127  8 14  1.000000000
## 128  8 13  0.846770789
## 129  8 12  0.894441297
## 130  8 11  0.328419341
## 131  8 10  0.679267821
## 132  8  9  0.401548921
## 133  8  8  0.998531589
## 134  8  7 -0.034459037
## 135  8  6  0.868753028
## 136  8  5 -0.054473461
## 137  8  4  0.690958217
## 138  8  3  0.848654748
## 139  8  2  0.289031541
## 140  8  1  0.205937566
## 141  9 13  1.000000000
## 142  9 12  0.821129474
## 143  9 11  0.090558979
## 144  9 10  0.288814657
## 145  9  9  0.254941716
## 146  9  8  0.852748941
## 147  9  7  0.352567711
## 148  9  6  0.852270844
## 149  9  5  0.116724207
## 150  9  4  0.762332975
## 151  9  3  0.767984439
## 152  9  2  0.107114105
## 153  9  1  0.196406787
## 154 10 12  1.000000000
## 155 10 11  0.131681311
## 156 10 10  0.380843635
## 157 10  9  0.244655293
## 158 10  8  0.891912496
## 159 10  7 -0.054683207
## 160 10  6  0.838712596
## 161 10  5 -0.024710438
## 162 10  4  0.623675017
## 163 10  3  0.828465090
## 164 10  2  0.154905675
## 165 10  1  0.189745796
## 166 11 11  1.000000000
## 167 11 10  0.604166310
## 168 11  9  0.509118352
## 169 11  8  0.334423723
## 170 11  7 -0.307134813
## 171 11  6  0.133908011
## 172 11  5 -0.249982789
## 173 11  4  0.080654363
## 174 11  3  0.196450472
## 175 11  2  0.478951364
## 176 11  1  0.108055077
## 177 12 10  1.000000000
## 178 12  9  0.500033420
## 179 12  8  0.674031295
## 180 12  7 -0.374467373
## 181 12  6  0.425045589
## 182 12  5 -0.193306294
## 183 12  4  0.341980900
## 184 12  3  0.463488162
## 185 12  2  0.449030575
## 186 12  1  0.144270836
## 187 13  9  1.000000000
## 188 13  8  0.408702991
## 189 13  7 -0.095394450
## 190 13  6  0.194454755
## 191 13  5 -0.158197413
## 192 13  4  0.192319152
## 193 13  3  0.222511607
## 194 13  2  0.567318096
## 195 13  1  0.060610483
## 196 14  8  1.000000000
## 197 14  7 -0.027437590
## 198 14  6  0.869769737
## 199 14  5 -0.057577356
## 200 14  4  0.692015827
## 201 14  3  0.847741022
## 202 14  2  0.292273751
## 203 14  1  0.210025502
## 204 15  7  1.000000000
## 205 15  6  0.137557135
## 206 15  5  0.371511809
## 207 15  4  0.280621187
## 208 15  3  0.007762314
## 209 15  2 -0.206101924
## 210 15  1 -0.066769634
## 211 16  6  1.000000000
## 212 16  5  0.184902469
## 213 16  4  0.864793154
## 214 16  3  0.897807872
## 215 16  2  0.033584661
## 216 16  1  0.197477429
## 217 17  5  1.000000000
## 218 17  4  0.474000181
## 219 17  3 -0.123290335
## 220 17  2 -0.264784228
## 221 17  1  0.033609856
## 222 18  4  1.000000000
## 223 18  3  0.672630792
## 224 18  2  0.008407339
## 225 18  1  0.189673857
## 226 19  3  1.000000000
## 227 19  2  0.087868654
## 228 19  1  0.178650008
## 229 20  2  1.000000000
## 230 20  1  0.051351528
## 231 21  1  1.000000000
## 
## $arg
## $arg$type
## [1] "lower"
# Correlation of total_cases with every other column of corr_data.
# The predictor columns are picked with a logical mask rather than
# -which(...): a negative which() silently selects zero columns when the name
# is not found. drop = FALSE keeps the result a data frame even if only one
# predictor column remains.
predictor_mask <- names(corr_data) != "total_cases"
correlation_with_target <- cor(
  corr_data$total_cases,
  corr_data[, predictor_mask, drop = FALSE]
)

# Print the correlation matrix
print(correlation_with_target)
##         ndvi_ne    ndvi_nw   ndvi_se    ndvi_sw precipitation_amt_mm
## [1,] 0.06313849 0.09506206 0.0264142 0.03340995           0.06061048
##      reanalysis_air_temp_k reanalysis_avg_temp_k reanalysis_dew_point_temp_k
## [1,]             0.1838916             0.1778578                   0.2059376
##      reanalysis_max_air_temp_k reanalysis_min_air_temp_k
## [1,]                 0.1964068                 0.1897458
##      reanalysis_precip_amt_kg_per_m2 reanalysis_relative_humidity_percent
## [1,]                       0.1080551                            0.1442708
##      reanalysis_sat_precip_amt_mm reanalysis_specific_humidity_g_per_kg
## [1,]                   0.06061048                             0.2100255
##      reanalysis_tdtr_k station_avg_temp_c station_diur_temp_rng_c
## [1,]       -0.06676963          0.1974774              0.03360986
##      station_max_temp_c station_min_temp_c station_precip_mm
## [1,]          0.1896739            0.17865        0.05135153

Looking at how each variable correlates with the number of dengue cases, the variables most strongly correlated with our target variable are ‘reanalysis_air_temp_k’, ‘reanalysis_avg_temp_k’, ‘reanalysis_specific_humidity_g_per_kg’, ‘station_max_temp_c’, ‘station_min_temp_c’, ‘station_avg_temp_c’, ‘reanalysis_max_air_temp_k’, ‘reanalysis_min_air_temp_k’, and ‘reanalysis_dew_point_temp_k’. These correlations range only from 0.1778578 to 0.2100255, so even the strongest linear relationships with the target are weak. Therefore, I will not create a dynamic regression model for forecasting; I will create auto ARIMA models instead.

# Keep the nine temperature/humidity predictors singled out above, plus the
# target, and look at how they correlate with one another.
# NOTE(review): this reads new_df, whereas the correlations with the target
# printed earlier were computed from corr_data - confirm that is intended.
correlation_vars <- new_df %>%
  select(
    "reanalysis_air_temp_k",
    "reanalysis_avg_temp_k",
    "reanalysis_specific_humidity_g_per_kg",
    "station_max_temp_c",
    "station_min_temp_c",
    "station_avg_temp_c",
    "reanalysis_max_air_temp_k",
    "reanalysis_min_air_temp_k",
    "reanalysis_dew_point_temp_k",
    "total_cases"
  )

# Compute the correlation matrix once and reuse it for the plot and printout.
selected_cor <- cor(correlation_vars)
corrplot::corrplot(selected_cor, method = "square", type = "lower")

print(selected_cor)
##                                       reanalysis_air_temp_k
## reanalysis_air_temp_k                          1.0000000000
## reanalysis_avg_temp_k                          0.9018099276
## reanalysis_specific_humidity_g_per_kg          0.5117003442
## station_max_temp_c                             0.2278891136
## station_min_temp_c                             0.7203850726
## station_avg_temp_c                             0.6085510926
## reanalysis_max_air_temp_k                     -0.0007812154
## reanalysis_min_air_temp_k                      0.7362364409
## reanalysis_dew_point_temp_k                    0.5328709872
## total_cases                                    0.2657648415
##                                       reanalysis_avg_temp_k
## reanalysis_air_temp_k                             0.9018099
## reanalysis_avg_temp_k                             1.0000000
## reanalysis_specific_humidity_g_per_kg             0.6140171
## station_max_temp_c                                0.5113370
## station_min_temp_c                                0.5796410
## station_avg_temp_c                                0.7406893
## reanalysis_max_air_temp_k                         0.3981483
## reanalysis_min_air_temp_k                         0.4362652
## reanalysis_dew_point_temp_k                       0.6155884
## total_cases                                       0.1530568
##                                       reanalysis_specific_humidity_g_per_kg
## reanalysis_air_temp_k                                             0.5117003
## reanalysis_avg_temp_k                                             0.6140171
## reanalysis_specific_humidity_g_per_kg                             1.0000000
## station_max_temp_c                                                0.5167122
## station_min_temp_c                                                0.6227538
## station_avg_temp_c                                                0.7357880
## reanalysis_max_air_temp_k                                         0.2968031
## reanalysis_min_air_temp_k                                         0.3223277
## reanalysis_dew_point_temp_k                                       0.9970426
## total_cases                                                       0.1330857
##                                       station_max_temp_c station_min_temp_c
## reanalysis_air_temp_k                         0.22788911          0.7203851
## reanalysis_avg_temp_k                         0.51133700          0.5796410
## reanalysis_specific_humidity_g_per_kg         0.51671217          0.6227538
## station_max_temp_c                            1.00000000          0.1388052
## station_min_temp_c                            0.13880523          1.0000000
## station_avg_temp_c                            0.76245283          0.6169036
## reanalysis_max_air_temp_k                     0.76115460         -0.1961241
## reanalysis_min_air_temp_k                    -0.26901950          0.7216910
## reanalysis_dew_point_temp_k                   0.48976623          0.6512199
## total_cases                                  -0.03896821          0.2684015
##                                       station_avg_temp_c
## reanalysis_air_temp_k                          0.6085511
## reanalysis_avg_temp_k                          0.7406893
## reanalysis_specific_humidity_g_per_kg          0.7357880
## station_max_temp_c                             0.7624528
## station_min_temp_c                             0.6169036
## station_avg_temp_c                             1.0000000
## reanalysis_max_air_temp_k                      0.4635907
## reanalysis_min_air_temp_k                      0.2067870
## reanalysis_dew_point_temp_k                    0.7308444
## total_cases                                    0.1158974
##                                       reanalysis_max_air_temp_k
## reanalysis_air_temp_k                             -0.0007812154
## reanalysis_avg_temp_k                              0.3981483203
## reanalysis_specific_humidity_g_per_kg              0.2968031382
## station_max_temp_c                                 0.7611545955
## station_min_temp_c                                -0.1961241358
## station_avg_temp_c                                 0.4635907055
## reanalysis_max_air_temp_k                          1.0000000000
## reanalysis_min_air_temp_k                         -0.6014890313
## reanalysis_dew_point_temp_k                        0.2537611734
## total_cases                                       -0.1902989455
##                                       reanalysis_min_air_temp_k
## reanalysis_air_temp_k                                 0.7362364
## reanalysis_avg_temp_k                                 0.4362652
## reanalysis_specific_humidity_g_per_kg                 0.3223277
## station_max_temp_c                                   -0.2690195
## station_min_temp_c                                    0.7216910
## station_avg_temp_c                                    0.2067870
## reanalysis_max_air_temp_k                            -0.6014890
## reanalysis_min_air_temp_k                             1.0000000
## reanalysis_dew_point_temp_k                           0.3654194
## total_cases                                           0.3254744
##                                       reanalysis_dew_point_temp_k total_cases
## reanalysis_air_temp_k                                   0.5328710  0.26576484
## reanalysis_avg_temp_k                                   0.6155884  0.15305682
## reanalysis_specific_humidity_g_per_kg                   0.9970426  0.13308565
## station_max_temp_c                                      0.4897662 -0.03896821
## station_min_temp_c                                      0.6512199  0.26840155
## station_avg_temp_c                                      0.7308444  0.11589737
## reanalysis_max_air_temp_k                               0.2537612 -0.19029895
## reanalysis_min_air_temp_k                               0.3654194  0.32547441
## reanalysis_dew_point_temp_k                             1.0000000  0.14576761
## total_cases                                             0.1457676  1.00000000

4. Modeling

a. HTS (w/ ETS modeling)

# Fit an ETS model to each series in full_hts_filled, then attach bottom-up
# (bu) and top-down (td) reconciliations of those ETS fits.
HTS_Model <- reconcile(
  model(full_hts_filled, ets = ETS(Sum_Cases)),
  bu = bottom_up(ets),
  td = top_down(ets)
)

# Forecast h = 5 periods ahead for every series/model combination.
forecast_hts <- HTS_Model |>
  forecast::forecast(h = 5)
print(forecast_hts)
## # A fable: 45 x 5 [7D]
## # Key:     city, .model [9]
##    city   .model week_start_date   Sum_Cases .mean
##    <chr*> <chr>  <date>               <dist> <dbl>
##  1 iq     ets    2010-06-27       N(3.2, 55)  3.21
##  2 iq     ets    2010-07-04       N(3.2, 79)  3.21
##  3 iq     ets    2010-07-11      N(3.2, 102)  3.21
##  4 iq     ets    2010-07-18      N(3.2, 125)  3.21
##  5 iq     ets    2010-07-25      N(3.2, 148)  3.21
##  6 iq     bu     2010-06-27       N(3.2, 55)  3.21
##  7 iq     bu     2010-07-04       N(3.2, 79)  3.21
##  8 iq     bu     2010-07-11      N(3.2, 102)  3.21
##  9 iq     bu     2010-07-18      N(3.2, 125)  3.21
## 10 iq     bu     2010-07-25      N(3.2, 148)  3.21
## # ℹ 35 more rows
# Plot the forecasts together with the observed series, one panel per city,
# each with its own y-axis scale.
autoplot(forecast_hts, full_hts_filled) +
  facet_wrap(vars(city), scales = "free_y") +
  labs(y = "Cases")

b. ARIMA Models

Auto ARIMAs

# Fit two automatic ARIMA specifications to every series in full_hts_filled:
#   stepwise - ARIMA() with its default settings
#   search   - ARIMA() with stepwise = FALSE
# To model only the aggregated series, add filter(is_aggregated(city)) before
# model().
ARIMA_Model <- full_hts_filled |>
  model(
    stepwise = ARIMA(Sum_Cases),
    search = ARIMA(Sum_Cases, stepwise = FALSE)
  )

# Forecast once (h = 5) and reuse the result for both the plot and the
# printout, instead of re-running forecast() for each of them.
forecasted_values_search <- ARIMA_Model |>
  forecast(h = 5) |>
  filter(.model == "search")

forecasted_values_search |>
  autoplot(full_hts_filled)

# Print the predictions
print(forecasted_values_search)
## # A fable: 15 x 5 [7D]
## # Key:     city, .model [3]
##    city         .model week_start_date    Sum_Cases .mean
##    <chr*>       <chr>  <date>                <dist> <dbl>
##  1 iq           search 2010-06-27        N(3.9, 52)  3.92
##  2 iq           search 2010-07-04        N(3.2, 77)  3.17
##  3 iq           search 2010-07-11        N(4.3, 88)  4.33
##  4 iq           search 2010-07-18       N(4.6, 101)  4.55
##  5 iq           search 2010-07-25       N(4.4, 116)  4.44
##  6 sj           search 2008-04-27       N(6.1, 240)  6.12
##  7 sj           search 2008-05-04       N(6.5, 436)  6.54
##  8 sj           search 2008-05-11         N(6, 725)  5.98
##  9 sj           search 2008-05-18        N(5, 1007)  5.00
## 10 sj           search 2008-05-25      N(3.9, 1307)  3.94
## 11 <aggregated> search 2010-06-27       N(3.5, 244)  3.52
## 12 <aggregated> search 2010-07-04       N(3.6, 447)  3.60
## 13 <aggregated> search 2010-07-11       N(3.7, 711)  3.68
## 14 <aggregated> search 2010-07-18       N(3.7, 982)  3.70
## 15 <aggregated> search 2010-07-25      N(3.7, 1290)  3.74
# Forecast the "stepwise" ARIMA specification once (h = 5) and reuse the
# result for both the plot and the printout, instead of re-running forecast()
# for each of them.
forecasted_values_stepwise <- ARIMA_Model |>
  forecast(h = 5) |>
  filter(.model == "stepwise")

forecasted_values_stepwise |>
  autoplot(full_hts_filled)

# Print the predictions
print(forecasted_values_stepwise)
## # A fable: 15 x 5 [7D]
## # Key:     city, .model [3]
##    city         .model   week_start_date    Sum_Cases .mean
##    <chr*>       <chr>    <date>                <dist> <dbl>
##  1 iq           stepwise 2010-06-27        N(3.9, 53)  3.93
##  2 iq           stepwise 2010-07-04        N(3.6, 82)  3.64
##  3 iq           stepwise 2010-07-11        N(3.6, 95)  3.64
##  4 iq           stepwise 2010-07-18       N(3.6, 109)  3.64
##  5 iq           stepwise 2010-07-25       N(3.6, 122)  3.64
##  6 sj           stepwise 2008-04-27       N(6.1, 240)  6.12
##  7 sj           stepwise 2008-05-04       N(6.5, 436)  6.54
##  8 sj           stepwise 2008-05-11         N(6, 725)  5.98
##  9 sj           stepwise 2008-05-18        N(5, 1007)  5.00
## 10 sj           stepwise 2008-05-25      N(3.9, 1307)  3.94
## 11 <aggregated> stepwise 2010-06-27       N(4.2, 246)  4.19
## 12 <aggregated> stepwise 2010-07-04         N(4, 463)  4.01
## 13 <aggregated> stepwise 2010-07-11       N(4.2, 707)  4.18
## 14 <aggregated> stepwise 2010-07-18         N(4, 925)  4.02
## 15 <aggregated> stepwise 2010-07-25      N(4.2, 1168)  4.17

c. Neural Networks

# The NNETAR model and its forecasts are cached on disk (presumably to avoid
# re-fitting on every knit). Each object is loaded when its .rds file exists
# and is otherwise rebuilt and saved, so the document no longer depends on
# files produced by commented-out code.
# NOTE(review): the forecast distributions print as sample[5000], so a rebuilt
# forecast will probably not reproduce the cached numbers exactly.
nnet_model_file <- "Neural_Net_Model.rds"
nnet_forecast_file <- "forecasted_values_NNET.rds"

# fitting the model
if (file.exists(nnet_model_file)) {
  Neural_Net_Model <- readRDS(nnet_model_file)
} else {
  Neural_Net_Model <- full_hts_filled |>
    model(NNETAR(sqrt(Sum_Cases)))
  saveRDS(Neural_Net_Model, nnet_model_file)
}

# forecasting from model
if (file.exists(nnet_forecast_file)) {
  forecasted_values_NNET <- readRDS(nnet_forecast_file)
} else {
  forecasted_values_NNET <- Neural_Net_Model |>
    forecast(h = 5)
  saveRDS(forecasted_values_NNET, nnet_forecast_file)
}

forecasted_values_NNET |>
  autoplot(full_hts_filled) +
  labs(
    x = "Time",
    y = "# of Cases",
    title = "# of Cases of Dengue Fever (Neural Networks Forecast)"
  )

forecasted_values_NNET
## # A fable: 15 x 5 [7D]
## # Key:     city, .model [3]
##    city         .model                  week_start_date    Sum_Cases .mean
##    <chr*>       <chr>                   <date>                <dist> <dbl>
##  1 iq           NNETAR(sqrt(Sum_Cases)) 2010-06-27      sample[5000]  3.16
##  2 iq           NNETAR(sqrt(Sum_Cases)) 2010-07-04      sample[5000]  3.73
##  3 iq           NNETAR(sqrt(Sum_Cases)) 2010-07-11      sample[5000]  4.23
##  4 iq           NNETAR(sqrt(Sum_Cases)) 2010-07-18      sample[5000]  4.49
##  5 iq           NNETAR(sqrt(Sum_Cases)) 2010-07-25      sample[5000]  4.88
##  6 sj           NNETAR(sqrt(Sum_Cases)) 2008-04-27      sample[5000]  5.23
##  7 sj           NNETAR(sqrt(Sum_Cases)) 2008-05-04      sample[5000]  5.44
##  8 sj           NNETAR(sqrt(Sum_Cases)) 2008-05-11      sample[5000]  6.53
##  9 sj           NNETAR(sqrt(Sum_Cases)) 2008-05-18      sample[5000]  7.90
## 10 sj           NNETAR(sqrt(Sum_Cases)) 2008-05-25      sample[5000] 10.0 
## 11 <aggregated> NNETAR(sqrt(Sum_Cases)) 2010-06-27      sample[5000]  3.82
## 12 <aggregated> NNETAR(sqrt(Sum_Cases)) 2010-07-04      sample[5000]  4.18
## 13 <aggregated> NNETAR(sqrt(Sum_Cases)) 2010-07-11      sample[5000]  6.40
## 14 <aggregated> NNETAR(sqrt(Sum_Cases)) 2010-07-18      sample[5000]  6.61
## 15 <aggregated> NNETAR(sqrt(Sum_Cases)) 2010-07-25      sample[5000]  8.02

5. Comparisons

# Training-set accuracy measures for the NNETAR model
accuracy_measures1 <- Neural_Net_Model |>
  accuracy()

print(accuracy_measures1)
## # A tibble: 3 × 11
##   city         .model     .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE   ACF1
##   <chr*>       <chr>      <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 iq           NNETAR(sq… Trai… 0.833  6.34  3.44  -Inf   Inf 0.867 0.822 0.0942
## 2 sj           NNETAR(sq… Trai… 0.943 10.5   6.59  -Inf   Inf 0.771 0.660 0.0290
## 3 <aggregated> NNETAR(sq… Trai… 1.00  10.9   7.02  -Inf   Inf 0.797 0.687 0.0603
# Training-set accuracy measures for both ARIMA specifications
accuracy_measures2 <- ARIMA_Model |>
  accuracy()

accuracy_measures2
## # A tibble: 6 × 11
##   city        .model .type       ME  RMSE   MAE   MPE  MAPE  MASE RMSSE     ACF1
##   <chr*>      <chr>  <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>    <dbl>
## 1 iq          stepw… Trai…  1.38e-2  7.25  3.91   NaN   Inf 0.985 0.940 -4.84e-3
## 2 iq          search Trai…  2.32e-2  7.17  3.87   NaN   Inf 0.976 0.929  2.04e-2
## 3 sj          stepw… Trai… -1.99e-4 15.4   8.59  -Inf   Inf 1.01  0.967 -5.12e-3
## 4 sj          search Trai… -1.99e-4 15.4   8.59  -Inf   Inf 1.01  0.967 -5.12e-3
## 5 <aggregate… stepw… Trai…  1.01e-4 15.7   8.92   NaN   Inf 1.01  0.984 -2.07e-2
## 6 <aggregate… search Trai… -1.73e-4 15.6   8.89   NaN   Inf 1.01  0.977 -6.69e-4
# aggregated bottom up
# Select the model column by name and the aggregated row by its key instead of
# by position, so the lookup does not depend on column or key order.
# NOTE(review): the output printed here is identical to the top-down output,
# which suggests $fit holds the shared base ETS fit rather than anything
# specific to the reconciliation method - confirm.
aggregated_row <- which(is_aggregated(HTS_Model$city))
print(HTS_Model[["bu"]][[aggregated_row]]$fit)
## $par
## # A tibble: 5 × 2
##   term  estimate
##   <chr>    <dbl>
## 1 alpha   0.809 
## 2 beta    0.129 
## 3 phi     0.800 
## 4 l       2.10  
## 5 b      -0.0572
## 
## $est
## # A tsibble: 1,052 x 4 [7D]
##    week_start_date Sum_Cases .fitted .resid
##    <date>              <dbl>   <dbl>  <dbl>
##  1 1990-04-29              4    2.05   1.95
##  2 1990-05-06              5    3.79   1.21
##  3 1990-05-13              4    5.03  -1.03
##  4 1990-05-20              3    4.29  -1.29
##  5 1990-05-27              6    3.19   2.81
##  6 1990-06-03              2    5.71  -3.71
##  7 1990-06-10              4    2.52   1.48
##  8 1990-06-17              5    3.72   1.28
##  9 1990-06-24             10    4.89   5.11
## 10 1990-07-01              6    9.66  -3.66
## # ℹ 1,042 more rows
## 
## $fit
## # A tibble: 1 × 8
##   sigma2 log_lik    AIC   AICc    BIC   MSE  AMSE   MAE
##    <dbl>   <dbl>  <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1   249.  -6560. 13131. 13131. 13161.  248.  475.  8.75
## 
## $states
## # A tsibble: 1,053 x 3 [7D]
##    week_start_date     l        b
##    <date>          <dbl>    <dbl>
##  1 1990-04-22       2.10 -0.0572 
##  2 1990-04-29       3.63  0.206  
##  3 1990-05-06       4.77  0.320  
##  4 1990-05-13       4.20  0.124  
##  5 1990-05-20       3.25 -0.0679 
##  6 1990-05-27       5.46  0.308  
##  7 1990-06-03       2.71 -0.232  
##  8 1990-06-10       3.72  0.00475
##  9 1990-06-17       4.76  0.169  
## 10 1990-06-24       9.03  0.794  
## # ℹ 1,043 more rows
## 
## $spec
## # A tibble: 1 × 5
##   errortype trendtype seasontype damped period
##   <chr>     <chr>     <chr>      <lgl>   <dbl>
## 1 A         A         N          TRUE        1
## 
## attr(,"class")
## [1] "ETS"
# aggregated top down
# Select the model column by name and the aggregated row by its key instead of
# by position, so the lookup does not depend on column or key order.
# NOTE(review): the output printed here is identical to the bottom-up output,
# which suggests $fit holds the shared base ETS fit rather than anything
# specific to the reconciliation method - confirm.
aggregated_row <- which(is_aggregated(HTS_Model$city))
print(HTS_Model[["td"]][[aggregated_row]]$fit)
## $par
## # A tibble: 5 × 2
##   term  estimate
##   <chr>    <dbl>
## 1 alpha   0.809 
## 2 beta    0.129 
## 3 phi     0.800 
## 4 l       2.10  
## 5 b      -0.0572
## 
## $est
## # A tsibble: 1,052 x 4 [7D]
##    week_start_date Sum_Cases .fitted .resid
##    <date>              <dbl>   <dbl>  <dbl>
##  1 1990-04-29              4    2.05   1.95
##  2 1990-05-06              5    3.79   1.21
##  3 1990-05-13              4    5.03  -1.03
##  4 1990-05-20              3    4.29  -1.29
##  5 1990-05-27              6    3.19   2.81
##  6 1990-06-03              2    5.71  -3.71
##  7 1990-06-10              4    2.52   1.48
##  8 1990-06-17              5    3.72   1.28
##  9 1990-06-24             10    4.89   5.11
## 10 1990-07-01              6    9.66  -3.66
## # ℹ 1,042 more rows
## 
## $fit
## # A tibble: 1 × 8
##   sigma2 log_lik    AIC   AICc    BIC   MSE  AMSE   MAE
##    <dbl>   <dbl>  <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1   249.  -6560. 13131. 13131. 13161.  248.  475.  8.75
## 
## $states
## # A tsibble: 1,053 x 3 [7D]
##    week_start_date     l        b
##    <date>          <dbl>    <dbl>
##  1 1990-04-22       2.10 -0.0572 
##  2 1990-04-29       3.63  0.206  
##  3 1990-05-06       4.77  0.320  
##  4 1990-05-13       4.20  0.124  
##  5 1990-05-20       3.25 -0.0679 
##  6 1990-05-27       5.46  0.308  
##  7 1990-06-03       2.71 -0.232  
##  8 1990-06-10       3.72  0.00475
##  9 1990-06-17       4.76  0.169  
## 10 1990-06-24       9.03  0.794  
## # ℹ 1,043 more rows
## 
## $spec
## # A tibble: 1 × 5
##   errortype trendtype seasontype damped period
##   <chr>     <chr>     <chr>      <lgl>   <dbl>
## 1 A         A         N          TRUE        1
## 
## attr(,"class")
## [1] "ETS"

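Based on the training-set goodness-of-fit measures above, the Neural Network model is the best performer of the three. On the aggregated series it has the lowest errors: RMSE 10.9 and MAE 7.02, versus an RMSE of about 15.6–15.7 and an MAE of about 8.9 for the ARIMA models, and an MSE of 248 (RMSE ≈ 15.7) and an MAE of 8.75 for the ETS model.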