Preliminaries

library(tidyverse)
library(sp)
library(sf)
library(spdep)
library(spatstat)
library(readxl)
library(lubridate)
library(spatialRF)
library(dplyr)
library(ggplot2)
library(stringr)
library(tidyr)
QC <- sf::st_read("C:/Users/Trish/Documents/Thesis 2024 Data/QC Barangays/QC Barangays.shp")
## Reading layer `QC Barangays' from data source 
##   `C:\Users\Trish\Documents\Thesis 2024 Data\QC Barangays\QC Barangays.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 142 features and 3 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 283480.1 ymin: 1613822 xmax: 299258 ymax: 1634574
## Projected CRS: WGS 84 / UTM zone 51N
mapview::mapview(QC)
Dengue <- data.frame()
for (i in as.character(2017:2019)){
    dengue_temp <- read_xlsx("C:/Users/Trish/Documents/Thesis 2024 Data/DENGUE CASE/[RAW DATA] Dengue_Case_2017-2022.xlsx", sheet = i)|>   
        group_by(Barangay, "Year" = i, "Month" = month(DateOfEntry, abbr = T, label = T)) |> 
        summarize(DengueCases = n())
    Dengue <- rbind(Dengue, dengue_temp)
    
}
## `summarise()` has grouped output by 'Barangay', 'Year'. You can override using
## the `.groups` argument.
## `summarise()` has grouped output by 'Barangay', 'Year'. You can override using
## the `.groups` argument.
## `summarise()` has grouped output by 'Barangay', 'Year'. You can override using
## the `.groups` argument.
# note: Payatas = Payatas A + Payatas B
Payatas <- filter(Dengue, Barangay %in% c("PAYATAS A", "PAYATAS B")) |> 
        group_by(Year, Month) |> 
        summarize(DengueCases = sum(DengueCases))
## `summarise()` has grouped output by 'Year'. You can override using the
## `.groups` argument.
Payatas$Barangay = "PAYATAS"

Dengue <- rbind(Dengue, Payatas) |> 
    filter(!(Barangay %in% c("PAYATAS A", "PAYATAS B")))|> arrange(Barangay)
Temperature <- data.frame()
for (year in 2017:2020){
    temp_file <- paste0("C:/Users/Trish/Documents/Thesis 2024 Data/TEMPERATURE/Temperature_PerBrgyPerMonth_",
                         year,".csv") |> 
        read.csv() |> 
        mutate(Month = lubridate::month(Month, 
                                        label = T,
                                        abbr = T),
               Year = year,
               Barangay = toupper(ADM4_EN),
               Temperature = Mean_LST)

    Temperature <- rbind(Temperature, 
                         temp_file[,c("Barangay",
                                      "Temperature",
                                      "Month", "Year")])
   
}
Rainfall <- data.frame()
for (i in 2017:2020){
    rain_file <- paste0("C:/Users/Trish/Documents/Thesis 2024 Data/RAINFALL/Rainfall_PerBrgyPerMonth_", i, ".csv")
    rain_file0 <- read.csv(rain_file)[,c("ADM4_PCODE", "ADM4_EN","month", "year","mean")]
    colnames(rain_file0) <- c("ADM4_PCODE", "Barangay", "Month", "Year", "Rainfall")
    rain_file0$Barangay <- toupper(rain_file0$Barangay )
    rain_file0$Month    <- month(rain_file0$Month, label = T)
    rain_file0$Code     <- str_sub(rain_file0$ADM4_PCODE,-3)
    Rainfall <- rbind(Rainfall,rain_file0)
}
NDVI <- data.frame()
for (year in 2017:2020){
    ndvi_files <- paste0("C:/Users/Trish/Documents/Thesis 2024 Data/NDVI/MONTHLY [MODIS]/",year,
                         "/NDVI_", lubridate::month(1:12, label = T),
                         year,".csv")
    for (i in 1:12){
        temp <- read.csv(ndvi_files[i])[,c("ADM4_PCODE", "ADM4_EN", "mean")]
        temp$ADM4_EN <- toupper(temp$ADM4_EN)
        temp$Month <- lubridate::month(i, label = T)
        temp$Year  <- year
        colnames(temp) <- c("ADM4_PCODE","Barangay", "Vegetation", "Month", "Year")
        NDVI <- rbind(NDVI, temp)
    }
}
QC$Code <- str_sub(QC$adm4_psgc,-3)
QCx <- select(QC, -Barangay) |> 
    merge(Rainfall, by = "Code") |> 
    merge(Temperature, by = c("Barangay",  "Year", "Month")) |> 
    merge(select(NDVI, -Barangay), by = c("ADM4_PCODE",  "Year", "Month")) |> 
    # note that there are some months with no dengue cases
    # so we keep complete rows in the previous table 
    merge(Dengue, by = c("Barangay", "Year", "Month"), all.x = T) |> 
    # the next line fills the NA to 0
    mutate(Dengue = ifelse(is.na(DengueCases), 0, DengueCases)) |> 
    # selecting few columns
    select(District, Barangay, Year, Month, 
           Dengue,
           Rainfall, Temperature, Vegetation, 
           geometry)
# dropping geometry for faster loading later
View(QCx)
QC0 <- st_drop_geometry(QCx)|> 
    group_by(Barangay) |> 
    # adding monthly lagged values for rainfall
    summarize(District,
              Barangay,
              Year,
              Month,
              Rainfall,
              lag.Rain = lag(Rainfall),
              Temperature,
              lag.Temp = lag(Temperature),
              Vegetation,
              Dengue)
## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
##   always returns an ungrouped data frame and adjust accordingly.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `summarise()` has grouped output by 'Barangay'. You can override using the
## `.groups` argument.
QCx$lag.Temp <- QC0$lag.Temp
QCx$lag.Rain  <- QC0$lag.Rain

Exploratory Plots and Maps

# for later. Average of values per barangay per month.
QC_brgy_month <- QCx |> group_by(Barangay, Month) |> 
    summarize(Dengue = mean(Dengue, na.rm = T),
              Rainfall = mean(Rainfall, na.rm = T),
              Temperature = mean(Temperature, na.rm = T),
              Vegetation = mean(Vegetation, na.rm = T))
## `summarise()` has grouped output by 'Barangay'. You can override using the
## `.groups` argument.
QC_month <- QC0 |> group_by(Date = make_date(year = Year, month = Month)) |> 
    summarize(Dengue = sum(Dengue, na.rm = T),
              Rain   = mean(Rainfall, na.rm = T)) |>
    group_by(Month = month(Date, label = T, abbr = T)) |>
    summarise(Dengue = mean(Dengue),
              Rain = mean(Rain)) |>
    pivot_longer(cols = c(Dengue, Rain))

Dengue Cases

Map of Average Monthly Dengue Cases per Barangay

library(ggplot2)
library(viridis)
## Loading required package: viridisLite
ggplot(QC_brgy_month) +
  geom_sf(aes(fill = Dengue, geometry = geometry), color = "black") +
  facet_wrap(~Month) +
  scale_fill_viridis_c(option = "B",
                       direction = -1,
                       breaks = seq(0, 200, by = 50), # Adjust the breaks as needed
                       limits = c(0, 220)) + # Adjust the limits based on your data range
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        axis.text.y = element_blank()) +
  ggtitle("Average of Monthly Dengue Cases Per Barangay (2017-2019)")

  ggsave("Average of Monthly Dengue Cases Per Barangay (2017-2019).png", plot = last_plot(), width = 8, height = 8)

Line Plot of Monthly Total Dengue Cases per District

QC0 |> group_by(District, Date = make_date(Year, Month)) |> 
    summarize(Dengue = sum(Dengue, na.rm = T)) |> 
ggplot(aes(x = Date, y = Dengue, color = District))+
    geom_line() + theme_bw() +
    scale_x_date(breaks = "1 year",
                 minor_breaks = "3 months",
                 date_labels = "%Y %b")+
    theme(axis.text.x=element_text(angle=0, hjust=0.5),
          plot.title = element_text(hjust = 0.5)) +
    ggtitle("Monthly Total Dengue Cases Per District")
## `summarise()` has grouped output by 'District'. You can override using the
## `.groups` argument.

    ggsave("Monthly Total Dengue Cases Per District.png", plot = last_plot(), width = 11, height = 6)
# Assuming you have your actual data loaded into QCx with columns: Barangay, Year, Month, Dengue, geometry
# Prepare the data frame
QC_brgy_month <- QCx %>%
  filter(Year %in% c(2017, 2018, 2019, 2020)) %>%
  group_by(Barangay, Year, Month) %>%
  summarize(
    Dengue = sum(Dengue, na.rm = TRUE),
    geometry = first(geometry)  # Keep the geometry column for spatial data
  ) %>%
  ungroup()
## `summarise()` has grouped output by 'Barangay', 'Year'. You can override using
## the `.groups` argument.
# Create the plot for multiple years
years <- c(2017, 2018, 2019, 2020)  # List of years to plot

# Define the limits and breaks for the color scale
fill_limits <- c(0, 220)  # Adjust this based on your data range
fill_breaks <- seq(0, 200, by = 50)  # Adjust this based on your data range and desired intervals

for (year_to_plot in years) {
  p <- ggplot(QC_brgy_month %>% filter(Year == year_to_plot)) +
    geom_sf(aes(fill = Dengue, geometry = geometry), color = "black") +
    facet_wrap(~Month) +
    scale_fill_viridis_c(option = "B", direction = -1, limits = fill_limits, breaks = fill_breaks) +
    theme_bw() +
    theme(
      plot.title = element_text(hjust = 0.5),
      plot.subtitle = element_text(hjust = 0.5),
      axis.text.x = element_blank(),
      axis.ticks.x = element_blank(),
      axis.text.y = element_blank()
    ) +
    ggtitle(paste("Dengue Cases Per Barangay in", year_to_plot))
  
  print(p)

  ggsave(paste("Dengue_Cases_", year_to_plot, " - for presentation", ".png", sep = ""), plot = p, device = "png", width = 8, height = 8)
}

Rainfall

Maps

QC_brgy_month <- QCx %>%
  group_by(Barangay, Month) %>%
  summarize(
    Dengue = mean(Dengue, na.rm = TRUE),
    Rainfall = mean(Rainfall, na.rm = TRUE),
    Temperature = mean(Temperature, na.rm = TRUE),
    Vegetation = mean(Vegetation, na.rm = TRUE)
  )
## `summarise()` has grouped output by 'Barangay'. You can override using the
## `.groups` argument.
library(ggplot2)

ggplot(QC_brgy_month) +
  geom_sf(aes(fill = Rainfall, geometry = geometry), color = "black") +
  facet_wrap(~Month) +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.text.y = element_blank()
  ) +
  ggtitle("Average of Monthly Total Rainfall Per Barangay") +
  scale_fill_gradientn(name = "Rainfall (mm)", # Set the legend name
                       colours = c("bisque2", "olivedrab", "deepskyblue"), # Set colors
                       values = scales::rescale(c(0, 265, 530)), # Adjust values as needed
                       breaks = seq(0, 520, by = 130), # Adjust breaks as needed
                       limits = c(0, 550)) # Set the range of values for the color scale

  ggsave("Average of Monthly Total Rainfall Per Barangay - for presentation.png", plot = last_plot(), width = 8, height = 8)

Whole QC

Histogram of Dengue Cases and Rainfall throughout the Months

QC0 |> pivot_longer(cols = c(Rainfall, Dengue)) |> 
    ggplot()+
        geom_boxplot(data = pivot_longer(QC0, cols = c(Rainfall, Dengue)) ,
                     aes(x = Month, y = value, fill = name))

Line Chart Sum Dengue and Mean Rainfall 2017-2020

QC0 |> group_by(Year, Month) |> 
    summarize(Date = make_date(year = Year, month = Month),
                Dengue = sum(Dengue, na.rm = T),
              Rain   = mean(Rainfall, na.rm = T)) |> 
    pivot_longer(cols = c(Dengue, Rain), names_to = "Variable") |> 
    
    ggplot(aes(x = Date,
               y = value,
               color = Variable))+
    geom_line() + theme_bw() +
    scale_x_date(breaks = "1 year",
                 date_labels = "%Y",
                 date_minor_breaks = "3 months")+
    

    scale_color_manual(values = c("Dengue" = "red", "Rain" = "blue"),
                       labels = c("Total Dengue Cases in Quezon City",
                                 "Average Rainfall Amount in Quezon City")) +
    labs(color = NULL, x = NULL) +

    ggtitle("Monthly Dengue Cases and Average Rainfall Amount in Quezon City", 
            subtitle = "2017 to 2020")+
    theme(plot.title = element_text(hjust = 0.5),
          plot.subtitle = element_text(hjust = 0.5),
          legend.position = "bottom")
## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
##   always returns an ungrouped data frame and adjust accordingly.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `summarise()` has grouped output by 'Year', 'Month'. You can override using the
## `.groups` argument.

    ggsave("Monthly Dengue Cases and Average Rainfall Amount in Quezon City.png", plot = last_plot(), width = 11, height = 6)
library(ggplot2)

ggplot(QC_month, aes(x = Month, y = value, color = name, group = name)) +
  geom_line() + 
  geom_point() + 
  theme_bw() +
  scale_y_continuous(limits = c(0, 2500)) +  # Set y-axis limits here
  scale_color_manual(
    values = c("Dengue" = "red", "Rain" = "blue"),
    labels = c("Average of Monthly Total Dengue Cases", "Average of Monthly Rainfall Amount")
  ) +
  labs(color = NULL, x = NULL) +
  ggtitle(
    "Average of Monthly Dengue Cases and Rainfall Amount in Quezon City", 
    subtitle = "2017 to 2020"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    legend.position = "bottom"
  )

    ggsave("Average of Monthly Dengue Cases and Rainfall Amount in Quezon City.png", plot = last_plot(), width = 11, height = 6)

Sum Dengue and Mean Rainfall 2017-2019

library(dplyr)
library(ggplot2)
library(lubridate)

QC0 |> 
  filter(Year >= 2017 & Year <= 2019) |>  # Filter for the years 2017 to 2019
  group_by(Year, Month) |> 
  summarize(Date = make_date(year = Year, month = Month),
            Dengue = sum(Dengue, na.rm = TRUE),
            Rain = mean(Rainfall, na.rm = TRUE)) |> 
  pivot_longer(cols = c(Dengue, Rain), names_to = "Variable", values_to = "value") |> 
    
  ggplot(aes(x = Date, y = value, color = Variable)) +
  geom_line() + 
  theme_bw() +
  scale_x_date(
    date_labels = "%Y",
    date_minor_breaks = "2 month"  # Specify the minor breaks for every month
  ) +
  scale_color_manual(
    values = c("Dengue" = "red", "Rain" = "blue"),
    labels = c("Total Dengue Cases in Quezon City", "Average Rainfall Amount in Quezon City")
  ) +
  labs(color = NULL, x = NULL) +
  ggtitle(
    "Monthly Dengue Cases and Average Rainfall Amount in Quezon City", 
    subtitle = "2017 to 2019"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    legend.position = "bottom"
  )
## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
##   always returns an ungrouped data frame and adjust accordingly.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `summarise()` has grouped output by 'Year', 'Month'. You can override using the
## `.groups` argument.

  ggsave("Average of Monthly Dengue Cases and Rainfall Amount in Quezon City (2017-2019).png", plot = last_plot(), width = 11, height = 6)

per District

library(dplyr)
library(ggplot2)
library(lubridate)


# Data preprocessing and visualization
QC0 |> 
  group_by(District, Date = make_date(year = Year, month = Month)) |> 
  summarize(Dengue = sum(Dengue, na.rm = TRUE),
            Rain = mean(Rainfall, na.rm = TRUE)) |> 
  pivot_longer(cols = c(Dengue, Rain), names_to = "Variable") |> 
  ggplot(aes(x = Date, y = value, color = Variable)) +
  geom_line() + 
  theme_bw() +
  facet_wrap(~District) +
  scale_x_date(
    breaks = "1 year",
    date_labels = "%Y",
    date_minor_breaks = "3 months"
  ) +
  scale_color_manual(
    values = c("Dengue" = "black", "Rain" = "blue"),
    labels = c("Total Dengue Cases in District", "Average Rainfall Amount in District")
  ) +
  labs(color = NULL, x = NULL) +
  ggtitle(
    "Monthly Dengue Cases and Rainfall Amount Per District", 
    subtitle = "2017 to 2020"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    legend.position = "bottom"
  )
## `summarise()` has grouped output by 'District'. You can override using the
## `.groups` argument.

    ##ggsave("Total Dengue Cases and Average Rainfall Amount Per District in Quezon City.png", plot = last_plot(), width = 11, height = 6)

Pearson Correlation Per District

# Create a data frame to store the correlations
correlation_table <- QC0 %>%
  group_by(District) %>%
  summarize(Correlation = cor(Dengue, Rainfall, use = "complete.obs"))

print(correlation_table)
## # A tibble: 6 × 2
##   District   Correlation
##   <chr>            <dbl>
## 1 DISTRICT 1       0.137
## 2 DISTRICT 2       0.347
## 3 DISTRICT 3       0.144
## 4 DISTRICT 4       0.173
## 5 DISTRICT 5       0.319
## 6 DISTRICT 6       0.235

Linear Relationship

QC0 |> ggplot(aes(x = Rainfall, y = Dengue))+
    geom_point(aes(color = District))+
    theme_bw()+
    geom_smooth(method = "lm", se = FALSE) +
    ggtitle("Scatter Plot of Rainfall vs Dengue Cases") +
    theme(plot.title = element_text(hjust = 0.5))
## `geom_smooth()` using formula = 'y ~ x'

    ggsave("Scatter Plot of Rainfall vs Dengue Cases.png", plot = last_plot(), width = 11, height = 6)
## `geom_smooth()` using formula = 'y ~ x'

With 1 month lag of rainfall

QC0 |> ggplot(aes(x = lag.Rain, y = Dengue)) +
    geom_point(aes(color = District)) +
    theme_bw() +
    geom_smooth(method = "lm") +
    ggtitle("Scatter Plot of Rainfall (with lag) vs Dengue Cases") +
    theme(plot.title = element_text(hjust = 0.5))
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 142 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 142 rows containing missing values or values outside the scale range
## (`geom_point()`).

    ggsave("Scatter Plot of Rainfall (with lag) vs Dengue Cases.png", plot = last_plot(), width = 11, height = 6)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 142 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Removed 142 rows containing missing values or values outside the scale range
## (`geom_point()`).

Correlation between Dengue Cases and Rainfall and Lagged Rainfall using Pearson’s correlation r

# Calculate Pearson's correlation coefficient between Dengue and Rainfall
correlation1 <- cor(QC0$Dengue, QC0$Rainfall)

# Calculate Pearson's correlation coefficient between Dengue and lagged Rainfall, removing NA values
# Assuming you have a lagged Rainfall variable named 'lag.Rain'
correlation2 <- cor(QC0$Dengue[!is.na(QC0$lag.Rain)], QC0$lag.Rain[!is.na(QC0$lag.Rain)])

# Create a data frame to store the correlations
correlation_table <- data.frame(
  Variables = c("Dengue vs. Rainfall", "Dengue vs. Lagged Rainfall"),
  Correlation = c(correlation1, correlation2)
)

# Print the table
print(correlation_table)
##                    Variables Correlation
## 1        Dengue vs. Rainfall   0.1809789
## 2 Dengue vs. Lagged Rainfall   0.2380496

Rainfall and Lagged Rainfall Correlation Coefficient and Confidence Interval

# Calculate Pearson's correlation coefficient and its confidence interval
correlation_result <- cor.test(QC0$Dengue, QC0$Rainfall, alternative = "two.sided", method = "pearson", exact = NULL, conf.level = 0.95)

# Print the correlation coefficient
cat("Correlation coefficient (r) between Dengue and Rainfall:", correlation_result$estimate, "\n")
## Correlation coefficient (r) between Dengue and Rainfall: 0.1809789
# Print the confidence interval
cat("95% Confidence Interval for Dengue and Rainfall:", correlation_result$conf.int, "\n")
## 95% Confidence Interval for Dengue and Rainfall: 0.1579165 0.203844
# Calculate Pearson's correlation coefficient and its confidence interval for Dengue and Lagged Rainfall
correlation_result_lagged <- cor.test(QC0$Dengue[!is.na(QC0$lag.Rain)], QC0$lag.Rain[!is.na(QC0$lag.Rain)], alternative = "two.sided", method = "pearson", exact = NULL, conf.level = 0.95)

# Print the correlation coefficient for Dengue and Lagged Rainfall
cat("Correlation coefficient (r) between Dengue and Lagged Rainfall:", correlation_result_lagged$estimate, "\n")
## Correlation coefficient (r) between Dengue and Lagged Rainfall: 0.2380496
# Print the confidence interval for Dengue and Lagged Rainfall
cat("95% Confidence Interval for Dengue and Lagged Rainfall:", correlation_result_lagged$conf.int, "\n")
## 95% Confidence Interval for Dengue and Lagged Rainfall: 0.215287 0.2605537

Temperature

ggplot(QC_brgy_month) +
    geom_sf(aes(fill = Temperature, geometry = geometry), color = "black") +
    facet_wrap(~Month) +
    theme_bw() +
    theme(plot.title = element_text(hjust = 0.5),
          plot.subtitle = element_text(hjust = 0.5),
          axis.text.x = element_blank(),
          axis.ticks.x = element_blank(),
          axis.text.y = element_blank()) +
    ggtitle("Average of Monthly Temperature Per Barangay") +
    scale_fill_gradientn(colors = c("yellow", "orange", "red", "darkred"), name = "Temperature")

    ggsave("Average of Monthly Temperature Per Barangay.png", plot = last_plot(), width = 8, height = 8)

Per district time series plot

library(dplyr)
library(tidyr)
library(ggplot2)

QC0 |> 
  group_by(District, Date = make_date(year = Year, month = Month)) |> 
  summarize(Dengue = sum(Dengue, na.rm = TRUE),
            Temp = mean(Temperature, na.rm = TRUE)) |> 
  pivot_longer(cols = c(Dengue, Temp), names_to = "Variable", values_to = "Value") |> 
  ggplot() + 
  geom_line(data = . %>% filter(Variable == "Dengue"), 
            aes(x = Date, y = Value, color = Variable)) + 
  geom_line(data = . %>% filter(Variable == "Temp"), 
            aes(x = Date, y = Value * 15, color = Variable)) +  # Scale the temperature values here
  scale_y_continuous(
    name = "Total Dengue cases in District",
    sec.axis = sec_axis(~ . / 20, name = "Average Temperature in District (°C)")  # Adjusted transformation for the axis
  ) +
  scale_x_date(
    breaks = "1 year",
    date_labels = "%Y",
    date_minor_breaks = "3 months"
  ) +
  scale_color_manual(
    values = c("Dengue" = "black", "Temp" = "red"),
    labels = c("Total Dengue Cases in District", "Average Temperature in District (°C)")
  ) +
  labs(color = NULL, x = NULL) +
  facet_wrap(~District) +
  ggtitle("Monthly Dengue Cases and Temperature Per District", subtitle = "2017 to 2020") +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    legend.position = "bottom"
  )
## `summarise()` has grouped output by 'District'. You can override using the
## `.groups` argument.

    ggsave("Monthly Dengue Cases and Temperature Per District - Option 2.png", plot = last_plot(), width = 11, height = 6)

Correlation Between Dengue Cases and Temperature

# Calculate Pearson's correlation coefficient between Dengue and Rainfall
correlation1 <- cor(QC0$Dengue, QC0$Temperature)

# Create a data frame to store the correlations
correlation_table <- data.frame(
  Variables = c("Dengue vs. Temperature"),
  Correlation = c(correlation1)
)

# Print the table
print(correlation_table)
##                Variables Correlation
## 1 Dengue vs. Temperature -0.06797611

Temperature Correlation Coefficient and Confidence Interval

# Calculate Pearson's correlation coefficient and its confidence interval
correlation_result <- cor.test(QC0$Dengue, QC0$Temperature)

# Print the correlation coefficient
cat("Correlation coefficient (r):", correlation_result$estimate, "\n")
## Correlation coefficient (r): -0.06797611
# Print the confidence interval
cat("95% Confidence Interval:", correlation_result$conf.int, "\n")
## 95% Confidence Interval: -0.09156924 -0.0443067

Vegetation

ggplot(QC_brgy_month) +
    geom_sf(aes(fill = Vegetation, geometry = geometry), color = "black") +
    facet_wrap(~Month) +
    theme_bw() +
    theme(plot.title = element_text(hjust = 0.5),
          plot.subtitle = element_text(hjust = 0.5),
          axis.text.x = element_blank(),
          axis.ticks.x = element_blank(),
          axis.text.y = element_blank())+
    ggtitle("Average of Monthly Vegetation Index Per Barangay")+
    scale_fill_viridis_c(option = "D")

    ggsave("Average of Monthly Vegetation Index Per Barangay.png", plot = last_plot(), width = 8, height = 8)
QC0 |> 
    group_by(Date = make_date(year = Year, month = Month)) |> 
    summarize(Dengue = sum(Dengue, na.rm = T),
              Vegetation  = mean(Vegetation, na.rm = T)) |> 
    pivot_longer(cols = c(Dengue, Vegetation), 
                 names_to = "Variable") |> 
    
    ggplot(aes(x = Date,
               y = value,
               group = Variable,
               color = Variable))+
    geom_line() + theme_bw() +
    facet_wrap(.~Variable, nrow = 2, scale = "free_y") +
    
    scale_x_date(breaks = "1 year",
                 date_labels = "%Y",
                 date_minor_breaks = "3 months")+
    
    

    scale_color_manual(values = c("Dengue" = "black", "Vegetation" = "green"),
                       labels = c("Total Dengue cases in QC",
                                 "Average Vegetation in QC")) +
    labs(color = NULL, x = NULL) +

    ggtitle("Monthly Dengue Cases and Vegetation in Quezon City", 
            subtitle = "2017 to 2020")+
    theme(plot.title = element_text(hjust = 0.5),
          plot.subtitle = element_text(hjust = 0.5),
          legend.position = "bottom")

    ggsave("Monthly Dengue Cases and Vegatation in Quezon City.png", plot = last_plot(), width = 11, height = 6)

Correlation Between Dengue Cases and NDVI

# Calculate Pearson's correlation coefficient between Dengue and Rainfall
correlation1 <- cor(QC0$Dengue, QC0$Vegetation)

# Create a data frame to store the correlations
correlation_table <- data.frame(
  Variables = c("Dengue vs. Vegetation"),
  Correlation = c(correlation1)
)

# Print the table
print(correlation_table)
##               Variables Correlation
## 1 Dengue vs. Vegetation   0.1960672

Correlation Between Dengue and NDVI Per District

library(dplyr)

# Calculate Pearson's correlation coefficient between Dengue and NDVI per district
correlation_per_district <- QC0 %>%
  group_by(District) %>%
  summarize(correlation = cor(Dengue, Vegetation, use = "complete.obs"))

# Print the correlation table
print(correlation_per_district)
## # A tibble: 6 × 2
##   District   correlation
##   <chr>            <dbl>
## 1 DISTRICT 1      0.102 
## 2 DISTRICT 2      0.162 
## 3 DISTRICT 3      0.0856
## 4 DISTRICT 4     -0.0608
## 5 DISTRICT 5      0.0627
## 6 DISTRICT 6      0.0946

Vegetation Correlation Coefficient and Confidence Interval

# Calculate Pearson's correlation coefficient and its confidence interval
correlation_result <- cor.test(QC0$Dengue, QC0$Vegetation)

# Print the correlation coefficient
cat("Correlation coefficient (r):", correlation_result$estimate, "\n")
## Correlation coefficient (r): 0.1960672
# Print the confidence interval
cat("95% Confidence Interval:", correlation_result$conf.int, "\n")
## 95% Confidence Interval: 0.1731322 0.2187897

Statistical Models

We first split our dataset to train and test sets.

train <- filter(QCx, Year %in% c(2017,2018))
test  <- filter(QCx, Year == 2019)

Model 1: Poisson Loglinear Model

\[ log(dengue) = \beta_0 +\beta_1rain + \beta_2temp + \beta_3vegetation \]

mod1 <- glm(Dengue ~ Rainfall + Temperature+ + Vegetation, family = "poisson", 
            data = train)
summary(mod1)
## 
## Call:
## glm(formula = Dengue ~ Rainfall + Temperature + +Vegetation, 
##     family = "poisson", data = train)
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -2.492e-01  4.604e-02  -5.412 6.22e-08 ***
## Rainfall     1.779e-03  3.993e-05  44.553  < 2e-16 ***
## Temperature  1.674e-03  8.904e-04   1.880   0.0601 .  
## Vegetation   3.282e+00  5.184e-02  63.302  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 35820  on 3407  degrees of freedom
## Residual deviance: 29388  on 3404  degrees of freedom
## AIC: 36426
## 
## Number of Fisher Scoring iterations: 6
plot(mod1)

test$fit1 <-  predict(mod1, test, type = "response")
hist(test$fit1)

ggplot(test) +
  geom_sf(aes(fill = fit1, geometry = geometry)) +
  facet_wrap(~Month) +
  scale_fill_viridis_c(option = "B",
                       direction = -1,
                       limits = c(0, 34), # Adjust the limits based on your data range
                       breaks = seq(0, 32, by = 8)) + # Adjust the breaks as needed
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        axis.text.y = element_blank()) +
  ggtitle("Predicted Dengue Cases Per Barangay in 2019",
          subtitle = "Model 1: Poisson Loglinear Model")

ggsave("Predicted Dengue Cases Per Barangay in 2019 (Model 1) - for presentation.png", plot = last_plot(), width = 8, height = 8)
ggplot(test, aes(y = Dengue, x = fit1))+
    geom_point() +
    geom_abline(slope = 1)+
    xlim(0,100) + ylim(0,100)+theme_bw()+
    xlab("Predicted number of dengue cases")+
    ylab("Actual number of dengue cases")+
    ggtitle("Actual vs Predicted Plot",
            subtitle = "Model 1: Poisson Loglinear Model")+
    theme(plot.title = element_text(hjust = 0.5),
          plot.subtitle = element_text(hjust = 0.5))
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).

ggsave("Actual vs Predicted Plot (Model 1).png", plot = last_plot(), width = 8, height = 8)
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
Metrics::rmse(na.omit(test)$Dengue, na.omit(test)$fit1)
## [1] 11.34747

Model 2: Poisson Regression, with monthly rainfall lag in the model

mod2 <-    glm(Dengue ~
            Rainfall + 
            lag.Rain + 
            Temperature +
            Vegetation, 
        family = "poisson",
        data = train)
summary(mod2)
## 
## Call:
## glm(formula = Dengue ~ Rainfall + lag.Rain + Temperature + Vegetation, 
##     family = "poisson", data = train)
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -7.960e-01  5.104e-02 -15.595  < 2e-16 ***
## Rainfall     4.155e-04  5.779e-05   7.191 6.44e-13 ***
## lag.Rain     2.197e-03  5.455e-05  40.279  < 2e-16 ***
## Temperature  9.074e-03  9.621e-04   9.432  < 2e-16 ***
## Vegetation   3.303e+00  5.201e-02  63.503  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 34836  on 3265  degrees of freedom
## Residual deviance: 26861  on 3261  degrees of freedom
##   (142 observations deleted due to missingness)
## AIC: 33604
## 
## Number of Fisher Scoring iterations: 6
plot(mod2)

test$fit2 <-  predict(mod2, test, type = "response")
hist(test$fit2)

library(ggplot2)
# Map
ggplot(test)+
        geom_sf(aes(fill = fit2, geometry = geometry))+
    facet_wrap(~Month) +
    scale_fill_viridis_c(option = "B",
                        direction = -1,
                        limits = c(0, 34),
                        breaks = seq(0, 32, by = 8)) +
    theme_bw() +
    theme(plot.title = element_text(hjust = 0.5),
          plot.subtitle = element_text(hjust = 0.5),
          axis.text.x = element_blank(),
          axis.ticks.x = element_blank(),
          axis.text.y = element_blank())+
    ggtitle("Predicted Dengue Cases Per Barangay in 2019", 
            subtitle = "Model 2: Poisson Regression with Monthly Rainfall Lag")

ggsave("Predicted Dengue Cases Per Barangay in 2019 (Model 2) - for presentation.png", plot = last_plot(), width = 8, height = 8)
ggplot(test, aes(y = Dengue, x = fit2))+
    geom_point() +
    geom_abline(slope = 1)+
    xlim(0,100) + ylim(0,100)+theme_bw()+
    xlab("Predicted number of dengue cases")+
    ylab("Actual number of dengue cases") +
    ggtitle("Actual vs Predicted Plot",
            subtitle = "Model 2: Poisson Regresson Model with Monthly Rainfall Lag")+
    theme(plot.title = element_text(hjust = 0.5),
          plot.subtitle = element_text(hjust = 0.5))
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).

ggsave("Actual vs Predicted Plot (Model 2).png", plot = last_plot(), width = 8, height = 8)
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
Metrics::rmse(na.omit(test)$Dengue, na.omit(test)$fit2)
## [1] 11.12746

RMSE of this model is lower than RMSE of model 1.

Model 3: Spatial Autoregressive Model (SAR)

Before performing the regression, some preliminary tests are considered first.

Setting Neighbors using Queen Contiguity

centroid <- st_centroid(QC)
## Warning: st_centroid assumes attributes are constant over geometries
# queen contiguity
nb <- poly2nb(QC, queen = T)

plot(QC$geometry, 
     main = "Neighbor Assignment Using Queen Contiguity")
plot(nb, centroid$geometry, col='red', lwd=1, add = T)

Moran’s I for Spatial Correlation

avg_QC <- train |> group_by(Barangay, Month) |> 
    summarize(Dengue = mean(Dengue), ## change to mean or sum to check
              Temperature = mean(Temperature, na.rm = T),
              Vegetation = mean(Vegetation, na.rm = T),
              Rainfall = mean(Rainfall),
              lag.Rain = mean(lag.Rain, na.rm = T)) ## check difference if may lag temp
## `summarise()` has grouped output by 'Barangay'. You can override using the
## `.groups` argument.
p <- ggplot(avg_QC) +
  geom_sf(aes(fill = Dengue, geometry = geometry)) +
  facet_wrap(~Month) +
  scale_fill_viridis_c(option = "B",
                       direction = -1,
                       limits = c(0, 220),
                        breaks = seq(0, 200, by = 50)) +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.text.y = element_blank()
  ) +
  labs(
    title = "Average of Monthly Dengue Cases Per Barangay", ## change title
    subtitle = "Training Data: 2017-2018"
  )

print(p)

ggsave("Average of Monthly Dengue Cases Per Barangay (Training) - for presentation.png", plot = p, width = 8, height = 8) ## change title
moran_tests <- data.frame(month = month(1:12, label = T), 
                          statistic = rep(NA, 12),
                          p.value = rep(NA,12))

for (i in 1:12){
    m <- month(i, label = T)
    # W
    nb <- poly2nb(filter(avg_QC, Month == m), queen = T)
    lw <- nb2listw(nb, style = "U", zero.policy=TRUE)
    # WX
    lag.deng <- filter(avg_QC, Month == m)$Dengue %>%
        lag.listw(lw,.,zero.policy=TRUE)
    
    m_test <- moran.test(lag.deng, lw, na.action = na.pass)
    moran_tests[i,"statistic"] <- m_test$estimate[1]
    moran_tests[i,"p.value"]   <- m_test$p.value
}
moran_tests
##    month statistic      p.value
## 1    Jan 0.7067399 1.484274e-50
## 2    Feb 0.7476249 5.082005e-57
## 3    Mar 0.7069445 9.582257e-51
## 4    Apr 0.7551903 3.210292e-58
## 5    May 0.7225299 2.137761e-54
## 6    Jun 0.7176300 4.489498e-53
## 7    Jul 0.7931579 7.559314e-63
## 8    Aug 0.7412206 4.739576e-57
## 9    Sep 0.7321940 3.993048e-54
## 10   Oct 0.6612202 5.943298e-45
## 11   Nov 0.6143546 7.458644e-40
## 12   Dec 0.6232205 3.441475e-41

Estimation of Models

SLMs <- data.frame()
test$fit3 <- NA
test$rho <- NA
for (i in 1:12){
    m <- month(i, label = T)
    temporary <- filter(avg_QC, Month == m) |> mutate(Dengue = round(Dengue,0))
    temporary$rho <- temporary$Dengue %>%
        lag.listw(lw,.,zero.policy=TRUE)

    spat_model <- glm(Dengue ~ Rainfall + lag.Rain+ 
                            Temperature + 
                            Vegetation + rho , 
                        data = temporary, 
                        family = "poisson" )

    
    #prediction
    nb <- poly2nb(test[test$Month==m,], queen = T)
    lw <- nb2listw(nb, style = "U", zero.policy=TRUE)
    
    test[test$Month==m,]$rho <- test[test$Month==m,]$Dengue %>%
        lag.listw(lw,.,zero.policy=TRUE)
    
    test[test$Month==m,]$fit3  <- predict(spat_model, 
                                          newdata = test[test$Month==m,], 
                                          type = "response")
    
    # combining estimates
    t <- summary(spat_model)$coefficients[,c("Estimate","Pr(>|z|)")]
    u <- data.frame(Month = m, coefficient = rownames(t), t )
    
    colnames(u) <- c("Month", "Coefficient", "Estimate", "p.value")
    
    row.names(u) <- NULL
    
    SLMs <- rbind(SLMs, u)
    
}
SLMs %>%
    pivot_longer(cols = c("Estimate", "p.value")) %>%
    mutate(value = round(value,3)) %>%
    pivot_wider(values_from = value, names_from = Month)
## # A tibble: 12 × 14
##    Coefficient name        Jan    Feb    Mar    Apr    May     Jun    Jul    Aug
##    <chr>       <chr>     <dbl>  <dbl>  <dbl>  <dbl>  <dbl>   <dbl>  <dbl>  <dbl>
##  1 (Intercept) Estimate -3.56  -2.45  -4.38  -3.94  -2.47  -14.8   -5.87  -6.40 
##  2 (Intercept) p.value   0      0.003  0      0      0.306   0      0.013  0    
##  3 Rainfall    Estimate -0.034  0.091  0.264 -0.018 -0.012   0.009  0.029  0.004
##  4 Rainfall    p.value   0.036  0.004  0      0.837  0.546   0.249  0      0.092
##  5 lag.Rain    Estimate -0.004 -0.024 -0.138  0.096  0.044   0.055 -0.015  0.008
##  6 lag.Rain    p.value   0.274  0.117  0.001  0.573  0.023   0      0.015  0.021
##  7 Temperature Estimate  0.086  0.067  0.079  0.063  0.046   0.098 -0.008  0.039
##  8 Temperature p.value   0      0      0      0.001  0.025   0      0.249  0    
##  9 Vegetation  Estimate  7.78  -0.472 -6.89  -1.09  -0.491  -0.235 -0.685  0.721
## 10 Vegetation  p.value   0      0.788  0      0.098  0.365   0.79   0.086  0.003
## 11 rho         Estimate 18.9   16.1   21.3   32.8   26.5    18.8    8.07   5.56 
## 12 rho         p.value   0      0      0      0      0       0      0      0    
## # ℹ 4 more variables: Sep <dbl>, Oct <dbl>, Nov <dbl>, Dec <dbl>

Model Evaluation

Map of prediction

ggplot(test)+
        geom_sf(aes(fill = fit3, geometry = geometry))+
    facet_wrap(~Month) +
    scale_fill_viridis_c(option = "B",
                        direction = -1,
                        limits = c(0, 430),
                        breaks = seq(0, 400, by = 100)) +
    theme_bw() +
    theme(plot.title = element_text(hjust = 0.5),
          plot.subtitle = element_text(hjust = 0.5),
          axis.text.x = element_blank(),
          axis.ticks.x = element_blank(),
          axis.text.y = element_blank())+
    ggtitle("Predicted Dengue Cases Per Barangay in 2019",
            subtitle = "Model 3: Spatial Autoregressive Model")

ggsave("Predicted Dengue Cases Per Barangay in 2019 (Model 3) - 430 400 100.png", plot = last_plot(), width = 8, height = 8)

Prediction vs actual

ggplot(test, aes(y = Dengue, x = fit3))+
    geom_point()+
    geom_abline(slope = 1)+
    xlim(0,100) + ylim(0,100)+theme_bw()+
    xlab("Predicted number of dengue cases")+
    ylab("Actual number of dengue cases")+
    ggtitle("Actual vs Predicted Plot",
            subtitle = "Model 3: Spatial Autoregressive Model")+
    theme(plot.title = element_text(hjust = 0.5),
          plot.subtitle = element_text(hjust = 0.5))
## Warning: Removed 27 rows containing missing values or values outside the scale range
## (`geom_point()`).

ggsave("Actual vs Predicted Plot (Model 3).png", plot = last_plot(), width = 8, height = 8)
## Warning: Removed 27 rows containing missing values or values outside the scale range
## (`geom_point()`).

RMSE

Metrics::rmse(test$Dengue, test$fit3)
## [1] 22.84478
# Export test dataframe to CSV without modifying rows and columns
#write.csv(test, file = "test_data.csv", row.names = FALSE) # fit1, fit2, fit3, rho results from test dataframe

#write.csv(SLMs, file = "SLMs_data.csv", row.names = FALSE) # SLMs estimates dataframe

Exposure Model

Adding estimates to the test data

# Remove the row with "(Intercept)" from SLMs dataframe
SLMs <- SLMs[SLMs$Coefficient != "(Intercept)", ]

# Loop through each month in SLMs dataframe
for (i in unique(SLMs$Month)) {
  month_data <- filter(SLMs, Month == i)
  
  # Loop through each coefficient in the month_data
  for (j in 1:nrow(month_data)) {
    coefficient <- month_data[j, "Coefficient"]
    estimate <- month_data[j, "Estimate"]
    
    # Create a new column name for the coefficient estimate
    new_column_name <- paste(coefficient, "estimate", sep = "_")
    
    # Match the month in the test dataframe and add the coefficient estimate
    test[test$Month == i, new_column_name] <- as.numeric(estimate)
  }
}

# Now test dataframe should have new columns for each coefficient estimate

Calculating exposure indices

# Calculate the Exposure Index per row in the test dataframe
test$Exposure_Index <- with(test, 
  Rainfall * Rainfall_estimate + 
  `lag.Rain` * `lag.Rain_estimate` + 
  Temperature * Temperature_estimate + 
  Vegetation * Vegetation_estimate + 
  rho * rho_estimate
)

# Print the first 10 rows of the test dataframe with the new Exposure_Index
print(head(test, 10))
## Simple feature collection with 10 features and 20 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 286990 ymin: 1621630 xmax: 287570.3 ymax: 1621984
## Projected CRS: WGS 84 / UTM zone 51N
##      District Barangay Year Month Dengue   Rainfall Temperature Vegetation
## 1  DISTRICT 1   ALICIA 2019   Jan      6  28.483838    43.64173    0.34745
## 2  DISTRICT 1   ALICIA 2019   Feb      0   5.538522    43.77079    0.30080
## 3  DISTRICT 1   ALICIA 2019   Mar      1   7.756636    52.06994    0.32640
## 4  DISTRICT 1   ALICIA 2019   Apr      1  12.265913    49.89770    0.29550
## 5  DISTRICT 1   ALICIA 2019   May      0 198.195090    47.72545    0.06920
## 6  DISTRICT 1   ALICIA 2019   Jun      1 227.645734    44.71861    0.25065
## 7  DISTRICT 1   ALICIA 2019   Jul      2 378.972106    41.71176    0.11395
## 8  DISTRICT 1   ALICIA 2019   Aug      1 521.374819    37.83839    0.17940
## 9  DISTRICT 1   ALICIA 2019   Sep      9 357.980239    46.80949    0.28320
## 10 DISTRICT 1   ALICIA 2019   Oct      5  97.082126    55.78059    0.45800
##                          geometry lag.Temp   lag.Rain     fit1      fit2
## 1  POLYGON ((287225.7 1621983,... 37.90846 126.422498 2.758916 2.8216933
## 2  POLYGON ((287225.7 1621983,... 43.64173  28.483838 2.273079 1.9342021
## 3  POLYGON ((287225.7 1621983,... 43.77079   5.538522 2.516804 2.1599189
## 4  POLYGON ((287225.7 1621983,... 52.06994   7.756636 2.284094 1.9252207
## 5  POLYGON ((287225.7 1621983,... 49.89770  12.265913 1.507536 0.9753155
## 6  POLYGON ((287225.7 1621983,... 47.72545 198.195090 2.867143 2.6322143
## 7  POLYGON ((287225.7 1621983,... 44.71861 227.645734 2.384300 1.8526119
## 8  POLYGON ((287225.7 1621983,... 41.71176 378.972106 3.783218 3.2847818
## 9  POLYGON ((287225.7 1621983,... 37.83839 521.374819 4.037112 6.4144884
## 10 POLYGON ((287225.7 1621983,... 46.80949 357.980239 4.572273 7.7673414
##          fit3         rho Rainfall_estimate lag.Rain_estimate
## 1   6.9723992 0.022670025      -0.033565443      -0.003550731
## 2   1.4282020 0.012594458       0.091316601      -0.024035765
## 3   0.4616287 0.022670025       0.263615815      -0.138143330
## 4   0.7651975 0.010075567      -0.017851155       0.095662066
## 5   0.1319221 0.001259446      -0.011521284       0.043745477
## 6  12.6808215 0.006297229       0.008709635       0.055062791
## 7   4.2775306 0.013853904       0.028925229      -0.014618580
## 8   2.2194615 0.027707809       0.004419174       0.008250221
## 9  13.9867926 0.073047859      -0.005370727       0.013173704
## 10  9.0243087 0.042821159       0.022196362      -0.019547281
##    Temperature_estimate Vegetation_estimate rho_estimate Exposure_Index
## 1           0.086473491           7.7763658    18.898711      5.4992205
## 2           0.066829650          -0.4717562    16.092275      2.8070840
## 3           0.078642570          -6.8917769    21.275014      3.6074050
## 4           0.062880472          -1.0896474    32.770193      3.6688333
## 5           0.045907648          -0.4913028    26.506915      0.4434655
## 6           0.097720395          -0.2348960    18.833071     17.3255256
## 7          -0.008337363          -0.6851428     8.067844      7.3199305
## 8           0.039122540           0.7208237     5.561326      7.1943918
## 9           0.044016593           0.5879307     7.166312      7.6962035
## 10          0.067658989           2.7319804     7.639324      0.5097600

Normalizing exposure indices

# Normalize the Exposure Index values to range from 0 to 1
min_exposure_index <- min(test$Exposure_Index, na.rm = TRUE)
max_exposure_index <- max(test$Exposure_Index, na.rm = TRUE)
test$Exposure_Index_Normalized <- (test$Exposure_Index - min_exposure_index) / 
                                  (max_exposure_index - min_exposure_index)

# Print the first 10 rows of the test dataframe with the new Exposure_Index_Normalized
print(head(test, 10))
## Simple feature collection with 10 features and 21 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 286990 ymin: 1621630 xmax: 287570.3 ymax: 1621984
## Projected CRS: WGS 84 / UTM zone 51N
##      District Barangay Year Month Dengue   Rainfall Temperature Vegetation
## 1  DISTRICT 1   ALICIA 2019   Jan      6  28.483838    43.64173    0.34745
## 2  DISTRICT 1   ALICIA 2019   Feb      0   5.538522    43.77079    0.30080
## 3  DISTRICT 1   ALICIA 2019   Mar      1   7.756636    52.06994    0.32640
## 4  DISTRICT 1   ALICIA 2019   Apr      1  12.265913    49.89770    0.29550
## 5  DISTRICT 1   ALICIA 2019   May      0 198.195090    47.72545    0.06920
## 6  DISTRICT 1   ALICIA 2019   Jun      1 227.645734    44.71861    0.25065
## 7  DISTRICT 1   ALICIA 2019   Jul      2 378.972106    41.71176    0.11395
## 8  DISTRICT 1   ALICIA 2019   Aug      1 521.374819    37.83839    0.17940
## 9  DISTRICT 1   ALICIA 2019   Sep      9 357.980239    46.80949    0.28320
## 10 DISTRICT 1   ALICIA 2019   Oct      5  97.082126    55.78059    0.45800
##                          geometry lag.Temp   lag.Rain     fit1      fit2
## 1  POLYGON ((287225.7 1621983,... 37.90846 126.422498 2.758916 2.8216933
## 2  POLYGON ((287225.7 1621983,... 43.64173  28.483838 2.273079 1.9342021
## 3  POLYGON ((287225.7 1621983,... 43.77079   5.538522 2.516804 2.1599189
## 4  POLYGON ((287225.7 1621983,... 52.06994   7.756636 2.284094 1.9252207
## 5  POLYGON ((287225.7 1621983,... 49.89770  12.265913 1.507536 0.9753155
## 6  POLYGON ((287225.7 1621983,... 47.72545 198.195090 2.867143 2.6322143
## 7  POLYGON ((287225.7 1621983,... 44.71861 227.645734 2.384300 1.8526119
## 8  POLYGON ((287225.7 1621983,... 41.71176 378.972106 3.783218 3.2847818
## 9  POLYGON ((287225.7 1621983,... 37.83839 521.374819 4.037112 6.4144884
## 10 POLYGON ((287225.7 1621983,... 46.80949 357.980239 4.572273 7.7673414
##          fit3         rho Rainfall_estimate lag.Rain_estimate
## 1   6.9723992 0.022670025      -0.033565443      -0.003550731
## 2   1.4282020 0.012594458       0.091316601      -0.024035765
## 3   0.4616287 0.022670025       0.263615815      -0.138143330
## 4   0.7651975 0.010075567      -0.017851155       0.095662066
## 5   0.1319221 0.001259446      -0.011521284       0.043745477
## 6  12.6808215 0.006297229       0.008709635       0.055062791
## 7   4.2775306 0.013853904       0.028925229      -0.014618580
## 8   2.2194615 0.027707809       0.004419174       0.008250221
## 9  13.9867926 0.073047859      -0.005370727       0.013173704
## 10  9.0243087 0.042821159       0.022196362      -0.019547281
##    Temperature_estimate Vegetation_estimate rho_estimate Exposure_Index
## 1           0.086473491           7.7763658    18.898711      5.4992205
## 2           0.066829650          -0.4717562    16.092275      2.8070840
## 3           0.078642570          -6.8917769    21.275014      3.6074050
## 4           0.062880472          -1.0896474    32.770193      3.6688333
## 5           0.045907648          -0.4913028    26.506915      0.4434655
## 6           0.097720395          -0.2348960    18.833071     17.3255256
## 7          -0.008337363          -0.6851428     8.067844      7.3199305
## 8           0.039122540           0.7208237     5.561326      7.1943918
## 9           0.044016593           0.5879307     7.166312      7.6962035
## 10          0.067658989           2.7319804     7.639324      0.5097600
##    Exposure_Index_Normalized
## 1                 0.32233960
## 2                 0.20124389
## 3                 0.23724335
## 4                 0.24000647
## 5                 0.09492534
## 6                 0.85430179
## 7                 0.40423745
## 8                 0.39859056
## 9                 0.42116268
## 10                0.09790735
# Define the breaks for 5 classes with the specified ranges
breaks <- c(0, 0.4, 0.6, 0.8, 0.9, 1)

# Convert normalized exposure index values into discrete intervals
test$Normalized_Exposure_Index_Group <- cut(test$Exposure_Index_Normalized, 
                                            breaks = breaks, 
                                            labels = FALSE, 
                                            include.lowest = TRUE)

# Define the colors for each class
class_colors <- c("#FFD700", "#FFA500", "#FF6347", "#CD5C5C", "#8B0000")

# Define the labels for each class
labels <- c("[0, 0.4)", "[0.4, 0.6)", "[0.6, 0.8)", "[0.8, 0.9)", "[0.9, 1]")

# Plot with the new grouping
ggplot(data = test) +
  geom_sf(aes(fill = factor(Normalized_Exposure_Index_Group), geometry = geometry), color = "black") +
  facet_wrap(~Month) +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        axis.text.y = element_blank()) +
  ggtitle("Dengue Exposure Index") +
  scale_fill_manual(values = class_colors, 
                    name = "Index",
                    labels = labels)

ggsave("Dengue Exposure Index (Option 2).png", plot = last_plot(), width = 8, height = 8)