library(tidyverse)
library(sp)
library(sf)
library(spdep)
library(spatstat)
library(readxl)
library(lubridate)
library(spatialRF)
library(dplyr)
library(ggplot2)
library(stringr)
library(tidyr)
QC <- sf::st_read("C:/Users/Trish/Documents/Thesis 2024 Data/QC Barangays/QC Barangays.shp")
## Reading layer `QC Barangays' from data source
## `C:\Users\Trish\Documents\Thesis 2024 Data\QC Barangays\QC Barangays.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 142 features and 3 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 283480.1 ymin: 1613822 xmax: 299258 ymax: 1634574
## Projected CRS: WGS 84 / UTM zone 51N
mapview::mapview(QC)
Dengue <- data.frame()
for (i in as.character(2017:2019)){
dengue_temp <- read_xlsx("C:/Users/Trish/Documents/Thesis 2024 Data/DENGUE CASE/[RAW DATA] Dengue_Case_2017-2022.xlsx", sheet = i)|>
group_by(Barangay, "Year" = i, "Month" = month(DateOfEntry, abbr = T, label = T)) |>
summarize(DengueCases = n())
Dengue <- rbind(Dengue, dengue_temp)
}
## `summarise()` has grouped output by 'Barangay', 'Year'. You can override using
## the `.groups` argument.
## `summarise()` has grouped output by 'Barangay', 'Year'. You can override using
## the `.groups` argument.
## `summarise()` has grouped output by 'Barangay', 'Year'. You can override using
## the `.groups` argument.
# note: Payatas = Payatas A + Payatas B
Payatas <- filter(Dengue, Barangay %in% c("PAYATAS A", "PAYATAS B")) |>
group_by(Year, Month) |>
summarize(DengueCases = sum(DengueCases))
## `summarise()` has grouped output by 'Year'. You can override using the
## `.groups` argument.
Payatas$Barangay = "PAYATAS"
Dengue <- rbind(Dengue, Payatas) |>
filter(!(Barangay %in% c("PAYATAS A", "PAYATAS B")))|> arrange(Barangay)
Temperature <- data.frame()
for (year in 2017:2020){
temp_file <- paste0("C:/Users/Trish/Documents/Thesis 2024 Data/TEMPERATURE/Temperature_PerBrgyPerMonth_",
year,".csv") |>
read.csv() |>
mutate(Month = lubridate::month(Month,
label = T,
abbr = T),
Year = year,
Barangay = toupper(ADM4_EN),
Temperature = Mean_LST)
Temperature <- rbind(Temperature,
temp_file[,c("Barangay",
"Temperature",
"Month", "Year")])
}
Rainfall <- data.frame()
for (i in 2017:2020){
rain_file <- paste0("C:/Users/Trish/Documents/Thesis 2024 Data/RAINFALL/Rainfall_PerBrgyPerMonth_", i, ".csv")
rain_file0 <- read.csv(rain_file)[,c("ADM4_PCODE", "ADM4_EN","month", "year","mean")]
colnames(rain_file0) <- c("ADM4_PCODE", "Barangay", "Month", "Year", "Rainfall")
rain_file0$Barangay <- toupper(rain_file0$Barangay )
rain_file0$Month <- month(rain_file0$Month, label = T)
rain_file0$Code <- str_sub(rain_file0$ADM4_PCODE,-3)
Rainfall <- rbind(Rainfall,rain_file0)
}
NDVI <- data.frame()
for (year in 2017:2020){
ndvi_files <- paste0("C:/Users/Trish/Documents/Thesis 2024 Data/NDVI/MONTHLY [MODIS]/",year,
"/NDVI_", lubridate::month(1:12, label = T),
year,".csv")
for (i in 1:12){
temp <- read.csv(ndvi_files[i])[,c("ADM4_PCODE", "ADM4_EN", "mean")]
temp$ADM4_EN <- toupper(temp$ADM4_EN)
temp$Month <- lubridate::month(i, label = T)
temp$Year <- year
colnames(temp) <- c("ADM4_PCODE","Barangay", "Vegetation", "Month", "Year")
NDVI <- rbind(NDVI, temp)
}
}
QC$Code <- str_sub(QC$adm4_psgc,-3)
QCx <- select(QC, -Barangay) |>
merge(Rainfall, by = "Code") |>
merge(Temperature, by = c("Barangay", "Year", "Month")) |>
merge(select(NDVI, -Barangay), by = c("ADM4_PCODE", "Year", "Month")) |>
# note that there are some months with no dengue cases
# so we keep complete rows in the previous table
merge(Dengue, by = c("Barangay", "Year", "Month"), all.x = T) |>
# the next line fills the NA to 0
mutate(Dengue = ifelse(is.na(DengueCases), 0, DengueCases)) |>
# selecting few columns
select(District, Barangay, Year, Month,
Dengue,
Rainfall, Temperature, Vegetation,
geometry)
# dropping geometry for faster loading later
View(QCx)
QC0 <- st_drop_geometry(QCx)|>
group_by(Barangay) |>
# adding monthly lagged values for rainfall
summarize(District,
Barangay,
Year,
Month,
Rainfall,
lag.Rain = lag(Rainfall),
Temperature,
lag.Temp = lag(Temperature),
Vegetation,
Dengue)
## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
## always returns an ungrouped data frame and adjust accordingly.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `summarise()` has grouped output by 'Barangay'. You can override using the
## `.groups` argument.
QCx$lag.Temp <- QC0$lag.Temp
QCx$lag.Rain <- QC0$lag.Rain
# for later. Average of values per barangay per month.
QC_brgy_month <- QCx |> group_by(Barangay, Month) |>
summarize(Dengue = mean(Dengue, na.rm = T),
Rainfall = mean(Rainfall, na.rm = T),
Temperature = mean(Temperature, na.rm = T),
Vegetation = mean(Vegetation, na.rm = T))
## `summarise()` has grouped output by 'Barangay'. You can override using the
## `.groups` argument.
QC_month <- QC0 |> group_by(Date = make_date(year = Year, month = Month)) |>
summarize(Dengue = sum(Dengue, na.rm = T),
Rain = mean(Rainfall, na.rm = T)) |>
group_by(Month = month(Date, label = T, abbr = T)) |>
summarise(Dengue = mean(Dengue),
Rain = mean(Rain)) |>
pivot_longer(cols = c(Dengue, Rain))
Map of Average Monthly Dengue Cases per Barangay
library(ggplot2)
library(viridis)
## Loading required package: viridisLite
ggplot(QC_brgy_month) +
geom_sf(aes(fill = Dengue, geometry = geometry), color = "black") +
facet_wrap(~Month) +
scale_fill_viridis_c(option = "B",
direction = -1,
breaks = seq(0, 200, by = 50), # Adjust the breaks as needed
limits = c(0, 220)) + # Adjust the limits based on your data range
theme_bw() +
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.text.y = element_blank()) +
ggtitle("Average of Monthly Dengue Cases Per Barangay (2017-2019)")
ggsave("Average of Monthly Dengue Cases Per Barangay (2017-2019).png", plot = last_plot(), width = 8, height = 8)
Line Plot of Monthly Total Dengue Cases per District
QC0 |> group_by(District, Date = make_date(Year, Month)) |>
summarize(Dengue = sum(Dengue, na.rm = T)) |>
ggplot(aes(x = Date, y = Dengue, color = District))+
geom_line() + theme_bw() +
scale_x_date(breaks = "1 year",
minor_breaks = "3 months",
date_labels = "%Y %b")+
theme(axis.text.x=element_text(angle=0, hjust=0.5),
plot.title = element_text(hjust = 0.5)) +
ggtitle("Monthly Total Dengue Cases Per District")
## `summarise()` has grouped output by 'District'. You can override using the
## `.groups` argument.
ggsave("Monthly Total Dengue Cases Per District.png", plot = last_plot(), width = 11, height = 6)
# Assuming you have your actual data loaded into QCx with columns: Barangay, Year, Month, Dengue, geometry
# Prepare the data frame
QC_brgy_month <- QCx %>%
filter(Year %in% c(2017, 2018, 2019, 2020)) %>%
group_by(Barangay, Year, Month) %>%
summarize(
Dengue = sum(Dengue, na.rm = TRUE),
geometry = first(geometry) # Keep the geometry column for spatial data
) %>%
ungroup()
## `summarise()` has grouped output by 'Barangay', 'Year'. You can override using
## the `.groups` argument.
# Create the plot for multiple years
years <- c(2017, 2018, 2019, 2020) # List of years to plot
# Define the limits and breaks for the color scale
fill_limits <- c(0, 220) # Adjust this based on your data range
fill_breaks <- seq(0, 200, by = 50) # Adjust this based on your data range and desired intervals
for (year_to_plot in years) {
p <- ggplot(QC_brgy_month %>% filter(Year == year_to_plot)) +
geom_sf(aes(fill = Dengue, geometry = geometry), color = "black") +
facet_wrap(~Month) +
scale_fill_viridis_c(option = "B", direction = -1, limits = fill_limits, breaks = fill_breaks) +
theme_bw() +
theme(
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.text.y = element_blank()
) +
ggtitle(paste("Dengue Cases Per Barangay in", year_to_plot))
print(p)
ggsave(paste("Dengue_Cases_", year_to_plot, " - for presentation", ".png", sep = ""), plot = p, device = "png", width = 8, height = 8)
}
QC_brgy_month <- QCx %>%
group_by(Barangay, Month) %>%
summarize(
Dengue = mean(Dengue, na.rm = TRUE),
Rainfall = mean(Rainfall, na.rm = TRUE),
Temperature = mean(Temperature, na.rm = TRUE),
Vegetation = mean(Vegetation, na.rm = TRUE)
)
## `summarise()` has grouped output by 'Barangay'. You can override using the
## `.groups` argument.
library(ggplot2)
ggplot(QC_brgy_month) +
geom_sf(aes(fill = Rainfall, geometry = geometry), color = "black") +
facet_wrap(~Month) +
theme_bw() +
theme(
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.text.y = element_blank()
) +
ggtitle("Average of Monthly Total Rainfall Per Barangay") +
scale_fill_gradientn(name = "Rainfall (mm)", # Set the legend name
colours = c("bisque2", "olivedrab", "deepskyblue"), # Set colors
values = scales::rescale(c(0, 265, 530)), # Adjust values as needed
breaks = seq(0, 520, by = 130), # Adjust breaks as needed
limits = c(0, 550)) # Set the range of values for the color scale
ggsave("Average of Monthly Total Rainfall Per Barangay - for presentation.png", plot = last_plot(), width = 8, height = 8)
Histogram of Dengue Cases and Rainfall throughout the Months
QC0 |> pivot_longer(cols = c(Rainfall, Dengue)) |>
ggplot()+
geom_boxplot(data = pivot_longer(QC0, cols = c(Rainfall, Dengue)) ,
aes(x = Month, y = value, fill = name))
Line Chart Sum Dengue and Mean Rainfall 2017-2020
QC0 |> group_by(Year, Month) |>
summarize(Date = make_date(year = Year, month = Month),
Dengue = sum(Dengue, na.rm = T),
Rain = mean(Rainfall, na.rm = T)) |>
pivot_longer(cols = c(Dengue, Rain), names_to = "Variable") |>
ggplot(aes(x = Date,
y = value,
color = Variable))+
geom_line() + theme_bw() +
scale_x_date(breaks = "1 year",
date_labels = "%Y",
date_minor_breaks = "3 months")+
scale_color_manual(values = c("Dengue" = "red", "Rain" = "blue"),
labels = c("Total Dengue Cases in Quezon City",
"Average Rainfall Amount in Quezon City")) +
labs(color = NULL, x = NULL) +
ggtitle("Monthly Dengue Cases and Average Rainfall Amount in Quezon City",
subtitle = "2017 to 2020")+
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5),
legend.position = "bottom")
## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
## always returns an ungrouped data frame and adjust accordingly.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `summarise()` has grouped output by 'Year', 'Month'. You can override using the
## `.groups` argument.
ggsave("Monthly Dengue Cases and Average Rainfall Amount in Quezon City.png", plot = last_plot(), width = 11, height = 6)
library(ggplot2)
ggplot(QC_month, aes(x = Month, y = value, color = name, group = name)) +
geom_line() +
geom_point() +
theme_bw() +
scale_y_continuous(limits = c(0, 2500)) + # Set y-axis limits here
scale_color_manual(
values = c("Dengue" = "red", "Rain" = "blue"),
labels = c("Average of Monthly Total Dengue Cases", "Average of Monthly Rainfall Amount")
) +
labs(color = NULL, x = NULL) +
ggtitle(
"Average of Monthly Dengue Cases and Rainfall Amount in Quezon City",
subtitle = "2017 to 2020"
) +
theme(
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5),
legend.position = "bottom"
)
ggsave("Average of Monthly Dengue Cases and Rainfall Amount in Quezon City.png", plot = last_plot(), width = 11, height = 6)
Sum Dengue and Mean Rainfall 2017-2019
library(dplyr)
library(ggplot2)
library(lubridate)
QC0 |>
filter(Year >= 2017 & Year <= 2019) |> # Filter for the years 2017 to 2019
group_by(Year, Month) |>
summarize(Date = make_date(year = Year, month = Month),
Dengue = sum(Dengue, na.rm = TRUE),
Rain = mean(Rainfall, na.rm = TRUE)) |>
pivot_longer(cols = c(Dengue, Rain), names_to = "Variable", values_to = "value") |>
ggplot(aes(x = Date, y = value, color = Variable)) +
geom_line() +
theme_bw() +
scale_x_date(
date_labels = "%Y",
date_minor_breaks = "2 month" # Specify the minor breaks for every month
) +
scale_color_manual(
values = c("Dengue" = "red", "Rain" = "blue"),
labels = c("Total Dengue Cases in Quezon City", "Average Rainfall Amount in Quezon City")
) +
labs(color = NULL, x = NULL) +
ggtitle(
"Monthly Dengue Cases and Average Rainfall Amount in Quezon City",
subtitle = "2017 to 2019"
) +
theme(
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5),
legend.position = "bottom"
)
## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
## always returns an ungrouped data frame and adjust accordingly.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `summarise()` has grouped output by 'Year', 'Month'. You can override using the
## `.groups` argument.
ggsave("Average of Monthly Dengue Cases and Rainfall Amount in Quezon City (2017-2019).png", plot = last_plot(), width = 11, height = 6)
library(dplyr)
library(ggplot2)
library(lubridate)
# Data preprocessing and visualization
QC0 |>
group_by(District, Date = make_date(year = Year, month = Month)) |>
summarize(Dengue = sum(Dengue, na.rm = TRUE),
Rain = mean(Rainfall, na.rm = TRUE)) |>
pivot_longer(cols = c(Dengue, Rain), names_to = "Variable") |>
ggplot(aes(x = Date, y = value, color = Variable)) +
geom_line() +
theme_bw() +
facet_wrap(~District) +
scale_x_date(
breaks = "1 year",
date_labels = "%Y",
date_minor_breaks = "3 months"
) +
scale_color_manual(
values = c("Dengue" = "black", "Rain" = "blue"),
labels = c("Total Dengue Cases in District", "Average Rainfall Amount in District")
) +
labs(color = NULL, x = NULL) +
ggtitle(
"Monthly Dengue Cases and Rainfall Amount Per District",
subtitle = "2017 to 2020"
) +
theme(
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5),
legend.position = "bottom"
)
## `summarise()` has grouped output by 'District'. You can override using the
## `.groups` argument.
##ggsave("Total Dengue Cases and Average Rainfall Amount Per District in Quezon City.png", plot = last_plot(), width = 11, height = 6)
Pearson Correlation Per District
# Create a data frame to store the correlations
correlation_table <- QC0 %>%
group_by(District) %>%
summarize(Correlation = cor(Dengue, Rainfall, use = "complete.obs"))
print(correlation_table)
## # A tibble: 6 × 2
## District Correlation
## <chr> <dbl>
## 1 DISTRICT 1 0.137
## 2 DISTRICT 2 0.347
## 3 DISTRICT 3 0.144
## 4 DISTRICT 4 0.173
## 5 DISTRICT 5 0.319
## 6 DISTRICT 6 0.235
QC0 |> ggplot(aes(x = Rainfall, y = Dengue))+
geom_point(aes(color = District))+
theme_bw()+
geom_smooth(method = "lm", se = FALSE) +
ggtitle("Scatter Plot of Rainfall vs Dengue Cases") +
theme(plot.title = element_text(hjust = 0.5))
## `geom_smooth()` using formula = 'y ~ x'
ggsave("Scatter Plot of Rainfall vs Dengue Cases.png", plot = last_plot(), width = 11, height = 6)
## `geom_smooth()` using formula = 'y ~ x'
With 1 month lag of rainfall
QC0 |> ggplot(aes(x = lag.Rain, y = Dengue)) +
geom_point(aes(color = District)) +
theme_bw() +
geom_smooth(method = "lm") +
ggtitle("Scatter Plot of Rainfall (with lag) vs Dengue Cases") +
theme(plot.title = element_text(hjust = 0.5))
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 142 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 142 rows containing missing values or values outside the scale range
## (`geom_point()`).
ggsave("Scatter Plot of Rainfall (with lag) vs Dengue Cases.png", plot = last_plot(), width = 11, height = 6)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 142 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Removed 142 rows containing missing values or values outside the scale range
## (`geom_point()`).
Correlation between Dengue Cases and Rainfall and Lagged Rainfall using Pearson’s correlation r
# Calculate Pearson's correlation coefficient between Dengue and Rainfall
correlation1 <- cor(QC0$Dengue, QC0$Rainfall)
# Calculate Pearson's correlation coefficient between Dengue and lagged Rainfall, removing NA values
# Assuming you have a lagged Rainfall variable named 'lag.Rain'
correlation2 <- cor(QC0$Dengue[!is.na(QC0$lag.Rain)], QC0$lag.Rain[!is.na(QC0$lag.Rain)])
# Create a data frame to store the correlations
correlation_table <- data.frame(
Variables = c("Dengue vs. Rainfall", "Dengue vs. Lagged Rainfall"),
Correlation = c(correlation1, correlation2)
)
# Print the table
print(correlation_table)
## Variables Correlation
## 1 Dengue vs. Rainfall 0.1809789
## 2 Dengue vs. Lagged Rainfall 0.2380496
Rainfall and Lagged Rainfall Correlation Coefficient and Confidence Interval
# Calculate Pearson's correlation coefficient and its confidence interval
correlation_result <- cor.test(QC0$Dengue, QC0$Rainfall, alternative = "two.sided", method = "pearson", exact = NULL, conf.level = 0.95)
# Print the correlation coefficient
cat("Correlation coefficient (r) between Dengue and Rainfall:", correlation_result$estimate, "\n")
## Correlation coefficient (r) between Dengue and Rainfall: 0.1809789
# Print the confidence interval
cat("95% Confidence Interval for Dengue and Rainfall:", correlation_result$conf.int, "\n")
## 95% Confidence Interval for Dengue and Rainfall: 0.1579165 0.203844
# Calculate Pearson's correlation coefficient and its confidence interval for Dengue and Lagged Rainfall
correlation_result_lagged <- cor.test(QC0$Dengue[!is.na(QC0$lag.Rain)], QC0$lag.Rain[!is.na(QC0$lag.Rain)], alternative = "two.sided", method = "pearson", exact = NULL, conf.level = 0.95)
# Print the correlation coefficient for Dengue and Lagged Rainfall
cat("Correlation coefficient (r) between Dengue and Lagged Rainfall:", correlation_result_lagged$estimate, "\n")
## Correlation coefficient (r) between Dengue and Lagged Rainfall: 0.2380496
# Print the confidence interval for Dengue and Lagged Rainfall
cat("95% Confidence Interval for Dengue and Lagged Rainfall:", correlation_result_lagged$conf.int, "\n")
## 95% Confidence Interval for Dengue and Lagged Rainfall: 0.215287 0.2605537
ggplot(QC_brgy_month) +
geom_sf(aes(fill = Temperature, geometry = geometry), color = "black") +
facet_wrap(~Month) +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.text.y = element_blank()) +
ggtitle("Average of Monthly Temperature Per Barangay") +
scale_fill_gradientn(colors = c("yellow", "orange", "red", "darkred"), name = "Temperature")
ggsave("Average of Monthly Temperature Per Barangay.png", plot = last_plot(), width = 8, height = 8)
Per district time series plot
library(dplyr)
library(tidyr)
library(ggplot2)
QC0 |>
group_by(District, Date = make_date(year = Year, month = Month)) |>
summarize(Dengue = sum(Dengue, na.rm = TRUE),
Temp = mean(Temperature, na.rm = TRUE)) |>
pivot_longer(cols = c(Dengue, Temp), names_to = "Variable", values_to = "Value") |>
ggplot() +
geom_line(data = . %>% filter(Variable == "Dengue"),
aes(x = Date, y = Value, color = Variable)) +
geom_line(data = . %>% filter(Variable == "Temp"),
aes(x = Date, y = Value * 15, color = Variable)) + # Scale the temperature values here
scale_y_continuous(
name = "Total Dengue cases in District",
sec.axis = sec_axis(~ . / 20, name = "Average Temperature in District (°C)") # Adjusted transformation for the axis
) +
scale_x_date(
breaks = "1 year",
date_labels = "%Y",
date_minor_breaks = "3 months"
) +
scale_color_manual(
values = c("Dengue" = "black", "Temp" = "red"),
labels = c("Total Dengue Cases in District", "Average Temperature in District (°C)")
) +
labs(color = NULL, x = NULL) +
facet_wrap(~District) +
ggtitle("Monthly Dengue Cases and Temperature Per District", subtitle = "2017 to 2020") +
theme_bw() +
theme(
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5),
legend.position = "bottom"
)
## `summarise()` has grouped output by 'District'. You can override using the
## `.groups` argument.
ggsave("Monthly Dengue Cases and Temperature Per District - Option 2.png", plot = last_plot(), width = 11, height = 6)
Correlation Between Dengue Cases and Temperature
# Calculate Pearson's correlation coefficient between Dengue and Rainfall
correlation1 <- cor(QC0$Dengue, QC0$Temperature)
# Create a data frame to store the correlations
correlation_table <- data.frame(
Variables = c("Dengue vs. Temperature"),
Correlation = c(correlation1)
)
# Print the table
print(correlation_table)
## Variables Correlation
## 1 Dengue vs. Temperature -0.06797611
Temperature Correlation Coefficient and Confidence Interval
# Calculate Pearson's correlation coefficient and its confidence interval
correlation_result <- cor.test(QC0$Dengue, QC0$Temperature)
# Print the correlation coefficient
cat("Correlation coefficient (r):", correlation_result$estimate, "\n")
## Correlation coefficient (r): -0.06797611
# Print the confidence interval
cat("95% Confidence Interval:", correlation_result$conf.int, "\n")
## 95% Confidence Interval: -0.09156924 -0.0443067
ggplot(QC_brgy_month) +
geom_sf(aes(fill = Vegetation, geometry = geometry), color = "black") +
facet_wrap(~Month) +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.text.y = element_blank())+
ggtitle("Average of Monthly Vegetation Index Per Barangay")+
scale_fill_viridis_c(option = "D")
ggsave("Average of Monthly Vegetation Index Per Barangay.png", plot = last_plot(), width = 8, height = 8)
QC0 |>
group_by(Date = make_date(year = Year, month = Month)) |>
summarize(Dengue = sum(Dengue, na.rm = T),
Vegetation = mean(Vegetation, na.rm = T)) |>
pivot_longer(cols = c(Dengue, Vegetation),
names_to = "Variable") |>
ggplot(aes(x = Date,
y = value,
group = Variable,
color = Variable))+
geom_line() + theme_bw() +
facet_wrap(.~Variable, nrow = 2, scale = "free_y") +
scale_x_date(breaks = "1 year",
date_labels = "%Y",
date_minor_breaks = "3 months")+
scale_color_manual(values = c("Dengue" = "black", "Vegetation" = "green"),
labels = c("Total Dengue cases in QC",
"Average Vegetation in QC")) +
labs(color = NULL, x = NULL) +
ggtitle("Monthly Dengue Cases and Vegetation in Quezon City",
subtitle = "2017 to 2020")+
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5),
legend.position = "bottom")
ggsave("Monthly Dengue Cases and Vegatation in Quezon City.png", plot = last_plot(), width = 11, height = 6)
Correlation Between Dengue Cases and NDVI
# Calculate Pearson's correlation coefficient between Dengue and Rainfall
correlation1 <- cor(QC0$Dengue, QC0$Vegetation)
# Create a data frame to store the correlations
correlation_table <- data.frame(
Variables = c("Dengue vs. Vegetation"),
Correlation = c(correlation1)
)
# Print the table
print(correlation_table)
## Variables Correlation
## 1 Dengue vs. Vegetation 0.1960672
Correlation Between Dengue and NDVI Per District
library(dplyr)
# Calculate Pearson's correlation coefficient between Dengue and NDVI per district
correlation_per_district <- QC0 %>%
group_by(District) %>%
summarize(correlation = cor(Dengue, Vegetation, use = "complete.obs"))
# Print the correlation table
print(correlation_per_district)
## # A tibble: 6 × 2
## District correlation
## <chr> <dbl>
## 1 DISTRICT 1 0.102
## 2 DISTRICT 2 0.162
## 3 DISTRICT 3 0.0856
## 4 DISTRICT 4 -0.0608
## 5 DISTRICT 5 0.0627
## 6 DISTRICT 6 0.0946
Vegetation Correlation Coefficient and Confidence Interval
# Calculate Pearson's correlation coefficient and its confidence interval
correlation_result <- cor.test(QC0$Dengue, QC0$Vegetation)
# Print the correlation coefficient
cat("Correlation coefficient (r):", correlation_result$estimate, "\n")
## Correlation coefficient (r): 0.1960672
# Print the confidence interval
cat("95% Confidence Interval:", correlation_result$conf.int, "\n")
## 95% Confidence Interval: 0.1731322 0.2187897
We first split our dataset to train and test sets.
train <- filter(QCx, Year %in% c(2017,2018))
test <- filter(QCx, Year == 2019)
\[ log(dengue) = \beta_0 +\beta_1rain + \beta_2temp + \beta_3vegetation \]
mod1 <- glm(Dengue ~ Rainfall + Temperature+ + Vegetation, family = "poisson",
data = train)
summary(mod1)
##
## Call:
## glm(formula = Dengue ~ Rainfall + Temperature + +Vegetation,
## family = "poisson", data = train)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.492e-01 4.604e-02 -5.412 6.22e-08 ***
## Rainfall 1.779e-03 3.993e-05 44.553 < 2e-16 ***
## Temperature 1.674e-03 8.904e-04 1.880 0.0601 .
## Vegetation 3.282e+00 5.184e-02 63.302 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 35820 on 3407 degrees of freedom
## Residual deviance: 29388 on 3404 degrees of freedom
## AIC: 36426
##
## Number of Fisher Scoring iterations: 6
plot(mod1)
test$fit1 <- predict(mod1, test, type = "response")
hist(test$fit1)
ggplot(test) +
geom_sf(aes(fill = fit1, geometry = geometry)) +
facet_wrap(~Month) +
scale_fill_viridis_c(option = "B",
direction = -1,
limits = c(0, 34), # Adjust the limits based on your data range
breaks = seq(0, 32, by = 8)) + # Adjust the breaks as needed
theme_bw() +
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.text.y = element_blank()) +
ggtitle("Predicted Dengue Cases Per Barangay in 2019",
subtitle = "Model 1: Poisson Loglinear Model")
ggsave("Predicted Dengue Cases Per Barangay in 2019 (Model 1) - for presentation.png", plot = last_plot(), width = 8, height = 8)
ggplot(test, aes(y = Dengue, x = fit1))+
geom_point() +
geom_abline(slope = 1)+
xlim(0,100) + ylim(0,100)+theme_bw()+
xlab("Predicted number of dengue cases")+
ylab("Actual number of dengue cases")+
ggtitle("Actual vs Predicted Plot",
subtitle = "Model 1: Poisson Loglinear Model")+
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
ggsave("Actual vs Predicted Plot (Model 1).png", plot = last_plot(), width = 8, height = 8)
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
Metrics::rmse(na.omit(test)$Dengue, na.omit(test)$fit1)
## [1] 11.34747
mod2 <- glm(Dengue ~
Rainfall +
lag.Rain +
Temperature +
Vegetation,
family = "poisson",
data = train)
summary(mod2)
##
## Call:
## glm(formula = Dengue ~ Rainfall + lag.Rain + Temperature + Vegetation,
## family = "poisson", data = train)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -7.960e-01 5.104e-02 -15.595 < 2e-16 ***
## Rainfall 4.155e-04 5.779e-05 7.191 6.44e-13 ***
## lag.Rain 2.197e-03 5.455e-05 40.279 < 2e-16 ***
## Temperature 9.074e-03 9.621e-04 9.432 < 2e-16 ***
## Vegetation 3.303e+00 5.201e-02 63.503 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 34836 on 3265 degrees of freedom
## Residual deviance: 26861 on 3261 degrees of freedom
## (142 observations deleted due to missingness)
## AIC: 33604
##
## Number of Fisher Scoring iterations: 6
plot(mod2)
test$fit2 <- predict(mod2, test, type = "response")
hist(test$fit2)
library(ggplot2)
# Map
ggplot(test)+
geom_sf(aes(fill = fit2, geometry = geometry))+
facet_wrap(~Month) +
scale_fill_viridis_c(option = "B",
direction = -1,
limits = c(0, 34),
breaks = seq(0, 32, by = 8)) +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.text.y = element_blank())+
ggtitle("Predicted Dengue Cases Per Barangay in 2019",
subtitle = "Model 2: Poisson Regression with Monthly Rainfall Lag")
ggsave("Predicted Dengue Cases Per Barangay in 2019 (Model 2) - for presentation.png", plot = last_plot(), width = 8, height = 8)
ggplot(test, aes(y = Dengue, x = fit2))+
geom_point() +
geom_abline(slope = 1)+
xlim(0,100) + ylim(0,100)+theme_bw()+
xlab("Predicted number of dengue cases")+
ylab("Actual number of dengue cases") +
ggtitle("Actual vs Predicted Plot",
subtitle = "Model 2: Poisson Regresson Model with Monthly Rainfall Lag")+
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
ggsave("Actual vs Predicted Plot (Model 2).png", plot = last_plot(), width = 8, height = 8)
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
Metrics::rmse(na.omit(test)$Dengue, na.omit(test)$fit2)
## [1] 11.12746
RMSE of this model is lower than RMSE of model 1.
Before performing the regression, some preliminary tests are considered first.
centroid <- st_centroid(QC)
## Warning: st_centroid assumes attributes are constant over geometries
# queen contiguity
nb <- poly2nb(QC, queen = T)
plot(QC$geometry,
main = "Neighbor Assignment Using Queen Contiguity")
plot(nb, centroid$geometry, col='red', lwd=1, add = T)
avg_QC <- train |> group_by(Barangay, Month) |>
summarize(Dengue = mean(Dengue), ## change to mean or sum to check
Temperature = mean(Temperature, na.rm = T),
Vegetation = mean(Vegetation, na.rm = T),
Rainfall = mean(Rainfall),
lag.Rain = mean(lag.Rain, na.rm = T)) ## check difference if may lag temp
## `summarise()` has grouped output by 'Barangay'. You can override using the
## `.groups` argument.
p <- ggplot(avg_QC) +
geom_sf(aes(fill = Dengue, geometry = geometry)) +
facet_wrap(~Month) +
scale_fill_viridis_c(option = "B",
direction = -1,
limits = c(0, 220),
breaks = seq(0, 200, by = 50)) +
theme_bw() +
theme(
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.text.y = element_blank()
) +
labs(
title = "Average of Monthly Dengue Cases Per Barangay", ## change title
subtitle = "Training Data: 2017-2018"
)
print(p)
ggsave("Average of Monthly Dengue Cases Per Barangay (Training) - for presentation.png", plot = p, width = 8, height = 8) ## change title
moran_tests <- data.frame(month = month(1:12, label = T),
statistic = rep(NA, 12),
p.value = rep(NA,12))
for (i in 1:12){
m <- month(i, label = T)
# W
nb <- poly2nb(filter(avg_QC, Month == m), queen = T)
lw <- nb2listw(nb, style = "U", zero.policy=TRUE)
# WX
lag.deng <- filter(avg_QC, Month == m)$Dengue %>%
lag.listw(lw,.,zero.policy=TRUE)
m_test <- moran.test(lag.deng, lw, na.action = na.pass)
moran_tests[i,"statistic"] <- m_test$estimate[1]
moran_tests[i,"p.value"] <- m_test$p.value
}
moran_tests
## month statistic p.value
## 1 Jan 0.7067399 1.484274e-50
## 2 Feb 0.7476249 5.082005e-57
## 3 Mar 0.7069445 9.582257e-51
## 4 Apr 0.7551903 3.210292e-58
## 5 May 0.7225299 2.137761e-54
## 6 Jun 0.7176300 4.489498e-53
## 7 Jul 0.7931579 7.559314e-63
## 8 Aug 0.7412206 4.739576e-57
## 9 Sep 0.7321940 3.993048e-54
## 10 Oct 0.6612202 5.943298e-45
## 11 Nov 0.6143546 7.458644e-40
## 12 Dec 0.6232205 3.441475e-41
SLMs <- data.frame()
test$fit3 <- NA
test$rho <- NA
for (i in 1:12){
m <- month(i, label = T)
temporary <- filter(avg_QC, Month == m) |> mutate(Dengue = round(Dengue,0))
temporary$rho <- temporary$Dengue %>%
lag.listw(lw,.,zero.policy=TRUE)
spat_model <- glm(Dengue ~ Rainfall + lag.Rain+
Temperature +
Vegetation + rho ,
data = temporary,
family = "poisson" )
#prediction
nb <- poly2nb(test[test$Month==m,], queen = T)
lw <- nb2listw(nb, style = "U", zero.policy=TRUE)
test[test$Month==m,]$rho <- test[test$Month==m,]$Dengue %>%
lag.listw(lw,.,zero.policy=TRUE)
test[test$Month==m,]$fit3 <- predict(spat_model,
newdata = test[test$Month==m,],
type = "response")
# combining estimates
t <- summary(spat_model)$coefficients[,c("Estimate","Pr(>|z|)")]
u <- data.frame(Month = m, coefficient = rownames(t), t )
colnames(u) <- c("Month", "Coefficient", "Estimate", "p.value")
row.names(u) <- NULL
SLMs <- rbind(SLMs, u)
}
SLMs %>%
pivot_longer(cols = c("Estimate", "p.value")) %>%
mutate(value = round(value,3)) %>%
pivot_wider(values_from = value, names_from = Month)
## # A tibble: 12 × 14
## Coefficient name Jan Feb Mar Apr May Jun Jul Aug
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) Estimate -3.56 -2.45 -4.38 -3.94 -2.47 -14.8 -5.87 -6.40
## 2 (Intercept) p.value 0 0.003 0 0 0.306 0 0.013 0
## 3 Rainfall Estimate -0.034 0.091 0.264 -0.018 -0.012 0.009 0.029 0.004
## 4 Rainfall p.value 0.036 0.004 0 0.837 0.546 0.249 0 0.092
## 5 lag.Rain Estimate -0.004 -0.024 -0.138 0.096 0.044 0.055 -0.015 0.008
## 6 lag.Rain p.value 0.274 0.117 0.001 0.573 0.023 0 0.015 0.021
## 7 Temperature Estimate 0.086 0.067 0.079 0.063 0.046 0.098 -0.008 0.039
## 8 Temperature p.value 0 0 0 0.001 0.025 0 0.249 0
## 9 Vegetation Estimate 7.78 -0.472 -6.89 -1.09 -0.491 -0.235 -0.685 0.721
## 10 Vegetation p.value 0 0.788 0 0.098 0.365 0.79 0.086 0.003
## 11 rho Estimate 18.9 16.1 21.3 32.8 26.5 18.8 8.07 5.56
## 12 rho p.value 0 0 0 0 0 0 0 0
## # ℹ 4 more variables: Sep <dbl>, Oct <dbl>, Nov <dbl>, Dec <dbl>
Map of prediction
ggplot(test)+
geom_sf(aes(fill = fit3, geometry = geometry))+
facet_wrap(~Month) +
scale_fill_viridis_c(option = "B",
direction = -1,
limits = c(0, 430),
breaks = seq(0, 400, by = 100)) +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.text.y = element_blank())+
ggtitle("Predicted Dengue Cases Per Barangay in 2019",
subtitle = "Model 3: Spatial Autoregressive Model")
ggsave("Predicted Dengue Cases Per Barangay in 2019 (Model 3) - 430 400 100.png", plot = last_plot(), width = 8, height = 8)
Prediction vs actual
ggplot(test, aes(y = Dengue, x = fit3))+
geom_point()+
geom_abline(slope = 1)+
xlim(0,100) + ylim(0,100)+theme_bw()+
xlab("Predicted number of dengue cases")+
ylab("Actual number of dengue cases")+
ggtitle("Actual vs Predicted Plot",
subtitle = "Model 3: Spatial Autoregressive Model")+
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
## Warning: Removed 27 rows containing missing values or values outside the scale range
## (`geom_point()`).
ggsave("Actual vs Predicted Plot (Model 3).png", plot = last_plot(), width = 8, height = 8)
## Warning: Removed 27 rows containing missing values or values outside the scale range
## (`geom_point()`).
RMSE
Metrics::rmse(test$Dengue, test$fit3)
## [1] 22.84478
# Export test dataframe to CSV without modifying rows and columns
#write.csv(test, file = "test_data.csv", row.names = FALSE) # fit1, fit2, fit3, rho results from test dataframe
#write.csv(SLMs, file = "SLMs_data.csv", row.names = FALSE) # SLMs estimates dataframe
Adding estimates to the test data
# Remove the row with "(Intercept)" from SLMs dataframe
SLMs <- SLMs[SLMs$Coefficient != "(Intercept)", ]
# Loop through each month in SLMs dataframe
for (i in unique(SLMs$Month)) {
month_data <- filter(SLMs, Month == i)
# Loop through each coefficient in the month_data
for (j in 1:nrow(month_data)) {
coefficient <- month_data[j, "Coefficient"]
estimate <- month_data[j, "Estimate"]
# Create a new column name for the coefficient estimate
new_column_name <- paste(coefficient, "estimate", sep = "_")
# Match the month in the test dataframe and add the coefficient estimate
test[test$Month == i, new_column_name] <- as.numeric(estimate)
}
}
# Now test dataframe should have new columns for each coefficient estimate
Calculating exposure indices
# Calculate the Exposure Index per row in the test dataframe
test$Exposure_Index <- with(test,
Rainfall * Rainfall_estimate +
`lag.Rain` * `lag.Rain_estimate` +
Temperature * Temperature_estimate +
Vegetation * Vegetation_estimate +
rho * rho_estimate
)
# Print the first 10 rows of the test dataframe with the new Exposure_Index
print(head(test, 10))
## Simple feature collection with 10 features and 20 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 286990 ymin: 1621630 xmax: 287570.3 ymax: 1621984
## Projected CRS: WGS 84 / UTM zone 51N
## District Barangay Year Month Dengue Rainfall Temperature Vegetation
## 1 DISTRICT 1 ALICIA 2019 Jan 6 28.483838 43.64173 0.34745
## 2 DISTRICT 1 ALICIA 2019 Feb 0 5.538522 43.77079 0.30080
## 3 DISTRICT 1 ALICIA 2019 Mar 1 7.756636 52.06994 0.32640
## 4 DISTRICT 1 ALICIA 2019 Apr 1 12.265913 49.89770 0.29550
## 5 DISTRICT 1 ALICIA 2019 May 0 198.195090 47.72545 0.06920
## 6 DISTRICT 1 ALICIA 2019 Jun 1 227.645734 44.71861 0.25065
## 7 DISTRICT 1 ALICIA 2019 Jul 2 378.972106 41.71176 0.11395
## 8 DISTRICT 1 ALICIA 2019 Aug 1 521.374819 37.83839 0.17940
## 9 DISTRICT 1 ALICIA 2019 Sep 9 357.980239 46.80949 0.28320
## 10 DISTRICT 1 ALICIA 2019 Oct 5 97.082126 55.78059 0.45800
## geometry lag.Temp lag.Rain fit1 fit2
## 1 POLYGON ((287225.7 1621983,... 37.90846 126.422498 2.758916 2.8216933
## 2 POLYGON ((287225.7 1621983,... 43.64173 28.483838 2.273079 1.9342021
## 3 POLYGON ((287225.7 1621983,... 43.77079 5.538522 2.516804 2.1599189
## 4 POLYGON ((287225.7 1621983,... 52.06994 7.756636 2.284094 1.9252207
## 5 POLYGON ((287225.7 1621983,... 49.89770 12.265913 1.507536 0.9753155
## 6 POLYGON ((287225.7 1621983,... 47.72545 198.195090 2.867143 2.6322143
## 7 POLYGON ((287225.7 1621983,... 44.71861 227.645734 2.384300 1.8526119
## 8 POLYGON ((287225.7 1621983,... 41.71176 378.972106 3.783218 3.2847818
## 9 POLYGON ((287225.7 1621983,... 37.83839 521.374819 4.037112 6.4144884
## 10 POLYGON ((287225.7 1621983,... 46.80949 357.980239 4.572273 7.7673414
## fit3 rho Rainfall_estimate lag.Rain_estimate
## 1 6.9723992 0.022670025 -0.033565443 -0.003550731
## 2 1.4282020 0.012594458 0.091316601 -0.024035765
## 3 0.4616287 0.022670025 0.263615815 -0.138143330
## 4 0.7651975 0.010075567 -0.017851155 0.095662066
## 5 0.1319221 0.001259446 -0.011521284 0.043745477
## 6 12.6808215 0.006297229 0.008709635 0.055062791
## 7 4.2775306 0.013853904 0.028925229 -0.014618580
## 8 2.2194615 0.027707809 0.004419174 0.008250221
## 9 13.9867926 0.073047859 -0.005370727 0.013173704
## 10 9.0243087 0.042821159 0.022196362 -0.019547281
## Temperature_estimate Vegetation_estimate rho_estimate Exposure_Index
## 1 0.086473491 7.7763658 18.898711 5.4992205
## 2 0.066829650 -0.4717562 16.092275 2.8070840
## 3 0.078642570 -6.8917769 21.275014 3.6074050
## 4 0.062880472 -1.0896474 32.770193 3.6688333
## 5 0.045907648 -0.4913028 26.506915 0.4434655
## 6 0.097720395 -0.2348960 18.833071 17.3255256
## 7 -0.008337363 -0.6851428 8.067844 7.3199305
## 8 0.039122540 0.7208237 5.561326 7.1943918
## 9 0.044016593 0.5879307 7.166312 7.6962035
## 10 0.067658989 2.7319804 7.639324 0.5097600
Normalizing exposure indices
# Normalize the Exposure Index values to range from 0 to 1
min_exposure_index <- min(test$Exposure_Index, na.rm = TRUE)
max_exposure_index <- max(test$Exposure_Index, na.rm = TRUE)
test$Exposure_Index_Normalized <- (test$Exposure_Index - min_exposure_index) /
(max_exposure_index - min_exposure_index)
# Print the first 10 rows of the test dataframe with the new Exposure_Index_Normalized
print(head(test, 10))
## Simple feature collection with 10 features and 21 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 286990 ymin: 1621630 xmax: 287570.3 ymax: 1621984
## Projected CRS: WGS 84 / UTM zone 51N
## District Barangay Year Month Dengue Rainfall Temperature Vegetation
## 1 DISTRICT 1 ALICIA 2019 Jan 6 28.483838 43.64173 0.34745
## 2 DISTRICT 1 ALICIA 2019 Feb 0 5.538522 43.77079 0.30080
## 3 DISTRICT 1 ALICIA 2019 Mar 1 7.756636 52.06994 0.32640
## 4 DISTRICT 1 ALICIA 2019 Apr 1 12.265913 49.89770 0.29550
## 5 DISTRICT 1 ALICIA 2019 May 0 198.195090 47.72545 0.06920
## 6 DISTRICT 1 ALICIA 2019 Jun 1 227.645734 44.71861 0.25065
## 7 DISTRICT 1 ALICIA 2019 Jul 2 378.972106 41.71176 0.11395
## 8 DISTRICT 1 ALICIA 2019 Aug 1 521.374819 37.83839 0.17940
## 9 DISTRICT 1 ALICIA 2019 Sep 9 357.980239 46.80949 0.28320
## 10 DISTRICT 1 ALICIA 2019 Oct 5 97.082126 55.78059 0.45800
## geometry lag.Temp lag.Rain fit1 fit2
## 1 POLYGON ((287225.7 1621983,... 37.90846 126.422498 2.758916 2.8216933
## 2 POLYGON ((287225.7 1621983,... 43.64173 28.483838 2.273079 1.9342021
## 3 POLYGON ((287225.7 1621983,... 43.77079 5.538522 2.516804 2.1599189
## 4 POLYGON ((287225.7 1621983,... 52.06994 7.756636 2.284094 1.9252207
## 5 POLYGON ((287225.7 1621983,... 49.89770 12.265913 1.507536 0.9753155
## 6 POLYGON ((287225.7 1621983,... 47.72545 198.195090 2.867143 2.6322143
## 7 POLYGON ((287225.7 1621983,... 44.71861 227.645734 2.384300 1.8526119
## 8 POLYGON ((287225.7 1621983,... 41.71176 378.972106 3.783218 3.2847818
## 9 POLYGON ((287225.7 1621983,... 37.83839 521.374819 4.037112 6.4144884
## 10 POLYGON ((287225.7 1621983,... 46.80949 357.980239 4.572273 7.7673414
## fit3 rho Rainfall_estimate lag.Rain_estimate
## 1 6.9723992 0.022670025 -0.033565443 -0.003550731
## 2 1.4282020 0.012594458 0.091316601 -0.024035765
## 3 0.4616287 0.022670025 0.263615815 -0.138143330
## 4 0.7651975 0.010075567 -0.017851155 0.095662066
## 5 0.1319221 0.001259446 -0.011521284 0.043745477
## 6 12.6808215 0.006297229 0.008709635 0.055062791
## 7 4.2775306 0.013853904 0.028925229 -0.014618580
## 8 2.2194615 0.027707809 0.004419174 0.008250221
## 9 13.9867926 0.073047859 -0.005370727 0.013173704
## 10 9.0243087 0.042821159 0.022196362 -0.019547281
## Temperature_estimate Vegetation_estimate rho_estimate Exposure_Index
## 1 0.086473491 7.7763658 18.898711 5.4992205
## 2 0.066829650 -0.4717562 16.092275 2.8070840
## 3 0.078642570 -6.8917769 21.275014 3.6074050
## 4 0.062880472 -1.0896474 32.770193 3.6688333
## 5 0.045907648 -0.4913028 26.506915 0.4434655
## 6 0.097720395 -0.2348960 18.833071 17.3255256
## 7 -0.008337363 -0.6851428 8.067844 7.3199305
## 8 0.039122540 0.7208237 5.561326 7.1943918
## 9 0.044016593 0.5879307 7.166312 7.6962035
## 10 0.067658989 2.7319804 7.639324 0.5097600
## Exposure_Index_Normalized
## 1 0.32233960
## 2 0.20124389
## 3 0.23724335
## 4 0.24000647
## 5 0.09492534
## 6 0.85430179
## 7 0.40423745
## 8 0.39859056
## 9 0.42116268
## 10 0.09790735
# Define the breaks for 5 classes with the specified ranges
breaks <- c(0, 0.4, 0.6, 0.8, 0.9, 1)
# Convert normalized exposure index values into discrete intervals
test$Normalized_Exposure_Index_Group <- cut(test$Exposure_Index_Normalized,
breaks = breaks,
labels = FALSE,
include.lowest = TRUE)
# Define the colors for each class
class_colors <- c("#FFD700", "#FFA500", "#FF6347", "#CD5C5C", "#8B0000")
# Define the labels for each class
labels <- c("[0, 0.4)", "[0.4, 0.6)", "[0.6, 0.8)", "[0.8, 0.9)", "[0.9, 1]")
# Plot with the new grouping
ggplot(data = test) +
geom_sf(aes(fill = factor(Normalized_Exposure_Index_Group), geometry = geometry), color = "black") +
facet_wrap(~Month) +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.text.y = element_blank()) +
ggtitle("Dengue Exposure Index") +
scale_fill_manual(values = class_colors,
name = "Index",
labels = labels)
ggsave("Dengue Exposure Index (Option 2).png", plot = last_plot(), width = 8, height = 8)