# Run these lines if you haven't installed the packages before
# install.packages("sf")
# install.packages("tigris")
# install.packages("ggplot2")
# install.packages("dplyr")
# install.packages("readr") # For reading the CSV file
# Load the libraries for use in this session
library(sf)
## Linking to GEOS 3.13.1, GDAL 3.11.0, PROJ 9.6.0; sf_use_s2() is TRUE
library(tigris)
## To enable caching of data, set `options(tigris_use_cache = TRUE)`
## in your R script or .Rprofile.
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readr)
library(tinytex)
library(stringr)
library(patchwork)
# Download the 2010 cartographic-boundary (cb = TRUE) census tracts for
# Galveston County, TX as an sf object.
# progress_bar = FALSE turns off the download progress bar so its output
# cannot end up pasted into the script or a knitted document, where the
# uncommented "|====" lines are a syntax error.
galveston_tracts <- tracts(
  state = "TX",
  county = "Galveston",
  cb = TRUE,
  year = 2010,
  progress_bar = FALSE
)

# Quick visual check: draw only the tract geometries
plot(st_geometry(galveston_tracts))

# Count the tracts that were returned
number_of_tracts <- nrow(galveston_tracts)
print(number_of_tracts)
# Recorded output from a previous run:
## [1] 67
# Read the death records from the Excel workbook.
# The later steps use the columns `censustracts`, `age` and `death_count`.
library(readxl)
censustract_by_pent_year_deaths <- read_excel(
  "~/utmb/ga_xiaoying/r_project_data_manipulation/data/primary/censustract_by pent_year_deaths.xls"
)

# View() opens a data viewer and is only meaningful in an interactive
# session; skip it when the script is run with Rscript or knitted.
if (interactive()) {
  View(censustract_by_pent_year_deaths)
}
# One row per census tract: the mean age at death, with each input row's
# `age` weighted by its `death_count`. Rows whose `age` is NA are dropped
# (together with their weights) by na.rm = TRUE.
overall_mean_death_data <- summarise(
  group_by(censustract_by_pent_year_deaths, censustracts),
  overall_mean_age = weighted.mean(x = age, w = death_count, na.rm = TRUE)
)
#----------------------------------------------------------------------
# Build a matching 6-character tract key on both tables, then join.
#
# Map side: the key is the last 6 characters of the GEO_ID column.
# Data side: `censustracts` is converted to a character key.
#----------------------------------------------------------------------
galveston_tracts <- galveston_tracts %>%
  mutate(GEO_ID_6_digits = str_sub(GEO_ID, start = -6))

# as.character() on a numeric column can produce scientific notation
# (e.g. 700000 -> "7e+05") and drops leading zeros, which would silently
# break the join. Format numeric codes as fixed-width, zero-padded,
# 6-digit strings instead; character input is left unchanged.
# NOTE(review): assumes numeric tract codes are whole 6-digit numbers
# (e.g. 720100, not 7201.00) -- confirm against the workbook.
overall_mean_death_data <- overall_mean_death_data %>%
  mutate(
    censustracts = if (is.numeric(censustracts)) {
      sprintf("%06.0f", censustracts)
    } else {
      as.character(censustracts)
    }
  )

# Keep every tract polygon; tracts without death data get NA for
# overall_mean_age.
map_data <- left_join(
  galveston_tracts,
  overall_mean_death_data,
  by = c("GEO_ID_6_digits" = "censustracts")
)

# View() is only meaningful in an interactive session.
if (interactive()) {
  View(map_data)
}
#----------------------------------------------------------------------
# Create the heatmap (choropleth map) with title and subtitle built in.
#
# - geom_sf() draws the tract polygons stored in map_data.
# - The 'fill' aesthetic is mapped to the 'overall_mean_age' column.
# - Polygon border width is set with 'linewidth'; since ggplot2 3.4.0
#   'size' no longer controls line/border width.
#----------------------------------------------------------------------
ggplot(data = map_data) +
  geom_sf(aes(fill = overall_mean_age), color = "white", linewidth = 0.2) +
  scale_fill_viridis_c(
    option = "plasma",
    direction = -1,
    # NOTE(review): with the default out-of-bounds handling, values outside
    # these limits are drawn in the NA colour -- confirm the data range.
    limits = c(64, 81),         # Min and max of the legend
    breaks = c(65, 70, 75, 80)  # Legend tick labels
  ) +
  labs(
    title = "Mean Age at Death by Census Tract",
    subtitle = "Galveston County, TX (2006-2025)",
    fill = "Mean Age\nat Death"
  ) +
  theme_void() +
  theme(
    plot.title = element_text(size = 16, hjust = 0.5, face = "bold"),
    plot.subtitle = element_text(size = 12, hjust = 0.5),
    legend.position = "right"
  )
# Caption describing how the tract-level means were computed, plus a copy
# wrapped to 90 characters per line for use under the plot.
caption_text <- "Data aggregated by 5-year periods (2006-2025). To calculate the overall mean age of death, the mean for each 5-year period was weighted by its corresponding number of deaths."
wrapped_caption <- str_wrap(caption_text, width = 90)

# Base choropleth without title or subtitle; those are added later with
# plot_annotation().
# Polygon border width is set with 'linewidth'; since ggplot2 3.4.0 'size'
# no longer controls line/border width.
p_map <- ggplot(data = map_data) +
  geom_sf(aes(fill = overall_mean_age), color = "white", linewidth = 0.2) +
  scale_fill_viridis_c(
    option = "plasma",
    direction = -1,
    # NOTE(review): with the default out-of-bounds handling, values outside
    # these limits are drawn in the NA colour -- confirm the data range.
    limits = c(64, 81),
    breaks = c(65, 70, 75, 80)
  ) +
  labs(
    fill = "Mean Age\nat Death"
  ) +
  theme_void() +
  theme(
    legend.position = "right"
  )
# Preview: attach title, subtitle and the wrapped caption to the base map.
# `+` binds tighter than `&`, so the annotation is added first and the
# theme on the right of `&` is then applied to the combined plot,
# including its annotations.
(
  p_map +
    plot_annotation(
      caption = wrapped_caption,
      subtitle = "Galveston County, TX (2006-2025)",
      title = "Mean Age at Death by Census Tract"
    )
) &
  theme(
    plot.caption = element_text(hjust = 0, size = 10),  # left-aligned caption
    plot.subtitle = element_text(hjust = 0.5, size = 14),
    plot.title = element_text(hjust = 0.5, face = "bold", size = 20)
  )
## Output map
# Combine the base map (p_map) and its annotations into one object that
# can be saved.
# The caption uses wrapped_caption rather than the raw caption_text: as a
# single line at this font size the caption is about as wide as the page
# and risks being clipped in the saved file.
final_plot <- p_map +
  plot_annotation(
    title = "Mean Age at Death by Census Tract",
    subtitle = "Galveston County, TX (2006-2025)",
    caption = wrapped_caption
  ) &
  theme(
    plot.title = element_text(size = 20, face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = 14, hjust = 0.5),
    plot.caption = element_text(size = 9, hjust = 0.5),
    legend.title = element_text(face = "bold", size = 10),  # Bold legend title
    legend.text = element_text(face = "bold"),              # Bold legend labels
    legend.margin = margin(l = -1, unit = "pt")  # Less space left of the legend
  )
# Write the annotated map to disk as an 11 x 8.5 inch JPEG.
# 600 dpi is used here; 300 dpi is the usual standard for printing.
ggsave(
  filename = "galveston_mortality_heatmap.jpeg",
  plot = final_plot,
  width = 11,
  height = 8.5,
  units = "in",
  dpi = 600
)