library(tidyverse)
library(tidyr)
library(leaflet) # For tooltip map
library(viridis) # For color palettes
library(maps) # For static map
# Point the session at the folder that holds the assignment data.
# NOTE(review): setwd() with an absolute, user-specific path only works on the
# author's machine; it is kept so the session working directory is unchanged.
setwd("C:/Users/Hana Rose/OneDrive/Data 110")

# Read the CSV into `cities500`.
# The former `data(cities500)` call was dropped: data() loads datasets that
# ship with packages (or sit in ./data), not objects created by read_csv().
cities500 <- read_csv("500CitiesLocalHealthIndicators.cdc.csv")
Healthy Cities GIS Assignment
Load the libraries and set the working directory
The GeoLocation variable has (lat, long) format
Split GeoLocation (lat, long) into two columns: lat and long
# GeoLocation is stored as "(lat, long)": strip the parentheses, then split on
# the comma into `lat` and `long`; convert = TRUE type-converts the new columns.
latlong <- cities500 |>
  mutate(GeoLocation = str_replace_all(GeoLocation, "[()]", "")) |>
  separate(GeoLocation, into = c("lat", "long"), sep = ",", convert = TRUE)
head(latlong)
# A tibble: 6 × 25
Year StateAbbr StateDesc CityName GeographicLevel DataSource Category
<dbl> <chr> <chr> <chr> <chr> <chr> <chr>
1 2017 CA California Hawthorne Census Tract BRFSS Health Outcom…
2 2017 CA California Hawthorne City BRFSS Unhealthy Beh…
3 2017 CA California Hayward City BRFSS Health Outcom…
4 2017 CA California Hayward City BRFSS Unhealthy Beh…
5 2017 CA California Hemet City BRFSS Prevention
6 2017 CA California Indio Census Tract BRFSS Health Outcom…
# ℹ 18 more variables: UniqueID <chr>, Measure <chr>, Data_Value_Unit <chr>,
# DataValueTypeID <chr>, Data_Value_Type <chr>, Data_Value <dbl>,
# Low_Confidence_Limit <dbl>, High_Confidence_Limit <dbl>,
# Data_Value_Footnote_Symbol <chr>, Data_Value_Footnote <chr>,
# PopulationCount <dbl>, lat <dbl>, long <dbl>, CategoryID <chr>,
# MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>
Filter the dataset
Remove the StateDesc that includes the United States, select Prevention as the category (of interest), filter for only measuring crude prevalence and select only 2017.
# Drop rows whose StateDesc is "United States", then keep only crude-prevalence
# values for 2017.
# NOTE: no Category filter is applied at this step; Category is filtered later
# when the assignment subset is built.
latlong_clean <- latlong |>
  filter(StateDesc != "United States") |>
  filter(Data_Value_Type == "Crude prevalence") |>
  filter(Year == 2017)
head(latlong_clean)
# A tibble: 6 × 25
Year StateAbbr StateDesc CityName GeographicLevel DataSource Category
<dbl> <chr> <chr> <chr> <chr> <chr> <chr>
1 2017 CA California Hawthorne Census Tract BRFSS Health Outcom…
2 2017 CA California Hawthorne City BRFSS Unhealthy Beh…
3 2017 CA California Hayward City BRFSS Unhealthy Beh…
4 2017 CA California Indio Census Tract BRFSS Health Outcom…
5 2017 CA California Inglewood Census Tract BRFSS Health Outcom…
6 2017 CA California Lakewood City BRFSS Unhealthy Beh…
# ℹ 18 more variables: UniqueID <chr>, Measure <chr>, Data_Value_Unit <chr>,
# DataValueTypeID <chr>, Data_Value_Type <chr>, Data_Value <dbl>,
# Low_Confidence_Limit <dbl>, High_Confidence_Limit <dbl>,
# Data_Value_Footnote_Symbol <chr>, Data_Value_Footnote <chr>,
# PopulationCount <dbl>, lat <dbl>, long <dbl>, CategoryID <chr>,
# MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>
What variables are included? (can any of them be removed?)
# Print the column names of the filtered data to decide what can be dropped.
colnames(latlong_clean)
[1] "Year" "StateAbbr"
[3] "StateDesc" "CityName"
[5] "GeographicLevel" "DataSource"
[7] "Category" "UniqueID"
[9] "Measure" "Data_Value_Unit"
[11] "DataValueTypeID" "Data_Value_Type"
[13] "Data_Value" "Low_Confidence_Limit"
[15] "High_Confidence_Limit" "Data_Value_Footnote_Symbol"
[17] "Data_Value_Footnote" "PopulationCount"
[19] "lat" "long"
[21] "CategoryID" "MeasureId"
[23] "CityFIPS" "TractFIPS"
[25] "Short_Question_Text"
Remove the variables that will not be used in the assignment
# Drop the bookkeeping columns (source, units, confidence limits, footnotes)
# that are not used anywhere in the assignment.
latlong_clean2 <- latlong_clean |>
  select(
    -DataSource,
    -Data_Value_Unit,
    -DataValueTypeID,
    -Low_Confidence_Limit,
    -High_Confidence_Limit,
    -Data_Value_Footnote_Symbol,
    -Data_Value_Footnote
  )
head(latlong_clean2)
# A tibble: 6 × 18
Year StateAbbr StateDesc CityName GeographicLevel Category UniqueID Measure
<dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 2017 CA California Hawthorne Census Tract Health … 0632548… Arthri…
2 2017 CA California Hawthorne City Unhealt… 632548 Curren…
3 2017 CA California Hayward City Unhealt… 633000 Obesit…
4 2017 CA California Indio Census Tract Health … 0636448… Arthri…
5 2017 CA California Inglewood Census Tract Health … 0636546… Diagno…
6 2017 CA California Lakewood City Unhealt… 639892 Obesit…
# ℹ 10 more variables: Data_Value_Type <chr>, Data_Value <dbl>,
# PopulationCount <dbl>, lat <dbl>, long <dbl>, CategoryID <chr>,
# MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>
The new dataset “Prevention” is a manageable dataset now.
For your assignment, work with a cleaned dataset.
1. Once you run the above code and learn how to filter in this format, filter this dataset however you choose so that you have a subset with no more than 900 observations.
Filter chunk here
# Filter for obesity in Alabama
# NOTE(review): the object name `subset` masks base::subset() as a variable;
# the name is kept because the cleaning step below reads it.
subset <- latlong_clean2 |>
  filter(Category == "Unhealthy Behaviors") |>
  filter(Measure == "Obesity among adults aged >=18 Years") |>
  filter(StateDesc == "Alabama")
head(subset)
# A tibble: 6 × 18
Year StateAbbr StateDesc CityName GeographicLevel Category UniqueID Measure
<dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 2017 AL Alabama Huntsville Census Tract Unhealt… 0137000… Obesit…
2 2017 AL Alabama Birmingham Census Tract Unhealt… 0107000… Obesit…
3 2017 AL Alabama Mobile Census Tract Unhealt… 0150000… Obesit…
4 2017 AL Alabama Mobile Census Tract Unhealt… 0150000… Obesit…
5 2017 AL Alabama Mobile Census Tract Unhealt… 0150000… Obesit…
6 2017 AL Alabama Montgomery Census Tract Unhealt… 0151000… Obesit…
# ℹ 10 more variables: Data_Value_Type <chr>, Data_Value <dbl>,
# PopulationCount <dbl>, lat <dbl>, long <dbl>, CategoryID <chr>,
# MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>
# Clean the subset: keep only rows that have both an obesity value and a
# population count, and where both are strictly positive.
subset_clean <- subset |>
  filter(!is.na(Data_Value) & !is.na(PopulationCount)) |>
  filter(Data_Value > 0 & PopulationCount > 0)
# Calculate the population-weighted obesity rate for each city and summarize it
# as a new city-level data frame, sorted from highest to lowest rate.
#
# Only Census Tract rows are aggregated. The earlier filters never restrict
# GeographicLevel, so the subset can hold a City-level row alongside that
# city's tract rows; summing PopulationCount over both counts residents twice.
city_obesity <- subset_clean |>
  filter(GeographicLevel == "Census Tract") |>
  group_by(CityName) |>
  summarize(
    total_population = sum(PopulationCount),
    # Weight each tract's rate by its population.
    weighted_obesity = sum(Data_Value * PopulationCount) / total_population,
    # Average of the tract coordinates, used to place the city on the maps.
    lat = mean(lat, na.rm = TRUE),
    long = mean(long, na.rm = TRUE)
  ) |>
  arrange(desc(weighted_obesity))
head(city_obesity)
# A tibble: 6 × 5
CityName total_population weighted_obesity lat long
<chr> <dbl> <dbl> <dbl> <dbl>
1 Birmingham 424281 41.4 33.5 -86.8
2 Montgomery 411497 38.5 32.4 -86.3
3 Mobile 390156 37.7 30.7 -88.1
4 Tuscaloosa 180905 36.2 33.2 -87.5
5 Huntsville 360110 35.6 34.7 -86.6
6 Hoover 163211 28.8 33.4 -86.8
2. Based on the GIS tutorial (Japan earthquakes), create one plot about something in your subsetted dataset.
First plot chunk here
# Create a horizontal bar graph of weighted obesity rates in Alabama cities,
# with bars ordered by rate.
ggplot(
  city_obesity,
  aes(x = reorder(CityName, weighted_obesity), y = weighted_obesity)
) +
  geom_col(fill = "pink") +
  coord_flip() +
  labs(
    title = "Weighted City-Level Obesity Rates in Alabama (2017)",
    x = "City",
    y = "Obesity Rate (%)"
  ) +
  theme_minimal()
Source: U.S. Centers for Disease Control and Prevention.
3. Now create a map of your subsetted dataset.
First map chunk here
# Retrieve US state map data from the "maps" package
us_states <- map_data("state")

# Filter states for Alabama
alabama_map <- us_states |>
  filter(region == "alabama")

# Plot static map for weighted obesity rates in Alabama cities:
# state outline first, then one colored point and one label per city.
ggplot() +
  geom_polygon(
    data = alabama_map,
    aes(x = long, y = lat, group = group),
    fill = "gray90", color = "gray40"
  ) +
  geom_point(
    data = city_obesity,
    aes(x = long, y = lat, color = weighted_obesity),
    size = 3.5, alpha = 0.7
  ) +
  geom_text(
    data = city_obesity,
    aes(x = long, y = lat, label = CityName),
    nudge_y = 0.15, size = 3
  ) +
  scale_color_viridis(option = "plasma", name = "Obesity Rate (%)") +
  coord_fixed(1.3) +
  labs(
    title = "Weighted City-Level Obesity Rates in Alabama (2017)",
    x = "Longitude",
    y = "Latitude"
  ) +
  theme_minimal() +
  theme(
    legend.position = "right"
  )
Source: U.S. Centers for Disease Control and Prevention.
4. Refine your map to include a mouse-click tooltip
Refined map chunk here
# Interactive map: one circle per Alabama city, sized by its weighted obesity
# rate; clicking a circle opens a popup with the city name and rounded rate.
city_obesity |>
  leaflet() |>
  addTiles() |>
  addCircles(
    lng = ~long,
    lat = ~lat,
    radius = ~weighted_obesity * 500,
    weight = 1,
    color = "pink",
    fillOpacity = 0.6,
    popup = ~paste0(
      "<strong>", CityName, "</strong><br>",
      "Weighted Obesity Rate: ", round(weighted_obesity, 1), "%"
    )
  )
Source: U.S. Centers for Disease Control and Prevention.
5. Write a paragraph
In a paragraph, describe the plots you created and what they show.
My plots explore the weighted obesity rates of six cities in Alabama. I chose obesity as my measurement due to the obesity epidemic in the United States. I was initially going to do West Virginia as my state due to it holding the highest rate of obesity in 2017 (it still holds this record in 2025!), with Kentucky as my runner-up, but neither of those states has enough cities to explore, so I chose the third highest, which was Alabama. The bar graph, static map, and tooltip map all show the obesity rate percentages of these six Alabama cities in the year 2017 but are visualized in different ways. A critical insight that can be pulled from this data is that socioeconomic factors play a huge role in obesity. Despite Birmingham and Hoover’s proximity of just 10-15 miles, they sit on opposite ends of the spectrum when it comes to obesity. Hoover is more affluent than Birmingham, meaning the population of Hoover has more access to fresh and nutritious foods, healthcare, health literacy, and safe infrastructure for physical activity. Birmingham also has a historically higher black population, meaning racial health disparity as a result of systemic racial prejudice has likely contributed to the higher obesity rates in Birmingham’s population. All in all, these plots exemplify how health is a privilege rather than a decision made in a vacuum. While obesity is a complex health problem influenced by individual behaviors, the category “unhealthy behaviors”, as used in this data set to characterize obesity, is a reductive, problematic, and inaccurate way of framing this chronic disease and the obesity health crisis. Affordable housing, livable wages, access to education and healthcare, sustainable and human-centered urban planning, and support for historically marginalized communities are the most effective ways of combating large-scale obesity and nurturing a healthier nation.