library(tidyverse)
library(tidyr)
# Point the session at the folder that holds the downloaded CSV.
# NOTE(review): this absolute path only exists on the author's machine;
# opening the project from that folder (or using a relative path) would make
# the report reproducible elsewhere.
setwd("C:/Users/Lenovo/Downloads/SummerData110")

# Read the CDC 500 Cities file into a tibble named cities500.
# The original follow-up call data(cities500) was dropped: data() loads
# bundled datasets by name and is not needed once read_csv() has created
# the object.
cities500 <- read_csv("500CitiesLocalHealthIndicators.cdc.csv")
Healthy Cities GIS Assignment
Load the libraries and set the working directory
The GeoLocation variable has (lat, long) format
Split GeoLocation (lat, long) into two columns: lat and long
# Split GeoLocation, stored as "(lat, long)" text, into two numeric columns.
latlong <- cities500 |>
  # Strip the surrounding parentheses so only "lat, long" remains.
  mutate(GeoLocation = str_replace_all(GeoLocation, "[()]", "")) |>
  # Split on the comma; convert = TRUE turns both pieces into numbers.
  separate(GeoLocation, into = c("lat", "long"), sep = ",", convert = TRUE)

# Preview the first rows to confirm the new lat and long columns.
head(latlong)
# A tibble: 6 × 25
Year StateAbbr StateDesc CityName GeographicLevel DataSource Category
<dbl> <chr> <chr> <chr> <chr> <chr> <chr>
1 2017 CA California Hawthorne Census Tract BRFSS Health Outcom…
2 2017 CA California Hawthorne City BRFSS Unhealthy Beh…
3 2017 CA California Hayward City BRFSS Health Outcom…
4 2017 CA California Hayward City BRFSS Unhealthy Beh…
5 2017 CA California Hemet City BRFSS Prevention
6 2017 CA California Indio Census Tract BRFSS Health Outcom…
# ℹ 18 more variables: UniqueID <chr>, Measure <chr>, Data_Value_Unit <chr>,
# DataValueTypeID <chr>, Data_Value_Type <chr>, Data_Value <dbl>,
# Low_Confidence_Limit <dbl>, High_Confidence_Limit <dbl>,
# Data_Value_Footnote_Symbol <chr>, Data_Value_Footnote <chr>,
# PopulationCount <dbl>, lat <dbl>, long <dbl>, CategoryID <chr>,
# MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>
# List every column of latlong, including the new lat and long columns.
colnames(latlong)
[1] "Year" "StateAbbr"
[3] "StateDesc" "CityName"
[5] "GeographicLevel" "DataSource"
[7] "Category" "UniqueID"
[9] "Measure" "Data_Value_Unit"
[11] "DataValueTypeID" "Data_Value_Type"
[13] "Data_Value" "Low_Confidence_Limit"
[15] "High_Confidence_Limit" "Data_Value_Footnote_Symbol"
[17] "Data_Value_Footnote" "PopulationCount"
[19] "lat" "long"
[21] "CategoryID" "MeasureId"
[23] "CityFIPS" "TractFIPS"
[25] "Short_Question_Text"
Filter the dataset
Remove the StateDesc that includes the United States, select Prevention as the category (of interest), filter for only measuring crude prevalence and select only 2017.
# Show the distinct measure names available in Short_Question_Text.
unique(latlong[["Short_Question_Text"]])
[1] "Arthritis"
[2] "Current Smoking"
[3] "Coronary Heart Disease"
[4] "Obesity"
[5] "Cholesterol Screening"
[6] "Binge Drinking"
[7] "COPD"
[8] "Diabetes"
[9] "Mammography"
[10] "Teeth Loss"
[11] "Current Asthma"
[12] "Health Insurance"
[13] "Chronic Kidney Disease"
[14] "Stroke"
[15] "Dental Visit"
[16] "Physical Inactivity"
[17] "Sleep <7 hours"
[18] "High Blood Pressure"
[19] "Cancer (except skin)"
[20] "Annual Checkup"
[21] "Pap Smear Test"
[22] "Physical Health"
[23] "Mental Health"
[24] "Core preventive services for older men"
[25] "High Cholesterol"
[26] "Taking BP Medication"
[27] "Colorectal Cancer Screening"
[28] "Core preventive services for older women"
## Filtering data
# Keep only the rows used in the rest of the report:
#   - city-level records (drops census-tract rows),
#   - crude prevalence values,
#   - the "Mental Health" measure,
#   - rows that actually have a value.
mental_data <- latlong |>
  filter(
    GeographicLevel == "City",
    Data_Value_Type == "Crude prevalence",
    Short_Question_Text == "Mental Health",
    !is.na(Data_Value)
  )
What variables are included? (can any of them be removed?)
# List the columns of the filtered data to decide which ones to keep.
colnames(mental_data)
[1] "Year" "StateAbbr"
[3] "StateDesc" "CityName"
[5] "GeographicLevel" "DataSource"
[7] "Category" "UniqueID"
[9] "Measure" "Data_Value_Unit"
[11] "DataValueTypeID" "Data_Value_Type"
[13] "Data_Value" "Low_Confidence_Limit"
[15] "High_Confidence_Limit" "Data_Value_Footnote_Symbol"
[17] "Data_Value_Footnote" "PopulationCount"
[19] "lat" "long"
[21] "CategoryID" "MeasureId"
[23] "CityFIPS" "TractFIPS"
[25] "Short_Question_Text"
Remove the variables that will not be used in the assignment
# Keep only the columns needed for the plot: city, state, value, coordinates.
mental_data_clean <- mental_data |>
  select(CityName, StateAbbr, Data_Value, lat, long)

# Preview the trimmed dataset.
head(mental_data_clean)
# A tibble: 6 × 5
CityName StateAbbr Data_Value lat long
<chr> <chr> <dbl> <dbl> <dbl>
1 Stamford CT 10 41.1 -73.6
2 Iowa City IA 13.2 41.7 -91.5
3 Lake Charles LA 16.6 30.2 -93.2
4 Minneapolis MN 11.4 45.0 -93.3
5 Rochester MN 10.1 44.0 -92.5
6 Sparks NV 14.1 39.6 -120.
The new dataset is a manageable dataset now.
2. Based on the GIS tutorial (Japan earthquakes), create one plot about something in your subsetted dataset.
First plot chunk here
# Non-map plot: bar chart of the 10 lowest and 10 highest cities
library(ggplot2)
library(dplyr)

# Create top/bottom dataset
# (the slice, label and grouping steps were written with ChatGPT assistance)
top_bottom <- mental_data_clean |>
  arrange(Data_Value) |>
  # After the ascending sort, rows 1-10 hold the 10 lowest values and rows
  # (n() - 9) through n() hold the 10 highest; n() is the total row count.
  slice(c(1:10, (n() - 9):n())) |>
  mutate(
    # "City, ST" label shown on the axis
    CityLabel = paste0(CityName, ", ", StateAbbr),
    # The first 10 rows of the sliced data are the lowest values,
    # the remaining 10 are the highest.
    Group = ifelse(row_number() <= 10, "Lowest", "Highest")
  )

# Color choice: one fill color per group
custom_colors <- c("Lowest" = "lightgreen", "Highest" = "#FF6F61")

bar_plot <- top_bottom |>
  ggplot(
    aes(x = reorder(CityLabel, -Data_Value), y = Data_Value, fill = Group)
  ) +
  geom_col() +
  coord_flip() +
  scale_fill_manual(
    values = custom_colors,
    name = "Group",
    # Legend text must match the group it describes; the earlier version had
    # these two labels swapped.
    labels = c(
      "Lowest" = "Lowest 10",
      "Highest" = "Highest 10"
    )
  ) +
  geom_text(aes(label = paste0(Data_Value, "%"))) +
  labs(
    title = "Top 10 vs Bottom 10 U.S. Cities by Poor Mental Health (2017)",
    subtitle = "Crude prevalence of adults reporting poor mental health",
    caption = "Data source: CDC 500 Cities Dataset",
    y = "",
    x = ""
  ) +
  theme_minimal() +
  theme(
    # Bold title (found with ChatGPT assistance)
    plot.title = element_text(face = "bold")
  )

bar_plot
3. Now create a map of your subsetted dataset.
First map chunk here
library(leaflet)
Warning: package 'leaflet' was built under R version 4.4.2
library(RColorBrewer)
# Defining color palette based on mental health value
pal <- colorNumeric(
  palette = "YlOrRd", # yellow to red
  domain = mental_data$Data_Value
)

# Creating the map: one circle marker per city, colored by its value
leaflet(data = mental_data) |>
  addProviderTiles(providers$OpenStreetMap.Mapnik) |> # adding a base map
  addCircleMarkers(
    lng = ~long, lat = ~lat,
    radius = 5,
    color = ~pal(Data_Value),
    stroke = FALSE,
    fillOpacity = 0.7,
    # label text for each marker: "City, ST: value%"
    label = ~paste0(CityName, ", ", StateAbbr, ": ", Data_Value, "%")
  ) |>
  addLegend(
    pal = pal, values = ~Data_Value,
    title = "Poor Mental Health"
  )
4. Refine your map to include a mouse-click tooltip
Refined map chunk here
# Creating interactive map: same markers as before, with a click popup
leaflet(data = mental_data) |>
  addProviderTiles(providers$OpenStreetMap.Mapnik) |> # Base map
  addCircleMarkers(
    lng = ~long, lat = ~lat, # Coordinates
    radius = 5,
    color = ~pal(Data_Value), # Fill color by data value
    stroke = FALSE,
    fillOpacity = 0.7,
    popup = ~paste0( # Mouse-click popup
      "<strong>", CityName, ", ", StateAbbr, "</strong><br/>",
      "Poor Mental Health: ", Data_Value, "%"
    )
  ) |>
  addLegend(
    pal = pal,
    values = ~Data_Value,
    title = "Poor Mental Health (%)"
  )
5. Write a paragraph
In a paragraph, describe the plots you created and what they show.
In this assignment, I explored the topic of mental health using the CDC 500 Cities dataset. I began by filtering the data to include only crude prevalence estimates for mental health in U.S. cities for 2017. After removing unnecessary columns, I created a new column combining city and state for labeling purposes, and I identified the top 10 and bottom 10 cities by poor mental health percentage. I visualized this contrast using a horizontal bar chart. Then, I created two interactive maps using the leaflet package. The first map displayed all 500 cities with circle markers color-scaled by mental health prevalence. The second map allows users to click on each city for detailed tooltips. My initial goal was to analyze both cholesterol and mental health, but due to time constraints I focused solely on mental health. From the map, I observed that cities in the western U.S. generally report lower levels of poor mental health. I was impressed by how easy it was to create such powerful and informative visualizations using just a few lines of R code.