library(tidyverse)
library(tidyr)
# Read the 500 Cities local health indicators CSV from the working directory.
cities500 <- read_csv(file = "500CitiesLocalHealthIndicators.csv")
library(leaflet)
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
Split GeoLocation (lat, long) into two columns: lat and long
# Drop the parentheses from GeoLocation, then split what is left on the comma
# into `lat` and `long`; `convert = TRUE` lets separate() pick the column types.
latlong2 <- cities500 |>
  mutate(GeoLocation = str_replace_all(GeoLocation, "[()]", "")) |>
  separate(
    col = GeoLocation,
    into = c("lat", "long"),
    sep = ",",
    convert = TRUE
  )
head(latlong2)
## # A tibble: 6 × 25
## Year StateAbbr StateDesc CityName GeographicLevel DataSource Category
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 2017 CA California Hawthorne Census Tract BRFSS Health Outcom…
## 2 2017 CA California Hawthorne City BRFSS Unhealthy Beh…
## 3 2017 CA California Hayward City BRFSS Health Outcom…
## 4 2017 CA California Hayward City BRFSS Unhealthy Beh…
## 5 2017 CA California Hemet City BRFSS Prevention
## 6 2017 CA California Indio Census Tract BRFSS Health Outcom…
## # ℹ 18 more variables: UniqueID <chr>, Measure <chr>, Data_Value_Unit <chr>,
## # DataValueTypeID <chr>, Data_Value_Type <chr>, Data_Value <dbl>,
## # Low_Confidence_Limit <dbl>, High_Confidence_Limit <dbl>,
## # Data_Value_Footnote_Symbol <chr>, Data_Value_Footnote <chr>,
## # PopulationCount <dbl>, lat <dbl>, long <dbl>, CategoryID <chr>,
## # MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>
Remove the rows whose StateDesc is "United States", keep only the Prevention category (the category of interest), keep only crude-prevalence values, and keep only the year 2017.
# Keep a row only when every condition holds: not the national
# "United States" entry, Prevention category, crude prevalence, year 2017.
latlong_clean <- latlong2 |>
  filter(
    StateDesc != "United States",
    Category == "Prevention",
    Data_Value_Type == "Crude prevalence",
    Year == 2017
  )
head(latlong_clean)
## # A tibble: 6 × 25
## Year StateAbbr StateDesc CityName GeographicLevel DataSource Category
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 2017 AL Alabama Montgomery City BRFSS Prevention
## 2 2017 CA California Concord City BRFSS Prevention
## 3 2017 CA California Concord City BRFSS Prevention
## 4 2017 CA California Fontana City BRFSS Prevention
## 5 2017 CA California Richmond Census Tract BRFSS Prevention
## 6 2017 FL Florida Davie Census Tract BRFSS Prevention
## # ℹ 18 more variables: UniqueID <chr>, Measure <chr>, Data_Value_Unit <chr>,
## # DataValueTypeID <chr>, Data_Value_Type <chr>, Data_Value <dbl>,
## # Low_Confidence_Limit <dbl>, High_Confidence_Limit <dbl>,
## # Data_Value_Footnote_Symbol <chr>, Data_Value_Footnote <chr>,
## # PopulationCount <dbl>, lat <dbl>, long <dbl>, CategoryID <chr>,
## # MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>
# List the column names to decide which columns can be dropped.
colnames(latlong_clean)
## [1] "Year" "StateAbbr"
## [3] "StateDesc" "CityName"
## [5] "GeographicLevel" "DataSource"
## [7] "Category" "UniqueID"
## [9] "Measure" "Data_Value_Unit"
## [11] "DataValueTypeID" "Data_Value_Type"
## [13] "Data_Value" "Low_Confidence_Limit"
## [15] "High_Confidence_Limit" "Data_Value_Footnote_Symbol"
## [17] "Data_Value_Footnote" "PopulationCount"
## [19] "lat" "long"
## [21] "CategoryID" "MeasureId"
## [23] "CityFIPS" "TractFIPS"
## [25] "Short_Question_Text"
# Drop the source, unit, value-type ID, confidence-limit and footnote columns;
# every other column is kept in its original order.
prevention <- latlong_clean |>
  select(
    -c(
      DataSource, Data_Value_Unit, DataValueTypeID,
      Low_Confidence_Limit, High_Confidence_Limit,
      Data_Value_Footnote_Symbol, Data_Value_Footnote
    )
  )
head(prevention)
## # A tibble: 6 × 18
## Year StateAbbr StateDesc CityName GeographicLevel Category UniqueID Measure
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 2017 AL Alabama Montgome… City Prevent… 151000 Choles…
## 2 2017 CA California Concord City Prevent… 616000 Visits…
## 3 2017 CA California Concord City Prevent… 616000 Choles…
## 4 2017 CA California Fontana City Prevent… 624680 Visits…
## 5 2017 CA California Richmond Census Tract Prevent… 0660620… Choles…
## 6 2017 FL Florida Davie Census Tract Prevent… 1216475… Choles…
## # ℹ 10 more variables: Data_Value_Type <chr>, Data_Value <dbl>,
## # PopulationCount <dbl>, lat <dbl>, long <dbl>, CategoryID <chr>,
## # MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>
The new dataset, `prevention`, is now a manageable size.
# All prevention measures for Brooklyn Park, Minnesota; rows with a missing
# Data_Value are dropped.
mn_bp <- prevention |>
  filter(StateDesc == "Minnesota", CityName == "Brooklyn Park") |>
  filter(!is.na(Data_Value))
head(mn_bp)
## # A tibble: 6 × 18
## Year StateAbbr StateDesc CityName GeographicLevel Category UniqueID Measure
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 2017 MN Minnesota Brooklyn … Census Tract Prevent… 2707966… Curren…
## 2 2017 MN Minnesota Brooklyn … City Prevent… 2707966 Taking…
## 3 2017 MN Minnesota Brooklyn … Census Tract Prevent… 2707966… Visits…
## 4 2017 MN Minnesota Brooklyn … Census Tract Prevent… 2707966… Visits…
## 5 2017 MN Minnesota Brooklyn … Census Tract Prevent… 2707966… Curren…
## 6 2017 MN Minnesota Brooklyn … Census Tract Prevent… 2707966… Choles…
## # ℹ 10 more variables: Data_Value_Type <chr>, Data_Value <dbl>,
## # PopulationCount <dbl>, lat <dbl>, long <dbl>, CategoryID <chr>,
## # MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>
# Compare the prevention measures in Brooklyn Park, MN, one facet per measure.
# Each observation is drawn exactly once: a single jittered layer replaces the
# previous geom_point() + geom_jitter() pair, which plotted every row twice.
# Jitter is horizontal only (height = 0) so the plotted Data_Value is exact.
ggplot(mn_bp, aes(x = MeasureId, y = Data_Value, color = MeasureId)) +
  geom_jitter(width = 0.2, height = 0) +
  facet_wrap(~MeasureId) +
  labs(
    title = "Brooklyn Park, MN Prevention Measures for Adults 18 and Older",
    x = "Measure ID",
    y = "Data Value",
    color = "Measure",
    caption = "Data from 2017, Source: CDC"
  ) +
  scale_color_brewer(palette = "Spectral") +
  theme_bw() +
  # Angle the x-axis labels so the measure IDs do not overlap.
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))
# All Minnesota cities, restricted to the BPMED measure and to rows that have
# a reported Data_Value.
mn_bp2 <- prevention |>
  filter(StateDesc == "Minnesota", MeasureId == "BPMED") |>
  filter(!is.na(Data_Value))
head(mn_bp2)
## # A tibble: 6 × 18
## Year StateAbbr StateDesc CityName GeographicLevel Category UniqueID Measure
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 2017 MN Minnesota Brooklyn … City Prevent… 2707966 Taking…
## 2 2017 MN Minnesota Minneapol… Census Tract Prevent… 2743000… Taking…
## 3 2017 MN Minnesota Minneapol… Census Tract Prevent… 2743000… Taking…
## 4 2017 MN Minnesota Minneapol… Census Tract Prevent… 2743000… Taking…
## 5 2017 MN Minnesota Minneapol… Census Tract Prevent… 2743000… Taking…
## 6 2017 MN Minnesota Minneapol… Census Tract Prevent… 2743000… Taking…
## # ℹ 10 more variables: Data_Value_Type <chr>, Data_Value <dbl>,
## # PopulationCount <dbl>, lat <dbl>, long <dbl>, CategoryID <chr>,
## # MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>
# Make sure both coordinate columns are numeric before they are mapped.
mn_bp2 <- mn_bp2 |>
  mutate(across(c(lat, long), as.numeric))
# Latitude/longitude for Minnesota, used to centre the initial map view.
mn_lat <- 46.7296
mn_lon <- -94.6859

# First map: one circle per mn_bp2 row on the default tile layer, with the
# view centred on the coordinates above.
mn_bp2 |>
  leaflet() |>
  addTiles() |>
  addCircles(lng = ~long, lat = ~lat) |>
  setView(lng = mn_lon, lat = mn_lat, zoom = 6)
# Build one HTML popup string per row of mn_bp2: city name, population and the
# Data_Value shown as a percentage.
# PopulationCount is formatted explicitly because pasting a double directly
# prints large round counts in scientific notation (e.g. 100000 -> "1e+05");
# trim = TRUE stops format() from padding values to a common width.
mn_popup <- paste0(
  "<b>City: </b>", mn_bp2$CityName, "<br>",
  "<b>Population: </b>",
  format(
    mn_bp2$PopulationCount,
    big.mark = ",",
    scientific = FALSE,
    trim = TRUE
  ),
  "<br>",
  "<b>Taking BP Meds: </b>", mn_bp2$Data_Value, "%"
)
# Second map: the same circles, now with the popup text attached to each one.
mn_bp2 |>
  leaflet() |>
  addTiles() |>
  addCircles(lng = ~long, lat = ~lat, popup = mn_popup) |>
  setView(lng = mn_lon, lat = mn_lat, zoom = 6)
For this exploration I further filtered the prevention data to focus on Brooklyn Park, MN. I created a faceted scatterplot of all four prevention measures, which showed that, overall, the city of Brooklyn Park had a low percentage of residents with health insurance. This might be due in part to Brooklyn Park’s large Liberian immigrant population: Minnesota has the largest Liberian population in the country, and it is concentrated in the city of Brooklyn Park. I ran into some trouble creating my map and feared that my data was too small or too narrowly filtered, so I opened it up to all cities in Minnesota and focused on the prevalence of adults 18 and older with high blood pressure who report taking their blood pressure medication. These numbers fared better, with a prevalence between 30.8% and 88.4%, but still show room for improvement.