library(tidyverse)
library(tidyr)
library(RColorBrewer)
library(leaflet)
library(leaflegend)
# Work from the local folder that holds the dataset.
# NOTE(review): this absolute path is machine-specific, so the script only
# runs as-is on a machine with the same directory layout.
setwd("C:/Users/jedi_/Documents/Academic/MC/Datasets")

# Load the CDC 500 Cities local health indicators file from that folder.
cities500 <- read_csv(file = "500CitiesLocalHealthIndicators.cdc.csv")
Split GeoLocation (lat, long) into two columns: lat and long
# Strip the parentheses from GeoLocation, then split the remaining
# "lat, long" text on the comma into separate `lat` and `long` columns.
# convert = TRUE asks separate() to re-type the new columns.
latlong2 <- cities500 |>
  mutate(GeoLocation = str_remove_all(GeoLocation, "[()]")) |>
  separate(
    col = GeoLocation,
    into = c("lat", "long"),
    sep = ",",
    convert = TRUE
  )

# Preview the first rows of the result.
head(latlong2)
## # A tibble: 6 × 25
## Year StateAbbr StateDesc CityName GeographicLevel DataSource Category
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 2017 CA California Hawthorne Census Tract BRFSS Health Outcom…
## 2 2017 CA California Hawthorne City BRFSS Unhealthy Beh…
## 3 2017 CA California Hayward City BRFSS Health Outcom…
## 4 2017 CA California Hayward City BRFSS Unhealthy Beh…
## 5 2017 CA California Hemet City BRFSS Prevention
## 6 2017 CA California Indio Census Tract BRFSS Health Outcom…
## # ℹ 18 more variables: UniqueID <chr>, Measure <chr>, Data_Value_Unit <chr>,
## # DataValueTypeID <chr>, Data_Value_Type <chr>, Data_Value <dbl>,
## # Low_Confidence_Limit <dbl>, High_Confidence_Limit <dbl>,
## # Data_Value_Footnote_Symbol <chr>, Data_Value_Footnote <chr>,
## # PopulationCount <dbl>, lat <dbl>, long <dbl>, CategoryID <chr>,
## # MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>
Remove the rows whose StateDesc is "United States", keep only the Prevention category (the category of interest), keep only crude-prevalence values, and keep only the year 2017.
# Keep only the rows of interest. The conditions are combined with AND:
#   - drop the rows whose StateDesc is "United States"
#   - Prevention category only
#   - crude prevalence values only
#   - 2017 only
latlong_clean <- latlong2 |>
  filter(
    StateDesc != "United States",
    Category == "Prevention",
    Data_Value_Type == "Crude prevalence",
    Year == 2017
  )

# Preview the first rows of the filtered data.
head(latlong_clean)
## # A tibble: 6 × 25
## Year StateAbbr StateDesc CityName GeographicLevel DataSource Category
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 2017 AL Alabama Montgomery City BRFSS Prevention
## 2 2017 CA California Concord City BRFSS Prevention
## 3 2017 CA California Concord City BRFSS Prevention
## 4 2017 CA California Fontana City BRFSS Prevention
## 5 2017 CA California Richmond Census Tract BRFSS Prevention
## 6 2017 FL Florida Davie Census Tract BRFSS Prevention
## # ℹ 18 more variables: UniqueID <chr>, Measure <chr>, Data_Value_Unit <chr>,
## # DataValueTypeID <chr>, Data_Value_Type <chr>, Data_Value <dbl>,
## # Low_Confidence_Limit <dbl>, High_Confidence_Limit <dbl>,
## # Data_Value_Footnote_Symbol <chr>, Data_Value_Footnote <chr>,
## # PopulationCount <dbl>, lat <dbl>, long <dbl>, CategoryID <chr>,
## # MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>
# List the column names to decide which columns can be dropped.
colnames(latlong_clean)
## [1] "Year" "StateAbbr"
## [3] "StateDesc" "CityName"
## [5] "GeographicLevel" "DataSource"
## [7] "Category" "UniqueID"
## [9] "Measure" "Data_Value_Unit"
## [11] "DataValueTypeID" "Data_Value_Type"
## [13] "Data_Value" "Low_Confidence_Limit"
## [15] "High_Confidence_Limit" "Data_Value_Footnote_Symbol"
## [17] "Data_Value_Footnote" "PopulationCount"
## [19] "lat" "long"
## [21] "CategoryID" "MeasureId"
## [23] "CityFIPS" "TractFIPS"
## [25] "Short_Question_Text"
# Drop the columns that are not used in the rest of the analysis.
prevention <- latlong_clean |>
  select(
    -c(
      DataSource,
      Data_Value_Unit,
      DataValueTypeID,
      Low_Confidence_Limit,
      High_Confidence_Limit,
      Data_Value_Footnote_Symbol,
      Data_Value_Footnote
    )
  )

# Preview the first rows of the slimmed-down data.
head(prevention)
## # A tibble: 6 × 18
## Year StateAbbr StateDesc CityName GeographicLevel Category UniqueID Measure
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 2017 AL Alabama Montgome… City Prevent… 151000 Choles…
## 2 2017 CA California Concord City Prevent… 616000 Visits…
## 3 2017 CA California Concord City Prevent… 616000 Choles…
## 4 2017 CA California Fontana City Prevent… 624680 Visits…
## 5 2017 CA California Richmond Census Tract Prevent… 0660620… Choles…
## 6 2017 FL Florida Davie Census Tract Prevent… 1216475… Choles…
## # ℹ 10 more variables: Data_Value_Type <chr>, Data_Value <dbl>,
## # PopulationCount <dbl>, lat <dbl>, long <dbl>, CategoryID <chr>,
## # MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>
The new dataset, `prevention`, is now a manageable size.
# List the distinct measure descriptions left after filtering.
unique(prevention[["Measure"]])
## [1] "Cholesterol screening among adults aged >=18 Years"
## [2] "Visits to doctor for routine checkup within the past Year among adults aged >=18 Years"
## [3] "Current lack of health insurance among adults aged 18\x9664 Years"
## [4] "Taking medicine for high blood pressure control among adults aged >=18 Years with high blood pressure"
# List the distinct state abbreviations left after filtering.
unique(prevention[["StateAbbr"]])
## [1] "AL" "CA" "FL" "CT" "IL" "MN" "NY" "PA" "NC" "OH" "OK" "OR" "TX" "RI" "SC"
## [16] "SD" "TN" "UT" "VA" "WA" "AK" "WI" "AZ" "AR" "CO" "DE" "NV" "DC" "GA" "ID"
## [31] "HI" "MA" "MI" "IN" "KS" "KY" "IA" "LA" "MD" "ME" "NH" "NJ" "NM" "MO" "MS"
## [46] "NE" "MT" "ND" "WV" "VT" "WY"
# Keep only the DC, Maryland and Virginia (DMV) rows that have a reported
# value. Then relabel the "Health Insurance" question as "Uninsured" to make
# the visualizations easier to interpret.
prevention2 <- prevention |>
  filter(
    StateAbbr %in% c("DC", "MD", "VA"),
    !is.na(Data_Value)
  ) |>
  mutate(
    Short_Question_Text = if_else(
      Short_Question_Text == "Health Insurance",
      "Uninsured",
      Short_Question_Text
    )
  )
First plot chunk here
# Side-by-side bar chart of each prevention measure, coloured by state.
# NOTE(review): prevention2 appears to hold many rows (cities and census
# tracts) per state and measure. The dodged columns for one state therefore
# overlap, and presumably only the tallest is visible. Confirm whether a
# per-state summary was intended.
ggplot(
  prevention2,
  aes(x = Short_Question_Text, y = Data_Value, fill = StateAbbr)
) +
  geom_col(position = "dodge") +
  scale_fill_brewer(type = "qual", palette = "Dark2") +
  labs(
    title = "2017: Preventative Health Measures in the DMV",
    caption = "Source: CDC",
    x = "Measure",
    y = "Percentage",
    fill = "State/City"
  ) +
  theme_minimal() +
  # Centre the title; this must come after theme_minimal() to take effect.
  theme(plot.title = element_text(hjust = 0.5))
# Coerce the coordinate columns to numeric before mapping.
# NOTE(review): lat and long already print as <dbl> in the tibble previews
# above, so this coercion looks redundant. Confirm before removing it.
prevention2 <- transform(
  prevention2,
  lat = as.numeric(lat),
  long = as.numeric(long)
)
First map chunk here
# Centre point used to frame the DC / Maryland / Virginia view.
dmv_lat <- 38.322438
dmv_long <- -77

# First map: one circle per prevention2 row. "Uninsured" rows get a radius of
# 6 and every other measure gets 15000.
leaflet(prevention2) |>
  setView(lng = dmv_long, lat = dmv_lat, zoom = 7) |>
  addProviderTiles("Esri.NatGeoWorldMap") |>
  addCircles(
    lng = ~long,
    lat = ~lat,
    radius = ~ifelse(Short_Question_Text == "Uninsured", 6, 15000),
    color = "black",
    fillColor = "darkred",
    fillOpacity = 0.5
  )
# Pivot the prevention measures wider (one column per Short_Question_Text
# value, one row per UniqueID), then join those new columns back onto the
# prevention2 rows.
#
# Step by step:
#   1. For each UniqueID / Short_Question_Text pair, keep the first
#      Data_Value.
#   2. Spread the measures into columns.
#   3. Join on UniqueID to every prevention2 row, minus Short_Question_Text,
#      so the location and descriptive columns are available next to the
#      per-measure columns.
#
# The result still has one row per original prevention2 row. The Data_Value,
# Measure and MeasureId columns it carries belong to that single original
# row, not to the whole UniqueID.
#
# The earlier version ran a select() and add_count() whose results were
# discarded by summarize(), and relied on an implicit join key. Both are made
# explicit here; the resulting table is unchanged.
prevention3 <- prevention2 |>
  group_by(UniqueID, Short_Question_Text) |>
  summarize(Data_Value = first(Data_Value), .groups = "drop") |>
  pivot_wider(
    names_from = Short_Question_Text,
    values_from = Data_Value
  ) |>
  full_join(
    select(prevention2, -Short_Question_Text),
    by = "UniqueID"
  )
# Remove duplicate rows so there is exactly one row per UniqueID. The first
# row seen for each UniqueID is kept, along with all of its other columns.
prevention4 <- distinct(prevention3, UniqueID, .keep_all = TRUE)
# Centre point used to frame the DC / Maryland / Virginia view.
dmv_lat <- 38.322438
dmv_long <- -77

# New map built from the pivoted dataset: one circle per UniqueID, with the
# radius scaled from the Uninsured percentage (value * 100).
leaflet(prevention4) |>
  setView(lng = dmv_long, lat = dmv_lat, zoom = 7) |>
  addProviderTiles("Esri.NatGeoWorldMap") |>
  addCircles(
    lng = ~long,
    lat = ~lat,
    radius = ~Uninsured * 100,
    color = "darkred",
    fillColor = "darkred",
    fillOpacity = 0.5
  )
# Build one HTML popup string per prevention4 row, showing the city name and
# three of the prevention measures.
popupdmv <- with(
  prevention4,
  paste0(
    "<b>City Name:</b>", CityName, "<br>",
    "<b>Annual Checkups (%): </b>", `Annual Checkup`, "<br>",
    "<b>Taking BP Meds (%): </b>", `Taking BP Medication`, "<br>",
    "<strong>Uninsured Adults 18-64 (%) </strong>", Uninsured, "<br>"
  )
)
Refined map chunk here
# Refined DMV map: smaller circles (Uninsured * 10) plus a popup per point.
leaflet(prevention4) |>
  setView(lng = dmv_long, lat = dmv_lat, zoom = 7) |>
  addProviderTiles("Esri.NatGeoWorldMap") |>
  addCircles(
    lng = ~long,
    lat = ~lat,
    radius = ~Uninsured * 10,
    color = "darkred",
    fillColor = "darkred",
    fillOpacity = 0.5,
    popup = popupdmv
  )
# Keep only the Washington, DC rows to focus on a smaller region.
prevention5 <- filter(prevention4, StateAbbr == "DC")
# Build one HTML popup string per prevention5 (DC) row, showing all four
# prevention measures.
popupdc <- with(
  prevention5,
  paste0(
    "<b>Annual Checkups (%): </b>", `Annual Checkup`, "<br>",
    "<strong>Uninsured Adults 18-64 (%) </strong>", Uninsured, "<br>",
    "<b>Taking BP Meds (%): </b>", `Taking BP Medication`, "<br>",
    "<b>Completed Cholesterol Screening (%): </b>", `Cholesterol Screening`, "<br>"
  )
)
# Centre point used to frame the Washington, DC view.
dc_lat <- 38.919251
dc_long <- -77.028381

# Numeric colour palette for the Annual Checkup values. The ramp is
# interpolated from five rainbow colours into one colour per prevention5 row.
pal <- colorNumeric(
  palette = colorRampPalette(rainbow(5))(nrow(prevention5)),
  domain = prevention5[["Annual Checkup"]]
)
# DC map with the new elements:
#   - circle radius scaled from the Uninsured percentage (value * 30)
#   - fill colour taken from the Annual Checkup palette
#   - a colour legend for Annual Checkup
#   - a size legend for Uninsured
leaflet(prevention5) |>
  setView(lng = dc_long, lat = dc_lat, zoom = 11) |>
  addProviderTiles("Esri.NatGeoWorldMap") |>
  addCircles(
    lng = ~long,
    lat = ~lat,
    radius = ~Uninsured * 30,
    color = "darkblue",
    fillColor = ~pal(`Annual Checkup`),
    fillOpacity = 0.5,
    popup = popupdc
  ) |>
  addLegend(
    position = "bottomleft",
    pal = pal,
    values = ~`Annual Checkup`,
    title = "% Annual Checkups Completed"
  ) |>
  addLegendSize(
    values = prevention5$Uninsured,
    color = "darkblue",
    fillColor = "darkblue",
    breaks = 5,
    opacity = 0.5,
    title = "% Uninsured Adults (18-64 Years)",
    shape = "circle",
    orientation = "vertical"
  )
My first plot, the side-by-side bar graph, compares the use of preventative health measures in DC, Maryland, and Virginia in 2017. The measures included are annual checkups, cholesterol screening, and taking blood pressure medication as needed. In addition, the percentages of uninsured adults in each of these three regions are compared.
Taking BP medication appears to be the most uniform measure across the region. The other measures vary slightly. The differences in the percentage of adults who are uninsured appear much more pronounced: Virginia has the highest percentage of uninsured adults, followed by Maryland, and then DC.
For my second plot, I focused on DC since the map showing all three states was harder to interpret without having to zoom in on individual cities. The size of the circles represents the percentage of uninsured adults (bigger circle = more uninsured) and the color represents the percentage of respondents who completed annual checkups within the past year (as of 2017).
The lowest percentage of annual checkups completed appears in neighborhoods with large student populations - the red circles on the map are directly over GWU and Georgetown’s campuses. In general, adults in wealthier parts of DC (most of Northwest, some parts of Southeast) are more likely to be insured, with the exception of the college neighborhoods mentioned.
Adults in the poorest parts of DC (Southeast wards 7 and 8) have a higher percentage of annual checkups completed compared to adults in other neighborhoods/wards, despite being less likely to have health insurance. This was surprising to me because lack of insurance is often a barrier to preventative care.
Adults in some neighborhoods (e.g., Columbia Heights, Mount Pleasant) are less likely both to have insurance and to have completed their annual checkups, which may be related to the fact that these neighborhoods have higher proportions of adults who are newcomers to the United States. They may therefore face additional barriers to healthcare, such as lack of knowledge about local public health initiatives or language differences.