library(tidyverse)
library(tidyr)
library(leaflet)
library(scales)
library(tidyverse)
library(knitr)
library(webshot2)
# Point the session at the folder that holds the CDC extract so the relative
# file name below resolves.
# NOTE(review): this path is machine-specific; the script will not run on
# another computer without editing it.
setwd("C:/Users/ebale/OneDrive/Desktop/DATA110")

# Read the 500 Cities local health indicators CSV into `cities500`.
cities500 <- read_csv("500CitiesLocalHealthIndicators.cdc.csv")

# NOTE(review): data() looks for a packaged dataset named `cities500`; the
# object was already created by read_csv() above, so this call is presumably
# unnecessary -- confirm before removing.
data(cities500)
Healthy Cities GIS Assignment
Load the libraries and set the working directory
The GeoLocation variable has (lat, long) format
Split GeoLocation (lat, long) into two columns: lat and long
# GeoLocation is stored as "(lat, long)" text. Strip the parentheses, then
# split on the comma into separate `lat` and `long` columns; convert = TRUE
# lets separate() turn the two text pieces into numbers.
latlong <- cities500 |>
  mutate(GeoLocation = str_replace_all(GeoLocation, "[()]", "")) |>
  separate(GeoLocation, into = c("lat", "long"), sep = ",", convert = TRUE)

# Preview the first rows to confirm the split worked.
head(latlong)
# A tibble: 6 × 25
Year StateAbbr StateDesc CityName GeographicLevel DataSource Category
<dbl> <chr> <chr> <chr> <chr> <chr> <chr>
1 2017 CA California Hawthorne Census Tract BRFSS Health Outcom…
2 2017 CA California Hawthorne City BRFSS Unhealthy Beh…
3 2017 CA California Hayward City BRFSS Health Outcom…
4 2017 CA California Hayward City BRFSS Unhealthy Beh…
5 2017 CA California Hemet City BRFSS Prevention
6 2017 CA California Indio Census Tract BRFSS Health Outcom…
# ℹ 18 more variables: UniqueID <chr>, Measure <chr>, Data_Value_Unit <chr>,
# DataValueTypeID <chr>, Data_Value_Type <chr>, Data_Value <dbl>,
# Low_Confidence_Limit <dbl>, High_Confidence_Limit <dbl>,
# Data_Value_Footnote_Symbol <chr>, Data_Value_Footnote <chr>,
# PopulationCount <dbl>, lat <dbl>, long <dbl>, CategoryID <chr>,
# MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>
Filter the dataset
Remove the rows whose StateDesc is "United States", keep only the rows measured as age-adjusted prevalence, and select only the year 2016.
# Keep only state-level-or-below rows (drop the national "United States"
# summary), measured as age-adjusted prevalence, for the year 2016.
# Conditions listed in one filter() call are combined with AND.
latlong_clean <- latlong |>
  filter(
    StateDesc != "United States",
    Data_Value_Type == "Age-adjusted prevalence",
    Year == 2016
  )

# Preview the filtered rows.
head(latlong_clean)
# A tibble: 6 × 25
Year StateAbbr StateDesc CityName GeographicLevel DataSource Category
<dbl> <chr> <chr> <chr> <chr> <chr> <chr>
1 2016 AR Arkansas Jonesboro City BRFSS Health Outc…
2 2016 CA California Antioch City BRFSS Health Outc…
3 2016 CA California Carlsbad City BRFSS Prevention
4 2016 CA California Chino Hills City BRFSS Health Outc…
5 2016 CA California Fairfield City BRFSS Prevention
6 2016 CA California Ontario City BRFSS Prevention
# ℹ 18 more variables: UniqueID <chr>, Measure <chr>, Data_Value_Unit <chr>,
# DataValueTypeID <chr>, Data_Value_Type <chr>, Data_Value <dbl>,
# Low_Confidence_Limit <dbl>, High_Confidence_Limit <dbl>,
# Data_Value_Footnote_Symbol <chr>, Data_Value_Footnote <chr>,
# PopulationCount <dbl>, lat <dbl>, long <dbl>, CategoryID <chr>,
# MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>
Remove the variables that will not be used in the assignment
# Drop the columns that are not used later in the assignment
# (data source, unit/type identifiers, confidence limits, footnotes).
latlong_clean2 <- latlong_clean |>
  select(
    -DataSource,
    -Data_Value_Unit,
    -DataValueTypeID,
    -Low_Confidence_Limit,
    -High_Confidence_Limit,
    -Data_Value_Footnote_Symbol,
    -Data_Value_Footnote
  )

# Preview the trimmed table.
head(latlong_clean2)
# A tibble: 6 × 18
Year StateAbbr StateDesc CityName GeographicLevel Category UniqueID Measure
<dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 2016 AR Arkansas Jonesboro City Health … 535710 "All t…
2 2016 CA California Antioch City Health … 602252 "All t…
3 2016 CA California Carlsbad City Prevent… 611194 "Mammo…
4 2016 CA California Chino Hi… City Health … 613214 "All t…
5 2016 CA California Fairfield City Prevent… 623182 "Papan…
6 2016 CA California Ontario City Prevent… 653896 "Mammo…
# ℹ 10 more variables: Data_Value_Type <chr>, Data_Value <dbl>,
# PopulationCount <dbl>, lat <dbl>, long <dbl>, CategoryID <chr>,
# MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>
For your assignment, work with a cleaned dataset.
1. Once you run the above code and learn how to filter in this format, filter this dataset however you choose so that you have a subset with no more than 900 observations.
Filter chunk here
# Subset for the assignment: drop the national "United States" rows and keep
# only age-adjusted prevalence values from 2016.
# Conditions listed in one filter() call are combined with AND.
latlong_clean <- latlong |>
  filter(
    StateDesc != "United States",
    Data_Value_Type == "Age-adjusted prevalence",
    Year == 2016
  )

# Preview the filtered rows.
head(latlong_clean)
# A tibble: 6 × 25
Year StateAbbr StateDesc CityName GeographicLevel DataSource Category
<dbl> <chr> <chr> <chr> <chr> <chr> <chr>
1 2016 AR Arkansas Jonesboro City BRFSS Health Outc…
2 2016 CA California Antioch City BRFSS Health Outc…
3 2016 CA California Carlsbad City BRFSS Prevention
4 2016 CA California Chino Hills City BRFSS Health Outc…
5 2016 CA California Fairfield City BRFSS Prevention
6 2016 CA California Ontario City BRFSS Prevention
# ℹ 18 more variables: UniqueID <chr>, Measure <chr>, Data_Value_Unit <chr>,
# DataValueTypeID <chr>, Data_Value_Type <chr>, Data_Value <dbl>,
# Low_Confidence_Limit <dbl>, High_Confidence_Limit <dbl>,
# Data_Value_Footnote_Symbol <chr>, Data_Value_Footnote <chr>,
# PopulationCount <dbl>, lat <dbl>, long <dbl>, CategoryID <chr>,
# MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>
Removing some variables
# Remove the columns that the plots and maps below do not use
# (data source, unit/type identifiers, confidence limits, footnotes).
latlong_clean2 <- latlong_clean |>
  select(
    -DataSource,
    -Data_Value_Unit,
    -DataValueTypeID,
    -Low_Confidence_Limit,
    -High_Confidence_Limit,
    -Data_Value_Footnote_Symbol,
    -Data_Value_Footnote
  )

# Preview the trimmed table.
head(latlong_clean2)
# A tibble: 6 × 18
Year StateAbbr StateDesc CityName GeographicLevel Category UniqueID Measure
<dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 2016 AR Arkansas Jonesboro City Health … 535710 "All t…
2 2016 CA California Antioch City Health … 602252 "All t…
3 2016 CA California Carlsbad City Prevent… 611194 "Mammo…
4 2016 CA California Chino Hi… City Health … 613214 "All t…
5 2016 CA California Fairfield City Prevent… 623182 "Papan…
6 2016 CA California Ontario City Prevent… 653896 "Mammo…
# ℹ 10 more variables: Data_Value_Type <chr>, Data_Value <dbl>,
# PopulationCount <dbl>, lat <dbl>, long <dbl>, CategoryID <chr>,
# MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>
Set latitude and longitude
# Longitude and latitude used as the centre point when the Florida maps
# call setView().
florida_lon <- -81.5158
florida_lat <- 27.6648
Filter for Florida
# Keep only the Florida rows of the cleaned dataset.
florida1 <- latlong_clean2 |>
  filter(StateAbbr == "FL")
Cleaning data
# Row-wise logical mask: TRUE where none of the five columns used below is
# missing.
# NOTE(review): the mask is only computed here; nothing visible in this file
# applies it to `florida1`. If incomplete rows should be dropped, subset with
# it (e.g. florida1[cleaning, ]) -- confirm the intent.
cleaning <- !is.na(florida1$StateDesc) &
  !is.na(florida1$CityName) &
  !is.na(florida1$Data_Value) &
  !is.na(florida1$Short_Question_Text) &
  !is.na(florida1$PopulationCount)
Filter for teeth loss
# Florida rows for the "Teeth Loss" measure only, capped at the first 50 rows
# so the map stays small.
florida2 <- florida1 %>%
  filter(Short_Question_Text == "Teeth Loss") %>%
  head(50)
2. Based on the GIS tutorial (Japan earthquakes), create one plot about something in your subsetted dataset.
# The ten Florida rows with the highest "Dental Visit" values:
# filter to the measure, sort descending by value, keep the first ten.
top10 <- florida1 %>%
  filter(StateAbbr == "FL", Short_Question_Text == "Dental Visit") %>%
  arrange(desc(Data_Value)) %>%
  head(10)
# Horizontal bar chart of the dental visit rate for the cities in `top10`.
ggplot(top10, aes(x = CityName, y = Data_Value, color = Data_Value)) +
  # geom_col() draws bars whose height is the y value itself
  # (same as geom_bar(stat = "identity")).
  geom_col(fill = "#1e81b0") +
  # Flip the axes so the city names read horizontally.
  coord_flip() +
  # Data_Value is already on a 0-100 scale, so scale = 1 only appends "%".
  scale_y_continuous(labels = label_percent(scale = 1)) +
  labs(
    title = "Top 10 Florida Cities by Dental Visit Rates",
    x = "City",
    y = "Dental Visit Rate (%)",
    caption = "Source : CDC"
  ) +
  # The theme must be added to the plot with `+`; placed inside labs() it is
  # not applied as a theme.
  theme_bw(base_size = 14)
# Scatter of population count against data value for every Florida row,
# coloured by measure and split into one panel per measure.
ggplot(
  data = florida1,
  mapping = aes(
    x = Data_Value,
    y = PopulationCount,
    color = Short_Question_Text
  )
) +
  # Faint points at the exact coordinates ...
  geom_point(alpha = 0.05) +
  # ... followed by a second, randomly jittered copy of the same points.
  # NOTE(review): every observation is therefore drawn twice -- confirm that
  # both layers are wanted.
  geom_jitter() +
  scale_color_viridis_d() +
  facet_wrap(vars(Short_Question_Text)) +
  labs(
    title = "Population Count vs Data value for Short Question Text",
    caption = "Source: CDC"
  ) +
  theme_bw(base_size = 14)
3. Now create a map of your subsetted dataset.
# Interactive map centred on Florida with one circle per row of `florida2`.
# No lng/lat arguments are given, so leaflet picks the coordinate columns
# from the data by name.
leaflet() |>
  setView(lng = florida_lon, lat = florida_lat, zoom = 6.4) |>
  addProviderTiles("Esri.WorldStreetMap") |>
  addCircles(
    data = florida2,
    # Circle radius scales with the teeth-loss value; the formula is
    # evaluated against `florida2`.
    radius = ~ Data_Value * 1000,
    fillOpacity = 0.9,
    fillColor = "#FFDE21",
    color = "#14010d"
  )
Assuming "long" and "lat" are longitude and latitude, respectively
4. Refine your map to include a mouse-click tooltip
# HTML shown when a circle is clicked: one string per row of `florida2`,
# listing the population count, city name and data value under a bold
# "Teeth Loss" heading.
popup_florida <- paste0(
  "<b>Teeth Loss </b> <br>",
  "<b>Population Count: </b>", florida2$PopulationCount, "<br>",
  "<b>City: </b>", florida2$CityName, "<br>",
  "<b>Data value : </b>", florida2$Data_Value, "<br>"
)
# Same Florida map as before, with more transparent circles and a click
# popup taken from `popup_florida` (one HTML string per row of `florida2`).
leaflet() |>
  setView(lng = florida_lon, lat = florida_lat, zoom = 6.4) |>
  addProviderTiles("Esri.WorldStreetMap") |>
  addCircles(
    data = florida2,
    # Circle radius scales with the teeth-loss value; the formula is
    # evaluated against `florida2`.
    radius = ~ Data_Value * 1000,
    popup = popup_florida,
    fillOpacity = 0.5,
    fillColor = "#FFDE21",
    color = "#14010d"
  )
Assuming "long" and "lat" are longitude and latitude, respectively
5. Write a paragraph
In a paragraph, describe the plots you created and what they show.
The first graph shows the ten Florida cities with the highest dental visit rates. Boca Raton has the highest rate, and the other nine cities have very similar rates to one another. The second graph plots the data value against the population count for each short question text. Every panel contains an outlier, and the panels all have almost the same structure. The map shows the data value for teeth loss in each Florida city. Around Miami, Pompano Beach and Boca Raton have the highest data values for teeth loss. Based on my research using ChatGPT, teeth loss rates are higher around Miami, likely due to a combination of lower access to affordable dental care and socioeconomic challenges such as poverty and limited insurance coverage. These factors can lead to delayed treatment and less preventive care, increasing the risk of tooth loss.