library(tidyverse)
library(tidyr)
# Read the 500 Cities file through an explicit path rather than calling
# setwd(), so loading the data does not change the session's working directory.
data_dir <- "C:/Users/Angel/OneDrive/Documents/Datasets"
cities500 <- read_csv(
  file.path(data_dir, "500CitiesLocalHealthIndicators.cdc.csv")
)
Healthy Cities GIS Assignment
Load the libraries and set the working directory
The GeoLocation variable has (lat, long) format
Split GeoLocation (lat, long) into two columns: lat and long
# Strip the surrounding parentheses from GeoLocation, then split the remaining
# "lat, long" text into two columns; convert = TRUE lets tidyr turn the pieces
# into numbers.
latlong <- cities500 |>
  mutate(GeoLocation = str_remove_all(GeoLocation, "[()]")) |>
  separate(
    col = GeoLocation,
    into = c("lat", "long"),
    sep = ", ",
    convert = TRUE
  )
head(latlong) # A tibble: 6 × 25
Year StateAbbr StateDesc CityName GeographicLevel DataSource Category
<dbl> <chr> <chr> <chr> <chr> <chr> <chr>
1 2017 CA California Hawthorne Census Tract BRFSS Health Outcom…
2 2017 CA California Hawthorne City BRFSS Unhealthy Beh…
3 2017 CA California Hayward City BRFSS Health Outcom…
4 2017 CA California Hayward City BRFSS Unhealthy Beh…
5 2017 CA California Hemet City BRFSS Prevention
6 2017 CA California Indio Census Tract BRFSS Health Outcom…
# ℹ 18 more variables: UniqueID <chr>, Measure <chr>, Data_Value_Unit <chr>,
# DataValueTypeID <chr>, Data_Value_Type <chr>, Data_Value <dbl>,
# Low_Confidence_Limit <dbl>, High_Confidence_Limit <dbl>,
# Data_Value_Footnote_Symbol <chr>, Data_Value_Footnote <chr>,
# PopulationCount <dbl>, lat <dbl>, long <dbl>, CategoryID <chr>,
# MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>
Filter the dataset
Remove the rows whose StateDesc is "United States", keep only the Prevention category (the category of interest), keep only crude-prevalence measurements, and keep only the year 2017.
# Keep rows that are not labelled "United States", belong to the Prevention
# category, report crude prevalence, and are from 2017. A row must satisfy
# every condition to be kept.
latlong_clean <- latlong |>
  filter(
    StateDesc != "United States",
    Category == "Prevention",
    Data_Value_Type == "Crude prevalence",
    Year == 2017
  )
head(latlong_clean) # A tibble: 6 × 25
Year StateAbbr StateDesc CityName GeographicLevel DataSource Category
<dbl> <chr> <chr> <chr> <chr> <chr> <chr>
1 2017 AL Alabama Montgomery City BRFSS Prevention
2 2017 CA California Concord City BRFSS Prevention
3 2017 CA California Concord City BRFSS Prevention
4 2017 CA California Fontana City BRFSS Prevention
5 2017 CA California Richmond Census Tract BRFSS Prevention
6 2017 FL Florida Davie Census Tract BRFSS Prevention
# ℹ 18 more variables: UniqueID <chr>, Measure <chr>, Data_Value_Unit <chr>,
# DataValueTypeID <chr>, Data_Value_Type <chr>, Data_Value <dbl>,
# Low_Confidence_Limit <dbl>, High_Confidence_Limit <dbl>,
# Data_Value_Footnote_Symbol <chr>, Data_Value_Footnote <chr>,
# PopulationCount <dbl>, lat <dbl>, long <dbl>, CategoryID <chr>,
# MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>
What variables are included? (can any of them be removed?)
# Print the column names of the filtered data to see which ones can be dropped.
names(latlong_clean) [1] "Year" "StateAbbr"
[3] "StateDesc" "CityName"
[5] "GeographicLevel" "DataSource"
[7] "Category" "UniqueID"
[9] "Measure" "Data_Value_Unit"
[11] "DataValueTypeID" "Data_Value_Type"
[13] "Data_Value" "Low_Confidence_Limit"
[15] "High_Confidence_Limit" "Data_Value_Footnote_Symbol"
[17] "Data_Value_Footnote" "PopulationCount"
[19] "lat" "long"
[21] "CategoryID" "MeasureId"
[23] "CityFIPS" "TractFIPS"
[25] "Short_Question_Text"
Remove the variables that will not be used in the assignment
# Drop the seven columns that are not used in the rest of the assignment.
prevention <- latlong_clean |>
  select(
    -c(
      DataSource,
      Data_Value_Unit,
      DataValueTypeID,
      Low_Confidence_Limit,
      High_Confidence_Limit,
      Data_Value_Footnote_Symbol,
      Data_Value_Footnote
    )
  )
head(prevention) # A tibble: 6 × 18
Year StateAbbr StateDesc CityName GeographicLevel Category UniqueID Measure
<dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 2017 AL Alabama Montgome… City Prevent… 151000 Choles…
2 2017 CA California Concord City Prevent… 616000 Visits…
3 2017 CA California Concord City Prevent… 616000 Choles…
4 2017 CA California Fontana City Prevent… 624680 Visits…
5 2017 CA California Richmond Census Tract Prevent… 0660620… Choles…
6 2017 FL Florida Davie Census Tract Prevent… 1216475… Choles…
# ℹ 10 more variables: Data_Value_Type <chr>, Data_Value <dbl>,
# PopulationCount <dbl>, lat <dbl>, long <dbl>, CategoryID <chr>,
# MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>
The new dataset “Prevention” is a manageable dataset now.
For your assignment, work with the cleaned “Prevention” dataset
1. Once you run the above code, filter this dataset one more time for any particular subset.
Top five states. For context, states are ranked by their number of observations in the ‘Prevention’ dataset.
prevention |>
  # count() tallies the rows for each StateDesc on its own, so no separate
  # group_by() is needed and the result comes back ungrouped.
  # sort = TRUE orders the states from most to fewest observations.
  count(StateDesc, sort = TRUE) # A tibble: 51 × 2
StateDesc n
<chr> <int>
1 California 21947
2 Texas 12645
3 New York 9995
4 Florida 5454
5 Illinois 5283
6 Arizona 4202
7 Ohio 3415
8 North Carolin 3295
9 Michigan 3040
10 Colorado 2888
# ℹ 41 more rows
The same for cities
prevention |>
  # count() tallies the rows for each CityName on its own, so no separate
  # group_by() is needed and the result comes back ungrouped.
  # sort = TRUE orders the cities from most to fewest observations.
  # NOTE(review): any cities that share a name across states are counted
  # together here; add StateAbbr to count() if they should be kept apart.
  count(CityName, sort = TRUE) # A tibble: 474 × 2
CityName n
<chr> <int>
1 New York 8563
2 Los Angeles 4016
3 Chicago 3192
4 Houston 2410
5 Philadelphia 1528
6 Phoenix 1440
7 San Antonio 1308
8 Dallas 1280
9 Detroit 1188
10 San Diego 1187
# ℹ 464 more rows
Filter chunk here.
# Keep only the rows from the five states with the most observations.
new_prevention <- filter(
  prevention,
  StateDesc %in% c("California", "Texas", "New York", "Florida", "Illinois")
)
# Optional: restrict to one singular measure, instead of all four
# filter(Measure == 'Cholesterol screening among adults aged >=18 Years')
head(new_prevention) # A tibble: 6 × 18
Year StateAbbr StateDesc CityName GeographicLevel Category UniqueID Measure
<dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 2017 CA California Concord City Preventi… 616000 Visits…
2 2017 CA California Concord City Preventi… 616000 Choles…
3 2017 CA California Fontana City Preventi… 624680 Visits…
4 2017 CA California Richmond Census Tract Preventi… 0660620… Choles…
5 2017 FL Florida Davie Census Tract Preventi… 1216475… Choles…
6 2017 FL Florida Hialeah Census Tract Preventi… 1230000… Visits…
# ℹ 10 more variables: Data_Value_Type <chr>, Data_Value <dbl>,
# PopulationCount <dbl>, lat <dbl>, long <dbl>, CategoryID <chr>,
# MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>
2. Based on the GIS tutorial (Japan earthquakes), create one plot about something in your subsetted dataset.
First plot chunk here
# Scatterplot of prevalence value against population count, coloured by
# question and split into one panel per state.
p1 <- new_prevention |>
  ggplot(aes(
    x = Data_Value,
    y = PopulationCount,
    color = Short_Question_Text
  )) +
  # NOTE(review): geom_point() and geom_jitter() each draw every row, so each
  # observation appears twice (and the missing-value warning is emitted twice);
  # consider keeping only one of the two layers.
  geom_point(alpha = 0.05) +
  # Using facet_wrap to show all five states
  facet_wrap(~StateDesc) +
  geom_jitter() +
  scale_color_viridis_d() +
  theme_minimal() +
  labs(
    x = "Prevalence Rate (95% CI)",
    y = "Population Count",
    title = "Rate vs Population Count for Five States",
    color = "Question"
  )
p1
Warning: Removed 1445 rows containing missing values (`geom_point()`).
Removed 1445 rows containing missing values (`geom_point()`).
It is interesting to see a couple of outliers in New York; they may be worth exploring.
3. Now create a map of your subsetted dataset.
Loading necessary packages
library(leaflet)
library(leaflegend)
library(sf)Linking to GEOS 3.11.2, GDAL 3.7.2, PROJ 9.3.0; sf_use_s2() is TRUE
library(tmap)The legacy packages maptools, rgdal, and rgeos, underpinning the sp package,
which was just loaded, were retired in October 2023.
Please refer to R-spatial evolution reports for details, especially
https://r-spatial.org/r/2023/05/15/evolution4.html.
It may be desirable to make the sf package available;
package maintainers should consider adding sf to Suggests:.
Breaking News: tmap 3.x is retiring. Please test v4, e.g. with
remotes::install_github('r-tmap/tmap')
library(tmaptools)
First map chunk here
# Nothing in this chunk reads from disk, so no setwd() call is needed here.
# Narrow the subset to New York cholesterol-screening rows and make sure the
# coordinates are numeric before mapping.
new_prevention <- new_prevention |>
  filter(StateAbbr == "NY", Short_Question_Text == "Cholesterol Screening") |>
  mutate(lat = as.numeric(lat), long = as.numeric(long))
# Centre point for the initial map view
ny_lat <- 42.7466
ny_lng <- -75.7700
# Loading data for United States
# usgeo <- read_sf()
# Draw one circle per row at its lat/long; the radius is the row's
# PopulationCount. The ~column formulas are looked up in `data`.
leaflet() |>
  setView(lng = ny_lng, lat = ny_lat, zoom = 6) |>
  addProviderTiles("Esri.WorldStreetMap") |>
  addCircles(
    data = new_prevention,
    lat = ~lat,
    lng = ~long,
    radius = ~PopulationCount
  )
The city named Buffalo has the largest number of ‘volunteers’ – those who participated in this study – in all of the state, that is, New York.
4. Refine your map to include a mouseover tooltip
Refined map chunk here
# HTML text attached to each circle: population count, city, geographic level
# and rate for that row (one string per row of new_prevention).
popup <- paste0(
  "<b>Population (Count): </b>", new_prevention$PopulationCount, "<br>",
  "<b>City: </b>", new_prevention$CityName, "<br>",
  "<b>GeoLevel: </b>", new_prevention$GeographicLevel, "<br>",
  "<b>Rate (%): </b>", new_prevention$Data_Value, "<br>"
)
# Same map as before, with the text above attached to each circle.
# NOTE(review): `popup` opens when a circle is clicked; if a hover (mouseover)
# tooltip is required, addCircles() also accepts a `label` argument.
leaflet() |>
  setView(lng = ny_lng, lat = ny_lat, zoom = 6) |>
  addProviderTiles("Esri.WorldStreetMap") |>
  addCircles(
    data = new_prevention,
    lat = ~lat,
    lng = ~long,
    radius = ~PopulationCount,
    popup = popup
  )
5. Write a paragraph
In a paragraph, describe the plots you created and what they show.
The two maps above are my attempts at using the leaflet package. I may not have used the dataset entirely correctly, but each circle represents a location in New York State, and its size reflects the population count of those who participated. In addition, clicking on an individual circle brings up follow-up information (population count, city, geographic level, and rate) that adds more context. Overall, I think this helps to visualize how cholesterol screening is distributed throughout the state.