library(tidyverse)
library(tidyr)
library(highcharter)
library(leaflet)
# Read the 500 Cities local health indicators CSV (the path is relative to the
# working directory).
cities500 <- read_csv("500CitiesLocalHealthIndicators.cdc.csv")
# Healthy Cities GIS Assignment
Load the libraries and set the working directory
The GeoLocation variable has (lat, long) format
Split GeoLocation (lat, long) into two columns: lat and long
# GeoLocation arrives as the text "(lat, long)". Drop the parentheses, then
# split on the comma; `convert = TRUE` lets separate() turn both pieces into
# numeric columns.
latlong <- separate(
  mutate(cities500, GeoLocation = str_replace_all(GeoLocation, "[()]", "")),
  GeoLocation,
  into = c("lat", "long"),
  sep = ",",
  convert = TRUE
)
head(latlong) # A tibble: 6 × 25
Year StateAbbr StateDesc CityName GeographicLevel DataSource Category
<dbl> <chr> <chr> <chr> <chr> <chr> <chr>
1 2017 CA California Hawthorne Census Tract BRFSS Health Outcom…
2 2017 CA California Hawthorne City BRFSS Unhealthy Beh…
3 2017 CA California Hayward City BRFSS Health Outcom…
4 2017 CA California Hayward City BRFSS Unhealthy Beh…
5 2017 CA California Hemet City BRFSS Prevention
6 2017 CA California Indio Census Tract BRFSS Health Outcom…
# ℹ 18 more variables: UniqueID <chr>, Measure <chr>, Data_Value_Unit <chr>,
# DataValueTypeID <chr>, Data_Value_Type <chr>, Data_Value <dbl>,
# Low_Confidence_Limit <dbl>, High_Confidence_Limit <dbl>,
# Data_Value_Footnote_Symbol <chr>, Data_Value_Footnote <chr>,
# PopulationCount <dbl>, lat <dbl>, long <dbl>, CategoryID <chr>,
# MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>
Filter the dataset
Remove the rows whose StateDesc is "United States", keep the Prevention category (plus the current-smoking measure, CSMOKING, which is needed later), keep only the crude prevalence values, and keep only 2017.
# A single filter() call: comma-separated conditions are combined with AND.
#   - drop rows labelled "United States"
#   - keep the Prevention category, plus the CSMOKING measure
#   - keep crude-prevalence values only
#   - keep 2017 only
latlong_clean <- latlong |>
  filter(
    StateDesc != "United States",
    Category == "Prevention" | MeasureId == "CSMOKING",
    Data_Value_Type == "Crude prevalence",
    Year == 2017
  )
head(latlong_clean) # A tibble: 6 × 25
Year StateAbbr StateDesc CityName GeographicLevel DataSource Category
<dbl> <chr> <chr> <chr> <chr> <chr> <chr>
1 2017 CA California Hawthorne City BRFSS Unhealthy Be…
2 2017 AL Alabama Montgomery City BRFSS Prevention
3 2017 AZ Arizona Glendale City BRFSS Unhealthy Be…
4 2017 CA California Concord City BRFSS Prevention
5 2017 CA California Concord City BRFSS Prevention
6 2017 CA California Escondido Census Tract BRFSS Unhealthy Be…
# ℹ 18 more variables: UniqueID <chr>, Measure <chr>, Data_Value_Unit <chr>,
# DataValueTypeID <chr>, Data_Value_Type <chr>, Data_Value <dbl>,
# Low_Confidence_Limit <dbl>, High_Confidence_Limit <dbl>,
# Data_Value_Footnote_Symbol <chr>, Data_Value_Footnote <chr>,
# PopulationCount <dbl>, lat <dbl>, long <dbl>, CategoryID <chr>,
# MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>
What variables are included? (can any of them be removed?)
# List the column names to decide which ones can be dropped.
names(latlong_clean)
#> [1] "Year" "StateAbbr"
#> [3] "StateDesc" "CityName"
#> [5] "GeographicLevel" "DataSource"
#> [7] "Category" "UniqueID"
#> [9] "Measure" "Data_Value_Unit"
#> [11] "DataValueTypeID" "Data_Value_Type"
#> [13] "Data_Value" "Low_Confidence_Limit"
#> [15] "High_Confidence_Limit" "Data_Value_Footnote_Symbol"
#> [17] "Data_Value_Footnote" "PopulationCount"
#> [19] "lat" "long"
#> [21] "CategoryID" "MeasureId"
#> [23] "CityFIPS" "TractFIPS"
#> [25] "Short_Question_Text"
Remove the variables that will not be used in the assignment
# Drop the seven columns that the rest of the assignment never reads.
prevention <- select(
  latlong_clean,
  -c(
    DataSource,
    Data_Value_Unit,
    DataValueTypeID,
    Low_Confidence_Limit,
    High_Confidence_Limit,
    Data_Value_Footnote_Symbol,
    Data_Value_Footnote
  )
)
head(prevention) # A tibble: 6 × 18
Year StateAbbr StateDesc CityName GeographicLevel Category UniqueID Measure
<dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 2017 CA California Hawthorne City Unhealt… 632548 Curren…
2 2017 AL Alabama Montgome… City Prevent… 151000 Choles…
3 2017 AZ Arizona Glendale City Unhealt… 427820 Curren…
4 2017 CA California Concord City Prevent… 616000 Visits…
5 2017 CA California Concord City Prevent… 616000 Choles…
6 2017 CA California Escondido Census Tract Unhealt… 0622804… Curren…
# ℹ 10 more variables: Data_Value_Type <chr>, Data_Value <dbl>,
# PopulationCount <dbl>, lat <dbl>, long <dbl>, CategoryID <chr>,
# MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>
# Maryland-only rows of the cleaned data.
md <- filter(prevention, StateAbbr == "MD")
head(md) # A tibble: 6 × 18
Year StateAbbr StateDesc CityName GeographicLevel Category UniqueID Measure
<dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 2017 MD Maryland Baltimore Census Tract Unhealth… 2404000… "Curre…
2 2017 MD Maryland Baltimore Census Tract Preventi… 2404000… "Chole…
3 2017 MD Maryland Baltimore Census Tract Preventi… 2404000… "Visit…
4 2017 MD Maryland Baltimore Census Tract Preventi… 2404000… "Visit…
5 2017 MD Maryland Baltimore Census Tract Preventi… 2404000… "Curre…
6 2017 MD Maryland Baltimore Census Tract Unhealth… 2404000… "Curre…
# ℹ 10 more variables: Data_Value_Type <chr>, Data_Value <dbl>,
# PopulationCount <dbl>, lat <dbl>, long <dbl>, CategoryID <chr>,
# MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>
The new dataset “Prevention” is a manageable dataset now.
For your assignment, work with the cleaned “Prevention” dataset
1. Once you run the above code, filter this dataset one more time for any particular subset.
I want to see the relationship between smokers and people without health insurance for all cities in the data frame.
# Build one row per city holding its smoking and uninsured prevalence.
#
# Only city-level rows for the two measures of interest are kept:
#   CSMOKING - the smoking measure (reported below as `Smokers`)
#   ACCESS2  - presumably the lack-of-health-insurance measure; confirm
#              against the Measure column (reported below as `NonInsured`)
#
# Cities are grouped by name AND state. Grouping by CityName alone merges
# same-named cities in different states (e.g. Albany, GA and Albany, NY):
# their values get added together, their coordinates averaged, and `unique()`
# returns two state values per group, which triggers dplyr's
# "Returning more (or less) than 1 row per `summarise()` group" warning.
Cities <- prevention |>
  filter(
    GeographicLevel == "City",
    MeasureId %in% c("ACCESS2", "CSMOKING")
  ) |>
  group_by(CityName, Stateab = StateAbbr, StateName = StateDesc) |>
  summarise(
    Smokers = sum(Data_Value[MeasureId == "CSMOKING"]),
    NonInsured = sum(Data_Value[MeasureId == "ACCESS2"]),
    lat = mean(lat),
    long = mean(long),
    .groups = "drop"
  ) |>
  # Column order used by the plots, the maps and the printed table.
  select(CityName, Smokers, NonInsured, lat, long, Stateab, StateName)
# NOTE: the table printed below was rendered with the city-name-only grouping
# (see the duplicated Albany rows); re-run this chunk to refresh it.
head(Cities) # A tibble: 6 × 7
# Groups: CityName [5]
CityName Smokers NonInsured lat long Stateab StateName
<chr> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
1 Abilene 19 23.9 32.5 -99.7 TX Texas
2 Akron 24.9 12.8 41.1 -81.5 OH Ohio
3 Alameda 11.2 8.3 37.8 -122. CA California
4 Albany 41.9 38.5 37.1 -79.0 GA Georgia
5 Albany 41.9 38.5 37.1 -79.0 NY New York
6 Albuquerque 17.1 14.3 35.1 -107. NM New Mexico
2. Based on the GIS tutorial (Japan earthquakes), create one plot about something in your subsetted dataset.
# Interactive scatter plot: smoking prevalence (x) against uninsured
# prevalence (y), one point per row of `Cities`, one series per state.
highchart() |>
  hc_add_series(
    data = Cities,
    type = "scatter",
    hcaes(x = Smokers, y = NonInsured, group = Stateab),
    tooltip = list(
      useHTML = TRUE,
      # Built from pieces so the tooltip HTML carries no stray line breaks.
      pointFormat = paste0(
        "{point.CityName}, {point.StateName}<br>",
        "Without insurance: {point.NonInsured}<br>",
        "Smokers: {point.Smokers}"
      )
    )
  ) |>
  hc_xAxis(title = list(text = "Population of Smokers (%)")) |>
  hc_yAxis(title = list(text = "Population without insurance (%)"))

# Static version of the same relationship, with a linear fit drawn in red.
# (A stray unnamed `colours("Set1")` aesthetic was removed: it calls base R's
# colour-name listing rather than selecting a palette.)
plot1 <- ggplot(Cities, aes(x = Smokers, y = NonInsured)) +
  geom_point(aes(color = Stateab)) +
  geom_smooth(method = "lm", formula = y ~ x, color = "red")
plot1
# 3. Now create a map of your subsetted dataset.
# Centre of the initial map view. NOTE: despite the `ohio_` prefix, 38°N, 97°W
# lies in the central US (Kansas), which suits the country-wide zoom used
# here. The names are kept because the tooltip map further down reuses them.
ohio_lat <- 38.000000
ohio_lon <- -97.000000

# Bubble map of `Cities`: circle radius scales with the smoking value and fill
# opacity with the uninsured value (a percentage, hence the division by 100).
# No lng/lat is passed, so leaflet picks the `long` and `lat` columns itself.
leaflet() |>
  setView(lng = ohio_lon, lat = ohio_lat, zoom = 4.3) |>
  addProviderTiles("Esri.WorldStreetMap") |>
  addCircles(
    data = Cities,
    radius = Cities$Smokers * 1000,
    color = "darkgrey",
    fillColor = "black",
    fillOpacity = Cities$NonInsured / 100
  )
#> Assuming "long" and "lat" are longitude and latitude, respectively
4. Refine your map to include a mouseover tooltip
Now the map with tooltips
# HTML shown when a circle is clicked: one string per row of `Cities`.
popup <- paste0(
  "<b>City: </b>", Cities$CityName, "<br>",
  "<b>State:</b>", Cities$StateName, "<br>",
  "<b>Smokers: </b>", Cities$Smokers, "<br>",
  "<b>Non insured people:</b>", Cities$NonInsured, "<br>"
)

# Same bubble map as before (with a larger radius factor), now interactive:
# `popup` opens on click, while `label` is what appears on mouse-over.
leaflet() |>
  setView(lng = ohio_lon, lat = ohio_lat, zoom = 4.3) |>
  addProviderTiles("Esri.WorldStreetMap") |>
  addCircles(
    data = Cities,
    radius = Cities$Smokers * 2000,
    color = "darkgray",
    fillColor = "black",
    fillOpacity = Cities$NonInsured / 100,
    popup = popup,
    label = paste0(Cities$CityName, ", ", Cities$Stateab)
  )
#> Assuming "long" and "lat" are longitude and latitude, respectively
5. Write a paragraph
This map is really interesting. The size of each bubble shows the percentage of smokers, and the Great Lakes region is full of them. The opacity shows the percentage of people who are not insured. Strikingly, the bigger circles also tend to be the denser ones, which suggests that the more smokers a city has, the fewer of its people are insured; the same pattern can be seen in the graphs. One possible explanation is that insurance is more expensive to buy when you are a smoker, because insurers run a background check on you.