library(tidyverse)
library(tidyr)
library(leaflet)
library(webshot2)
# Point the session at the course folder so the relative CSV path resolves.
# NOTE(review): a hard-coded setwd() ties the script to one machine; consider
# a project-relative path instead.
setwd("~/Documents/Data 110")

# Read the 500 Cities extract from the working directory.
cities500 <- read_csv("500LocalCities.csv")
# NOTE(review): cities500 has just been read from the CSV, so this data() call
# is presumably redundant -- confirm and remove if so.
data(cities500)
500 Healthy Cities GIS Assignment
Load the libraries and set the working directory
The GeoLocation variable has (lat, long) format
Split GeoLocation (lat, long) into two columns: lat and long
# Split GeoLocation, stored as "(lat, long)" text, into two numeric columns.
latlong <- cities500 |>
  # Strip the parentheses so only "lat, long" is left.
  mutate(GeoLocation = str_replace_all(GeoLocation, "[()]", "")) |>
  # Break on the comma; convert = TRUE parses both pieces as numbers.
  separate(GeoLocation, into = c("lat", "long"), sep = ",", convert = TRUE)
head(latlong)
# A tibble: 6 × 25
Year StateAbbr StateDesc CityName GeographicLevel DataSource Category
<dbl> <chr> <chr> <chr> <chr> <chr> <chr>
1 2017 CA California Hawthorne Census Tract BRFSS Health Outcom…
2 2017 CA California Hawthorne City BRFSS Unhealthy Beh…
3 2017 CA California Hayward City BRFSS Health Outcom…
4 2017 CA California Hayward City BRFSS Unhealthy Beh…
5 2017 CA California Hemet City BRFSS Prevention
6 2017 CA California Indio Census Tract BRFSS Health Outcom…
# ℹ 18 more variables: UniqueID <chr>, Measure <chr>, Data_Value_Unit <chr>,
# DataValueTypeID <chr>, Data_Value_Type <chr>, Data_Value <dbl>,
# Low_Confidence_Limit <dbl>, High_Confidence_Limit <dbl>,
# Data_Value_Footnote_Symbol <chr>, Data_Value_Footnote <chr>,
# PopulationCount <dbl>, lat <dbl>, long <dbl>, CategoryID <chr>,
# MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>
Filter the dataset
Remove the rows whose StateDesc is "United States", select Prevention as the category of interest, keep only the crude-prevalence measures, and keep only the year 2017.
# Drop the national "United States" rows and keep only 2017 crude-prevalence
# estimates.
# NOTE(review): the accompanying text mentions selecting the Prevention
# category, but no Category filter is applied here -- confirm the intent.
latlong_clean <- latlong |>
  filter(
    StateDesc != "United States",
    Data_Value_Type == "Crude prevalence",
    Year == 2017
  )
head(latlong_clean)
# A tibble: 6 × 25
Year StateAbbr StateDesc CityName GeographicLevel DataSource Category
<dbl> <chr> <chr> <chr> <chr> <chr> <chr>
1 2017 CA California Hawthorne Census Tract BRFSS Health Outcom…
2 2017 CA California Hawthorne City BRFSS Unhealthy Beh…
3 2017 CA California Hayward City BRFSS Unhealthy Beh…
4 2017 CA California Indio Census Tract BRFSS Health Outcom…
5 2017 CA California Inglewood Census Tract BRFSS Health Outcom…
6 2017 CA California Lakewood City BRFSS Unhealthy Beh…
# ℹ 18 more variables: UniqueID <chr>, Measure <chr>, Data_Value_Unit <chr>,
# DataValueTypeID <chr>, Data_Value_Type <chr>, Data_Value <dbl>,
# Low_Confidence_Limit <dbl>, High_Confidence_Limit <dbl>,
# Data_Value_Footnote_Symbol <chr>, Data_Value_Footnote <chr>,
# PopulationCount <dbl>, lat <dbl>, long <dbl>, CategoryID <chr>,
# MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>
What variables are included? (can any of them be removed?)
# List the column names to decide which variables can be dropped.
colnames(latlong_clean)
[1] "Year" "StateAbbr"
[3] "StateDesc" "CityName"
[5] "GeographicLevel" "DataSource"
[7] "Category" "UniqueID"
[9] "Measure" "Data_Value_Unit"
[11] "DataValueTypeID" "Data_Value_Type"
[13] "Data_Value" "Low_Confidence_Limit"
[15] "High_Confidence_Limit" "Data_Value_Footnote_Symbol"
[17] "Data_Value_Footnote" "PopulationCount"
[19] "lat" "long"
[21] "CategoryID" "MeasureId"
[23] "CityFIPS" "TractFIPS"
[25] "Short_Question_Text"
Remove the variables that will not be used in the assignment
# Drop the source, unit, confidence-limit and footnote columns.
latlong_clean2 <- latlong_clean |>
  select(
    -DataSource, -Data_Value_Unit, -DataValueTypeID,
    -Low_Confidence_Limit, -High_Confidence_Limit,
    -Data_Value_Footnote_Symbol, -Data_Value_Footnote
  )
head(latlong_clean2)
# A tibble: 6 × 18
Year StateAbbr StateDesc CityName GeographicLevel Category UniqueID Measure
<dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 2017 CA California Hawthorne Census Tract Health … 0632548… Arthri…
2 2017 CA California Hawthorne City Unhealt… 632548 Curren…
3 2017 CA California Hayward City Unhealt… 633000 Obesit…
4 2017 CA California Indio Census Tract Health … 0636448… Arthri…
5 2017 CA California Inglewood Census Tract Health … 0636546… Diagno…
6 2017 CA California Lakewood City Unhealt… 639892 Obesit…
# ℹ 10 more variables: Data_Value_Type <chr>, Data_Value <dbl>,
# PopulationCount <dbl>, lat <dbl>, long <dbl>, CategoryID <chr>,
# MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>
The new dataset “Prevention” is a manageable dataset now.
For your assignment, work with a cleaned dataset.
1. Once you run the above code and learn how to filter in this format, filter this dataset however you choose so that you have a subset with no more than 900 observations.
Filter chunk here
Official Subsetted Data Set
# Texas subset: 2016 census-tract crude prevalence of mammography and Pap
# smear screening, capped at 900 rows.
latlong_5 <- latlong |>
  # Columns are referenced by bare name so the conditions are evaluated
  # against the piped data rather than the global `latlong` object.
  filter(
    !is.na(PopulationCount),
    !is.na(Data_Value),
    StateDesc != "United States",
    Data_Value_Type == "Crude prevalence",
    Year == 2016,
    Category == "Prevention",
    GeographicLevel == "Census Tract",
    Short_Question_Text %in% c("Mammography", "Pap Smear Test"),
    StateDesc %in% c("Texas")
  ) |>
  head(900) |>
  # The cut points 67.8 / 77.8 / 79.9 match the minimum, lower hinge and
  # median that fivenum() reports for this subset. Values outside every
  # range (below 67.8) would be left as NA.
  mutate(prev_level = case_when(
    Data_Value >= 67.8 & Data_Value <= 77.8 ~ "Low",
    Data_Value > 77.8 & Data_Value <= 79.9 ~ "Medium",
    Data_Value > 79.9 ~ "High"
  ))

# Drop identifier and metadata columns that the later plots and maps do not
# reference.
latlong_5 <- latlong_5 |>
  select(
    -DataSource, -Data_Value_Unit, -DataValueTypeID,
    -Low_Confidence_Limit, -High_Confidence_Limit,
    -Data_Value_Footnote_Symbol, -Data_Value_Footnote,
    -MeasureId, -StateAbbr, -UniqueID, -CityFIPS, -CategoryID,
    -TractFIPS, -Measure
  )
fivenum(latlong_5$Data_Value)
[1] 67.8 77.8 79.9 81.7 86.8
# Preview the first rows of the Texas subset.
latlong_5 |> head()
# A tibble: 6 × 12
Year StateDesc CityName GeographicLevel Category Data_Value_Type Data_Value
<dbl> <chr> <chr> <chr> <chr> <chr> <dbl>
1 2016 Texas Houston Census Tract Prevent… Crude prevalen… 83.6
2 2016 Texas Houston Census Tract Prevent… Crude prevalen… 75.8
3 2016 Texas Amarillo Census Tract Prevent… Crude prevalen… 74.7
4 2016 Texas Arlington Census Tract Prevent… Crude prevalen… 78.7
5 2016 Texas Beaumont Census Tract Prevent… Crude prevalen… 80.2
6 2016 Texas Brownsvil… Census Tract Prevent… Crude prevalen… 76.4
# ℹ 5 more variables: PopulationCount <dbl>, lat <dbl>, long <dbl>,
# Short_Question_Text <chr>, prev_level <chr>
# California subset: 2016 census-tract crude prevalence of mammography and
# Pap smear screening, capped at 900 rows.
latlong_4 <- latlong |>
  # Columns are referenced by bare name so the conditions are evaluated
  # against the piped data rather than the global `latlong` object.
  filter(
    !is.na(PopulationCount),
    !is.na(Data_Value),
    StateDesc != "United States",
    Data_Value_Type == "Crude prevalence",
    Year == 2016,
    Category == "Prevention",
    GeographicLevel == "Census Tract",
    Short_Question_Text %in% c("Mammography", "Pap Smear Test"),
    StateDesc %in% c("California")
  ) |>
  head(900) |>
  # Three-level rating of Data_Value; values below 72.50 match no rule and
  # would be left as NA.
  mutate(prev_level = case_when(
    Data_Value >= 72.50 & Data_Value < 79 ~ "Low",
    Data_Value >= 79 & Data_Value < 83.15 ~ "Medium",
    Data_Value >= 83.15 ~ "High"
  ))

# Drop identifier and metadata columns that the later plots and maps do not
# reference.
latlong_4 <- latlong_4 |>
  select(
    -DataSource, -Data_Value_Unit, -DataValueTypeID,
    -Low_Confidence_Limit, -High_Confidence_Limit,
    -Data_Value_Footnote_Symbol, -Data_Value_Footnote,
    -MeasureId, -StateAbbr, -UniqueID, -CityFIPS, -CategoryID,
    -TractFIPS, -Measure
  )
latlong_4
# A tibble: 900 × 12
Year StateDesc CityName GeographicLevel Category Data_Value_Type Data_Value
<dbl> <chr> <chr> <chr> <chr> <chr> <dbl>
1 2016 California Buena P… Census Tract Prevent… Crude prevalen… 78.1
2 2016 California Concord Census Tract Prevent… Crude prevalen… 79.7
3 2016 California Richmond Census Tract Prevent… Crude prevalen… 80.8
4 2016 California Tracy Census Tract Prevent… Crude prevalen… 80.7
5 2016 California Bakersf… Census Tract Prevent… Crude prevalen… 80.8
6 2016 California Anaheim Census Tract Prevent… Crude prevalen… 80
7 2016 California Anaheim Census Tract Prevent… Crude prevalen… 80.4
8 2016 California Alameda Census Tract Prevent… Crude prevalen… 82.5
9 2016 California Anaheim Census Tract Prevent… Crude prevalen… 77.5
10 2016 California Anaheim Census Tract Prevent… Crude prevalen… 82.4
# ℹ 890 more rows
# ℹ 5 more variables: PopulationCount <dbl>, lat <dbl>, long <dbl>,
# Short_Question_Text <chr>, prev_level <chr>
2. Based on the GIS tutorial (Japan earthquakes), create one plot about something in your subsetted dataset.
First plot chunk here
# California: screening prevalence against tract population, one translucent
# point per row, colored by screening type.
ggplot(
  data = latlong_4,
  mapping = aes(
    x = Data_Value,
    y = PopulationCount,
    color = Short_Question_Text
  )
) +
  geom_point(size = 5, alpha = 0.3) +
  scale_color_manual(
    values = c("Mammography" = "purple", "Pap Smear Test" = "hotpink")
  ) +
  labs(
    x = "Preventative Care Prevalence (%)",
    y = "Population",
    color = "Type of Preventative Care",
    title = "Prevalence of Preventative Healthcare for Women, by Census Tracts in California",
    subtitle = "In the Year 2016",
    caption = "Source: CDC"
  ) +
  theme_bw()
# Texas: screening prevalence against tract population, one translucent
# point per row, colored by screening type.
ggplot(
  data = latlong_5,
  mapping = aes(
    x = Data_Value,
    y = PopulationCount,
    color = Short_Question_Text
  )
) +
  geom_point(size = 5, alpha = 0.3) +
  scale_color_manual(
    values = c("Mammography" = "green", "Pap Smear Test" = "pink")
  ) +
  labs(
    x = "Preventative Care Prevalence (%)",
    y = "Population",
    color = "Type of Preventative Care",
    title = "Prevalence of Preventative Healthcare for Women, by Census Tracts in Texas",
    subtitle = "In the Year 2016",
    caption = "Source: CDC"
  ) +
  theme_bw()
For Fun
# All-states version of the screening subset: same measure filters as the
# state subsets, but with no state filter and no row cap.
latlong_me <- latlong |>
  filter(
    StateDesc != "United States",
    Data_Value_Type == "Crude prevalence",
    Year == 2016,
    Category == "Prevention",
    GeographicLevel == "Census Tract",
    Short_Question_Text %in% c("Mammography", "Pap Smear Test")
  )

# Drop identifier and metadata columns not referenced afterwards.
latlong_me <- latlong_me |>
  select(
    -DataSource, -Data_Value_Unit, -DataValueTypeID,
    -Low_Confidence_Limit, -High_Confidence_Limit,
    -Data_Value_Footnote_Symbol, -Data_Value_Footnote,
    -MeasureId, -StateAbbr, -UniqueID, -CityFIPS, -CategoryID,
    -TractFIPS, -Measure
  )
latlong_me
# A tibble: 55,635 × 11
Year StateDesc CityName GeographicLevel Category Data_Value_Type Data_Value
<dbl> <chr> <chr> <chr> <chr> <chr> <dbl>
1 2016 Alabama Hoover Census Tract Prevent… Crude prevalen… 81.9
2 2016 Alaska Anchora… Census Tract Prevent… Crude prevalen… 74
3 2016 Arizona Glendale Census Tract Prevent… Crude prevalen… 81.2
4 2016 Arizona Phoenix Census Tract Prevent… Crude prevalen… 78.7
5 2016 California Buena P… Census Tract Prevent… Crude prevalen… 78.1
6 2016 California Concord Census Tract Prevent… Crude prevalen… 79.7
7 2016 California Richmond Census Tract Prevent… Crude prevalen… 80.8
8 2016 California Tracy Census Tract Prevent… Crude prevalen… 80.7
9 2016 Indiana Bloomin… Census Tract Prevent… Crude prevalen… 75
10 2016 Indiana Muncie Census Tract Prevent… Crude prevalen… 73.2
# ℹ 55,625 more rows
# ℹ 4 more variables: PopulationCount <dbl>, lat <dbl>, long <dbl>,
# Short_Question_Text <chr>
# Keep each state's single most populous row, then take the five largest of
# those across states. The result is still grouped by StateDesc.
latlong_top <- latlong_me |>
  group_by(StateDesc) |>
  # with_ties = FALSE guarantees exactly one row per state.
  slice_max(order_by = PopulationCount, n = 1, with_ties = FALSE) |>
  arrange(desc(PopulationCount)) |>
  head(5)
latlong_top
# A tibble: 5 × 11
# Groups: StateDesc [5]
Year StateDesc CityName GeographicLevel Category Data_Value_Type Data_Value
<dbl> <chr> <chr> <chr> <chr> <chr> <dbl>
1 2016 California Chula V… Census Tract Prevent… Crude prevalen… 80.6
2 2016 New York New York Census Tract Prevent… Crude prevalen… 87.7
3 2016 Florida Miramar Census Tract Prevent… Crude prevalen… 84.7
4 2016 Virginia Norfolk Census Tract Prevent… Crude prevalen… 83.8
5 2016 South Caro… Mount P… Census Tract Prevent… Crude prevalen… 84.8
# ℹ 4 more variables: PopulationCount <dbl>, lat <dbl>, long <dbl>,
# Short_Question_Text <chr>
# Among rows with a population above 4000, keep each state's single least
# populous row, then take the five smallest of those across states. The
# result is still grouped by StateDesc.
latlong_bott <- latlong_me |>
  filter(PopulationCount > 4000) |>
  group_by(StateDesc) |>
  # with_ties = FALSE guarantees exactly one row per state.
  slice_min(order_by = PopulationCount, n = 1, with_ties = FALSE) |>
  arrange(PopulationCount) |>
  head(5)
latlong_bott
# A tibble: 5 × 11
# Groups: StateDesc [5]
Year StateDesc CityName GeographicLevel Category Data_Value_Type Data_Value
<dbl> <chr> <chr> <chr> <chr> <chr> <dbl>
1 2016 Arizona Mesa Census Tract Prevent… Crude prevalen… 78.7
2 2016 California Los Ang… Census Tract Prevent… Crude prevalen… 84.1
3 2016 North Caro… Charlot… Census Tract Prevent… Crude prevalen… 81.1
4 2016 Florida Miami Census Tract Prevent… Crude prevalen… 86
5 2016 Illinois Evanston Census Tract Prevent… Crude prevalen… 78.9
# ℹ 4 more variables: PopulationCount <dbl>, lat <dbl>, long <dbl>,
# Short_Question_Text <chr>
# Stack the five smallest-population rows on top of the five largest, and
# drop the leftover StateDesc grouping so later steps see plain rows.
combined_latlong <- bind_rows(latlong_bott, latlong_top) |>
  ungroup()

combined_latlong <- combined_latlong |>
  mutate(
    # Label rows by state instead of by row/column position, so the labels
    # stay correct if the row order or the number of columns changes.
    rep_lib = case_when(
      StateDesc %in% c(
        "Arizona", "Florida", "North Carolina", "South Carolina"
      ) ~ "Republican",
      StateDesc %in% c(
        "California", "Illinois", "New York", "Virginia"
      ) ~ "Liberal"
    ),
    # Display label of the form "City, State" (e.g. "Mesa, Arizona").
    CityName = paste0(CityName, ", ", StateDesc)
  )
head(combined_latlong)
# A tibble: 6 × 12
# Groups: StateDesc [5]
Year StateDesc CityName GeographicLevel Category Data_Value_Type Data_Value
<dbl> <chr> <chr> <chr> <chr> <chr> <dbl>
1 2016 Arizona Mesa, A… Census Tract Prevent… Crude prevalen… 78.7
2 2016 California Los Ang… Census Tract Prevent… Crude prevalen… 84.1
3 2016 North Caro… Charlot… Census Tract Prevent… Crude prevalen… 81.1
4 2016 Florida Miami, … Census Tract Prevent… Crude prevalen… 86
5 2016 Illinois Evansto… Census Tract Prevent… Crude prevalen… 78.9
6 2016 California Chula V… Census Tract Prevent… Crude prevalen… 80.6
# ℹ 5 more variables: PopulationCount <dbl>, lat <dbl>, long <dbl>,
# Short_Question_Text <chr>, rep_lib <chr>
# Screening prevalence against tract population for the ten selected rows:
# color distinguishes the city, point shape the rep_lib label.
ggplot(
  data = combined_latlong,
  mapping = aes(
    x = Data_Value,
    y = PopulationCount,
    color = CityName,
    shape = rep_lib
  )
) +
  geom_point(size = 5, alpha = 0.6) +
  scale_color_viridis_d() +
  labs(
    x = "Preventative Care prevalence(%)",
    y = "Population",
    color = "City",
    shape = "Party in the 2016 Presidental Election",
    title = "Prevalence of Preventative healthcare for women, by census tract population size",
    caption = "Source: CDC, nytimes"
  ) +
  theme_bw()
3. Now create a map of your subsetted dataset.
First map chunk here
# Color lookup for the three prevalence ratings: Low = hotpink,
# Medium = yellow, High = green.
pal <- colorFactor(
  palette = c("hotpink", "yellow", "green"),
  domain = latlong_4$prev_level,
  levels = c("Low", "Medium", "High")
)

# California map: one borderless circle per row, filled by rating and sized
# by population.
leaflet(latlong_4) |>
  setView(lng = -119.417931, lat = 36.778259, zoom = 5) |>
  addProviderTiles("Esri.NatGeoWorldMap") |>
  addCircles(
    stroke = FALSE,
    fillColor = ~pal(prev_level),
    # Formula form reads PopulationCount from the data passed to leaflet().
    radius = ~PopulationCount * 4,
    fillOpacity = 0.4
  )
Assuming "long" and "lat" are longitude and latitude, respectively
# Texas map: one borderless circle per row, filled by rating and sized by
# population.
leaflet(data = latlong_5) |>
  setView(lat = 31.9686, lng = -99.9018, zoom = 5.5) |>
  addProviderTiles(provider = "Esri.NatGeoWorldMap") |>
  addCircles(
    radius = ~PopulationCount * 3,
    fillColor = ~pal(prev_level),
    fillOpacity = 0.4,
    stroke = FALSE
  )
Assuming "long" and "lat" are longitude and latitude, respectively
4. Refine your map to include a mouse-click tooltip
Refined map chunk here
# HTML shown when a Texas circle is clicked: one string per row of latlong_5.
# Every label is wrapped in a properly closed <b>...</b> pair so the bold
# formatting does not run on into the values that follow.
popuptexas <- paste0(
  "<b>Population: </b>", latlong_5$PopulationCount, "<br>",
  "<b>Women's Prevention Care Prevalence (%): </b>", latlong_5$Data_Value, "<br>",
  "<b>Prevention Care Prevalence (comparative rating): </b>", latlong_5$prev_level, "<br>",
  "<b> City Name: </b>", latlong_5$CityName, "<br>",
  "<b> Geographic Level: </b>", latlong_5$GeographicLevel, "<br>",
  "<b> Data Type: </b>", latlong_5$Data_Value_Type, "<br>",
  "<b> Preventative Care Type: </b>", latlong_5$Short_Question_Text
)

# Texas map with the click popup attached to each circle.
leaflet(latlong_5) |>
  setView(lng = -99.9018, lat = 31.9686, zoom = 5.5) |>
  addProviderTiles("Esri.NatGeoWorldMap") |>
  addCircles(
    stroke = FALSE,
    fillColor = ~pal(prev_level),
    radius = ~PopulationCount * 3,
    fillOpacity = 0.4,
    popup = popuptexas
  )
Assuming "long" and "lat" are longitude and latitude, respectively
# HTML shown when a California circle is clicked: one string per row of
# latlong_4. Every label is wrapped in a properly closed <b>...</b> pair so
# the bold formatting does not run on into the values that follow.
popupcali <- paste0(
  "<b>Population: </b>", latlong_4$PopulationCount, "<br>",
  "<b>Women's Prevention Care Prevalence (%): </b>", latlong_4$Data_Value, "<br>",
  "<b>Prevention Care Prevalence (comparative rating): </b>", latlong_4$prev_level, "<br>",
  "<b> City Name: </b>", latlong_4$CityName, "<br>",
  "<b> Geographic Level: </b>", latlong_4$GeographicLevel, "<br>",
  "<b> Data Type: </b>", latlong_4$Data_Value_Type, "<br>",
  "<b> Preventative Care Type: </b>", latlong_4$Short_Question_Text
)

# California map with the click popup attached to each circle.
leaflet(latlong_4) |>
  setView(lng = -119.417931, lat = 36.778259, zoom = 5) |>
  addProviderTiles("Esri.NatGeoWorldMap") |>
  addCircles(
    stroke = FALSE,
    fillColor = ~pal(prev_level),
    # Size each circle from the mapped data itself (latlong_4) so the radius
    # lines up row-for-row with the coordinates and popup text.
    radius = ~PopulationCount * 5,
    fillOpacity = 0.4,
    popup = popupcali
  )
Assuming "long" and "lat" are longitude and latitude, respectively
5. Write a paragraph
In a paragraph, describe the plots you created and what they show.