500 Healthy Cities GIS Assignment

Author

Tessa McCCollum

Load the libraries and set the working directory

library(tidyverse)
library(tidyr)
library(leaflet)
library(webshot2)
# Resolve the relative CSV path below against the course folder.
setwd("~/Documents/Data 110")
# Read the 500 Cities extract into a tibble. read_csv() already puts the data
# in memory, so no data() call is needed: data() only loads datasets that ship
# with packages and would just warn that 'cities500' cannot be found.
cities500 <- read_csv("500LocalCities.csv")

The GeoLocation variable has (lat, long) format

Split GeoLocation (lat, long) into two columns: lat and long

# Strip the parentheses from "(lat, long)" and split the remainder on the
# comma into two columns; convert = TRUE turns the pieces into numbers.
latlong <- cities500 |>
  mutate(
    GeoLocation = str_replace_all(
      string = GeoLocation,
      pattern = "[()]",
      replacement = ""
    )
  ) |>
  separate(
    col = GeoLocation,
    into = c("lat", "long"),
    sep = ",",
    convert = TRUE
  )
head(latlong)
# A tibble: 6 × 25
   Year StateAbbr StateDesc  CityName  GeographicLevel DataSource Category      
  <dbl> <chr>     <chr>      <chr>     <chr>           <chr>      <chr>         
1  2017 CA        California Hawthorne Census Tract    BRFSS      Health Outcom…
2  2017 CA        California Hawthorne City            BRFSS      Unhealthy Beh…
3  2017 CA        California Hayward   City            BRFSS      Health Outcom…
4  2017 CA        California Hayward   City            BRFSS      Unhealthy Beh…
5  2017 CA        California Hemet     City            BRFSS      Prevention    
6  2017 CA        California Indio     Census Tract    BRFSS      Health Outcom…
# ℹ 18 more variables: UniqueID <chr>, Measure <chr>, Data_Value_Unit <chr>,
#   DataValueTypeID <chr>, Data_Value_Type <chr>, Data_Value <dbl>,
#   Low_Confidence_Limit <dbl>, High_Confidence_Limit <dbl>,
#   Data_Value_Footnote_Symbol <chr>, Data_Value_Footnote <chr>,
#   PopulationCount <dbl>, lat <dbl>, long <dbl>, CategoryID <chr>,
#   MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>

Filter the dataset

Remove the rows whose StateDesc is "United States", select Prevention as the category (of interest), filter for only measuring crude prevalence, and select only 2017.

# Keep only state-level rows (drop the national "United States" rows),
# crude-prevalence measures, and the 2017 data year.
latlong_clean <- latlong |>
  filter(
    StateDesc != "United States",
    Data_Value_Type == "Crude prevalence",
    Year == 2017
  )
head(latlong_clean)
# A tibble: 6 × 25
   Year StateAbbr StateDesc  CityName  GeographicLevel DataSource Category      
  <dbl> <chr>     <chr>      <chr>     <chr>           <chr>      <chr>         
1  2017 CA        California Hawthorne Census Tract    BRFSS      Health Outcom…
2  2017 CA        California Hawthorne City            BRFSS      Unhealthy Beh…
3  2017 CA        California Hayward   City            BRFSS      Unhealthy Beh…
4  2017 CA        California Indio     Census Tract    BRFSS      Health Outcom…
5  2017 CA        California Inglewood Census Tract    BRFSS      Health Outcom…
6  2017 CA        California Lakewood  City            BRFSS      Unhealthy Beh…
# ℹ 18 more variables: UniqueID <chr>, Measure <chr>, Data_Value_Unit <chr>,
#   DataValueTypeID <chr>, Data_Value_Type <chr>, Data_Value <dbl>,
#   Low_Confidence_Limit <dbl>, High_Confidence_Limit <dbl>,
#   Data_Value_Footnote_Symbol <chr>, Data_Value_Footnote <chr>,
#   PopulationCount <dbl>, lat <dbl>, long <dbl>, CategoryID <chr>,
#   MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>

What variables are included? (can any of them be removed?)

# List the column names of latlong_clean to decide which ones can be dropped.
names(latlong_clean)
 [1] "Year"                       "StateAbbr"                 
 [3] "StateDesc"                  "CityName"                  
 [5] "GeographicLevel"            "DataSource"                
 [7] "Category"                   "UniqueID"                  
 [9] "Measure"                    "Data_Value_Unit"           
[11] "DataValueTypeID"            "Data_Value_Type"           
[13] "Data_Value"                 "Low_Confidence_Limit"      
[15] "High_Confidence_Limit"      "Data_Value_Footnote_Symbol"
[17] "Data_Value_Footnote"        "PopulationCount"           
[19] "lat"                        "long"                      
[21] "CategoryID"                 "MeasureId"                 
[23] "CityFIPS"                   "TractFIPS"                 
[25] "Short_Question_Text"       

Remove the variables that will not be used in the assignment

# Drop the source, unit, confidence-limit and footnote columns, which are not
# used in the rest of the assignment.
latlong_clean2 <- latlong_clean |>
  select(
    -c(
      DataSource,
      Data_Value_Unit,
      DataValueTypeID,
      Low_Confidence_Limit,
      High_Confidence_Limit,
      Data_Value_Footnote_Symbol,
      Data_Value_Footnote
    )
  )
head(latlong_clean2)
# A tibble: 6 × 18
   Year StateAbbr StateDesc  CityName  GeographicLevel Category UniqueID Measure
  <dbl> <chr>     <chr>      <chr>     <chr>           <chr>    <chr>    <chr>  
1  2017 CA        California Hawthorne Census Tract    Health … 0632548… Arthri…
2  2017 CA        California Hawthorne City            Unhealt… 632548   Curren…
3  2017 CA        California Hayward   City            Unhealt… 633000   Obesit…
4  2017 CA        California Indio     Census Tract    Health … 0636448… Arthri…
5  2017 CA        California Inglewood Census Tract    Health … 0636546… Diagno…
6  2017 CA        California Lakewood  City            Unhealt… 639892   Obesit…
# ℹ 10 more variables: Data_Value_Type <chr>, Data_Value <dbl>,
#   PopulationCount <dbl>, lat <dbl>, long <dbl>, CategoryID <chr>,
#   MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>

The new dataset “Prevention” is a manageable dataset now.

For your assignment, work with a cleaned dataset.

1. Once you run the above code and learn how to filter in this format, filter this dataset however you choose so that you have a subset with no more than 900 observations.

Filter chunk here

Official Subsetted Data Set

# Texas subset: 2016 crude-prevalence rows for the two women's preventive-care
# measures at census-tract level, capped at the first 900 rows.
latlong_5 <- latlong |>
  filter(
    # Refer to columns by bare name: filter() looks them up in the data being
    # filtered, whereas `latlong$col` always points at the full, unfiltered
    # table and only lines up if this happens to be the first step.
    !is.na(PopulationCount),
    !is.na(Data_Value),
    Data_Value_Type == "Crude prevalence",
    Year == 2016,
    Category == "Prevention",
    GeographicLevel == "Census Tract",
    Short_Question_Text %in% c("Mammography", "Pap Smear Test"),
    # Keeping only Texas also excludes the national "United States" rows.
    StateDesc == "Texas"
  ) |>
  head(900) |>
  # Bucket prevalence into three labels. The cut points are hard-coded to the
  # minimum (67.8), lower hinge (77.8) and median (79.9) that fivenum() reports
  # below; a value under 67.8 would get NA.
  mutate(prev_level = case_when(
    Data_Value >= 67.8 & Data_Value <= 77.8 ~ "Low",
    Data_Value > 77.8 & Data_Value <= 79.9 ~ "Medium",
    Data_Value > 79.9 ~ "High"
  )) |>
  # Drop identifier, unit, confidence-limit and footnote columns not used later.
  select(
    -DataSource, -Data_Value_Unit, -DataValueTypeID,
    -Low_Confidence_Limit, -High_Confidence_Limit,
    -Data_Value_Footnote_Symbol, -Data_Value_Footnote,
    -MeasureId, -StateAbbr, -UniqueID, -CityFIPS, -CategoryID,
    -TractFIPS, -Measure
  )

# Five-number summary of prevalence (min, lower hinge, median, upper hinge, max).
fivenum(latlong_5$Data_Value)
[1] 67.8 77.8 79.9 81.7 86.8
# Preview the first rows of the Texas subset.
head(latlong_5)
# A tibble: 6 × 12
   Year StateDesc CityName   GeographicLevel Category Data_Value_Type Data_Value
  <dbl> <chr>     <chr>      <chr>           <chr>    <chr>                <dbl>
1  2016 Texas     Houston    Census Tract    Prevent… Crude prevalen…       83.6
2  2016 Texas     Houston    Census Tract    Prevent… Crude prevalen…       75.8
3  2016 Texas     Amarillo   Census Tract    Prevent… Crude prevalen…       74.7
4  2016 Texas     Arlington  Census Tract    Prevent… Crude prevalen…       78.7
5  2016 Texas     Beaumont   Census Tract    Prevent… Crude prevalen…       80.2
6  2016 Texas     Brownsvil… Census Tract    Prevent… Crude prevalen…       76.4
# ℹ 5 more variables: PopulationCount <dbl>, lat <dbl>, long <dbl>,
#   Short_Question_Text <chr>, prev_level <chr>
# California subset: 2016 crude-prevalence rows for the two women's
# preventive-care measures at census-tract level, capped at the first 900 rows.
latlong_4 <- latlong |>
  filter(
    # Refer to columns by bare name: filter() looks them up in the data being
    # filtered, whereas `latlong$col` always points at the full, unfiltered
    # table and only lines up if this happens to be the first step.
    !is.na(PopulationCount),
    !is.na(Data_Value),
    Data_Value_Type == "Crude prevalence",
    Year == 2016,
    Category == "Prevention",
    GeographicLevel == "Census Tract",
    Short_Question_Text %in% c("Mammography", "Pap Smear Test"),
    # Keeping only California also excludes the national "United States" rows.
    StateDesc == "California"
  ) |>
  head(900) |>
  # Bucket prevalence into three labels using hard-coded cut points; a value
  # under 72.50 would get NA.
  mutate(prev_level = case_when(
    Data_Value >= 72.50 & Data_Value < 79 ~ "Low",
    Data_Value >= 79 & Data_Value < 83.15 ~ "Medium",
    Data_Value >= 83.15 ~ "High"
  )) |>
  # Drop identifier, unit, confidence-limit and footnote columns not used later.
  select(
    -DataSource, -Data_Value_Unit, -DataValueTypeID,
    -Low_Confidence_Limit, -High_Confidence_Limit,
    -Data_Value_Footnote_Symbol, -Data_Value_Footnote,
    -MeasureId, -StateAbbr, -UniqueID, -CityFIPS, -CategoryID,
    -TractFIPS, -Measure
  )

latlong_4
# A tibble: 900 × 12
    Year StateDesc  CityName GeographicLevel Category Data_Value_Type Data_Value
   <dbl> <chr>      <chr>    <chr>           <chr>    <chr>                <dbl>
 1  2016 California Buena P… Census Tract    Prevent… Crude prevalen…       78.1
 2  2016 California Concord  Census Tract    Prevent… Crude prevalen…       79.7
 3  2016 California Richmond Census Tract    Prevent… Crude prevalen…       80.8
 4  2016 California Tracy    Census Tract    Prevent… Crude prevalen…       80.7
 5  2016 California Bakersf… Census Tract    Prevent… Crude prevalen…       80.8
 6  2016 California Anaheim  Census Tract    Prevent… Crude prevalen…       80  
 7  2016 California Anaheim  Census Tract    Prevent… Crude prevalen…       80.4
 8  2016 California Alameda  Census Tract    Prevent… Crude prevalen…       82.5
 9  2016 California Anaheim  Census Tract    Prevent… Crude prevalen…       77.5
10  2016 California Anaheim  Census Tract    Prevent… Crude prevalen…       82.4
# ℹ 890 more rows
# ℹ 5 more variables: PopulationCount <dbl>, lat <dbl>, long <dbl>,
#   Short_Question_Text <chr>, prev_level <chr>

2. Based on the GIS tutorial (Japan earthquakes), create one plot about something in your subsetted dataset.

First plot chunk here

  # Scatter of tract population against prevalence for the California subset,
  # coloured by which preventive-care measure the row reports.
  ggplot(
    data = latlong_4,
    mapping = aes(
      x = Data_Value,
      y = PopulationCount,
      color = Short_Question_Text
    )
  ) +
    geom_point(size = 5, alpha = 0.3) +
    scale_color_manual(
      values = c("Mammography" = "purple", "Pap Smear Test" = "hotpink")
    ) +
    labs(
      title = "Prevalence of Preventative Healthcare for Women, by Census Tracts in California",
      subtitle = "In the Year 2016",
      caption = "Source: CDC",
      x = "Preventative Care Prevalence (%)",
      y = "Population",
      color = "Type of Preventative Care"
    ) +
    theme_bw()

# Scatter of tract population against prevalence for the Texas subset,
# coloured by which preventive-care measure the row reports.
ggplot(
  data = latlong_5,
  mapping = aes(
    x = Data_Value,
    y = PopulationCount,
    color = Short_Question_Text
  )
) +
  geom_point(size = 5, alpha = 0.3) +
  scale_color_manual(
    values = c("Mammography" = "green", "Pap Smear Test" = "pink")
  ) +
  labs(
    title = "Prevalence of Preventative Healthcare for Women, by Census Tracts in Texas",
    subtitle = "In the Year 2016",
    caption = "Source: CDC",
    x = "Preventative Care Prevalence (%)",
    y = "Population",
    color = "Type of Preventative Care"
  ) +
  theme_bw()

For Fun

# All-states version of the subset: 2016 crude-prevalence rows for the two
# women's preventive-care measures at census-tract level (no row cap).
latlong_me <- latlong |>
  filter(
    StateDesc != "United States",
    Data_Value_Type == "Crude prevalence",
    Year == 2016,
    Category == "Prevention",
    GeographicLevel == "Census Tract",
    Short_Question_Text %in% c("Mammography", "Pap Smear Test")
  )

# Drop identifier, unit, confidence-limit and footnote columns not used later.
latlong_me <- latlong_me |>
  select(
    -c(
      DataSource, Data_Value_Unit, DataValueTypeID,
      Low_Confidence_Limit, High_Confidence_Limit,
      Data_Value_Footnote_Symbol, Data_Value_Footnote,
      MeasureId, StateAbbr, UniqueID, CityFIPS, CategoryID,
      TractFIPS, Measure
    )
  )

latlong_me
# A tibble: 55,635 × 11
    Year StateDesc  CityName GeographicLevel Category Data_Value_Type Data_Value
   <dbl> <chr>      <chr>    <chr>           <chr>    <chr>                <dbl>
 1  2016 Alabama    Hoover   Census Tract    Prevent… Crude prevalen…       81.9
 2  2016 Alaska     Anchora… Census Tract    Prevent… Crude prevalen…       74  
 3  2016 Arizona    Glendale Census Tract    Prevent… Crude prevalen…       81.2
 4  2016 Arizona    Phoenix  Census Tract    Prevent… Crude prevalen…       78.7
 5  2016 California Buena P… Census Tract    Prevent… Crude prevalen…       78.1
 6  2016 California Concord  Census Tract    Prevent… Crude prevalen…       79.7
 7  2016 California Richmond Census Tract    Prevent… Crude prevalen…       80.8
 8  2016 California Tracy    Census Tract    Prevent… Crude prevalen…       80.7
 9  2016 Indiana    Bloomin… Census Tract    Prevent… Crude prevalen…       75  
10  2016 Indiana    Muncie   Census Tract    Prevent… Crude prevalen…       73.2
# ℹ 55,625 more rows
# ℹ 4 more variables: PopulationCount <dbl>, lat <dbl>, long <dbl>,
#   Short_Question_Text <chr>
# For each state keep the single row with the largest PopulationCount, then
# take the five largest of those rows overall.
latlong_top <- latlong_me |>
  group_by(StateDesc) |>
  slice_max(order_by = PopulationCount, n = 1, with_ties = FALSE) |>
  # Drop the grouping once the per-state pick is done so later steps work on
  # the table as a whole instead of carrying stale groups.
  ungroup() |>
  arrange(-PopulationCount) |>
  head(5)

latlong_top
# A tibble: 5 × 11
# Groups:   StateDesc [5]
   Year StateDesc   CityName GeographicLevel Category Data_Value_Type Data_Value
  <dbl> <chr>       <chr>    <chr>           <chr>    <chr>                <dbl>
1  2016 California  Chula V… Census Tract    Prevent… Crude prevalen…       80.6
2  2016 New York    New York Census Tract    Prevent… Crude prevalen…       87.7
3  2016 Florida     Miramar  Census Tract    Prevent… Crude prevalen…       84.7
4  2016 Virginia    Norfolk  Census Tract    Prevent… Crude prevalen…       83.8
5  2016 South Caro… Mount P… Census Tract    Prevent… Crude prevalen…       84.8
# ℹ 4 more variables: PopulationCount <dbl>, lat <dbl>, long <dbl>,
#   Short_Question_Text <chr>
# Among rows with PopulationCount above 4000, keep each state's smallest row,
# then take the five smallest of those rows overall.
latlong_bott <- latlong_me |>
  filter(PopulationCount > 4000) |>
  group_by(StateDesc) |>
  slice_min(order_by = PopulationCount, n = 1, with_ties = FALSE) |>
  # Drop the grouping once the per-state pick is done so later steps work on
  # the table as a whole instead of carrying stale groups.
  ungroup() |>
  arrange(PopulationCount) |>
  head(5)

latlong_bott
# A tibble: 5 × 11
# Groups:   StateDesc [5]
   Year StateDesc   CityName GeographicLevel Category Data_Value_Type Data_Value
  <dbl> <chr>       <chr>    <chr>           <chr>    <chr>                <dbl>
1  2016 Arizona     Mesa     Census Tract    Prevent… Crude prevalen…       78.7
2  2016 California  Los Ang… Census Tract    Prevent… Crude prevalen…       84.1
3  2016 North Caro… Charlot… Census Tract    Prevent… Crude prevalen…       81.1
4  2016 Florida     Miami    Census Tract    Prevent… Crude prevalen…       86  
5  2016 Illinois    Evanston Census Tract    Prevent… Crude prevalen…       78.9
# ℹ 4 more variables: PopulationCount <dbl>, lat <dbl>, long <dbl>,
#   Short_Question_Text <chr>
# Label used for each state in the plot legend. Keyed by state name so the
# assignment does not depend on row order or on the position of the column.
party_by_state <- c(
  "Arizona" = "Republican",
  "California" = "Liberal",
  "North Carolina" = "Republican",
  "Florida" = "Republican",
  "Illinois" = "Liberal",
  "New York" = "Liberal",
  "Virginia" = "Liberal",
  "South Carolina" = "Republican"
)

# Stack the smallest-population and largest-population picks (bottom rows
# first), then add the party label and a "City, State" display name.
combined_latlong <- bind_rows(latlong_bott, latlong_top) |>
  # Work on the table as a whole even if the inputs still carry groups.
  ungroup() |>
  mutate(
    # A state missing from the lookup gets NA rather than a wrong label.
    rep_lib = unname(party_by_state[StateDesc]),
    CityName = paste(CityName, StateDesc, sep = ", ")
  )

head(combined_latlong)
# A tibble: 6 × 12
# Groups:   StateDesc [5]
   Year StateDesc   CityName GeographicLevel Category Data_Value_Type Data_Value
  <dbl> <chr>       <chr>    <chr>           <chr>    <chr>                <dbl>
1  2016 Arizona     Mesa, A… Census Tract    Prevent… Crude prevalen…       78.7
2  2016 California  Los Ang… Census Tract    Prevent… Crude prevalen…       84.1
3  2016 North Caro… Charlot… Census Tract    Prevent… Crude prevalen…       81.1
4  2016 Florida     Miami, … Census Tract    Prevent… Crude prevalen…       86  
5  2016 Illinois    Evansto… Census Tract    Prevent… Crude prevalen…       78.9
6  2016 California  Chula V… Census Tract    Prevent… Crude prevalen…       80.6
# ℹ 5 more variables: PopulationCount <dbl>, lat <dbl>, long <dbl>,
#   Short_Question_Text <chr>, rep_lib <chr>
# Scatter of tract population against prevalence for the ten selected tracts:
# colour identifies the city, point shape the party label in rep_lib.
ggplot(
  combined_latlong,
  aes(x = Data_Value, y = PopulationCount, color = CityName, shape = rep_lib)
) +
  scale_color_viridis_d() +
  geom_point(alpha = 0.6, size = 5) +
  labs(
    title = "Prevalence of Preventative healthcare for women, by census tract population size",
    caption = "Source: CDC, nytimes",
    color = "City",
    x = "Preventative Care prevalence(%)",
    y = "Population",
    # Legend title typo fixed ("Presidental" -> "Presidential").
    shape = "Party in the 2016 Presidential Election"
  ) +
  theme_bw()

3. Now create a map of your subsetted dataset.

First map chunk here

# Colour function for the three prevalence labels. `levels` fixes both the
# categories and their order; colorFactor() ignores `domain` when `levels` is
# given, so it is passed as NULL instead of a data column.
pal <- colorFactor(
  palette = c("hotpink", "yellow", "green"),
  domain = NULL,
  levels = c("Low", "Medium", "High")
)

# California map: one circle per row, coloured by prevalence label and sized
# by population (radius = PopulationCount * 4).
leaflet(latlong_4) |>
  setView(lng = -119.417931, lat = 36.778259, zoom = 5) |>
  addProviderTiles("Esri.NatGeoWorldMap") |>
  addCircles(
    # Name the coordinate columns explicitly instead of letting leaflet guess
    # them (the guess is what prints the "Assuming ..." message).
    lng = ~long,
    lat = ~lat,
    stroke = FALSE,
    fillColor = ~pal(prev_level),
    # A formula is evaluated against the data passed to leaflet().
    radius = ~PopulationCount * 4,
    fillOpacity = 0.4
  )
Assuming "long" and "lat" are longitude and latitude, respectively
# Texas map: one circle per row, coloured by prevalence label (via `pal`) and
# sized by population (radius = PopulationCount * 3).
leaflet(latlong_5) |>
  setView(lng = -99.9018, lat = 31.9686, zoom = 5.5) |>
  addProviderTiles("Esri.NatGeoWorldMap") |>
  addCircles(
    # Name the coordinate columns explicitly instead of letting leaflet guess
    # them (the guess is what prints the "Assuming ..." message).
    lng = ~long,
    lat = ~lat,
    stroke = FALSE,
    fillColor = ~pal(prev_level),
    # A formula is evaluated against the data passed to leaflet().
    radius = ~PopulationCount * 3,
    fillOpacity = 0.4
  )
Assuming "long" and "lat" are longitude and latitude, respectively

4. Refine your map to include a mouse-click tooltip

Refined map chunk here

# HTML shown when a Texas circle is clicked: one string per row of latlong_5.
# Every label is wrapped in a properly closed <b>...</b> pair; the last four
# labels previously opened a second <b> instead of closing the first.
popuptexas <- paste0(
  "<b>Population: </b>", latlong_5$PopulationCount, "<br>",
  "<b>Women's Prevention Care Prevalence (%): </b>", latlong_5$Data_Value, "<br>",
  "<b>Prevention Care Prevalence (comparative rating): </b>", latlong_5$prev_level, "<br>",
  "<b>City Name: </b>", latlong_5$CityName, "<br>",
  "<b>Geographic Level: </b>", latlong_5$GeographicLevel, "<br>",
  "<b>Data Type: </b>", latlong_5$Data_Value_Type, "<br>",
  "<b>Preventative Care Type: </b>", latlong_5$Short_Question_Text
)

# Texas map with click popups: circles coloured by prevalence label (via `pal`)
# and sized by population (radius = PopulationCount * 3).
leaflet(latlong_5) |>
  setView(lng = -99.9018, lat = 31.9686, zoom = 5.5) |>
  addProviderTiles("Esri.NatGeoWorldMap") |>
  addCircles(
    # Name the coordinate columns explicitly instead of letting leaflet guess
    # them (the guess is what prints the "Assuming ..." message).
    lng = ~long,
    lat = ~lat,
    stroke = FALSE,
    fillColor = ~pal(prev_level),
    radius = ~PopulationCount * 3,
    fillOpacity = 0.4,
    popup = popuptexas
  )
Assuming "long" and "lat" are longitude and latitude, respectively
# HTML shown when a California circle is clicked: one string per row of
# latlong_4. Every label is wrapped in a properly closed <b>...</b> pair; the
# last four labels previously opened a second <b> instead of closing the first.
popupcali <- paste0(
  "<b>Population: </b>", latlong_4$PopulationCount, "<br>",
  "<b>Women's Prevention Care Prevalence (%): </b>", latlong_4$Data_Value, "<br>",
  "<b>Prevention Care Prevalence (comparative rating): </b>", latlong_4$prev_level, "<br>",
  "<b>City Name: </b>", latlong_4$CityName, "<br>",
  "<b>Geographic Level: </b>", latlong_4$GeographicLevel, "<br>",
  "<b>Data Type: </b>", latlong_4$Data_Value_Type, "<br>",
  "<b>Preventative Care Type: </b>", latlong_4$Short_Question_Text
)

# California map with click popups: circles coloured by prevalence label (via
# `pal`) and sized by population (radius = PopulationCount * 5).
leaflet(latlong_4) |>
  setView(lng = -119.417931, lat = 36.778259, zoom = 5) |>
  addProviderTiles("Esri.NatGeoWorldMap") |>
  addCircles(
    # Name the coordinate columns explicitly instead of letting leaflet guess
    # them (the guess is what prints the "Assuming ..." message).
    lng = ~long,
    lat = ~lat,
    stroke = FALSE,
    fillColor = ~pal(prev_level),
    # Size from this map's own data. The radius was previously read from
    # latlong_5 (the Texas subset), so California circles were sized by
    # unrelated Texas populations.
    radius = ~PopulationCount * 5,
    fillOpacity = 0.4,
    popup = popupcali
  )
Assuming "long" and "lat" are longitude and latitude, respectively

5. Write a paragraph

In a paragraph, describe the plots you created and what they show.