GIS Healthy Cities

Author

Ash Ibasan

Load libraries and set working directory

library(tidyverse)
Warning: package 'tidyverse' was built under R version 4.4.1
Warning: package 'ggplot2' was built under R version 4.4.1
Warning: package 'tibble' was built under R version 4.4.1
Warning: package 'tidyr' was built under R version 4.4.1
Warning: package 'readr' was built under R version 4.4.1
Warning: package 'purrr' was built under R version 4.4.1
Warning: package 'dplyr' was built under R version 4.4.1
Warning: package 'stringr' was built under R version 4.4.1
Warning: package 'forcats' was built under R version 4.4.1
Warning: package 'lubridate' was built under R version 4.4.1
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidyr)
# Folder that holds the CDC 500 Cities extract. The full path is built with
# file.path() so the file can be read without calling setwd(), which would
# change the working directory for everything that runs afterwards.
data_dir <- "F:/Fall 2024/MC/DATA 110/b-e-a-utiful"
cities500 <- read_csv(
  file.path(data_dir, "500CitiesLocalHealthIndicators.cdc.csv")
)
Rows: 810103 Columns: 24
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (17): StateAbbr, StateDesc, CityName, GeographicLevel, DataSource, Categ...
dbl  (6): Year, Data_Value, Low_Confidence_Limit, High_Confidence_Limit, Cit...
num  (1): PopulationCount

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# cities500 was created by read_csv() and is already in memory. data() is meant
# for datasets that ship with packages and only produced a
# "data set 'cities500' not found" warning here, so fail fast on an empty or
# missing import instead.
stopifnot(is.data.frame(cities500), nrow(cities500) > 0)

Split GeoLocation (lat, long) into two columns: lat and long

# Drop the parentheses around GeoLocation, then split what is left at the
# comma into `lat` and `long`; convert = TRUE turns both pieces into numbers.
latlong <- cities500 |>
  mutate(GeoLocation = str_remove_all(GeoLocation, "[()]")) |>
  separate(
    GeoLocation,
    into = c("lat", "long"),
    sep = ",",
    convert = TRUE
  )
head(latlong)
# A tibble: 6 × 25
   Year StateAbbr StateDesc  CityName  GeographicLevel DataSource Category      
  <dbl> <chr>     <chr>      <chr>     <chr>           <chr>      <chr>         
1  2017 CA        California Hawthorne Census Tract    BRFSS      Health Outcom…
2  2017 CA        California Hawthorne City            BRFSS      Unhealthy Beh…
3  2017 CA        California Hayward   City            BRFSS      Health Outcom…
4  2017 CA        California Hayward   City            BRFSS      Unhealthy Beh…
5  2017 CA        California Hemet     City            BRFSS      Prevention    
6  2017 CA        California Indio     Census Tract    BRFSS      Health Outcom…
# ℹ 18 more variables: UniqueID <chr>, Measure <chr>, Data_Value_Unit <chr>,
#   DataValueTypeID <chr>, Data_Value_Type <chr>, Data_Value <dbl>,
#   Low_Confidence_Limit <dbl>, High_Confidence_Limit <dbl>,
#   Data_Value_Footnote_Symbol <chr>, Data_Value_Footnote <chr>,
#   PopulationCount <dbl>, lat <dbl>, long <dbl>, CategoryID <chr>,
#   MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>

Filter dataset

# Keep the 2017 crude-prevalence rows of the "Prevention" category and leave
# out the rows whose StateDesc is "United States".
latlong_clean <- latlong |>
  filter(
    StateDesc != "United States",
    Category == "Prevention",
    Data_Value_Type == "Crude prevalence",
    Year == 2017
  )
head(latlong_clean)
# A tibble: 6 × 25
   Year StateAbbr StateDesc  CityName   GeographicLevel DataSource Category  
  <dbl> <chr>     <chr>      <chr>      <chr>           <chr>      <chr>     
1  2017 AL        Alabama    Montgomery City            BRFSS      Prevention
2  2017 CA        California Concord    City            BRFSS      Prevention
3  2017 CA        California Concord    City            BRFSS      Prevention
4  2017 CA        California Fontana    City            BRFSS      Prevention
5  2017 CA        California Richmond   Census Tract    BRFSS      Prevention
6  2017 FL        Florida    Davie      Census Tract    BRFSS      Prevention
# ℹ 18 more variables: UniqueID <chr>, Measure <chr>, Data_Value_Unit <chr>,
#   DataValueTypeID <chr>, Data_Value_Type <chr>, Data_Value <dbl>,
#   Low_Confidence_Limit <dbl>, High_Confidence_Limit <dbl>,
#   Data_Value_Footnote_Symbol <chr>, Data_Value_Footnote <chr>,
#   PopulationCount <dbl>, lat <dbl>, long <dbl>, CategoryID <chr>,
#   MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>

What variables are included? (can any of them be removed?)

colnames(latlong_clean) # column names of the filtered data
 [1] "Year"                       "StateAbbr"                 
 [3] "StateDesc"                  "CityName"                  
 [5] "GeographicLevel"            "DataSource"                
 [7] "Category"                   "UniqueID"                  
 [9] "Measure"                    "Data_Value_Unit"           
[11] "DataValueTypeID"            "Data_Value_Type"           
[13] "Data_Value"                 "Low_Confidence_Limit"      
[15] "High_Confidence_Limit"      "Data_Value_Footnote_Symbol"
[17] "Data_Value_Footnote"        "PopulationCount"           
[19] "lat"                        "long"                      
[21] "CategoryID"                 "MeasureId"                 
[23] "CityFIPS"                   "TractFIPS"                 
[25] "Short_Question_Text"       

Remove unused variables for assignment

# Drop the columns that are not used in the rest of the assignment.
prevention <- latlong_clean |>
  select(
    -c(
      DataSource,
      Data_Value_Unit,
      DataValueTypeID,
      Low_Confidence_Limit,
      High_Confidence_Limit,
      Data_Value_Footnote_Symbol,
      Data_Value_Footnote
    )
  )
head(prevention)
# A tibble: 6 × 18
   Year StateAbbr StateDesc  CityName  GeographicLevel Category UniqueID Measure
  <dbl> <chr>     <chr>      <chr>     <chr>           <chr>    <chr>    <chr>  
1  2017 AL        Alabama    Montgome… City            Prevent… 151000   Choles…
2  2017 CA        California Concord   City            Prevent… 616000   Visits…
3  2017 CA        California Concord   City            Prevent… 616000   Choles…
4  2017 CA        California Fontana   City            Prevent… 624680   Visits…
5  2017 CA        California Richmond  Census Tract    Prevent… 0660620… Choles…
6  2017 FL        Florida    Davie     Census Tract    Prevent… 1216475… Choles…
# ℹ 10 more variables: Data_Value_Type <chr>, Data_Value <dbl>,
#   PopulationCount <dbl>, lat <dbl>, long <dbl>, CategoryID <chr>,
#   MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>
# Rows whose state abbreviation is "NY".
ny <- filter(prevention, StateAbbr == "NY")
head(ny)
# A tibble: 6 × 18
   Year StateAbbr StateDesc CityName   GeographicLevel Category UniqueID Measure
  <dbl> <chr>     <chr>     <chr>      <chr>           <chr>    <chr>    <chr>  
1  2017 NY        New York  Buffalo    Census Tract    Prevent… 3611000… "Chole…
2  2017 NY        New York  Rochester  Census Tract    Prevent… 3663000… "Curre…
3  2017 NY        New York  Rochester  Census Tract    Prevent… 3663000… "Visit…
4  2017 NY        New York  Rochester  Census Tract    Prevent… 3663000… "Chole…
5  2017 NY        New York  Schenecta… Census Tract    Prevent… 3665508… "Takin…
6  2017 NY        New York  New York   Census Tract    Prevent… 3651000… "Curre…
# ℹ 10 more variables: Data_Value_Type <chr>, Data_Value <dbl>,
#   PopulationCount <dbl>, lat <dbl>, long <dbl>, CategoryID <chr>,
#   MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>

Check for NY state cities in dataset

unique(ny[["CityName"]]) # distinct city names among the NY rows
[1] "Buffalo"      "Rochester"    "Schenectady"  "New York"     "Mount Vernon"
[6] "New Rochelle" "Albany"       "Syracuse"     "Yonkers"     

1. Filtering dataset to concentrate only on Buffalo, NY

# Restrict the data to Buffalo, New York. Matching on the state as well as the
# city name keeps a same-named city in another state from being mixed in.
buff_data <- prevention |>
  filter(StateAbbr == "NY", CityName == "Buffalo")
head(buff_data)
# A tibble: 6 × 18
   Year StateAbbr StateDesc CityName GeographicLevel Category   UniqueID Measure
  <dbl> <chr>     <chr>     <chr>    <chr>           <chr>      <chr>    <chr>  
1  2017 NY        New York  Buffalo  Census Tract    Prevention 3611000… Choles…
2  2017 NY        New York  Buffalo  Census Tract    Prevention 3611000… Visits…
3  2017 NY        New York  Buffalo  Census Tract    Prevention 3611000… Visits…
4  2017 NY        New York  Buffalo  Census Tract    Prevention 3611000… Visits…
5  2017 NY        New York  Buffalo  Census Tract    Prevention 3611000… Choles…
6  2017 NY        New York  Buffalo  Census Tract    Prevention 3611000… Taking…
# ℹ 10 more variables: Data_Value_Type <chr>, Data_Value <dbl>,
#   PopulationCount <dbl>, lat <dbl>, long <dbl>, CategoryID <chr>,
#   MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>
dim(buff_data)[1] # number of rows in the Buffalo subset
[1] 320
colnames(buff_data) # every variable kept in the cleaned Buffalo subset
 [1] "Year"                "StateAbbr"           "StateDesc"          
 [4] "CityName"            "GeographicLevel"     "Category"           
 [7] "UniqueID"            "Measure"             "Data_Value_Type"    
[10] "Data_Value"          "PopulationCount"     "lat"                
[13] "long"                "CategoryID"          "MeasureId"          
[16] "CityFIPS"            "TractFIPS"           "Short_Question_Text"

2. Bar graph of Buffalo health indicators in 2017

# Bar chart of the mean crude prevalence for each prevention indicator.
# buff_data has several rows per indicator (see head(buff_data)), so drawing
# the raw rows with stat = "identity" stacked them and each bar showed the sum
# of the percentages. Averaging per indicator first keeps the y-axis a
# percentage.
buff_data |>
  summarise(
    Data_Value = mean(Data_Value, na.rm = TRUE),
    .by = Short_Question_Text
  ) |>
  ggplot(
    aes(
      x = Short_Question_Text,
      y = Data_Value,
      fill = Short_Question_Text
    )
  ) +
  geom_col() +
  theme_minimal() +
  scale_fill_manual(
    values = c("#FFA07A", "#98FB98", "#87CEEB", "#DA70D6"),
    name = "Indicators"
  ) +
  labs(
    title = "Health Indicators in Buffalo, NY - 2017",
    x = "Health Indicator",
    y = "Mean Crude Prevalence (%)"
  ) +
  theme(
    # Tilt the indicator names so they do not overlap.
    axis.text.x = element_text(angle = 45, hjust = 1),
    # Title font size, style, and color.
    plot.title = element_text(size = 16, face = "bold", color = "#2E4A62")
  )

3. Map of subsetted dataset

Double-checking numeric values and data summary

str(buff_data[["lat"]]) # structure of the latitude column
 num [1:320] 42.9 42.9 42.8 42.9 42.9 ...
str(buff_data[["long"]]) # structure of the longitude column
 num [1:320] -78.9 -78.9 -78.8 -78.8 -78.9 ...
head(buff_data, n = 6L) # first six rows of the Buffalo subset
# A tibble: 6 × 18
   Year StateAbbr StateDesc CityName GeographicLevel Category   UniqueID Measure
  <dbl> <chr>     <chr>     <chr>    <chr>           <chr>      <chr>    <chr>  
1  2017 NY        New York  Buffalo  Census Tract    Prevention 3611000… Choles…
2  2017 NY        New York  Buffalo  Census Tract    Prevention 3611000… Visits…
3  2017 NY        New York  Buffalo  Census Tract    Prevention 3611000… Visits…
4  2017 NY        New York  Buffalo  Census Tract    Prevention 3611000… Visits…
5  2017 NY        New York  Buffalo  Census Tract    Prevention 3611000… Choles…
6  2017 NY        New York  Buffalo  Census Tract    Prevention 3611000… Taking…
# ℹ 10 more variables: Data_Value_Type <chr>, Data_Value <dbl>,
#   PopulationCount <dbl>, lat <dbl>, long <dbl>, CategoryID <chr>,
#   MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>
summary(buff_data[["Data_Value"]]) # five-number summary plus the mean
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   7.30   24.23   73.75   60.85   79.20   86.70 
library(leaflet)
Warning: package 'leaflet' was built under R version 4.4.2
# Keep only the rows that have both a latitude and a longitude.
buff_data <- buff_data |>
  filter(!is.na(lat), !is.na(long))

# Interactive map: one circle per row, with the radius rescaled from
# Data_Value into the range 50-500 and a popup showing the indicator and its
# crude prevalence.
leaflet(buff_data) |>
  addProviderTiles("CartoDB.Positron") |>
  addCircles(
    lat = ~lat,
    lng = ~long,
    radius = ~scales::rescale(Data_Value, to = c(50, 500)),
    weight = 1,
    color = "#FDBB30",
    fillColor = "#002654",
    fillOpacity = 0.6,
    popup = ~paste(
      "<strong>Health Indicator:</strong>", Short_Question_Text,
      "<br><strong>Crude Prevalence:</strong>", Data_Value, "%"
    )
  ) |>
  setView(lng = -78.8784, lat = 42.8864, zoom = 12)

To have a better look at the prevalence

library(ggplot2)
library(sf)
Warning: package 'sf' was built under R version 4.4.2
Linking to GEOS 3.12.2, GDAL 3.9.3, PROJ 9.4.1; sf_use_s2() is TRUE
# Turn the Buffalo rows into point geometries built from long/lat (CRS 4326).
buffalo_data_sf <- buff_data |>
  st_as_sf(coords = c("long", "lat"), crs = 4326)

# Static map: point size follows Data_Value, color follows the indicator.
ggplot(buffalo_data_sf) +
  geom_sf(
    aes(size = Data_Value, color = Short_Question_Text),
    alpha = 0.7
  ) +
  scale_color_manual(
    values = c("#FFA07A", "#98FB98", "#87CEEB", "#DA70D6")
  ) +
  labs(
    title = "Health Indicators in Buffalo, 2017",
    color = "Indicator",
    size = "Prevalence (%)"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, face = "bold", color = "#2E4A62")
  )

4. Buffalo map with hover tooltip

library(leaflet)
library(scales)
Warning: package 'scales' was built under R version 4.4.1

Attaching package: 'scales'
The following object is masked from 'package:purrr':

    discard
The following object is masked from 'package:readr':

    col_factor
# Interactive Buffalo map with a click popup and a mouseover label.
# Each row of buff_data becomes one circle whose radius is rescaled from
# Data_Value into the range 50-500.
leaflet(buff_data) %>%
  addProviderTiles("CartoDB.Positron") %>%
  addCircles(
    lng = ~long,
    lat = ~lat,
    weight = 1,
    radius = ~scales::rescale(Data_Value, to = c(50, 500)),
    fillColor = "#002654",
    color = "#FDBB30",
    fillOpacity = 0.6,
    popup = ~paste("<strong>Health Indicator:</strong>", Short_Question_Text,
                   "<br><strong>Crude Prevalence:</strong>", Data_Value, "%"),
    # Mouseover text. paste0() is used because paste() puts a space between
    # every piece, which rendered the label as "Indicator :  73.5 %".
    label = ~paste0(Short_Question_Text, ": ", Data_Value, "%"),
    labelOptions = labelOptions(
      noHide = FALSE,  # show the label on hover only
      direction = "auto",
      opacity = 0.8,
      offset = c(0, -10),
      style = list(
        "color" = "#002654",
        "font-size" = "12px",
        "font-weight" = "bold"
      )
    )
  ) %>%
  setView(lng = -78.8784, lat = 42.8864, zoom = 12)

5. Paragraph

The visualizations of Buffalo’s health indicators in 2017 bring together different layers of insight into community health behaviors. The bar plot highlights variations in prevalence rates across indicators like annual checkups and cholesterol screenings, showing where public health engagement is stronger and where more outreach might be needed. The geographic scatter plot maps these indicators across Buffalo neighborhoods, allowing us to see how prevalence varies by location. Taking it a step further, the interactive map offers a deeper dive by showing each indicator’s name and its crude prevalence in a mouseover tooltip, making it easier to identify areas with higher or lower engagement. Together, these tools paint a clear picture of Buffalo’s health landscape, helping pinpoint where resources could have the greatest impact.