GIS Assignment

 # NOTE(review): hard-coded absolute path -- the script only runs on a machine
 # that has this exact directory. The path string also ends with a space before
 # the closing quote; confirm that matches the real folder name.
 setwd("/Users/alassanefaye/Library/Mobile Documents/com~apple~CloudDocs/DATA110 ")
 library(readr)
# NOTE(review): this object is not referenced again in the visible script; the
# same CSV is read a second time into `cities500` further down.
X500CitiesLocalHealthIndicators_cdc <- read_csv("500CitiesLocalHealthIndicators.cdc.csv")
Rows: 810103 Columns: 24
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (17): StateAbbr, StateDesc, CityName, GeographicLevel, DataSource, Categ...
dbl  (6): Year, Data_Value, Low_Confidence_Limit, High_Confidence_Limit, Cit...
num  (1): PopulationCount

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ purrr     1.0.4
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidyr)

# Read the 500 Cities CSV (path is relative to the working directory) into
# `cities500`, the tibble every later step starts from.
cities500 <- read_csv("500CitiesLocalHealthIndicators.cdc.csv")
Rows: 810103 Columns: 24
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (17): StateAbbr, StateDesc, CityName, GeographicLevel, DataSource, Categ...
dbl  (6): Year, Data_Value, Low_Confidence_Limit, High_Confidence_Limit, Cit...
num  (1): PopulationCount

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Print the first six rows of cities500.
head(cities500)
# A tibble: 6 × 24
   Year StateAbbr StateDesc  CityName  GeographicLevel DataSource Category      
  <dbl> <chr>     <chr>      <chr>     <chr>           <chr>      <chr>         
1  2017 CA        California Hawthorne Census Tract    BRFSS      Health Outcom…
2  2017 CA        California Hawthorne City            BRFSS      Unhealthy Beh…
3  2017 CA        California Hayward   City            BRFSS      Health Outcom…
4  2017 CA        California Hayward   City            BRFSS      Unhealthy Beh…
5  2017 CA        California Hemet     City            BRFSS      Prevention    
6  2017 CA        California Indio     Census Tract    BRFSS      Health Outcom…
# ℹ 17 more variables: UniqueID <chr>, Measure <chr>, Data_Value_Unit <chr>,
#   DataValueTypeID <chr>, Data_Value_Type <chr>, Data_Value <dbl>,
#   Low_Confidence_Limit <dbl>, High_Confidence_Limit <dbl>,
#   Data_Value_Footnote_Symbol <chr>, Data_Value_Footnote <chr>,
#   PopulationCount <dbl>, GeoLocation <chr>, CategoryID <chr>,
#   MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>
# Column-by-column overview: name, type and the first few values of each column.
glimpse(cities500)
Rows: 810,103
Columns: 24
$ Year                       <dbl> 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2…
$ StateAbbr                  <chr> "CA", "CA", "CA", "CA", "CA", "CA", "CA", "…
$ StateDesc                  <chr> "California", "California", "California", "…
$ CityName                   <chr> "Hawthorne", "Hawthorne", "Hayward", "Haywa…
$ GeographicLevel            <chr> "Census Tract", "City", "City", "City", "Ci…
$ DataSource                 <chr> "BRFSS", "BRFSS", "BRFSS", "BRFSS", "BRFSS"…
$ Category                   <chr> "Health Outcomes", "Unhealthy Behaviors", "…
$ UniqueID                   <chr> "0632548-06037602504", "632548", "633000", …
$ Measure                    <chr> "Arthritis among adults aged >=18 Years", "…
$ Data_Value_Unit            <chr> "%", "%", "%", "%", "%", "%", "%", "%", "%"…
$ DataValueTypeID            <chr> "CrdPrv", "CrdPrv", "AgeAdjPrv", "CrdPrv", …
$ Data_Value_Type            <chr> "Crude prevalence", "Crude prevalence", "Ag…
$ Data_Value                 <dbl> 14.6, 15.4, 4.8, 24.2, 78.0, 22.0, 17.7, 6.…
$ Low_Confidence_Limit       <dbl> 13.9, 15.0, 4.7, 24.1, 77.6, 21.1, 17.5, 5.…
$ High_Confidence_Limit      <dbl> 15.2, 15.9, 4.8, 24.4, 78.3, 22.8, 17.9, 6.…
$ Data_Value_Footnote_Symbol <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ Data_Value_Footnote        <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ PopulationCount            <dbl> 4407, 84293, 144186, 144186, 78657, 5006, 7…
$ GeoLocation                <chr> "(33.905547923, -118.337332298)", "(33.9146…
$ CategoryID                 <chr> "HLTHOUT", "UNHBEH", "HLTHOUT", "UNHBEH", "…
$ MeasureId                  <chr> "ARTHRITIS", "CSMOKING", "CHD", "OBESITY", …
$ CityFIPS                   <dbl> 632548, 632548, 633000, 633000, 633182, 636…
$ TractFIPS                  <dbl> 6037602504, NA, NA, NA, NA, 6065045213, NA,…
$ Short_Question_Text        <chr> "Arthritis", "Current Smoking", "Coronary H…
# Strip the parentheses from GeoLocation and split it at the comma into
# separate `lat` and `long` columns (convert = TRUE type-converts the pieces).
latlong <- cities500 |>
  mutate(GeoLocation = str_replace_all(GeoLocation, "[()]", "")) |>
  separate(
    col = GeoLocation,
    into = c("lat", "long"),
    sep = ",",
    convert = TRUE
  )

# Keep only 2017 crude-prevalence rows in the "Prevention" category and drop
# the national "United States" rows.
latlong_clean <- latlong |>
  filter(
    StateDesc != "United States",
    Category == "Prevention",
    Data_Value_Type == "Crude prevalence",
    Year == 2017
  )

# Drop columns that the later steps do not use.
latlong_clean2 <- latlong_clean |>
  select(
    -c(
      DataSource, Data_Value_Unit, DataValueTypeID,
      Low_Confidence_Limit, High_Confidence_Limit,
      Data_Value_Footnote_Symbol, Data_Value_Footnote
    )
  )
# First attempt at a California smoking subset (at most 900 rows).
# NOTE(review): this matches nothing. latlong_clean2 holds only "Prevention"
# rows, while the head() preview lists Current Smoking under the
# "Unhealthy Behaviors" category, so no row carries this Measure text.
prevention_subset <- latlong_clean2 |>
  filter(
    Measure == "Current smoking among adults aged >=18 years",
    StateDesc == "California"
  ) |>
  slice_head(n = 900)

# Row count of the subset just built; the recorded result is 0.
nrow(prevention_subset) 
[1] 0
# List the distinct StateDesc values left after filtering.
# NOTE(review): several names in the recorded output look cut off at 13
# characters (e.g. "North Carolin", "District of C"), so exact matches on a
# full state name could miss those states -- confirm against the raw file.
unique(latlong_clean2$StateDesc)
 [1] "Alabama"       "California"    "Florida"       "Connecticut"  
 [5] "Illinois"      "Minnesota"     "New York"      "Pennsylvania" 
 [9] "North Carolin" "Ohio"          "Oklahoma"      "Oregon"       
[13] "Texas"         "Rhode Island"  "South Carolin" "South Dakota" 
[17] "Tennessee"     "Utah"          "Virginia"      "Washington"   
[21] "Alaska"        "Wisconsin"     "Arizona"       "Arkansas"     
[25] "Colorado"      "Delaware"      "Nevada"        "District of C"
[29] "Georgia"       "Idaho"         "Hawaii"        "Massachusetts"
[33] "Michigan"      "Indiana"       "Kansas"        "Kentucky"     
[37] "Iowa"          "Louisiana"     "Maryland"      "Maine"        
[41] "New Hampshire" "New Jersey"    "New Mexico"    "Missouri"     
[45] "Mississippi"   "Nebraska"      "Montana"       "North Dakota" 
[49] "West Virginia" "Vermont"       "Wyoming"      
# List the distinct Measure values left after filtering; the recorded output
# shows four Prevention measures and no smoking measure.
unique(latlong_clean2$Measure)
[1] "Cholesterol screening among adults aged >=18 Years"                                                   
[2] "Visits to doctor for routine checkup within the past Year among adults aged >=18 Years"               
[3] "Current lack of health insurance among adults aged 18\x9664 Years"                                    
[4] "Taking medicine for high blood pressure control among adults aged >=18 Years with high blood pressure"
# List the distinct Year values left after filtering.
unique(latlong_clean2$Year)
[1] 2017
# Second attempt at the California smoking subset, with the same two conditions.
# NOTE(review): still zero rows -- the Measure text below is not one of the
# values printed by unique(latlong_clean2$Measure) above.
prevention_subset <- latlong_clean2 |>
  filter(
    StateDesc == "California",
    Measure == "Current smoking among adults aged >=18 years"
  ) |>
  slice_head(n = 900)

# Row count after the second attempt; the recorded result is again 0.
nrow(prevention_subset) 
[1] 0
# Build the California smoking subset that the bar chart, the map popups and
# the write-up all describe.
#
# Current smoking is recorded under the "Unhealthy Behaviors" category
# (MeasureId "CSMOKING" in the head()/glimpse() previews), so it cannot be
# found in latlong_clean2, which keeps only "Prevention" rows. Start again from
# `latlong` and match on MeasureId instead of the free-text Measure label,
# which is easy to mistype (the labels in this file end in "Years", not
# "years").
prevention_subset <- latlong |>
  filter(
    MeasureId == "CSMOKING",
    StateDesc == "California",
    Data_Value_Type == "Crude prevalence",
    Year == 2017
  ) |>
  # Rows without a value or without coordinates cannot be plotted or mapped.
  filter(!is.na(Data_Value), !is.na(lat), !is.na(long)) |>
  # Same column clean-up that latlong_clean2 applies.
  select(
    -c(
      DataSource, Data_Value_Unit, DataValueTypeID,
      Low_Confidence_Limit, High_Confidence_Limit,
      Data_Value_Footnote_Symbol, Data_Value_Footnote
    )
  ) |>
  # Cap the subset at 900 rows.
  slice_head(n = 900)
# Bar chart of prevalence by city.
#
# prevention_subset can hold several rows per city (GeographicLevel contains
# both "City" and "Census Tract" rows), and geom_col() stacks every row that
# shares an x value. Plotting the raw rows would therefore draw the *sum* of
# the percentages for each city. Collapse to one mean value per city first so
# the bar height stays a percentage.
prevention_subset |>
  summarise(Data_Value = mean(Data_Value, na.rm = TRUE), .by = CityName) |>
  ggplot(aes(x = CityName, y = Data_Value)) +
  geom_col(fill = "darkseagreen") +
  labs(
    title = "Smoking Prevalence by City in California (2017)",
    x = "City",
    y = "Mean crude prevalence (%)",
    caption = "Source: CDC 500 Cities"
  ) +
  # Rotate the city names so they do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

library(leaflet)

# Basic map: one fixed-radius circle per row of prevention_subset, drawn on the
# Esri street basemap. No popups yet.
prevention_subset |>
  leaflet() |>
  addProviderTiles(provider = "Esri.WorldStreetMap") |>
  addCircles(
    lng = ~long,
    lat = ~lat,
    radius = 500,
    color = "orchid",
    fillOpacity = 0.5
  )
# One HTML snippet per row (city, state, value) to show when a circle is clicked.
popup_info <- with(
  prevention_subset,
  paste0(
    "<b>City: </b>", CityName, "<br>",
    "<b>State: </b>", StateDesc, "<br>",
    "<b>Prevalence: </b>", Data_Value, "%"
  )
)

# Same map as before, restyled and with the popups attached to the circles.
prevention_subset |>
  leaflet() |>
  addProviderTiles(provider = "Esri.WorldStreetMap") |>
  addCircles(
    lng = ~long,
    lat = ~lat,
    radius = 500,
    color = "darkseagreen",
    fillColor = "orchid",
    fillOpacity = 0.4,
    popup = popup_info
  )

For this project, I used 2017 data from the CDC’s 500 Cities dataset to investigate the current smoking prevalence among California residents who are 18 years of age and older. I selected crude prevalence numbers for California cities and filtered the data to concentrate on the “Prevention” category. I then made an interactive map and a bar plot to show the results.

The bar plot showed significant differences in smoking prevalence, with some cities having prevalence rates above 20% and others having far lower rates. This variation implies that local factors, including socioeconomic disparities, public health policies, education, and access to smoking cessation programs, affect smoking habits in California.

I made an interactive map using Leaflet to do a more thorough spatial analysis of this data. Each city is shown by a circle, with a popup displaying the city name, state, and smoking prevalence. Geographic clusters are readily visible on the map: places with higher prevalence are typically found further inland or in rural areas, whereas metropolitan or coastal locations typically report lower rates. This spatial pattern lends validity to the idea that public health behaviors are significantly influenced by location.

By integrating spatial mapping and statistical visualization, I was able to obtain a more thorough knowledge of the variations in smoking prevalence throughout California. In addition to helping to identify the areas that may require the greatest intervention, these technologies also help communities and policymakers better access and use public health data.