library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidyr)
# Read the 500 Cities health data set.
# The file path is built explicitly instead of calling setwd(), so loading the
# data does not change the working directory for the rest of the session.
data_dir <- "/Users/Briancaceres/Desktop/Data_110"
cities500 <- read_csv(file.path(data_dir, "citieshealth.csv"))
## Rows: 810103 Columns: 24
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (17): StateAbbr, StateDesc, CityName, GeographicLevel, DataSource, Categ...
## dbl  (6): Year, Data_Value, Low_Confidence_Limit, High_Confidence_Limit, Cit...
## num  (1): PopulationCount
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(cities500)
## # A tibble: 6 × 24
##    Year StateAbbr StateDesc  CityName  GeographicLevel DataSource Category      
##   <dbl> <chr>     <chr>      <chr>     <chr>           <chr>      <chr>         
## 1  2017 CA        California Hawthorne Census Tract    BRFSS      Health Outcom…
## 2  2017 CA        California Hawthorne City            BRFSS      Unhealthy Beh…
## 3  2017 CA        California Hayward   City            BRFSS      Health Outcom…
## 4  2017 CA        California Hayward   City            BRFSS      Unhealthy Beh…
## 5  2017 CA        California Hemet     City            BRFSS      Prevention    
## 6  2017 CA        California Indio     Census Tract    BRFSS      Health Outcom…
## # ℹ 17 more variables: UniqueID <chr>, Measure <chr>, Data_Value_Unit <chr>,
## #   DataValueTypeID <chr>, Data_Value_Type <chr>, Data_Value <dbl>,
## #   Low_Confidence_Limit <dbl>, High_Confidence_Limit <dbl>,
## #   Data_Value_Footnote_Symbol <chr>, Data_Value_Footnote <chr>,
## #   PopulationCount <dbl>, GeoLocation <chr>, CategoryID <chr>,
## #   MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>

The GeoLocation variable has (lat, long) format

Split GeoLocation (lat, long) into two columns: lat and long

# Strip the parentheses from GeoLocation, then split it at the comma into
# `lat` and `long`; convert = TRUE lets separate() type-convert both columns.
latlong <- cities500 |>
  mutate(GeoLocation = str_remove_all(GeoLocation, "[()]")) |>
  separate(
    GeoLocation,
    into = c("lat", "long"),
    sep = ",",
    convert = TRUE
  )
head(latlong)
## # A tibble: 6 × 25
##    Year StateAbbr StateDesc  CityName  GeographicLevel DataSource Category      
##   <dbl> <chr>     <chr>      <chr>     <chr>           <chr>      <chr>         
## 1  2017 CA        California Hawthorne Census Tract    BRFSS      Health Outcom…
## 2  2017 CA        California Hawthorne City            BRFSS      Unhealthy Beh…
## 3  2017 CA        California Hayward   City            BRFSS      Health Outcom…
## 4  2017 CA        California Hayward   City            BRFSS      Unhealthy Beh…
## 5  2017 CA        California Hemet     City            BRFSS      Prevention    
## 6  2017 CA        California Indio     Census Tract    BRFSS      Health Outcom…
## # ℹ 18 more variables: UniqueID <chr>, Measure <chr>, Data_Value_Unit <chr>,
## #   DataValueTypeID <chr>, Data_Value_Type <chr>, Data_Value <dbl>,
## #   Low_Confidence_Limit <dbl>, High_Confidence_Limit <dbl>,
## #   Data_Value_Footnote_Symbol <chr>, Data_Value_Footnote <chr>,
## #   PopulationCount <dbl>, lat <dbl>, long <dbl>, CategoryID <chr>,
## #   MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>

Filter the dataset

Remove the rows where StateDesc is "United States", keep only the Prevention category (the category of interest), keep only the crude prevalence measurements, and keep only the year 2017.

# Keep only state-level-or-below rows (StateDesc other than "United States")
# for the Prevention category, crude prevalence values, and the year 2017.
latlong_clean <- filter(
  latlong,
  StateDesc != "United States",
  Category == "Prevention",
  Data_Value_Type == "Crude prevalence",
  Year == 2017
)
head(latlong_clean)
## # A tibble: 6 × 25
##    Year StateAbbr StateDesc  CityName   GeographicLevel DataSource Category  
##   <dbl> <chr>     <chr>      <chr>      <chr>           <chr>      <chr>     
## 1  2017 AL        Alabama    Montgomery City            BRFSS      Prevention
## 2  2017 CA        California Concord    City            BRFSS      Prevention
## 3  2017 CA        California Concord    City            BRFSS      Prevention
## 4  2017 CA        California Fontana    City            BRFSS      Prevention
## 5  2017 CA        California Richmond   Census Tract    BRFSS      Prevention
## 6  2017 FL        Florida    Davie      Census Tract    BRFSS      Prevention
## # ℹ 18 more variables: UniqueID <chr>, Measure <chr>, Data_Value_Unit <chr>,
## #   DataValueTypeID <chr>, Data_Value_Type <chr>, Data_Value <dbl>,
## #   Low_Confidence_Limit <dbl>, High_Confidence_Limit <dbl>,
## #   Data_Value_Footnote_Symbol <chr>, Data_Value_Footnote <chr>,
## #   PopulationCount <dbl>, lat <dbl>, long <dbl>, CategoryID <chr>,
## #   MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>

What variables are included? (can any of them be removed?)

# List the column names to decide which variables can be dropped.
colnames(latlong_clean)
##  [1] "Year"                       "StateAbbr"                 
##  [3] "StateDesc"                  "CityName"                  
##  [5] "GeographicLevel"            "DataSource"                
##  [7] "Category"                   "UniqueID"                  
##  [9] "Measure"                    "Data_Value_Unit"           
## [11] "DataValueTypeID"            "Data_Value_Type"           
## [13] "Data_Value"                 "Low_Confidence_Limit"      
## [15] "High_Confidence_Limit"      "Data_Value_Footnote_Symbol"
## [17] "Data_Value_Footnote"        "PopulationCount"           
## [19] "lat"                        "long"                      
## [21] "CategoryID"                 "MeasureId"                 
## [23] "CityFIPS"                   "TractFIPS"                 
## [25] "Short_Question_Text"

Remove the variables that will not be used in the assignment

# Drop the columns that are not used in the assignment.
prevention <- latlong_clean |>
  select(
    !c(
      DataSource,
      Data_Value_Unit,
      DataValueTypeID,
      Low_Confidence_Limit,
      High_Confidence_Limit,
      Data_Value_Footnote_Symbol,
      Data_Value_Footnote
    )
  )

# Distinct measures among the first six rows only.
# NOTE(review): head() limits this to six rows, so measures that appear later
# in `prevention` are not listed here.
distinct(head(prevention), Measure)
## # A tibble: 2 × 1
##   Measure                                                                       
##   <chr>                                                                         
## 1 Cholesterol screening among adults aged >=18 Years                            
## 2 Visits to doctor for routine checkup within the past Year among adults aged >…

The new dataset “Prevention” is now a manageable size.

# For your assignment, work with the cleaned “Prevention” dataset

1. Once you run the above code, filter this dataset one more time for any particular subset.

Filter chunk here

# Subset: cholesterol-screening rows for Los Angeles, CA, with the coordinate
# columns coerced to numeric.
chol_screening <- prevention |>
  filter(
    Measure == "Cholesterol screening among adults aged >=18 Years",
    StateAbbr == "CA",
    CityName == "Los Angeles"
  ) |>
  mutate(
    long = as.numeric(long),
    lat = as.numeric(lat)
  )

# Longitude / latitude pair; the same values are passed to setView() when the
# maps are built.
la_long <- -118.2426
la_lat <- 34.0549
chol_screening
## # A tibble: 1,004 × 18
##     Year StateAbbr StateDesc  CityName GeographicLevel Category UniqueID Measure
##    <dbl> <chr>     <chr>      <chr>    <chr>           <chr>    <chr>    <chr>  
##  1  2017 CA        California Los Ang… Census Tract    Prevent… 0644000… Choles…
##  2  2017 CA        California Los Ang… Census Tract    Prevent… 0644000… Choles…
##  3  2017 CA        California Los Ang… Census Tract    Prevent… 0644000… Choles…
##  4  2017 CA        California Los Ang… Census Tract    Prevent… 0644000… Choles…
##  5  2017 CA        California Los Ang… Census Tract    Prevent… 0644000… Choles…
##  6  2017 CA        California Los Ang… Census Tract    Prevent… 0644000… Choles…
##  7  2017 CA        California Los Ang… Census Tract    Prevent… 0644000… Choles…
##  8  2017 CA        California Los Ang… Census Tract    Prevent… 0644000… Choles…
##  9  2017 CA        California Los Ang… Census Tract    Prevent… 0644000… Choles…
## 10  2017 CA        California Los Ang… Census Tract    Prevent… 0644000… Choles…
## # ℹ 994 more rows
## # ℹ 10 more variables: Data_Value_Type <chr>, Data_Value <dbl>,
## #   PopulationCount <dbl>, lat <dbl>, long <dbl>, CategoryID <chr>,
## #   MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>

2. Based on the GIS tutorial (Japan earthquakes), create one plot about something in your subsetted dataset.

# Scatter plot of the screening value (Data_Value) for each record (UniqueID).
chol_screening |>
  ggplot(mapping = aes(x = UniqueID, y = Data_Value)) +
  geom_point()
## Warning: Removed 9 rows containing missing values (`geom_point()`).

Now I can see that there is a rather large range and a lot of variety, so I am interested in seeing how this is distributed on the map. How does each subregion differ?

3. Now create a map of your subsetted dataset.

First map chunk here

library(leaflet)
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(tidyverse)
library(knitr)

# Longitude / latitude pair matching the values given to setView() for the maps.
la_lat <- 34.0549
la_long <- -118.2426

Here we successfully plotted circles representing the percentage of adults that partake in cholesterol screening.

# First map: Esri street tiles with the view set to la_long / la_lat, plus one
# blue circle per row of chol_screening with radius taken from Data_Value.
lamap <- leaflet() |>
  setView(lng = la_long, lat = la_lat, zoom = 10) |>
  addProviderTiles("Esri.WorldStreetMap") |>
  addCircles(
    data = chol_screening,
    radius = ~Data_Value,
    color = "blue"
  )
## Assuming "long" and "lat" are longitude and latitude, respectively
lamap

4. Refine your map to include a mouseover tooltip

Refined map chunk here

# Popup text (HTML) shown when a circle is clicked: the population count and
# the percentage screened for cholesterol.
labels <- paste0(
  "POPULATION: ", chol_screening$PopulationCount, "<br>",
  "Percent Screened For Cholesterol: ", chol_screening$Data_Value, "%"
)

# Plain-text version of the same information for the hover tooltip.
# `popup` only opens on click; `label` is what leaflet shows on mouseover.
# Plain character labels are HTML-escaped, so no "<br>" markup is used here.
hover_labels <- paste0(
  "Population: ", chol_screening$PopulationCount,
  " | Screened for cholesterol: ", chol_screening$Data_Value, "%"
)

# Same map as before, now with a mouseover tooltip and a click popup.
lamap <- leaflet() |>
  setView(lng = la_long, lat = la_lat, zoom = 10) |>
  addProviderTiles("Esri.WorldStreetMap") |>
  addCircles(
    data = chol_screening,
    radius = chol_screening$Data_Value,
    color = "blue",
    label = hover_labels, # shown on mouseover
    popup = labels        # shown on click
  )
## Assuming "long" and "lat" are longitude and latitude, respectively
lamap

5. Write a paragraph

In a paragraph, describe the plots you created and what they show.

My maps focused on the city of Los Angeles. I picked this city because I understand that it has a wide range of income levels and is expensive to live in overall. Knowing this, I wanted to see whether some areas were screened less than others. I assumed that access to health care would be polarized, so I focused on the actual lab tests people get when they go to the doctor, to see whether that data is equally polarized. I did not find any major trend showing that one area was screened less than another. The largest range I found in the data was about 12%, but it was not consistent from area to area. The areas with the lowest percentages seemed to be scattered at random, so screening rates may not be a good indicator of any trend.