library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidyr)
# Load the 500 Cities health data. The file is addressed by its full path
# instead of calling setwd(), so reading it does not change the working
# directory for the rest of the session.
data_dir <- "/Users/Briancaceres/Desktop/Data_110"
cities500 <- read_csv(file.path(data_dir, "citieshealth.csv"))
## Rows: 810103 Columns: 24
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (17): StateAbbr, StateDesc, CityName, GeographicLevel, DataSource, Categ...
## dbl (6): Year, Data_Value, Low_Confidence_Limit, High_Confidence_Limit, Cit...
## num (1): PopulationCount
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first rows of the raw data.
cities500 |>
  head()
## # A tibble: 6 × 24
## Year StateAbbr StateDesc CityName GeographicLevel DataSource Category
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 2017 CA California Hawthorne Census Tract BRFSS Health Outcom…
## 2 2017 CA California Hawthorne City BRFSS Unhealthy Beh…
## 3 2017 CA California Hayward City BRFSS Health Outcom…
## 4 2017 CA California Hayward City BRFSS Unhealthy Beh…
## 5 2017 CA California Hemet City BRFSS Prevention
## 6 2017 CA California Indio Census Tract BRFSS Health Outcom…
## # ℹ 17 more variables: UniqueID <chr>, Measure <chr>, Data_Value_Unit <chr>,
## # DataValueTypeID <chr>, Data_Value_Type <chr>, Data_Value <dbl>,
## # Low_Confidence_Limit <dbl>, High_Confidence_Limit <dbl>,
## # Data_Value_Footnote_Symbol <chr>, Data_Value_Footnote <chr>,
## # PopulationCount <dbl>, GeoLocation <chr>, CategoryID <chr>,
## # MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>
Split GeoLocation (lat, long) into two columns: lat and long
# Strip the parentheses from GeoLocation, then split it at the comma into
# `lat` and `long`; convert = TRUE lets separate() parse both pieces as numbers.
latlong <- cities500 |>
  mutate(GeoLocation = str_remove_all(GeoLocation, "[()]")) |>
  separate(
    col = GeoLocation,
    into = c("lat", "long"),
    sep = ",",
    convert = TRUE
  )
latlong |>
  head()
## # A tibble: 6 × 25
## Year StateAbbr StateDesc CityName GeographicLevel DataSource Category
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 2017 CA California Hawthorne Census Tract BRFSS Health Outcom…
## 2 2017 CA California Hawthorne City BRFSS Unhealthy Beh…
## 3 2017 CA California Hayward City BRFSS Health Outcom…
## 4 2017 CA California Hayward City BRFSS Unhealthy Beh…
## 5 2017 CA California Hemet City BRFSS Prevention
## 6 2017 CA California Indio Census Tract BRFSS Health Outcom…
## # ℹ 18 more variables: UniqueID <chr>, Measure <chr>, Data_Value_Unit <chr>,
## # DataValueTypeID <chr>, Data_Value_Type <chr>, Data_Value <dbl>,
## # Low_Confidence_Limit <dbl>, High_Confidence_Limit <dbl>,
## # Data_Value_Footnote_Symbol <chr>, Data_Value_Footnote <chr>,
## # PopulationCount <dbl>, lat <dbl>, long <dbl>, CategoryID <chr>,
## # MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>
Remove the rows whose StateDesc is "United States", select Prevention as the category of interest, keep only the crude-prevalence measurements, and keep only 2017.
# Keep only rows that are not the "United States" aggregate, belong to the
# Prevention category, report crude prevalence, and are from 2017.
latlong_clean <- latlong |>
  filter(
    StateDesc != "United States",
    Category == "Prevention",
    Data_Value_Type == "Crude prevalence",
    Year == 2017
  )
latlong_clean |>
  head()
## # A tibble: 6 × 25
## Year StateAbbr StateDesc CityName GeographicLevel DataSource Category
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 2017 AL Alabama Montgomery City BRFSS Prevention
## 2 2017 CA California Concord City BRFSS Prevention
## 3 2017 CA California Concord City BRFSS Prevention
## 4 2017 CA California Fontana City BRFSS Prevention
## 5 2017 CA California Richmond Census Tract BRFSS Prevention
## 6 2017 FL Florida Davie Census Tract BRFSS Prevention
## # ℹ 18 more variables: UniqueID <chr>, Measure <chr>, Data_Value_Unit <chr>,
## # DataValueTypeID <chr>, Data_Value_Type <chr>, Data_Value <dbl>,
## # Low_Confidence_Limit <dbl>, High_Confidence_Limit <dbl>,
## # Data_Value_Footnote_Symbol <chr>, Data_Value_Footnote <chr>,
## # PopulationCount <dbl>, lat <dbl>, long <dbl>, CategoryID <chr>,
## # MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>
# List the column names of the filtered table.
colnames(latlong_clean)
## [1] "Year" "StateAbbr"
## [3] "StateDesc" "CityName"
## [5] "GeographicLevel" "DataSource"
## [7] "Category" "UniqueID"
## [9] "Measure" "Data_Value_Unit"
## [11] "DataValueTypeID" "Data_Value_Type"
## [13] "Data_Value" "Low_Confidence_Limit"
## [15] "High_Confidence_Limit" "Data_Value_Footnote_Symbol"
## [17] "Data_Value_Footnote" "PopulationCount"
## [19] "lat" "long"
## [21] "CategoryID" "MeasureId"
## [23] "CityFIPS" "TractFIPS"
## [25] "Short_Question_Text"
# Drop the data-source, unit, value-type ID, confidence-limit and footnote
# columns.
prevention <- latlong_clean |>
  select(
    -c(
      DataSource, Data_Value_Unit, DataValueTypeID,
      Low_Confidence_Limit, High_Confidence_Limit,
      Data_Value_Footnote_Symbol, Data_Value_Footnote
    )
  )
# List every distinct measure in the full table. Calling head() first would
# only report the measures that happen to appear in the first six rows.
prevention |>
  distinct(Measure)
## # A tibble: 2 × 1
## Measure
## <chr>
## 1 Cholesterol screening among adults aged >=18 Years
## 2 Visits to doctor for routine checkup within the past Year among adults aged >…
The new dataset “Prevention” is now a manageable size. For your assignment, work with the cleaned “Prevention” dataset.
Filter chunk here
# Cholesterol-screening rows for Los Angeles, CA, with the coordinate columns
# coerced to numeric inside the same pipeline.
chol_screening <- prevention |>
  filter(
    Measure == "Cholesterol screening among adults aged >=18 Years",
    StateAbbr == "CA",
    CityName == "Los Angeles"
  ) |>
  mutate(across(c(long, lat), as.numeric))
# Longitude / latitude of the map centre (the same values the map chunks
# pass to setView()).
la_long <- -118.2426
la_lat <- 34.0549
chol_screening
## # A tibble: 1,004 × 18
## Year StateAbbr StateDesc CityName GeographicLevel Category UniqueID Measure
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 2017 CA California Los Ang… Census Tract Prevent… 0644000… Choles…
## 2 2017 CA California Los Ang… Census Tract Prevent… 0644000… Choles…
## 3 2017 CA California Los Ang… Census Tract Prevent… 0644000… Choles…
## 4 2017 CA California Los Ang… Census Tract Prevent… 0644000… Choles…
## 5 2017 CA California Los Ang… Census Tract Prevent… 0644000… Choles…
## 6 2017 CA California Los Ang… Census Tract Prevent… 0644000… Choles…
## 7 2017 CA California Los Ang… Census Tract Prevent… 0644000… Choles…
## 8 2017 CA California Los Ang… Census Tract Prevent… 0644000… Choles…
## 9 2017 CA California Los Ang… Census Tract Prevent… 0644000… Choles…
## 10 2017 CA California Los Ang… Census Tract Prevent… 0644000… Choles…
## # ℹ 994 more rows
## # ℹ 10 more variables: Data_Value_Type <chr>, Data_Value <dbl>,
## # PopulationCount <dbl>, lat <dbl>, long <dbl>, CategoryID <chr>,
## # MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>
# Scatter plot of the screening value (Data_Value) for each UniqueID.
chol_screening |>
  ggplot(mapping = aes(x = UniqueID, y = Data_Value)) +
  geom_point()
## Warning: Removed 9 rows containing missing values (`geom_point()`).
Now I can see that there is a rather large range and a lot of variety, so I am interested in seeing what this looks like when distributed on the map. How does each subregion differ?
First map chunk here
library(leaflet)
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(tidyverse)
library(knitr)
# Map-centre coordinates for the Los Angeles maps (latitude, then longitude).
la_lat <- 34.0549
la_long <- -118.2426
Here we successfully plotted circles representing the percentage of adults that partake in cholesterol screening.
# First map: one blue circle per row of chol_screening, with the circle radius
# taken from that row's Data_Value. The view is centred with the la_long /
# la_lat constants defined earlier instead of repeating the literal numbers.
# lng/lat are not passed to addCircles(), so leaflet infers them from the
# `long` and `lat` columns (see the message below).
lamap <- leaflet() |>
  setView(lng = la_long, lat = la_lat, zoom = 10) |>
  addProviderTiles("Esri.WorldStreetMap") |>
  addCircles(
    data = chol_screening,
    radius = ~Data_Value,
    color = "blue"
  )
## Assuming "long" and "lat" are longitude and latitude, respectively
lamap
Refined map chunk here
# Popup text for each row: population count and percent screened, separated by
# an HTML line break.
labels <- paste0(
  "POPULATION: ", chol_screening$PopulationCount, "<br>",
  "Percent Screened For Cholesterol: ", chol_screening$Data_Value, "%"
)
# Refined map: same circles as the first map, plus the popup text built above.
# The view is centred with the la_long / la_lat constants rather than repeated
# literals. lng/lat are again inferred by leaflet from the `long` and `lat`
# columns (see the message below).
lamap <- leaflet() |>
  setView(lng = la_long, lat = la_lat, zoom = 10) |>
  addProviderTiles("Esri.WorldStreetMap") |>
  addCircles(
    data = chol_screening,
    radius = ~Data_Value,
    color = "blue",
    popup = labels
  )
## Assuming "long" and "lat" are longitude and latitude, respectively
lamap
In a paragraph, describe the plots you created and what they show.
My maps focus on the city of Los Angeles. I picked this city because I understand that it has a wide range of income levels and is expensive to live in overall. Knowing this, I wanted to see whether some areas were screened less than others. I assumed that access to health care is more polarized there, so I focused on an actual lab test people get when they go to the doctor, to see whether that data is equally polarized. I did not find any major trend showing that one area was screened less than another. The biggest range I found in the data was about 12%, but it was not consistent from area to area. The areas with the lowest percentages seemed to be scattered at random, so this measure may not be a good indicator of any trend.