library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidyr)
library(ggplot2)
library(leaflet)
## Warning: package 'leaflet' was built under R version 4.3.2
library(viridis)
## Warning: package 'viridis' was built under R version 4.3.2
## Loading required package: viridisLite
# Load the CDC 500 Cities local health indicators extract.
# The file is read through an explicit path instead of calling setwd(), so the
# session's working directory is not changed as a side effect of this script.
# NOTE(review): the folder is machine-specific; consider a project-relative
# path so the analysis runs on other computers.
data_dir <- "C:/Users/danyd/OneDrive/Desktop/data 110/week10hw"
cities500 <- read_csv(
  file.path(data_dir, "500CitiesLocalHealthIndicators.cdc.csv")
)
## Rows: 810103 Columns: 24
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (17): StateAbbr, StateDesc, CityName, GeographicLevel, DataSource, Categ...
## dbl (6): Year, Data_Value, Low_Confidence_Limit, High_Confidence_Limit, Cit...
## num (1): PopulationCount
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Split GeoLocation (lat, long) into two columns: lat and long
# Split GeoLocation (text expected to look like "(lat, long)") into two
# character columns, `lat` and `long`.
#
# Each coordinate is captured as an optionally signed decimal, and the two are
# separated only by commas/whitespace. The previous pattern put a greedy `.*`
# between the groups, which consumed the minus sign and the leading digits of
# the longitude (e.g. "-118.33" was captured as "8.33"), so every point was
# plotted at the wrong longitude.
latlong <- tidyr::extract(
  cities500,
  GeoLocation,
  into = c("lat", "long"),
  regex = "\\((-?\\d+\\.\\d+)[,\\s]+(-?\\d+\\.\\d+)\\)"
)
head(latlong)
## # A tibble: 6 × 25
## Year StateAbbr StateDesc CityName GeographicLevel DataSource Category
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 2017 CA California Hawthorne Census Tract BRFSS Health Outcom…
## 2 2017 CA California Hawthorne City BRFSS Unhealthy Beh…
## 3 2017 CA California Hayward City BRFSS Health Outcom…
## 4 2017 CA California Hayward City BRFSS Unhealthy Beh…
## 5 2017 CA California Hemet City BRFSS Prevention
## 6 2017 CA California Indio Census Tract BRFSS Health Outcom…
## # ℹ 18 more variables: UniqueID <chr>, Measure <chr>, Data_Value_Unit <chr>,
## # DataValueTypeID <chr>, Data_Value_Type <chr>, Data_Value <dbl>,
## # Low_Confidence_Limit <dbl>, High_Confidence_Limit <dbl>,
## # Data_Value_Footnote_Symbol <chr>, Data_Value_Footnote <chr>,
## # PopulationCount <dbl>, lat <chr>, long <chr>, CategoryID <chr>,
## # MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>
Remove the rows whose StateDesc is "United States", keep only the Prevention category (the category of interest), keep only crude-prevalence measurements, and keep only the year 2017.
# Narrow the data to the rows of interest. All four conditions must hold:
# not the national "United States" record, Prevention category,
# crude-prevalence values, and the 2017 survey year.
latlong_clean <- dplyr::filter(
  latlong,
  StateDesc != "United States",
  Category == "Prevention",
  Data_Value_Type == "Crude prevalence",
  Year == 2017
)
head(latlong_clean)
## # A tibble: 6 × 25
## Year StateAbbr StateDesc CityName GeographicLevel DataSource Category
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 2017 AL Alabama Montgomery City BRFSS Prevention
## 2 2017 CA California Concord City BRFSS Prevention
## 3 2017 CA California Concord City BRFSS Prevention
## 4 2017 CA California Fontana City BRFSS Prevention
## 5 2017 CA California Richmond Census Tract BRFSS Prevention
## 6 2017 FL Florida Davie Census Tract BRFSS Prevention
## # ℹ 18 more variables: UniqueID <chr>, Measure <chr>, Data_Value_Unit <chr>,
## # DataValueTypeID <chr>, Data_Value_Type <chr>, Data_Value <dbl>,
## # Low_Confidence_Limit <dbl>, High_Confidence_Limit <dbl>,
## # Data_Value_Footnote_Symbol <chr>, Data_Value_Footnote <chr>,
## # PopulationCount <dbl>, lat <chr>, long <chr>, CategoryID <chr>,
## # MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>
# List the available columns before deciding which ones to drop.
colnames(latlong_clean)
## [1] "Year" "StateAbbr"
## [3] "StateDesc" "CityName"
## [5] "GeographicLevel" "DataSource"
## [7] "Category" "UniqueID"
## [9] "Measure" "Data_Value_Unit"
## [11] "DataValueTypeID" "Data_Value_Type"
## [13] "Data_Value" "Low_Confidence_Limit"
## [15] "High_Confidence_Limit" "Data_Value_Footnote_Symbol"
## [17] "Data_Value_Footnote" "PopulationCount"
## [19] "lat" "long"
## [21] "CategoryID" "MeasureId"
## [23] "CityFIPS" "TractFIPS"
## [25] "Short_Question_Text"
# Drop the columns that are not used in the rest of the analysis
# (source/unit metadata, confidence limits and footnotes).
prevention <- select(
  latlong_clean,
  -c(
    DataSource, Data_Value_Unit, DataValueTypeID,
    Low_Confidence_Limit, High_Confidence_Limit,
    Data_Value_Footnote_Symbol, Data_Value_Footnote
  )
)
head(prevention)
## # A tibble: 6 × 18
## Year StateAbbr StateDesc CityName GeographicLevel Category UniqueID Measure
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 2017 AL Alabama Montgome… City Prevent… 151000 Choles…
## 2 2017 CA California Concord City Prevent… 616000 Visits…
## 3 2017 CA California Concord City Prevent… 616000 Choles…
## 4 2017 CA California Fontana City Prevent… 624680 Visits…
## 5 2017 CA California Richmond Census Tract Prevent… 0660620… Choles…
## 6 2017 FL Florida Davie Census Tract Prevent… 1216475… Choles…
## # ℹ 10 more variables: Data_Value_Type <chr>, Data_Value <dbl>,
## # PopulationCount <dbl>, lat <chr>, long <chr>, CategoryID <chr>,
## # MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>
The new dataset, `prevention`, is now a manageable size.
Filter chunk here
# Keep only the rows for the routine annual checkup measure.
# The measure text must match the dataset's wording exactly.
annualcheckup <- dplyr::filter(
  prevention,
  Measure == "Visits to doctor for routine checkup within the past Year among adults aged >=18 Years"
)
head(annualcheckup)
## # A tibble: 6 × 18
## Year StateAbbr StateDesc CityName GeographicLevel Category UniqueID Measure
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 2017 CA California Concord City Prevent… 616000 Visits…
## 2 2017 CA California Fontana City Prevent… 624680 Visits…
## 3 2017 FL Florida Hialeah Census Tract Prevent… 1230000… Visits…
## 4 2017 CA California Stockton City Prevent… 675000 Visits…
## 5 2017 NY New York Rochest… Census Tract Prevent… 3663000… Visits…
## 6 2017 NC North Caro… Gastonia Census Tract Prevent… 3725580… Visits…
## # ℹ 10 more variables: Data_Value_Type <chr>, Data_Value <dbl>,
## # PopulationCount <dbl>, lat <chr>, long <chr>, CategoryID <chr>,
## # MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>
First plot chunk here
# Scatterplot of checkup prevalence by location.
# `lat` and `long` are still character columns at this point, so they are
# converted to numeric inside aes(); otherwise ggplot2 treats every distinct
# coordinate string as its own category and draws unreadable discrete axes.
ggplot(
  annualcheckup,
  aes(x = as.numeric(long), y = as.numeric(lat), color = Data_Value)
) +
  geom_point() +
  labs(
    title = "Annual Checkup Prevalence",
    x = "Longitude",
    y = "Latitude",
    color = "Prevalence Rate (%)"
  ) +
  theme_minimal()
First map chunk here
# Static map of annual checkup prevalence drawn over world polygons.

# Coordinates are needed as numbers; rows without a usable coordinate pair
# cannot be placed on the map and are dropped.
annualcheckup <- annualcheckup |>
  mutate(across(c(lat, long), as.numeric)) |>
  filter(!is.na(lat), !is.na(long))

worldmap <- map_data("world")

ggplot() +
  geom_polygon(
    data = worldmap,
    aes(x = long, y = lat, group = group),
    fill = "lightblue",
    color = "white"
  ) +
  geom_point(
    data = annualcheckup,
    aes(x = long, y = lat, color = Data_Value),
    size = 3,
    alpha = 0.7
  ) +
  scale_color_viridis(
    option = "plasma",
    limits = c(0, 100),
    name = "Prevalence Rate (%)"
  ) +
  labs(
    title = "Annual Checkup Prevalence Map",
    x = "Longitude",
    y = "Latitude"
  ) +
  # A plot can have only one coordinate system. Previously coord_cartesian()
  # set the limits and a later coord_fixed() replaced it, discarding the zoom.
  # Setting the aspect ratio and the limits in one call keeps both.
  coord_fixed(ratio = 1.3, xlim = c(-125, -65), ylim = c(24, 49)) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    plot.title = element_text(size = 18, hjust = 0.5, margin = margin(b = 20)),
    axis.title = element_text(size = 14),
    axis.text = element_text(size = 12),
    legend.title = element_text(size = 14),
    legend.text = element_text(size = 12),
    # The panel background takes a single fill colour, so pass exactly one
    # (the first viridis colour) rather than a 100-colour vector.
    panel.background = element_rect(
      fill = scales::viridis_pal()(1),
      color = "black"
    )
  )
## Coordinate system already present. Adding new coordinate system, which will
## replace the existing one.
Refined map chunk here
# Interactive leaflet map of annual checkup prevalence.

# Make sure the coordinates are numeric and complete before mapping.
annualcheckup <- annualcheckup |>
  mutate(long = as.numeric(long), lat = as.numeric(lat)) |>
  filter(!is.na(long), !is.na(lat))

# Continuous viridis colour scale spanning the observed prevalence values.
color_palette <- colorNumeric(
  palette = "viridis",
  domain = annualcheckup$Data_Value
)

leaflet(annualcheckup) |>
  addTiles() |>
  addCircles(
    lng = ~long,
    lat = ~lat,
    radius = 5,
    color = ~color_palette(Data_Value),
    # Hover label shows the rate; the click popup adds the city name.
    label = ~paste("Prevalence Rate:", Data_Value, "%"),
    popup = ~paste("City:", CityName, "<br>",
                   "Prevalence Rate:", Data_Value, "%")
  ) |>
  addLegend(
    position = "bottomright",
    pal = color_palette,
    values = ~Data_Value,
    title = "Prevalence Rate",
    labFormat = labelFormat(suffix = "%")
  )
In a paragraph, describe the plots you created and what they show.
Using the filtered prevention dataset, I created several graphs focusing solely on the rate of doctor visits for routine checkups. I began with a scatterplot, which was disorganized and difficult to read, so a different visualization was needed. The first map used the viridis package (introduced in week 3) to produce a more appealing visualization, with a plasma color scale highlighting the lowest and highest rates of doctor visits; however, I was unable to move the points to the regions they correspond to. The refined map added interactivity, making it possible to read the percentage and city for each point; however, I was still unable to get the points to sit over their corresponding regions. In addition, I was unable to implement a legend similar to that of the first map.