week 10 hw

Loading the libraries

library(leaflet)
library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.1     ✔ stringr   1.5.2
✔ ggplot2   4.0.0     ✔ tibble    3.3.0
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.1.0     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(tidyr)
#setwd("C:/Users/rsaidi/Dropbox/Rachel/MontColl/Datasets/Datasets")
# Read the CDC 500 Cities local health indicators extract into a tibble.
# NOTE(review): the ".part" suffix looks like a partially downloaded file —
# confirm the download completed before relying on the row count.
cities500 <- readr::read_csv(
  file = "500CitiesLocalHealthIndicators.ER9SkOzc.cdc.csv.part"
)

Rows: 810103 Columns: 24
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (17): StateAbbr, StateDesc, CityName, GeographicLevel, DataSource, Categ...
dbl  (6): Year, Data_Value, Low_Confidence_Limit, High_Confidence_Limit, Cit...
num  (1): PopulationCount

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# No data() call is needed: the data set is read from the CSV file into
# `cities500`, and no attached package ships a data set named "cities500hw"
# (calling data(cities500hw) only produced a "data set not found" warning).

Splitting lat, long into two columns

# Strip the surrounding parentheses from GeoLocation, then split it at the
# comma into `lat` and `long`, letting separate() convert both to numbers.
latlonghw <- cities500 |>
  mutate(GeoLocation = str_remove_all(GeoLocation, "[()]")) |>
  separate(
    col = GeoLocation,
    into = c("lat", "long"),
    sep = ",",
    convert = TRUE
  )
head(latlonghw)

# A tibble: 6 × 25
   Year StateAbbr StateDesc  CityName  GeographicLevel DataSource Category      
  <dbl> <chr>     <chr>      <chr>     <chr>           <chr>      <chr>         
1  2017 CA        California Hawthorne Census Tract    BRFSS      Health Outcom…
2  2017 CA        California Hawthorne City            BRFSS      Unhealthy Beh…
3  2017 CA        California Hayward   City            BRFSS      Health Outcom…
4  2017 CA        California Hayward   City            BRFSS      Unhealthy Beh…
5  2017 CA        California Hemet     City            BRFSS      Prevention    
6  2017 CA        California Indio     Census Tract    BRFSS      Health Outcom…
# ℹ 18 more variables: UniqueID <chr>, Measure <chr>, Data_Value_Unit <chr>,
#   DataValueTypeID <chr>, Data_Value_Type <chr>, Data_Value <dbl>,
#   Low_Confidence_Limit <dbl>, High_Confidence_Limit <dbl>,
#   Data_Value_Footnote_Symbol <chr>, Data_Value_Footnote <chr>,
#   PopulationCount <dbl>, lat <dbl>, long <dbl>, CategoryID <chr>,
#   MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>

Filtering

# Keep only the 2017 crude-prevalence rows for DC in the
# "Unhealthy Behaviors" category that have a question label.
# Conditions listed in one filter() call are combined with AND.
latlong_cleanhw <- latlonghw |>
  filter(
    StateDesc != "United States",
    Data_Value_Type == "Crude prevalence",
    Year == 2017,
    StateAbbr == "DC",
    Category == "Unhealthy Behaviors",
    !is.na(Short_Question_Text)
  )

head(latlong_cleanhw)

# A tibble: 6 × 25
   Year StateAbbr StateDesc     CityName   GeographicLevel DataSource Category  
  <dbl> <chr>     <chr>         <chr>      <chr>           <chr>      <chr>     
1  2017 DC        District of C Washington Census Tract    BRFSS      Unhealthy…
2  2017 DC        District of C Washington Census Tract    BRFSS      Unhealthy…
3  2017 DC        District of C Washington Census Tract    BRFSS      Unhealthy…
4  2017 DC        District of C Washington Census Tract    BRFSS      Unhealthy…
5  2017 DC        District of C Washington Census Tract    BRFSS      Unhealthy…
6  2017 DC        District of C Washington Census Tract    BRFSS      Unhealthy…
# ℹ 18 more variables: UniqueID <chr>, Measure <chr>, Data_Value_Unit <chr>,
#   DataValueTypeID <chr>, Data_Value_Type <chr>, Data_Value <dbl>,
#   Low_Confidence_Limit <dbl>, High_Confidence_Limit <dbl>,
#   Data_Value_Footnote_Symbol <chr>, Data_Value_Footnote <chr>,
#   PopulationCount <dbl>, lat <dbl>, long <dbl>, CategoryID <chr>,
#   MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>

Removing some variables that will not be used in the assignment

# Drop the data-source, unit, confidence-limit and footnote columns,
# which the rest of the assignment does not need.
latlong_clean2hw <- latlong_cleanhw |>
  select(
    -c(
      DataSource,
      Data_Value_Unit,
      DataValueTypeID,
      Low_Confidence_Limit,
      High_Confidence_Limit,
      Data_Value_Footnote_Symbol,
      Data_Value_Footnote
    )
  )
head(latlong_clean2hw)

# A tibble: 6 × 18
   Year StateAbbr StateDesc   CityName GeographicLevel Category UniqueID Measure
  <dbl> <chr>     <chr>       <chr>    <chr>           <chr>    <chr>    <chr>  
1  2017 DC        District o… Washing… Census Tract    Unhealt… 1150000… Binge …
2  2017 DC        District o… Washing… Census Tract    Unhealt… 1150000… Curren…
3  2017 DC        District o… Washing… Census Tract    Unhealt… 1150000… Obesit…
4  2017 DC        District o… Washing… Census Tract    Unhealt… 1150000… Obesit…
5  2017 DC        District o… Washing… Census Tract    Unhealt… 1150000… Binge …
6  2017 DC        District o… Washing… Census Tract    Unhealt… 1150000… Curren…
# ℹ 10 more variables: Data_Value_Type <chr>, Data_Value <dbl>,
#   PopulationCount <dbl>, lat <dbl>, long <dbl>, CategoryID <chr>,
#   MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>

Creating a plot of the unhealthy behaviors in the subsetted dataset.

# Dodged bar chart of Data_Value for each behavior, with bars filled by
# Measure. The aesthetics are declared once on the plot and inherited by
# the single bar layer.
plot <- ggplot(
  data = latlong_clean2hw,
  mapping = aes(x = Short_Question_Text, y = Data_Value, fill = Measure)
) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_brewer(palette = "Set3") +
  labs(
    title = "Unhealthy Behaviors in DC in 2017",
    x = "Behaviors",
    y = "Data Value %",
    fill = "Measure",
    caption = "Source: CDC"
  ) +
  theme_minimal(base_size = 9)
plot

Warning: Removed 4 rows containing missing values or values outside the scale range
(`geom_bar()`).

## Creating a map of the subsetted dataset

# Map of DC with one circle per row of the subset.
# The coordinate columns are mapped explicitly so leaflet does not have to
# guess them from the column names, and the radius is computed from
# Data_Value through the formula interface, evaluated against `data`.
leaflet() |>
  setView(lng = -77.03637, lat = 38.89511, zoom = 11.4) |>
  addProviderTiles("Esri.WorldStreetMap") |>
  addCircles(
    data = latlong_clean2hw,
    lng = ~long,
    lat = ~lat,
    # Exponential scaling so that higher data values give visibly larger
    # circles.
    radius = ~sqrt(1.29^Data_Value) * 2,
    color = "maroon",
    fillColor = "lightblue",
    fillOpacity = 0.19
  )

Including a mouse-click tooltip

# Build one HTML snippet per row of the subset (behavior, data value,
# measure and population). with() lets the columns be referenced by name.
popupudc <- with(
  latlong_clean2hw,
  paste0(
    "<b>Unhealthy Behavior: </b>", Short_Question_Text, "<br>",
    "<b>Data Value: </b>", Data_Value, "<br>",
    "<b>Measure: </b>", Measure, "<br>",
    "<b>Population: </b>", PopulationCount, "<br>"
  )
)

# Same DC circle map, now with a click popup on each circle.
# The coordinate columns are mapped explicitly so leaflet does not have to
# guess them from the column names, and the radius is computed from
# Data_Value through the formula interface, evaluated against `data`.
leaflet() |>
  setView(lng = -77.03637, lat = 38.89511, zoom = 11.4) |>
  addProviderTiles("Esri.WorldStreetMap") |>
  addCircles(
    data = latlong_clean2hw,
    lng = ~long,
    lat = ~lat,
    # Exponential scaling so that higher data values give visibly larger
    # circles.
    radius = ~sqrt(1.29^Data_Value) * 2,
    color = "maroon",
    fillColor = "lightblue",
    fillOpacity = 0.19,
    # popupudc holds one HTML string per row of latlong_clean2hw, in row
    # order.
    popup = popupudc
  )

Paragraph

The first plot is a bar graph that shows some of the unhealthy habits of people in DC. The bar graph shows that physical inactivity is the leading unhealthy habit in DC out of the 4 listed. The second plot is a map with circles marking where the different places are located. The radius of each circle varies based on the data value. The third plot is the same map with a mouse-click tooltip: clicking on a point shows the unhealthy behavior, data value, measure, and population. In the future it would be interesting to see how DC compares to Maryland and Virginia, and maybe some other states or the entire country, but that would be over the observation limit for this assignment.