Healthy Cities GIS Assignment

Author

Your Name

Load the libraries and set the working directory

library(tidyverse)
library(tidyr)
library(highcharter)
library(leaflet)

# Read the 500 Cities indicator file; the path is resolved relative to the
# current working directory.
cities500 <- "500CitiesLocalHealthIndicators.cdc.csv" |>
  read_csv()

The GeoLocation variable has (lat, long) format

Split GeoLocation (lat, long) into two columns: lat and long

# Drop the parentheses around GeoLocation, then split the remaining
# "lat, long" text at the comma. `convert = TRUE` asks separate() to
# type-convert the two new columns.
latlong <- cities500 |>
  mutate(GeoLocation = str_remove_all(GeoLocation, "[()]")) |>
  separate(
    col = GeoLocation,
    into = c("lat", "long"),
    sep = ",",
    convert = TRUE
  )
head(latlong)

# A tibble: 6 × 25
   Year StateAbbr StateDesc  CityName  GeographicLevel DataSource Category      
  <dbl> <chr>     <chr>      <chr>     <chr>           <chr>      <chr>         
1  2017 CA        California Hawthorne Census Tract    BRFSS      Health Outcom…
2  2017 CA        California Hawthorne City            BRFSS      Unhealthy Beh…
3  2017 CA        California Hayward   City            BRFSS      Health Outcom…
4  2017 CA        California Hayward   City            BRFSS      Unhealthy Beh…
5  2017 CA        California Hemet     City            BRFSS      Prevention    
6  2017 CA        California Indio     Census Tract    BRFSS      Health Outcom…
# ℹ 18 more variables: UniqueID <chr>, Measure <chr>, Data_Value_Unit <chr>,
#   DataValueTypeID <chr>, Data_Value_Type <chr>, Data_Value <dbl>,
#   Low_Confidence_Limit <dbl>, High_Confidence_Limit <dbl>,
#   Data_Value_Footnote_Symbol <chr>, Data_Value_Footnote <chr>,
#   PopulationCount <dbl>, lat <dbl>, long <dbl>, CategoryID <chr>,
#   MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>

Filter the dataset

Remove the rows whose StateDesc is "United States", keep the Prevention category (of interest) together with the current-smoking measure (MeasureId CSMOKING), keep only crude prevalence values, and keep only the year 2017.

# Keep only the rows that satisfy all four conditions; comma-separated
# conditions inside a single filter() are combined with AND.
latlong_clean <- latlong |>
  filter(
    StateDesc != "United States",
    Category == "Prevention" | MeasureId == "CSMOKING",
    Data_Value_Type == "Crude prevalence",
    Year == 2017
  )
head(latlong_clean)

# A tibble: 6 × 25
   Year StateAbbr StateDesc  CityName   GeographicLevel DataSource Category     
  <dbl> <chr>     <chr>      <chr>      <chr>           <chr>      <chr>        
1  2017 CA        California Hawthorne  City            BRFSS      Unhealthy Be…
2  2017 AL        Alabama    Montgomery City            BRFSS      Prevention   
3  2017 AZ        Arizona    Glendale   City            BRFSS      Unhealthy Be…
4  2017 CA        California Concord    City            BRFSS      Prevention   
5  2017 CA        California Concord    City            BRFSS      Prevention   
6  2017 CA        California Escondido  Census Tract    BRFSS      Unhealthy Be…
# ℹ 18 more variables: UniqueID <chr>, Measure <chr>, Data_Value_Unit <chr>,
#   DataValueTypeID <chr>, Data_Value_Type <chr>, Data_Value <dbl>,
#   Low_Confidence_Limit <dbl>, High_Confidence_Limit <dbl>,
#   Data_Value_Footnote_Symbol <chr>, Data_Value_Footnote <chr>,
#   PopulationCount <dbl>, lat <dbl>, long <dbl>, CategoryID <chr>,
#   MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>

What variables are included? (can any of them be removed?)

# List the column names of the filtered data.
colnames(latlong_clean)

 [1] "Year"                       "StateAbbr"                 
 [3] "StateDesc"                  "CityName"                  
 [5] "GeographicLevel"            "DataSource"                
 [7] "Category"                   "UniqueID"                  
 [9] "Measure"                    "Data_Value_Unit"           
[11] "DataValueTypeID"            "Data_Value_Type"           
[13] "Data_Value"                 "Low_Confidence_Limit"      
[15] "High_Confidence_Limit"      "Data_Value_Footnote_Symbol"
[17] "Data_Value_Footnote"        "PopulationCount"           
[19] "lat"                        "long"                      
[21] "CategoryID"                 "MeasureId"                 
[23] "CityFIPS"                   "TractFIPS"                 
[25] "Short_Question_Text"

Remove the variables that will not be used in the assignment

# Drop the columns that are not needed for the rest of the assignment.
prevention <- latlong_clean |>
  select(
    !c(
      DataSource,
      Data_Value_Unit,
      DataValueTypeID,
      Low_Confidence_Limit,
      High_Confidence_Limit,
      Data_Value_Footnote_Symbol,
      Data_Value_Footnote
    )
  )
head(prevention)

# A tibble: 6 × 18
   Year StateAbbr StateDesc  CityName  GeographicLevel Category UniqueID Measure
  <dbl> <chr>     <chr>      <chr>     <chr>           <chr>    <chr>    <chr>  
1  2017 CA        California Hawthorne City            Unhealt… 632548   Curren…
2  2017 AL        Alabama    Montgome… City            Prevent… 151000   Choles…
3  2017 AZ        Arizona    Glendale  City            Unhealt… 427820   Curren…
4  2017 CA        California Concord   City            Prevent… 616000   Visits…
5  2017 CA        California Concord   City            Prevent… 616000   Choles…
6  2017 CA        California Escondido Census Tract    Unhealt… 0622804… Curren…
# ℹ 10 more variables: Data_Value_Type <chr>, Data_Value <dbl>,
#   PopulationCount <dbl>, lat <dbl>, long <dbl>, CategoryID <chr>,
#   MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>

# Subset of the cleaned data for Maryland only.
md <- filter(prevention, StateAbbr == "MD")
head(md)

# A tibble: 6 × 18
   Year StateAbbr StateDesc CityName  GeographicLevel Category  UniqueID Measure
  <dbl> <chr>     <chr>     <chr>     <chr>           <chr>     <chr>    <chr>  
1  2017 MD        Maryland  Baltimore Census Tract    Unhealth… 2404000… "Curre…
2  2017 MD        Maryland  Baltimore Census Tract    Preventi… 2404000… "Chole…
3  2017 MD        Maryland  Baltimore Census Tract    Preventi… 2404000… "Visit…
4  2017 MD        Maryland  Baltimore Census Tract    Preventi… 2404000… "Visit…
5  2017 MD        Maryland  Baltimore Census Tract    Preventi… 2404000… "Curre…
6  2017 MD        Maryland  Baltimore Census Tract    Unhealth… 2404000… "Curre…
# ℹ 10 more variables: Data_Value_Type <chr>, Data_Value <dbl>,
#   PopulationCount <dbl>, lat <dbl>, long <dbl>, CategoryID <chr>,
#   MeasureId <chr>, CityFIPS <dbl>, TractFIPS <dbl>, Short_Question_Text <chr>

The new dataset “Prevention” is a manageable dataset now.

For your assignment, work with the cleaned “Prevention” dataset

1. Once you run the above code, filter this dataset one more time for any particular subset.

I want to see the relationship between the share of smokers (CSMOKING) and the share of people without health insurance (ACCESS2) for all cities in the data frame.

# Build one row per city holding its smoking and uninsured prevalence.
#
# Cities are grouped by name AND state because a city name alone is not
# unique (for example, Albany exists in both GA and NY). Grouping by name
# only pooled the values and coordinates of such cities and made unique()
# return several rows per group, which triggers the summarise() deprecation
# warning. `.groups = "drop"` returns an ungrouped tibble.
Cities <- prevention |>
  filter(
    GeographicLevel == "City",
    MeasureId %in% c("ACCESS2", "CSMOKING")
  ) |>
  group_by(CityName, Stateab = StateAbbr, StateName = StateDesc) |>
  summarize(
    Smokers = sum(Data_Value[MeasureId == "CSMOKING"]),
    NonInsured = sum(Data_Value[MeasureId == "ACCESS2"]),
    lat = mean(lat),
    long = mean(long),
    .groups = "drop"
  ) |>
  # Keep the same column names and order as before.
  select(CityName, Smokers, NonInsured, lat, long, Stateab, StateName)

Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
dplyr 1.1.0.
ℹ Please use `reframe()` instead.
ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
  always returns an ungrouped data frame and adjust accordingly.

`summarise()` has grouped output by 'CityName'. You can override using the
`.groups` argument.

# Preview the first rows of the per-city summary.
Cities |>
  head()

# A tibble: 6 × 7
# Groups:   CityName [5]
  CityName    Smokers NonInsured   lat   long Stateab StateName 
  <chr>         <dbl>      <dbl> <dbl>  <dbl> <chr>   <chr>     
1 Abilene        19         23.9  32.5  -99.7 TX      Texas     
2 Akron          24.9       12.8  41.1  -81.5 OH      Ohio      
3 Alameda        11.2        8.3  37.8 -122.  CA      California
4 Albany         41.9       38.5  37.1  -79.0 GA      Georgia   
5 Albany         41.9       38.5  37.1  -79.0 NY      New York  
6 Albuquerque    17.1       14.3  35.1 -107.  NM      New Mexico

2. Based on the GIS tutorial (Japan earthquakes), create one plot about something in your subsetted dataset.

# Interactive scatter plot of smoking prevalence against uninsured
# prevalence, one point per city, with one series per state. The tooltip
# shows the city, the state and both values.
highchart() |>
  hc_add_series(
    data = Cities,
    type = "scatter",
    hcaes(
      x = Smokers,
      y = NonInsured,
      group = Stateab
    ),
    tooltip = list(
      useHTML = TRUE,
      # Built with paste0() so the HTML carries no stray line breaks or
      # indentation from the source file.
      pointFormat = paste0(
        "{point.CityName}, {point.StateName}<br>",
        "Without insurance: {point.NonInsured}<br>",
        "Smokers: {point.Smokers}"
      )
    )
  ) |>
  hc_xAxis(title = list(text = "Population of smokers (%)")) |>
  hc_yAxis(title = list(text = "Population without insurance (%)"))

# Static scatter plot of the same relationship: points are coloured by
# state and a single linear fit (red) is drawn across all cities.
# The stray unnamed `colours("Set1")` argument to aes() was removed: it is
# not an aesthetic mapping and does not select a colour palette.
plot1 <- ggplot(Cities, aes(x = Smokers, y = NonInsured)) +
  geom_point(aes(color = Stateab)) +
  geom_smooth(method = "lm", formula = y ~ x, color = "red")
plot1

3. Now create a map of your subsetted dataset.

# Initial view centre for the maps.
# NOTE(review): despite the `ohio_` prefix, 38 N / 97 W is near the middle
# of the contiguous US rather than in Ohio; the names are kept because the
# second map below also uses them.
ohio_lat <- 38.000000
ohio_lon <- -97.000000

# One circle per city: the radius scales with the smoking prevalence and
# the fill opacity with the uninsured prevalence (percent / 100).
# `lng`/`lat` are given explicitly so leaflet does not have to guess the
# coordinate columns.
leaflet() |>
  setView(lng = ohio_lon, lat = ohio_lat, zoom = 4.3) |>
  addProviderTiles("Esri.WorldStreetMap") |>
  addCircles(
    data = Cities,
    lng = ~long,
    lat = ~lat,
    radius = Cities$Smokers * 1000,
    color = "darkgrey",
    fillColor = "black",
    fillOpacity = Cities$NonInsured / 100
  )

Assuming "long" and "lat" are longitude and latitude, respectively

4. Refine your map to include a mouseover tooltip

Now a map with tooltips

# HTML text for each city's popup, built element-wise from the columns of
# Cities (one string per row, in row order).
popup <- with(
  Cities,
  paste0(
    "<b>City: </b>", CityName, "<br>",
    "<b>State:</b>", StateName, "<br>",
    "<b>Smokers: </b>", Smokers, "<br>",
    "<b>Non insured people:</b>", NonInsured, "<br>"
  )
)

# Same map as before with larger circles and a popup per city.
# `popup` is a character vector with one entry per row of Cities, so it
# lines up with the circles. `lng`/`lat` are given explicitly so leaflet
# does not have to guess the coordinate columns.
leaflet() |>
  setView(lng = ohio_lon, lat = ohio_lat, zoom = 4.3) |>
  addProviderTiles("Esri.WorldStreetMap") |>
  addCircles(
    data = Cities,
    lng = ~long,
    lat = ~lat,
    radius = Cities$Smokers * 2000,
    color = "darkgray",
    fillColor = "black",
    fillOpacity = Cities$NonInsured / 100,
    popup = popup
  )

Assuming "long" and "lat" are longitude and latitude, respectively

5. Write a paragraph

This map is really interesting. The size of each bubble shows the share of smokers in a city, and the Great Lakes region is full of large bubbles. The opacity of each bubble shows the share of people without insurance. Somewhat surprisingly, the map suggests that the bigger the circle, the denser it is: the more smokers a city has, the more people there are without insurance. The same pattern can be seen in the scatter plots. One possible explanation is that insurance is more expensive to buy when you are a smoker, because insurers run a background check on applicants.