R Week 9 Assignment

Author

Erin Witt

Published

April 3, 2025

task 1:

Show the code

wd <- getwd()
print(wd)

[1] "C:/Users/erins/Downloads"

Show the code

data_dir <- file.path(wd)

tests <- readr::read_csv(file.path(data_dir, "R-Spatial_II_Lab/tests-by-zcta_2021_04_23.csv"), lazy = FALSE)

Rows: 177 Columns: 13
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (3): NEIGHBORHOOD_NAME, BOROUGH_GROUP, label
dbl (10): MODIFIED_ZCTA, lat, lon, COVID_CASE_COUNT, COVID_CASE_RATE, POP_DE...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Show the code

nyc_zip <- st_read(file.path(data_dir, "R-Spatial_I_Lab (1)/ZIP_CODE_040114/ZIP_CODE_040114.shp"))

Reading layer `ZIP_CODE_040114' from data source 
  `C:\Users\erins\Downloads\R-Spatial_I_Lab (1)\ZIP_CODE_040114\ZIP_CODE_040114.shp' 
  using driver `ESRI Shapefile'
Simple feature collection with 263 features and 12 fields
Geometry type: POLYGON
Dimension:     XY
Bounding box:  xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
Projected CRS: NAD83 / New York Long Island (ftUS)

Show the code

covid_data <- tests %>% mutate(MODIFIED_ZCTA = as.character(MODIFIED_ZCTA))

# perform the join
nyc_zip_joined_new <- nyc_zip %>%
  left_join(covid_data, by = c("ZIPCODE" = "MODIFIED_ZCTA"))

pal1 <- brewer.pal(7, "Reds")
pal2 <- brewer.pal(7, "Blues")

# change the jenks for both the different variables because they have very different needs
breaks1 <- classIntervals(nyc_zip_joined_new$COVID_DEATH_RATE, n = 7, style = "jenks")$brks

Warning in classIntervals(nyc_zip_joined_new$COVID_DEATH_RATE, n = 7, style =
"jenks"): var has missing values, omitted in finding classes

Show the code

breaks2 <- classIntervals(nyc_zip_joined_new$TOTAL_COVID_TESTS, n = 7, style = "jenks")$brks

Warning in classIntervals(nyc_zip_joined_new$TOTAL_COVID_TESTS, n = 7, style =
"jenks"): var has missing values, omitted in finding classes

Show the code

# Assign colors based on breaks
nyc_zip_joined_new$color1 <- cut(nyc_zip_joined_new$COVID_DEATH_RATE, breaks = breaks1, labels = pal1, include.lowest = TRUE)
nyc_zip_joined_new$color2 <- cut(nyc_zip_joined_new$TOTAL_COVID_TESTS, breaks = breaks2, labels = pal2, include.lowest = TRUE)

plot(nyc_zip_joined_new["COVID_DEATH_RATE"], 
     breaks = breaks1, col = pal1, 
     graticule = st_crs(4326),
     axes = TRUE, reset = FALSE,
     main = "COVID Death Rate")

Warning in plot.sf(nyc_zip_joined_new["COVID_DEATH_RATE"], breaks = breaks1, :
col is not of length 1 or nrow(x): colors will be recycled; use pal to specify
a color palette

Show the code

# 1st legend for COVID Death Rate
legend("topright", legend = round(breaks1, 2), fill = pal1, title = "COVID Death Rate", cex = 0.8)

Show the code

plot(nyc_zip_joined_new["TOTAL_COVID_TESTS"], 
     breaks = breaks2, col = pal2, 
     graticule = st_crs(4326),
     axes = TRUE, reset = FALSE,
     main = "Total COVID Tests")

Warning in plot.sf(nyc_zip_joined_new["TOTAL_COVID_TESTS"], breaks = breaks2, :
col is not of length 1 or nrow(x): colors will be recycled; use pal to specify
a color palette

Show the code

# second legend for Total COVID Tests
legend("topright", legend = round(breaks2, 2), fill = pal2, title = "Total COVID Tests", cex = 0.8)

task 2:

Show the code

censustracts <- sf::st_read('R-Spatial_II_Lab/2010 Census Tracts/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp', stringsAsFactors = FALSE)

Reading layer `geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a' from data source `C:\Users\erins\Downloads\R-Spatial_II_Lab\2010 Census Tracts\geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp' 
  using driver `ESRI Shapefile'
Simple feature collection with 2165 features and 11 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
Geodetic CRS:  WGS84(DD)

Show the code

acsData <- readLines("R-Spatial_II_Lab/ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv") %>%
  magrittr::extract(-2) %>% 
  textConnection() %>%
  read.csv(header=TRUE, quote= "\"") %>%
  dplyr::select(GEO_ID, 
                totPop = DP05_0001E, elderlyPop = DP05_0024E, # >= 65
                malePop = DP05_0002E, femalePop = DP05_0003E,  
                whitePop = DP05_0037E, blackPop = DP05_0038E,
                asianPop = DP05_0067E, hispanicPop = DP05_0071E,
                adultPop = DP05_0021E, citizenAdult = DP05_0087E) %>%
  dplyr::mutate(censusCode = stringr::str_sub(GEO_ID, -9,-1));

acsData %>%
  magrittr::extract(1:10,)

                 GEO_ID totPop elderlyPop malePop femalePop whitePop blackPop
1  1400000US36005000100   7080         51    6503       577     1773     4239
2  1400000US36005000200   4542        950    2264      2278     2165     1279
3  1400000US36005000400   5634        710    2807      2827     2623     1699
4  1400000US36005001600   5917        989    2365      3552     2406     2434
5  1400000US36005001900   2765         76    1363      1402      585     1041
6  1400000US36005002000   9409        977    4119      5290     3185     4487
7  1400000US36005002300   4600        648    2175      2425      479     2122
8  1400000US36005002400    172          0     121        51       69       89
9  1400000US36005002500   5887        548    2958      2929      903     1344
10 1400000US36005002701   2868        243    1259      1609      243      987
   asianPop hispanicPop adultPop citizenAdult censusCode
1       130        2329     6909         6100  005000100
2       119        3367     3582         2952  005000200
3       226        3873     4507         4214  005000400
4        68        3603     4416         3851  005001600
5       130        1413     2008         1787  005001900
6        29        5905     6851         6170  005002000
7        27        2674     3498         3056  005002300
8        14           0      131           42  005002400
9        68        4562     4237         2722  005002500
10        0        1985     1848         1412  005002701

Show the code

censustracts %<>% dplyr::mutate(cntyFIPS = case_when(
  boro_name == 'Bronx' ~ '005',
  boro_name == 'Brooklyn' ~ '047',
  boro_name == 'Manhattan' ~ '061',
  boro_name == 'Queens' ~ '081',
  boro_name == 'Staten Island' ~ '085'),
  tractFIPS = paste(cntyFIPS, ct2010, sep='')
)

popData <- merge(censustracts, acsData, by.x ='tractFIPS', by.y = 'censusCode')
sum(popData$totPop)

[1] 8443713

Show the code

popNYC <- sf::st_transform(popData, st_crs(nyc_zip_joined_new)) 

covidPopZipNYC <- sf::st_join(
   nyc_zip_joined_new, 
  popNYC %>% sf::st_centroid(),  # Convert census tracts to points for spatial join
  join = st_contains
) %>%
  group_by(ZIPCODE) %>%  # Only group by ZIPCODE
  summarise(
    totPop = sum(totPop, na.rm = TRUE), 
    malePctg = sum(malePop, na.rm = TRUE) / totPop * 100,
    asianPop = sum(asianPop, na.rm = TRUE),
    blackPop = sum(blackPop, na.rm = TRUE),
    hispanicPop = sum(hispanicPop, na.rm = TRUE),
    whitePop = sum(whitePop, na.rm = TRUE),
    COVID_CASE_COUNT = sum(COVID_CASE_COUNT, na.rm = TRUE)
  )

Warning: st_centroid assumes attributes are constant over geometries

Show the code

# Plot 1: COVID-19 Cases Map
covid_map <- ggplot(covidPopZipNYC) +
  geom_sf(aes(fill = COVID_CASE_COUNT)) +
  scale_fill_viridis_c() +
  ggtitle("COVID-19 Cases") +
  theme_minimal() +
  theme(legend.position = "bottom") +
  theme(axis.text = element_blank(), axis.title = element_blank()) +
  coord_sf(
    crs = st_crs(4326),
    datum = sf::st_crs(4326),  # Ensures graticule is drawn
    default_crs = sf::st_crs(4326)
  ) +
  theme(panel.grid.major = element_line(color = "gray80", linetype = "dashed", linewidth = 0.3))  # Add graticule (grid lines)


# Plot 3: Hispanic Population Map (example)
hispanic_map <- ggplot(covidPopZipNYC) +
  geom_sf(aes(fill = hispanicPop)) +
  scale_fill_viridis_c() +
  ggtitle("Hispanic Population") +
  theme_minimal() +
  theme(legend.position = "bottom") +
  theme(axis.text = element_blank(), axis.title = element_blank()) +
  coord_sf(
    crs = st_crs(4326),
    datum = sf::st_crs(4326),
    default_crs = sf::st_crs(4326)
  ) +
  theme(panel.grid.major = element_line(color = "gray80", linetype = "dashed", linewidth = 0.3))

# Arrange the maps side by side
grid.arrange(covid_map, hispanic_map, ncol = 2)

Show the code

#using nyc_zip_joined_new we can display facts about 
install.packages(c("leaflet", "htmlwidgets", "sf"))

Warning: packages 'leaflet', 'htmlwidgets', 'sf' are in use and will not be
installed

Show the code

library(leaflet)
library(sf)
library(htmlwidgets)
nyc_zip_joined_new <- st_transform(nyc_zip_joined_new, crs = 4326)  # 4326 = WGS84

covid_map_leaflet <- leaflet(nyc_zip_joined_new) %>%
  addTiles() %>%  # Base map (OpenStreetMap)
  addPolygons(
    fillColor = ~colorQuantile("YlOrRd", COVID_CASE_COUNT)(COVID_CASE_COUNT), # Color by cases
    fillOpacity = 0.7,
    weight = 1,
    color = "white",
    label = ~paste("ZIP:", ZIPCODE, "Cases:",  COVID_CASE_COUNT),  # Popup on hover
    highlightOptions = highlightOptions(
      weight = 3,
      color = "red",
      bringToFront = TRUE
    )
  ) %>%
  addLegend(
    "bottomright",
    pal = colorQuantile("YlOrRd", nyc_zip_joined_new$COVID_CASE_COUNT),
    values = ~COVID_CASE_COUNT,
    title = "COVID-19 Cases (Quantiles)"
  )

# for just plotting it regular style
nyc_zip_joined_new <- st_as_sf(nyc_zip_joined_new)

# Create and display the ggplot
ggplot(nyc_zip_joined_new) +
  geom_sf(aes(fill = COVID_CASE_COUNT), color = "white", size = 0.2) +
  scale_fill_viridis_c(
    option = "plasma",
    trans = "sqrt",
    breaks = quantile(nyc_zip_joined_new$COVID_CASE_COUNT, 
                    probs = seq(0, 1, 0.2), na.rm = TRUE),
    labels = scales::comma
  ) +
  labs(
    title = "COVID-19 Cases by ZIP Code",
    fill = "Case Count"
  ) +
  theme_void() +
  theme(
    legend.position = "bottom",
    plot.title = element_text(hjust = 0.5, face = "bold")
  )

Show the code

# for saving to HTML file
saveWidget(covid_map_leaflet, "covid_map_leaflet.html")