library(sf)
library(readr)
library(dplyr)
library(ggplot2)
library(mapview)
unzip("Section_08/R-Spatial_II_Lab.zip", exdir = "Section_08/Week_08")
unzip("./Section_08/Week_08/R-Spatial_II_Lab/2010 Census Tracts.zip", exdir = "./Section_08/Week_08/Census")
nycCensus <- sf::st_read('./Section_08/Week_08/Census/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp', stringsAsFactors = FALSE)
## Reading layer `geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a' from data source `C:\Users\Alijah Anyagwosi\Downloads\SPRING 2025\RStudio\Section_08\Week_08\Census\geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 2165 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS:  WGS84(DD)
str(nycCensus)
## Classes 'sf' and 'data.frame':   2165 obs. of  12 variables:
##  $ boro_code : chr  "5" "1" "1" "1" ...
##  $ boro_ct201: chr  "5000900" "1009800" "1010000" "1010200" ...
##  $ boro_name : chr  "Staten Island" "Manhattan" "Manhattan" "Manhattan" ...
##  $ cdeligibil: chr  "E" "I" "I" "I" ...
##  $ ct2010    : chr  "000900" "009800" "010000" "010200" ...
##  $ ctlabel   : chr  "9" "98" "100" "102" ...
##  $ ntacode   : chr  "SI22" "MN19" "MN19" "MN17" ...
##  $ ntaname   : chr  "West New Brighton-New Brighton-St. George" "Turtle Bay-East Midtown" "Turtle Bay-East Midtown" "Midtown-Midtown South" ...
##  $ puma      : chr  "3903" "3808" "3808" "3807" ...
##  $ shape_area: num  2497010 1906016 1860938 1860993 1864600 ...
##  $ shape_leng: num  7729 5534 5692 5688 5693 ...
##  $ geometry  :sfc_MULTIPOLYGON of length 2165; first list element: List of 1
##   ..$ :List of 1
##   .. ..$ : num [1:28, 1:2] -74.1 -74.1 -74.1 -74.1 -74.1 ...
##   ..- attr(*, "class")= chr [1:3] "XY" "MULTIPOLYGON" "sfg"
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "names")= chr [1:11] "boro_code" "boro_ct201" "boro_name" "cdeligibil" ...
nycCensus %<>% dplyr::mutate(cntyFIPS = case_when(
  boro_name == 'Bronx' ~ '005',
  boro_name == 'Brooklyn' ~ '047',
  boro_name == 'Manhattan' ~ '061',
  boro_name == 'Queens' ~ '081',
  boro_name == 'Staten Island' ~ '085'),
  tractFIPS = paste(cntyFIPS, ct2010, sep='')
)
acsData <- readLines("./Section_08/Week_08/R-Spatial_II_Lab/ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv") %>%
  magrittr::extract(-2) %>% 
  textConnection() %>%
  read.csv(header=TRUE, quote= "\"") %>%
  dplyr::select(GEO_ID, 
                totPop = DP05_0001E, elderlyPop = DP05_0024E, # >= 65
                malePop = DP05_0002E, femalePop = DP05_0003E,  
                whitePop = DP05_0037E, blackPop = DP05_0038E,
                asianPop = DP05_0067E, hispanicPop = DP05_0071E,
                adultPop = DP05_0021E, citizenAdult = DP05_0087E) %>%
  dplyr::mutate(censusCode = stringr::str_sub(GEO_ID, -9,-1));

acsData %>%
  magrittr::extract(1:10,)
##                  GEO_ID totPop elderlyPop malePop femalePop whitePop blackPop
## 1  1400000US36005000100   7080         51    6503       577     1773     4239
## 2  1400000US36005000200   4542        950    2264      2278     2165     1279
## 3  1400000US36005000400   5634        710    2807      2827     2623     1699
## 4  1400000US36005001600   5917        989    2365      3552     2406     2434
## 5  1400000US36005001900   2765         76    1363      1402      585     1041
## 6  1400000US36005002000   9409        977    4119      5290     3185     4487
## 7  1400000US36005002300   4600        648    2175      2425      479     2122
## 8  1400000US36005002400    172          0     121        51       69       89
## 9  1400000US36005002500   5887        548    2958      2929      903     1344
## 10 1400000US36005002701   2868        243    1259      1609      243      987
##    asianPop hispanicPop adultPop citizenAdult censusCode
## 1       130        2329     6909         6100  005000100
## 2       119        3367     3582         2952  005000200
## 3       226        3873     4507         4214  005000400
## 4        68        3603     4416         3851  005001600
## 5       130        1413     2008         1787  005001900
## 6        29        5905     6851         6170  005002000
## 7        27        2674     3498         3056  005002300
## 8        14           0      131           42  005002400
## 9        68        4562     4237         2722  005002500
## 10        0        1985     1848         1412  005002701
popData <- merge(nycCensus, acsData, by.x ='tractFIPS', by.y = 'censusCode')
sum(popData$totPop)
## [1] 8443713
unzip("./Section_07/Week_07/ZIP_CODE_040114.zip", exdir = "./Section_08/Week_08/ZIP")
nyc_sf_ZIP <- st_read("./Section_08/Week_08/ZIP/ZIP_CODE_040114.shp")
## Reading layer `ZIP_CODE_040114' from data source 
##   `C:\Users\Alijah Anyagwosi\Downloads\SPRING 2025\RStudio\Section_08\Week_08\ZIP\ZIP_CODE_040114.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 263 features and 12 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
nyc_sf_ZIP1 <- st_transform(nyc_sf_ZIP, crs = 4326)
popNYC <- sf::st_transform(popData, st_crs(nyc_sf_ZIP1)) # nyc_sf_ZIP is the JOINED zip code data from task 1.

# this may take a long time to run
popData %>% magrittr::extract('elderlyPop') %>% plot(breaks = 'jenks')

#mapview(popData, zcol = "totPop")
#mapview(popData, zcol = "elderlyPop")
mapview(popData, zcol = "asianPop")
covid_20210423_data <- read_csv("Section_08/Week_08/R-Spatial_II_Lab/tests-by-zcta_2021_04_23.csv")
## Rows: 177 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (3): NEIGHBORHOOD_NAME, BOROUGH_GROUP, label
## dbl (10): MODIFIED_ZCTA, lat, lon, COVID_CASE_COUNT, COVID_CASE_RATE, POP_DE...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
nyc_sf_ZIPcov <- nyc_sf_ZIP1 %>%
  left_join(covid_20210423_data %>%
              mutate(MODIFIED_ZCTA = as.character(MODIFIED_ZCTA)),
            by = c("ZIPCODE" = "MODIFIED_ZCTA"))
popData <- st_transform(popData, crs = st_crs(nyc_sf_ZIPcov))

covidPopZipNYC <- sf::st_join(nyc_sf_ZIPcov, popData %>% sf::st_centroid(), # this essentially converts census tracts to points join = st_contains) %>% group_by(ZIPCODE, PO_NAME, POPULATION, COUNTY, COVID_CASE_COUNT, TOTAL_COVID_TESTS) %>%

summarise(totPop = sum(totPop), malePctg = sum(malePop)/totPop*100, # note the totPop is the newly calculated asianPop = sum(asianPop), blackPop = sum(blackPop), hispanicPop = sum(hispanicPop), whitePop = sum(whitePop))

covidPopZipNYC <- sf::st_join(nyc_sf_ZIPcov, 
                              popData %>% sf::st_centroid(), # this essentially converts census tracts to points
                              join = st_contains) %>% 
  group_by(ZIPCODE, PO_NAME, POPULATION, COUNTY, COVID_CASE_COUNT, TOTAL_COVID_TESTS) %>% 
  
  summarise(
    totPop = sum(totPop, na.rm = TRUE),
    malePctg = sum(malePop, na.rm = TRUE) / totPop * 100,
    asianPop = sum(asianPop, na.rm = TRUE),
    blackPop = sum(blackPop, na.rm = TRUE),
    hispanicPop = sum(hispanicPop, na.rm = TRUE),
    whitePop = sum(whitePop, na.rm = TRUE),
    .groups = "drop"
    )
## Warning: st_centroid assumes attributes are constant over geometries
sum(covidPopZipNYC$totPop, na.rm = T)
## [1] 8446394
covidPopZipNYC %>% dplyr::filter(is.na(totPop))
## Simple feature collection with 0 features and 12 fields
## Bounding box:  xmin: NA ymin: NA xmax: NA ymax: NA
## Geodetic CRS:  WGS 84
## # A tibble: 0 × 13
## # ℹ 13 variables: ZIPCODE <chr>, PO_NAME <chr>, POPULATION <dbl>, COUNTY <chr>,
## #   COVID_CASE_COUNT <dbl>, TOTAL_COVID_TESTS <dbl>, totPop <int>,
## #   malePctg <dbl>, asianPop <int>, blackPop <int>, hispanicPop <int>,
## #   whitePop <int>, geometry <GEOMETRY [°]>
plot(covidPopZipNYC["COVID_CASE_COUNT"], breaks='jenks', main="COVID")

#plot(covidPopZipNYC["blackPop"], breaks='jenks', main="blackPop jenks")
#plot(covidPopZipNYC["asianPop"], breaks='jenks', main="asianPop jenks")
#plot(covidPopZipNYC["COVID_CASE_COUNT"], breaks='quantile', main="COVID quantile")
#plot(covidPopZipNYC["asianPop"], breaks='quantile', main="asianPop quantile")
food_data <- st_read("Section_08/Week_08/R-Spatial_II_Lab/nycFoodStore.shp")
## Reading layer `nycFoodStore' from data source 
##   `C:\Users\Alijah Anyagwosi\Downloads\SPRING 2025\RStudio\Section_08\Week_08\R-Spatial_II_Lab\nycFoodStore.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 11300 features and 16 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -74.2484 ymin: 40.50782 xmax: -73.67061 ymax: 40.91008
## Geodetic CRS:  WGS 84
food_sf <- food_data %>%
  mutate(storeType = case_when(
    Estbl_T == "A" ~ "Store",
    Estbl_T == "B" ~ "Bakery",
    Estbl_T == "C" ~ "Food Manufacturer",
    Estbl_T == "D" ~ "Food Warehouse",
    Estbl_T == "E" ~ "Beverage Plant",
    Estbl_T == "F" ~ "Feed Mill/Non-Medicated",
    Estbl_T == "G" ~ "Processing Plant",
    Estbl_T == "H" ~ "Wholesale Manufacturer",
    Estbl_T == "I" ~ "Refrigerated Warehouse",
    Estbl_T == "J" ~ "Multiple Operations",
    Estbl_T == "K" ~ "Vehicle",
    Estbl_T == "L" ~ "Produce Refrigerated Warehouse",
    Estbl_T == "M" ~ "Salvage Dealer",
    Estbl_T == "N" ~ "Wholesale Produce Packer",
    Estbl_T == "O" ~ "Produce Grower/Packer/Broker",
    Estbl_T == "P" ~ "C.A. Room",
    Estbl_T == "Q" ~ "Feed Mill/Medicated",
    Estbl_T == "R" ~ "Pet Food Manufacturer",
    Estbl_T == "S" ~ "Feed Warehouse and/or Distributor",
    Estbl_T == "T" ~ "Disposal Plant",
    Estbl_T == "U" ~ "Disposal Plant/Transportation Service",
    Estbl_T == "V" ~ "Slaughterhouse",
    Estbl_T == "W" ~ "Farm Winery-Exempt",
    Estbl_T == "Z" ~ "Farm Product Use Only",
    # ... add others as needed ...
    TRUE ~ "Other"
  ))
food_sf %>%
  st_set_geometry(NULL) %>%  # removes geometry to speed up
  count(Estbl_T, sort = TRUE)
##    Estbl_T    n
## 1      JAC 8457
## 2        A 2289
## 3     JABC  224
## 4    JABCH   54
## 5     JACK   46
## 6     JACD   45
## 7     JACH   36
## 8     JABH   22
## 9    JABCK   18
## 10  JACDHK   13
## 11   JACDK   13
## 12    JADK   13
## 13   JACHK   12
## 14  JABCHK   11
## 15     JAD   11
## 16   JABHK    6
## 17     JAB    5
## 18    JABK    5
## 19   JABCD    3
## 20   JACDH    3
## 21   JACDE    2
## 22   JADHK    2
## 23  JABCDH    1
## 24  JABCDK    1
## 25    JACG    1
## 26    JACZ    1
## 27    JADO    1
## 28    JAHK    1
## 29     JAK    1
## 30     JAZ    1
## 31     JDA    1
## 32     JKA    1
food_filter <- st_transform(food_sf, st_crs(nyc_sf_ZIP1))
valid_codes <- c("JAC", "JABCHK", "JACH", "JAB")
food_by_zip <- st_join(food_filter, nyc_sf_ZIP1, join = st_within) %>%
  group_by(ZIPCODE) %>%
  summarise(food_store_count = n())
food_by_zip <- food_data %>%
  filter(Estbl_T %in% valid_codes) %>%
  st_transform(st_crs(nyc_sf_ZIP1)) %>%
  st_join(nyc_sf_ZIP1, join = st_within) %>%
  st_set_geometry(NULL) %>%  # Drop geometry so we can group easily
  group_by(ZIPCODE) %>%
  summarise(food_store_count = n())
Food_ZIP <- nyc_sf_ZIP1 %>%
  left_join(food_by_zip, by = "ZIPCODE")
#mapview(Food_ZIP, zcol = "food_store_count", layer.name = "Food Stores")
health_data <- read.csv("./Section_08/Week_08/R-Spatial_II_Lab/NYS_Health_Facility.csv")
health_data_new <- health_data %>%
  filter(!is.na(Facility.Longitude) & !is.na(Facility.Latitude))
health_filtered <- health_data_new %>%
  filter(Description %in% c("Residential Health Care Facility - SNF") & Short.Description %in% c("NH"))
health_sf <- st_as_sf(health_filtered,
                      coords = c("Facility.Longitude", "Facility.Latitude"),
                      crs = 4326)  # assuming WGS84
health_new_sf <- st_transform(health_sf, st_crs(nyc_sf_ZIP1))
health_by_zip <- st_join(health_new_sf, nyc_sf_ZIP1, join = st_within) %>%
  st_set_geometry(NULL) %>%
  group_by(ZIPCODE) %>%
  summarise(NursingHomes = n())
nyc_health_sf_ZIP <- nyc_sf_ZIP1 %>%
  left_join(health_by_zip, by = "ZIPCODE")
#mapview(nyc_health_sf_ZIP, zcol = "NursingHomes", layer.name = "Nursing Homes")
mapview(Food_ZIP, zcol = "food_store_count", layer.name = "Food Stores") +
mapview(nyc_health_sf_ZIP, zcol = "NursingHomes", layer.name = "Nursing Homes")