Task 1: Load Data and Check COVID Data Structure

# Restore the objects saved in the Week 07 workspace image
# (presumably the spatial layers used below -- confirm against the .RData contents).
spatial_workspace <- "~/Spring25/R Analysis/Module 3/Week_07/nyc_spatial_data.RData"
load(spatial_workspace)

# Read the ZCTA-level COVID testing extract into a tibble.
covid_csv <- "~/Spring25/R Analysis/Module 3/Week_08/Data/R-Spatial_II_Lab/tests-by-zcta_2021_04_23.csv"
covid_data <- read_csv(covid_csv)
## Rows: 177 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (3): NEIGHBORHOOD_NAME, BOROUGH_GROUP, label
## dbl (10): MODIFIED_ZCTA, lat, lon, COVID_CASE_COUNT, COVID_CASE_RATE, POP_DE...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first rows of the COVID table to confirm column names and types.
head(covid_data)  # Check the structure
## # A tibble: 6 × 13
##   MODIFIED_ZCTA NEIGHBORHOOD_NAME                BOROUGH_GROUP label   lat   lon
##           <dbl> <chr>                            <chr>         <chr> <dbl> <dbl>
## 1         10001 Chelsea/NoMad/West Chelsea       Manhattan     1000…  40.8 -74.0
## 2         10002 Chinatown/Lower East Side        Manhattan     10002  40.7 -74.0
## 3         10003 East Village/Gramercy/Greenwich… Manhattan     10003  40.7 -74.0
## 4         10004 Financial District               Manhattan     10004  40.7 -74.0
## 5         10005 Financial District               Manhattan     10005  40.7 -74.0
## 6         10006 Financial District               Manhattan     10006  40.7 -74.0
## # ℹ 7 more variables: COVID_CASE_COUNT <dbl>, COVID_CASE_RATE <dbl>,
## #   POP_DENOMINATOR <dbl>, COVID_DEATH_COUNT <dbl>, COVID_DEATH_RATE <dbl>,
## #   PERCENT_POSITIVE <dbl>, TOTAL_COVID_TESTS <dbl>

Task 1.5: Check Column Names in COVID Data

names(covid_data)  # List every column name in the COVID table
##  [1] "MODIFIED_ZCTA"     "NEIGHBORHOOD_NAME" "BOROUGH_GROUP"    
##  [4] "label"             "lat"               "lon"              
##  [7] "COVID_CASE_COUNT"  "COVID_CASE_RATE"   "POP_DENOMINATOR"  
## [10] "COVID_DEATH_COUNT" "COVID_DEATH_RATE"  "PERCENT_POSITIVE" 
## [13] "TOTAL_COVID_TESTS"

Task 2: Prepare COVID Data for Merging

# Give the COVID table the same key name as the ZIP code layer ...
covid_data <- rename(covid_data, ZIPCODE = MODIFIED_ZCTA)
# ... and store the key as character so it can be matched in the later joins.
covid_data <- mutate(covid_data, ZIPCODE = as.character(ZIPCODE))

Task 2.5: Check and Reproject CRS of Spatial Objects

# Inspect the CRS of the ZIP code layer before any spatial join
# (reported below as EPSG:2263, NAD83 / New York Long Island, US survey feet).
st_crs(nyc_zip_codes)
## Coordinate Reference System:
##   User input: NAD83 / New York Long Island (ftUS) 
##   wkt:
## PROJCRS["NAD83 / New York Long Island (ftUS)",
##     BASEGEOGCRS["NAD83",
##         DATUM["North American Datum 1983",
##             ELLIPSOID["GRS 1980",6378137,298.257222101,
##                 LENGTHUNIT["metre",1]]],
##         PRIMEM["Greenwich",0,
##             ANGLEUNIT["degree",0.0174532925199433]],
##         ID["EPSG",4269]],
##     CONVERSION["SPCS83 New York Long Island zone (US survey foot)",
##         METHOD["Lambert Conic Conformal (2SP)",
##             ID["EPSG",9802]],
##         PARAMETER["Latitude of false origin",40.1666666666667,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8821]],
##         PARAMETER["Longitude of false origin",-74,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8822]],
##         PARAMETER["Latitude of 1st standard parallel",41.0333333333333,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8823]],
##         PARAMETER["Latitude of 2nd standard parallel",40.6666666666667,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8824]],
##         PARAMETER["Easting at false origin",984250,
##             LENGTHUNIT["US survey foot",0.304800609601219],
##             ID["EPSG",8826]],
##         PARAMETER["Northing at false origin",0,
##             LENGTHUNIT["US survey foot",0.304800609601219],
##             ID["EPSG",8827]]],
##     CS[Cartesian,2],
##         AXIS["easting (X)",east,
##             ORDER[1],
##             LENGTHUNIT["US survey foot",0.304800609601219]],
##         AXIS["northing (Y)",north,
##             ORDER[2],
##             LENGTHUNIT["US survey foot",0.304800609601219]],
##     USAGE[
##         SCOPE["Engineering survey, topographic mapping."],
##         AREA["United States (USA) - New York - counties of Bronx; Kings; Nassau; New York; Queens; Richmond; Suffolk."],
##         BBOX[40.47,-74.26,41.3,-71.8]],
##     ID["EPSG",2263]]
# Inspect the CRS of the health facility layer (reported below as EPSG:4326, WGS 84).
st_crs(health_facilities)
## Coordinate Reference System:
##   User input: EPSG:4326 
##   wkt:
## GEOGCRS["WGS 84",
##     ENSEMBLE["World Geodetic System 1984 ensemble",
##         MEMBER["World Geodetic System 1984 (Transit)"],
##         MEMBER["World Geodetic System 1984 (G730)"],
##         MEMBER["World Geodetic System 1984 (G873)"],
##         MEMBER["World Geodetic System 1984 (G1150)"],
##         MEMBER["World Geodetic System 1984 (G1674)"],
##         MEMBER["World Geodetic System 1984 (G1762)"],
##         MEMBER["World Geodetic System 1984 (G2139)"],
##         MEMBER["World Geodetic System 1984 (G2296)"],
##         ELLIPSOID["WGS 84",6378137,298.257223563,
##             LENGTHUNIT["metre",1]],
##         ENSEMBLEACCURACY[2.0]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["geodetic latitude (Lat)",north,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["geodetic longitude (Lon)",east,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]],
##     USAGE[
##         SCOPE["Horizontal component of 3D system."],
##         AREA["World."],
##         BBOX[-90,-180,90,180]],
##     ID["EPSG",4326]]
# Inspect the CRS of the retail food store layer (reported below as EPSG:4326, WGS 84).
st_crs(retail_food_stores)
## Coordinate Reference System:
##   User input: EPSG:4326 
##   wkt:
## GEOGCRS["WGS 84",
##     ENSEMBLE["World Geodetic System 1984 ensemble",
##         MEMBER["World Geodetic System 1984 (Transit)"],
##         MEMBER["World Geodetic System 1984 (G730)"],
##         MEMBER["World Geodetic System 1984 (G873)"],
##         MEMBER["World Geodetic System 1984 (G1150)"],
##         MEMBER["World Geodetic System 1984 (G1674)"],
##         MEMBER["World Geodetic System 1984 (G1762)"],
##         MEMBER["World Geodetic System 1984 (G2139)"],
##         MEMBER["World Geodetic System 1984 (G2296)"],
##         ELLIPSOID["WGS 84",6378137,298.257223563,
##             LENGTHUNIT["metre",1]],
##         ENSEMBLEACCURACY[2.0]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["geodetic latitude (Lat)",north,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["geodetic longitude (Lon)",east,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]],
##     USAGE[
##         SCOPE["Horizontal component of 3D system."],
##         AREA["World."],
##         BBOX[-90,-180,90,180]],
##     ID["EPSG",4326]]
# Bring every layer into WGS 84 (EPSG:4326) so the spatial joins operate in one CRS.
nyc_zip_codes <- nyc_zip_codes %>% st_transform(crs = 4326)
health_facilities <- health_facilities %>% st_transform(crs = 4326)
retail_food_stores <- retail_food_stores %>% st_transform(crs = 4326)

Task 3: Count Health Facilities and Food Stores by ZIP Code

# Count point features per ZIP code polygon.
#
# The point layer is the left-hand side of st_join(), so the predicate has to
# describe the point relative to the polygon: st_within (point lies within
# polygon). st_contains would ask whether a point contains a polygon, which
# never matches; every ZIPCODE would then be NA and the later joins on
# ZIPCODE would produce all-NA count columns.
health_count <- health_facilities %>%
  st_join(nyc_zip_codes, join = st_within) %>%
  filter(!is.na(ZIPCODE)) %>%  # drop points that fall outside every ZIP polygon
  group_by(ZIPCODE) %>%
  summarise(HealthFacilitiesNum = n())

food_count <- retail_food_stores %>%
  st_join(nyc_zip_codes, join = st_within) %>%
  filter(!is.na(ZIPCODE)) %>%  # drop points that fall outside every ZIP polygon
  group_by(ZIPCODE) %>%
  summarise(FoodStoreNum = n())

Task 4: Merge Data into a Single sf Object

# The count tables only contribute attributes, so strip their geometry and
# keep them as plain data frames keyed by ZIPCODE.
health_count <- health_count %>% st_drop_geometry()
food_count <- food_count %>% st_drop_geometry()

# Attach the COVID fields and both count columns to the ZIP code polygons;
# left joins keep every polygon even when a table has no matching ZIPCODE.
final_sf <- nyc_zip_codes %>%
  left_join(covid_data, by = "ZIPCODE") %>%
  left_join(health_count, by = "ZIPCODE") %>%
  left_join(food_count, by = "ZIPCODE")

Task 5: Inspect Final Data and Plot COVID Cases by ZIP Code

# Preview the merged sf object: ZIP code attributes, COVID fields, and the two count columns.
head(final_sf)
## Simple feature collection with 6 features and 26 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: -73.99193 ymin: 40.63029 xmax: -73.78805 ymax: 40.6863
## Geodetic CRS:  WGS 84
##   ZIPCODE BLDGZIP  PO_NAME POPULATION     AREA STATE COUNTY ST_FIPS CTY_FIPS
## 1   11436       0  Jamaica      18681 22699295    NY Queens      36      081
## 2   11213       0 Brooklyn      62426 29631004    NY  Kings      36      047
## 3   11212       0 Brooklyn      83866 41972104    NY  Kings      36      047
## 4   11225       0 Brooklyn      56527 23698630    NY  Kings      36      047
## 5   11218       0 Brooklyn      72280 36868799    NY  Kings      36      047
## 6   11226       0 Brooklyn     106132 39408598    NY  Kings      36      047
##                    URL SHAPE_AREA SHAPE_LEN
## 1 http://www.usps.com/          0         0
## 2 http://www.usps.com/          0         0
## 3 http://www.usps.com/          0         0
## 4 http://www.usps.com/          0         0
## 5 http://www.usps.com/          0         0
## 6 http://www.usps.com/          0         0
##                                NEIGHBORHOOD_NAME BOROUGH_GROUP label      lat
## 1                 South Jamaica/South Ozone Park        Queens 11436 40.67582
## 2                           Crown Heights (East)      Brooklyn 11213 40.67107
## 3                         Ocean Hill-Brownsville      Brooklyn 11212 40.66293
## 4 Crown Heights (West)/Prospect Lefferts Gardens      Brooklyn 11225 40.66306
## 5                     Kensington/Windsor Terrace      Brooklyn 11218 40.64348
## 6             Flatbush/Prospect Lefferts Gardens      Brooklyn 11226 40.64646
##         lon COVID_CASE_COUNT COVID_CASE_RATE POP_DENOMINATOR COVID_DEATH_COUNT
## 1 -73.79662             1888         9419.96        20042.54                64
## 2 -73.93633             5166         7996.75        64601.26               203
## 3 -73.91301             7182         9709.74        73966.99               330
## 4 -73.95423             3833         6664.50        57513.69               177
## 5 -73.97604             6199         8377.49        73995.92               218
## 6 -73.95665             7279         7476.75        97355.08               368
##   COVID_DEATH_RATE PERCENT_POSITIVE TOTAL_COVID_TESTS HealthFacilitiesNum
## 1           319.32            17.57             11082                  NA
## 2           314.24            13.72             38560                  NA
## 3           446.14            15.64             47319                  NA
## 4           307.75            11.62             33709                  NA
## 5           294.61            13.93             45884                  NA
## 6           378.00            13.33             56287                  NA
##   FoodStoreNum                       geometry
## 1           NA POLYGON ((-73.80585 40.6829...
## 2           NA POLYGON ((-73.9374 40.67973...
## 3           NA POLYGON ((-73.90294 40.6708...
## 4           NA POLYGON ((-73.95797 40.6706...
## 5           NA POLYGON ((-73.97208 40.6506...
## 6           NA POLYGON ((-73.9619 40.65487...
# Summarise every column; the NA's rows show how many polygons were left
# without a value in each joined column.
summary(final_sf)
##    ZIPCODE            BLDGZIP            PO_NAME            POPULATION      
##  Length:263         Length:263         Length:263         Min.   :     0.0  
##  Class :character   Class :character   Class :character   1st Qu.:    49.5  
##  Mode  :character   Mode  :character   Mode  :character   Median : 27985.0  
##                                                           Mean   : 31933.9  
##                                                           3rd Qu.: 54445.0  
##                                                           Max.   :109069.0  
##                                                                             
##       AREA              STATE              COUNTY            ST_FIPS         
##  Min.   :     3155   Length:263         Length:263         Length:263        
##  1st Qu.:   964323   Class :character   Class :character   Class :character  
##  Median : 21927545   Mode  :character   Mode  :character   Mode  :character  
##  Mean   : 31816554                                                           
##  3rd Qu.: 45935567                                                           
##  Max.   :473985727                                                           
##                                                                              
##    CTY_FIPS             URL              SHAPE_AREA   SHAPE_LEN
##  Length:263         Length:263         Min.   :0    Min.   :0  
##  Class :character   Class :character   1st Qu.:0    1st Qu.:0  
##  Mode  :character   Mode  :character   Median :0    Median :0  
##                                        Mean   :0    Mean   :0  
##                                        3rd Qu.:0    3rd Qu.:0  
##                                        Max.   :0    Max.   :0  
##                                                                
##  NEIGHBORHOOD_NAME  BOROUGH_GROUP         label                lat       
##  Length:263         Length:263         Length:263         Min.   :40.51  
##  Class :character   Class :character   Class :character   1st Qu.:40.67  
##  Mode  :character   Mode  :character   Mode  :character   Median :40.73  
##                                                           Mean   :40.73  
##                                                           3rd Qu.:40.78  
##                                                           Max.   :40.90  
##                                                           NA's   :74     
##       lon         COVID_CASE_COUNT COVID_CASE_RATE POP_DENOMINATOR 
##  Min.   :-74.24   Min.   :  164    Min.   : 3413   Min.   :  2972  
##  1st Qu.:-73.98   1st Qu.: 1842    1st Qu.: 6776   1st Qu.: 25437  
##  Median :-73.92   Median : 3172    Median : 8623   Median : 40871  
##  Mean   :-73.92   Mean   : 3957    Mean   : 8522   Mean   : 45326  
##  3rd Qu.:-73.85   3rd Qu.: 5673    3rd Qu.:10269   3rd Qu.: 65687  
##  Max.   :-73.71   Max.   :11581    Max.   :16212   Max.   :110370  
##  NA's   :74       NA's   :74       NA's   :74      NA's   :74      
##  COVID_DEATH_COUNT COVID_DEATH_RATE PERCENT_POSITIVE TOTAL_COVID_TESTS
##  Min.   :  0.0     Min.   :  0.0    Min.   : 5.50    Min.   : 2441    
##  1st Qu.: 59.0     1st Qu.:206.5    1st Qu.:11.21    1st Qu.:16429    
##  Median :115.0     Median :297.4    Median :15.03    Median :26382    
##  Mean   :145.7     Mean   :299.1    Mean   :13.79    Mean   :28704    
##  3rd Qu.:219.0     3rd Qu.:372.7    3rd Qu.:16.87    3rd Qu.:41394    
##  Max.   :518.0     Max.   :944.5    Max.   :21.10    Max.   :72559    
##  NA's   :74        NA's   :74       NA's   :74       NA's   :74       
##  HealthFacilitiesNum  FoodStoreNum          geometry  
##  Min.   : NA         Min.   : NA   POLYGON      :263  
##  1st Qu.: NA         1st Qu.: NA   epsg:4326    :  0  
##  Median : NA         Median : NA   +proj=long...:  0  
##  Mean   :NaN         Mean   :NaN                      
##  3rd Qu.: NA         3rd Qu.: NA                      
##  Max.   : NA         Max.   : NA                      
##  NA's   :263         NA's   :263
# Map COVID case counts per ZIP code polygon, classed with the "jenks" break style.
plot(
  final_sf["COVID_CASE_COUNT"],
  main = "COVID Cases by ZIP Code",
  breaks = "jenks"
)