R Week 08 Assignment

Author

Caitlin Cacciatore

Published

March 20, 2026

Show the code
require(tidyverse);
Loading required package: tidyverse
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.2.0     ✔ readr     2.1.6
✔ forcats   1.0.1     ✔ stringr   1.6.0
✔ ggplot2   4.0.2     ✔ tibble    3.3.1
✔ lubridate 1.9.5     ✔ tidyr     1.3.2
✔ purrr     1.2.1     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Show the code
require(sf); 
Loading required package: sf
Linking to GEOS 3.13.0, GDAL 3.8.5, PROJ 9.5.1; sf_use_s2() is TRUE
Show the code
require(mapview); 
Loading required package: mapview
Show the code
require(magrittr)
Loading required package: magrittr

Attaching package: 'magrittr'

The following object is masked from 'package:purrr':

    set_names

The following object is masked from 'package:tidyr':

    extract
Show the code
# Loading the packages

# Use a fixed CRAN mirror so install.packages() never prompts for one.
options(repos = c(CRAN = "https://cloud.r-project.org"))

# Packages used in this course. Install them first if they are not available.
# "maptools", "rgeos", "rgdal" and "ggsn" are deliberately not listed:
# install.packages() reports them as "not available for this version of R",
# so requesting them only produces warnings and failed loads.
# "mapview" is listed once (it used to appear twice).
list.of.packages <- c("sf", "sp", "spatial",
                      "raster", "grid", "rasterVis",
                      "tidyverse", "magrittr", "ggpubr", "lubridate",
                      "devtools", "htmlwidgets", "mapview",
                      "classInt", "RColorBrewer", "ggmap", "tmap", "leaflet",
                      "ggrepel",
                      "spdep", "spatialreg", "GWmodel")

# Packages from the list that are not installed yet.
new.packages <- setdiff(list.of.packages, rownames(installed.packages()))

# Install the missing packages. This can take a long time on the first run.
if (length(new.packages) > 0) install.packages(new.packages)
Warning: packages 'maptools', 'rgeos', 'rgdal', 'ggsn' are not available for this version of R

Versions of these packages for your version of R might be available elsewhere,
see the ideas at
https://cran.r-project.org/doc/manuals/r-patched/R-admin.html#Installing-packages
Show the code
# Load all packages.
# require() returns FALSE instead of stopping when a package is missing, so
# collect the results in a named logical vector and report failures by name
# rather than printing an unlabelled list of TRUE/FALSE values.
pkg.loaded <- vapply(
  list.of.packages,
  function(pkg) require(pkg, character.only = TRUE, quietly = TRUE),
  logical(1)
)

if (!all(pkg.loaded)) {
  warning(
    "Packages that could not be loaded: ",
    paste(names(pkg.loaded)[!pkg.loaded], collapse = ", "),
    call. = FALSE
  )
}

Attaching package: 'raster'

The following object is masked from 'package:dplyr':

    select


Attaching package: 'ggpubr'

The following object is masked from 'package:raster':

    rotate

ℹ Google's Terms of Service: <https://mapsplatform.google.com>
  Stadia Maps' Terms of Service: <https://stadiamaps.com/terms-of-service>
  OpenStreetMap's Tile Usage Policy: <https://operations.osmfoundation.org/policies/tiles>
ℹ Please cite ggmap if you use it! Use `citation("ggmap")` for details.
Attaching package: 'ggmap'

The following object is masked from 'package:magrittr':

    inset

To access larger datasets in this package, install the spDataLarge
package with: `install.packages('spDataLarge',
repos='https://nowosad.github.io/drat/', type='source')`

Attaching package: 'Matrix'

The following objects are masked from 'package:tidyr':

    expand, pack, unpack


Attaching package: 'spatialreg'

The following objects are masked from 'package:spdep':

    get.ClusterOption, get.coresOption, get.mcOption,
    get.VerboseOption, get.ZeroPolicyOption, set.ClusterOption,
    set.coresOption, set.mcOption, set.VerboseOption,
    set.ZeroPolicyOption
[[1]]
[1] TRUE

[[2]]
[1] TRUE

[[3]]
[1] TRUE

[[4]]
[1] FALSE

[[5]]
[1] FALSE

[[6]]
[1] FALSE

[[7]]
[1] TRUE

[[8]]
[1] TRUE

[[9]]
[1] TRUE

[[10]]
[1] TRUE

[[11]]
[1] TRUE

[[12]]
[1] TRUE

[[13]]
[1] TRUE

[[14]]
[1] TRUE

[[15]]
[1] TRUE

[[16]]
[1] TRUE

[[17]]
[1] TRUE

[[18]]
[1] TRUE

[[19]]
[1] TRUE

[[20]]
[1] TRUE

[[21]]
[1] TRUE

[[22]]
[1] TRUE

[[23]]
[1] TRUE

[[24]]
[1] FALSE

[[25]]
[1] TRUE

[[26]]
[1] TRUE

[[27]]
[1] FALSE
Show the code
# Install sf only when it is missing, so rendering does not re-download it every time.
if (!requireNamespace("sf", quietly = TRUE)) install.packages("sf")

The downloaded binary packages are in
    /var/folders/dm/sj7012g577qdnv76gj2_szd80000gp/T//RtmpbEx4lA/downloaded_packages
Show the code
# Install tidyverse only when it is missing, so rendering does not re-download it every time.
if (!requireNamespace("tidyverse", quietly = TRUE)) install.packages("tidyverse")

The downloaded binary packages are in
    /var/folders/dm/sj7012g577qdnv76gj2_szd80000gp/T//RtmpbEx4lA/downloaded_packages
Show the code
library(sf)
library(tidyverse)

Task 1

Show the code
# ---- Task 1 ----
# The assignment starts here.

# Load the ZIP code polygons from the shapefile.

zip_sf <- sf::st_read(dsn = "Zip_Code_040114.shp")
Reading layer `ZIP_CODE_040114' from data source 
  `/Users/heliosselene/Desktop/R-Spatial/ZIP_CODE_040114.shp' 
  using driver `ESRI Shapefile'
Simple feature collection with 263 features and 12 fields
Geometry type: POLYGON
Dimension:     XY
Bounding box:  xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
Projected CRS: NAD83 / New York Long Island (ftUS)
Show the code
# Clean the data: drop features that have no ZIP code.

zip_nyc <- zip_sf %>%
  dplyr::filter(!is.na(ZIPCODE))

# Keep the layer in the CRS it was read in. The previous
# st_as_sf(zip_nyc, crs = 4326) call did nothing: st_as_sf() returns an object
# that is already `sf` unchanged, so the coordinates were never converted to
# longitude/latitude. If WGS84 is really needed, use
# sf::st_transform(zip_nyc, 4326) instead.
nyc_zip_sf <- zip_nyc

# Quick-look plot of the attributes.

plot(nyc_zip_sf)
Warning: plotting the first 9 out of 12 attributes; use max.plot = 12 to plot
all

Show the code
# figure out structure

str(nyc_zip_sf)
Classes 'sf' and 'data.frame':  263 obs. of  13 variables:
 $ ZIPCODE   : chr  "11436" "11213" "11212" "11225" ...
 $ BLDGZIP   : chr  "0" "0" "0" "0" ...
 $ PO_NAME   : chr  "Jamaica" "Brooklyn" "Brooklyn" "Brooklyn" ...
 $ POPULATION: num  18681 62426 83866 56527 72280 ...
 $ AREA      : num  22699295 29631004 41972104 23698630 36868799 ...
 $ STATE     : chr  "NY" "NY" "NY" "NY" ...
 $ COUNTY    : chr  "Queens" "Kings" "Kings" "Kings" ...
 $ ST_FIPS   : chr  "36" "36" "36" "36" ...
 $ CTY_FIPS  : chr  "081" "047" "047" "047" ...
 $ URL       : chr  "http://www.usps.com/" "http://www.usps.com/" "http://www.usps.com/" "http://www.usps.com/" ...
 $ SHAPE_AREA: num  0 0 0 0 0 0 0 0 0 0 ...
 $ SHAPE_LEN : num  0 0 0 0 0 0 0 0 0 0 ...
 $ geometry  :sfc_POLYGON of length 263; first list element: List of 1
  ..$ : num [1:159, 1:2] 1038098 1038142 1038171 1038280 1038521 ...
  ..- attr(*, "class")= chr [1:3] "XY" "POLYGON" "sfg"
 - attr(*, "sf_column")= chr "geometry"
 - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
  ..- attr(*, "names")= chr [1:12] "ZIPCODE" "BLDGZIP" "PO_NAME" "POPULATION" ...
Show the code
# Load the ZIP-code-level COVID table for one week.

covid_data <- readr::read_csv(
  file = "tests-by-zcta_2021_04_23.csv",
  lazy = FALSE
)
Rows: 177 Columns: 13
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (3): NEIGHBORHOOD_NAME, BOROUGH_GROUP, label
dbl (10): MODIFIED_ZCTA, lat, lon, COVID_CASE_COUNT, COVID_CASE_RATE, POP_DE...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Show the code
str(covid_data)
spc_tbl_ [177 × 13] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
 $ MODIFIED_ZCTA    : num [1:177] 10001 10002 10003 10004 10005 ...
 $ NEIGHBORHOOD_NAME: chr [1:177] "Chelsea/NoMad/West Chelsea" "Chinatown/Lower East Side" "East Village/Gramercy/Greenwich Village" "Financial District" ...
 $ BOROUGH_GROUP    : chr [1:177] "Manhattan" "Manhattan" "Manhattan" "Manhattan" ...
 $ label            : chr [1:177] "10001, 10118" "10002" "10003" "10004" ...
 $ lat              : num [1:177] 40.8 40.7 40.7 40.7 40.7 ...
 $ lon              : num [1:177] -74 -74 -74 -74 -74 ...
 $ COVID_CASE_COUNT : num [1:177] 1542 5902 2803 247 413 ...
 $ COVID_CASE_RATE  : num [1:177] 5584 7836 5193 8311 4716 ...
 $ POP_DENOMINATOR  : num [1:177] 27613 75323 53978 2972 8757 ...
 $ COVID_DEATH_COUNT: num [1:177] 35 264 48 2 0 1 4 118 37 62 ...
 $ COVID_DEATH_RATE : num [1:177] 126.8 350.5 88.9 67.3 0 ...
 $ PERCENT_POSITIVE : num [1:177] 7.86 12.63 6.93 6.92 6.72 ...
 $ TOTAL_COVID_TESTS: num [1:177] 20158 48197 41076 3599 6102 ...
 - attr(*, "spec")=
  .. cols(
  ..   MODIFIED_ZCTA = col_double(),
  ..   NEIGHBORHOOD_NAME = col_character(),
  ..   BOROUGH_GROUP = col_character(),
  ..   label = col_character(),
  ..   lat = col_double(),
  ..   lon = col_double(),
  ..   COVID_CASE_COUNT = col_double(),
  ..   COVID_CASE_RATE = col_double(),
  ..   POP_DENOMINATOR = col_double(),
  ..   COVID_DEATH_COUNT = col_double(),
  ..   COVID_DEATH_RATE = col_double(),
  ..   PERCENT_POSITIVE = col_double(),
  ..   TOTAL_COVID_TESTS = col_double()
  .. )
 - attr(*, "problems")=<externalptr> 
Show the code
# Attach the COVID table to the ZIP code polygons
# (polygon ZIPCODE matched against the table's MODIFIED_ZCTA).

nyc_covid_data_sf_merged <- base::merge(
  x = nyc_zip_sf,
  y = covid_data,
  by.x = "ZIPCODE",
  by.y = "MODIFIED_ZCTA"
)
names(nyc_covid_data_sf_merged)
 [1] "ZIPCODE"           "BLDGZIP"           "PO_NAME"          
 [4] "POPULATION"        "AREA"              "STATE"            
 [7] "COUNTY"            "ST_FIPS"           "CTY_FIPS"         
[10] "URL"               "SHAPE_AREA"        "SHAPE_LEN"        
[13] "NEIGHBORHOOD_NAME" "BOROUGH_GROUP"     "label"            
[16] "lat"               "lon"               "COVID_CASE_COUNT" 
[19] "COVID_CASE_RATE"   "POP_DENOMINATOR"   "COVID_DEATH_COUNT"
[22] "COVID_DEATH_RATE"  "PERCENT_POSITIVE"  "TOTAL_COVID_TESTS"
[25] "geometry"         

Task 2

Show the code
# ---- Task 2 ----

# Load the NYS retail food store listing.

nys_retail <- readr::read_csv(
  file = "NYS_Retail_Food_Stores.csv",
  lazy = FALSE
)
Rows: 29389 Columns: 15
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (11): County, License Number, Operation Type, Establishment Type, Entity...
dbl  (1): Zip Code
num  (1): Square Footage
lgl  (2): Address Line 2, Address Line 3

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Show the code
str(nys_retail)
spc_tbl_ [29,389 × 15] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
 $ County            : chr [1:29389] "Albany" "Albany" "Albany" "Albany" ...
 $ License Number    : chr [1:29389] "733149" "704590" "727909" "720557" ...
 $ Operation Type    : chr [1:29389] "Store" "Store" "Store" "Store" ...
 $ Establishment Type: chr [1:29389] "A" "JAC" "JAC" "JAC" ...
 $ Entity Name       : chr [1:29389] "SPEEDWAY LLC" "1250 SELKIRK INC" "RED-KAP SALES INC" "SAEED SADIQ, SAIKA NOREEN" ...
 $ DBA Name          : chr [1:29389] "12110" "1250 SELKIRK" "1667 GENERAL STORE" "19 STREET QUICK STOP" ...
 $ Street Number     : chr [1:29389] "719" "1250" "1667" "315" ...
 $ Street Name       : chr [1:29389] "NEW LOUDON RD" "RTE 9W & 396" "WESTERN AVENUE" "19TH STREET" ...
 $ Address Line 2    : logi [1:29389] NA NA NA NA NA NA ...
 $ Address Line 3    : logi [1:29389] NA NA NA NA NA NA ...
 $ City              : chr [1:29389] "LATHAM" "SELKIRK" "ALBANY" "WATERVLIET" ...
 $ State             : chr [1:29389] "NY" "NY" "NY" "NY" ...
 $ Zip Code          : num [1:29389] 12110 12158 12203 12189 12210 ...
 $ Square Footage    : num [1:29389] 300 3000 2000 1200 1800 0 0 200 0 2000 ...
 $ Location          : chr [1:29389] "719 NEW LOUDON RD\nLATHAM, NY 12110\n(42.739618, -73.761949)" "1250 RTE 9 W\nSELKIRK, NY 12158\n(42.547591, -73.8073)" "1667 WESTERN AVENUE\nALBANY, NY 12203\n(42.686553, -73.854665)" "315 19TH STREET\nWATERVLIET, NY 12189\n(42.73063, -73.703443)" ...
 - attr(*, "spec")=
  .. cols(
  ..   County = col_character(),
  ..   `License Number` = col_character(),
  ..   `Operation Type` = col_character(),
  ..   `Establishment Type` = col_character(),
  ..   `Entity Name` = col_character(),
  ..   `DBA Name` = col_character(),
  ..   `Street Number` = col_character(),
  ..   `Street Name` = col_character(),
  ..   `Address Line 2` = col_logical(),
  ..   `Address Line 3` = col_logical(),
  ..   City = col_character(),
  ..   State = col_character(),
  ..   `Zip Code` = col_double(),
  ..   `Square Footage` = col_number(),
  ..   Location = col_character()
  .. )
 - attr(*, "problems")=<externalptr> 
Show the code
# Attach the food store records to the ZIP code polygons
# (polygon ZIPCODE matched against the stores' "Zip Code" column).

nyc_food_stores <- base::merge(
  x = nyc_zip_sf,
  y = nys_retail,
  by.x = "ZIPCODE",
  by.y = "Zip Code"
)
names(nyc_food_stores)
 [1] "ZIPCODE"            "BLDGZIP"            "PO_NAME"           
 [4] "POPULATION"         "AREA"               "STATE"             
 [7] "COUNTY"             "ST_FIPS"            "CTY_FIPS"          
[10] "URL"                "SHAPE_AREA"         "SHAPE_LEN"         
[13] "County"             "License Number"     "Operation Type"    
[16] "Establishment Type" "Entity Name"        "DBA Name"          
[19] "Street Number"      "Street Name"        "Address Line 2"    
[22] "Address Line 3"     "City"               "State"             
[25] "Square Footage"     "Location"           "geometry"          
Show the code
# Aggregate by ZIP code.
# Count distinct licences rather than rows: the merge pairs each store with
# every polygon carrying its ZIP code, so n() would count a store once per
# polygon if a ZIP code is drawn as more than one polygon.
# NOTE(review): assumes `License Number` identifies one store; confirm, and
# check anyDuplicated(nyc_zip_sf$ZIPCODE) to see whether duplicates exist.

zip_summary_sf <- nyc_food_stores %>%
  dplyr::group_by(ZIPCODE) %>%
  dplyr::summarise(store_count = dplyr::n_distinct(`License Number`))

# Check the column names of the aggregated sf object.

names(zip_summary_sf)
[1] "ZIPCODE"     "store_count" "geometry"   

Task 3

Show the code
# ---- Task 3 ----
# Join ZIP codes and health facilities.

# Load the NYS health facility listing.

nyc_health <- readr::read_csv(
  file = "NYS_Health_Facility.csv",
  lazy = FALSE
)
Rows: 3990 Columns: 36
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (28): Facility Name, Short Description, Description, Facility Open Date,...
dbl  (8): Facility ID, Facility Phone Number, Facility Fax Number, Facility ...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Show the code
str(nyc_health)
spc_tbl_ [3,990 × 36] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
 $ Facility ID                 : num [1:3990] 204 620 654 1156 2589 ...
 $ Facility Name               : chr [1:3990] "Hospice at Lourdes" "Charles T Sitrin Health Care Center Inc" "Central Park Rehabilitation and Nursing Center" "East Side Nursing Home" ...
 $ Short Description           : chr [1:3990] "HSPC" "NH" "NH" "NH" ...
 $ Description                 : chr [1:3990] "Hospice" "Residential Health Care Facility - SNF" "Residential Health Care Facility - SNF" "Residential Health Care Facility - SNF" ...
 $ Facility Open Date          : chr [1:3990] "06/01/1985" "02/01/1989" "02/01/1989" "08/01/1979" ...
 $ Facility Address 1          : chr [1:3990] "4102 Old Vestal Road" "2050 Tilden Avenue" "116 Martin Luther King East" "62 Prospect St" ...
 $ Facility Address 2          : chr [1:3990] NA NA NA NA ...
 $ Facility City               : chr [1:3990] "Vestal" "New Hartford" "Syracuse" "Warsaw" ...
 $ Facility State              : chr [1:3990] "New York" "New York" "New York" "New York" ...
 $ Facility Zip Code           : chr [1:3990] "13850" "13413" "13205" "14569" ...
 $ Facility Phone Number       : num [1:3990] 6.08e+09 3.16e+09 3.15e+09 5.86e+09 5.86e+09 ...
 $ Facility Fax Number         : num [1:3990] NA NA NA NA NA ...
 $ Facility Website            : chr [1:3990] NA NA NA NA ...
 $ Facility County Code        : num [1:3990] 3 32 33 60 2 ...
 $ Facility County             : chr [1:3990] "Broome" "Oneida" "Onondaga" "Wyoming" ...
 $ Regional Office ID          : num [1:3990] 3 3 3 1 1 1 7 1 7 5 ...
 $ Regional Office             : chr [1:3990] "Central New York Regional Office" "Central New York Regional Office" "Central New York Regional Office" "Western Regional Office - Buffalo" ...
 $ Main Site Name              : chr [1:3990] NA NA NA NA ...
 $ Main Site Facility ID       : num [1:3990] NA NA NA NA NA ...
 $ Operating Certificate Number: chr [1:3990] "0301501F" "3227304N" "3301326N" "6027303N" ...
 $ Operator Name               : chr [1:3990] "Our Lady of Lourdes Memorial Hospital Inc" "Charles T Sitrin Health Care Center, Inc" "CPRNC, LLC" "East Side Nursing Home Inc" ...
 $ Operator Address 1          : chr [1:3990] "169 Riverside Drive" "Box 1000 Tilden Avenue" "116 Martin Luther King East" "62 Prospect Street" ...
 $ Operator Address 2          : chr [1:3990] NA NA NA NA ...
 $ Operator City               : chr [1:3990] "Binghamton" "New Hartford" "Syracuse" "Warsaw" ...
 $ Operator State              : chr [1:3990] "New York" "New York" "New York" "New York" ...
 $ Operator Zip Code           : chr [1:3990] "13905" "13413" "13205" "14569" ...
 $ Cooperator Name             : chr [1:3990] NA NA NA NA ...
 $ Cooperator Address          : chr [1:3990] NA NA NA NA ...
 $ Cooperator Address 2        : chr [1:3990] NA NA NA NA ...
 $ Cooperator City             : chr [1:3990] NA NA NA NA ...
 $ Cooperator State            : chr [1:3990] "New York" "New York" "New York" "New York" ...
 $ Cooperator Zip Code         : chr [1:3990] NA NA NA NA ...
 $ Ownership Type              : chr [1:3990] "Not for Profit Corporation" "Not for Profit Corporation" "LLC" "Business Corporation" ...
 $ Facility Latitude           : num [1:3990] 42.1 43.1 NA 42.7 42.1 ...
 $ Facility Longitude          : num [1:3990] -76 -75.2 NA -78.1 -78 ...
 $ Facility Location           : chr [1:3990] "(42.097095, -75.975243)" "(43.05497, -75.228828)" NA "(42.738979, -78.12867)" ...
 - attr(*, "spec")=
  .. cols(
  ..   `Facility ID` = col_double(),
  ..   `Facility Name` = col_character(),
  ..   `Short Description` = col_character(),
  ..   Description = col_character(),
  ..   `Facility Open Date` = col_character(),
  ..   `Facility Address 1` = col_character(),
  ..   `Facility Address 2` = col_character(),
  ..   `Facility City` = col_character(),
  ..   `Facility State` = col_character(),
  ..   `Facility Zip Code` = col_character(),
  ..   `Facility Phone Number` = col_double(),
  ..   `Facility Fax Number` = col_double(),
  ..   `Facility Website` = col_character(),
  ..   `Facility County Code` = col_double(),
  ..   `Facility County` = col_character(),
  ..   `Regional Office ID` = col_double(),
  ..   `Regional Office` = col_character(),
  ..   `Main Site Name` = col_character(),
  ..   `Main Site Facility ID` = col_double(),
  ..   `Operating Certificate Number` = col_character(),
  ..   `Operator Name` = col_character(),
  ..   `Operator Address 1` = col_character(),
  ..   `Operator Address 2` = col_character(),
  ..   `Operator City` = col_character(),
  ..   `Operator State` = col_character(),
  ..   `Operator Zip Code` = col_character(),
  ..   `Cooperator Name` = col_character(),
  ..   `Cooperator Address` = col_character(),
  ..   `Cooperator Address 2` = col_character(),
  ..   `Cooperator City` = col_character(),
  ..   `Cooperator State` = col_character(),
  ..   `Cooperator Zip Code` = col_character(),
  ..   `Ownership Type` = col_character(),
  ..   `Facility Latitude` = col_double(),
  ..   `Facility Longitude` = col_double(),
  ..   `Facility Location` = col_character()
  .. )
 - attr(*, "problems")=<externalptr> 
Show the code
# Merge health facilities and ZIP code data.
# Join on the facility's own ZIP code. "Cooperator Zip Code" is NA in the rows
# shown by str(nyc_health), so joining on it drops those facilities and places
# the rest by the co-operator's ZIP code rather than the facility's.

nyc_health_care_centers <-
  base::merge(nyc_zip_sf, nyc_health, by.x = "ZIPCODE", by.y = "Facility Zip Code")
names(nyc_health_care_centers)
 [1] "ZIPCODE"                      "BLDGZIP"                     
 [3] "PO_NAME"                      "POPULATION"                  
 [5] "AREA"                         "STATE"                       
 [7] "COUNTY"                       "ST_FIPS"                     
 [9] "CTY_FIPS"                     "URL"                         
[11] "SHAPE_AREA"                   "SHAPE_LEN"                   
[13] "Facility ID"                  "Facility Name"               
[15] "Short Description"            "Description"                 
[17] "Facility Open Date"           "Facility Address 1"          
[19] "Facility Address 2"           "Facility City"               
[21] "Facility State"               "Facility Zip Code"           
[23] "Facility Phone Number"        "Facility Fax Number"         
[25] "Facility Website"             "Facility County Code"        
[27] "Facility County"              "Regional Office ID"          
[29] "Regional Office"              "Main Site Name"              
[31] "Main Site Facility ID"        "Operating Certificate Number"
[33] "Operator Name"                "Operator Address 1"          
[35] "Operator Address 2"           "Operator City"               
[37] "Operator State"               "Operator Zip Code"           
[39] "Cooperator Name"              "Cooperator Address"          
[41] "Cooperator Address 2"         "Cooperator City"             
[43] "Cooperator State"             "Ownership Type"              
[45] "Facility Latitude"            "Facility Longitude"          
[47] "Facility Location"            "geometry"                    
Show the code
# check the names of the sf file
Show the code
# ---- Task 4 ----

# Load the census tract polygons.

nycCensus <- sf::st_read(
  dsn = "nyc_census_tracts.shp",
  stringsAsFactors = FALSE
)
Reading layer `nyc_census_tracts' from data source 
  `/Users/heliosselene/Desktop/R-Spatial/nyc_census_tracts.shp' 
  using driver `ESRI Shapefile'
Simple feature collection with 2162 features and 10 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -74.25559 ymin: 40.5021 xmax: -73.70002 ymax: 40.91526
Geodetic CRS:  NAD83
Show the code
str(nycCensus)
Classes 'sf' and 'data.frame':  2162 obs. of  11 variables:
 $ GEOID   : chr  "36061000100" "36061001401" "36061000201" "36061000600" ...
 $ STATEFP : chr  "36" "36" "36" "36" ...
 $ COUNTYFP: chr  "061" "061" "061" "061" ...
 $ TRACTCE : chr  "000100" "001401" "000201" "000600" ...
 $ AFFGEOID: chr  "1400000US36061000100" "1400000US36061001401" "1400000US36061000201" "1400000US36061000600" ...
 $ NAME    : chr  "1" "14.01" "2.01" "6" ...
 $ LSAD    : chr  "CT" "CT" "CT" "CT" ...
 $ ALAND   : num  78638 93510 90233 240406 310039 ...
 $ AWATER  : num  0 0 75976 176018 428737 ...
 $ CBSA    : chr  "New York-Newark-Jersey City, NY-NJ-PA" "New York-Newark-Jersey City, NY-NJ-PA" "New York-Newark-Jersey City, NY-NJ-PA" "New York-Newark-Jersey City, NY-NJ-PA" ...
 $ geometry:sfc_MULTIPOLYGON of length 2162; first list element: List of 3
  ..$ :List of 1
  .. ..$ : num [1:25, 1:2] -74 -74 -74 -74 -74 ...
  ..$ :List of 1
  .. ..$ : num [1:5, 1:2] -74 -74 -74 -74 -74 ...
  ..$ :List of 1
  .. ..$ : num [1:14, 1:2] -74 -74 -74 -74 -74 ...
  ..- attr(*, "class")= chr [1:3] "XY" "MULTIPOLYGON" "sfg"
 - attr(*, "sf_column")= chr "geometry"
 - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA
  ..- attr(*, "names")= chr [1:10] "GEOID" "STATEFP" "COUNTYFP" "TRACTCE" ...
Show the code
names(nycCensus)
 [1] "GEOID"    "STATEFP"  "COUNTYFP" "TRACTCE"  "AFFGEOID" "NAME"    
 [7] "LSAD"     "ALAND"    "AWATER"   "CBSA"     "geometry"
Show the code
# Assign a borough name to each county FIPS code so the data can be sorted
# by borough.
# COUNTYFP already holds the three-digit county FIPS code (e.g. "061"), so
# comparing it with borough names never matched and left cntyFIPS entirely NA.
# Keep cntyFIPS as the code and derive the borough name from it.

nycCensus %<>% dplyr::mutate(
  cntyFIPS = COUNTYFP,
  boroName = dplyr::case_when(
    COUNTYFP == "005" ~ "Bronx",
    COUNTYFP == "047" ~ "Brooklyn",
    COUNTYFP == "061" ~ "Manhattan",
    COUNTYFP == "081" ~ "Queens",
    COUNTYFP == "085" ~ "Staten Island"
  )
)

Tasks 4 and 5

# I could not get the following lines to work or render, despite many hours of attempts.

# Read the ACS demographic table.
# Line 2 of the file is dropped before parsing (presumably a second header row
# of descriptive labels; confirm against the file).
# Fixes over the earlier attempt: straight quotes instead of typographic ones,
# one argument per line so the inline comment no longer swallows code, and no
# na.omit(acsData) before acsData exists.
acsData <- readLines("ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv") %>%
  magrittr::extract(-2) %>%
  textConnection() %>%
  read.csv(header = TRUE, quote = "\"") %>%
  dplyr::select(
    GEO_ID,
    totPop = DP05_0001E,
    elderlyPop = DP05_0024E, # >= 65
    malePop = DP05_0002E,
    femalePop = DP05_0003E,
    whitePop = DP05_0037E,
    blackPop = DP05_0038E,
    asianPop = DP05_0067E,
    hispanicPop = DP05_0071E,
    adultPop = DP05_0021E,
    citizenAdult = DP05_0087E
  ) %>%
  dplyr::mutate(censusCode = stringr::str_sub(GEO_ID, -9, -1)) %>%
  tidyr::drop_na() # drop rows with a missing value in any selected column

acsData %>% magrittr::extract(1:10, )

# Merge (JOIN) ACS data to the census tracts.
# Join by attributes / columns.
# But first we must make sure the GEOID and the GEO_ID values match:
# the tract layer's GEOID is 11 characters long, so keep the last 11
# characters of GEO_ID.
acsData <- acsData %>%
  dplyr::mutate(GEOID = stringr::str_sub(GEO_ID, -11, -1))

popData <- nycCensus %>%
  dplyr::left_join(acsData, by = "GEOID")

# Verify the data. Tracts without an ACS match have NA after the left join,
# hence na.rm = TRUE.
sum(popData$totPop, na.rm = TRUE)

str(popData)

st_crs(popData)

# Put the tracts in the same CRS as the ZIP code layer before the spatial join.
popNYC <- sf::st_transform(popData, sf::st_crs(nyc_covid_data_sf_merged))

# Use JOINED zip code data from task 1.
# Now aggregate to the zip code level.
# Join by locations with st_join (spatial join).
popNYC <- sf::st_join(
  nyc_covid_data_sf_merged,
  popNYC %>% sf::st_centroid(), # this essentially converts census tracts to points
  join = sf::st_contains
) %>%
  # use names(nyc_covid_data_sf_merged) and names(popNYC) to see what we have
  dplyr::group_by(ZIPCODE, PO_NAME, POPULATION, COUNTY,
                  COVID_CASE_COUNT, TOTAL_COVID_TESTS) %>%
  dplyr::summarise(
    totPop = sum(totPop),
    malePctg = sum(malePop) / totPop * 100, # totPop here is the newly calculated sum
    asianPop = sum(asianPop),
    blackPop = sum(blackPop),
    hispanicPop = sum(hispanicPop),
    whitePop = sum(whitePop),
    .groups = "drop"
  )

popNYC %>% head()

# Check and verify the data again.
sum(popNYC$totPop, na.rm = TRUE)




<!-- -->

::: {.quarto-embedded-source-code}
```````````````````{.markdown shortcodes="false"}
---
title: "R Week 08 Assignment"
author: "Caitlin Cacciatore"
date: "3/20/2026"
format:
  html:
    toc: true
    toc-location: left
    code-fold: true
    code-summary: "Show the code"
    code-tools: true
---



```{r load_packages, include=TRUE}

# Loading the packages

# Use a fixed CRAN mirror so install.packages() never prompts for one.
options(repos = c(CRAN = "https://cloud.r-project.org"))

# Packages used in this course. Install them first if they are not available.
# "maptools", "rgeos", "rgdal" and "ggsn" are deliberately not listed:
# install.packages() reports them as not available for this version of R.
# "mapview" is listed once (it used to appear twice).
list.of.packages <- c("sf", "sp", "spatial",
                      "raster", "grid", "rasterVis",
                      "tidyverse", "magrittr", "ggpubr", "lubridate",
                      "devtools", "htmlwidgets", "mapview",
                      "classInt", "RColorBrewer", "ggmap", "tmap", "leaflet",
                      "ggrepel",
                      "spdep", "spatialreg", "GWmodel")

# Packages from the list that are not installed yet. This also covers sf and
# tidyverse, so they are no longer reinstalled unconditionally on every render.
new.packages <- setdiff(list.of.packages, rownames(installed.packages()))

# Install the missing packages. This can take a long time on the first run.
if (length(new.packages) > 0) install.packages(new.packages)

# Packages the assignment code depends on directly: fail loudly if missing.
library(tidyverse)
library(sf)
library(mapview)
library(magrittr)

# Load the remaining packages and report any that could not be loaded by name.
pkg.loaded <- vapply(
  list.of.packages,
  function(pkg) require(pkg, character.only = TRUE, quietly = TRUE),
  logical(1)
)

if (!all(pkg.loaded)) {
  warning(
    "Packages that could not be loaded: ",
    paste(names(pkg.loaded)[!pkg.loaded], collapse = ", "),
    call. = FALSE
  )
}
```

# Task 1

```{r Task 1, include=TRUE}

# Assignment begins here

# read the file

zip_sf <- st_read("Zip_Code_040114.shp")

# clean the data

zip_nyc <- zip_sf %>%
  filter(!is.na(ZIPCODE))

# Keep the layer in the CRS it was read in. st_as_sf(zip_nyc, crs = 4326) did
# nothing because st_as_sf() returns an object that is already `sf` unchanged;
# use sf::st_transform(zip_nyc, 4326) if longitude/latitude is really needed.
nyc_zip_sf <- zip_nyc

# create fancy plots

plot(nyc_zip_sf)

# figure out structure

str(nyc_zip_sf)

# Read the COVID data for one week

covid_data <- readr::read_csv("tests-by-zcta_2021_04_23.csv", lazy = FALSE)
str(covid_data)

# Merge Zip Code and COVID Data

nyc_covid_data_sf_merged <-
  base::merge(nyc_zip_sf, covid_data, by.x = "ZIPCODE", by.y = "MODIFIED_ZCTA")
names(nyc_covid_data_sf_merged)
```


# Task 2

```{r Task 2, include=TRUE}

# Read NYS Retail Food Store Data

nys_retail <- readr::read_csv("NYS_Retail_Food_Stores.csv", lazy = FALSE)
str(nys_retail)


# Merge food stores and Zip Code Data

nyc_food_stores <-
  base::merge(nyc_zip_sf, nys_retail, by.x = "ZIPCODE", by.y = "Zip Code")
names(nyc_food_stores)


# Aggregate by Zip Code
# Count distinct licences rather than rows: the merge pairs each store with
# every polygon carrying its ZIP code, so n() would count a store once per
# polygon if a ZIP code is drawn as more than one polygon.
# NOTE(review): assumes `License Number` identifies one store; confirm.

zip_summary_sf <- nyc_food_stores %>%
  group_by(ZIPCODE) %>%
  summarise(store_count = n_distinct(`License Number`))

# check the names of the sf file

names(zip_summary_sf)
```

# Task 3

```{r Task 3, include=TRUE}

# Join Zip Code and Health Facilities

# Read NYS Health Care data

nyc_health <- readr::read_csv("NYS_Health_Facility.csv", lazy = FALSE)
str(nyc_health)

# Merge health facilities and Zip Code Data
# Join on the facility's own ZIP code; "Cooperator Zip Code" is NA in the rows
# shown by str(nyc_health), so joining on it drops those facilities.

nyc_health_care_centers <-
  base::merge(nyc_zip_sf, nyc_health, by.x = "ZIPCODE", by.y = "Facility Zip Code")

# check the names of the sf file

names(nyc_health_care_centers)
```


```{r Working Code, include=TRUE}

# Task 4

# Read the Census Tract Data

nycCensus <- sf::st_read('nyc_census_tracts.shp',
                         stringsAsFactors = FALSE)
str(nycCensus)

names(nycCensus)


# Assign a borough name to each county FIPS code so the data can be sorted
# by borough.
# COUNTYFP already holds the three-digit county FIPS code (e.g. "061"), so
# comparing it with borough names never matched and left cntyFIPS entirely NA.
# Keep cntyFIPS as the code and derive the borough name from it.

nycCensus %<>% dplyr::mutate(
  cntyFIPS = COUNTYFP,
  boroName = dplyr::case_when(
    COUNTYFP == "005" ~ "Bronx",
    COUNTYFP == "047" ~ "Brooklyn",
    COUNTYFP == "061" ~ "Manhattan",
    COUNTYFP == "081" ~ "Queens",
    COUNTYFP == "085" ~ "Staten Island"
  )
)
```

# Tasks 4 and 5

I could not get the following lines to work or render, despite many hours of attempts.

```{r Tasks 4 and 5, include=TRUE}

# Read the ACS demographic table.
# Line 2 of the file is dropped before parsing (presumably a second header row
# of descriptive labels; confirm against the file).
# Fixes over the earlier attempt: straight quotes instead of typographic ones,
# one argument per line so the inline comment no longer swallows code, and no
# na.omit(acsData) before acsData exists.
acsData <- readLines("ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv") %>%
  magrittr::extract(-2) %>%
  textConnection() %>%
  read.csv(header = TRUE, quote = "\"") %>%
  dplyr::select(
    GEO_ID,
    totPop = DP05_0001E,
    elderlyPop = DP05_0024E, # >= 65
    malePop = DP05_0002E,
    femalePop = DP05_0003E,
    whitePop = DP05_0037E,
    blackPop = DP05_0038E,
    asianPop = DP05_0067E,
    hispanicPop = DP05_0071E,
    adultPop = DP05_0021E,
    citizenAdult = DP05_0087E
  ) %>%
  dplyr::mutate(censusCode = stringr::str_sub(GEO_ID, -9, -1)) %>%
  tidyr::drop_na() # drop rows with a missing value in any selected column

acsData %>% magrittr::extract(1:10, )

# Merge (JOIN) ACS data to the census tracts.
# Join by attributes / columns.
# But first we must make sure the GEOID and the GEO_ID values match:
# the tract layer's GEOID is 11 characters long, so keep the last 11
# characters of GEO_ID.
acsData <- acsData %>%
  dplyr::mutate(GEOID = stringr::str_sub(GEO_ID, -11, -1))

popData <- nycCensus %>%
  dplyr::left_join(acsData, by = "GEOID")

# Verify the data. Tracts without an ACS match have NA after the left join,
# hence na.rm = TRUE.
sum(popData$totPop, na.rm = TRUE)

str(popData)

st_crs(popData)

# Put the tracts in the same CRS as the ZIP code layer before the spatial join.
popNYC <- sf::st_transform(popData, sf::st_crs(nyc_covid_data_sf_merged))

# Use JOINED zip code data from task 1.
# Now aggregate to the zip code level.
# Join by locations with st_join (spatial join).
popNYC <- sf::st_join(
  nyc_covid_data_sf_merged,
  popNYC %>% sf::st_centroid(), # this essentially converts census tracts to points
  join = sf::st_contains
) %>%
  # use names(nyc_covid_data_sf_merged) and names(popNYC) to see what we have
  dplyr::group_by(ZIPCODE, PO_NAME, POPULATION, COUNTY,
                  COVID_CASE_COUNT, TOTAL_COVID_TESTS) %>%
  dplyr::summarise(
    totPop = sum(totPop),
    malePctg = sum(malePop) / totPop * 100, # totPop here is the newly calculated sum
    asianPop = sum(asianPop),
    blackPop = sum(blackPop),
    hispanicPop = sum(hispanicPop),
    whitePop = sum(whitePop),
    .groups = "drop"
  )

popNYC %>% head()

# Check and verify the data again.
sum(popNYC$totPop, na.rm = TRUE)
```

```````````````````
:::