Explanation of the template

Update the title with your information. Make sure to include identification information so that we know it is your submission.

Also update the author name and date accordingly.

Check out the Source Code from the top-right corner </>Code menu.

In the following R code chunk, load_packages is the code chunk name. include=FALSE suggests that the code chunk will run, but the code itself and its outputs will not be included in the rendered HTML. echo=TRUE in the following code chunk suggests that the code and results from running the code will be included in the rendered HTML.

FROM BEFORE

R Spatial Lab Assignment # 1 - Week 7

Don’t use a single chunk for the entire assignment. Break it into multiple chunks.

task 1: Set up an R project for the R-Spatial section.

# setting up R project for R-Spatial section
#
# Create the project folder under the home directory if it is missing.
# The folder is spelled "R-spatial" so that it matches the data paths read
# further down (a different capitalisation is a different folder on
# case-sensitive file systems), and showWarnings = FALSE keeps re-runs quiet
# when the folder already exists.

dir.create("~/Documents/R-spatial", recursive = TRUE, showWarnings = FALSE)

task 2: Read the NYC postal areas in Shapefiles into sf objects. As NYC DOH publishes COVID-19 data by zip code, we will utilize the postal area data later.

# read shapefile into sf object
#
# The path is built from the home directory instead of a user-specific
# absolute path, so the chunk also runs under another account that keeps the
# data in ~/Documents/R-spatial/data.

nyc_zip_shp <- file.path(
  path.expand("~"), "Documents", "R-spatial", "data",
  "ZIP_CODE_040114", "ZIP_CODE_040114.shp"
)
nyc_zip_sf <- st_read(nyc_zip_shp)
## Reading layer `ZIP_CODE_040114' from data source 
##   `/Users/samikarim/Documents/R-spatial/data/ZIP_CODE_040114/ZIP_CODE_040114.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 263 features and 12 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
# inspect the attribute columns and the geometry column of the zip code layer
str(nyc_zip_sf)
## Classes 'sf' and 'data.frame':   263 obs. of  13 variables:
##  $ ZIPCODE   : chr  "11436" "11213" "11212" "11225" ...
##  $ BLDGZIP   : chr  "0" "0" "0" "0" ...
##  $ PO_NAME   : chr  "Jamaica" "Brooklyn" "Brooklyn" "Brooklyn" ...
##  $ POPULATION: num  18681 62426 83866 56527 72280 ...
##  $ AREA      : num  22699295 29631004 41972104 23698630 36868799 ...
##  $ STATE     : chr  "NY" "NY" "NY" "NY" ...
##  $ COUNTY    : chr  "Queens" "Kings" "Kings" "Kings" ...
##  $ ST_FIPS   : chr  "36" "36" "36" "36" ...
##  $ CTY_FIPS  : chr  "081" "047" "047" "047" ...
##  $ URL       : chr  "http://www.usps.com/" "http://www.usps.com/" "http://www.usps.com/" "http://www.usps.com/" ...
##  $ SHAPE_AREA: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ SHAPE_LEN : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ geometry  :sfc_POLYGON of length 263; first list element: List of 1
##   ..$ : num [1:159, 1:2] 1038098 1038142 1038171 1038280 1038521 ...
##   ..- attr(*, "class")= chr [1:3] "XY" "POLYGON" "sfg"
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "names")= chr [1:12] "ZIPCODE" "BLDGZIP" "PO_NAME" "POPULATION" ...

task 3: Read and process the NYS health facilities spreadsheet data. Create sf objects from geographic coordinates.

# read the NYS health facility csv into a data frame
#
# The path is built from the home directory instead of a user-specific
# absolute path, so the chunk also runs under another account that keeps the
# data in ~/Documents/R-spatial/data.

nyshealth_csv <- file.path(
  path.expand("~"), "Documents", "R-spatial", "data",
  "NYS_Health_Facility.csv"
)
nyshealth_df <- read_csv(nyshealth_csv,
                         show_col_types = FALSE,
                         lazy = FALSE)
str(nyshealth_df)
## spc_tbl_ [3,990 × 36] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ Facility ID                 : num [1:3990] 204 620 654 1156 2589 ...
##  $ Facility Name               : chr [1:3990] "Hospice at Lourdes" "Charles T Sitrin Health Care Center Inc" "Central Park Rehabilitation and Nursing Center" "East Side Nursing Home" ...
##  $ Short Description           : chr [1:3990] "HSPC" "NH" "NH" "NH" ...
##  $ Description                 : chr [1:3990] "Hospice" "Residential Health Care Facility - SNF" "Residential Health Care Facility - SNF" "Residential Health Care Facility - SNF" ...
##  $ Facility Open Date          : chr [1:3990] "06/01/1985" "02/01/1989" "02/01/1989" "08/01/1979" ...
##  $ Facility Address 1          : chr [1:3990] "4102 Old Vestal Road" "2050 Tilden Avenue" "116 Martin Luther King East" "62 Prospect St" ...
##  $ Facility Address 2          : chr [1:3990] NA NA NA NA ...
##  $ Facility City               : chr [1:3990] "Vestal" "New Hartford" "Syracuse" "Warsaw" ...
##  $ Facility State              : chr [1:3990] "New York" "New York" "New York" "New York" ...
##  $ Facility Zip Code           : chr [1:3990] "13850" "13413" "13205" "14569" ...
##  $ Facility Phone Number       : num [1:3990] 6.08e+09 3.16e+09 3.15e+09 5.86e+09 5.86e+09 ...
##  $ Facility Fax Number         : num [1:3990] NA NA NA NA NA ...
##  $ Facility Website            : chr [1:3990] NA NA NA NA ...
##  $ Facility County Code        : num [1:3990] 3 32 33 60 2 ...
##  $ Facility County             : chr [1:3990] "Broome" "Oneida" "Onondaga" "Wyoming" ...
##  $ Regional Office ID          : num [1:3990] 3 3 3 1 1 1 7 1 7 5 ...
##  $ Regional Office             : chr [1:3990] "Central New York Regional Office" "Central New York Regional Office" "Central New York Regional Office" "Western Regional Office - Buffalo" ...
##  $ Main Site Name              : chr [1:3990] NA NA NA NA ...
##  $ Main Site Facility ID       : num [1:3990] NA NA NA NA NA ...
##  $ Operating Certificate Number: chr [1:3990] "0301501F" "3227304N" "3301326N" "6027303N" ...
##  $ Operator Name               : chr [1:3990] "Our Lady of Lourdes Memorial Hospital Inc" "Charles T Sitrin Health Care Center, Inc" "CPRNC, LLC" "East Side Nursing Home Inc" ...
##  $ Operator Address 1          : chr [1:3990] "169 Riverside Drive" "Box 1000 Tilden Avenue" "116 Martin Luther King East" "62 Prospect Street" ...
##  $ Operator Address 2          : chr [1:3990] NA NA NA NA ...
##  $ Operator City               : chr [1:3990] "Binghamton" "New Hartford" "Syracuse" "Warsaw" ...
##  $ Operator State              : chr [1:3990] "New York" "New York" "New York" "New York" ...
##  $ Operator Zip Code           : chr [1:3990] "13905" "13413" "13205" "14569" ...
##  $ Cooperator Name             : chr [1:3990] NA NA NA NA ...
##  $ Cooperator Address          : chr [1:3990] NA NA NA NA ...
##  $ Cooperator Address 2        : chr [1:3990] NA NA NA NA ...
##  $ Cooperator City             : chr [1:3990] NA NA NA NA ...
##  $ Cooperator State            : chr [1:3990] "New York" "New York" "New York" "New York" ...
##  $ Cooperator Zip Code         : chr [1:3990] NA NA NA NA ...
##  $ Ownership Type              : chr [1:3990] "Not for Profit Corporation" "Not for Profit Corporation" "LLC" "Business Corporation" ...
##  $ Facility Latitude           : num [1:3990] 42.1 43.1 NA 42.7 42.1 ...
##  $ Facility Longitude          : num [1:3990] -76 -75.2 NA -78.1 -78 ...
##  $ Facility Location           : chr [1:3990] "(42.097095, -75.975243)" "(43.05497, -75.228828)" NA "(42.738979, -78.12867)" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   `Facility ID` = col_double(),
##   ..   `Facility Name` = col_character(),
##   ..   `Short Description` = col_character(),
##   ..   Description = col_character(),
##   ..   `Facility Open Date` = col_character(),
##   ..   `Facility Address 1` = col_character(),
##   ..   `Facility Address 2` = col_character(),
##   ..   `Facility City` = col_character(),
##   ..   `Facility State` = col_character(),
##   ..   `Facility Zip Code` = col_character(),
##   ..   `Facility Phone Number` = col_double(),
##   ..   `Facility Fax Number` = col_double(),
##   ..   `Facility Website` = col_character(),
##   ..   `Facility County Code` = col_double(),
##   ..   `Facility County` = col_character(),
##   ..   `Regional Office ID` = col_double(),
##   ..   `Regional Office` = col_character(),
##   ..   `Main Site Name` = col_character(),
##   ..   `Main Site Facility ID` = col_double(),
##   ..   `Operating Certificate Number` = col_character(),
##   ..   `Operator Name` = col_character(),
##   ..   `Operator Address 1` = col_character(),
##   ..   `Operator Address 2` = col_character(),
##   ..   `Operator City` = col_character(),
##   ..   `Operator State` = col_character(),
##   ..   `Operator Zip Code` = col_character(),
##   ..   `Cooperator Name` = col_character(),
##   ..   `Cooperator Address` = col_character(),
##   ..   `Cooperator Address 2` = col_character(),
##   ..   `Cooperator City` = col_character(),
##   ..   `Cooperator State` = col_character(),
##   ..   `Cooperator Zip Code` = col_character(),
##   ..   `Ownership Type` = col_character(),
##   ..   `Facility Latitude` = col_double(),
##   ..   `Facility Longitude` = col_double(),
##   ..   `Facility Location` = col_character()
##   .. )
##  - attr(*, "problems")=<externalptr>
# filter out rows without coordinates

sum(is.na(nyshealth_df$`Facility Longitude`))
## [1] 142
sum(is.na(nyshealth_df$`Facility Latitude`))
## [1] 142
nyshealth_df_ <- nyshealth_df %>%
  dplyr::filter(
    !is.na(`Facility Longitude`),
    !is.na(`Facility Latitude`)
  )

# repair or drop invalid coordinates
#
# Without this step the sf object gets a bounding box with xmax around 43.2
# and ymin around -75.5, i.e. some records carry latitude and longitude in
# each other's column. New York has negative longitudes and positive
# latitudes, so a positive "longitude" paired with a negative "latitude" is
# treated as a swapped pair and swapped back. Anything still outside a
# generous box around New York State (e.g. 0/0 placeholders) is dropped.

fac_lon <- nyshealth_df_$`Facility Longitude`
fac_lat <- nyshealth_df_$`Facility Latitude`
lon_lat_swapped <- fac_lon > 0 & fac_lat < 0
nyshealth_df_$`Facility Longitude`[lon_lat_swapped] <- fac_lat[lon_lat_swapped]
nyshealth_df_$`Facility Latitude`[lon_lat_swapped] <- fac_lon[lon_lat_swapped]

n_with_coords <- nrow(nyshealth_df_)
nyshealth_df_ <- nyshealth_df_ %>%
  dplyr::filter(
    dplyr::between(`Facility Longitude`, -80, -71),
    dplyr::between(`Facility Latitude`, 40, 46)
  )
message(
  sum(lon_lat_swapped), " swapped coordinate pair(s) repaired; ",
  n_with_coords - nrow(nyshealth_df_), " row(s) outside New York State dropped"
)

# create sf object with geographic coordinates (x = longitude, y = latitude)

nyshealth_sf <- st_as_sf(nyshealth_df_,
                         coords = c("Facility Longitude", "Facility Latitude"))
str(nyshealth_sf)
## sf [3,848 × 35] (S3: sf/tbl_df/tbl/data.frame)
##  $ Facility ID                 : num [1:3848] 204 620 1156 2589 3455 ...
##  $ Facility Name               : chr [1:3848] "Hospice at Lourdes" "Charles T Sitrin Health Care Center Inc" "East Side Nursing Home" "Wellsville Manor Care Center" ...
##  $ Short Description           : chr [1:3848] "HSPC" "NH" "NH" "NH" ...
##  $ Description                 : chr [1:3848] "Hospice" "Residential Health Care Facility - SNF" "Residential Health Care Facility - SNF" "Residential Health Care Facility - SNF" ...
##  $ Facility Open Date          : chr [1:3848] "06/01/1985" "02/01/1989" "08/01/1979" "02/01/1989" ...
##  $ Facility Address 1          : chr [1:3848] "4102 Old Vestal Road" "2050 Tilden Avenue" "62 Prospect St" "4192A Bolivar Road" ...
##  $ Facility Address 2          : chr [1:3848] NA NA NA NA ...
##  $ Facility City               : chr [1:3848] "Vestal" "New Hartford" "Warsaw" "Wellsville" ...
##  $ Facility State              : chr [1:3848] "New York" "New York" "New York" "New York" ...
##  $ Facility Zip Code           : chr [1:3848] "13850" "13413" "14569" "14895" ...
##  $ Facility Phone Number       : num [1:3848] 6.08e+09 3.16e+09 5.86e+09 5.86e+09 7.17e+09 ...
##  $ Facility Fax Number         : num [1:3848] NA NA NA NA NA ...
##  $ Facility Website            : chr [1:3848] NA NA NA NA ...
##  $ Facility County Code        : num [1:3848] 3 32 60 2 14 ...
##  $ Facility County             : chr [1:3848] "Broome" "Oneida" "Wyoming" "Allegany" ...
##  $ Regional Office ID          : num [1:3848] 3 3 1 1 1 7 1 7 5 7 ...
##  $ Regional Office             : chr [1:3848] "Central New York Regional Office" "Central New York Regional Office" "Western Regional Office - Buffalo" "Western Regional Office - Buffalo" ...
##  $ Main Site Name              : chr [1:3848] NA NA NA NA ...
##  $ Main Site Facility ID       : num [1:3848] NA NA NA NA NA ...
##  $ Operating Certificate Number: chr [1:3848] "0301501F" "3227304N" "6027303N" "0228305N" ...
##  $ Operator Name               : chr [1:3848] "Our Lady of Lourdes Memorial Hospital Inc" "Charles T Sitrin Health Care Center, Inc" "East Side Nursing Home Inc" "Wellsville Manor LLC" ...
##  $ Operator Address 1          : chr [1:3848] "169 Riverside Drive" "Box 1000 Tilden Avenue" "62 Prospect Street" "4192a Bolivar Road" ...
##  $ Operator Address 2          : chr [1:3848] NA NA NA NA ...
##  $ Operator City               : chr [1:3848] "Binghamton" "New Hartford" "Warsaw" "Wellsville" ...
##  $ Operator State              : chr [1:3848] "New York" "New York" "New York" "New York" ...
##  $ Operator Zip Code           : chr [1:3848] "13905" "13413" "14569" "14897" ...
##  $ Cooperator Name             : chr [1:3848] NA NA NA NA ...
##  $ Cooperator Address          : chr [1:3848] NA NA NA NA ...
##  $ Cooperator Address 2        : chr [1:3848] NA NA NA NA ...
##  $ Cooperator City             : chr [1:3848] NA NA NA NA ...
##  $ Cooperator State            : chr [1:3848] "New York" "New York" "New York" "New York" ...
##  $ Cooperator Zip Code         : chr [1:3848] NA NA NA NA ...
##  $ Ownership Type              : chr [1:3848] "Not for Profit Corporation" "Not for Profit Corporation" "Business Corporation" "LLC" ...
##  $ Facility Location           : chr [1:3848] "(42.097095, -75.975243)" "(43.05497, -75.228828)" "(42.738979, -78.12867)" "(42.126461, -77.967834)" ...
##  $ geometry                    :sfc_POINT of length 3848; first list element:  'XY' num [1:2] -76 42.1
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "names")= chr [1:34] "Facility ID" "Facility Name" "Short Description" "Description" ...
# declare that the longitude/latitude values are WGS 84 (EPSG:4326), then
# print the CRS to confirm it was attached

nyshealth_sf <- st_set_crs(nyshealth_sf, 4326)
st_crs(nyshealth_sf)
## Coordinate Reference System:
##   User input: EPSG:4326 
##   wkt:
## GEOGCRS["WGS 84",
##     ENSEMBLE["World Geodetic System 1984 ensemble",
##         MEMBER["World Geodetic System 1984 (Transit)"],
##         MEMBER["World Geodetic System 1984 (G730)"],
##         MEMBER["World Geodetic System 1984 (G873)"],
##         MEMBER["World Geodetic System 1984 (G1150)"],
##         MEMBER["World Geodetic System 1984 (G1674)"],
##         MEMBER["World Geodetic System 1984 (G1762)"],
##         MEMBER["World Geodetic System 1984 (G2139)"],
##         MEMBER["World Geodetic System 1984 (G2296)"],
##         ELLIPSOID["WGS 84",6378137,298.257223563,
##             LENGTHUNIT["metre",1]],
##         ENSEMBLEACCURACY[2.0]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["geodetic latitude (Lat)",north,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["geodetic longitude (Lon)",east,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]],
##     USAGE[
##         SCOPE["Horizontal component of 3D system."],
##         AREA["World."],
##         BBOX[-90,-180,90,180]],
##     ID["EPSG",4326]]
# print the sf object to check its feature count, bounding box and CRS
nyshealth_sf
## Simple feature collection with 3848 features and 34 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -79.6299 ymin: -75.45935 xmax: 43.21162 ymax: 44.97849
## Geodetic CRS:  WGS 84
## # A tibble: 3,848 × 35
##    `Facility ID` `Facility Name`                 `Short Description` Description
##  *         <dbl> <chr>                           <chr>               <chr>      
##  1           204 Hospice at Lourdes              HSPC                Hospice    
##  2           620 Charles T Sitrin Health Care C… NH                  Residentia…
##  3          1156 East Side Nursing Home          NH                  Residentia…
##  4          2589 Wellsville Manor Care Center    NH                  Residentia…
##  5          3455 Harris Hill Nursing Facility, … NH                  Residentia…
##  6          3853 Garden City Surgi Center        DTC                 Diagnostic…
##  7          4249 Willcare                        CHHA                Certified …
##  8          4473 Good Shepherd Hospice           HSPC                Hospice    
##  9          6230 NYU Langone Rutherford          HOSP-EC             Hospital E…
## 10          6482 Endoscopy Center of Long Islan… DTC                 Diagnostic…
## # ℹ 3,838 more rows
## # ℹ 31 more variables: `Facility Open Date` <chr>, `Facility Address 1` <chr>,
## #   `Facility Address 2` <chr>, `Facility City` <chr>, `Facility State` <chr>,
## #   `Facility Zip Code` <chr>, `Facility Phone Number` <dbl>,
## #   `Facility Fax Number` <dbl>, `Facility Website` <chr>,
## #   `Facility County Code` <dbl>, `Facility County` <chr>,
## #   `Regional Office ID` <dbl>, `Regional Office` <chr>, …

task 4: Read and process the NYS retail food stores data. Create sf objects from geographic coordinates for NYC.

# read the NYS retail food store csv into a data frame
#
# The path is built from the home directory instead of a user-specific
# absolute path, so the chunk also runs under another account that keeps the
# data in ~/Documents/R-spatial/data.

nysretailfood_csv <- file.path(
  path.expand("~"), "Documents", "R-spatial", "data",
  "NYS_Retail_Food_Stores.csv"
)
nysretailfood_df <- read_csv(nysretailfood_csv,
                             show_col_types = FALSE,
                             lazy = FALSE)
str(nysretailfood_df)
## spc_tbl_ [29,389 × 15] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ County            : chr [1:29389] "Albany" "Albany" "Albany" "Albany" ...
##  $ License Number    : chr [1:29389] "733149" "704590" "727909" "720557" ...
##  $ Operation Type    : chr [1:29389] "Store" "Store" "Store" "Store" ...
##  $ Establishment Type: chr [1:29389] "A" "JAC" "JAC" "JAC" ...
##  $ Entity Name       : chr [1:29389] "SPEEDWAY LLC" "1250 SELKIRK INC" "RED-KAP SALES INC" "SAEED SADIQ, SAIKA NOREEN" ...
##  $ DBA Name          : chr [1:29389] "12110" "1250 SELKIRK" "1667 GENERAL STORE" "19 STREET QUICK STOP" ...
##  $ Street Number     : chr [1:29389] "719" "1250" "1667" "315" ...
##  $ Street Name       : chr [1:29389] "NEW LOUDON RD" "RTE 9W & 396" "WESTERN AVENUE" "19TH STREET" ...
##  $ Address Line 2    : logi [1:29389] NA NA NA NA NA NA ...
##  $ Address Line 3    : logi [1:29389] NA NA NA NA NA NA ...
##  $ City              : chr [1:29389] "LATHAM" "SELKIRK" "ALBANY" "WATERVLIET" ...
##  $ State             : chr [1:29389] "NY" "NY" "NY" "NY" ...
##  $ Zip Code          : num [1:29389] 12110 12158 12203 12189 12210 ...
##  $ Square Footage    : num [1:29389] 300 3000 2000 1200 1800 0 0 200 0 2000 ...
##  $ Location          : chr [1:29389] "719 NEW LOUDON RD\nLATHAM, NY 12110\n(42.739618, -73.761949)" "1250 RTE 9 W\nSELKIRK, NY 12158\n(42.547591, -73.8073)" "1667 WESTERN AVENUE\nALBANY, NY 12203\n(42.686553, -73.854665)" "315 19TH STREET\nWATERVLIET, NY 12189\n(42.73063, -73.703443)" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   County = col_character(),
##   ..   `License Number` = col_character(),
##   ..   `Operation Type` = col_character(),
##   ..   `Establishment Type` = col_character(),
##   ..   `Entity Name` = col_character(),
##   ..   `DBA Name` = col_character(),
##   ..   `Street Number` = col_character(),
##   ..   `Street Name` = col_character(),
##   ..   `Address Line 2` = col_logical(),
##   ..   `Address Line 3` = col_logical(),
##   ..   City = col_character(),
##   ..   State = col_character(),
##   ..   `Zip Code` = col_double(),
##   ..   `Square Footage` = col_number(),
##   ..   Location = col_character()
##   .. )
##  - attr(*, "problems")=<externalptr>
# the retail food store csv has no separate coordinate columns, so read the
# companion csv that adds Coords, Y (latitude) and X (longitude)
#
# The path is built from the home directory instead of a user-specific
# absolute path, and check.names is given as FALSE rather than the
# reassignable shorthand F.

nysretailfood_xy_csv <- file.path(
  path.expand("~"), "Documents", "R-spatial", "data",
  "nys_retail_food_store_xy.csv"
)
nysretailfood_xy_df <- read.csv(nysretailfood_xy_csv,
                                fileEncoding = "latin1",
                                check.names = FALSE)

# the first header arrives with stray characters in front of "County"
# (it looks like residue of a mis-decoded byte-order mark); give the column
# a clean name so it does not end up as a garbled field name later on
if (endsWith(names(nysretailfood_xy_df)[1], "County")) {
  names(nysretailfood_xy_df)[1] <- "County"
}

# several text columns are padded with blanks; trim them so values such as
# "JAC   " compare equal to "JAC"
is_chr_col <- vapply(nysretailfood_xy_df, is.character, logical(1))
nysretailfood_xy_df[is_chr_col] <- lapply(nysretailfood_xy_df[is_chr_col],
                                          trimws)

str(nysretailfood_xy_df)
## 'data.frame':    29389 obs. of  18 variables:
##  $ ï..County         : chr  "Albany" "Albany" "Albany" "Albany" ...
##  $ License.Number    : int  733149 704590 727909 720557 15890 735254 708848 716318 713889 715759 ...
##  $ Operation.Type    : chr  "Store" "Store" "Store" "Store" ...
##  $ Establishment.Type: chr  "A     " "JAC   " "JAC   " "JAC   " ...
##  $ Entity.Name       : chr  "SPEEDWAY LLC                     " "1250 SELKIRK INC                 " "RED-KAP SALES INC                " "SAEED SADIQ, SAIKA NOREEN        " ...
##  $ DBA.Name          : chr  "12110                  " "1250 SELKIRK           " "1667 GENERAL STORE     " "19 STREET QUICK STOP   " ...
##  $ Street.Number     : chr  "719" "1250" "1667" "315" ...
##  $ Street.Name       : chr  "NEW LOUDON RD                " "RTE 9W & 396                " "WESTERN AVENUE              " "19TH STREET                  " ...
##  $ Address.Line.2    : logi  NA NA NA NA NA NA ...
##  $ Address.Line.3    : logi  NA NA NA NA NA NA ...
##  $ City              : chr  "LATHAM            " "SELKIRK           " "ALBANY            " "WATERVLIET        " ...
##  $ State             : chr  "NY" "NY" "NY" "NY" ...
##  $ Zip.Code          : int  12110 12158 12203 12189 12210 12209 12110 12084 12077 12159 ...
##  $ Square.Footage    : chr  "300" "3,000" "2,000" "1,200" ...
##  $ Location          : chr  "719 NEW LOUDON RD\nLATHAM, NY 12110\n(42.739618, -73.761949)" "1250 RTE 9 W\nSELKIRK, NY 12158\n(42.547591, -73.8073)" "1667 WESTERN AVENUE\nALBANY, NY 12203\n(42.686553, -73.854665)" "315 19TH STREET\nWATERVLIET, NY 12189\n(42.73063, -73.703443)" ...
##  $ Coords            : chr  "42.739618, -73.761949" "42.547591, -73.8073" "42.686553, -73.854665" "42.73063, -73.703443" ...
##  $ Y                 : num  42.7 42.5 42.7 42.7 42.7 ...
##  $ X                 : num  -73.8 -73.8 -73.9 -73.7 -73.8 ...
# count the missing coordinates, then keep only rows that have both X and Y

sum(is.na(nysretailfood_xy_df$X))
## [1] 5417
sum(is.na(nysretailfood_xy_df$Y))
## [1] 5417
nysretailfood_xy_df_ <- dplyr::filter(
  nysretailfood_xy_df,
  !is.na(X) & !is.na(Y)
)

# build point geometries from the X (longitude) and Y (latitude) columns

nysretailfood_sf <- nysretailfood_xy_df_ %>%
  st_as_sf(coords = c("X", "Y"))
str(nysretailfood_sf)
## Classes 'sf' and 'data.frame':   23972 obs. of  17 variables:
##  $ ï..County         : chr  "Albany" "Albany" "Albany" "Albany" ...
##  $ License.Number    : int  733149 704590 727909 720557 15890 735254 708848 713889 715759 723927 ...
##  $ Operation.Type    : chr  "Store" "Store" "Store" "Store" ...
##  $ Establishment.Type: chr  "A     " "JAC   " "JAC   " "JAC   " ...
##  $ Entity.Name       : chr  "SPEEDWAY LLC                     " "1250 SELKIRK INC                 " "RED-KAP SALES INC                " "SAEED SADIQ, SAIKA NOREEN        " ...
##  $ DBA.Name          : chr  "12110                  " "1250 SELKIRK           " "1667 GENERAL STORE     " "19 STREET QUICK STOP   " ...
##  $ Street.Number     : chr  "719" "1250" "1667" "315" ...
##  $ Street.Name       : chr  "NEW LOUDON RD                " "RTE 9W & 396                " "WESTERN AVENUE              " "19TH STREET                  " ...
##  $ Address.Line.2    : logi  NA NA NA NA NA NA ...
##  $ Address.Line.3    : logi  NA NA NA NA NA NA ...
##  $ City              : chr  "LATHAM            " "SELKIRK           " "ALBANY            " "WATERVLIET        " ...
##  $ State             : chr  "NY" "NY" "NY" "NY" ...
##  $ Zip.Code          : int  12110 12158 12203 12189 12210 12209 12110 12077 12159 12084 ...
##  $ Square.Footage    : chr  "300" "3,000" "2,000" "1,200" ...
##  $ Location          : chr  "719 NEW LOUDON RD\nLATHAM, NY 12110\n(42.739618, -73.761949)" "1250 RTE 9 W\nSELKIRK, NY 12158\n(42.547591, -73.8073)" "1667 WESTERN AVENUE\nALBANY, NY 12203\n(42.686553, -73.854665)" "315 19TH STREET\nWATERVLIET, NY 12189\n(42.73063, -73.703443)" ...
##  $ Coords            : chr  "42.739618, -73.761949" "42.547591, -73.8073" "42.686553, -73.854665" "42.73063, -73.703443" ...
##  $ geometry          :sfc_POINT of length 23972; first list element:  'XY' num  -73.8 42.7
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "names")= chr [1:16] "ï..County" "License.Number" "Operation.Type" "Establishment.Type" ...
# declare that the X/Y values are WGS 84 longitude/latitude (EPSG:4326),
# then print the CRS to confirm it was attached

nysretailfood_sf <- st_set_crs(nysretailfood_sf, 4326)
st_crs(nysretailfood_sf)
## Coordinate Reference System:
##   User input: EPSG:4326 
##   wkt:
## GEOGCRS["WGS 84",
##     ENSEMBLE["World Geodetic System 1984 ensemble",
##         MEMBER["World Geodetic System 1984 (Transit)"],
##         MEMBER["World Geodetic System 1984 (G730)"],
##         MEMBER["World Geodetic System 1984 (G873)"],
##         MEMBER["World Geodetic System 1984 (G1150)"],
##         MEMBER["World Geodetic System 1984 (G1674)"],
##         MEMBER["World Geodetic System 1984 (G1762)"],
##         MEMBER["World Geodetic System 1984 (G2139)"],
##         MEMBER["World Geodetic System 1984 (G2296)"],
##         ELLIPSOID["WGS 84",6378137,298.257223563,
##             LENGTHUNIT["metre",1]],
##         ENSEMBLEACCURACY[2.0]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["geodetic latitude (Lat)",north,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["geodetic longitude (Lon)",east,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]],
##     USAGE[
##         SCOPE["Horizontal component of 3D system."],
##         AREA["World."],
##         BBOX[-90,-180,90,180]],
##     ID["EPSG",4326]]
# print the sf object to check its feature count, bounding box and CRS
nysretailfood_sf
## Simple feature collection with 23972 features and 16 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -79.75953 ymin: 40.50782 xmax: -71.93873 ymax: 44.99484
## Geodetic CRS:  WGS 84
## First 10 features:
##    ï..County License.Number Operation.Type Establishment.Type
## 1     Albany         733149          Store             A     
## 2     Albany         704590          Store             JAC   
## 3     Albany         727909          Store             JAC   
## 4     Albany         720557          Store             JAC   
## 5     Albany          15890          Store             A     
## 6     Albany         735254          Store             JAC   
## 7     Albany         708848          Store             JAC   
## 8     Albany         713889          Store             JAC   
## 9     Albany         715759          Store             JAC   
## 10    Albany         723927          Store             JAC   
##                          Entity.Name                DBA.Name Street.Number
## 1  SPEEDWAY LLC                      12110                             719
## 2  1250 SELKIRK INC                  1250 SELKIRK                     1250
## 3  RED-KAP SALES INC                 1667 GENERAL STORE               1667
## 4  SAEED SADIQ, SAIKA NOREEN         19 STREET QUICK STOP              315
## 5  AZIZ MOHAMMAD S                   24 HR ALBANY NEWS                   8
## 6  7-ELEVEN INC                      7-ELEVEN                          477
## 7  ADVANCED FRESH CONCEPTS FRANCHISE AFC SUSHI @ PRICE CHOPP           873
## 8  ADVANCED FRESH CONCEPTS FRANCHISE AFC SUSHI @ PRICE CHOPP           329
## 9  ADVANCED FRESH CONCEPTS FRANCHISE AFC SUSHI @PRICE CHOPPE          1395
## 10 ADVANCED FRESH CONCEPTS FRAN CORP AFC SUSHI@ PRICE CHOPPE          2080
##                        Street.Name Address.Line.2 Address.Line.3
## 1    NEW LOUDON RD                             NA             NA
## 2     RTE 9W & 396                             NA             NA
## 3     WESTERN AVENUE                           NA             NA
## 4    19TH STREET                               NA             NA
## 5  CENTRAL AVE                                 NA             NA
## 6    DELAWARE AVE                              NA             NA
## 7          NEW LOUDON RD                       NA             NA
## 8        GLENMONT RD                           NA             NA
## 9         NEW SCOTLAND RD                      NA             NA
## 10    WESTERN AVE                              NA             NA
##                  City State Zip.Code Square.Footage
## 1  LATHAM                NY    12110            300
## 2  SELKIRK               NY    12158          3,000
## 3  ALBANY                NY    12203          2,000
## 4  WATERVLIET            NY    12189          1,200
## 5  ALBANY                NY    12210          1,800
## 6  ALBANY                NY    12209              0
## 7  LATHAM                NY    12110              0
## 8  GLENMONT              NY    12077              0
## 9  SLINGERLANDS          NY    12159          2,000
## 10 GUILDERLAND           NY    12084            400
##                                                                 Location
## 1           719 NEW LOUDON RD\nLATHAM, NY 12110\n(42.739618, -73.761949)
## 2                 1250 RTE 9 W\nSELKIRK, NY 12158\n(42.547591, -73.8073)
## 3         1667 WESTERN AVENUE\nALBANY, NY 12203\n(42.686553, -73.854665)
## 4          315 19TH STREET\nWATERVLIET, NY 12189\n(42.73063, -73.703443)
## 5               8 CENTRAL AVE\nALBANY, NY 12210\n(42.657136, -73.763712)
## 6            477 DELAWARE AVE\nALBANY, NY 12209\n(42.639931, -73.784962)
## 7            873 NEW LOUDON RD\nLATHAM, NY 12110\n(42.75459, -73.758714)
## 8           329 GLENMONT RD\nGLENMONT, NY 12077\n(42.601045, -73.790364)
## 9  1395 NEW SCOTLAND RD\nSLINGERLANDS, NY 12159\n(42.636568, -73.856367)
## 10       2080 WESTERN AVE\nGUILDERLAND, NY 12084\n(42.698502, -73.89159)
##                   Coords                   geometry
## 1  42.739618, -73.761949 POINT (-73.76195 42.73962)
## 2    42.547591, -73.8073  POINT (-73.8073 42.54759)
## 3  42.686553, -73.854665 POINT (-73.85466 42.68655)
## 4   42.73063, -73.703443 POINT (-73.70344 42.73063)
## 5  42.657136, -73.763712 POINT (-73.76371 42.65714)
## 6  42.639931, -73.784962 POINT (-73.78496 42.63993)
## 7   42.75459, -73.758714 POINT (-73.75871 42.75459)
## 8  42.601045, -73.790364 POINT (-73.79036 42.60104)
## 9  42.636568, -73.856367 POINT (-73.85637 42.63657)
## 10  42.698502, -73.89159  POINT (-73.89159 42.6985)
# reproject the points from WGS 84 to NAD83 / New York Long Island (ftUS),
# EPSG:2263, which is the projected CRS of the zip code layer; the result is
# stored in a new object so the WGS 84 version stays available

nysretailfood_sf_ <- nysretailfood_sf %>%
  st_transform(crs = 2263)

st_crs(nysretailfood_sf_)
## Coordinate Reference System:
##   User input: EPSG:2263 
##   wkt:
## PROJCRS["NAD83 / New York Long Island (ftUS)",
##     BASEGEOGCRS["NAD83",
##         DATUM["North American Datum 1983",
##             ELLIPSOID["GRS 1980",6378137,298.257222101,
##                 LENGTHUNIT["metre",1]]],
##         PRIMEM["Greenwich",0,
##             ANGLEUNIT["degree",0.0174532925199433]],
##         ID["EPSG",4269]],
##     CONVERSION["SPCS83 New York Long Island zone (US survey foot)",
##         METHOD["Lambert Conic Conformal (2SP)",
##             ID["EPSG",9802]],
##         PARAMETER["Latitude of false origin",40.1666666666667,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8821]],
##         PARAMETER["Longitude of false origin",-74,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8822]],
##         PARAMETER["Latitude of 1st standard parallel",41.0333333333333,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8823]],
##         PARAMETER["Latitude of 2nd standard parallel",40.6666666666667,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8824]],
##         PARAMETER["Easting at false origin",984250,
##             LENGTHUNIT["US survey foot",0.304800609601219],
##             ID["EPSG",8826]],
##         PARAMETER["Northing at false origin",0,
##             LENGTHUNIT["US survey foot",0.304800609601219],
##             ID["EPSG",8827]]],
##     CS[Cartesian,2],
##         AXIS["easting (X)",east,
##             ORDER[1],
##             LENGTHUNIT["US survey foot",0.304800609601219]],
##         AXIS["northing (Y)",north,
##             ORDER[2],
##             LENGTHUNIT["US survey foot",0.304800609601219]],
##     USAGE[
##         SCOPE["Engineering survey, topographic mapping."],
##         AREA["United States (USA) - New York - counties of Bronx; Kings; Nassau; New York; Queens; Richmond; Suffolk."],
##         BBOX[40.47,-74.26,41.3,-71.8]],
##     ID["EPSG",2263]]
# check differences between coordinates
#
# Compare the layer before (nysretailfood_sf) and after (nysretailfood_sf_)
# st_transform(): the proj4 strings show each CRS definition (longlat vs. a
# Lambert conformal conic projection in US survey feet), and the bounding
# boxes show the coordinate values changing from decimal degrees to feet.

st_crs(nysretailfood_sf)$proj4string
## [1] "+proj=longlat +datum=WGS84 +no_defs"
st_crs(nysretailfood_sf_)$proj4string
## [1] "+proj=lcc +lat_0=40.1666666666667 +lon_0=-74 +lat_1=41.0333333333333 +lat_2=40.6666666666667 +x_0=300000 +y_0=0 +datum=NAD83 +units=us-ft +no_defs"
sf::st_bbox(nysretailfood_sf)
##      xmin      ymin      xmax      ymax 
## -79.75953  40.50782 -71.93873  44.99484
st_bbox(nysretailfood_sf_)
##      xmin      ymin      xmax      ymax 
## -574632.2  124371.4 1552535.4 1761473.6
# filter to nyc only using zip codes
#
# Stores are kept when their recorded zip code appears in the NYC zip code
# layer. The store zip codes are integers, so they are converted to character
# to match ZIPCODE. The NYC zip codes are copied into a lookup vector instead
# of overwriting the column of the shared nyc_zip_sf object, and filter() is
# namespace-qualified like the other dplyr::filter() calls in this file so it
# cannot be confused with stats::filter().

nyc_zip_codes <- unique(as.character(nyc_zip_sf$ZIPCODE))
nysretailfood_sf_$Zip.Code <- as.character(nysretailfood_sf_$Zip.Code)

nysretailfood_nyc_sf <- nysretailfood_sf_ %>%
  dplyr::filter(Zip.Code %in% nyc_zip_codes)

task 5: Use a simple mapping method such as mapview with a basemap to verify the above datasets in terms of their geographic locations.

# Quick visual check: draw each of the three sf objects as its own
# interactive map layer

mapview::mapview(nyc_zip_sf, layer.name = "Zip Codes")
mapview::mapview(nyshealth_sf, layer.name = "NYS Health Facilities")
mapview::mapview(nysretailfood_nyc_sf, layer.name = "NYC Retail Food Stores")

task 6: Save the three sf objects in a RData file or in a single GeoPackage file/database.

# Persist the three sf objects together in one RData file; the output
# folder is created first if it does not exist yet

dir.create("data", showWarnings = FALSE, recursive = TRUE)

save(
  list = c("nyc_zip_sf", "nyshealth_sf", "nysretailfood_nyc_sf"),
  file = "./data/nys_nyc_zipcodes_health_retailfood.RData"
)

# Write each sf object as its own layer of one shared GeoPackage;
# delete_layer = TRUE replaces a layer left over from an earlier run

sf::st_write(
  obj = nyc_zip_sf,
  dsn = "./data/ny_zhr_data.gpkg",
  layer = "nyc_zip_codes",
  delete_layer = TRUE
)
## Deleting layer `nyc_zip_codes' using driver `GPKG'
## Writing layer `nyc_zip_codes' to data source 
##   `./data/ny_zhr_data.gpkg' using driver `GPKG'
## Writing 263 features with 12 fields and geometry type Polygon.
sf::st_write(
  obj = nyshealth_sf,
  dsn = "./data/ny_zhr_data.gpkg",
  layer = "nys_health_facilities",
  delete_layer = TRUE
)
## Deleting layer `nys_health_facilities' using driver `GPKG'
## Writing layer `nys_health_facilities' to data source 
##   `./data/ny_zhr_data.gpkg' using driver `GPKG'
## Writing 3848 features with 34 fields and geometry type Point.
sf::st_write(
  obj = nysretailfood_nyc_sf,
  dsn = "./data/ny_zhr_data.gpkg",
  layer = "nyc_retail_food_stores",
  delete_layer = TRUE
)
## Deleting layer `nyc_retail_food_stores' using driver `GPKG'
## Writing layer `nyc_retail_food_stores' to data source 
##   `./data/ny_zhr_data.gpkg' using driver `GPKG'
## Writing 11372 features with 16 fields and geometry type Point.

R Spatial Lab Assignment 2

Don’t use a single chunk for the entire assignment. Break it into multiple chunks.

task 1: Join the COVID-19 data to the NYC zip code area data (sf or sp polygons).

# Load the zip-code-level COVID-19 test counts; lazy = FALSE reads the
# whole file immediately
coviddata <- readr::read_csv(
  file = "/Users/samikarim/Documents/R-spatial/R-Spatial_II_Lab/tests-by-zcta_2020_04_12.csv",
  lazy = FALSE
)
## Rows: 178 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (4): MODZCTA, Positive, Total, zcta_cum.perc_pos
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
utils::str(coviddata)
## spc_tbl_ [178 × 4] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ MODZCTA          : num [1:178] NA 10001 10002 10003 10004 ...
##  $ Positive         : num [1:178] 1934 211 539 279 23 ...
##  $ Total            : num [1:178] 2082 448 1024 662 59 ...
##  $ zcta_cum.perc_pos: num [1:178] 92.9 47.1 52.6 42.1 39 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   MODZCTA = col_double(),
##   ..   Positive = col_double(),
##   ..   Total = col_double(),
##   ..   zcta_cum.perc_pos = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>
# Work on an sf copy of the zip code layer and inspect its structure
nyc_zip_sf_ <- sf::st_as_sf(nyc_zip_sf)

utils::str(nyc_zip_sf_)
## Classes 'sf' and 'data.frame':   263 obs. of  13 variables:
##  $ ZIPCODE   : chr  "11436" "11213" "11212" "11225" ...
##  $ BLDGZIP   : chr  "0" "0" "0" "0" ...
##  $ PO_NAME   : chr  "Jamaica" "Brooklyn" "Brooklyn" "Brooklyn" ...
##  $ POPULATION: num  18681 62426 83866 56527 72280 ...
##  $ AREA      : num  22699295 29631004 41972104 23698630 36868799 ...
##  $ STATE     : chr  "NY" "NY" "NY" "NY" ...
##  $ COUNTY    : chr  "Queens" "Kings" "Kings" "Kings" ...
##  $ ST_FIPS   : chr  "36" "36" "36" "36" ...
##  $ CTY_FIPS  : chr  "081" "047" "047" "047" ...
##  $ URL       : chr  "http://www.usps.com/" "http://www.usps.com/" "http://www.usps.com/" "http://www.usps.com/" ...
##  $ SHAPE_AREA: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ SHAPE_LEN : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ geometry  :sfc_POLYGON of length 263; first list element: List of 1
##   ..$ : num [1:159, 1:2] 1038098 1038142 1038171 1038280 1038521 ...
##   ..- attr(*, "class")= chr [1:3] "XY" "POLYGON" "sfg"
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "names")= chr [1:12] "ZIPCODE" "BLDGZIP" "PO_NAME" "POPULATION" ...
# Attach the COVID-19 counts to the zip code polygons. MODZCTA is numeric
# in the CSV, so it is converted to character to match ZIPCODE.
nyc_covid_zip_merged <- nyc_zip_sf_ %>%
  dplyr::left_join(
    dplyr::mutate(coviddata, MODZCTA = as.character(MODZCTA)),
    by = c("ZIPCODE" = "MODZCTA")
  )

names(nyc_covid_zip_merged)
##  [1] "ZIPCODE"           "BLDGZIP"           "PO_NAME"          
##  [4] "POPULATION"        "AREA"              "STATE"            
##  [7] "COUNTY"            "ST_FIPS"           "CTY_FIPS"         
## [10] "URL"               "SHAPE_AREA"        "SHAPE_LEN"        
## [13] "Positive"          "Total"             "zcta_cum.perc_pos"
## [16] "geometry"

task 2: Aggregate the NYC food retail store data (points) to the zip code data, so that we know how many retail stores are in each zip code area. Note that not all locations are for food retail, so we need to choose the specific types according to the data.

# Give the first column the name "County", then preview the stores whose
# Establishment.Type contains A, J or D (the result is printed, not stored)
names(nysretailfood_nyc_sf)[1] <- "County"

dplyr::filter(
  nysretailfood_nyc_sf,
  stringr::str_detect(Establishment.Type, "[AJD]")
)
## Simple feature collection with 11372 features and 16 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 915179.3 ymin: 124371.4 xmax: 1079468 ymax: 270861.1
## Projected CRS: NAD83 / New York Long Island (ftUS)
## First 10 features:
##    County License.Number Operation.Type Establishment.Type
## 1   Bronx         734149          Store             JAC   
## 2   Bronx         606221          Store             JAC   
## 3   Bronx         606228          Store             JAC   
## 4   Bronx         723375          Store             JAC   
## 5   Bronx         724807          Store             JAC   
## 6   Bronx         712943          Store             JAC   
## 7   Bronx         703060          Store             JAC   
## 8   Bronx         609065          Store             JAC   
## 9   Bronx         722972          Store             A     
## 10  Bronx         609621          Store             JAC   
##                          Entity.Name                DBA.Name Street.Number
## 1  7 ELEVEN FOOD STORE #37933H                                         500
## 2  1001 SAN MIGUEL FOOD CENTER INC   1001 SAN MIGUEL FD CNTR          1001
## 3  1029 FOOD PLAZA INC               1029 FOOD PLAZA                   122
## 4  1078 DELI GROCERY CORP            1078 DELI GROCERY                1078
## 5  1086 LUNA DELI GROCERY CORP       1086 LUNA DELI GROCERY           1086
## 6  109 AJ DELI GROCERY CORP          109 AJ DELI GROCERY               109
## 7  10 NEIGHBORHOOD CANDY GROCERY COR 10 NEIGHBORHOOD CANDY G            10
## 8  1105 TINTON DELI GROCERY CORP     1105 TINTON DELI GRCY            1105
## 9  1150 WEBSTER PHARMACY INC         1150 WEBSTER PHARMACY I          1150
## 10 1158 GROCERY & DELI INC           1158 GROCERY & DELI              1158
##                       Street.Name Address.Line.2 Address.Line.3
## 1   BAYCHESTER AVE                            NA             NA
## 2    SHERIDAN AVE                             NA             NA
## 3   E 181ST ST                                NA             NA
## 4    EAST 165TH STREET                        NA             NA
## 5    BOSTON ROAD                              NA             NA
## 6   E TREMONT AVE                             NA             NA
## 7  W. GUN HILL RD.                            NA             NA
## 8    TINTON AVE                               NA             NA
## 9    WEBSTER AVENUE                           NA             NA
## 10   ST LAWRENCE AVE                          NA             NA
##                  City State Zip.Code Square.Footage
## 1  BRONX                 NY    10475              0
## 2  BRONX                 NY    10456          1,100
## 3  BRONX                 NY    10453          2,000
## 4  BRONX                 NY    10459          1,200
## 5  BRONX                 NY    10456          1,500
## 6  BRONX                 NY    10453          2,400
## 7  BRONX                 NY    10467          1,000
## 8  BRONX                 NY    10456          1,200
## 9  BRONX                 NY    10456          3,400
## 10 BRONX                 NY    10472            500
##                                                            Location
## 1      500 BAYCHESTER AVE\nBRONX, NY 10475\n(40.869156, -73.831875)
## 2       1001 SHERIDAN AVE\nBRONX, NY 10456\n(40.829061, -73.919613)
## 3          122 E 181ST ST\nBRONX, NY 10453\n(40.854755, -73.902853)
## 4  1078 EAST 165TH STREET\nBRONX, NY 10459\n(40.825105, -73.890589)
## 5        1086 BOSTON ROAD\nBRONX, NY 10456\n(40.827096, -73.905123)
## 6       109 E TREMONT AVE\nBRONX, NY 10453\n(40.850537, -73.907137)
## 7        10 W GUN HILL RD\nBRONX, NY 10467\n(40.882869, -73.881552)
## 8         1105 TINTON AVE\nBRONX, NY 10456\n(40.826607, -73.901498)
## 9      1150 WEBSTER AVENUE\nBRONX, NY 10456\n(40.830425, -73.91063)
## 10   1158 ST LAWRENCE AVE\nBRONX, NY 10472\n(40.829105, -73.866678)
##                   Coords                 geometry
## 1  40.869156, -73.831875 POINT (1030750 255979.3)
## 2  40.829061, -73.919613 POINT (1006497 241336.7)
## 3  40.854755, -73.902853 POINT (1011124 250702.7)
## 4  40.825105, -73.890589 POINT (1014531 239904.1)
## 5  40.827096, -73.905123 POINT (1010507 240624.8)
## 6  40.850537, -73.907137 POINT (1009941 249164.7)
## 7  40.882869, -73.881552   POINT (1017003 260953)
## 8  40.826607, -73.901498 POINT (1011511 240447.8)
## 9   40.830425, -73.91063 POINT (1008982 241836.1)
## 10 40.829105, -73.866678 POINT (1021146 241370.6)
# Count the food retail stores that fall inside each zip code polygon.
#
# * The Establishment.Type selection (codes containing A, J or D) is applied
#   inside this pipeline, so the count is based on the selected stores rather
#   than on every record in nysretailfood_nyc_sf.
# * st_join() is a left join: a zip code without any store still yields one
#   row whose store columns are NA. Counting rows with n() would therefore
#   report 1 store for an empty zip code, so a marker column is summed instead.
# * The stores are already points, so no centroid step is needed, and no area
#   column is computed because summarise() would discard it anyway.
# * summarise() keeps its default grouping, so the result is still grouped by
#   every grouping column except the last one.
foodstores_zc_ <- sf::st_join(
  nyc_covid_zip_merged,
  nysretailfood_nyc_sf %>%
    dplyr::filter(stringr::str_detect(Establishment.Type, "[AJD]")) %>%
    dplyr::mutate(store_flag = 1L) %>%
    sf::st_transform(2263),
  join = sf::st_contains
) %>%
  dplyr::group_by(ZIPCODE, PO_NAME, POPULATION, COUNTY, Positive, Total) %>%
  dplyr::summarise(n_foodstores = sum(store_flag, na.rm = TRUE))
## `summarise()` has grouped output by 'ZIPCODE', 'PO_NAME', 'POPULATION',
## 'COUNTY', 'Positive'. You can override using the `.groups` argument.

task 3: Aggregate the NYC health facilities (points) to the zip code data. Similarly, choose appropriate subtypes such as nursing homes from the facilities.

# Count the nursing homes that fall inside each zip code polygon.
#
# Column names containing spaces must be written with backticks inside
# filter(). In quotes they are plain strings, so the condition is a single
# constant FALSE and every row is dropped.
# NOTE(review): assumes nyshealth_sf has a column named `Short Description`
# in which nursing homes are coded "NH" -- confirm against the data.
nyshealth_sf[["Facility Zip Code"]] <-
  as.character(nyshealth_sf[["Facility Zip Code"]])

# Health facilities located in an NYC zip code
nychealth_sf <- nyshealth_sf %>%
  dplyr::filter(`Facility Zip Code` %in% nyc_zip_sf$ZIPCODE)

# Nursing homes only; nh_flag marks real facilities so they can be counted
# after the left join below
nyc_nh <- nychealth_sf %>%
  dplyr::filter(`Short Description` == "NH")

# st_join() is a left join: a zip code without a nursing home still yields
# one row (facility columns NA), so the marker column is summed instead of
# counting rows with n(). The facilities are points, so no centroid step is
# needed. summarise() keeps its default grouping.
nh_fs_c_zipcode <- sf::st_join(
  foodstores_zc_ %>% sf::st_transform(2263),
  nyc_nh %>%
    dplyr::mutate(nh_flag = 1L) %>%
    sf::st_transform(2263),
  join = sf::st_contains
) %>%
  dplyr::group_by(
    ZIPCODE, PO_NAME, POPULATION, COUNTY, Positive, Total, n_foodstores
  ) %>%
  dplyr::summarise(n_nursinghomes = sum(nh_flag, na.rm = TRUE))
## `summarise()` has grouped output by 'ZIPCODE', 'PO_NAME', 'POPULATION',
## 'COUNTY', 'Positive', 'Total'. You can override using the `.groups` argument.

task 4: Join the Census ACS population, race, and age data to the NYC Planning Census Tract Data.

# Read the nyct2010 census tract shapefile and inspect its structure
nycCensus <- sf::st_read(
  dsn = "/Users/samikarim/Documents/R-spatial/nyct2010_24a-2/nyct2010.shp",
  quiet = FALSE
)
## Reading layer `nyct2010' from data source 
##   `/Users/samikarim/Documents/R-spatial/nyct2010_24a-2/nyct2010.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 2165 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 913175.1 ymin: 120128.4 xmax: 1067383 ymax: 272844.3
## Projected CRS: NAD83 / New York Long Island (ftUS)
utils::str(nycCensus)
## Classes 'sf' and 'data.frame':   2165 obs. of  12 variables:
##  $ CTLabel   : chr  "9" "102" "104" "113" ...
##  $ BoroCode  : chr  "5" "1" "1" "1" ...
##  $ BoroName  : chr  "Staten Island" "Manhattan" "Manhattan" "Manhattan" ...
##  $ CT2010    : chr  "000900" "010200" "010400" "011300" ...
##  $ BoroCT2010: chr  "5000900" "1010200" "1010400" "1011300" ...
##  $ CDEligibil: chr  "E" "I" "I" "I" ...
##  $ NTACode   : chr  "SI22" "MN17" "MN17" "MN17" ...
##  $ NTAName   : chr  "West New Brighton-New Brighton-St. George" "Midtown-Midtown South" "Midtown-Midtown South" "Midtown-Midtown South" ...
##  $ PUMA      : chr  "3903" "3807" "3807" "3807" ...
##  $ Shape_Leng: num  7729 5688 5693 5700 5808 ...
##  $ Shape_Area: num  2497010 1860993 1864600 1890907 1918145 ...
##  $ geometry  :sfc_MULTIPOLYGON of length 2165; first list element: List of 1
##   ..$ :List of 1
##   .. ..$ : num [1:28, 1:2] 962269 962289 962548 962368 962292 ...
##   ..- attr(*, "class")= chr [1:3] "XY" "MULTIPOLYGON" "sfg"
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "names")= chr [1:11] "CTLabel" "BoroCode" "BoroName" "CT2010" ...
# Derive a tract key: look up a 3-digit county code from the borough name
# (NA for any other name) and prepend it to CT2010
nycCensus <- dplyr::mutate(
  nycCensus,
  cntyFIPS = c("005", "047", "061", "081", "085")[
    match(
      BoroName,
      c("Bronx", "Brooklyn", "Manhattan", "Queens", "Staten Island")
    )
  ],
  tractFIPS = paste0(cntyFIPS, CT2010)
)

# Read the ACS DP05 export and keep the population columns used later.
#
# * The second line of the file is dropped before parsing (presumably the
#   descriptive label row of the "data_with_overlays" export -- confirm).
# * The remaining lines are handed to read.csv() through its `text` argument.
#   read.csv() then opens and closes its own connection, whereas a
#   textConnection() created inside the pipe is never closed.
# * censusCode is the last 9 characters of GEO_ID.
acsData <- readLines("/Users/samikarim/Documents/R-spatial/data/ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv") %>%
  magrittr::extract(-2) %>%
  read.csv(text = ., header = TRUE, quote = "\"") %>%
  dplyr::select(GEO_ID,
                totPop = DP05_0001E, elderlyPop = DP05_0024E, # >= 65
                malePop = DP05_0002E, femalePop = DP05_0003E,
                whitePop = DP05_0037E, blackPop = DP05_0038E,
                asianPop = DP05_0067E, hispanicPop = DP05_0071E,
                adultPop = DP05_0021E, citizenAdult = DP05_0087E) %>%
  dplyr::mutate(censusCode = stringr::str_sub(GEO_ID, -9, -1))

str(acsData)
## 'data.frame':    2167 obs. of  12 variables:
##  $ GEO_ID      : chr  "1400000US36005000100" "1400000US36005000200" "1400000US36005000400" "1400000US36005001600" ...
##  $ totPop      : int  7080 4542 5634 5917 2765 9409 4600 172 5887 2868 ...
##  $ elderlyPop  : int  51 950 710 989 76 977 648 0 548 243 ...
##  $ malePop     : int  6503 2264 2807 2365 1363 4119 2175 121 2958 1259 ...
##  $ femalePop   : int  577 2278 2827 3552 1402 5290 2425 51 2929 1609 ...
##  $ whitePop    : int  1773 2165 2623 2406 585 3185 479 69 903 243 ...
##  $ blackPop    : int  4239 1279 1699 2434 1041 4487 2122 89 1344 987 ...
##  $ asianPop    : int  130 119 226 68 130 29 27 14 68 0 ...
##  $ hispanicPop : int  2329 3367 3873 3603 1413 5905 2674 0 4562 1985 ...
##  $ adultPop    : int  6909 3582 4507 4416 2008 6851 3498 131 4237 1848 ...
##  $ citizenAdult: int  6100 2952 4214 3851 1787 6170 3056 42 2722 1412 ...
##  $ censusCode  : chr  "005000100" "005000200" "005000400" "005001600" ...
# Join the tract polygons to the ACS table on the tract key; merge() keeps
# only keys present in both inputs
nyc_acs_tract_merged <- merge(
  x = nycCensus,
  y = acsData,
  by.x = "tractFIPS",
  by.y = "censusCode"
)
names(nyc_acs_tract_merged)
##  [1] "tractFIPS"    "CTLabel"      "BoroCode"     "BoroName"     "CT2010"      
##  [6] "BoroCT2010"   "CDEligibil"   "NTACode"      "NTAName"      "PUMA"        
## [11] "Shape_Leng"   "Shape_Area"   "cntyFIPS"     "GEO_ID"       "totPop"      
## [16] "elderlyPop"   "malePop"      "femalePop"    "whitePop"     "blackPop"    
## [21] "asianPop"     "hispanicPop"  "adultPop"     "citizenAdult" "geometry"

task 5: Aggregate the ACS census data to zip code area data.

# Put the tracts in the same CRS as the zip code layer
nyc_acs_tract_merged <- nyc_acs_tract_merged %>%
  sf::st_transform(sf::st_crs(nh_fs_c_zipcode))

# Roll the tract-level ACS counts up to zip code areas: each tract is
# assigned to the zip code polygon that contains its centroid
acs_zip_merged <- nh_fs_c_zipcode %>%
  sf::st_join(sf::st_centroid(nyc_acs_tract_merged), join = sf::st_contains) %>%
  dplyr::mutate(zc_area = sf::st_area(geometry)) %>%
  dplyr::group_by(
    ZIPCODE, PO_NAME, POPULATION, COUNTY, Positive, Total,
    n_foodstores, n_nursinghomes
  ) %>%
  dplyr::summarise(
    totPop = sum(totPop),
    # totPop here is the zip-level total computed on the line above
    malePctg = sum(malePop) / totPop * 100,
    asianPop = sum(asianPop),
    blackPop = sum(blackPop),
    hispanicPop = sum(hispanicPop),
    whitePop = sum(whitePop)
  )
## Warning: st_centroid assumes attributes are constant over geometries
## `summarise()` has grouped output by 'ZIPCODE', 'PO_NAME', 'POPULATION',
## 'COUNTY', 'Positive', 'Total', 'n_foodstores'. You can override using the
## `.groups` argument.
names(acs_zip_merged)
##  [1] "ZIPCODE"        "PO_NAME"        "POPULATION"     "COUNTY"        
##  [5] "Positive"       "Total"          "n_foodstores"   "n_nursinghomes"
##  [9] "totPop"         "malePctg"       "asianPop"       "blackPop"      
## [13] "hispanicPop"    "whitePop"       "geometry"