GTECH78520_23S_week11

libraries

library(sf)

## Warning: package 'sf' was built under R version 4.2.3

## Linking to GEOS 3.9.3, GDAL 3.5.2, PROJ 8.2.1; sf_use_s2() is TRUE

library(tidyverse)

## Warning: package 'tibble' was built under R version 4.2.3

## Warning: package 'dplyr' was built under R version 4.2.3

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.1     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(janitor)

## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test

library(stringr)
library(mapview)

## Warning: package 'mapview' was built under R version 4.2.3

library(purrr)

# Set the working directory to the folder that contains this document so the
# relative data paths used below resolve.
# The RStudio API is only usable inside an RStudio session and reports an
# empty path for unsaved documents, so the directory is only changed when a
# real path is available; otherwise the current working directory is kept.
wd <- if (rstudioapi::isAvailable()) {
  dirname(rstudioapi::getActiveDocumentContext()$path)
} else {
  ""
}
if (nzchar(wd)) {
  setwd(wd)
}

Part 1. Join the COVID-19 data to the NYC zip code area data (sf or sp polygons).

Adding data and data clean-up

# Add NYC zip code shapefile.
# The directory is written with its on-disk capitalisation ("Data/HW_Data")
# so the script also runs on case-sensitive file systems.
zipcode_data <- st_read("Data/HW_Data/ZIP_CODE_040114.shp")

## Reading layer `ZIP_CODE_040114' from data source 
##   `C:\Users\amyca\OneDrive\Documents\GTECH7852_R\R-spatial\GTECH7852_HW\Data\HW_Data\ZIP_CODE_040114.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 263 features and 12 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)

# Standardise the column names and store the zip code as a number so it can
# be used as a join key later on.
zipcode_data <- clean_names(zipcode_data)
zipcode_data$zipcode <- as.numeric(as.character(zipcode_data$zipcode))

# NYC zip code transform to 4326 (WGS 84).
# The result is assigned back so the reprojection is actually kept, and the
# object is then printed for inspection.
zipcode_data <- zipcode_data %>%
  st_transform(4326)
zipcode_data

## Simple feature collection with 263 features and 12 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: -74.25576 ymin: 40.49584 xmax: -73.6996 ymax: 40.91517
## Geodetic CRS:  WGS 84
## First 10 features:
##    zipcode bldgzip  po_name population     area state county st_fips cty_fips
## 1    11436       0  Jamaica      18681 22699295    NY Queens      36      081
## 2    11213       0 Brooklyn      62426 29631004    NY  Kings      36      047
## 3    11212       0 Brooklyn      83866 41972104    NY  Kings      36      047
## 4    11225       0 Brooklyn      56527 23698630    NY  Kings      36      047
## 5    11218       0 Brooklyn      72280 36868799    NY  Kings      36      047
## 6    11226       0 Brooklyn     106132 39408598    NY  Kings      36      047
## 7    11219       0 Brooklyn      92561 42002738    NY  Kings      36      047
## 8    11210       0 Brooklyn      67067 47887023    NY  Kings      36      047
## 9    11230       0 Brooklyn      80857 49926703    NY  Kings      36      047
## 10   11204       0 Brooklyn      77354 43555185    NY  Kings      36      047
##                     url shape_area shape_len                       geometry
## 1  http://www.usps.com/          0         0 POLYGON ((-73.80585 40.6829...
## 2  http://www.usps.com/          0         0 POLYGON ((-73.9374 40.67973...
## 3  http://www.usps.com/          0         0 POLYGON ((-73.90294 40.6708...
## 4  http://www.usps.com/          0         0 POLYGON ((-73.95797 40.6706...
## 5  http://www.usps.com/          0         0 POLYGON ((-73.97208 40.6506...
## 6  http://www.usps.com/          0         0 POLYGON ((-73.9619 40.65487...
## 7  http://www.usps.com/          0         0 POLYGON ((-73.98906 40.6441...
## 8  http://www.usps.com/          0         0 POLYGON ((-73.9584 40.63633...
## 9  http://www.usps.com/          0         0 POLYGON ((-73.96451 40.6366...
## 10 http://www.usps.com/          0         0 POLYGON ((-73.98108 40.6352...

# Load the three COVID-19 testing snapshots and standardise their column
# names in the same step
COVID_TESTS_4_12_2020 <- clean_names(
  read.csv("Data/R-Spatial_II_Lab/tests-by-zcta_2020_04_12.csv")
)
COVID_TESTS_4_19_2020 <- clean_names(
  read.csv("Data/R-Spatial_II_Lab/tests-by-zcta_2020_04_19.csv")
)
COVID_TESTS_4_23_2021 <- clean_names(
  read.csv("Data/R-Spatial_II_Lab/tests-by-zcta_2021_04_23.csv")
)

# Put the snapshot date into the column names so the columns stay
# distinguishable once the tables are joined (names are assigned by position)
names(COVID_TESTS_4_12_2020) <- c(
  'zipcode', '4_12_20_Positive', '4_12_20_Total', '4_12_20_cum_percpositive'
)
names(COVID_TESTS_4_19_2020) <- c(
  'zipcode', '4_19_20_Positive', '4_19_20_Total', '4_19_20_cum_percpositive'
)

# The 4_23_2021 snapshot has more columns than the other two, so keep only
# the four that correspond to them before renaming
COVID_TESTS_4_23_2021_select <- select(
  COVID_TESTS_4_23_2021,
  modified_zcta, covid_case_count, total_covid_tests, percent_positive
)
names(COVID_TESTS_4_23_2021_select) <- c(
  'zipcode', '4_23_21_Positive', '4_23_21_Total', '4_23_21_cum_percpositive'
)

Joining dataframes (zipcode_data, COVID_TESTS_4_12_2020, COVID_TESTS_4_19_2020, COVID_TESTS_4_23_2021_select)

# Collect the zip code polygons and the three COVID-19 tables; the polygons
# come first so the joined result starts from the sf object
list_df <- list(
  zipcode_data,
  COVID_TESTS_4_12_2020,
  COVID_TESTS_4_19_2020,
  COVID_TESTS_4_23_2021_select
)

# Fold full_join() over the list on `zipcode`, discard every row that has a
# missing value in any column, and reproject to EPSG:4326
Zipcode_CovidTests <- st_transform(
  drop_na(
    reduce(list_df, full_join, by = 'zipcode')
  ),
  crs = 4326
)

Visual of joined dataframe

# Interactive map with one layer per snapshot of the cumulative percent
# positive; legends are switched off
mapview(
  Zipcode_CovidTests,
  zcol = c(
    '4_12_20_cum_percpositive',
    '4_19_20_cum_percpositive',
    '4_23_21_cum_percpositive'
  ),
  legend = FALSE
)

Part 2. Aggregate the NYC food retail store data (points) to the zip code data, so that we know how many retail stores are in each zip code area. Note that not all locations are food retail stores, so we need to choose the specific establishment types according to the data.

Adding data and data clean-up for food retail store data

# Add NYC food retail store data (points).
# The directory is written with its on-disk capitalisation ("Data/HW_Data")
# so the script also runs on case-sensitive file systems.
food_retails_xy <- read.csv(
  "Data/HW_Data/nys_retail_food_store_xy.csv",
  fileEncoding = "Latin1",
  check.names = FALSE
)

# Clean the column names, then keep only stores in the five NYC counties
# that have both coordinates
food_retails_xy <- clean_names(food_retails_xy)
food_retails_NY <- food_retails_xy %>%
  filter(zip_code > 7000) %>%
  filter(i_county %in% c("Bronx", "Kings", "Queens", "New York", "Richmond")) %>%
  filter(!is.na(x), !is.na(y))

# Turn the table into an sf point layer built from the x/y columns, with the
# coordinates declared as EPSG:4326
food_retailsNY_SF <- st_as_sf(
  food_retails_NY,
  coords = c("x", "y"),
  crs = 4326
)

# First trim off the extra white space in all character columns (this
# includes establishment_type)
food_retailNY_SF2 <- food_retailsNY_SF %>%
  mutate(across(where(is.character), str_trim))

# Establishment type codes treated as food retail.
# The codes must not carry surrounding white space: establishment_type is
# trimmed above, so a padded code such as "JACO " could never match.
ET <- c(
  "JAC", "JABC", "JABCP", "JACDK", "JABCDP", "JACD", "JACFS", "JABCHK",
  "JACHK", "JABCK", "JACK", "JACDHK", "JACH", "JACDE", "JABCH", "JABCDH",
  "JABCD", "JACE", "JACI", "JACO", "JACDH", "JABCG", "JACV", "JABCOP",
  "JAK", "JACL", "JACG", "JABCDK", "JACZ", "JACW", "JCA", "JACDKM",
  "JABCGP", "JACN", "JABCKO", "JABCW", "JACDIK", "JACS", "JABCO", "JACDG",
  "JACP", "JABCHO", "JACHOP", "JACHO", "JACEW", "JDAC", "JKDAC", "JACHKO",
  "JACEK"
)

# Filter the data to only food retail stores
food_retailNY_SF3 <- food_retailNY_SF2 %>%
  filter(establishment_type %in% ET)

Find how many retail stores are in each zip code area

# Aggregate the NYC food retail store data (points) to the zip code data.
# st_join() performs a left join by default: every zip code polygon is kept,
# with one row per store that intersects it, or a single row with missing
# store attributes when no store matches.
foodretail_zip <- Zipcode_CovidTests %>%
  mutate(tract_area = st_area(geometry)) %>%
  st_transform(4326) %>%
  st_join(food_retailNY_SF3)

# Count the stores per zip code.
# Only rows that actually carry a store (non-missing establishment_type) are
# counted, so a zip code without any store gets 0 rather than 1. The count is
# taken before the store columns are dropped; afterwards one row per zip code
# is kept.
COVID_FoodRetail_ZIP <- foodretail_zip %>%
  group_by(zipcode) %>%
  mutate(Total_FoodRetail = sum(!is.na(establishment_type))) %>%
  select(c('zipcode','county', 'st_fips', 'cty_fips', 'po_name', '4_12_20_Positive','4_12_20_Total', '4_12_20_cum_percpositive','4_19_20_Positive','4_19_20_Total', '4_19_20_cum_percpositive','4_23_21_Positive','4_23_21_Total', '4_23_21_cum_percpositive'),
         Total_FoodRetail) %>%
  distinct(zipcode, .keep_all = TRUE)

Visual for number of food retail stores by zipcode

# Interactive map of the zip codes coloured by their food retail store count
mapview(
  COVID_FoodRetail_ZIP,
  zcol = 'Total_FoodRetail'
)

Part 3. Aggregate the NYC health facilities (points) to the zip code data. Similarly, choose appropriate subtypes such as nursing homes from the facilities.

Add data and data clean-up for health facilities

# Add NYC health facilities data (points).
# The directory is written with its on-disk capitalisation ("Data/HW_Data")
# so the script also runs on case-sensitive file systems.
HealthFacilties_data <- read.csv("Data/HW_Data/NYS_Health_Facility.csv")

# Clean the column names and inspect the structure of the table
HealthFacilties_data <- clean_names(HealthFacilties_data)
str(HealthFacilties_data)

## 'data.frame':    3990 obs. of  36 variables:
##  $ facility_id                 : int  204 620 654 1156 2589 3455 3853 4249 4473 6230 ...
##  $ facility_name               : chr  "Hospice at Lourdes" "Charles T Sitrin Health Care Center Inc" "Central Park Rehabilitation and Nursing Center" "East Side Nursing Home" ...
##  $ short_description           : chr  "HSPC" "NH" "NH" "NH" ...
##  $ description                 : chr  "Hospice" "Residential Health Care Facility - SNF" "Residential Health Care Facility - SNF" "Residential Health Care Facility - SNF" ...
##  $ facility_open_date          : chr  "06/01/1985" "02/01/1989" "02/01/1989" "08/01/1979" ...
##  $ facility_address_1          : chr  "4102 Old Vestal Road" "2050 Tilden Avenue" "116 Martin Luther King East" "62 Prospect St" ...
##  $ facility_address_2          : chr  "" "" "" "" ...
##  $ facility_city               : chr  "Vestal" "New Hartford" "Syracuse" "Warsaw" ...
##  $ facility_state              : chr  "New York" "New York" "New York" "New York" ...
##  $ facility_zip_code           : chr  "13850" "13413" "13205" "14569" ...
##  $ facility_phone_number       : num  6.08e+09 3.16e+09 3.15e+09 5.86e+09 5.86e+09 ...
##  $ facility_fax_number         : num  NA NA NA NA NA ...
##  $ facility_website            : chr  "" "" "" "" ...
##  $ facility_county_code        : int  3 32 33 60 2 14 29 14 29 7093 ...
##  $ facility_county             : chr  "Broome" "Oneida" "Onondaga" "Wyoming" ...
##  $ regional_office_id          : int  3 3 3 1 1 1 7 1 7 5 ...
##  $ regional_office             : chr  "Central New York Regional Office" "Central New York Regional Office" "Central New York Regional Office" "Western Regional Office - Buffalo" ...
##  $ main_site_name              : chr  "" "" "" "" ...
##  $ main_site_facility_id       : int  NA NA NA NA NA NA NA NA NA 1463 ...
##  $ operating_certificate_number: chr  "0301501F" "3227304N" "3301326N" "6027303N" ...
##  $ operator_name               : chr  "Our Lady of Lourdes Memorial Hospital Inc" "Charles T Sitrin Health Care Center, Inc" "CPRNC, LLC" "East Side Nursing Home Inc" ...
##  $ operator_address_1          : chr  "169 Riverside Drive" "Box 1000 Tilden Avenue" "116 Martin Luther King East" "62 Prospect Street" ...
##  $ operator_address_2          : chr  "" "" "" "" ...
##  $ operator_city               : chr  "Binghamton" "New Hartford" "Syracuse" "Warsaw" ...
##  $ operator_state              : chr  "New York" "New York" "New York" "New York" ...
##  $ operator_zip_code           : chr  "13905" "13413" "13205" "14569" ...
##  $ cooperator_name             : chr  "" "" "" "" ...
##  $ cooperator_address          : chr  "" "" "" "" ...
##  $ cooperator_address_2        : chr  "" "" "" "" ...
##  $ cooperator_city             : chr  "" "" "" "" ...
##  $ cooperator_state            : chr  "New York" "New York" "New York" "New York" ...
##  $ cooperator_zip_code         : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ ownership_type              : chr  "Not for Profit Corporation" "Not for Profit Corporation" "LLC" "Business Corporation" ...
##  $ facility_latitude           : num  42.1 43.1 NA 42.7 42.1 ...
##  $ facility_longitude          : num  -76 -75.2 NA -78.1 -78 ...
##  $ facility_location           : chr  "(42.097095, -75.975243)" "(43.05497, -75.228828)" "" "(42.738979, -78.12867)" ...

# Keep only the facilities located in the five NYC counties
HealthFacilties_data <- filter(
  HealthFacilties_data,
  facility_county %in% c("Bronx", "Kings","Queens", "New York","Richmond")
)

# Turn the health facilities table into an sf point layer, using the
# "(lat, lon)" text stored in facility_location.

# Positions of the opening and the closing parenthesis
leftPos <- stringr::str_locate(HealthFacilties_data$facility_location, "\\(")[, 1]
rghtPos <- stringr::str_locate(HealthFacilties_data$facility_location, "\\)")[, 1]

# Text between the parentheses, and where the ", " separator sits inside it
HealthFacilties_data$coords <- stringr::str_sub(
  HealthFacilties_data$facility_location,
  leftPos + 1,
  rghtPos - 1
)
cmmaPos <- stringr::str_locate(HealthFacilties_data$coords, ", ")

# Numeric coordinates: the part before the separator becomes Y, the part
# after it becomes X
HealthFacilties_data$Y <- as.numeric(
  stringr::str_sub(HealthFacilties_data$coords, 1, cmmaPos[, 1] - 1)
)
HealthFacilties_data$X <- as.numeric(
  stringr::str_sub(HealthFacilties_data$coords, cmmaPos[, 2] + 1)
)

# Drop the rows without coordinates and build the sf object
HealthFacilties_SF <- st_as_sf(
  tidyr::drop_na(HealthFacilties_data, X, Y),
  coords = c('X', 'Y')
)

# Assign coordinate system
st_crs(HealthFacilties_SF) <- 4326

# Select the facility subtypes of interest by their `description` value.

# HF lists the descriptions kept: hospitals, clinics, and medical centers.
HF <- c(
  "Diagnostic and Treatment Center",
  "Hospital Extension Clinic",
  "Hospital",
  "Diagnostic and Treatment Center Extension Clinic",
  "Primary Care Hospital - Critical Access Hospital Extension Clinic",
  "Primary Care Hospital - Critical Access Hospital"
)

NYC_HealthFacilties_SF <- filter(HealthFacilties_SF, description %in% HF)

Find how many health facilities are in each zip code area

# Aggregate the NYC health facilities (points) to the zip code data.
# st_join() performs a left join by default, so a zip code without any
# facility still yields one row whose facility attributes are missing.
# Facilities are therefore counted as the rows with a non-missing
# `description` (every retained facility has one), which gives 0 rather than
# 1 for zip codes without a facility. The count is taken before the facility
# columns are dropped; afterwards one row per zip code is kept.
COVID_FR_HealthFacilties_zip <- COVID_FoodRetail_ZIP %>%
  filter(zipcode > 100) %>%
  st_transform(4326) %>%
  st_join(NYC_HealthFacilties_SF) %>%
  group_by(zipcode) %>%
  mutate(Total_HealthFacilities = sum(!is.na(description))) %>%
  select(c('zipcode','county', 'st_fips', 'cty_fips', 'po_name', '4_12_20_Positive','4_12_20_Total', '4_12_20_cum_percpositive','4_19_20_Positive','4_19_20_Total', '4_19_20_cum_percpositive','4_23_21_Positive','4_23_21_Total', '4_23_21_cum_percpositive','Total_FoodRetail'),
         Total_HealthFacilities) %>%
  distinct(zipcode, .keep_all = TRUE)

Data visualization for number of health facilities by zip code

# Interactive map of the zip codes coloured by their health facility count
mapview(
  COVID_FR_HealthFacilties_zip,
  zcol = 'Total_HealthFacilities'
)

Part 4. Join the Census ACS population, race, and age data to the NYC Planning Census Tract Data.

Adding data and data clean-up for NYC Planning Census Tract

# Read the NYC Planning 2010 census tract polygons
nyc_census <- st_read(
  dsn = "Data/R-Spatial_II_Lab/2010 Census Tracts/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp"
)

## Reading layer `geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a' from data source `C:\Users\amyca\OneDrive\Documents\GTECH7852_R\R-spatial\GTECH7852_HW\Data\R-Spatial_II_Lab\2010 Census Tracts\geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 2165 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS:  WGS84(DD)

# NYC Planning Census Tract: derive the county FIPS code from the borough
# name, then append the tract code to it. The combined tractFIPS is the key
# used later to join with the ACS census data.
nyc_census <- nyc_census %>%
  mutate(
    cntyFIPS = case_when(
      boro_name == 'Bronx'         ~ '005',
      boro_name == 'Brooklyn'      ~ '047',
      boro_name == 'Manhattan'     ~ '061',
      boro_name == 'Queens'        ~ '081',
      boro_name == 'Staten Island' ~ '085'
    )
  ) %>%
  mutate(tractFIPS = paste0(cntyFIPS, ct2010))

Adding data and data clean-up for Census ACS data (population, race, and age data).

# Read the Census ACS data
ACS_census <- read.csv("Data/R-Spatial_II_Lab/ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv")

# Drop the first data row before selecting the population, race, and age
# columns.
# NOTE(review): presumably this row holds the descriptive column labels of
# the "with overlays" export - confirm against the file.
ACS_census2 <- ACS_census[-1, ]

# ACS variable codes that are needed
categories <- c("DP05_0001E","DP05_0024E","DP05_0002E","DP05_0003E","DP05_0037E","DP05_0038E","DP05_0039E","DP05_0067E","DP05_0052E","DP05_0071E","DP05_0087E")

# Keep the identifier columns plus the selected variables, give them readable
# names, and derive the join key.
# all_of() is used because passing an external character vector straight to
# select() is deprecated in tidyselect.
# census_code is the last nine characters of GEO_ID; it is matched against
# tractFIPS of the census tract layer.
ACS_categories <- ACS_census2 %>%
  select(all_of(c("GEO_ID", "NAME", categories))) %>%
  rename("TotalPop"="DP05_0001E",
         "ElderlyPop" = "DP05_0024E",
         "MalePop" = "DP05_0002E",
         "FemalePop" = "DP05_0003E",
         "WhitePop" = "DP05_0037E",
         "BlackPop" = "DP05_0038E",
         "AmericanIndian" = "DP05_0039E",
         "AsianPop" = "DP05_0067E",
         "NativeHawaiian" = "DP05_0052E",
         "HispanicPop" = "DP05_0071E",
         "CitizenVotingAgePop" = "DP05_0087E") %>%
  mutate(census_code = str_sub(GEO_ID, -9))

## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(categories)
## 
##   # Now:
##   data %>% select(all_of(categories))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Joining the NYC Planning Census Tract and ACS Census Data

# Attribute join: attach the Census ACS population, race, and age columns to
# the NYC Planning census tracts (tractFIPS matched against census_code), then
# drop every row that contains a missing value
census_merge <- nyc_census %>%
  merge(ACS_categories, by.x = "tractFIPS", by.y = "census_code") %>%
  na.omit()

Data visualization for NYC Planning Census Tract and ACS Census Data

# Interactive map of the joined census tracts coloured by borough
mapview(
  census_merge,
  zcol = "boro_name"
)

Part 5: Aggregate the ACS census data to zip code area data.

Data set-up and clean-up

# Reduce each census tract in census_merge to its centroid point and express
# the points in EPSG:4326
census_centroids <- st_transform(st_centroid(census_merge), 4326)

## Warning: st_centroid assumes attributes are constant over geometries

# Put both layers into the same CRS (EPSG:4326) before they are joined
# spatially
zipcode_4326 <- st_transform(COVID_FR_HealthFacilties_zip, crs = 4326)
census_c4326 <- st_transform(census_centroids, crs = 4326)

# Make sure the demographic columns are numeric.
# The columns are addressed by name rather than by position, and converted
# with mutate(across()) so that only these attribute columns are touched.
# apply() on an sf object also pulls in the sticky geometry column, which
# produces one column more than is assigned back and triggers a
# length-mismatch warning.
pop_cols <- c(
  "TotalPop", "ElderlyPop", "MalePop", "FemalePop", "WhitePop",
  "BlackPop", "AmericanIndian", "AsianPop", "NativeHawaiian", "HispanicPop"
)
census_c4326 <- census_c4326 %>%
  mutate(across(all_of(pop_cols), ~ as.numeric(as.character(.x))))

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in matrix(value, n, p): data length [23804] is not a sub-multiple or
## multiple of the number of columns [10]

Join all of the census_merge data to zip code area data with the addition of demographics data

# Attach to every zip code polygon the census tract centroids it contains,
# keep zip codes above 100, and group the result by zip code
census_centroid_zip_merge <- zipcode_4326 %>%
  st_join(census_c4326, join = st_contains) %>%
  filter(zipcode > 100) %>%
  group_by(zipcode)

Consolidated version of the covid data, food retail stores, location, and census demographics information by zipcode

# Build the consolidated zip code table: join the tract centroids contained
# in each zip code polygon, keep the COVID-19, store, facility, and population
# columns, total the population columns per zip code, express each subgroup
# as a percentage of the total population, and keep one row per zip code.
nyc_zipcode_demongraphics <- zipcode_4326 %>%
  st_join(census_c4326, join = st_contains) %>%
  filter(zipcode > 100) %>%
  select(c('zipcode','county', 'st_fips', 'cty_fips', 'po_name', '4_12_20_Positive','4_12_20_Total', '4_12_20_cum_percpositive','4_19_20_Positive','4_19_20_Total', '4_19_20_cum_percpositive','4_23_21_Positive','4_23_21_Total', '4_23_21_cum_percpositive','Total_FoodRetail','Total_HealthFacilities', 'TotalPop','ElderlyPop','MalePop','FemalePop','WhitePop', 'BlackPop','AmericanIndian','AsianPop','NativeHawaiian','HispanicPop')) %>%
  group_by(zipcode) %>%
  # Replace every population column by its sum over the zip code's rows
  mutate(across(
    c(TotalPop, ElderlyPop, MalePop, FemalePop, WhitePop, BlackPop,
      AmericanIndian, AsianPop, NativeHawaiian, HispanicPop),
    sum
  )) %>%
  # Percentage of the total population, stored in new "<column>_pp" columns
  mutate(across(
    c(ElderlyPop, MalePop, FemalePop, WhitePop, BlackPop,
      AmericanIndian, AsianPop, NativeHawaiian, HispanicPop),
    ~ ((.x / TotalPop) * 100),
    .names = "{.col}_pp"
  )) %>%
  distinct(zipcode, .keep_all = TRUE)

# Remove every row that still contains a missing value from both result
# tables
census_centroid_zip_merge <- census_centroid_zip_merge %>%
  na.omit()
nyc_zipcode_demongraphics <- nyc_zipcode_demongraphics %>%
  na.omit()

Data visualization

# Print the first six rows of the consolidated table
head(nyc_zipcode_demongraphics, n = 6)

## Simple feature collection with 6 features and 35 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: -73.99193 ymin: 40.63029 xmax: -73.78805 ymax: 40.6863
## Geodetic CRS:  WGS 84
## # A tibble: 6 × 36
## # Groups:   zipcode [6]
##   zipcode county st_fips cty_fips po_name  `4_12_20_Positive` `4_12_20_Total`
##     <dbl> <chr>  <chr>   <chr>    <chr>                 <int>           <int>
## 1   11436 Queens 36      081      Jamaica                 269             412
## 2   11213 Kings  36      047      Brooklyn                793            1296
## 3   11212 Kings  36      047      Brooklyn                842            1302
## 4   11225 Kings  36      047      Brooklyn                632            1001
## 5   11218 Kings  36      047      Brooklyn                976            1606
## 6   11226 Kings  36      047      Brooklyn                995            1527
## # ℹ 29 more variables: `4_12_20_cum_percpositive` <dbl>,
## #   `4_19_20_Positive` <int>, `4_19_20_Total` <int>,
## #   `4_19_20_cum_percpositive` <dbl>, `4_23_21_Positive` <int>,
## #   `4_23_21_Total` <int>, `4_23_21_cum_percpositive` <dbl>,
## #   Total_FoodRetail <int>, Total_HealthFacilities <int>, TotalPop <dbl>,
## #   ElderlyPop <dbl>, MalePop <dbl>, FemalePop <dbl>, WhitePop <dbl>,
## #   BlackPop <dbl>, AmericanIndian <dbl>, AsianPop <dbl>, …

# Check the new table by mapping the Hispanic population percentage of each
# NYC zip code
mapview(
  nyc_zipcode_demongraphics,
  zcol = "HispanicPop_pp"
)

Output

In the end, we should have the confirmed and tested cases of covid-19, numbers of specific types of food stores, numbers of specific types of health facilities, and population (total population, elderly, by race, etc.) at the zip code level. We should also have boroughs, names, etc. for each zip code area.

# Open each result table in the data viewer, from the first join to the
# final consolidated table.

# Zip code polygons joined with the confirmed and tested COVID-19 cases
view(Zipcode_CovidTests)
# The same table with the number of selected food stores per zip code
view(COVID_FoodRetail_ZIP)
# The same table with the number of selected health facilities per zip code
view(COVID_FR_HealthFacilties_zip)
# MASTER DATASET: population (total, race, elderly, etc.) at the zip code
# level, together with the county, post office name, etc. of each zip code area
view(nyc_zipcode_demongraphics)

GTECH78520_23S_week11_24351622

Amy Carrillo

5-01-2023