# Libraries
library(sf)
## Warning: package 'sf' was built under R version 4.2.3
## Linking to GEOS 3.9.3, GDAL 3.5.2, PROJ 8.2.1; sf_use_s2() is TRUE
library(tidyverse)
## Warning: package 'tibble' was built under R version 4.2.3
## Warning: package 'dplyr' was built under R version 4.2.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.1 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(janitor)
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(stringr)
library(mapview)
## Warning: package 'mapview' was built under R version 4.2.3
library(purrr)
# Set working directory to the folder that holds the active document so the
# relative data paths used in this script resolve from there.
# NOTE(review): this goes through the RStudio API; presumably it will not work
# when the script is run outside an RStudio session -- confirm before running
# it any other way.
wd <- dirname(rstudioapi::getActiveDocumentContext()$path)
setwd(wd)
# Add NYC zip code shapefile.
# The directory names are capitalised the same way as in the COVID-19 CSV
# paths ("Data/...") and as the folder appears on disk, so the path also
# resolves on case-sensitive file systems.
zipcode_data <- st_read("Data/HW_Data/ZIP_CODE_040114.shp")
## Reading layer `ZIP_CODE_040114' from data source
## `C:\Users\amyca\OneDrive\Documents\GTECH7852_R\R-spatial\GTECH7852_HW\Data\HW_Data\ZIP_CODE_040114.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 263 features and 12 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
# Clean the attribute column names with janitor::clean_names()
zipcode_data <- clean_names(zipcode_data)
# Store the zip codes as numbers; converting to character first means the
# digits themselves are parsed, whatever type the column was read as.
zipcode_data$zipcode <- as.numeric(as.character(zipcode_data$zipcode))
# NYC zip code transform to EPSG:4326.
# st_transform() returns a new object and leaves its input untouched, so the
# result must be assigned back; without the assignment zipcode_data would keep
# its original projected CRS.
zipcode_data <- zipcode_data %>%
  st_transform(4326)
# Print the transformed layer so its CRS and first features can be inspected
zipcode_data
## Simple feature collection with 263 features and 12 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -74.25576 ymin: 40.49584 xmax: -73.6996 ymax: 40.91517
## Geodetic CRS: WGS 84
## First 10 features:
## zipcode bldgzip po_name population area state county st_fips cty_fips
## 1 11436 0 Jamaica 18681 22699295 NY Queens 36 081
## 2 11213 0 Brooklyn 62426 29631004 NY Kings 36 047
## 3 11212 0 Brooklyn 83866 41972104 NY Kings 36 047
## 4 11225 0 Brooklyn 56527 23698630 NY Kings 36 047
## 5 11218 0 Brooklyn 72280 36868799 NY Kings 36 047
## 6 11226 0 Brooklyn 106132 39408598 NY Kings 36 047
## 7 11219 0 Brooklyn 92561 42002738 NY Kings 36 047
## 8 11210 0 Brooklyn 67067 47887023 NY Kings 36 047
## 9 11230 0 Brooklyn 80857 49926703 NY Kings 36 047
## 10 11204 0 Brooklyn 77354 43555185 NY Kings 36 047
## url shape_area shape_len geometry
## 1 http://www.usps.com/ 0 0 POLYGON ((-73.80585 40.6829...
## 2 http://www.usps.com/ 0 0 POLYGON ((-73.9374 40.67973...
## 3 http://www.usps.com/ 0 0 POLYGON ((-73.90294 40.6708...
## 4 http://www.usps.com/ 0 0 POLYGON ((-73.95797 40.6706...
## 5 http://www.usps.com/ 0 0 POLYGON ((-73.97208 40.6506...
## 6 http://www.usps.com/ 0 0 POLYGON ((-73.9619 40.65487...
## 7 http://www.usps.com/ 0 0 POLYGON ((-73.98906 40.6441...
## 8 http://www.usps.com/ 0 0 POLYGON ((-73.9584 40.63633...
## 9 http://www.usps.com/ 0 0 POLYGON ((-73.96451 40.6366...
## 10 http://www.usps.com/ 0 0 POLYGON ((-73.98108 40.6352...
# Read each COVID-19 test CSV and clean its column names in one step
COVID_TESTS_4_12_2020 <- clean_names(
  read.csv("Data/R-Spatial_II_Lab/tests-by-zcta_2020_04_12.csv")
)
COVID_TESTS_4_19_2020 <- clean_names(
  read.csv("Data/R-Spatial_II_Lab/tests-by-zcta_2020_04_19.csv")
)
COVID_TESTS_4_23_2021 <- clean_names(
  read.csv("Data/R-Spatial_II_Lab/tests-by-zcta_2021_04_23.csv")
)

# Relabel the 2020 tables by position so every column carries its report date
# and the key column is called zipcode in all tables
names(COVID_TESTS_4_12_2020) <- c(
  "zipcode", "4_12_20_Positive", "4_12_20_Total", "4_12_20_cum_percpositive"
)
names(COVID_TESTS_4_19_2020) <- c(
  "zipcode", "4_19_20_Positive", "4_19_20_Total", "4_19_20_cum_percpositive"
)

# The 4_23_2021 table has more columns than the other two, so keep only the
# four that are needed and rename them while selecting
COVID_TESTS_4_23_2021_select <- select(
  COVID_TESTS_4_23_2021,
  zipcode = modified_zcta,
  `4_23_21_Positive` = covid_case_count,
  `4_23_21_Total` = total_covid_tests,
  `4_23_21_cum_percpositive` = percent_positive
)
# Collect the zip code layer and the three COVID-19 tables, in join order
list_df <- list(
  zipcode_data,
  COVID_TESTS_4_12_2020,
  COVID_TESTS_4_19_2020,
  COVID_TESTS_4_23_2021_select
)

# Fold the list from left to right with a full join on zipcode, then drop
# every row that contains a missing value and put the result in EPSG:4326
Zipcode_CovidTests <- reduce(
  list_df,
  function(joined, next_table) full_join(joined, next_table, by = "zipcode")
) %>%
  drop_na() %>%
  st_transform(crs = 4326)

# Map the cumulative percent-positive column of each report date, no legend
mapview(
  Zipcode_CovidTests,
  zcol = c(
    "4_12_20_cum_percpositive",
    "4_19_20_cum_percpositive",
    "4_23_21_cum_percpositive"
  ),
  legend = FALSE
)