Load COVID-19 Testing Data
setwd("C:/Users/Zack/Documents/Data Analysis and Viz/WEEK8/R-Spatial_II_Lab/Assignment 8")
covid_data <- read_csv("C:/Users/Zack/Documents/Data Analysis and Viz/WEEK8/R-Spatial_II_Lab/Assignment 8/tests-by-zcta_2020_04_12.csv") # Replace with your chosen file name
glimpse(covid_data)
## Rows: 178
## Columns: 4
## $ MODZCTA <dbl> NA, 10001, 10002, 10003, 10004, 10005, 10006, 10007,…
## $ Positive <dbl> 1934, 211, 539, 279, 23, 38, 10, 34, 381, 166, 355, …
## $ Total <dbl> 2082, 448, 1024, 662, 59, 116, 43, 107, 851, 468, 73…
## $ zcta_cum.perc_pos <dbl> 92.89, 47.10, 52.64, 42.15, 38.98, 32.76, 23.26, 31.…
covid_data <- covid_data %>%
filter(!is.na(MODZCTA)) %>%
mutate(MODZCTA = as.character(MODZCTA))
nyc_zips <- nyc_zips %>%
mutate(ZIPCODE = as.character(ZIPCODE))
nyc_zip_covid <- nyc_zips %>%
left_join(covid_data, by = c("ZIPCODE" = "MODZCTA"))
mapview(nyc_zip_covid, zcol = "Positive", legend = TRUE)
# Transform the CRS of nyc_retail_sf to match nyc_zips
nyc_retail_sf <- st_transform(nyc_retail_sf, crs = st_crs(nyc_zips))
# Join each store to its ZIP code
retail_with_zip <- st_join(nyc_retail_sf, nyc_zips["ZIPCODE"])
##task 4:Summarize the number of stores per ZIP code
retail_count_by_zip <- retail_with_zip %>%
st_drop_geometry() %>%
dplyr::count(ZIPCODE) %>%
dplyr::rename(Retail_Store_Count = n)
nyc_zips <- nyc_zips %>%
left_join(retail_count_by_zip, by = "ZIPCODE")
mapview(nyc_zips, zcol = "Retail_Store_Count")
health_sf <- st_transform(health_sf, crs = st_crs(nyc_zips))
Spatial join health facilities to zip codes:
health_with_zip <- st_join(health_sf, nyc_zips["ZIPCODE"])
##task 5: Part 3 Drop geometry and count health facilities per zip code:
health_count_by_zip <- health_with_zip %>%
st_drop_geometry() %>%
dplyr::count(ZIPCODE) %>%
dplyr::rename(Health_Facility_Count = n)
##task 5: Part 4 Merge the result back into nyc_zips:
nyc_zips <- nyc_zips %>%
left_join(health_count_by_zip, by = "ZIPCODE")
mapview(nyc_zips, zcol = "Health_Facility_Count")
##task 6:Load the ACS Census Data
# Read the shapefile (R will automatically recognize the .shp and its accompanying files)
nycCensus <- sf::st_read("C:/Users/Zack/Documents/Data Analysis and Viz/WEEK8/R-Spatial_II_Lab/Assignment 8/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp", stringsAsFactors = FALSE)
## Reading layer `geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a' from data source `C:\Users\Zack\Documents\Data Analysis and Viz\WEEK8\R-Spatial_II_Lab\Assignment 8\geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 2165 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS: WGS84(DD)
##task 7: Extract the tract GEOID from acs_data/ Build matching GEOID column in census_tracts/ Join ACS data to the spatial tracts
acs_data <- readLines("R-Spatial_II_Lab/Assignment 8/ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv") %>%
magrittr::extract(-2) %>%
textConnection() %>%
read.csv(header=TRUE, quote= "\"") %>%
dplyr::select(GEO_ID,
totPop = DP05_0001E, elderlyPop = DP05_0024E, # >= 65
malePop = DP05_0002E, femalePop = DP05_0003E,
whitePop = DP05_0037E, blackPop = DP05_0038E,
asianPop = DP05_0067E, hispanicPop = DP05_0071E,
adultPop = DP05_0021E, citizenAdult = DP05_0087E) %>%
dplyr::mutate(censusCode = stringr::str_sub(GEO_ID, -9,-1));
acs_data %>%
magrittr::extract(1:10,) # Get last 11 characters
## GEO_ID totPop elderlyPop malePop femalePop whitePop blackPop
## 1 1400000US36005000100 7080 51 6503 577 1773 4239
## 2 1400000US36005000200 4542 950 2264 2278 2165 1279
## 3 1400000US36005000400 5634 710 2807 2827 2623 1699
## 4 1400000US36005001600 5917 989 2365 3552 2406 2434
## 5 1400000US36005001900 2765 76 1363 1402 585 1041
## 6 1400000US36005002000 9409 977 4119 5290 3185 4487
## 7 1400000US36005002300 4600 648 2175 2425 479 2122
## 8 1400000US36005002400 172 0 121 51 69 89
## 9 1400000US36005002500 5887 548 2958 2929 903 1344
## 10 1400000US36005002701 2868 243 1259 1609 243 987
## asianPop hispanicPop adultPop citizenAdult censusCode
## 1 130 2329 6909 6100 005000100
## 2 119 3367 3582 2952 005000200
## 3 226 3873 4507 4214 005000400
## 4 68 3603 4416 3851 005001600
## 5 130 1413 2008 1787 005001900
## 6 29 5905 6851 6170 005002000
## 7 27 2674 3498 3056 005002300
## 8 14 0 131 42 005002400
## 9 68 4562 4237 2722 005002500
## 10 0 1985 1848 1412 005002701
# Create a mapping from boro_code to county FIPS
nycCensus %<>% dplyr::mutate(cntyFIPS = case_when(
boro_name == 'Bronx' ~ '005',
boro_name == 'Brooklyn' ~ '047',
boro_name == 'Manhattan' ~ '061',
boro_name == 'Queens' ~ '081',
boro_name == 'Staten Island' ~ '085'),
tractFIPS = paste(cntyFIPS, ct2010, sep='')
)
nyc_tracts <- merge(nycCensus, acs_data, by.x ='tractFIPS', by.y = 'censusCode')
#nyc_tracts <- nycCensus %>%
#left_join(acs_data, by = c("tractFIPS" = "censusCode"))
nyc_tracts <- st_transform(nyc_tracts, st_crs(nyc_zips))
tracts_with_zip <- st_join(nyc_tracts, nyc_zips["ZIPCODE"])
acs_by_zip <- tracts_with_zip %>%
st_drop_geometry() %>%
filter(!is.na(ZIPCODE)) %>%
group_by(ZIPCODE) %>%
summarise(
ZIPCODE = first(ZIPCODE),
Total_Population = sum(totPop, na.rm = TRUE),
Elderly_Population = sum(elderlyPop, na.rm = TRUE),
Hispanic_Population = sum(hispanicPop, na.rm = TRUE),
Black_Population = sum(blackPop, na.rm = TRUE)
) %>%
ungroup()
nyc_zips <- nyc_zips %>%
left_join(acs_by_zip, by = "ZIPCODE")
##task : Read and Clean the Right ACS File
acs_data_raw <- read_csv(
"C:/Users/Zack/Documents/Data Analysis and Viz/WEEK8/R-Spatial_II_Lab/Assignment 8/ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv",
skip = 1 # skips the metadata header row
)
mapview(nyc_tracts, zcol = "elderlyPop", legend = TRUE)