Task 1:

Load COVID-19 Testing Data

setwd("C:/Users/Zack/Documents/Data Analysis and Viz/WEEK8/R-Spatial_II_Lab/Assignment 8")

covid_data <- read_csv("C:/Users/Zack/Documents/Data Analysis and Viz/WEEK8/R-Spatial_II_Lab/Assignment 8/tests-by-zcta_2020_04_12.csv")  # Replace with your chosen file name
glimpse(covid_data)
## Rows: 178
## Columns: 4
## $ MODZCTA           <dbl> NA, 10001, 10002, 10003, 10004, 10005, 10006, 10007,…
## $ Positive          <dbl> 1934, 211, 539, 279, 23, 38, 10, 34, 381, 166, 355, …
## $ Total             <dbl> 2082, 448, 1024, 662, 59, 116, 43, 107, 851, 468, 73…
## $ zcta_cum.perc_pos <dbl> 92.89, 47.10, 52.64, 42.15, 38.98, 32.76, 23.26, 31.…

task 2: Part 1 Clean and Match ZIP Code Columns

covid_data <- covid_data %>%
  filter(!is.na(MODZCTA)) %>%
  mutate(MODZCTA = as.character(MODZCTA))

nyc_zips <- nyc_zips %>%
  mutate(ZIPCODE = as.character(ZIPCODE))

task 2:Part 2 Join COVID data to Zip Code Shapes

nyc_zip_covid <- nyc_zips %>%
  left_join(covid_data, by = c("ZIPCODE" = "MODZCTA"))

Map: Positive covid

mapview(nyc_zip_covid, zcol = "Positive", legend = TRUE)

task 3:Transform nyc_retail_sf to match nyc_zips

# Transform the CRS of nyc_retail_sf to match nyc_zips
nyc_retail_sf <- st_transform(nyc_retail_sf, crs = st_crs(nyc_zips))
# Join each store to its ZIP code
retail_with_zip <- st_join(nyc_retail_sf, nyc_zips["ZIPCODE"])

##task 4:Summarize the number of stores per ZIP code

retail_count_by_zip <- retail_with_zip %>%
  st_drop_geometry() %>%
  dplyr::count(ZIPCODE) %>%
  dplyr::rename(Retail_Store_Count = n)

nyc_zips <- nyc_zips %>%
  left_join(retail_count_by_zip, by = "ZIPCODE")

Map: Retail_Store

mapview(nyc_zips, zcol = "Retail_Store_Count")

task 5: Aggregate Health Facilities by ZIP Code

health_sf <- st_transform(health_sf, crs = st_crs(nyc_zips))

task 5: part 2

Spatial join health facilities to zip codes:

health_with_zip <- st_join(health_sf, nyc_zips["ZIPCODE"])

##task 5: Part 3 Drop geometry and count health facilities per zip code:

health_count_by_zip <- health_with_zip %>%
  st_drop_geometry() %>%
  dplyr::count(ZIPCODE) %>%
  dplyr::rename(Health_Facility_Count = n)

##task 5: Part 4 Merge the result back into nyc_zips:

nyc_zips <- nyc_zips %>%
  left_join(health_count_by_zip, by = "ZIPCODE")

Map: Health_Facility

mapview(nyc_zips, zcol = "Health_Facility_Count")

##task 6:Load the ACS Census Data

# Read the shapefile (R will automatically recognize the .shp and its accompanying files)
nycCensus <- sf::st_read("C:/Users/Zack/Documents/Data Analysis and Viz/WEEK8/R-Spatial_II_Lab/Assignment 8/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp", stringsAsFactors = FALSE)
## Reading layer `geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a' from data source `C:\Users\Zack\Documents\Data Analysis and Viz\WEEK8\R-Spatial_II_Lab\Assignment 8\geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 2165 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS:  WGS84(DD)

##task 7: Extract the tract GEOID from acs_data/ Build matching GEOID column in census_tracts/ Join ACS data to the spatial tracts

acs_data <- readLines("R-Spatial_II_Lab/Assignment 8/ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv") %>%
  magrittr::extract(-2) %>% 
  textConnection() %>%
  read.csv(header=TRUE, quote= "\"") %>%
  dplyr::select(GEO_ID, 
                totPop = DP05_0001E, elderlyPop = DP05_0024E, # >= 65
                malePop = DP05_0002E, femalePop = DP05_0003E,  
                whitePop = DP05_0037E, blackPop = DP05_0038E,
                asianPop = DP05_0067E, hispanicPop = DP05_0071E,
                adultPop = DP05_0021E, citizenAdult = DP05_0087E) %>%
  dplyr::mutate(censusCode = stringr::str_sub(GEO_ID, -9,-1));


acs_data %>%
  magrittr::extract(1:10,)         # Get last 11 characters
##                  GEO_ID totPop elderlyPop malePop femalePop whitePop blackPop
## 1  1400000US36005000100   7080         51    6503       577     1773     4239
## 2  1400000US36005000200   4542        950    2264      2278     2165     1279
## 3  1400000US36005000400   5634        710    2807      2827     2623     1699
## 4  1400000US36005001600   5917        989    2365      3552     2406     2434
## 5  1400000US36005001900   2765         76    1363      1402      585     1041
## 6  1400000US36005002000   9409        977    4119      5290     3185     4487
## 7  1400000US36005002300   4600        648    2175      2425      479     2122
## 8  1400000US36005002400    172          0     121        51       69       89
## 9  1400000US36005002500   5887        548    2958      2929      903     1344
## 10 1400000US36005002701   2868        243    1259      1609      243      987
##    asianPop hispanicPop adultPop citizenAdult censusCode
## 1       130        2329     6909         6100  005000100
## 2       119        3367     3582         2952  005000200
## 3       226        3873     4507         4214  005000400
## 4        68        3603     4416         3851  005001600
## 5       130        1413     2008         1787  005001900
## 6        29        5905     6851         6170  005002000
## 7        27        2674     3498         3056  005002300
## 8        14           0      131           42  005002400
## 9        68        4562     4237         2722  005002500
## 10        0        1985     1848         1412  005002701
# Create a mapping from boro_code to county FIPS
nycCensus %<>% dplyr::mutate(cntyFIPS = case_when(
  boro_name == 'Bronx' ~ '005',
  boro_name == 'Brooklyn' ~ '047',
  boro_name == 'Manhattan' ~ '061',
  boro_name == 'Queens' ~ '081',
  boro_name == 'Staten Island' ~ '085'),
  tractFIPS = paste(cntyFIPS, ct2010, sep='')
)

nyc_tracts <- merge(nycCensus, acs_data, by.x ='tractFIPS', by.y = 'censusCode')

#nyc_tracts <- nycCensus %>%
  #left_join(acs_data, by = c("tractFIPS" = "censusCode"))

task8: Spatial Join Census Data to Zip Codes/Aggregate ACS Data by ZIP Code/ Join the summarized ACS data to nyc_zips

nyc_tracts <- st_transform(nyc_tracts, st_crs(nyc_zips))

tracts_with_zip <- st_join(nyc_tracts, nyc_zips["ZIPCODE"])


acs_by_zip <- tracts_with_zip %>%
  st_drop_geometry() %>%
  filter(!is.na(ZIPCODE)) %>%
  group_by(ZIPCODE) %>%
  summarise(
    ZIPCODE = first(ZIPCODE),
    Total_Population = sum(totPop, na.rm = TRUE),
    Elderly_Population = sum(elderlyPop, na.rm = TRUE),
    Hispanic_Population = sum(hispanicPop, na.rm = TRUE),
    Black_Population = sum(blackPop, na.rm = TRUE)
    
  ) %>%
  ungroup()

nyc_zips <- nyc_zips %>%
  left_join(acs_by_zip, by = "ZIPCODE")

##task : Read and Clean the Right ACS File

acs_data_raw <- read_csv(
  "C:/Users/Zack/Documents/Data Analysis and Viz/WEEK8/R-Spatial_II_Lab/Assignment 8/ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv",
  skip = 1  # skips the metadata header row
)

Map: elderlyPop

mapview(nyc_tracts, zcol = "elderlyPop", legend = TRUE)
save(nyc_zips, file = "nyc_spatial_data.RData")

save(nyc_zips, nyc_retail_sf, health_sf, acs_by_zip, file = "nyc_assignment_final_data.RData")