---
title: "R Week 08 Assignment Updated"
author: "Caitlin Cacciatore"
date: "3/26/2026"
format:
  html:
    toc: true
    toc-location: left
    code-fold: true
    code-summary: "Show the code"
    code-tools: true
---
# Loading Packages - First Steps
```{r load_packages, include=FALSE}
# Package setup ----
# Pin a CRAN mirror so install.packages() works non-interactively
# (e.g. while the document is being rendered).
options(repos = c(CRAN = "https://cloud.r-project.org"))

# Every package this document may use. unique() guards against a name
# being listed twice.
list.of.packages <- unique(c(
  "sf", "sp", "spatial", "maptools", "rgeos", "rgdal",
  "raster", "grid", "rasterVis",
  "tidyverse", "magrittr", "ggpubr", "lubridate",
  "devtools", "htmlwidgets", "mapview",
  "classInt", "RColorBrewer", "ggmap", "tmap", "leaflet",
  "ggrepel", "ggsn",
  "spdep", "spatialreg", "GWmodel"
))

# Packages the chunks below call directly; the document cannot run
# without them.
core.packages <- c("sf", "tidyverse", "magrittr", "mapview")

# Install only what is missing. Installing unconditionally would
# re-download packages on every render.
new.packages <- setdiff(
  list.of.packages,
  installed.packages()[, "Package"]
)
if (length(new.packages) > 0) {
  install.packages(new.packages)
}

# Attach core packages with library() so a missing one stops here with a
# clear error instead of failing later with "could not find function".
for (pkg in core.packages) {
  library(pkg, character.only = TRUE)
}

# The remaining packages are optional. Some of them (e.g. rgdal, rgeos,
# maptools) may no longer be installable from CRAN, so attach whatever is
# available and report the rest instead of failing.
optional.packages <- setdiff(list.of.packages, core.packages)
is.available <- vapply(
  optional.packages,
  requireNamespace,
  logical(1),
  quietly = TRUE
)
for (pkg in optional.packages[is.available]) {
  library(pkg, character.only = TRUE)
}
if (!all(is.available)) {
  message(
    "Optional packages not available and skipped: ",
    paste(optional.packages[!is.available], collapse = ", ")
  )
}
```
# Task 1
```{r Task 1, include=TRUE}
# Task 1 ----
# Read the NYC ZIP code polygons from the shapefile.
zip_sf <- st_read("Zip_Code_040114.shp")

# Keep only polygons that carry a ZIP code.
zip_nyc <- zip_sf %>%
  filter(!is.na(ZIPCODE))

# `zip_nyc` is already an sf object. st_as_sf() hands such objects back
# unchanged, so the `crs = 4326` that used to be passed here never
# reprojected anything. The layer therefore keeps the CRS declared by the
# shapefile; use st_transform() wherever a different CRS is required.
nyc_zip_sf <- zip_nyc

# Quick visual check of the polygons and their attributes.
plot(nyc_zip_sf)

# Inspect the structure of the ZIP code layer.
str(nyc_zip_sf)

# Read the COVID test data for one week.
covid_data <- readr::read_csv("tests-by-zcta_2021_04_23.csv", lazy = FALSE)
str(covid_data)

# Attach the COVID columns to the ZIP polygons. This is an inner merge:
# only ZIP codes present in both tables are kept.
nyc_covid_data_sf_merged <- base::merge(
  nyc_zip_sf,
  covid_data,
  by.x = "ZIPCODE",
  by.y = "MODIFIED_ZCTA"
)
names(nyc_covid_data_sf_merged)
```
# Task 2
```{r Task 2, include=TRUE}
# Task 2 ----
# Count retail food stores per ZIP code.

# Read the store locations. The file is Latin-1 encoded.
nys_retail <- read_csv(
  "nys_retail_food_store_xy.csv",
  locale = locale(encoding = "Latin1"),
  lazy = FALSE
)

# Check the column names of both layers.
names(nys_retail)
names(nyc_zip_sf)

# Normalise the ZIP column name to "Zip Code". read_csv() keeps header
# text as written, so the column may already have that name; any_of()
# renames "Zip.Code" only when it is actually present instead of erroring.
nys_retail <- nys_retail %>%
  rename(any_of(c("Zip Code" = "Zip.Code")))

# Stores without coordinates cannot be turned into points.
nys_retail <- nys_retail %>%
  drop_na("X", "Y")

# Build point geometries from the X/Y columns.
# NOTE(review): crs = 4326 assumes X/Y are WGS84 longitude/latitude -
# confirm against the data source.
nys_retail_sf <- st_as_sf(
  nys_retail,
  coords = c("X", "Y"),
  crs = 4326
)

# Put the ZIP polygons in the same CRS as the store points.
nyc_zip_sf <- st_transform(nyc_zip_sf, st_crs(nys_retail_sf))

# Spatial join: attach the attributes of the containing ZIP polygon to
# each store. This is a left join, so stores outside every polygon are
# kept with NA polygon attributes.
nyc_food_stores <- st_join(nys_retail_sf, nyc_zip_sf)

# Count stores per ZIP code. The column must be referenced with
# backticks: a quoted string would be treated as a constant and collapse
# everything into one group.
zip_summary_sf <- nyc_food_stores %>%
  group_by(`Zip Code`) %>%
  summarise(store_count = n(), .groups = "drop")

# Inspect the result.
head(zip_summary_sf)
names(zip_summary_sf)
st_crs(zip_summary_sf)
```
# Task 3
```{r Task 3, include=TRUE}
# Task 3 ----
# Locate health facilities inside the NYC ZIP code polygons.

# Read the health facility table.
nyc_health <- readr::read_csv("NYS_Health_Facility.csv", lazy = FALSE)
str(nyc_health)

# Facilities without coordinates cannot be turned into points.
nyc_health_sf_no_na <- nyc_health %>%
  dplyr::filter(
    !is.na(`Facility Longitude`),
    !is.na(`Facility Latitude`)
  )

# Build point geometries from the longitude/latitude columns.
nyc_health_sf <- nyc_health_sf_no_na %>%
  sf::st_as_sf(
    coords = c("Facility Longitude", "Facility Latitude"),
    crs = 4326
  )

# Put the ZIP polygons in the same CRS as the facility points.
nyc_zip_sf <- sf::st_transform(nyc_zip_sf, sf::st_crs(nyc_health_sf))

# Spatial join, then keep only facilities that fell inside a ZIP polygon.
# ZIPCODE comes from the polygon layer and is NA for unmatched points.
# The previous test, !is.na("Zip Code"), checked a string literal, which
# is never NA, so it filtered nothing.
nyc_health_care_centers <- sf::st_join(nyc_health_sf, nyc_zip_sf) %>%
  dplyr::filter(!is.na(ZIPCODE))

# Check the column names of the joined layer.
names(nyc_health_care_centers)
```
# Task 4
```{r Task 4, include=TRUE}
# Task 4 ----
# Read the census tract polygons.
nycCensus <- sf::st_read(
  "nyc_census_tracts.shp",
  stringsAsFactors = FALSE
)
str(nycCensus)
names(nycCensus)

# Lookup between borough names and their county FIPS codes.
boroFIPS <- c(
  "Bronx" = "005",
  "Brooklyn" = "047",
  "Manhattan" = "061",
  "Queens" = "081",
  "Staten Island" = "085"
)

# Pair every tract with a county FIPS code and a borough name so the data
# can be sorted by borough.
# NOTE(review): COUNTYFP presumably holds FIPS codes such as "005" -
# confirm against names()/str() above. Comparing it only against borough
# names leaves cntyFIPS entirely NA in that case, so both forms are
# accepted here. Values that match neither stay NA.
nycCensus <- nycCensus %>%
  dplyr::mutate(
    cntyFIPS = dplyr::case_when(
      COUNTYFP %in% boroFIPS ~ as.character(COUNTYFP),
      COUNTYFP %in% names(boroFIPS) ~
        unname(boroFIPS[as.character(COUNTYFP)])
    ),
    boroName = names(boroFIPS)[match(cntyFIPS, boroFIPS)]
  )
```
# Task 5
```{r Task 5, include=TRUE}
# Task 5 ----
# Aggregate ACS tract-level population to the ZIP code polygons.

# Read the ACS file as text and drop its second line before parsing.
# NOTE(review): line 2 is presumably a row of descriptive column labels -
# confirm by opening the file.
acsLines <- readLines(
  "ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv",
  encoding = "UTF-8"
)[-2]

# The ACS GEO_ID carries a prefix in front of the tract identifier. Keep
# as many trailing characters as the tract layer's GEOID has, so the two
# keys line up in the join below.
geoidWidth <- max(nchar(as.character(nycCensus$GEOID)), na.rm = TRUE)

# read.csv(text = ...) parses the lines directly, so no connection is
# left open. GEO_ID must be selected as well: it is the join key, and
# select() drops every column that is not listed.
acsData <- read.csv(
  text = acsLines,
  header = TRUE,
  na.strings = c("", " ", "NA", "N/A", "NULL")
) %>%
  dplyr::select(
    GEO_ID,
    totPop = DP05_0001E,
    elderlyPop = DP05_0024E,
    malePop = DP05_0002E,
    femalePop = DP05_0003E,
    whitePop = DP05_0037E,
    blackPop = DP05_0038E,
    asianPop = DP05_0067E,
    hispanicPop = DP05_0071E,
    adultPop = DP05_0021E,
    citizenAdult = DP05_0087E
  ) %>%
  dplyr::mutate(
    GEO_ID = stringr::str_sub(GEO_ID, -geoidWidth, -1)
  ) %>%
  tidyr::drop_na()

# Attach the ACS counts to the tract polygons and drop tracts that have
# no population record.
popData <- nycCensus %>%
  left_join(acsData, by = c("GEOID" = "GEO_ID")) %>%
  filter(!is.na(totPop))

# Match the CRS of the ZIP/COVID layer, then reduce each tract to its
# centroid so that a tract is assigned to at most one containing polygon.
popNYC <- sf::st_transform(popData, sf::st_crs(nyc_covid_data_sf_merged))
popNYC_centroid <- popNYC %>%
  sf::st_centroid()

# For every ZIP polygon, collect the tract centroids it contains and sum
# their population counts.
popZIP <- sf::st_join(
  nyc_covid_data_sf_merged,
  popNYC_centroid,
  join = sf::st_contains
) %>%
  filter(!is.na(totPop)) %>% # ZIP polygons containing no tract centroid
  group_by(
    ZIPCODE, PO_NAME, POPULATION, COUNTY,
    COVID_CASE_COUNT, TOTAL_COVID_TESTS
  ) %>%
  summarise(
    totPop = sum(totPop, na.rm = TRUE),
    # totPop here is the per-ZIP total computed on the line above.
    malePctg = sum(malePop, na.rm = TRUE) / totPop * 100,
    asianPop = sum(asianPop, na.rm = TRUE),
    blackPop = sum(blackPop, na.rm = TRUE),
    hispanicPop = sum(hispanicPop, na.rm = TRUE),
    whitePop = sum(whitePop, na.rm = TRUE),
    .groups = "drop"
  )

sum(popZIP$totPop, na.rm = TRUE) # total population
str(popZIP) # check structure
```