Data Sources: Zipcode Shapefile: Data.gov Zipcode Characteristics: Amerian Community Survey from data.census.gov Emergency Food Access Map: Broome County Food Council (addresses geocoded by geocod.io)

setwd("~/Documents/Documents/DIDA370/zip_codes")
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(urbnmapr)

map <- st_read("cb_2019_us_zcta510_500k.shp")
## Reading layer `cb_2019_us_zcta510_500k' from data source 
##   `/Users/evanhaddican/Documents/Documents/DIDA370/zip_codes/cb_2019_us_zcta510_500k.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 33144 features and 5 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -176.6847 ymin: -14.37374 xmax: 145.8304 ymax: 71.34132
## Geodetic CRS:  NAD83
colnames(map)[1] <- "zipcode"
map$zipcode <- as.numeric(map$zipcode)

map_ny <- map %>% 
  filter(zipcode > 09999 & zipcode < 15000) 

ggplot(map_ny)+
  geom_sf(fill = "white")

setwd("~/Documents/Documents/DIDA370")
data <- read.csv("select_zipcode_statistics.csv")
library(tidyr)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.4
## ✔ lubridate 1.9.3     ✔ stringr   1.5.0
## ✔ purrr     1.0.2     ✔ tibble    3.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
#join shapefile with survey data
ny1 <- map_ny %>% 
  left_join(data, by = c("zipcode" = "Geographic.Area.Name")) %>% 
  st_transform("EPSG:32116")

#get just Broome county
#load in NYS county shape file and set crs
counties <- get_urbn_map("counties", sf = TRUE)

#filter the data to get just NYS
broome <- counties %>% 
  filter(state_abbv == "NY") %>% 
  filter(county_name == "Broome County") %>% 
  st_transform("EPSG:32116")
## old-style crs object detected; please recreate object with a recent sf::st_crs()
#isolate Broome County
broome_map <- ny1[broome,]

#plot it
ggplot(broome_map)+
  geom_sf(mapping = aes(), fill = "white")+
  theme_minimal()

#now let's load in the EFAM map data
setwd("~/Documents/Documents/DIDA370")
efam <- read.csv("efam_data.csv")

efam <- efam %>% filter(!Zip %in% NA )

#convert to a simple features object
efam1 <- efam %>% 
  #I geocoded addresses using Geocod.io
  st_as_sf(coords = c("Longitude", "Latitude"), crs = 4326) %>% 
  st_transform(crs = st_crs(broome_map))

#plot it
ggplot()+
  geom_sf(data = broome_map, mapping = aes(), fill = "gray90")+
  geom_sf(data = efam1, mapping = aes(color = Type))+
  theme_minimal()+
  labs(title = "Emergency Food Access in Broome County")

#Map 1
ggplot(broome_map)+
  geom_sf(mapping = aes(fill = total_population_poverty)) +
  geom_sf(data = efam1, mapping = aes(color = Type))+
  theme_minimal()+
  scale_fill_gradient(
    name = "Total Population in Poverty",
    low = "lightblue",
    high = "darkblue") +
    labs(title = "Poverty in Broome County and Food Access Locations")

joined <- efam1 %>% 
  st_join(broome_map, by = c("zipcode" = "Zip")) %>% 
#When I used left_join I got an error telling me to use st_joins for spatial joins
  group_by(zipcode) %>% 
  summarize(
    count = n(),
    poverty = sum(total_population_poverty),
    count2poverty = sum(count/total_population_poverty)
    )

headrow <- joined %>% 
  arrange(count2poverty) %>% 
  slice_head()
#https://dplyr.tidyverse.org/reference/slice.html
  
centroid_point <- broome_map %>%
  filter(zipcode == headrow$zipcode) %>%
  st_centroid()
## Warning: st_centroid assumes attributes are constant over geometries
#spatial operation: st_centroid

broome_zips <- broome_map %>% 
  filter(zipcode == 13833)

#Map 2
ggplot() +
  geom_sf(data = broome_map,
    mapping = aes(), color = "lightgray")+
  geom_sf(data = broome_zips,
          mapping = aes(), fill = "aquamarine3", color = 'black')+
  geom_sf(data = centroid_point,
          mapping = aes(), color = "red")+
  theme_minimal()+
  labs(title = "Location of New Food Pantry")

foodaccess_within <- efam1[broome_zips, , op = st_within]
#spatial operation: st_within


#Map 3
ggplot()+
  geom_sf(data = broome_zips, mapping = aes(), fill = "aquamarine")+
  geom_sf(data = centroid_point, mapping = aes(), color = "purple")+
  geom_sf(data = foodaccess_within, mapping = aes(color = Type))+
  theme_minimal()+
  labs(title = "Emergency Food Access in 13833 and Proposed New Food Pantry")

bivariate<-lm(joined$poverty~joined$count, data=joined)
#Last semester I took DIDA 130 and learned some statistical tools to use in R.
summary(bivariate)
## 
## Call:
## lm(formula = joined$poverty ~ joined$count, data = joined)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -254917  -55451    5604   21718  479349 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -72168      49201  -1.467    0.163    
## joined$count    28178       3229   8.725 2.91e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 150300 on 15 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.8354, Adjusted R-squared:  0.8244 
## F-statistic: 76.13 on 1 and 15 DF,  p-value: 2.91e-07
#I found the intercept and slope in the summary and used them to plot my line of best fit.

#Graph 1
ggplot(joined, aes(x = count, y = poverty)) +
  geom_point()+
  geom_abline(intercept = -72168, slope = 28178)+
  theme_minimal()+
  labs(x = "Number of Emergency Food Access Locations", y= "Total Population in Poverty", title = "Relationship of Poverty to Emergency Food Access by Zipcode")
## Warning: Removed 1 rows containing missing values (`geom_point()`).

#https://ggplot2.tidyverse.org/reference/geom_abline.html

I attempted to find the relationship between the number of food access locations and the number of people living in poverty in each zip code and find the perfect location for a food pantry based on that. I decided to perform a bivariate regression to find the relationship between the number of food access locations and the total population in poverty, and then I plotted the points on a graph with a line of best fit. I found that for the most part, the number of food access locations was proportional to the number of people living in poverty, with a strong positive correlation. However, I did end up choosing where to put my food pantry based on the relationship between food access locations and poverty, and decided to place it in the center of the 13833 zip code. To get to this decision I divided the number of food access locations by the number of people living in poverty and found the area with the least number of food access locations relative to the total population in poverty was the 13833 zip code. Then I plotted all the food access locations in that zip code to make sure that my location was not nearby any others, and found that the only other food access location was not too close to my proposed location.