library(readr)
library(readxl)
library(sf)
## Linking to GEOS 3.13.0, GDAL 3.8.5, PROJ 9.5.1; sf_use_s2() is TRUE
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ purrr 1.1.0
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(osmdata)
## Data (c) OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright
library(rinat)
library(tidycensus)
library(janitor)
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
#install.packages("tmap")
library(tmap)
library(ggplot2)
#census_api_key("21039eb27d210c67ad0cba2f4f7876f32822d33a", install = TRUE)
#install.packages("GWmodel")
library(GWmodel)
## Loading required package: robustbase
## Loading required package: sp
## Loading required package: Rcpp
## Welcome to GWmodel version 2.4-2.
food_access_research_atlas <- read_csv("2019 Food Access Research Atlas Data/Food Access Research Atlas.csv")
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 72531 Columns: 147
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (113): State, County, MedianFamilyIncome, LAPOP1_10, LAPOP05_10, LAPOP1_...
## dbl (34): CensusTract, Urban, Pop2010, OHU2010, GroupQuartersFlag, NUMGQTRS...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
censustracts <- st_read("cb_2019_37_tract_500k/cb_2019_37_tract_500k.shp")
## Reading layer `cb_2019_37_tract_500k' from data source
## `/Users/aligrau/Documents/GEOG391/cb_2019_37_tract_500k/cb_2019_37_tract_500k.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 2192 features and 9 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -84.32187 ymin: 33.84232 xmax: -75.46062 ymax: 36.58812
## Geodetic CRS: NAD83
foodaccessdata <- food_access_research_atlas %>%
mutate(
GEOID = sprintf("%011s", CensusTract)
)
nc_foodaccessdata <- food_access_research_atlas %>%
filter(State=="North Carolina")
nccounties = read_sf("https://gis11.services.ncdot.gov/arcgis/rest/services/NCDOT_CountyBdy_Poly/MapServer/0/query?outFields=*&where=1%3D1&f=geojson")
nc_poverty_rates <- nc_foodaccessdata %>%
select(CensusTract, State, County, PovertyRate)
aggregated_data <- nc_foodaccessdata %>%
group_by(County) %>%
summarise(mean = mean(PovertyRate))
ncfoodjoin <- cbind(nccounties, aggregated_data)
str(ncfoodjoin)
## Classes 'sf' and 'data.frame': 100 obs. of 18 variables:
## $ OBJECTID : int 1 2 3 4 5 6 7 8 9 10 ...
## $ FIPS : int 1 3 5 7 9 11 13 15 17 19 ...
## $ CountyName : chr "Alamance" "Alexander" "Alleghany" "Anson" ...
## $ UpperCountyName : chr "ALAMANCE" "ALEXANDER" "ALLEGHANY" "ANSON" ...
## $ SapCountyId : chr "001" "002" "003" "004" ...
## $ DOTDistrictID : int 1 2 1 3 3 2 1 2 3 3 ...
## $ DOTDivisionID : int 7 12 11 10 11 11 2 1 6 3 ...
## $ SAP_CNTY_NBR : int 1 2 3 4 5 6 7 8 9 10 ...
## $ CNTY_NBR : int 0 1 2 3 4 5 6 7 8 9 ...
## $ DSTRCT_NBR : int 1 2 1 3 3 2 1 2 3 3 ...
## $ DIV_NBR : int 7 12 11 10 11 11 2 1 6 3 ...
## $ NAME : chr "Alamance" "Alexander" "Alleghany" "Anson" ...
## $ Shape.STArea.. : num 1.21e+10 7.34e+09 6.58e+09 1.50e+10 1.19e+10 ...
## $ Shape.STLength..: num 476733 387342 439167 575504 520084 ...
## $ SHPNumber : int 1 2 3 4 5 6 7 8 9 10 ...
## $ County : chr "Alamance County" "Alexander County" "Alleghany County" "Anson County" ...
## $ mean : num 16.2 14.4 23.9 19.4 18.6 ...
## $ geometry :sfc_POLYGON of length 100; first list element: List of 1
## ..$ : num [1:1652, 1:2] -79.3 -79.3 -79.3 -79.3 -79.3 ...
## ..- attr(*, "class")= chr [1:3] "XY" "POLYGON" "sfg"
## - attr(*, "sf_column")= chr "geometry"
## - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
## ..- attr(*, "names")= chr [1:17] "OBJECTID" "FIPS" "CountyName" "UpperCountyName" ...
la_aggregated_data <- nc_foodaccessdata %>%
group_by(County) %>%
summarise(sum = sum(LA1and20)/length(LA1and20))
mastervariables <- cbind(nccounties, ncfoodjoin$mean, la_aggregated_data$sum)
#plot(stgeometry(ncfoodjoin$mean))
nc_poverty_map =
ggplot(ncfoodjoin) +
geom_sf(aes(fill = mean)) +
scale_fill_gradient2(midpoint = 17.8, low = "white", mid = "orange", high = "red") + ggtitle("Poverty Rates in NC by County") +
theme_minimal()
nc_poverty_map
ggsave(filename = "nc_poverty_map.png", plot = nc_poverty_map, width = 6, height = 4, units = "in", dpi = 300)
la_aggregated_data <- nc_foodaccessdata %>%
group_by(County) %>%
summarise(sum = sum(LA1and20)/length(LA1and20))
lowaccessdata <- cbind(nccounties, la_aggregated_data)
str(ncfoodjoin)
## Classes 'sf' and 'data.frame': 100 obs. of 18 variables:
## $ OBJECTID : int 1 2 3 4 5 6 7 8 9 10 ...
## $ FIPS : int 1 3 5 7 9 11 13 15 17 19 ...
## $ CountyName : chr "Alamance" "Alexander" "Alleghany" "Anson" ...
## $ UpperCountyName : chr "ALAMANCE" "ALEXANDER" "ALLEGHANY" "ANSON" ...
## $ SapCountyId : chr "001" "002" "003" "004" ...
## $ DOTDistrictID : int 1 2 1 3 3 2 1 2 3 3 ...
## $ DOTDivisionID : int 7 12 11 10 11 11 2 1 6 3 ...
## $ SAP_CNTY_NBR : int 1 2 3 4 5 6 7 8 9 10 ...
## $ CNTY_NBR : int 0 1 2 3 4 5 6 7 8 9 ...
## $ DSTRCT_NBR : int 1 2 1 3 3 2 1 2 3 3 ...
## $ DIV_NBR : int 7 12 11 10 11 11 2 1 6 3 ...
## $ NAME : chr "Alamance" "Alexander" "Alleghany" "Anson" ...
## $ Shape.STArea.. : num 1.21e+10 7.34e+09 6.58e+09 1.50e+10 1.19e+10 ...
## $ Shape.STLength..: num 476733 387342 439167 575504 520084 ...
## $ SHPNumber : int 1 2 3 4 5 6 7 8 9 10 ...
## $ County : chr "Alamance County" "Alexander County" "Alleghany County" "Anson County" ...
## $ mean : num 16.2 14.4 23.9 19.4 18.6 ...
## $ geometry :sfc_POLYGON of length 100; first list element: List of 1
## ..$ : num [1:1652, 1:2] -79.3 -79.3 -79.3 -79.3 -79.3 ...
## ..- attr(*, "class")= chr [1:3] "XY" "POLYGON" "sfg"
## - attr(*, "sf_column")= chr "geometry"
## - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
## ..- attr(*, "names")= chr [1:17] "OBJECTID" "FIPS" "CountyName" "UpperCountyName" ...
nc_lowaccess_map =
ggplot(lowaccessdata) +
geom_sf(aes(fill = sum)) +
scale_fill_gradient2(midpoint = 0.260191, low = "white", mid = "orange", high = "red") + ggtitle("Percentage of Census Tracts Within Each County with Low Food Access") +
theme_minimal()
nc_lowaccess_map
ggsave(filename = "nc_lowaccess_map.png", plot = nc_lowaccess_map, width = 6, height = 4, units = "in", dpi = 300)
Note that in the dataset, Hyde County only has one entry, so the sum is 1.
attach(nc_foodaccessdata)
povertyaccessttest <- t.test(PovertyRate[LA1and20=="1"], PovertyRate[LA1and20=="0"])
povertyaccessttest
##
## Welch Two Sample t-test
##
## data: PovertyRate[LA1and20 == "1"] and PovertyRate[LA1and20 == "0"]
## t = -4.8959, df = 1763.8, p-value = 1.068e-06
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -3.318864 -1.420343
## sample estimates:
## mean of x mean of y
## 14.90785 17.27746
Bandwidth = 59
bw <- bw.gwr(formula = la_aggregated_data.sum ~ ncfoodjoin.mean,
approach = "AIC",
adaptive = T,
data = mastervariables)
## Adaptive bandwidth (number of nearest neighbours): 69 AICc value: -33.35065
## Adaptive bandwidth (number of nearest neighbours): 51 AICc value: -33.65637
## Adaptive bandwidth (number of nearest neighbours): 38 AICc value: -31.9007
## Adaptive bandwidth (number of nearest neighbours): 57 AICc value: -34.09875
## Adaptive bandwidth (number of nearest neighbours): 63 AICc value: -33.64334
## Adaptive bandwidth (number of nearest neighbours): 55 AICc value: -33.94373
## Adaptive bandwidth (number of nearest neighbours): 59 AICc value: -34.10869
## Adaptive bandwidth (number of nearest neighbours): 60 AICc value: -33.82567
## Adaptive bandwidth (number of nearest neighbours): 58 AICc value: -33.96131
## Adaptive bandwidth (number of nearest neighbours): 59 AICc value: -34.10869
bw
## [1] 59
gwr.mod <- gwr.basic(formula = la_aggregated_data.sum ~ ncfoodjoin.mean,
adaptive = T,
data = mastervariables,
bw = bw)
gwr.mod
## ***********************************************************************
## * Package GWmodel *
## ***********************************************************************
## Program starts at: 2025-12-08 12:30:22.489738
## Call:
## gwr.basic(formula = la_aggregated_data.sum ~ ncfoodjoin.mean,
## data = mastervariables, bw = bw, adaptive = T)
##
## Dependent (y) variable: la_aggregated_data.sum
## Independent variables: ncfoodjoin.mean
## Number of data points: 100
## ***********************************************************************
## * Results of Global Regression *
## ***********************************************************************
##
## Call:
## lm(formula = formula, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.29300 -0.15844 -0.00499 0.14675 0.75959
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.340390 0.081146 4.195 6e-05 ***
## ncfoodjoin.mean -0.004483 0.004388 -1.022 0.309
##
## ---Significance stars
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Residual standard error: 0.2057 on 98 degrees of freedom
## Multiple R-squared: 0.01054
## Adjusted R-squared: 0.0004431
## F-statistic: 1.044 on 1 and 98 DF, p-value: 0.3094
## ***Extra Diagnostic information
## Residual sum of squares: 4.146214
## Sigma(hat): 0.2056898
## AIC: -28.50975
## AICc: -28.25975
## BIC: -106.8787
## ***********************************************************************
## * Results of Geographically Weighted Regression *
## ***********************************************************************
##
## *********************Model calibration information*********************
## Kernel function: bisquare
## Adaptive bandwidth: 59 (number of nearest neighbours)
## Regression points: the same locations as observations are used.
## Distance metric: Euclidean distance metric is used.
##
## ****************Summary of GWR coefficient estimates:******************
## Min. 1st Qu. Median 3rd Qu. Max.
## Intercept 0.1931767 0.3717536 0.4675467 0.5130316 0.5667
## ncfoodjoin.mean -0.0185205 -0.0150740 -0.0077579 -0.0047491 0.0024
## ************************Diagnostic information*************************
## Number of data points: 100
## Effective number of parameters (2trace(S) - trace(S'S)): 9.008639
## Effective degrees of freedom (n-2trace(S) + trace(S'S)): 90.99136
## AICc (GWR book, Fotheringham, et al. 2002, p. 61, eq 2.33): -34.10869
## AIC (GWR book, Fotheringham, et al. 2002,GWR p. 96, eq. 4.22): -44.67108
## BIC (GWR book, Fotheringham, et al. 2002,GWR p. 61, eq. 2.34): -119.4868
## Residual sum of squares: 3.492879
## R-square value: 0.1664527
## Adjusted R-square value: 0.08300995
##
## ***********************************************************************
## Program stops at: 2025-12-08 12:30:23.128305
gwr_nc <- gwr.mod$SDF |>
select(ncfoodjoin.mean) |>
pivot_longer(-geometry) |>
ggplot() +
geom_sf(aes(fill = value)) +
scale_fill_gradient2() +
facet_wrap(~name) + ggtitle("Relationship Between Mean Poverty Rate and Food Access in NC")
gwr_nc
ggsave(filename = "nc_gwr_map.png", plot = gwr_nc, width = 6, height = 4, units = "in", dpi = 300)
Negative relationship between mean poverty rate and limitation in food access. Where poverty rate is high, percentage of census tracts that have limited food access is low. Results are counter-intuative.
37 = NC
021 = Buncombe County
buncombe_census <- censustracts %>%
filter(COUNTYFP == "021")
buncombe_poverty <- nc_foodaccessdata %>%
filter(County == "Buncombe County")
buncombepovertymapdata <- cbind(buncombe_census, buncombe_poverty)
buncombe_aggregated_data <- buncombepovertymapdata %>%
group_by(CensusTract) %>%
summarise(sum = sum(PovertyRate))
buncombe_ag_map <- cbind(buncombe_census, buncombe_aggregated_data)
buncombe_poverty_map =
ggplot(buncombe_aggregated_data) +
geom_sf(aes(fill = sum)) +
scale_fill_gradient2(midpoint = 13.27143, low = "white", mid = "lightblue", high = "darkblue") + ggtitle("Poverty Rates in Buncombe County by Census Tract") +
theme_minimal()
buncombe_poverty_map
ggsave(filename = "buncombe_poverty_map.png", plot = buncombe_poverty_map, width = 6, height = 4, units = "in", dpi = 300)
la_buncombe_ag_data <- nc_foodaccessdata %>%
group_by(CensusTract) %>%
filter(County == "Buncombe County") %>%
summarise(sum = sum(LA1and20)/length(LA1and20))
buncombeladata <- cbind(buncombe_census, la_buncombe_ag_data)
str(buncombeladata)
## Classes 'sf' and 'data.frame': 56 obs. of 12 variables:
## $ STATEFP : chr "37" "37" "37" "37" ...
## $ COUNTYFP : chr "021" "021" "021" "021" ...
## $ TRACTCE : chr "000900" "001300" "002205" "000600" ...
## $ AFFGEOID : chr "1400000US37021000900" "1400000US37021001300" "1400000US37021002205" "1400000US37021000600" ...
## $ GEOID : chr "37021000900" "37021001300" "37021002205" "37021000600" ...
## $ NAME : chr "9" "13" "22.05" "6" ...
## $ LSAD : chr "CT" "CT" "CT" "CT" ...
## $ ALAND : num 4692782 5453346 11396083 1687565 3759477 ...
## $ AWATER : num 107050 0 10137 0 8948 ...
## $ CensusTract: num 3.7e+10 3.7e+10 3.7e+10 3.7e+10 3.7e+10 ...
## $ sum : num 0 0 0 1 0 0 0 1 1 0 ...
## $ geometry :sfc_MULTIPOLYGON of length 56; first list element: List of 1
## ..$ :List of 1
## .. ..$ : num [1:40, 1:2] -82.6 -82.6 -82.6 -82.6 -82.6 ...
## ..- attr(*, "class")= chr [1:3] "XY" "MULTIPOLYGON" "sfg"
## - attr(*, "sf_column")= chr "geometry"
## - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
## ..- attr(*, "names")= chr [1:11] "STATEFP" "COUNTYFP" "TRACTCE" "AFFGEOID" ...
buncombe_ag_poverty <- nc_foodaccessdata %>%
group_by(CensusTract) %>%
filter(County == "Buncombe County") %>%
summarise(mean = mean(PovertyRate))
buncombejoin <- cbind(buncombe_census, buncombe_ag_poverty)
buncombemaster <- cbind(buncombe_census, buncombejoin$mean, la_buncombe_ag_data$sum)
bw_buncombe <- bw.gwr(formula = la_buncombe_ag_data.sum ~ buncombejoin.mean,
approach = "AIC",
adaptive = T,
data = buncombemaster)
## Adaptive bandwidth (number of nearest neighbours): 42 AICc value: 90.32931
## Adaptive bandwidth (number of nearest neighbours): 34 AICc value: 93.1877
## Adaptive bandwidth (number of nearest neighbours): 48 AICc value: 89.19361
## Adaptive bandwidth (number of nearest neighbours): 50 AICc value: 88.66093
## Adaptive bandwidth (number of nearest neighbours): 53 AICc value: 87.93792
## Adaptive bandwidth (number of nearest neighbours): 53 AICc value: 87.93792
bw_buncombe
## [1] 53
bw = 53
gwr.mod_buncombe <- gwr.basic(formula = la_buncombe_ag_data.sum ~ buncombejoin.mean,
adaptive = T,
data = buncombemaster,
bw = bw)
gwr.mod_buncombe
## ***********************************************************************
## * Package GWmodel *
## ***********************************************************************
## Program starts at: 2025-12-08 12:30:27.594227
## Call:
## gwr.basic(formula = la_buncombe_ag_data.sum ~ buncombejoin.mean,
## data = buncombemaster, bw = bw, adaptive = T)
##
## Dependent (y) variable: la_buncombe_ag_data.sum
## Independent variables: buncombejoin.mean
## Number of data points: 56
## ***********************************************************************
## * Results of Global Regression *
## ***********************************************************************
##
## Call:
## lm(formula = formula, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.5200 -0.4726 -0.3949 0.5151 0.6912
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.539239 0.135254 3.987 0.000203 ***
## buncombejoin.mean -0.005648 0.008827 -0.640 0.524979
##
## ---Significance stars
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Residual standard error: 0.506 on 54 degrees of freedom
## Multiple R-squared: 0.007524
## Adjusted R-squared: -0.01085
## F-statistic: 0.4094 on 1 and 54 DF, p-value: 0.525
## ***Extra Diagnostic information
## Residual sum of squares: 13.82377
## Sigma(hat): 0.5059602
## AIC: 86.57922
## AICc: 87.04076
## BIC: 48.73133
## ***********************************************************************
## * Results of Geographically Weighted Regression *
## ***********************************************************************
##
## *********************Model calibration information*********************
## Kernel function: bisquare
## Adaptive bandwidth: 59 (number of nearest neighbours)
## Regression points: the same locations as observations are used.
## Distance metric: Euclidean distance metric is used.
##
## ****************Summary of GWR coefficient estimates:******************
## Min. 1st Qu. Median 3rd Qu. Max.
## Intercept 0.4556832 0.4829128 0.5056204 0.5396454 0.6423
## buncombejoin.mean -0.0152236 -0.0070387 -0.0046715 -0.0020540 0.0017
## ************************Diagnostic information*************************
## Number of data points: 56
## Effective number of parameters (2trace(S) - trace(S'S)): 3.905277
## Effective degrees of freedom (n-2trace(S) + trace(S'S)): 52.09472
## AICc (GWR book, Fotheringham, et al. 2002, p. 61, eq 2.33): 87.16551
## AIC (GWR book, Fotheringham, et al. 2002,GWR p. 96, eq. 4.22): 81.20541
## BIC (GWR book, Fotheringham, et al. 2002,GWR p. 61, eq. 2.34): 34.66844
## Residual sum of squares: 13.2198
## R-square value: 0.05088606
## Adjusted R-square value: -0.02165671
##
## ***********************************************************************
## Program stops at: 2025-12-08 12:30:27.608777
gwr_buncombe_map <- gwr.mod_buncombe$SDF |>
select(buncombejoin.mean) |>
pivot_longer(-geometry) |>
ggplot() +
geom_sf(aes(fill = value)) +
scale_fill_gradient2() + ggtitle("Relationship Between Poverty Rate and Food Access in Buncombe County") +
facet_wrap(~name)
gwr_buncombe_map
ggsave(filename = "gwr_buncombe_map.png", plot = gwr_buncombe_map, width = 6, height = 4, units = "in", dpi = 300)
buncombepovertymapdata <- buncombepovertymapdata %>%
mutate(TractSNAP = as.numeric(TractSNAP))
buncombe_aggregated_la_data <- buncombepovertymapdata %>%
group_by(CensusTract) %>%
summarise(sum = sum(TractSNAP, na.rm = TRUE))
buncombe_la_map_data <- cbind(buncombe_census, buncombe_aggregated_la_data)
buncombe_snap_map =
ggplot(buncombe_la_map_data) +
geom_sf(aes(fill = sum)) +
scale_fill_gradient2(midpoint = 182.1607, low = "white", mid = "lightblue", high = "darkblue") + ggtitle("Housing units receiving SNAP benefits in Buncombe County") +
theme_minimal()
buncombe_snap_map
ggsave(filename = "buncombe_snap_map.png", plot = buncombe_snap_map, width = 6, height = 4, units = "in", dpi = 300)
nc_foodaccessdata <- nc_foodaccessdata %>%
mutate(TractSNAP = as.numeric(TractSNAP))
nc_aggregated_snap_data <- nc_foodaccessdata %>%
group_by(County) %>%
summarise(mean = mean(TractSNAP))
bindncsnap <- cbind(nccounties, nc_aggregated_snap_data$mean)
nc_snap_map =
ggplot(bindncsnap) +
geom_sf(aes(fill = nc_aggregated_snap_data.mean)) +
scale_fill_gradient2(midpoint = 281.9706, low = "white", mid = "orange", high = "red") + ggtitle("Housing Units receiving SNAP Benefits by County") +
theme_minimal()
nc_snap_map
ggsave(filename = "nc_snap_map.png", plot = buncombe_snap_map, width = 6, height = 4, units = "in", dpi = 300)