In this exercise, I analyzed food access in Harris County, Texas, combining data from the American Community Survey (ACS) with the USDA’s Food Access Research Atlas. The goal was to identify patterns of food access across neighborhood types defined by their racial and ethnic composition.
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
##
## Attaching package: 'janitor'
##
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
##
##
##
## Attaching package: 'plotly'
##
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
##
## The following object is masked from 'package:stats':
##
## filter
##
##
## The following object is masked from 'package:graphics':
##
## layout
## Getting data from the 2015-2019 5-year ACS
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
##
|
| | 0%
|
|= | 1%
|
|= | 2%
|
|== | 3%
|
|=== | 4%
|
|=== | 5%
|
|==== | 5%
|
|==== | 6%
|
|======== | 11%
|
|======== | 12%
|
|========= | 12%
|
|========= | 13%
|
|========= | 14%
|
|========== | 14%
|
|========== | 15%
|
|=========== | 15%
|
|=========== | 16%
|
|============ | 17%
|
|============ | 18%
|
|============= | 18%
|
|============= | 19%
|
|============== | 19%
|
|============== | 20%
|
|============== | 21%
|
|=============== | 21%
|
|=============== | 22%
|
|================ | 22%
|
|================ | 23%
|
|================= | 24%
|
|================= | 25%
|
|================== | 25%
|
|================== | 26%
|
|=================== | 27%
|
|==================== | 28%
|
|==================== | 29%
|
|===================== | 30%
|
|====================== | 31%
|
|====================== | 32%
|
|======================= | 33%
|
|======================== | 34%
|
|======================== | 35%
|
|========================= | 36%
|
|========================== | 37%
|
|========================== | 38%
|
|=========================== | 38%
|
|=========================== | 39%
|
|============================ | 39%
|
|============================ | 40%
|
|============================ | 41%
|
|============================= | 41%
|
|============================= | 42%
|
|============================== | 42%
|
|============================== | 43%
|
|=============================== | 44%
|
|================================ | 45%
|
|================================ | 46%
|
|================================= | 47%
|
|================================= | 48%
|
|================================== | 48%
|
|================================== | 49%
|
|=================================== | 50%
|
|==================================== | 52%
|
|===================================== | 54%
|
|====================================== | 54%
|
|======================================= | 55%
|
|======================================= | 56%
|
|======================================== | 58%
|
|========================================= | 59%
|
|========================================== | 60%
|
|=========================================== | 61%
|
|=========================================== | 62%
|
|============================================ | 63%
|
|============================================= | 65%
|
|============================================== | 65%
|
|=============================================== | 67%
|
|================================================= | 69%
|
|================================================= | 70%
|
|=================================================== | 73%
|
|==================================================== | 74%
|
|===================================================== | 76%
|
|====================================================== | 77%
|
|======================================================= | 78%
|
|======================================================== | 80%
|
|======================================================== | 81%
|
|========================================================= | 82%
|
|========================================================== | 83%
|
|=========================================================== | 85%
|
|============================================================= | 87%
|
|============================================================== | 89%
|
|=============================================================== | 90%
|
|================================================================= | 94%
|
|================================================================== | 95%
|
|===================================================================== | 99%
|
|======================================================================| 100%
## Rows: 786
## Columns: 21
## $ GEOID <chr> "48201311900", "48201450200", "48201450400", "48201554502"…
## $ NAME <chr> "Census Tract 3119, Harris County, Texas", "Census Tract 4…
## $ B03002_001E <dbl> 2454, 5450, 4612, 6045, 6537, 9953, 7063, 4295, 3694, 4453…
## $ B03002_001M <dbl> 302, 358, 1139, 454, 600, 892, 286, 592, 408, 714, 835, 41…
## $ B03002_003E <dbl> 237, 3903, 1432, 4613, 222, 1365, 5423, 109, 300, 694, 180…
## $ B03002_003M <dbl> 127, 315, 449, 460, 159, 453, 354, 114, 179, 259, 121, 27,…
## $ B03002_004E <dbl> 61, 162, 1610, 165, 1333, 2037, 0, 2444, 26, 1188, 2326, 1…
## $ B03002_004M <dbl> 71, 135, 1187, 111, 509, 577, 19, 451, 20, 912, 720, 405, …
## $ B03002_006E <dbl> 73, 607, 208, 251, 411, 404, 850, 0, 0, 153, 277, 5, 91, 6…
## $ B03002_006M <dbl> 55, 142, 204, 160, 225, 353, 171, 14, 14, 129, 180, 8, 109…
## $ B03002_007E <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, …
## $ B03002_007M <dbl> 14, 19, 14, 19, 19, 19, 19, 14, 14, 14, 19, 14, 14, 19, 25…
## $ B03002_005E <dbl> 0, 0, 0, 66, 0, 243, 87, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 40…
## $ B03002_005M <dbl> 14, 19, 14, 73, 19, 390, 134, 25, 14, 14, 19, 14, 14, 19, …
## $ B03002_008E <dbl> 0, 68, 0, 0, 1, 57, 20, 0, 0, 0, 34, 0, 0, 17, 0, 15, 170,…
## $ B03002_008M <dbl> 14, 74, 14, 19, 3, 89, 32, 14, 14, 14, 71, 14, 14, 28, 25,…
## $ B03002_009E <dbl> 0, 126, 128, 138, 35, 60, 294, 0, 28, 0, 141, 10, 22, 166,…
## $ B03002_009M <dbl> 14, 110, 154, 115, 41, 90, 151, 14, 20, 14, 170, 13, 25, 1…
## $ B03002_012E <dbl> 2083, 584, 1234, 812, 4535, 5787, 389, 1719, 3340, 2418, 3…
## $ B03002_012M <dbl> 326, 230, 626, 179, 545, 972, 114, 590, 454, 640, 686, 107…
## $ geometry <MULTIPOLYGON [°]> MULTIPOLYGON (((-95.33319 2..., MULTIPOLYGON …
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 72531 Columns: 147
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (113): State, County, MedianFamilyIncome, LAPOP1_10, LAPOP05_10, LAPOP1_...
## dbl (34): CensusTract, Urban, Pop2010, OHU2010, GroupQuartersFlag, NUMGQTRS...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Derive readable population counts from the raw ACS B03002 estimates.
# mutate() only adds columns, so the original B03002_* fields are kept.
harris_acs19 <- harris_acs19 %>%
  mutate(
    totpop     = B03002_001E,
    white      = B03002_003E,
    black      = B03002_004E,
    aapi       = B03002_006E + B03002_007E,
    aian       = B03002_005E,
    multi_race = B03002_008E + B03002_009E,
    hisp_lat   = B03002_012E
  ) %>%
  # Each group's share of the tract population, stored as <group>_pct.
  mutate(
    across(
      c(white, black, aapi, aian, multi_race, hisp_lat),
      ~ .x / totpop,
      .names = "{.col}_pct"
    )
  )
# Classify every tract by its racial/ethnic majority.
# A group is the majority when its share of the tract population exceeds 50%;
# tracts where no group passes that threshold are labelled "Racially Diverse".
#
# Tracts with zero population are dropped first: their shares are 0/0 (NaN),
# so they cannot be classified meaningfully.
#
# Columns are referenced by bare name (not `harris_acs19$...`) so that
# case_when() always sees the rows currently flowing through the pipe.
harris_acs19 <- harris_acs19 %>%
  filter(totpop > 0) %>%
  mutate(
    neighb_race = case_when(
      white_pct > 0.5      ~ "Majority White",
      black_pct > 0.5      ~ "Majority Black",
      aapi_pct > 0.5       ~ "Majority AAPI",
      aian_pct > 0.5       ~ "Majority AIAN",
      multi_race_pct > 0.5 ~ "Majority Multiracial",
      hisp_lat_pct > 0.5   ~ "Majority Hispanic/Latino",
      TRUE                 ~ "Racially Diverse"
    )
  )

# Preview the first rows, including the new classification column.
head(harris_acs19)
## Simple feature collection with 6 features and 34 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -95.65378 ymin: 29.70875 xmax: -95.12716 ymax: 30.00429
## Geodetic CRS: NAD83
## GEOID NAME B03002_001E
## 1 48201311900 Census Tract 3119, Harris County, Texas 2454
## 2 48201450200 Census Tract 4502, Harris County, Texas 5450
## 3 48201450400 Census Tract 4504, Harris County, Texas 4612
## 4 48201554502 Census Tract 5545.02, Harris County, Texas 6045
## 5 48201550603 Census Tract 5506.03, Harris County, Texas 6537
## 6 48201252200 Census Tract 2522, Harris County, Texas 9953
## B03002_001M B03002_003E B03002_003M B03002_004E B03002_004M B03002_006E
## 1 302 237 127 61 71 73
## 2 358 3903 315 162 135 607
## 3 1139 1432 449 1610 1187 208
## 4 454 4613 460 165 111 251
## 5 600 222 159 1333 509 411
## 6 892 1365 453 2037 577 404
## B03002_006M B03002_007E B03002_007M B03002_005E B03002_005M B03002_008E
## 1 55 0 14 0 14 0
## 2 142 0 19 0 19 68
## 3 204 0 14 0 14 0
## 4 160 0 19 66 73 0
## 5 225 0 19 0 19 1
## 6 353 0 19 243 390 57
## B03002_008M B03002_009E B03002_009M B03002_012E B03002_012M
## 1 14 0 14 2083 326
## 2 74 126 110 584 230
## 3 14 128 154 1234 626
## 4 19 138 115 812 179
## 5 3 35 41 4535 545
## 6 89 60 90 5787 972
## geometry totpop white black aapi aian multi_race
## 1 MULTIPOLYGON (((-95.33319 2... 2454 237 61 73 0 0
## 2 MULTIPOLYGON (((-95.59029 2... 5450 3903 162 607 0 194
## 3 MULTIPOLYGON (((-95.62377 2... 4612 1432 1610 208 0 128
## 4 MULTIPOLYGON (((-95.65325 2... 6045 4613 165 251 66 138
## 5 MULTIPOLYGON (((-95.48031 2... 6537 222 1333 411 0 36
## 6 MULTIPOLYGON (((-95.18517 2... 9953 1365 2037 404 243 117
## hisp_lat white_pct black_pct aapi_pct aian_pct multi_race_pct
## 1 2083 0.09657702 0.02485738 0.02974735 0.00000000 0.000000000
## 2 584 0.71614679 0.02972477 0.11137615 0.00000000 0.035596330
## 3 1234 0.31049436 0.34908933 0.04509974 0.00000000 0.027753686
## 4 812 0.76311001 0.02729529 0.04152192 0.01091811 0.022828784
## 5 4535 0.03396053 0.20391617 0.06287288 0.00000000 0.005507113
## 6 5787 0.13714458 0.20466191 0.04059078 0.02441475 0.011755250
## hisp_lat_pct neighb_race
## 1 0.8488183 Majority Hispanic/Latino
## 2 0.1071560 Majority White
## 3 0.2675629 Racially Diverse
## 4 0.1343259 Majority White
## 5 0.6937433 Majority Hispanic/Latino
## 6 0.5814327 Majority Hispanic/Latino
# check: distribution of tract population; after dropping empty tracts the
# minimum is expected to be greater than 0
summary(harris_acs19$totpop)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4 3684 5024 5912 7221 29605
# Number of tracts in each majority category; useNA = "always" adds an <NA>
# column so any unclassified tract would show up in the count.
table(harris_acs19$neighb_race, useNA = "always")
##
## Majority Black Majority Hispanic/Latino Majority White
## 74 298 208
## Racially Diverse <NA>
## 206 0
We have identified the distribution of racial and ethnic majority tracts as follows. This will be our benchmark for further analysis.
Majority Black tracts: 74
Majority Hispanic/Latino: 298
Majority White: 208
Racially Diverse: 206
Explore tracts in Harris County categorized by racial and ethnic majorities
# Interactive map of the tracts, using `neighb_race` as the displayed attribute.
mapview(harris_acs19, zcol = "neighb_race")
# Attach the USDA atlas indicators to the ACS tract table.
# GEOID is character while the atlas key `CensusTract` was parsed as a double,
# so the ACS key is renamed and coerced to numeric before joining.
# left_join() keeps every ACS tract, matched or not.
harris_food <- harris_acs19 %>%
  rename(CensusTract = GEOID) %>%
  mutate(across(CensusTract, as.numeric)) %>%
  left_join(atlas, by = "CensusTract") %>%
  # snake_case all column names, then drop the sf class for plain tabular work
  clean_names() %>%
  as.data.frame()
# check merge: peek at the join key, the race/ethnicity counts and the
# atlas flag that came in from the join
harris_food %>%
  select(
    all_of(c(
      "census_tract", "white", "black", "aapi", "aian", "multi_race",
      "hisp_lat", "lila_tracts_1and10"
    ))
  ) %>%
  head()
## census_tract white black aapi aian multi_race hisp_lat lila_tracts_1and10
## 1 48201311900 237 61 73 0 0 2083 0
## 2 48201450200 3903 162 607 0 194 584 0
## 3 48201450400 1432 1610 208 0 128 1234 0
## 4 48201554502 4613 165 251 66 138 812 0
## 5 48201550603 222 1333 411 0 36 4535 0
## 6 48201252200 1365 2037 404 243 117 5787 1
I will now calculate the percentage of tracts classified as food deserts within each racial and ethnic majority category. Here, a food desert is a tract flagged by the Atlas variable `lila_tracts_1and10`: a low-income tract where a significant number or share of residents live more than 1 mile (urban areas) or 10 miles (rural areas) from the nearest supermarket or large grocery store. Majority Black and Majority Hispanic/Latino census tracts in Harris County experience higher rates of food desert incidence than Majority White tracts. Moreover, Black neighborhoods are disproportionately represented as food deserts compared to their share of total tracts.
# analysis: one row per neighbourhood type with the number of flagged tracts,
# the number of tracts overall, and the flagged share (mean of the 0/1 flag)
neigh_food_desert <- summarise(
  group_by(harris_food, neighb_race),
  count_food_desert = sum(lila_tracts_1and10),
  total_tracts = n(),
  pct_food_desert = mean(lila_tracts_1and10)
)
# Visualisation: for each neighbourhood type, show side by side
#   * pct_food_desert - share of that type's tracts flagged as food deserts
#   * pct_tracts      - that type's share of all tracts
neigh_food_desert %>%
  mutate(pct_tracts = total_tracts / sum(total_tracts)) %>%
  select(neighb_race, pct_food_desert, pct_tracts) %>%
  # Long format so the two measures can be mapped to `fill`.
  pivot_longer(
    cols = c(pct_food_desert, pct_tracts),
    names_to = 'group',
    values_to = 'pct'
  ) %>%
  ggplot(aes(x = neighb_race, y = pct, fill = group)) +
  geom_col(width = 0.65, position = 'dodge') +
  labs(
    x = NULL,
    y = NULL,
    title = 'Black Neighborhoods Disproportionately Encounter Food Access Challenges in Harris County',
    fill = 'Percent of '
  ) +
  # Dodge width matches the bar width so each label sits on its own bar.
  geom_text(
    aes(label = round(pct * 100, 0)),
    position = position_dodge(width = 0.65),
    vjust = 1.25,
    color = 'white',
    fontface = 'bold'
  ) +
  # `labels` spelled out in full instead of relying on partial matching.
  scale_y_continuous(labels = scales::percent) +
  # Labels are given in the same order as the (alphabetical) fill levels.
  scale_fill_manual(
    values = c("pct_food_desert" = "#FC887B", "pct_tracts" = "#94CCE0"),
    labels = c("Low Food Access", "Total Tracts")
  ) +
  theme_classic() +
  theme(
    plot.title.position = 'plot',
    legend.position = 'top'
  )
Next, I calculate the number and percentage of residents of each race and ethnicity who live in food desert tracts.
# analysis: residents of each race/ethnicity living inside vs. outside
# food-desert tracts, plus the share living inside
table_harris_food <- harris_food %>%
  select(
    census_tract, white, black, aapi, aian, multi_race, hisp_lat,
    lila_tracts_1and10
  ) %>%
  # Long format: one row per tract and group, with the head count in `pop`.
  pivot_longer(
    cols = c(white, black, aapi, aian, multi_race, hisp_lat),
    names_to = "race_eth",
    values_to = "pop"
  ) %>%
  group_by(race_eth) %>%
  summarise(
    food_desert = sum(pop[lila_tracts_1and10 == 1]),
    not_food_desert = sum(pop[lila_tracts_1and10 == 0])
  ) %>%
  mutate(
    total_pop = food_desert + not_food_desert,
    per_food_desert = food_desert / total_pop
  ) %>%
  # Most affected group first.
  arrange(desc(per_food_desert))

table_harris_food
## # A tibble: 6 × 5
## race_eth food_desert not_food_desert total_pop per_food_desert
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 black 223230 639814 863044 0.259
## 2 hisp_lat 356039 1639076 1995115 0.178
## 3 aian 1084 7021 8105 0.134
## 4 multi_race 9696 71932 81628 0.119
## 5 aapi 31762 292071 323833 0.0981
## 6 white 88523 1286382 1374905 0.0644
# Visualisation: number of residents of each race/ethnicity living in
# food-desert tracts, bars ordered from largest to smallest.
table_harris_food %>%
  select(race_eth, food_desert) %>%
  # Swap the internal column codes for display labels.
  mutate(
    race_eth = case_when(
      race_eth == 'hisp_lat' ~ 'Hispanic/Latino',
      race_eth == 'black' ~ 'Black',
      race_eth == 'white' ~ 'White',
      race_eth == 'aapi' ~ 'AAPI',
      race_eth == 'multi_race' ~ 'Multiracial',
      race_eth == 'aian' ~ 'AIAN'
    )
  ) %>%
  ggplot(aes(x = reorder(race_eth, -food_desert), y = food_desert)) +
  geom_col(width = 0.65, fill = "#FC887B") +
  labs(
    y = 'Low access to good food',
    x = NULL,
    title = '360,000 Latinos Lack Access to Quality Food in Harris County'
  ) +
  # `labels` spelled out in full instead of relying on partial matching.
  scale_y_continuous(labels = scales::comma) +
  theme_classic() +
  theme(plot.title.position = 'plot')
The scatter plot illustrates that neighborhoods with limited access to quality food tend to have fewer White residents and, concurrently, experience higher levels of poverty.
# Scatter plot of tract poverty rate against share of White residents,
# coloured by the food-access flag.
plt <- harris_food %>%
  # Turn the 0/1 flag into a readable legend label (this pipeline only;
  # harris_food itself is not modified).
  mutate(lila_tracts_1and10 = ifelse(lila_tracts_1and10 == 1, 'Limited', 'Good')) %>%
  # poverty_rate is divided by 100 so that both axes are proportions and can
  # share the percent formatter.
  ggplot(aes(x = white_pct, y = poverty_rate / 100, color = lila_tracts_1and10)) +
  geom_point(alpha = 0.75) +
  labs(
    x = 'Percent of White Residents',
    y = 'Poverty Rate',
    title = 'Neighborhoods with limited access to quality food have less White residents\nand experience higher levels of poverty',
    color = 'Quality of Food Access'
  ) +
  # `labels` spelled out in full instead of relying on partial matching.
  scale_x_continuous(labels = scales::percent) +
  scale_y_continuous(labels = scales::percent) +
  scale_color_manual(values = c("Limited" = "#FC887B", "Good" = "#94CCE0")) +
  theme_classic() +
  theme(
    plot.title.position = 'plot',
    legend.position = 'top'
  )

# Convert to an interactive plotly widget with a horizontal legend placed
# inside the plotting area.
ggplotly(plt) %>%
  layout(legend = list(orientation = "h", x = 0.3, y = 0.9))