library(tidyverse)
library(leaflet)
library(highcharter)
library(scales)
library(RColorBrewer)
# Read the merged food-access CSV into `food`.
# NOTE(review): this is an absolute, machine-specific path — confirm the file
# location before running on another computer.
food <- read_csv('/Users/eabban/College Stuff/R Studio/merged_food_access_data.csv')
Project 2
## Introduction
Food deserts are areas where people are low-income and do not have a grocery store nearby. This means that getting fresh and affordable food is hard, not because of personal choice, but because of where people live and how much money they make. I chose this topic because food access affects our everyday lives and is not always talked about. When a neighborhood has no nearby grocery store and people cannot afford to travel far, it affects the whole community. The data comes from the USDA Food Access Research Atlas (2019), a free dataset that looks at food access by census tract across the United States.
Filtering
In this chunk, I filter out rows with missing values (NAs) and remove group-quarters tracts such as prisons and dorms. The Urban == 1 filter keeps only urban tracts, and the last filter keeps only tracts in the selected counties (Gwinnett, Davidson, and Macon).
# Build the analysis subset of `food` in three steps:
#   1. keep rows where GroupQuartersFlag is 0 and Urban is 1,
#   2. drop rows with a missing value in any column used later on,
#   3. keep only rows whose County is one of the three selected names.
filtering_food <- food |>
  filter(GroupQuartersFlag == 0, Urban == 1) |>
  drop_na(
    PovertyRate, MedianFamilyIncome,
    lapop1share, lalowi1share,
    lablack1share, lahisp1share, lawhite1share, laasian1share,
    TractSNAP,
    INTPTLAT, INTPTLONG
  ) |>
  filter(County %in% c("Gwinnett County", "Davidson County", "Macon County"))
Variables Explanation
| Variable | Description |
|---|---|
| TractSNAP | Count of housing units receiving SNAP benefits |
| TractHUNV | Count of carless housing units |
| lablack1share, lahisp1share, lawhite1share, laasian1share | Demographics |
| Pop2010 | Population count from 2010 census |
| PovertyRate | Share of pop below federal poverty line |
| MedianFamilyIncome | Tract median family income |
| CensusTract,State,County | Identifiers & Geography |
| TractLOWI | Count of low-income individuals |
| INTPTLAT & INTPTLONG | Latitude/Longitude |
| LILATracts_1And10 | Flag for low-income and low-access tracts, where low access is measured at 1 mile (urban) and 10 miles (rural) |
| lapop1share | Share of tract population that are beyond 1 mile from supermarket |
| lalowi1share | Share of tract population that are low income individuals beyond 1 mile from supermarket |
| HUNVFlag | >=100 HHs without vehicle, beyond 1/2 mi |
Selecting Important Variables
Selecting the variables that I’ll be using in the map and plots.
# Narrow the filtered data down to the columns used by the map and plots.
# Column order is kept exactly as listed.
cleaning_food <- filtering_food |>
  select(all_of(c(
    # identifiers and geography
    "CensusTract", "State", "County", "INTPTLAT", "INTPTLONG",
    # population, poverty and income
    "Pop2010", "PovertyRate", "MedianFamilyIncome", "TractLOWI",
    # food-desert flag, low-access shares and vehicle flag
    "LILATracts_1And10", "lapop1share", "lalowi1share", "HUNVFlag",
    # demographic shares
    "lablack1share", "lahisp1share", "lawhite1share", "laasian1share",
    # SNAP
    "TractSNAP"
  )))
More Filtering
Recoding the numbers in LILATracts_1And10 as labels: 0 = “Not a Food Desert” (the tract is low-income but close to a grocery store) and 1 = “Food Desert” (the tract is both low-income and low-access). The share and income columns are also converted to numbers.
# Prepare the plotting data in two steps:
#   1. coerce the share and income columns to numeric (values that cannot be
#      parsed become NA, which is what the coercion warnings below report),
#   2. turn the 0/1 flag into a labelled two-level factor.
food_dd <- cleaning_food |>
  mutate(
    lapop1share = as.numeric(lapop1share),
    lalowi1share = as.numeric(lalowi1share),
    lablack1share = as.numeric(lablack1share),
    lahisp1share = as.numeric(lahisp1share),
    lawhite1share = as.numeric(lawhite1share),
    laasian1share = as.numeric(laasian1share),
    MedianFamilyIncome = as.numeric(MedianFamilyIncome)
  ) |>
  mutate(
    LILATracts_1And10 = factor(
      LILATracts_1And10,
      levels = c(0, 1),
      labels = c("Not a Food Desert", "Food Desert")
    )
  )
Warning: There were 6 warnings in `mutate()`.
The first warning was:
ℹ In argument: `lapop1share = as.numeric(lapop1share)`.
Caused by warning:
! NAs introduced by coercion
ℹ Run `dplyr::last_dplyr_warnings()` to see the 5 remaining warnings.
MAP 1
Just a map of the selected places and information about them.
# Map 1: interactive leaflet map of the selected tracts.
# Marker colour encodes food-desert status, marker radius is the tract's
# poverty rate, and each popup summarises the tract.
color_map <- colorFactor(
  palette = c("Not a Food Desert" = "#7f8c8d", "Food Desert" = "#c0392b"),
  domain = food_dd$LILATracts_1And10
)

leaflet(food_dd) |>
  addProviderTiles("CartoDB.Positron") |>
  setView(lng = -86.5, lat = 33.5, zoom = 4) |> # centered on Southeast
  addCircleMarkers(
    lng = ~INTPTLONG,
    lat = ~INTPTLAT,
    radius = ~PovertyRate,
    color = ~color_map(LILATracts_1And10),
    fillOpacity = 0.65,
    popup = ~paste0(
      # County values already end in "County" (e.g. "Gwinnett County"), so
      # the word is not appended a second time.
      "<b>", County, ", ", State, "</b><br>",
      "Status: <b>", LILATracts_1And10, "</b><br>",
      "Poverty Rate: ", PovertyRate, "%<br>",
      "Median Family Income: $",
      formatC(MedianFamilyIncome, format = "d", big.mark = ","), "<br>",
      # Round the share so the popup does not show a long decimal tail.
      "Residents more than 1mi from grocery: ", round(lapop1share, 1), "%"
    ),
    clusterOptions = markerClusterOptions()
  ) |>
  addLegend(
    pal = color_map,
    values = ~LILATracts_1And10,
    title = "Tract Status",
    position = "bottomright"
  )
Plot 2
This plot shows each state’s average poverty rate, split by tract status (food desert vs. not a food desert).
# Plot 2: average poverty rate per state, split by food-desert status.
# `.groups = "drop"` returns an ungrouped tibble, so no regrouping message is
# emitted and no grouping leaks into later steps.
plot2 <- food_dd |>
  group_by(State, LILATracts_1And10) |>
  summarise(avg_poverty = mean(PovertyRate), .groups = "drop")

# Reference line: the mean over all tracts. Averaging the state-by-status
# means in `plot2` would weight every group equally regardless of how many
# tracts it holds, which is not the overall average the label promises.
overall_avg_poverty <- mean(food_dd$PovertyRate)

ggplot(
  plot2,
  aes(
    x = reorder(State, avg_poverty),
    y = avg_poverty,
    fill = LILATracts_1And10
  )
) +
  geom_col(position = "dodge", width = 0.65) +
  scale_fill_manual(values = c(
    "Not a Food Desert" = "#2166ac",
    "Food Desert" = "#c0392b"
  )) +
  geom_hline(
    yintercept = overall_avg_poverty,
    color = "#f4a261", linetype = "dashed", linewidth = 1
  ) +
  annotate(
    "text",
    x = 1, y = overall_avg_poverty,
    label = "Overall Average", color = "#f4a261", size = 3.5
  ) +
  coord_flip() +
  labs(
    title = "States by Their Poverty Rate",
    subtitle = "food desert vs. not a food desert",
    y = "Average Poverty Rate (%)",
    x = "States",
    caption = "Source: USDA Food Access Research Atlas (2019)"
  ) +
  theme_minimal(base_size = 13) +
  theme(legend.position = "bottom")
Plot 3
# Plot 3: average median family income by food-desert status (Highcharter).
# MedianFamilyIncome was passed through as.numeric() upstream, a step that
# reported NAs introduced by coercion, so missing values are skipped here to
# keep a single NA from turning a whole group's average into NA.
plot3 <- food_dd |>
  group_by(LILATracts_1And10) |>
  summarise(avg_income = mean(MedianFamilyIncome, na.rm = TRUE))

hchart(
  plot3,
  "column",
  hcaes(x = LILATracts_1And10, y = avg_income, group = LILATracts_1And10)
) |>
  hc_colors(c("#fee08b", "#c0392b")) |>
  hc_title(text = "Average median family income") |>
  hc_subtitle(text = "food desert vs. not a food desert") |>
  hc_xAxis(title = list(text = "")) |>
  hc_yAxis(title = list(text = "Average Median Family Income ($)")) |>
  hc_tooltip(pointFormat = "Average Income: <b>${point.y:,.0f}</b>")
Plot 4
# Plot 4: side-by-side comparison of three metrics by food-desert status.
# All three metrics are put on a 0-100 percentage scale to match the
# "Average (%)" axis label.
plot4_data <- food_dd |>
  mutate(
    TractSNAP = as.numeric(TractSNAP),
    # SNAP count relative to the 2010 population, expressed as a percentage.
    # Tracts with zero population become NA instead of Inf/NaN, which
    # `na.rm = TRUE` below would not remove.
    # NOTE(review): TractSNAP appears to count housing units while Pop2010
    # counts people, so this is an approximate rate — confirm the intended
    # denominator.
    snap_rate = if_else(Pop2010 > 0, 100 * TractSNAP / Pop2010, NA_real_)
  ) |>
  group_by(LILATracts_1And10) |>
  summarise(
    Poverty_Rate = mean(PovertyRate, na.rm = TRUE),
    Low_Access = mean(lapop1share, na.rm = TRUE),
    SNAP_Rate = mean(snap_rate, na.rm = TRUE)
  ) |>
  pivot_longer(
    cols = c(Poverty_Rate, Low_Access, SNAP_Rate),
    names_to = "Metric",
    values_to = "Average"
  )

ggplot(
  plot4_data,
  aes(x = LILATracts_1And10, y = Average, fill = LILATracts_1And10)
) +
  geom_col(width = 0.5) +
  # Free y scales: each metric gets its own axis range.
  facet_wrap(~Metric, scales = "free_y") +
  scale_fill_manual(values = c(
    "Not a Food Desert" = "#95a5a6",
    "Food Desert" = "#c0392b"
  )) +
  labs(
    title = "Data Comparison",
    subtitle = "Differences between food desert and non-food-desert tracts",
    y = "Average (%)",
    caption = "Source: USDA Food Access Research Atlas (2019)"
  ) +
  theme_minimal(base_size = 13) +
  theme(legend.position = "bottom")
Reflection
Map 1 shows where food desert tracts are located. Red circles are food desert tracts and gray circles are low-income tracts that have a grocery store nearby. The size of each circle is based on the poverty rate, so bigger circles mean higher poverty. Clicking on a circle shows the poverty rate, median family income, and the percentage of residents living more than 1 mile from a grocery store. Plot 2 shows that food desert tracts have higher average poverty rates than non-food-desert tracts in every state shown except Alabama. The orange dashed line shows the overall average, so it is easy to see which states are above or below it. Plot 3 uses Highcharter to show that food deserts also have lower family income. You can hover over each bar to see the exact dollar amount. Plot 4 puts it all together by showing that food deserts do worse on three measures at once: poverty rate, grocery access, and SNAP use. One thing I wanted to include but could not get to work was a comparison across different years. The USDA has data from more than one year, but the data went over the maximum number of observations.