library(tidyverse)
## Warning: package 'readr' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readr)
# Import the 2019 Food Access Research Atlas CSV. Only the literal text "NULL"
# is treated as a missing value (this replaces readr's default NA strings).
FoodAccessResearchAtlasData2019 <- read_csv(
  file = "C:\\Users\\prath\\Downloads\\2019 Food Access Research Atlas Data(1)\\Food Access Research Atlas.csv",
  na = "NULL"
)
## Rows: 72531 Columns: 147
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr   (2): State, County
## dbl (145): CensusTract, Urban, Pop2010, OHU2010, GroupQuartersFlag, NUMGQTRS...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Open the data viewer only in an interactive session: View() is meant for
# manual inspection and may fail (or do nothing useful) when the script is
# knitted or run non-interactively, e.g. via Rscript.
if (interactive()) {
  View(FoodAccessResearchAtlasData2019)
}
# Note: read_csv() comes from the readr package; missing-value markers are set with its `na =` argument.

# Check row 811
# The racial demographics that are represented in the data are White, Black, Asian, Native Hawaiian/Pacific Islander, American Indian/Alaska Native, Hispanic/Latino, Other/Multiple race

# Low access is defined by distance to the nearest supermarket:
# more than 1/2 mi or 1 mi in urban areas; more than 10 mi or 20 mi in rural areas.

# Density of tract-level poverty rates, with a red reference line at 20.
ggplot(
  data = FoodAccessResearchAtlasData2019,
  mapping = aes(x = PovertyRate)
) +
  geom_density(fill = "blue", alpha = 0.5) +
  geom_vline(xintercept = 20, colour = "red") +
  scale_x_continuous(breaks = seq(from = 0, to = 100, by = 20)) +
  labs(
    title = "Distribution of Poverty Rates of Census Tracts in 2019",
    x = "Poverty Rate of Census Tract"
  ) +
  theme_minimal()
## Warning: Removed 3 rows containing non-finite outside the scale range
## (`stat_density()`).

# Split tracts by the LowIncomeTracts indicator:
# 1 -> low-income tracts, 0 -> tracts that are not low income.
Low_Income <- filter(FoodAccessResearchAtlasData2019, LowIncomeTracts == 1)
Not_LI <- filter(FoodAccessResearchAtlasData2019, LowIncomeTracts == 0)

# Split tracts by the Urban indicator column (1 -> Urban, 0 -> Rural).
# `.data$Urban` makes explicit that the comparison uses the data column, not
# the `Urban` data frame being created here.
Urban <- filter(FoodAccessResearchAtlasData2019, .data$Urban == 1)
Rural <- filter(FoodAccessResearchAtlasData2019, .data$Urban == 0)

# Urban tracts: poverty rate against the share of population more than 1 mile
# from a supermarket (lapop1share), with a default smoother overlaid.
ggplot(data = Urban, mapping = aes(x = PovertyRate, y = lapop1share)) +
  geom_point(alpha = 0.1) +
  geom_smooth() +
  labs(
    title = "Poverty Rate vs Share of Low-Access Population in Urban Areas",
    x = "Poverty Rate",
    y = "% Population >1 mi from supermarket"
  )
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Removed 19983 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 19983 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Observation (urban tracts): there is a small, dense cluster of tracts with low poverty rates but a very high low-access share; otherwise, low poverty mostly coincides with a low share.

# Rural tracts: poverty rate against the share of population counted as low
# access at 10 miles (lapop10share), with a default smoother overlaid.
# Axis labels and title mirror the urban plot so the two figures read alike.
ggplot(Rural, aes(x = PovertyRate, y = lapop10share)) +
  geom_point(alpha = 0.2) +
  geom_smooth() +
  labs(
    x = "Poverty Rate",
    y = "% Population >10 mi from supermarket",
    title = "Poverty Rate vs Share of Low-Access Population in Rural Areas"
  )
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Removed 9973 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 9973 rows containing missing values or values outside the scale range
## (`geom_point()`).