library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.1
# Load each worksheet of the food-security workbook into its own tibble.
# The workbook path is defined once so the five reads cannot drift apart.
food_security_workbook <- "foodsecurity_data_file_2022.xlsx"
food_sec_all_households <- read_excel(food_security_workbook, sheet = 2)
# Sheet 3 has two columns without a header; readxl names them `...13` and
# `...14` (see the "New names" message below).
food_sec_hh_w_children <- read_excel(food_security_workbook, sheet = 3)
## New names:
## • `` -> `...13`
## • `` -> `...14`
child_food_sec_trends <- read_excel(food_security_workbook, sheet = 4)
ed_emp_disability <- read_excel(food_security_workbook, sheet = 5)
food_sec_by_state <- read_excel(food_security_workbook, sheet = 6)
# Preview the first rows of the all-households sheet.
food_sec_all_households %>% head()
## # A tibble: 6 × 13
## Year Category Subcategory `Sub-subcategory` Total `Food secure-1,000`
## <dbl> <chr> <chr> <chr> <dbl> <dbl>
## 1 2001 All households <NA> <NA> 107824 96303
## 2 2001 Household comp… With child… <NA> 38330 32141
## 3 2001 Household comp… With child… With children < … 16858 13920
## 4 2001 Household comp… With child… Married-couple f… 26182 23389
## 5 2001 Household comp… With child… Female head, no … 9080 6185
## 6 2001 Household comp… With child… Male head, no sp… 2389 2009
## # ℹ 7 more variables: `Food secure-percent` <dbl>, `Food insecure-1,000` <dbl>,
## # `Food insecure-percent` <dbl>, `Low food security-1,000` <chr>,
## # `Low food security-percent` <chr>, `Very low food security-1,000` <chr>,
## # `Very low food security-percent` <chr>
# Preview the first rows of the by-state sheet.
food_sec_by_state %>% head()
## # A tibble: 6 × 6
## Year State `Food insecurity prevalence` Food insecurity–margin of …¹
## <chr> <chr> <dbl> <chr>
## 1 2006–2008 U.S. total 12.2 0.25
## 2 2006–2008 AK 11.6 1.66
## 3 2006–2008 AL 13.3 1.66
## 4 2006–2008 AR 15.9 3.19
## 5 2006–2008 AZ 13.2 1.51
## 6 2006–2008 CA 12 0.74
## # ℹ abbreviated name: ¹`Food insecurity–margin of error`
## # ℹ 2 more variables: `Very low food security prevalence` <dbl>,
## # `Very low food security–margin of error` <dbl>
# Preview the first rows of the households-with-children sheet.
food_sec_hh_w_children %>% head()
## # A tibble: 6 × 14
## Year Category Subcategory Total Food-secure househol…¹ Food-secure househol…²
## <dbl> <chr> <chr> <dbl> <dbl> <dbl>
## 1 2008 All hou… <NA> 39699 31364 79
## 2 2008 Househo… With child… 17503 13595 77.7
## 3 2008 Househo… Married-co… 26705 22886 85.7
## 4 2008 Househo… Female hea… 9639 6057 62.8
## 5 2008 Househo… Male head,… 2782 2014 72.4
## 6 2008 Househo… Other hous… 572 405 70.8
## # ℹ abbreviated names: ¹`Food-secure households-1,000`,
## # ²`Food-secure households-percent`
## # ℹ 8 more variables: `Food-insecure households-1,000` <dbl>,
## # `Food-insecure households-percent` <dbl>,
## # `Households with food-insecure children-1,000` <chr>,
## # `Households with food-insecure children-percent` <chr>,
## # `Households with very low food security among children-1,000` <chr>, …
# Preview the first rows of the child food-security trends sheet.
child_food_sec_trends %>% head()
## # A tibble: 6 × 11
## Year Category Total Food-secure househol…¹ Food-secure househol…²
## <dbl> <chr> <dbl> <dbl> <dbl>
## 1 1998 Children - by food … 71282 57255 80.3
## 2 1998 Households with Chi… 38036 31335 82.4
## 3 1999 Children - by food … 71418 59344 83.1
## 4 1999 Households with Chi… 37884 32290 85.2
## 5 2000 Children - by food … 71763 58867 82
## 6 2000 Households with Chi… 38113 31942 83.8
## # ℹ abbreviated names: ¹`Food-secure households-1,000`,
## # ²`Food-secure households-percent`
## # ℹ 6 more variables: `Food-insecure households-1,000` <dbl>,
## # `Food-insecure households-percent` <dbl>,
## # `Households with food-insecure children-1,000` <dbl>,
## # `Households with food-insecure children-percent` <dbl>,
## # `Households with very low food security among children-1,000` <dbl>, …
# Preview the first rows of the education/employment/disability sheet.
ed_emp_disability %>% head()
## # A tibble: 6 × 11
## Year Category Subcategory `Sub-subcategory` Total `Food insecure-1,000`
## <dbl> <chr> <chr> <chr> <dbl> <dbl>
## 1 2017 All househol… All househ… All households 1.27e5 15018.
## 2 2017 All househol… Employment Full-time 8.37e4 7871.
## 3 2017 All househol… Employment Retired 2.69e4 2002.
## 4 2017 All househol… Employment Part-time non-ec… 5.67e3 1055.
## 5 2017 All househol… Employment Part-time econom… 1.16e3 447.
## 6 2017 All househol… Employment Unemployed 1.81e3 775.
## # ℹ 5 more variables: `Food insecure-percent` <dbl>,
## # `Food insecure-share` <dbl>, `Very low food security-1,000` <dbl>,
## # `Very low food security-percent` <dbl>,
## # `Very low food security-share` <dbl>
# Report, for every loaded sheet, each column that contains missing values.
#
# The sheets are collected with list(), not c(): c() on data frames flattens
# them into one long list of columns, so colnames() on each element is NULL
# and no column is ever inspected.
dataframes <- list(
  food_sec_all_households = food_sec_all_households,
  food_sec_by_state = food_sec_by_state,
  food_sec_hh_w_children = food_sec_hh_w_children,
  child_food_sec_trends = child_food_sec_trends,
  ed_emp_disability = ed_emp_disability
)
for (sheet_name in names(dataframes)) {
  item <- dataframes[[sheet_name]]
  for (col in colnames(item)) {
    # `[[` looks the column up by the value of `col`; `item$col` would look
    # for a column literally named "col".
    na_count <- sum(is.na(item[[col]]))
    if (na_count > 0) {
      cat(sprintf(
        "%s: column `%s` has %d missing value(s)\n",
        sheet_name, col, na_count
      ))
    }
  }
}
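Missing values are present in the data: for example, the previews above show `<NA>` in the `Subcategory` and `Sub-subcategory` columns of `food_sec_all_households` and in the `Subcategory` column of `food_sec_hh_w_children`. These appear to be structural (rows that summarize a whole category have no subcategory) rather than gaps in the measurements.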
# Show the all-households tibble as loaded (before any type conversion).
print(food_sec_all_households)
## # A tibble: 660 × 13
## Year Category Subcategory `Sub-subcategory` Total `Food secure-1,000`
## <dbl> <chr> <chr> <chr> <dbl> <dbl>
## 1 2001 All households <NA> <NA> 107824 96303
## 2 2001 Household com… With child… <NA> 38330 32141
## 3 2001 Household com… With child… With children < … 16858 13920
## 4 2001 Household com… With child… Married-couple f… 26182 23389
## 5 2001 Household com… With child… Female head, no … 9080 6185
## 6 2001 Household com… With child… Male head, no sp… 2389 2009
## 7 2001 Household com… With child… Other household … 678 555
## 8 2001 Household com… With no ch… <NA> 69495 64163
## 9 2001 Household com… With no ch… More than one ad… 40791 38328
## 10 2001 Household com… With no ch… Women living alo… 16513 14915
## # ℹ 650 more rows
## # ℹ 7 more variables: `Food secure-percent` <dbl>, `Food insecure-1,000` <dbl>,
## # `Food insecure-percent` <dbl>, `Low food security-1,000` <chr>,
## # `Low food security-percent` <chr>, `Very low food security-1,000` <chr>,
## # `Very low food security-percent` <chr>
# Convert the two percent columns that were read in as text to numeric.
# All columns are kept. Entries that are not valid numbers become NA, and R
# warns about the coercion.
food_sec_all_households <- food_sec_all_households %>%
  mutate(
    across(
      c(`Low food security-percent`, `Very low food security-percent`),
      as.numeric
    )
  )
## Warning: There were 2 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `Low food security-percent = .Primitive("as.double")(`Low food
## security-percent`)`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.
# Split the all-households sheet into one tibble per top-level Category.
all_households <- filter(
  food_sec_all_households,
  Category == "All households"
)
all_household_compostion <- filter(
  food_sec_all_households,
  Category == "Household composition"
)
all_household_race <- filter(
  food_sec_all_households,
  Category == "Race/ethnicity of households"
)
# National food-insecurity rate by year, drawn from the "All households" rows
# that were already extracted into `all_households`.
ggplot(all_households, aes(x = Year, y = `Food insecure-percent`)) +
  geom_line() +
  labs(
    y = "Food Insecurity Percentage",
    title = "Yearly U.S. Food Insecurity"
  )
# Compare the three insecurity measures for all households on one chart
# (the food-secure percentage is not plotted). Each layer maps colour to a
# constant label so that ggplot builds the legend entries.
ggplot(all_households, mapping = aes(x = Year)) +
  geom_line(
    mapping = aes(
      y = `Food insecure-percent`,
      colour = "Percent Food Insecure"
    )
  ) +
  geom_line(
    mapping = aes(
      y = `Low food security-percent`,
      colour = "Percent Low Food Security"
    )
  ) +
  geom_line(
    mapping = aes(
      y = `Very low food security-percent`,
      colour = "Percent Very Low Food Security"
    )
  ) +
  labs(
    title = "Yearly Food Insecurity Level Percentages",
    y = "Percent of all Households",
    colour = "Food Security Level"
  )
# Food-insecurity rate by household composition.
# Only the subcategory-level rows are kept (those with no Sub-subcategory),
# and colour is mapped to Subcategory so one layer draws all three series
# and the legend labels come straight from the data.
ggplot(
  data = filter(
    all_household_compostion,
    Subcategory %in% c(
      "With children < 18 years",
      "With no children < 18 years",
      "With elderly"
    ),
    is.na(`Sub-subcategory`)
  ),
  mapping = aes(x = Year, y = `Food insecure-percent`, colour = Subcategory)
) +
  geom_line() +
  labs(
    y = "Percent Food Insecure",
    title = "Yearly Food Insecurity Percentages by Household Composition",
    colour = "Household Composition"
  )
# National ("All households") rows, one per year.
food_sec_all_households_gen <- filter(
  food_sec_all_households,
  Category == "All households"
)
print(food_sec_all_households_gen)
## # A tibble: 22 × 13
## Year Category Subcategory `Sub-subcategory` Total `Food secure-1,000`
## <dbl> <chr> <chr> <chr> <dbl> <dbl>
## 1 2001 All households <NA> <NA> 107824 96303
## 2 2002 All households <NA> <NA> 108601 96543
## 3 2003 All households <NA> <NA> 112214 99631
## 4 2004 All households <NA> <NA> 112967 99473
## 5 2005 All households <NA> <NA> 114437 101851
## 6 2006 All households <NA> <NA> 115609 102961
## 7 2007 All households <NA> <NA> 117100 104089
## 8 2008 All households <NA> <NA> 117565 100416
## 9 2009 All households <NA> <NA> 118174 100820
## 10 2010 All households <NA> <NA> 118756 101527
## # ℹ 12 more rows
## # ℹ 7 more variables: `Food secure-percent` <dbl>, `Food insecure-1,000` <dbl>,
## # `Food insecure-percent` <dbl>, `Low food security-1,000` <chr>,
## # `Low food security-percent` <dbl>, `Very low food security-1,000` <chr>,
## # `Very low food security-percent` <dbl>
# Food-insecurity rate by race/ethnicity, with the national series for
# reference. Colours are assigned to the legend entries in the order given
# by `breaks`.
ggplot() +
  geom_line(
    data = food_sec_all_households_gen,
    mapping = aes(
      x = Year, y = `Food insecure-percent`, colour = "U.S. Average"
    )
  ) +
  geom_line(
    data = filter(all_household_race, Subcategory == "White non-Hispanic"),
    mapping = aes(
      x = Year, y = `Food insecure-percent`, colour = "White non-Hispanic"
    )
  ) +
  geom_line(
    data = filter(all_household_race, Subcategory == "Black non-Hispanic"),
    mapping = aes(
      x = Year, y = `Food insecure-percent`, colour = "Black non-Hispanic"
    )
  ) +
  geom_line(
    data = filter(all_household_race, Subcategory == "Hispanic"),
    mapping = aes(
      x = Year, y = `Food insecure-percent`, colour = "Hispanic"
    )
  ) +
  geom_line(
    data = filter(all_household_race, Subcategory == "Other"),
    mapping = aes(
      x = Year, y = `Food insecure-percent`, colour = "Other Ethnicities"
    )
  ) +
  labs(
    title = "Yearly Food Insecurity Percentages by Ethnicity",
    y = "Food Insecurity (%)",
    colour = "Ethnicity"
  ) +
  scale_colour_manual(
    values = c("red", "blue", "green", "purple", "black"),
    breaks = c(
      "Black non-Hispanic",
      "Hispanic",
      "White non-Hispanic",
      "Other Ethnicities",
      "U.S. Average"
    )
  )
# Shorten the label of the national row.
food_sec_by_state$State[food_sec_by_state$State %in% "U.S. total"] <- "U.S."

# Mean food-insecurity prevalence per State value over all reporting periods,
# highest first. Missing prevalence values are ignored rather than turning a
# whole state's mean into NA.
state_fi_means <- food_sec_by_state %>%
  group_by(State) %>%
  summarize(
    state_mean = mean(`Food insecurity prevalence`, na.rm = TRUE)
  ) %>%
  arrange(desc(state_mean))

# The four states with the highest mean prevalence. The national "U.S." row
# is an aggregate, not a state, so it is left out of the ranking.
top_4_states <- head(setdiff(state_fi_means$State, "U.S."), 4)

# Midpoint of each three-year range: Year looks like "2006–2008", so the
# first four characters are the start year and the midpoint is one year later.
food_sec_by_state$MidYear <-
  as.numeric(substr(food_sec_by_state$Year, 1, 4)) + 1
# Prevalence over time for the four highest-ranked states plus the national
# series. The legend order and colours are derived from `top_4_states`
# (alphabetical, then the national series) instead of hard-coded state codes,
# so the plot stays correct if the ranking changes.
ggplot(mapping = aes(x = MidYear, y = `Food insecurity prevalence`)) +
  geom_line(
    data = filter(food_sec_by_state, State == "U.S."),
    mapping = aes(colour = "U.S. Average")
  ) +
  geom_line(
    data = filter(food_sec_by_state, State %in% top_4_states),
    mapping = aes(colour = State)
  ) +
  labs(
    x = "Year",
    y = "Food Insecurity Percent",
    title = "Food Insecurity Prevalence by Year (Top Four States)",
    colour = "State"
  ) +
  scale_color_manual(
    values = c("red", "blue", "green", "purple", "black"),
    breaks = c(sort(top_4_states), "U.S. Average")
  )
# The four states with the lowest mean prevalence. `state_fi_means` is sorted
# highest first, so these are its last entries. Rows without a usable mean
# and the national "U.S." aggregate are left out of the ranking.
bottom_4_states <- tail(
  setdiff(
    state_fi_means$State[!is.na(state_fi_means$state_mean)],
    "U.S."
  ),
  4
)

# Prevalence over time for those states plus the national series. The legend
# order and colours are derived from `bottom_4_states` (alphabetical, then the
# national series) instead of hard-coded state codes.
ggplot(mapping = aes(x = MidYear, y = `Food insecurity prevalence`)) +
  geom_line(
    data = filter(food_sec_by_state, State == "U.S."),
    mapping = aes(colour = "U.S. Average")
  ) +
  geom_line(
    data = filter(food_sec_by_state, State %in% bottom_4_states),
    mapping = aes(colour = State)
  ) +
  labs(
    x = "Year",
    y = "Food Insecurity Percent",
    title = "Food Insecurity Prevalence by Year (Bottom Four States)",
    colour = "State"
  ) +
  scale_color_manual(
    values = c("red", "blue", "green", "purple", "black"),
    breaks = c(sort(bottom_4_states), "U.S. Average")
  )
# Yearly trend for the "Children - by food security status of household" rows.
# NOTE(review): the plotted column is `Food-insecure households-percent`;
# confirm that it matches the "children experiencing food insecurity" wording
# used in the labels.
ggplot(
  filter(
    child_food_sec_trends,
    Category == "Children - by food security status of household"
  ),
  aes(x = Year, y = `Food-insecure households-percent`)
) +
  geom_line() +
  labs(
    title = "Yearly Percent of Children Experiencing Food Insecurity",
    y = "Percent of Children Experiencing Food Insecurity"
  )