Necessary Packages:

library(readxl)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.1

Loading Data:

# Read sheets 2 and 3 of the food-security workbook into separate tibbles.
food_sec_all_households <- read_excel(
  path = "foodsecurity_data_file_2022.xlsx",
  sheet = 2
)
food_sec_hh_w_children <- read_excel(
  path = "foodsecurity_data_file_2022.xlsx",
  sheet = 3
)
## New names:
## • `` -> `...13`
## • `` -> `...14`
# Read sheets 4, 5 and 6 of the same workbook.
child_food_sec_trends <- read_excel(
  path = "foodsecurity_data_file_2022.xlsx",
  sheet = 4
)
ed_emp_disability <- read_excel(
  path = "foodsecurity_data_file_2022.xlsx",
  sheet = 5
)
food_sec_by_state <- read_excel(
  path = "foodsecurity_data_file_2022.xlsx",
  sheet = 6
)

Initial Views of Data:

# Preview the first six rows of the all-households table.
head(food_sec_all_households, n = 6)
## # A tibble: 6 × 13
##    Year Category        Subcategory `Sub-subcategory`  Total `Food secure-1,000`
##   <dbl> <chr>           <chr>       <chr>              <dbl>               <dbl>
## 1  2001 All households  <NA>        <NA>              107824               96303
## 2  2001 Household comp… With child… <NA>               38330               32141
## 3  2001 Household comp… With child… With children < …  16858               13920
## 4  2001 Household comp… With child… Married-couple f…  26182               23389
## 5  2001 Household comp… With child… Female head, no …   9080                6185
## 6  2001 Household comp… With child… Male head, no sp…   2389                2009
## # ℹ 7 more variables: `Food secure-percent` <dbl>, `Food insecure-1,000` <dbl>,
## #   `Food insecure-percent` <dbl>, `Low food security-1,000` <chr>,
## #   `Low food security-percent` <chr>, `Very low food security-1,000` <chr>,
## #   `Very low food security-percent` <chr>
# Preview the first six rows of the state-level table.
head(food_sec_by_state, n = 6)
## # A tibble: 6 × 6
##   Year      State      `Food insecurity prevalence` Food insecurity–margin of …¹
##   <chr>     <chr>                             <dbl> <chr>                       
## 1 2006–2008 U.S. total                         12.2 0.25                        
## 2 2006–2008 AK                                 11.6 1.66                        
## 3 2006–2008 AL                                 13.3 1.66                        
## 4 2006–2008 AR                                 15.9 3.19                        
## 5 2006–2008 AZ                                 13.2 1.51                        
## 6 2006–2008 CA                                 12   0.74                        
## # ℹ abbreviated name: ¹​`Food insecurity–margin of error`
## # ℹ 2 more variables: `Very low food security prevalence` <dbl>,
## #   `Very low food security–margin of error` <dbl>
# Preview the first six rows of the households-with-children table.
head(food_sec_hh_w_children, n = 6)
## # A tibble: 6 × 14
##    Year Category Subcategory Total Food-secure househol…¹ Food-secure househol…²
##   <dbl> <chr>    <chr>       <dbl>                  <dbl>                  <dbl>
## 1  2008 All hou… <NA>        39699                  31364                   79  
## 2  2008 Househo… With child… 17503                  13595                   77.7
## 3  2008 Househo… Married-co… 26705                  22886                   85.7
## 4  2008 Househo… Female hea…  9639                   6057                   62.8
## 5  2008 Househo… Male head,…  2782                   2014                   72.4
## 6  2008 Househo… Other hous…   572                    405                   70.8
## # ℹ abbreviated names: ¹​`Food-secure households-1,000`,
## #   ²​`Food-secure households-percent`
## # ℹ 8 more variables: `Food-insecure households-1,000` <dbl>,
## #   `Food-insecure households-percent` <dbl>,
## #   `Households with food-insecure children-1,000` <chr>,
## #   `Households with food-insecure children-percent` <chr>,
## #   `Households with very low food security among children-1,000` <chr>, …
# Preview the first six rows of the child food-security trends table.
head(child_food_sec_trends, n = 6)
## # A tibble: 6 × 11
##    Year Category             Total Food-secure househol…¹ Food-secure househol…²
##   <dbl> <chr>                <dbl>                  <dbl>                  <dbl>
## 1  1998 Children - by food … 71282                  57255                   80.3
## 2  1998 Households with Chi… 38036                  31335                   82.4
## 3  1999 Children - by food … 71418                  59344                   83.1
## 4  1999 Households with Chi… 37884                  32290                   85.2
## 5  2000 Children - by food … 71763                  58867                   82  
## 6  2000 Households with Chi… 38113                  31942                   83.8
## # ℹ abbreviated names: ¹​`Food-secure households-1,000`,
## #   ²​`Food-secure households-percent`
## # ℹ 6 more variables: `Food-insecure households-1,000` <dbl>,
## #   `Food-insecure households-percent` <dbl>,
## #   `Households with food-insecure children-1,000` <dbl>,
## #   `Households with food-insecure children-percent` <dbl>,
## #   `Households with very low food security among children-1,000` <dbl>, …
# Preview the first six rows of the education/employment/disability table.
head(ed_emp_disability, n = 6)
## # A tibble: 6 × 11
##    Year Category      Subcategory `Sub-subcategory`  Total `Food insecure-1,000`
##   <dbl> <chr>         <chr>       <chr>              <dbl>                 <dbl>
## 1  2017 All househol… All househ… All households    1.27e5                15018.
## 2  2017 All househol… Employment  Full-time         8.37e4                 7871.
## 3  2017 All househol… Employment  Retired           2.69e4                 2002.
## 4  2017 All househol… Employment  Part-time non-ec… 5.67e3                 1055.
## 5  2017 All househol… Employment  Part-time econom… 1.16e3                  447.
## 6  2017 All househol… Employment  Unemployed        1.81e3                  775.
## # ℹ 5 more variables: `Food insecure-percent` <dbl>,
## #   `Food insecure-share` <dbl>, `Very low food security-1,000` <dbl>,
## #   `Very low food security-percent` <dbl>,
## #   `Very low food security-share` <dbl>

Checking for NA Values:

# Collect the tables in a named list. Using c() here would splice the tibbles
# into one flat list of columns, so a loop over it would visit bare vectors
# (for which colnames() is NULL) and the check could never report anything.
dataframes <- list(
  food_sec_all_households = food_sec_all_households,
  food_sec_by_state = food_sec_by_state,
  food_sec_hh_w_children = food_sec_hh_w_children,
  child_food_sec_trends = child_food_sec_trends,
  ed_emp_disability = ed_emp_disability
)

# For every table, count the NA cells per column and print only the columns
# that actually contain missing values, labelled with the table's name.
for (table_name in names(dataframes)) {
  na_counts <- colSums(is.na(dataframes[[table_name]]))
  na_counts <- na_counts[na_counts > 0]
  if (length(na_counts) > 0) {
    print(table_name)
    print(na_counts)
  }
}

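Missing values are present: the previews above show `NA` in the `Subcategory` and `Sub-subcategory` columns of `food_sec_all_households` (for example, in the "All households" rows).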

Filtering Down “food_sec_all_households”:

# Show the table as loaded, before any column types are changed.
print(food_sec_all_households)
## # A tibble: 660 × 13
##     Year Category       Subcategory `Sub-subcategory`  Total `Food secure-1,000`
##    <dbl> <chr>          <chr>       <chr>              <dbl>               <dbl>
##  1  2001 All households <NA>        <NA>              107824               96303
##  2  2001 Household com… With child… <NA>               38330               32141
##  3  2001 Household com… With child… With children < …  16858               13920
##  4  2001 Household com… With child… Married-couple f…  26182               23389
##  5  2001 Household com… With child… Female head, no …   9080                6185
##  6  2001 Household com… With child… Male head, no sp…   2389                2009
##  7  2001 Household com… With child… Other household …    678                 555
##  8  2001 Household com… With no ch… <NA>               69495               64163
##  9  2001 Household com… With no ch… More than one ad…  40791               38328
## 10  2001 Household com… With no ch… Women living alo…  16513               14915
## # ℹ 650 more rows
## # ℹ 7 more variables: `Food secure-percent` <dbl>, `Food insecure-1,000` <dbl>,
## #   `Food insecure-percent` <dbl>, `Low food security-1,000` <chr>,
## #   `Low food security-percent` <chr>, `Very low food security-1,000` <chr>,
## #   `Very low food security-percent` <chr>
# The two percent columns below were read as character; convert them to
# numeric so they can be plotted. Entries that are not valid numbers become NA
# (R reports this with an "NAs introduced by coercion" warning).
# across() replaces the superseded mutate_at().
food_sec_all_households <- food_sec_all_households %>%
  #select(c(Year, Category, Subcategory, `Sub-subcategory`, `Food secure-percent`, `Food insecure-percent`, `Low food security-percent`, `Very low food security-percent`)) %>%
  mutate(across(
    c(`Low food security-percent`, `Very low food security-percent`),
    as.numeric
  ))
## Warning: There were 2 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `Low food security-percent = .Primitive("as.double")(`Low food
##   security-percent`)`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.
# Slice the household table by Category; each slice feeds one of the plots below.
all_households <- subset(
  food_sec_all_households,
  Category == 'All households'
)

all_household_compostion <- subset(
  food_sec_all_households,
  Category == 'Household composition'
)

all_household_race <- subset(
  food_sec_all_households,
  Category == 'Race/ethnicity of households'
)

# Year holds a three-year range whose first four characters are the start year;
# start year + 1 is the midpoint, stored as a numeric MidYear column.
food_sec_by_state <- food_sec_by_state %>%
  mutate(MidYear = as.numeric(substr(Year, 1, 4)) + 1)

Plot: Yearly U.S. Food Insecurity

# Line chart of the food-insecure percentage per year for the
# 'All households' rows only.
food_sec_all_households %>%
  filter(Category == 'All households') %>%
  ggplot(aes(x = Year, y = `Food insecure-percent`)) +
  geom_line() +
  labs(
    title = 'Yearly U.S. Food Insecurity',
    y = 'Food Insecurity Percentage'
  )

Plot: Yearly Food Insecurity Level Percentages

# Overlay three percentage series for all households on one set of axes.
# Each layer passes a constant string as its colour aesthetic, which becomes
# that line's legend label.
ggplot(all_households, mapping = aes(x = Year)) +
  # geom_line(aes(y = `Food secure-percent`, color = 'Percent Food Secure')) +
  geom_line(
    mapping = aes(y = `Food insecure-percent`, colour = 'Percent Food Insecure')
  ) +
  geom_line(
    mapping = aes(y = `Low food security-percent`, colour = 'Percent Low Food Security')
  ) +
  geom_line(
    mapping = aes(y = `Very low food security-percent`, colour = 'Percent Very Low Food Security')
  ) +
  labs(
    title = 'Yearly Food Insecurity Level Percentages',
    y = 'Percent of all Households',
    colour = 'Food Security Level'
  )

Plot: Yearly Food Insecurity Percentages by Household Composition

# Rows for one household-composition subcategory, restricted to the rows with
# no sub-subcategory.
composition_series <- function(group_label) {
  filter(
    all_household_compostion,
    Subcategory == group_label,
    is.na(`Sub-subcategory`)
  )
}

# One line per household-composition group; x and y are shared by all layers,
# and each layer's constant colour string is its legend label.
ggplot(mapping = aes(x = Year, y = `Food insecure-percent`)) +
  geom_line(
    data = composition_series('With children < 18 years'),
    mapping = aes(colour = 'With children < 18 years')
  ) +
  geom_line(
    data = composition_series('With no children < 18 years'),
    mapping = aes(colour = 'With no children < 18 years')
  ) +
  geom_line(
    data = composition_series('With elderly'),
    mapping = aes(colour = 'With elderly')
  ) +
  labs(
    title = 'Yearly Food Insecurity Percentages by Household Composition',
    y = 'Percent Food Insecure',
    colour = 'Household Composition'
  )

Plot: Yearly Food Insecurity Percentages by Ethnicity

# 'All households' rows, used as the U.S. average line in the ethnicity plot.
food_sec_all_households_gen <- subset(
  food_sec_all_households,
  Category == 'All households'
)

food_sec_all_households_gen
## # A tibble: 22 × 13
##     Year Category       Subcategory `Sub-subcategory`  Total `Food secure-1,000`
##    <dbl> <chr>          <chr>       <chr>              <dbl>               <dbl>
##  1  2001 All households <NA>        <NA>              107824               96303
##  2  2002 All households <NA>        <NA>              108601               96543
##  3  2003 All households <NA>        <NA>              112214               99631
##  4  2004 All households <NA>        <NA>              112967               99473
##  5  2005 All households <NA>        <NA>              114437              101851
##  6  2006 All households <NA>        <NA>              115609              102961
##  7  2007 All households <NA>        <NA>              117100              104089
##  8  2008 All households <NA>        <NA>              117565              100416
##  9  2009 All households <NA>        <NA>              118174              100820
## 10  2010 All households <NA>        <NA>              118756              101527
## # ℹ 12 more rows
## # ℹ 7 more variables: `Food secure-percent` <dbl>, `Food insecure-1,000` <dbl>,
## #   `Food insecure-percent` <dbl>, `Low food security-1,000` <chr>,
## #   `Low food security-percent` <dbl>, `Very low food security-1,000` <chr>,
## #   `Very low food security-percent` <dbl>
# Food-insecure percentage per year for each race/ethnicity subcategory,
# plotted against the all-households series. x and y are shared by every
# layer; the named colour vector pins each legend entry to a fixed colour.
ggplot(mapping = aes(x = Year, y = `Food insecure-percent`)) +
  geom_line(
    data = food_sec_all_households_gen,
    mapping = aes(colour = 'U.S. Average')
  ) +
  geom_line(
    data = filter(all_household_race, Subcategory == 'White non-Hispanic'),
    mapping = aes(colour = 'White non-Hispanic')
  ) +
  geom_line(
    data = filter(all_household_race, Subcategory == 'Black non-Hispanic'),
    mapping = aes(colour = 'Black non-Hispanic')
  ) +
  geom_line(
    data = filter(all_household_race, Subcategory == 'Hispanic'),
    mapping = aes(colour = 'Hispanic')
  ) +
  geom_line(
    data = filter(all_household_race, Subcategory == 'Other'),
    mapping = aes(colour = 'Other Ethnicities')
  ) +
  labs(
    title = 'Yearly Food Insecurity Percentages by Ethnicity',
    y = 'Food Insecurity (%)',
    colour = 'Ethnicity'
  ) +
  scale_color_manual(
    values = c(
      "Black non-Hispanic" = "red",
      "Hispanic" = "blue",
      "White non-Hispanic" = "green",
      "Other Ethnicities" = "purple",
      "U.S. Average" = "black"
    ),
    breaks = c("Black non-Hispanic", "Hispanic", "White non-Hispanic", "Other Ethnicities", "U.S. Average")
  )

Cleaning food_sec_by_state

# Shorten the national row's label from 'U.S. total' to 'U.S.'.
food_sec_by_state <- food_sec_by_state %>%
  mutate(State = replace(State, State %in% 'U.S. total', 'U.S.'))

Plot: Top 4 States in terms of Food Insecurity by Three-Year Period

# Mean food-insecurity prevalence per State across all periods, highest first.
# na.rm = TRUE keeps a single missing period from turning a state's mean into NA.
state_fi_means <- food_sec_by_state %>%
  group_by(State) %>%
  summarize(state_mean = mean(`Food insecurity prevalence`, na.rm = TRUE)) %>%
  arrange(desc(state_mean))

# The four highest-ranked states. The national 'U.S.' row is skipped so it can
# never take a slot; it is drawn separately as the reference line.
top_4_states <- as.character(
  head(filter(state_fi_means, State != 'U.S.'), 4)$State
)

# MidYear (midpoint of each three-year range) is already added to
# food_sec_by_state earlier in the script, so it is not recomputed here.

# One line per selected state plus the national series, drawn as a single
# layer. Legend breaks are derived from top_4_states instead of hard-coded
# state codes, so the colours stay attached to the right lines if the ranking
# changes. The colours are assigned to the breaks in order.
food_sec_by_state %>%
  filter(State %in% c('U.S.', top_4_states)) %>%
  mutate(Series = replace(State, State == 'U.S.', 'U.S. Average')) %>%
  ggplot(aes(x = MidYear, y = `Food insecurity prevalence`, color = Series)) +
  geom_line() +
  labs(
    x = 'Year',
    y = 'Food Insecurity Percent',
    title = 'Food Insecurity Prevalence by Year (Top Four States)',
    colour = 'State'
  ) +
  scale_color_manual(
    values = c("red", "blue", "green", "purple", "black"),
    breaks = c(sort(top_4_states), "U.S. Average")
  )

Plot: Bottom 4 States in terms of Food Insecurity by Three-Year Period

# The four lowest-ranked states from state_fi_means (sorted highest first, so
# the lowest are at the tail). The national 'U.S.' row and any state whose mean
# is NA (arrange() places those last) are skipped so they cannot take a slot.
bottom_4_states <- as.character(
  tail(filter(state_fi_means, State != 'U.S.', !is.na(state_mean)), 4)$State
)

# One line per selected state plus the national series, drawn as a single
# layer. Legend breaks are derived from bottom_4_states instead of hard-coded
# state codes, so the colours stay attached to the right lines if the ranking
# changes. The colours are assigned to the breaks in order.
food_sec_by_state %>%
  filter(State %in% c('U.S.', bottom_4_states)) %>%
  mutate(Series = replace(State, State == 'U.S.', 'U.S. Average')) %>%
  ggplot(aes(x = MidYear, y = `Food insecurity prevalence`, color = Series)) +
  geom_line() +
  labs(
    x = 'Year',
    y = 'Food Insecurity Percent',
    title = 'Food Insecurity Prevalence by Year (Bottom Four States)',
    colour = 'State'
  ) +
  scale_color_manual(
    values = c("red", "blue", "green", "purple", "black"),
    breaks = c(sort(bottom_4_states), "U.S. Average")
  )

Plot: Yearly Percent of Children Experiencing Food Insecurity

# Yearly food-insecure percentage for the rows whose Category is
# 'Children - by food security status of household'.
child_food_sec_trends %>%
  filter(Category == 'Children - by food security status of household') %>%
  ggplot() +
  geom_line(mapping = aes(x = Year, y = `Food-insecure households-percent`)) +
  labs(
    title = 'Yearly Percent of Children Experiencing Food Insecurity',
    y = 'Percent of Children Experiencing Food Insecurity'
  )