Loading packages and locating the data files.

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(patchwork)
# Directory containing the downloaded interview data files (see the listing
# echoed below).
# NOTE(review): this is a hard-coded, machine-specific path; it has to be
# edited before the script can run anywhere else.
data_dir <- "/Users/rileystern/Downloads/intrvw23 3/"
list.files(data_dir)
##  [1] "expn23"       "fmli232.csv"  "fmli233.csv"  "fmli234.csv"  "fmli241.csv" 
##  [6] "itbi232.csv"  "itbi233.csv"  "itbi234.csv"  "itbi241.csv"  "itii232.csv" 
## [11] "itii233.csv"  "itii234.csv"  "itii241.csv"  "memi232.csv"  "memi233.csv" 
## [16] "memi234.csv"  "memi241.csv"  "mtbi232.csv"  "mtbi233.csv"  "mtbi234.csv" 
## [21] "mtbi241.csv"  "ntaxi232.csv" "ntaxi233.csv" "ntaxi234.csv" "ntaxi241.csv"
## [26] "para23"
# Full paths of every file in data_dir whose name starts with "fmli" and ends
# in ".csv".
fmli_files <- list.files(
  data_dir,
  pattern = "^fmli.*\\.csv$",
  full.names = TRUE
)
# list.files() returns character(0) for a missing directory or when nothing
# matches; stop here with a clear message instead of failing later on a
# missing column in an empty table.
if (length(fmli_files) == 0) {
  stop("No fmli*.csv files found in: ", data_dir, call. = FALSE)
}

# Read every CSV path in `file_list` with read.csv() (strings stay character,
# not factor) and stack the resulting data frames row-wise with bind_rows().
# Returns the single combined table.
read_and_combine <- function(file_list) {
  tables <- lapply(file_list, read.csv, stringsAsFactors = FALSE)
  bind_rows(tables)
}

Selecting columns from FMLI.

# Stack all FMLI files found above into one table.
fmli_all <- fmli_files %>% read_and_combine()
library(ggplot2)
# Keep only the four columns used in the rest of the analysis.
childcare_data2 <- select(fmli_all, NEWID, TOTEXPPQ, BBYDAYPQ, CHILDAGE)

# Rows with CHILDAGE > 0 and non-missing TOTEXPPQ / BBYDAYPQ, with two derived
# columns:
#   childcare_share - BBYDAYPQ as a fraction of TOTEXPPQ
#   expend_quintile - 1..5 bucket of TOTEXPPQ from ntile(), computed within
#                     this filtered sample (not within all of fmli_all)
childcare_users2 <- childcare_data2 %>%
  filter(
    !is.na(TOTEXPPQ),
    !is.na(BBYDAYPQ),
    # A share of total spending is undefined for a zero or negative total.
    # Without this guard the ratio below can be Inf, NaN or negative, and
    # mean(..., na.rm = TRUE) downstream does not remove Inf.
    TOTEXPPQ > 0,
    # Comparison with NA yields NA, so rows with missing CHILDAGE are
    # dropped here as well.
    CHILDAGE > 0
  ) %>%
  mutate(
    childcare_share = BBYDAYPQ / TOTEXPPQ,
    expend_quintile = ntile(TOTEXPPQ, 5)
  )

# Rows that report a strictly positive BBYDAYPQ amount.
childcare_payers2 <- filter(childcare_users2, BBYDAYPQ > 0)

# One subset of the payers per CHILDAGE value, 1 through 5.
payersage1 <- filter(childcare_payers2, CHILDAGE == 1)

payersage2 <- filter(childcare_payers2, CHILDAGE == 2)

payersage3 <- filter(childcare_payers2, CHILDAGE == 3)

payersage4 <- filter(childcare_payers2, CHILDAGE == 4)

payersage5 <- filter(childcare_payers2, CHILDAGE == 5)

# All payers whose CHILDAGE is one of 1, 2, 3, 4, 5 (the five subsets above
# taken together).
childcareages <- filter(childcare_payers2, CHILDAGE %in% 1:5)

Summarizing and plotting data.

# Mean childcare_share for every CHILDAGE x expend_quintile combination.
# Despite its name, mean_expend is an average share, not an amount.
# .groups = "drop" returns an ungrouped table.
sumdata2 <- summarise(
  group_by(childcareages, CHILDAGE, expend_quintile),
  mean_expend = mean(childcare_share, na.rm = TRUE),
  .groups = "drop"
)

# Dodged bar chart: one cluster of bars per expenditure quintile, one bar per
# CHILDAGE value within each cluster.
# mean_expend is the mean of childcare_share (BBYDAYPQ / TOTEXPPQ), so the
# y axis is a share of total expenditure, not an amount; the axis label and
# title say so.
# NOTE(review): CHILDAGE looks like a survey category code rather than an age
# in years -- confirm against the data dictionary and relabel the legend
# (or recode the factor levels) if so.
ggplot(
  sumdata2,
  aes(
    x = factor(expend_quintile),
    y = mean_expend,
    fill = factor(CHILDAGE)
  )
) +
  geom_col(position = "dodge") +
  scale_fill_brewer(palette = "Set1") +
  labs(
    x = "Expenditure Quintile",
    y = "Mean Childcare Share of Total Expenditure",
    fill = "Child Age",
    title = "Mean Childcare Share of Expenditure by Quintile and Child Age"
  ) +
  theme_minimal()