Loading packages and locating the data files.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(patchwork)
# Directory holding the input files; list it to see what is available.
data_dir <- "/Users/rileystern/Downloads/intrvw23 3/"
list.files(path = data_dir)
## [1] "expn23" "fmli232.csv" "fmli233.csv" "fmli234.csv" "fmli241.csv"
## [6] "itbi232.csv" "itbi233.csv" "itbi234.csv" "itbi241.csv" "itii232.csv"
## [11] "itii233.csv" "itii234.csv" "itii241.csv" "memi232.csv" "memi233.csv"
## [16] "memi234.csv" "memi241.csv" "mtbi232.csv" "mtbi233.csv" "mtbi234.csv"
## [21] "mtbi241.csv" "ntaxi232.csv" "ntaxi233.csv" "ntaxi234.csv" "ntaxi241.csv"
## [26] "para23"
# Full paths of every file in data_dir whose name starts with "fmli" and
# ends in ".csv".
fmli_files <- list.files(
  data_dir,
  pattern = "^fmli.*\\.csv$",
  full.names = TRUE
)
# list.files() returns character(0) for a missing or empty directory; without
# this check the script would only fail later, when columns are selected from
# an empty data frame.
if (length(fmli_files) == 0) {
  stop("No fmli*.csv files found in ", data_dir, call. = FALSE)
}
# Read each CSV named in `file_list` and stack the results row-wise.
#
# file_list: character vector of CSV file paths.
# Returns: a single data frame produced by bind_rows() over all files
#          (strings are kept as character, not converted to factors).
read_and_combine <- function(file_list) {
  read_one <- function(path) {
    read.csv(path, stringsAsFactors = FALSE)
  }
  tables <- lapply(file_list, read_one)
  bind_rows(tables)
}
Reading the FMLI files and selecting columns.
# Stack all matched FMLI files into one data frame.
fmli_all <- read_and_combine(file_list = fmli_files)
library(ggplot2)
# Narrow the combined FMLI data to the four columns kept for this analysis.
childcare_data2 <- select(fmli_all, NEWID, TOTEXPPQ, BBYDAYPQ, CHILDAGE)
# Rows with non-missing total and childcare spending and a positive CHILDAGE
# code, plus two derived columns:
#   childcare_share - BBYDAYPQ as a fraction of TOTEXPPQ
#   expend_quintile - 1..5 bucket of TOTEXPPQ, ranked within these rows
childcare_users2 <- childcare_data2 %>%
  filter(!is.na(TOTEXPPQ), !is.na(BBYDAYPQ), CHILDAGE > 0) %>%
  mutate(
    # A zero or negative total would give Inf, NaN or a negative share; mark
    # those NA so that averages taken with na.rm = TRUE skip them. The row
    # set (and therefore the quintile assignment) is left unchanged.
    childcare_share = if_else(TOTEXPPQ > 0, BBYDAYPQ / TOTEXPPQ, NA_real_),
    expend_quintile = ntile(TOTEXPPQ, 5)
  )
# Rows with strictly positive childcare spending (BBYDAYPQ > 0).
childcare_payers2 <- filter(childcare_users2, BBYDAYPQ > 0)

# One subset of payers per CHILDAGE code, 1 through 5.
payersage1 <- filter(childcare_payers2, CHILDAGE == 1)
payersage2 <- filter(childcare_payers2, CHILDAGE == 2)
payersage3 <- filter(childcare_payers2, CHILDAGE == 3)
payersage4 <- filter(childcare_payers2, CHILDAGE == 4)
payersage5 <- filter(childcare_payers2, CHILDAGE == 5)

# All payers whose CHILDAGE code is one of 1..5, kept in a single table.
childcareages <- filter(childcare_payers2, CHILDAGE %in% 1:5)
Summarizing and plotting data.
# Average childcare share for each CHILDAGE x expenditure-quintile cell;
# the result is returned ungrouped.
sumdata2 <- summarise(
  group_by(childcareages, CHILDAGE, expend_quintile),
  mean_expend = mean(childcare_share, na.rm = TRUE),
  .groups = "drop"
)
# Dodged bar chart: one cluster per expenditure quintile, one bar per
# CHILDAGE code. `mean_expend` holds the mean of childcare_share
# (BBYDAYPQ / TOTEXPPQ), i.e. a fraction of total spending rather than a
# dollar amount, so the y-axis and title are labelled as a share.
# NOTE(review): the legend says "Child Age" but CHILDAGE is plotted as a
# discrete code; confirm against the data dictionary what each code means.
ggplot(
  sumdata2,
  aes(x = factor(expend_quintile), y = mean_expend, fill = factor(CHILDAGE))
) +
  geom_col(position = "dodge") +
  scale_fill_brewer(palette = "Set1") +
  labs(
    x = "Expenditure Quintile",
    y = "Mean Childcare Share of Total Expenditure",
    fill = "Child Age",
    title = "Mean Childcare Share of Expenditure by Quintile and Age"
  ) +
  theme_minimal()