1) Import and clean data

Load packages, import Excel file, skip first line, rename data file, convert to tibble, clean up variable names This gives us a data frame with 1714 observations of 73 variables

library(tidyverse)
library(lubridate)
library(janitor)
library(readxl)
library(rmarkdown)
library(readxl)

ep_data <- read_excel("RVTICN_EP_data.xlsx", 
    skip = 1)
ep_data <- ep_data %>%
  clean_names() 

ep_data
## # A tibble: 1,714 × 74
##    session          created modified ended expired ep_what ep_what_other teacher
##    <chr>            <chr>   <chr>    <chr> <lgl>   <chr>   <lgl>           <dbl>
##  1 <NA>             2023-0… <NA>     <NA>  NA      <NA>    NA                 NA
##  2 okSk8ugA7x7LS_z… 2023-0… 2023-02… <NA>  NA      5, 10   NA                 NA
##  3 <NA>             2023-0… <NA>     <NA>  NA      <NA>    NA                 NA
##  4 zanyTortoiseXXX… 2023-0… 2023-01… <NA>  NA      <NA>    NA                 NA
##  5 darkEelXXXn1bps… 2023-0… 2023-01… <NA>  NA      <NA>    NA                 NA
##  6 <NA>             2023-0… 2023-01… <NA>  NA      <NA>    NA                 NA
##  7 jwfpui27QyMPIDi… 2023-0… <NA>     <NA>  NA      <NA>    NA                 NA
##  8 UfKwkbNB1UQO_Zc… 2023-0… 2023-01… <NA>  NA      <NA>    NA                 NA
##  9 4XFTbS1lynxtwmE… 2023-0… <NA>     <NA>  NA      <NA>    NA                 NA
## 10 -E0cUzh4UJQpwKl… 2023-0… <NA>     <NA>  NA      <NA>    NA                 NA
## # ℹ 1,704 more rows
## # ℹ 66 more variables: teacher_role <dbl>, teacher_role_other <chr>,
## #   teacher_center <dbl>, teacher_center_other <chr>, teacher_age <chr>,
## #   teacher_suspended <dbl>, teacher_suspended_times <dbl>,
## #   reasons_suspended <chr>, teacher_suspended_steps <chr>,
## #   teacher_suspended_steps_other <lgl>, teacher_expelled <dbl>,
## #   teacher_times_expelled <dbl>, reasons_expelled <chr>, …

2) Removing missing data

Drop rows with data missing in the session or modified column This gives us a data frame with 94 observations of 73 variables

ep_data <-ep_data %>%
  drop_na(session, modified)

ep_data
## # A tibble: 94 × 74
##    session          created modified ended expired ep_what ep_what_other teacher
##    <chr>            <chr>   <chr>    <chr> <lgl>   <chr>   <lgl>           <dbl>
##  1 okSk8ugA7x7LS_z… 2023-0… 2023-02… <NA>  NA      5, 10   NA                 NA
##  2 zanyTortoiseXXX… 2023-0… 2023-01… <NA>  NA      <NA>    NA                 NA
##  3 darkEelXXXn1bps… 2023-0… 2023-01… <NA>  NA      <NA>    NA                 NA
##  4 UfKwkbNB1UQO_Zc… 2023-0… 2023-01… <NA>  NA      <NA>    NA                 NA
##  5 Q13gEgNnVlr2fuN… 2023-0… 2023-02… <NA>  NA      13.0    NA                  1
##  6 whkMSapD6z3QnFn… 2023-0… 2023-02… 2023… NA      <NA>    NA                  1
##  7 0a_xbAPNdpM-Kxt… 2023-0… 2023-02… <NA>  NA      <NA>    NA                 NA
##  8 8HbVA9v6BS98pWO… 2023-0… 2023-03… <NA>  NA      <NA>    NA                 NA
##  9 0DmnsxoDUAoU2H2… 2023-0… 2023-03… <NA>  NA      <NA>    NA                 NA
## 10 LyA6W1isX5jE8Ub… 2023-0… 2023-03… <NA>  NA      <NA>    NA                 NA
## # ℹ 84 more rows
## # ℹ 66 more variables: teacher_role <dbl>, teacher_role_other <chr>,
## #   teacher_center <dbl>, teacher_center_other <chr>, teacher_age <chr>,
## #   teacher_suspended <dbl>, teacher_suspended_times <dbl>,
## #   reasons_suspended <chr>, teacher_suspended_steps <chr>,
## #   teacher_suspended_steps_other <lgl>, teacher_expelled <dbl>,
## #   teacher_times_expelled <dbl>, reasons_expelled <chr>, …

3) Data Wrangling

Create factor variables with plan English responses, re-level factors into normal order. This gives us a data frame with 94 observations of 91 variables

ep_data <- ep_data %>%
  mutate(ep_role = case_when(teacher == 1 ~ "Teacher", 
                               ep_child == 1 ~ "Caregiver",
                               ep_self == 1 ~ "Self",
                               TRUE ~ "No role specified"))|>
  mutate(zip = case_when(zip_teach  >= 1 ~ zip_teach,
                         zip_child  >= 1 ~ zip_child,
                         zip_adult  >= 1 ~ zip_adult))|>
  mutate(ep_child_f = case_when(ep_child == 1 ~ "Yes",
                                ep_child == 2 ~ "No",
                                ep_child == 3 ~ "Unsure"))|>
  mutate(adult_attributes_1 = case_when(str_detect(adult_attributes, "1") ~ 1))|>
  mutate(adult_attributes_2 = case_when(str_detect(adult_attributes, "2") ~ 1))|>
  mutate(adult_attributes_3 = case_when(str_detect(adult_attributes, "3") ~ 1))|>
  mutate(adult_attributes_4 = case_when(str_detect(adult_attributes, "4") ~ 1))|>
  mutate(adult_attributes_5 = case_when(str_detect(adult_attributes, "5") ~ 1))|>
  mutate(adult_attributes_6 = case_when(str_detect(adult_attributes, "6") ~ 1))|>
  mutate(adult_attributes_7 = case_when(str_detect(adult_attributes, "7") ~ 1))|>
  mutate(adult_race_1 = case_when(str_detect(adult_race, "1") ~ 1))|>
  mutate(adult_race_2 = case_when(str_detect(adult_race, "2") ~ 1))|>
  mutate(adult_race_3 = case_when(str_detect(adult_race, "3") ~ 1))|>
  mutate(adult_race_4 = case_when(str_detect(adult_race, "4") ~ 1))|>
  mutate(adult_race_5 = case_when(str_detect(adult_race, "5") ~ 1))|>
  mutate(adult_race_6 = case_when(str_detect(adult_race, "6") ~ 1))|>
  mutate(employment_f = case_when(employment == 1 ~ "Employed Full-Time",
                                  employment == 2 ~ "Employed Part-Time",
                                  employment == 3 ~ "Unemployed",
                                  employment == 4 ~ "Recieving SSI or SSDI",
                                  employment == 5 ~ "Retired"))|>
  mutate(education_f = case_when(education == 1 ~ "Some High School",
                                 education == 2 ~ " High School Diploma",
                                 education == 3 ~ "GED",
                                 education == 4 ~ "Some College",
                                 education == 5 ~ "College or Trade Certificate",
                                 education == 6 ~ "College Degree",
                                 education == 7 ~ "Post Graduate Degree"))|>
  mutate(all_suspended = case_when(self_suspended == 1 ~ 1,
                                   child_suspended == 1 ~ 1))
 
head(ep_data)
## # A tibble: 6 × 93
##   session           created modified ended expired ep_what ep_what_other teacher
##   <chr>             <chr>   <chr>    <chr> <lgl>   <chr>   <lgl>           <dbl>
## 1 okSk8ugA7x7LS_zx… 2023-0… 2023-02… <NA>  NA      5, 10   NA                 NA
## 2 zanyTortoiseXXXf… 2023-0… 2023-01… <NA>  NA      <NA>    NA                 NA
## 3 darkEelXXXn1bpsS… 2023-0… 2023-01… <NA>  NA      <NA>    NA                 NA
## 4 UfKwkbNB1UQO_Zcp… 2023-0… 2023-01… <NA>  NA      <NA>    NA                 NA
## 5 Q13gEgNnVlr2fuNO… 2023-0… 2023-02… <NA>  NA      13.0    NA                  1
## 6 whkMSapD6z3QnFn0… 2023-0… 2023-02… 2023… NA      <NA>    NA                  1
## # ℹ 85 more variables: teacher_role <dbl>, teacher_role_other <chr>,
## #   teacher_center <dbl>, teacher_center_other <chr>, teacher_age <chr>,
## #   teacher_suspended <dbl>, teacher_suspended_times <dbl>,
## #   reasons_suspended <chr>, teacher_suspended_steps <chr>,
## #   teacher_suspended_steps_other <lgl>, teacher_expelled <dbl>,
## #   teacher_times_expelled <dbl>, reasons_expelled <chr>,
## #   teacher_expelled_steps <chr>, teacher_expelled_steps_other <lgl>, …

4 Demographics

library(gt)
library(gtsummary)

# table
sum_adult_attributes <- ep_data |>
  select(adult_attributes_1, adult_attributes_2, adult_attributes_3, adult_attributes_4, adult_attributes_5, adult_attributes_6, adult_attributes_7)|>
  tbl_summary(missing = "no",
              label = list(adult_attributes_1 ~ "Has a developmental disability", 
                           adult_attributes_2 ~ "Has another type of disability",
                           adult_attributes_3 ~ "Has been in foster care",
                           adult_attributes_4 ~ "Has a history of trauma or adverse childhood experiences (ACES)",
                           adult_attributes_5 ~ "Has been in a childcare subsidy program through DSS",
                           adult_attributes_6 ~ "Has recieved therapy (speech, occupational therapy, counseling)",
                           adult_attributes_7 ~ "Has recieved funding through FAPT, WIC, SNAP, or medicaid"))|>
  modify_header(label ~ "Do any of these statements apply to you?")

sum_adult_attributes
Do any of these statements apply to you? N = 941
Has a developmental disability 1 (100%)
Has another type of disability 5 (100%)
Has been in foster care 1 (100%)
Has a history of trauma or adverse childhood experiences (ACES) 12 (100%)
Has been in a childcare subsidy program through DSS NA (NA, NA)
Has recieved therapy (speech, occupational therapy, counseling) 13 (100%)
Has recieved funding through FAPT, WIC, SNAP, or medicaid 8 (100%)
1 n (%); Median (IQR)
# table
sum_adult_race_f <- ep_data |>
  select(adult_race_1, adult_race_2, adult_race_3, adult_race_4, adult_race_5, adult_race_6)|>
  tbl_summary(missing = "no",
              label = list(adult_race_1 ~ "Black or African American", 
                           adult_race_2 ~ "Asian",
                           adult_race_3 ~ "Pacific Islander or Hawaiian Native",
                           adult_race_4 ~ "Native American or Alaska Native",
                           adult_race_5 ~ "White or Caucasian",
                           adult_race_6 ~ "Other"))

sum_adult_race_f
Characteristic N = 941
Black or African American 2 (100%)
Asian NA (NA, NA)
Pacific Islander or Hawaiian Native NA (NA, NA)
Native American or Alaska Native NA (NA, NA)
White or Caucasian 27 (100%)
Other NA (NA, NA)
1 n (%); Median (IQR)
#table
sum_employment_f <- ep_data |> 
  drop_na(employment_f)|>
  group_by(employment_f)|>
  select(employment_f)|>
  tbl_summary(missing = "no")|>
  modify_header(label ~ "Are you currently employed?")

sum_employment_f
Are you currently employed? N = 291
employment_f
    Employed Full-Time 26 (90%)
    Employed Part-Time 2 (6.9%)
    Recieving SSI or SSDI 1 (3.4%)
1 n (%)
# graph
ep_data_na <- ep_data|>
  drop_na(employment_f)

sum_employment_f_g <- ggplot(data = ep_data_na, aes(x = employment_f)) + 
    geom_bar(colour="black", fill="#DD8888", width=.8,stat="count") +
    xlab("Are you currently employed?") + ylab("Count") +
    ggtitle("")

sum_employment_f_g

#table
sum_education_f <- ep_data |> 
  drop_na(education_f)|>
  group_by(education_f)|>
  select(education_f)|>
  tbl_summary(missing = "no")|>
  modify_header(label ~ "What is your highest level of education?")

sum_education_f
What is your highest level of education? N = 291
education_f
     High School Diploma 1 (3.4%)
    College Degree 10 (34%)
    College or Trade Certificate 1 (3.4%)
    Post Graduate Degree 13 (45%)
    Some College 4 (14%)
1 n (%)
# graph
ep_data_na <- ep_data|>
  drop_na(education_f)

sum_education_f_g <- ggplot(data = ep_data_na, aes(x = education_f)) + 
    geom_bar(colour="black", fill="#DD8888", width=.8,stat="count") +
    xlab("What is your highest level of education?") + ylab("Count") +
    ggtitle("")

sum_education_f_g