1) Import and clean data
Load packages, import Excel file, skip first line, rename data file,
convert to tibble, clean up variable names This gives us a data frame
with 1714 observations of 73 variables
library(tidyverse)
library(lubridate)
library(janitor)
library(readxl)
library(rmarkdown)
library(readxl)
ep_data <- read_excel("RVTICN_EP_data.xlsx",
skip = 1)
ep_data <- ep_data %>%
clean_names()
ep_data
## # A tibble: 1,714 × 74
## session created modified ended expired ep_what ep_what_other teacher
## <chr> <chr> <chr> <chr> <lgl> <chr> <lgl> <dbl>
## 1 <NA> 2023-0… <NA> <NA> NA <NA> NA NA
## 2 okSk8ugA7x7LS_z… 2023-0… 2023-02… <NA> NA 5, 10 NA NA
## 3 <NA> 2023-0… <NA> <NA> NA <NA> NA NA
## 4 zanyTortoiseXXX… 2023-0… 2023-01… <NA> NA <NA> NA NA
## 5 darkEelXXXn1bps… 2023-0… 2023-01… <NA> NA <NA> NA NA
## 6 <NA> 2023-0… 2023-01… <NA> NA <NA> NA NA
## 7 jwfpui27QyMPIDi… 2023-0… <NA> <NA> NA <NA> NA NA
## 8 UfKwkbNB1UQO_Zc… 2023-0… 2023-01… <NA> NA <NA> NA NA
## 9 4XFTbS1lynxtwmE… 2023-0… <NA> <NA> NA <NA> NA NA
## 10 -E0cUzh4UJQpwKl… 2023-0… <NA> <NA> NA <NA> NA NA
## # ℹ 1,704 more rows
## # ℹ 66 more variables: teacher_role <dbl>, teacher_role_other <chr>,
## # teacher_center <dbl>, teacher_center_other <chr>, teacher_age <chr>,
## # teacher_suspended <dbl>, teacher_suspended_times <dbl>,
## # reasons_suspended <chr>, teacher_suspended_steps <chr>,
## # teacher_suspended_steps_other <lgl>, teacher_expelled <dbl>,
## # teacher_times_expelled <dbl>, reasons_expelled <chr>, …
2) Removing missing data
Drop rows with data missing in the session or modified column This
gives us a data frame with 94 observations of 73 variables
ep_data <-ep_data %>%
drop_na(session, modified)
ep_data
## # A tibble: 94 × 74
## session created modified ended expired ep_what ep_what_other teacher
## <chr> <chr> <chr> <chr> <lgl> <chr> <lgl> <dbl>
## 1 okSk8ugA7x7LS_z… 2023-0… 2023-02… <NA> NA 5, 10 NA NA
## 2 zanyTortoiseXXX… 2023-0… 2023-01… <NA> NA <NA> NA NA
## 3 darkEelXXXn1bps… 2023-0… 2023-01… <NA> NA <NA> NA NA
## 4 UfKwkbNB1UQO_Zc… 2023-0… 2023-01… <NA> NA <NA> NA NA
## 5 Q13gEgNnVlr2fuN… 2023-0… 2023-02… <NA> NA 13.0 NA 1
## 6 whkMSapD6z3QnFn… 2023-0… 2023-02… 2023… NA <NA> NA 1
## 7 0a_xbAPNdpM-Kxt… 2023-0… 2023-02… <NA> NA <NA> NA NA
## 8 8HbVA9v6BS98pWO… 2023-0… 2023-03… <NA> NA <NA> NA NA
## 9 0DmnsxoDUAoU2H2… 2023-0… 2023-03… <NA> NA <NA> NA NA
## 10 LyA6W1isX5jE8Ub… 2023-0… 2023-03… <NA> NA <NA> NA NA
## # ℹ 84 more rows
## # ℹ 66 more variables: teacher_role <dbl>, teacher_role_other <chr>,
## # teacher_center <dbl>, teacher_center_other <chr>, teacher_age <chr>,
## # teacher_suspended <dbl>, teacher_suspended_times <dbl>,
## # reasons_suspended <chr>, teacher_suspended_steps <chr>,
## # teacher_suspended_steps_other <lgl>, teacher_expelled <dbl>,
## # teacher_times_expelled <dbl>, reasons_expelled <chr>, …
3) Data Wrangling
Create factor variables with plan English responses, re-level factors
into normal order. This gives us a data frame with 94 observations of 91
variables
ep_data <- ep_data %>%
mutate(ep_role = case_when(teacher == 1 ~ "Teacher",
ep_child == 1 ~ "Caregiver",
ep_self == 1 ~ "Self",
TRUE ~ "No role specified"))|>
mutate(zip = case_when(zip_teach >= 1 ~ zip_teach,
zip_child >= 1 ~ zip_child,
zip_adult >= 1 ~ zip_adult))|>
mutate(ep_child_f = case_when(ep_child == 1 ~ "Yes",
ep_child == 2 ~ "No",
ep_child == 3 ~ "Unsure"))|>
mutate(adult_attributes_1 = case_when(str_detect(adult_attributes, "1") ~ 1))|>
mutate(adult_attributes_2 = case_when(str_detect(adult_attributes, "2") ~ 1))|>
mutate(adult_attributes_3 = case_when(str_detect(adult_attributes, "3") ~ 1))|>
mutate(adult_attributes_4 = case_when(str_detect(adult_attributes, "4") ~ 1))|>
mutate(adult_attributes_5 = case_when(str_detect(adult_attributes, "5") ~ 1))|>
mutate(adult_attributes_6 = case_when(str_detect(adult_attributes, "6") ~ 1))|>
mutate(adult_attributes_7 = case_when(str_detect(adult_attributes, "7") ~ 1))|>
mutate(adult_race_1 = case_when(str_detect(adult_race, "1") ~ 1))|>
mutate(adult_race_2 = case_when(str_detect(adult_race, "2") ~ 1))|>
mutate(adult_race_3 = case_when(str_detect(adult_race, "3") ~ 1))|>
mutate(adult_race_4 = case_when(str_detect(adult_race, "4") ~ 1))|>
mutate(adult_race_5 = case_when(str_detect(adult_race, "5") ~ 1))|>
mutate(adult_race_6 = case_when(str_detect(adult_race, "6") ~ 1))|>
mutate(employment_f = case_when(employment == 1 ~ "Employed Full-Time",
employment == 2 ~ "Employed Part-Time",
employment == 3 ~ "Unemployed",
employment == 4 ~ "Recieving SSI or SSDI",
employment == 5 ~ "Retired"))|>
mutate(education_f = case_when(education == 1 ~ "Some High School",
education == 2 ~ " High School Diploma",
education == 3 ~ "GED",
education == 4 ~ "Some College",
education == 5 ~ "College or Trade Certificate",
education == 6 ~ "College Degree",
education == 7 ~ "Post Graduate Degree"))|>
mutate(all_suspended = case_when(self_suspended == 1 ~ 1,
child_suspended == 1 ~ 1))
head(ep_data)
## # A tibble: 6 × 93
## session created modified ended expired ep_what ep_what_other teacher
## <chr> <chr> <chr> <chr> <lgl> <chr> <lgl> <dbl>
## 1 okSk8ugA7x7LS_zx… 2023-0… 2023-02… <NA> NA 5, 10 NA NA
## 2 zanyTortoiseXXXf… 2023-0… 2023-01… <NA> NA <NA> NA NA
## 3 darkEelXXXn1bpsS… 2023-0… 2023-01… <NA> NA <NA> NA NA
## 4 UfKwkbNB1UQO_Zcp… 2023-0… 2023-01… <NA> NA <NA> NA NA
## 5 Q13gEgNnVlr2fuNO… 2023-0… 2023-02… <NA> NA 13.0 NA 1
## 6 whkMSapD6z3QnFn0… 2023-0… 2023-02… 2023… NA <NA> NA 1
## # ℹ 85 more variables: teacher_role <dbl>, teacher_role_other <chr>,
## # teacher_center <dbl>, teacher_center_other <chr>, teacher_age <chr>,
## # teacher_suspended <dbl>, teacher_suspended_times <dbl>,
## # reasons_suspended <chr>, teacher_suspended_steps <chr>,
## # teacher_suspended_steps_other <lgl>, teacher_expelled <dbl>,
## # teacher_times_expelled <dbl>, reasons_expelled <chr>,
## # teacher_expelled_steps <chr>, teacher_expelled_steps_other <lgl>, …
4 Demographics
library(gt)
library(gtsummary)
# table
sum_adult_attributes <- ep_data |>
select(adult_attributes_1, adult_attributes_2, adult_attributes_3, adult_attributes_4, adult_attributes_5, adult_attributes_6, adult_attributes_7)|>
tbl_summary(missing = "no",
label = list(adult_attributes_1 ~ "Has a developmental disability",
adult_attributes_2 ~ "Has another type of disability",
adult_attributes_3 ~ "Has been in foster care",
adult_attributes_4 ~ "Has a history of trauma or adverse childhood experiences (ACES)",
adult_attributes_5 ~ "Has been in a childcare subsidy program through DSS",
adult_attributes_6 ~ "Has recieved therapy (speech, occupational therapy, counseling)",
adult_attributes_7 ~ "Has recieved funding through FAPT, WIC, SNAP, or medicaid"))|>
modify_header(label ~ "Do any of these statements apply to you?")
sum_adult_attributes
| Do any of these statements apply to you? |
N = 94 |
| Has a developmental disability |
1 (100%) |
| Has another type of disability |
5 (100%) |
| Has been in foster care |
1 (100%) |
| Has a history of trauma or adverse childhood experiences (ACES) |
12 (100%) |
| Has been in a childcare subsidy program through DSS |
NA (NA, NA) |
| Has recieved therapy (speech, occupational therapy, counseling) |
13 (100%) |
| Has recieved funding through FAPT, WIC, SNAP, or medicaid |
8 (100%) |
# table
sum_adult_race_f <- ep_data |>
select(adult_race_1, adult_race_2, adult_race_3, adult_race_4, adult_race_5, adult_race_6)|>
tbl_summary(missing = "no",
label = list(adult_race_1 ~ "Black or African American",
adult_race_2 ~ "Asian",
adult_race_3 ~ "Pacific Islander or Hawaiian Native",
adult_race_4 ~ "Native American or Alaska Native",
adult_race_5 ~ "White or Caucasian",
adult_race_6 ~ "Other"))
sum_adult_race_f
| Characteristic |
N = 94 |
| Black or African American |
2 (100%) |
| Asian |
NA (NA, NA) |
| Pacific Islander or Hawaiian Native |
NA (NA, NA) |
| Native American or Alaska Native |
NA (NA, NA) |
| White or Caucasian |
27 (100%) |
| Other |
NA (NA, NA) |
#table
sum_employment_f <- ep_data |>
drop_na(employment_f)|>
group_by(employment_f)|>
select(employment_f)|>
tbl_summary(missing = "no")|>
modify_header(label ~ "Are you currently employed?")
sum_employment_f
| Are you currently employed? |
N = 29 |
| employment_f |
|
| Employed Full-Time |
26 (90%) |
| Employed Part-Time |
2 (6.9%) |
| Recieving SSI or SSDI |
1 (3.4%) |
# graph
ep_data_na <- ep_data|>
drop_na(employment_f)
sum_employment_f_g <- ggplot(data = ep_data_na, aes(x = employment_f)) +
geom_bar(colour="black", fill="#DD8888", width=.8,stat="count") +
xlab("Are you currently employed?") + ylab("Count") +
ggtitle("")
sum_employment_f_g

#table
sum_education_f <- ep_data |>
drop_na(education_f)|>
group_by(education_f)|>
select(education_f)|>
tbl_summary(missing = "no")|>
modify_header(label ~ "What is your highest level of education?")
sum_education_f
| What is your highest level of education? |
N = 29 |
| education_f |
|
| High School Diploma |
1 (3.4%) |
| College Degree |
10 (34%) |
| College or Trade Certificate |
1 (3.4%) |
| Post Graduate Degree |
13 (45%) |
| Some College |
4 (14%) |
# graph
ep_data_na <- ep_data|>
drop_na(education_f)
sum_education_f_g <- ggplot(data = ep_data_na, aes(x = education_f)) +
geom_bar(colour="black", fill="#DD8888", width=.8,stat="count") +
xlab("What is your highest level of education?") + ylab("Count") +
ggtitle("")
sum_education_f_g
