1) Import and clean data

Load the packages, import the Excel file (skipping its first line) into a tibble named `ep_data`, and clean up the variable names. This gives us a data frame with 1,714 observations of 74 variables.

library(tidyverse)
library(lubridate)
library(janitor)
library(readxl)
library(rmarkdown)
library(readxl)
library(scales)
library(gt)
library(gtsummary)
library(ggplot2)
library(ggmap)
library(plotly)
library(tidygeocoder)
library(zipcodeR)

# Read the survey export (the first spreadsheet row is skipped, so the second
# row supplies the column headers) and normalise the column names with
# janitor::clean_names() in a single pipeline.
ep_data <- read_excel("RVTICN_EP_data.xlsx", skip = 1) |>
  clean_names()

# Print the tibble to inspect its dimensions and column types.
ep_data
## # A tibble: 1,714 × 74
##    session          created modified ended expired ep_what ep_what_other teacher
##    <chr>            <chr>   <chr>    <chr> <lgl>   <chr>   <lgl>           <dbl>
##  1 <NA>             2023-0… <NA>     <NA>  NA      <NA>    NA                 NA
##  2 okSk8ugA7x7LS_z… 2023-0… 2023-02… <NA>  NA      5, 10   NA                 NA
##  3 <NA>             2023-0… <NA>     <NA>  NA      <NA>    NA                 NA
##  4 zanyTortoiseXXX… 2023-0… 2023-01… <NA>  NA      <NA>    NA                 NA
##  5 darkEelXXXn1bps… 2023-0… 2023-01… <NA>  NA      <NA>    NA                 NA
##  6 <NA>             2023-0… 2023-01… <NA>  NA      <NA>    NA                 NA
##  7 jwfpui27QyMPIDi… 2023-0… <NA>     <NA>  NA      <NA>    NA                 NA
##  8 UfKwkbNB1UQO_Zc… 2023-0… 2023-01… <NA>  NA      <NA>    NA                 NA
##  9 4XFTbS1lynxtwmE… 2023-0… <NA>     <NA>  NA      <NA>    NA                 NA
## 10 -E0cUzh4UJQpwKl… 2023-0… <NA>     <NA>  NA      <NA>    NA                 NA
## # ℹ 1,704 more rows
## # ℹ 66 more variables: teacher_role <dbl>, teacher_role_other <chr>,
## #   teacher_center <dbl>, teacher_center_other <chr>, teacher_age <chr>,
## #   teacher_suspended <dbl>, teacher_suspended_times <dbl>,
## #   reasons_suspended <chr>, teacher_suspended_steps <chr>,
## #   teacher_suspended_steps_other <lgl>, teacher_expelled <dbl>,
## #   teacher_times_expelled <dbl>, reasons_expelled <chr>, …

2) Removing missing data

Drop rows with data missing in the session or modified column. This gives us a data frame with 94 observations of 74 variables.

# Keep only the rows that have both a session identifier and a modified
# timestamp; rows missing either value are discarded.
ep_data <- ep_data |>
  filter(!is.na(session), !is.na(modified))

# Print the filtered tibble to confirm the remaining row count.
ep_data
## # A tibble: 94 × 74
##    session          created modified ended expired ep_what ep_what_other teacher
##    <chr>            <chr>   <chr>    <chr> <lgl>   <chr>   <lgl>           <dbl>
##  1 okSk8ugA7x7LS_z… 2023-0… 2023-02… <NA>  NA      5, 10   NA                 NA
##  2 zanyTortoiseXXX… 2023-0… 2023-01… <NA>  NA      <NA>    NA                 NA
##  3 darkEelXXXn1bps… 2023-0… 2023-01… <NA>  NA      <NA>    NA                 NA
##  4 UfKwkbNB1UQO_Zc… 2023-0… 2023-01… <NA>  NA      <NA>    NA                 NA
##  5 Q13gEgNnVlr2fuN… 2023-0… 2023-02… <NA>  NA      13.0    NA                  1
##  6 whkMSapD6z3QnFn… 2023-0… 2023-02… 2023… NA      <NA>    NA                  1
##  7 0a_xbAPNdpM-Kxt… 2023-0… 2023-02… <NA>  NA      <NA>    NA                 NA
##  8 8HbVA9v6BS98pWO… 2023-0… 2023-03… <NA>  NA      <NA>    NA                 NA
##  9 0DmnsxoDUAoU2H2… 2023-0… 2023-03… <NA>  NA      <NA>    NA                 NA
## 10 LyA6W1isX5jE8Ub… 2023-0… 2023-03… <NA>  NA      <NA>    NA                 NA
## # ℹ 84 more rows
## # ℹ 66 more variables: teacher_role <dbl>, teacher_role_other <chr>,
## #   teacher_center <dbl>, teacher_center_other <chr>, teacher_age <chr>,
## #   teacher_suspended <dbl>, teacher_suspended_times <dbl>,
## #   reasons_suspended <chr>, teacher_suspended_steps <chr>,
## #   teacher_suspended_steps_other <lgl>, teacher_expelled <dbl>,
## #   teacher_times_expelled <dbl>, reasons_expelled <chr>, …

3) Data Wrangling

Create factor variables with plain English responses and re-level the factors into their natural order. This gives us a data frame with 94 observations of 124 variables.

# Derive the analysis columns from the raw survey codes. New columns are
# appended in the order they are created below, so the order of the
# assignments is significant for the layout of the resulting tibble.
#
# Conventions used throughout:
#   * `<name>_f` columns are labelled factors; fct_relevel() puts the levels in
#     their natural reading order instead of alphabetical order.
#   * `<name>_<k>` columns are indicators for multi-select questions: 1 when
#     the answer string contains the digit k, NA otherwise.
#     NOTE(review): str_detect() is a substring match, so "1" would also match
#     a code such as 10 or 11 -- presumably safe because these questions have
#     fewer than ten options; confirm against the survey codebook.
ep_data <- ep_data |>
  # Respondent role and the zip code reported under that role.
  mutate(
    ep_role = case_when(
      teacher == 1 ~ "Teacher",
      ep_child == 1 ~ "Caregiver",
      ep_self == 1 ~ "Self",
      .default = "No role specified"
    ),
    zip = case_when(
      zip_teach >= 1 ~ zip_teach,
      zip_child >= 1 ~ zip_child,
      zip_adult >= 1 ~ zip_adult
    )
  ) |>
  # Caregiver screening question and relationship to the child.
  mutate(
    ep_child_f = fct_relevel(
      factor(case_when(
        ep_child == 1 ~ "Yes",
        ep_child == 2 ~ "No",
        ep_child == 3 ~ "Unsure"
      )),
      "Yes", "No", "Unsure"
    ),
    # Two specific free-text "other" answers (code 8) are folded into
    # "Mental Health Provider"; every other code 8 answer stays NA.
    relationship_f = fct_relevel(
      factor(case_when(
        relationship == 1 ~ "Parent",
        relationship == 3 ~ "Foster Parent",
        relationship == 7 ~ "Aunt or Uncle",
        relationship == 8 &
          relationship_other %in% c(
            "TDT, Intensive in home",
            "family member and social worker"
          ) ~ "Mental Health Provider"
      )),
      "Parent", "Foster Parent", "Aunt or Uncle", "Mental Health Provider"
    )
  ) |>
  # Child expulsion items.
  mutate(
    child_expelled_times_f = fct_relevel(
      factor(case_when(
        child_expelled_times == 1 ~ "Once",
        child_expelled_times == 2 ~ "Twice",
        child_expelled_times == 3 ~ "Three Times",
        child_expelled_times == 4 ~ "More than Three Times"
      )),
      "Once", "Twice", "Three Times", "More than Three Times"
    ),
    child_expelled_age_f = fct_relevel(
      factor(case_when(
        child_expelled_age == 1 ~ "0 - 2 Years",
        child_expelled_age == 2 ~ "3 - 5 Years",
        child_expelled_age == 3 ~ "6 - 12 Years",
        child_expelled_age == 4 ~ "Over 12 Years"
      )),
      "0 - 2 Years", "3 - 5 Years", "6 - 12 Years", "Over 12 Years"
    ),
    child_expelled_steps_1 =
      if_else(str_detect(child_expelled_steps, "1"), 1, NA_real_),
    child_expelled_steps_2 =
      if_else(str_detect(child_expelled_steps, "2"), 1, NA_real_),
    child_expelled_steps_3 =
      if_else(str_detect(child_expelled_steps, "3"), 1, NA_real_),
    child_expelled_steps_4 =
      if_else(str_detect(child_expelled_steps, "4"), 1, NA_real_),
    child_expelled_steps_5 =
      if_else(str_detect(child_expelled_steps, "5"), 1, NA_real_),
    child_expelled_steps_6 =
      if_else(str_detect(child_expelled_steps, "6"), 1, NA_real_),
    rate_relationship_expelled_child_f = fct_relevel(
      factor(case_when(
        rate_relationship_expelled_child == 1 ~ "Very Good",
        rate_relationship_expelled_child == 2 ~ "Good",
        rate_relationship_expelled_child == 3 ~ "Neutral",
        rate_relationship_expelled_child == 4 ~ "Bad",
        rate_relationship_expelled_child == 5 ~ "Very Bad"
      )),
      "Very Good", "Good", "Neutral", "Bad", "Very Bad"
    )
  ) |>
  # Child suspension items (same coding scheme as the expulsion items).
  mutate(
    child_suspended_f = fct_relevel(
      factor(case_when(
        child_suspended == 1 ~ "Yes",
        child_suspended == 2 ~ "No",
        child_suspended == 3 ~ "Unsure"
      )),
      "Yes", "No", "Unsure"
    ),
    child_suspended_times_f = fct_relevel(
      factor(case_when(
        child_suspended_times == 1 ~ "Once",
        child_suspended_times == 2 ~ "Twice",
        child_suspended_times == 3 ~ "Three Times",
        child_suspended_times == 4 ~ "More than Three Times"
      )),
      "Once", "Twice", "Three Times", "More than Three Times"
    ),
    child_suspended_age_f = fct_relevel(
      factor(case_when(
        child_suspended_age == 1 ~ "0 - 2 Years",
        child_suspended_age == 2 ~ "3 - 5 Years",
        child_suspended_age == 3 ~ "6 - 12 Years",
        child_suspended_age == 4 ~ "Over 12 Years"
      )),
      "0 - 2 Years", "3 - 5 Years", "6 - 12 Years", "Over 12 Years"
    ),
    child_suspended_steps_1 =
      if_else(str_detect(child_suspended_steps, "1"), 1, NA_real_),
    child_suspended_steps_2 =
      if_else(str_detect(child_suspended_steps, "2"), 1, NA_real_),
    child_suspended_steps_3 =
      if_else(str_detect(child_suspended_steps, "3"), 1, NA_real_),
    child_suspended_steps_4 =
      if_else(str_detect(child_suspended_steps, "4"), 1, NA_real_),
    child_suspended_steps_5 =
      if_else(str_detect(child_suspended_steps, "5"), 1, NA_real_),
    child_suspended_steps_6 =
      if_else(str_detect(child_suspended_steps, "6"), 1, NA_real_),
    rate_relationship_suspended_child_f = fct_relevel(
      factor(case_when(
        rate_relationship_suspended_child == 1 ~ "Very Good",
        rate_relationship_suspended_child == 2 ~ "Good",
        rate_relationship_suspended_child == 3 ~ "Neutral",
        rate_relationship_suspended_child == 4 ~ "Bad",
        rate_relationship_suspended_child == 5 ~ "Very Bad"
      )),
      "Very Good", "Good", "Neutral", "Bad", "Very Bad"
    )
  ) |>
  # Indicator columns for the remaining multi-select questions.
  mutate(
    ep_effects_1 = if_else(str_detect(ep_effects, "1"), 1, NA_real_),
    ep_effects_2 = if_else(str_detect(ep_effects, "2"), 1, NA_real_),
    ep_effects_3 = if_else(str_detect(ep_effects, "3"), 1, NA_real_),
    ep_effects_4 = if_else(str_detect(ep_effects, "4"), 1, NA_real_),
    child_attributes_1 =
      if_else(str_detect(child_attributes, "1"), 1, NA_real_),
    child_attributes_2 =
      if_else(str_detect(child_attributes, "2"), 1, NA_real_),
    child_attributes_3 =
      if_else(str_detect(child_attributes, "3"), 1, NA_real_),
    child_attributes_4 =
      if_else(str_detect(child_attributes, "4"), 1, NA_real_),
    child_attributes_5 =
      if_else(str_detect(child_attributes, "5"), 1, NA_real_),
    child_attributes_6 =
      if_else(str_detect(child_attributes, "6"), 1, NA_real_),
    child_attributes_7 =
      if_else(str_detect(child_attributes, "7"), 1, NA_real_),
    child_race_1 = if_else(str_detect(child_race, "1"), 1, NA_real_),
    child_race_2 = if_else(str_detect(child_race, "2"), 1, NA_real_),
    child_race_3 = if_else(str_detect(child_race, "3"), 1, NA_real_),
    child_race_4 = if_else(str_detect(child_race, "4"), 1, NA_real_),
    child_race_5 = if_else(str_detect(child_race, "5"), 1, NA_real_),
    child_race_6 = if_else(str_detect(child_race, "6"), 1, NA_real_)
  ) |>
  # Child language plus descriptive labels built from the indicators above.
  # Every label that has a default branch is also assigned to rows where the
  # underlying answer is missing, because a missing indicator never equals 1.
  # NOTE(review): several labels misspell "received" as "recieved"; they are
  # kept verbatim here because other code may match on the exact text.
  mutate(
    child_language_f = fct_relevel(
      factor(case_when(
        child_language == 1 ~ "English",
        child_language == 2 ~ "Spanish",
        child_language == 3 ~ "Child is non-verbal",
        child_language == 4 ~ "Other"
      )),
      "English", "Spanish", "Child is non-verbal", "Other"
    ),
    # Foster care is flagged by either the relationship code for a foster
    # parent or the third child-attribute option.
    child_foster = case_when(
      relationship == 3 | child_attributes_3 == 1 ~ 1,
      .default = 0
    ),
    child_foster_f = case_when(
      child_foster == 1 ~ "Child is or has been in foster care",
      child_foster == 0 ~ "Child is not and has not been in foster care"
    ),
    child_disability_f = case_when(
      child_attributes_1 == 1 | child_attributes_2 == 1 ~
        "Child has a disability",
      .default = "Child does not have a disability"
    ),
    child_trauma_f = case_when(
      child_attributes_4 == 1 ~ "Child has experienced trauma",
      .default = "Child has not experienced trauma"
    ),
    child_childcare_subsidy_f = case_when(
      child_attributes_5 == 1 ~ "Child has recieved childcare subsidy",
      .default = "Child has not recieved childcare subsidy"
    ),
    child_therapy_f = case_when(
      child_attributes_6 == 1 ~ "Child has recieved therapy",
      .default = "Child has not recieved therapy"
    ),
    child_funding_f = case_when(
      child_attributes_7 == 1 ~
        "Child has recieved funding through FAPT, SNAP, WIC or Medicaid",
      .default =
        "Child has not recieved funding through FAPT, SNAP, WIC, or Medicaid"
    ),
    # Only the three race answers listed here are labelled; any other answer
    # becomes NA in both columns.
    child_race_f = case_when(
      child_race == "1.0" ~ "Black or African-American",
      child_race == "5.0" ~ "White or Caucasian",
      child_race == "1, 5" ~ "Multiracial White and Black"
    ),
    # Two-category version: the "1, 5" answer is grouped with
    # "Black or African-American".
    child_race_2factor = case_when(
      child_race == "1.0" ~ "Black or African-American",
      child_race == "5.0" ~ "White or Caucasian",
      child_race == "1, 5" ~ "Black or African-American"
    )
  )


# Preview the first rows to confirm the new columns were added.
head(ep_data)
## # A tibble: 6 × 124
##   session           created modified ended expired ep_what ep_what_other teacher
##   <chr>             <chr>   <chr>    <chr> <lgl>   <chr>   <lgl>           <dbl>
## 1 okSk8ugA7x7LS_zx… 2023-0… 2023-02… <NA>  NA      5, 10   NA                 NA
## 2 zanyTortoiseXXXf… 2023-0… 2023-01… <NA>  NA      <NA>    NA                 NA
## 3 darkEelXXXn1bpsS… 2023-0… 2023-01… <NA>  NA      <NA>    NA                 NA
## 4 UfKwkbNB1UQO_Zcp… 2023-0… 2023-01… <NA>  NA      <NA>    NA                 NA
## 5 Q13gEgNnVlr2fuNO… 2023-0… 2023-02… <NA>  NA      13.0    NA                  1
## 6 whkMSapD6z3QnFn0… 2023-0… 2023-02… 2023… NA      <NA>    NA                  1
## # ℹ 116 more variables: teacher_role <dbl>, teacher_role_other <chr>,
## #   teacher_center <dbl>, teacher_center_other <chr>, teacher_age <chr>,
## #   teacher_suspended <dbl>, teacher_suspended_times <dbl>,
## #   reasons_suspended <chr>, teacher_suspended_steps <chr>,
## #   teacher_suspended_steps_other <lgl>, teacher_expelled <dbl>,
## #   teacher_times_expelled <dbl>, reasons_expelled <chr>,
## #   teacher_expelled_steps <chr>, teacher_expelled_steps_other <lgl>, …

4) Zipcode Data

# Register the Google Maps API key with ggmap for this session.
# The key is read from the GGMAP_GOOGLE_API_KEY environment variable (for
# example, set in ~/.Renviron) so the credential is never stored in this file.
# NOTE(review): a key was previously hard-coded on this line; treat it as
# exposed and revoke/rotate it in the Google Cloud console.
if (!nzchar(Sys.getenv("GGMAP_GOOGLE_API_KEY"))) {
  stop(
    "Set the GGMAP_GOOGLE_API_KEY environment variable before running ",
    "this section.",
    call. = FALSE
  )
}
register_google(key = Sys.getenv("GGMAP_GOOGLE_API_KEY"))

# Download the base map used by the zip-code plot, centred on the given
# longitude/latitude at zoom level 11. The style vector turns off all map
# labels so that only the plotted layers carry text.
roanoke_bw_map <- get_googlemap(
  center = c(-79.93996, 37.269),
  zoom = 11,
  style = c(feature = "all", element = "labels", visibility = "off")
)

# Load the zip-code workbook, skipping its first row.
# NOTE(review): `ep_data_zip` is not referenced again in this section, and the
# absolute G: drive path only resolves on a machine with that drive mapped --
# confirm whether this import is still needed.
ep_data_zip <- read_excel(
  "G:/My Drive/R Files/TICN/TICN_data/EP_survey_2023/EP_survey_2023_zips.xlsx",
  skip = 1
)

# Build a single character `zip` column from whichever role-specific zip field
# is filled in (teacher first, then child, then adult), and keep only the
# respondents that have one.
ep_data <- ep_data |>
  mutate(
    zip = as.character(case_when(
      zip_teach >= 1 ~ zip_teach,
      zip_child >= 1 ~ zip_child,
      zip_adult >= 1 ~ zip_adult
    ))
  ) |>
  drop_na(zip)



# geocode_zip('24015')
# zip_distance("24015", "24016")

# Coordinate lookup table for Virginia zip codes from zipcodeR. The zip code is
# duplicated into a `zip` column so it can serve as the join key; entries
# without a latitude are dropped.
va_zips <- search_state("VA") |>
  transmute(lat, lng, zipcode, zip = zipcode) |>
  filter(!is.na(lat))

# Attach lat/lng (and the original `zipcode` column) to every survey row by
# zip code; rows with no match keep NA coordinates.
ep_data <- left_join(ep_data, va_zips, by = join_by(zip))

# Plot data for the zip-code map: one row per respondent who answered 1 to
# `ep_child`, sorted by zip code, with `count` holding the number of such
# respondents that share the row's zip code.
# The count uses per-operation grouping (`.by`), so `d_zip` is returned
# ungrouped; previously it stayed grouped by `zip`, which would silently make
# any later summarise/mutate/slice on it run per zip code.
d_zip <- ep_data |>
  filter(ep_child == 1) |>
  arrange(zip) |>
  mutate(count = n(), .by = zip) |>
  select(ep_child, zip, count, lng, lat, child_expelled, child_expelled_times)

# Colour-gradient map: one point per row of `d_zip`, drawn on the base map and
# coloured by the number of responses from that zip code, with the zip code
# printed on top of each point.
m2 <- ggmap(roanoke_bw_map) +
  geom_point(
    data = d_zip,
    # `label` is not a geom_point aesthetic; presumably it is mapped here so
    # the zip code appears in the plotly tooltip -- confirm.
    mapping = aes(x = lng, y = lat, colour = count, label = zip),
    size = 5,
    alpha = 1
  ) +
  geom_text(data = d_zip, mapping = aes(x = lng, y = lat, label = zip)) +
  # The colour scale is fixed to 0-6 with breaks every 2.
  scale_colour_gradientn(
    colours = c("darkgreen", "yellow", "red"),
    limits = c(0, 6),
    breaks = seq(0, 6, by = 2)
  ) +
  # Hide all axis decoration: the coordinates themselves are not of interest.
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.x = element_blank(),
    axis.ticks.y = element_blank()
  ) +
  labs(title = "Has your child been suspended or expelled from child care?")

# Interactive version of the same map.
m3 <- ggplotly(m2)

m3

5) Write Data and Tables to Excel Files

# Write graphs to PDF.
# ggpubr supplies ggarrange() and ggexport().
library(ggpubr)

# Lay the static zip-code map out on a 1 x 1 grid so it can be exported.
child_zipcode_graphs <- ggarrange(m2, ncol = 1, nrow = 1)

# Save the arranged plot as a PDF; the relative file name resolves against the
# current working directory.
ggexport(child_zipcode_graphs, filename = "child_zipcode_graphs.pdf")