Data Processing: Varsity Tutors, Kent SD

Author

Alexis Davila

Clean Data

Import

Packages selected for inspecting and cleaning Varsity Tutors, district data from Cumberland schools.

library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.2.1     ✔ readr     2.2.0
✔ forcats   1.0.1     ✔ stringr   1.6.0
✔ ggplot2   4.0.3     ✔ tibble    3.3.1
✔ lubridate 1.9.5     ✔ tidyr     1.3.2
✔ purrr     1.2.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Import roster.

roster <- read_csv("roster_vt_kent.csv")

Rows: 638 Columns: 18
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (12): Grade, School, MOY i-Ready Math Composite Scaled Score*, MOY i-Rea...
dbl  (5): Student Number, Sped, MLE, Low Income, LAP
lgl  (1): Treatment

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Inspect data variables. Data file appears to contain all requested elements - except for MLL Level (“Mulit-lingual learner level on a scale of 1 to 6 in SY25-26 based on beginning of year assessment). MLE, or multilingual education status, presents in place of ELL status. Low Income presents in place of Free/Reduced Price Lunch status. Sped, or Special Education, presents in place of IEP status.

colnames(roster)

 [1] "Student Number"                          
 [2] "Grade"                                   
 [3] "School"                                  
 [4] "Treatment"                               
 [5] "MOY i-Ready Math Composite Scaled Score*"
 [6] "MOY i-Ready Math Math Test Date*"        
 [7] "EOY i-Ready Math Composite Scaled Score*"
 [8] "EOY i-Ready Math Math Test Date*"        
 [9] "MOY i-Ready ELA Composite Scaled Score*" 
[10] "MOY i-Ready ELA Test Date*"              
[11] "EOY i-Ready ELA Composite  Scaled Score*"
[12] "EOY i-Ready ELA Test Date*"              
[13] "Gender Code"                             
[14] "Sped"                                    
[15] "MLE"                                     
[16] "Low Income"                              
[17] "Race/Ethnicity"                          
[18] "LAP"

Change column headers for roster.

roster = roster %>%
  rename(
    student_id = "Student Number",
    grade = "Grade", 
    school = "School", 
    treat = "Treatment", 
    
    moy_math_comp = "MOY i-Ready Math Composite Scaled Score*", 
    moy_math_date = "MOY i-Ready Math Math Test Date*", 
    moy_ela_comp = "MOY i-Ready ELA Composite Scaled Score*", 
    moy_ela_date = "MOY i-Ready ELA Test Date*", 
    
    eoy_math_comp = "EOY i-Ready Math Composite Scaled Score*", 
    eoy_math_date = "EOY i-Ready Math Math Test Date*", 
    eoy_ela_comp = "EOY i-Ready ELA Composite  Scaled Score*", 
    eoy_ela_date = "EOY i-Ready ELA Test Date*", 
    
    gender = "Gender Code", 
    sped = "Sped", 
    mll = "MLE", #multilingual education/multilingual learners
    low_inc = "Low Income", 
    race_eth = "Race/Ethnicity", 
    low_ap = "LAP" #low academic performance
    
  )
colnames(roster)

 [1] "student_id"    "grade"         "school"        "treat"        
 [5] "moy_math_comp" "moy_math_date" "eoy_math_comp" "eoy_math_date"
 [9] "moy_ela_comp"  "moy_ela_date"  "eoy_ela_comp"  "eoy_ela_date" 
[13] "gender"        "sped"          "mll"           "low_inc"      
[17] "race_eth"      "low_ap"

Inspect duplicates

Identify duplicate student records within roster file.

# frequency table of student_id occurrences
id_counts_roster = roster %>%
  group_by(student_id) %>%
  summarise(n = n()) %>%
  arrange(desc(n))

# filter only IDs that appear > 1
dupes_roster = id_counts_roster %>%
  filter(n > 1)

# view results
print(dupes_roster)

# A tibble: 2 × 2
  student_id     n
       <dbl> <int>
1     380845     2
2     386424     2

#export
write.csv(dupes_roster, file = "duplicate_ids_roster_vt_kent.csv", row.names = FALSE)

Inspect duplicate student records in roster. Observation: for a given duplicate student record, it appears to be the same individual (e.g., same school, same grade, same scores).

dup_preview_roster = roster %>%
  group_by(student_id) %>%
  filter(n() > 1) %>%
  ungroup()

print(dup_preview_roster)

# A tibble: 4 × 18
  student_id grade school        treat moy_math_comp moy_math_date eoy_math_comp
       <dbl> <chr> <chr>         <lgl> <chr>         <chr>         <chr>        
1     380845 08    Meridian Mid… NA    #N/A          #N/A          587          
2     386424 08    Meridian Mid… NA    #N/A          #N/A          510          
3     386424 08    Meridian Mid… NA    #N/A          #N/A          510          
4     380845 08    Meridian Mid… NA    #N/A          #N/A          587          
# ℹ 11 more variables: eoy_math_date <chr>, moy_ela_comp <chr>,
#   moy_ela_date <chr>, eoy_ela_comp <chr>, eoy_ela_date <chr>, gender <chr>,
#   sped <dbl>, mll <dbl>, low_inc <dbl>, race_eth <chr>, low_ap <dbl>

Decision: keep first occurrence of student ID based on current row order (N=636).

roster_unique = roster %>%
  distinct(student_id, .keep_all = TRUE)

print(roster_unique)

# A tibble: 636 × 18
   student_id grade school       treat moy_math_comp moy_math_date eoy_math_comp
        <dbl> <chr> <chr>        <lgl> <chr>         <chr>         <chr>        
 1     391033 07    Meridian Mi… NA    484           1/6/2026      490          
 2     412115 08    Meridian Mi… NA    384           1/7/2026      409          
 3     425285 06    Meridian Mi… NA    468           1/6/2026      #N/A         
 4     373185 07    Meridian Mi… NA    533           1/7/2026      549          
 5     394412 06    Meridian Mi… NA    571           1/7/2026      568          
 6     389860 07    Meridian Mi… NA    530           1/7/2026      549          
 7     389442 07    Meridian Mi… NA    467           1/6/2026      454          
 8     384402 07    Meridian Mi… NA    506           1/7/2026      496          
 9     382167 07    Meridian Mi… NA    481           1/7/2026      477          
10     417702 06    Meridian Mi… NA    505           1/6/2026      499          
# ℹ 626 more rows
# ℹ 11 more variables: eoy_math_date <chr>, moy_ela_comp <chr>,
#   moy_ela_date <chr>, eoy_ela_comp <chr>, eoy_ela_date <chr>, gender <chr>,
#   sped <dbl>, mll <dbl>, low_inc <dbl>, race_eth <chr>, low_ap <dbl>

Merge

Insert treatment status

Import roster from VT that details which students have accounts in their system

#import ID crosswalk from VT
vt_roster = read_csv("vt_roster_all_districts.csv")

Rows: 1496 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): District, School
dbl (2): District ID, Varsity Tutors ID

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# rename vars
vt_roster = vt_roster %>%
  rename(
    district_vt = "District",
    school_vt = "School",
    student_id = "District ID", 
    vt_acct_id = "Varsity Tutors ID"
  )

Add treatment status. Equals 1 if student id appears in roster from VT (N=88). All else equals 0.

merge_vt_kent_unclean = roster_unique %>%
  mutate(
    treat = ifelse(student_id %in% vt_roster$student_id, 1, 0)
  )

table(merge_vt_kent_unclean$treat)


  0   1 
548  88

Inspect Merged Data

Missing Data Summary

Recode “#N/A” to NA, missing value, in R.

merge_vt_kent_unclean[merge_vt_kent_unclean == "#N/A"] = NA

sum(merge_vt_kent_unclean == "#N/A", na.rm = TRUE)

[1] 0

Percentage missing for each variable. Observations: All other demographic info is present for all students. Some missing data for test info.

missing_summary = merge_vt_kent_unclean %>%
  summarise(across(
    everything(),
    ~ mean(is.na(.)) * 100
  )) %>%
  pivot_longer(
    cols = everything(),
    names_to = "variable",
    values_to = "percent_missing"
  ) %>%
  arrange(desc(percent_missing))

print(missing_summary)

# A tibble: 18 × 2
   variable      percent_missing
   <chr>                   <dbl>
 1 eoy_math_comp            4.25
 2 eoy_math_date            4.25
 3 moy_ela_comp             4.25
 4 moy_ela_date             4.25
 5 moy_math_comp            3.77
 6 moy_math_date            3.77
 7 eoy_ela_comp             3.46
 8 eoy_ela_date             3.46
 9 student_id               0   
10 grade                    0   
11 school                   0   
12 treat                    0   
13 gender                   0   
14 sped                     0   
15 mll                      0   
16 low_inc                  0   
17 race_eth                 0   
18 low_ap                   0

Percentage of student who have both MOY and EOY data overall

#indicator for having BOTH MOY and EOY scores
merge_vt_kent_test_info = merge_vt_kent_unclean %>%
  mutate(
    both_math = !is.na(moy_math_comp) & !is.na(eoy_math_comp),
    both_ela  = !is.na(moy_ela_comp)  & !is.na(eoy_ela_comp),
    all_tests = both_math & both_ela
  )

#pct across schools
overall_pct = merge_vt_kent_test_info %>%
  summarise(
    total_students = n(),
    students_with_all_tests = sum(all_tests),
    percent_with_all_tests = (students_with_all_tests / total_students) * 100
  ) %>%
  mutate(percent_with_all_tests = round(percent_with_all_tests, 2))

print(overall_pct)

# A tibble: 1 × 3
  total_students students_with_all_tests percent_with_all_tests
           <int>                   <int>                  <dbl>
1            636                     593                   93.2

Percentage of student who have both MOY and EOY data by school Note: only 1 school site from Kent SD, Meridian Middle.

by_school_pct = merge_vt_kent_test_info %>%
  group_by(school) %>%
  summarise(
    total_students = n(),
    students_with_all_tests = sum(all_tests),
    percent_with_all_tests = (students_with_all_tests / total_students) * 100
  ) %>%
  mutate(percent_with_all_tests = round(percent_with_all_tests, 2)) %>%
  arrange(desc(percent_with_all_tests))

print(by_school_pct)

# A tibble: 1 × 4
  school          total_students students_with_all_tests percent_with_all_tests
  <chr>                    <int>                   <int>                  <dbl>
1 Meridian Middle            636                     593                   93.2

Dummies for test info

both_math: dummy for students with complete math test info across MOY and EOY.

table(merge_vt_kent_test_info$both_math)


FALSE  TRUE 
   37   599

#complete math: MOY and EOY present
merge_vt_kent_test_info$both_math = ifelse(merge_vt_kent_test_info$both_math, 1, 0)
table(merge_vt_kent_test_info$both_math) #check counts


  0   1 
 37 599

both_ela: dummy for students with complete ELA test info across MOY and EOY..

table(merge_vt_kent_test_info$both_ela)


FALSE  TRUE 
   31   605

#complete ELA: MOY and EOY present
merge_vt_kent_test_info$both_ela = ifelse(merge_vt_kent_test_info$both_ela, 1, 0)
table(merge_vt_kent_test_info$both_ela) #check counts


  0   1 
 31 605

all_tests: dummy for students with both math and ELA test info complete across MOY and EOY.

table(merge_vt_kent_test_info$all_tests)


FALSE  TRUE 
   43   593

#complete math and ELA: MOY and EOY present
merge_vt_kent_test_info$all_tests = ifelse(merge_vt_kent_test_info$all_tests, 1, 0)
table(merge_vt_kent_test_info$all_tests) #check counts


  0   1 
 43 593

math_moy_missing_eoy_present: dummy for students who have EOY math test info but MOY test info is missing.

merge_vt_kent_test_info = merge_vt_kent_test_info %>%
  mutate(
    moy_math_present = !is.na(moy_math_comp) & !is.na(moy_math_date),
    eoy_math_present = !is.na(eoy_math_comp) & !is.na(eoy_math_date),
    
    math_moy_missing_eoy_present = case_when(
      !moy_math_present &  eoy_math_present ~ 1,   # moy missing, eoy present
       moy_math_present &  eoy_math_present ~ 0,   # both present
      !moy_math_present & !eoy_math_present ~ NA_real_  # both missing
    )
  )
table(merge_vt_kent_test_info$math_moy_missing_eoy_present)


  0   1 
599  10

ela_moy_missing_eoy_present: dummy for students who have EOY ELA test info but MOY test info is missing.

merge_vt_kent_test_info = merge_vt_kent_test_info %>%
  mutate(
    moy_ela_present = !is.na(moy_ela_comp) & !is.na(moy_ela_date),
    eoy_ela_present = !is.na(eoy_ela_comp) & !is.na(eoy_ela_date),
    
    ela_moy_missing_eoy_present = case_when(
      !moy_ela_present &  eoy_ela_present ~ 1,   # moy missing, eoy present
       moy_ela_present &  eoy_ela_present ~ 0,   # both present
      !moy_ela_present & !eoy_ela_present ~ NA_real_  # both missing
    )
  )
table(merge_vt_kent_test_info$ela_moy_missing_eoy_present)


  0   1 
605   9

###Test info summary

Proportion of math test info present (MOY present and EOY present, grouped by treatment status).

summary_math_tests_present = merge_vt_kent_test_info %>%
  mutate(
    moy_math_present = !is.na(moy_math_comp) & !is.na(moy_math_date),
    eoy_math_present = !is.na(eoy_math_comp) & !is.na(eoy_math_date)
  ) %>%
  group_by(treat) %>%
  summarise(
    n_students = n(),
    
    moy_count = sum(moy_math_present),
    moy_prop  = mean(moy_math_present),
    
    eoy_count = sum(eoy_math_present),
    eoy_prop  = mean(eoy_math_present),
    
    .groups = "drop"
  )

# View result
print(summary_math_tests_present)

# A tibble: 2 × 6
  treat n_students moy_count moy_prop eoy_count eoy_prop
  <dbl>      <int>     <int>    <dbl>     <int>    <dbl>
1     0        548       524    0.956       526    0.960
2     1         88        88    1            83    0.943

Proportion of ELA test info present (MOY present and EOY present, grouped by treatment status).

summary_ela_tests_present = merge_vt_kent_test_info %>%
  mutate(
    moy_ela_present = !is.na(moy_ela_comp) & !is.na(moy_ela_date),
    eoy_ela_present = !is.na(eoy_ela_comp) & !is.na(eoy_ela_date)
  ) %>%
  group_by(treat) %>%
  summarise(
    n_students = n(),
    
    moy_count = sum(moy_ela_present),
    moy_prop  = mean(moy_ela_present),
    
    eoy_count = sum(eoy_ela_present),
    eoy_prop  = mean(eoy_ela_present),
    
    .groups = "drop"
  )

# View result
print(summary_ela_tests_present)

# A tibble: 2 × 6
  treat n_students moy_count moy_prop eoy_count eoy_prop
  <dbl>      <int>     <int>    <dbl>     <int>    <dbl>
1     0        548       521    0.951       527    0.962
2     1         88        88    1            87    0.989

Data Summary

Generate school-level means for MOY ELA and Math assessment data, number of students, plus distribution across demographics.

#create function to streamline calculations
calc_pct = function(x) {
  prop.table(table(x)) * 100
}

#school-level means and counts for TEST data
merge_vt_kent_test_info = merge_vt_kent_test_info %>%
  mutate(across(c(moy_math_comp, #transform all scores to numeric value
                  moy_ela_comp, 
                  eoy_math_comp, 
                  eoy_ela_comp), as.numeric))

school_means = merge_vt_kent_test_info %>%
  group_by(school)%>%
  summarise(
    n_students = n(),
    mean_moy_math = mean(moy_math_comp, na.rm = TRUE),
    mean_moy_ela  = mean(moy_ela_comp, na.rm = TRUE), 
    mean_eoy_math = mean(eoy_math_comp, na.rm = TRUE),
    mean_eoy_ela  = mean(eoy_ela_comp, na.rm = TRUE), 
  )

# DEMOGRAPHICS

# race/ethnicity
race_pct = merge_vt_kent_test_info %>%
  group_by(school, race_eth) %>%
  summarise(n = n(), .groups = "drop") %>%
  group_by(school) %>%
  mutate(pct = (n / sum(n)) * 100) %>%
  select(-n) %>%
  pivot_wider(
    names_from = race_eth,
    values_from = pct,
    names_prefix = "race_"
  )

#gender
gender_pct = merge_vt_kent_test_info %>%
  group_by(school, gender) %>%
  summarise(n = n(), .groups = "drop") %>%
  group_by(school) %>%
  mutate(pct = (n / sum(n)) * 100) %>%
  select(-n) %>%
  pivot_wider(
    names_from = gender,
    values_from = pct,
    names_prefix = "gender_"
  )

#low income status
low_inc_pct = merge_vt_kent_test_info %>%
  group_by(school, low_inc) %>%
  summarise(n = n(), .groups = "drop") %>%
  group_by(school) %>%
  mutate(pct = (n / sum(n)) * 100) %>%
  select(-n) %>%
  pivot_wider(
    names_from = low_inc,
    values_from = pct,
    names_prefix = "low_inc_"
  )

#multilingual learner
mll_pct = merge_vt_kent_test_info %>%
  group_by(school, mll) %>%
  summarise(n = n(), .groups = "drop") %>%
  group_by(school) %>%
  mutate(pct = (n / sum(n)) * 100) %>%
  select(-n) %>%
  pivot_wider(
    names_from = mll,
    values_from = pct,
    names_prefix = "mll_"
  )

#at-risk students
low_ap_pct = merge_vt_kent_test_info %>%
  group_by(school, low_ap) %>%
  summarise(n = n(), .groups = "drop") %>%
  group_by(school) %>%
  mutate(pct = (n / sum(n)) * 100) %>%
  select(-n) %>%
  pivot_wider(
    names_from = low_ap,
    values_from = pct,
    names_prefix = "low_ap_"
  )

#sped students
sped_pct = merge_vt_kent_test_info %>%
  group_by(school, sped) %>%
  summarise(n = n(), .groups = "drop") %>%
  group_by(school) %>%
  mutate(pct = (n / sum(n)) * 100) %>%
  select(-n) %>%
  pivot_wider(
    names_from = sped,
    values_from = pct,
    names_prefix = "sped_"
  )

# COMBINE summaries
school_summary = school_means %>%
  left_join(race_pct,   by = "school") %>%
  left_join(gender_pct, by = "school") %>%
  left_join(low_inc_pct,   by = "school") %>%
  left_join(mll_pct,    by = "school") %>%
  left_join(low_ap_pct, by = "school")%>%
  left_join(sped_pct, by = "school")

school_summary = school_summary %>%
  mutate(across(where(is.numeric), ~ round(.x, 2)))

print(school_summary)

# A tibble: 1 × 23
  school        n_students mean_moy_math mean_moy_ela mean_eoy_math mean_eoy_ela
  <chr>              <dbl>         <dbl>        <dbl>         <dbl>        <dbl>
1 Meridian Mid…        636          487.         583.          494.         588.
# ℹ 17 more variables: race_Asian <dbl>,
#   `race_Black or African American` <dbl>, `race_Hispanic/Latino` <dbl>,
#   `race_Native Hawaiian or Other Pacific Islander` <dbl>,
#   `race_Two or More Races` <dbl>, race_White <dbl>, gender_F <dbl>,
#   gender_M <dbl>, gender_NB <dbl>, low_inc_0 <dbl>, low_inc_1 <dbl>,
#   mll_0 <dbl>, mll_1 <dbl>, low_ap_0 <dbl>, low_ap_1 <dbl>, sped_0 <dbl>,
#   sped_1 <dbl>

write.csv(school_summary, "test_summary_by_school_vt_kent.csv", row.names = FALSE)

Reformat Data

Create dummy variables

Create numeric binary for gender. Observation: 1 NB student; NB student will present as NA under the male dummy.

table(merge_vt_kent_test_info$gender)


  F   M  NB 
285 350   1

merge_vt_kent_test_info = merge_vt_kent_test_info %>%
  mutate(
    male = case_when(
      gender == "M" ~ 1,
      gender == "F" ~ 0,
      TRUE ~ NA_real_   # handles missing or unexpected values
    )
  )
table(merge_vt_kent_test_info$male)


  0   1 
285 350

Create numeric binaries for each race.

table(merge_vt_kent_test_info$race_eth)


                                    Asian 
                                      212 
                Black or African American 
                                      101 
                          Hispanic/Latino 
                                      149 
Native Hawaiian or Other Pacific Islander 
                                       14 
                        Two or More Races 
                                       50 
                                    White 
                                      110

merge_vt_kent_test_info = merge_vt_kent_test_info %>%
  mutate(
    race_A     = ifelse(race_eth == "Asian", 1, ifelse(is.na(race_eth), NA, 0)),
    race_B     = ifelse(race_eth == "Black or African American", 1, ifelse(is.na(race_eth), NA, 0)),
    race_H     = ifelse(race_eth == "Hispanic/Latino", 1, ifelse(is.na(race_eth), NA, 0)),
    race_Multi     = ifelse(race_eth == "Two or More Races", 1, ifelse(is.na(race_eth), NA, 0)),
    race_P     = ifelse(race_eth == "Native Hawaiian or Other Pacific Islander", 1, ifelse(is.na(race_eth), NA, 0)),
    race_W     = ifelse(race_eth == "White", 1, ifelse(is.na(race_eth), NA, 0))
  )
table(merge_vt_kent_test_info$race_B) #quick check coding on race_B


  0   1 
535 101

Export files for e2i Coach

Save e2i file with all matched students from the merge, regardless of missing test info.

#select columns ready for e2i
vt_kent_e2i_test_info = merge_vt_kent_test_info %>%
  dplyr::select(student_id, 
                grade, 
                school, 
                moy_math_comp, moy_math_date, 
                moy_ela_comp,moy_ela_date, 
                eoy_math_comp, eoy_math_date,
                eoy_ela_comp, eoy_ela_date,
                male, sped, mll, low_inc, low_ap,
                treat,
                race_A, race_B, race_H, race_Multi, race_P, race_W,
                both_math, both_ela, all_tests, 
                math_moy_missing_eoy_present, ela_moy_missing_eoy_present)

#export e2i with test indicators, all matched students from the merge, regardless of missing test info (all_tests == 1 or 0)
write.csv(vt_kent_e2i_test_info, file = "vt_kent_e2i_all_students.csv", row.names = FALSE)

Save e2i file with only containing matched students from the merge with complete test info.

table(vt_kent_e2i_test_info$all_tests)


  0   1 
 43 593

#subset: keep only students with complete test info (all_tests == 1)
vt_kent_e2i_all_tests = vt_kent_e2i_test_info %>% filter(all_tests == 1)
print(vt_kent_e2i_all_tests) # N=593

# A tibble: 593 × 28
   student_id grade school moy_math_comp moy_math_date moy_ela_comp moy_ela_date
        <dbl> <chr> <chr>          <dbl> <chr>                <dbl> <chr>       
 1     391033 07    Merid…           484 1/6/2026               616 1/14/2026   
 2     412115 08    Merid…           384 1/7/2026               474 1/13/2026   
 3     373185 07    Merid…           533 1/7/2026               666 1/13/2026   
 4     394412 06    Merid…           571 1/7/2026               641 1/12/2026   
 5     389860 07    Merid…           530 1/7/2026               620 1/13/2026   
 6     389442 07    Merid…           467 1/6/2026               529 1/13/2026   
 7     384402 07    Merid…           506 1/7/2026               593 1/13/2026   
 8     382167 07    Merid…           481 1/7/2026               576 1/14/2026   
 9     417702 06    Merid…           505 1/6/2026               584 1/12/2026   
10     415707 08    Merid…           442 1/6/2026               555 1/12/2026   
# ℹ 583 more rows
# ℹ 21 more variables: eoy_math_comp <dbl>, eoy_math_date <chr>,
#   eoy_ela_comp <dbl>, eoy_ela_date <chr>, male <dbl>, sped <dbl>, mll <dbl>,
#   low_inc <dbl>, low_ap <dbl>, treat <dbl>, race_A <dbl>, race_B <dbl>,
#   race_H <dbl>, race_Multi <dbl>, race_P <dbl>, race_W <dbl>,
#   both_math <dbl>, both_ela <dbl>, all_tests <dbl>,
#   math_moy_missing_eoy_present <dbl>, ela_moy_missing_eoy_present <dbl>

Count of treatment students among the subset with complete test info.

table(vt_kent_e2i_all_tests$treat)


  0   1 
510  83

#export e2i only containing matched students from the merge with complete test info (all_tests == 1)
write.csv(vt_kent_e2i_all_tests, file = "vt_kent_e2i_complete_tests.csv", row.names = FALSE)