Data Processing: Varsity Tutors, William Penn SD

Author

Alexis Davila

Clean Data

Import

Packages selected for inspecting and cleaning Varsity Tutors, district data from William Penn School District.

library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.2.1     ✔ readr     2.2.0
✔ forcats   1.0.1     ✔ stringr   1.6.0
✔ ggplot2   4.0.3     ✔ tibble    3.3.1
✔ lubridate 1.9.5     ✔ tidyr     1.3.2
✔ purrr     1.2.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Import roster.

roster <- read_csv("roster_vt_penn.csv")

New names:
Rows: 1279 Columns: 41
── Column specification
──────────────────────────────────────────────────────── Delimiter: "," chr
(10): School Name*, MOY MAP Math Math Test Date*, EOY MAP Math Math Test... dbl
(7): Student ID*, Student Grade Level*, Treatment*, MOY MAP Math Compos... lgl
(24): ...18, ...19, ...20, ...21, ...22, ...23, ...24, ...25, ...26, ......
ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
Specify the column types or set `show_col_types = FALSE` to quiet this message.
• `` -> `...18`
• `` -> `...19`
• `` -> `...20`
• `` -> `...21`
• `` -> `...22`
• `` -> `...23`
• `` -> `...24`
• `` -> `...25`
• `` -> `...26`
• `` -> `...27`
• `` -> `...28`
• `` -> `...29`
• `` -> `...30`
• `` -> `...31`
• `` -> `...32`
• `` -> `...33`
• `` -> `...34`
• `` -> `...35`
• `` -> `...36`
• `` -> `...37`
• `` -> `...38`
• `` -> `...39`
• `` -> `...40`
• `` -> `...41`

Inspect data variables. Data file appears to contain all requested elements - except for Academically at-risk.

colnames(roster)

 [1] "Student ID*"                         
 [2] "Student Grade Level*"                
 [3] "School Name*"                        
 [4] "Treatment*"                          
 [5] "MOY MAP Math Composite Scaled Score*"
 [6] "MOY MAP Math Math Test Date*"        
 [7] "EOY MAP Math Composite Scaled Score*"
 [8] "EOY MAP Math Math Test Date*"        
 [9] "MOY MAP ELA Composite Scaled Score*" 
[10] "MOY MAP ELA Test Date*"              
[11] "EOY MAP ELA Composite  Scaled Score*"
[12] "EOY MAP ELA Test Date*"              
[13] "Gender"                              
[14] "IEP"                                 
[15] "ELL"                                 
[16] "FRPL"                                
[17] "Race/Ethnicity"                      
[18] "...18"                               
[19] "...19"                               
[20] "...20"                               
[21] "...21"                               
[22] "...22"                               
[23] "...23"                               
[24] "...24"                               
[25] "...25"                               
[26] "...26"                               
[27] "...27"                               
[28] "...28"                               
[29] "...29"                               
[30] "...30"                               
[31] "...31"                               
[32] "...32"                               
[33] "...33"                               
[34] "...34"                               
[35] "...35"                               
[36] "...36"                               
[37] "...37"                               
[38] "...38"                               
[39] "...39"                               
[40] "...40"                               
[41] "...41"

Change column headers for roster.

roster = roster [, -c(18:41)] #drop empty columns
 
roster = roster %>%
  rename(
    student_id = "Student ID*",
    grade = "Student Grade Level*", 
    school = "School Name*", 
    treat = "Treatment*", 
    
    moy_math_comp = "MOY MAP Math Composite Scaled Score*", 
    moy_math_date = "MOY MAP Math Math Test Date*", 
    moy_ela_comp = "MOY MAP ELA Composite Scaled Score*", 
    moy_ela_date = "MOY MAP ELA Test Date*", 
    
    eoy_math_comp = "EOY MAP Math Composite Scaled Score*", 
    eoy_math_date = "EOY MAP Math Math Test Date*", 
    eoy_ela_comp = "EOY MAP ELA Composite  Scaled Score*", 
    eoy_ela_date = "EOY MAP ELA Test Date*", 
    
    gender = "Gender", 
    ell = "ELL", 
    frpl = "FRPL", 
    race_eth = "Race/Ethnicity", 
    iep = "IEP" 
    
  )
colnames(roster)

 [1] "student_id"    "grade"         "school"        "treat"        
 [5] "moy_math_comp" "moy_math_date" "eoy_math_comp" "eoy_math_date"
 [9] "moy_ela_comp"  "moy_ela_date"  "eoy_ela_comp"  "eoy_ela_date" 
[13] "gender"        "iep"           "ell"           "frpl"         
[17] "race_eth"

Inspect duplicates

Identify duplicate student records within roster file. No duplicates to report.

# frequency table of student_id occurrences
id_counts_roster = roster %>%
  group_by(student_id) %>%
  summarise(n = n()) %>%
  arrange(desc(n))

# filter only IDs that appear > 1
dupes_roster = id_counts_roster %>%
  filter(n > 1)

# view results
print(dupes_roster)

# A tibble: 0 × 2
# ℹ 2 variables: student_id <dbl>, n <int>

#export - NA: no duplicates to report
#write.csv(dupes_roster, file = "duplicate_ids_roster_vt_penn.csv", row.names = FALSE)

Merge

Insert treatment status

Import roster from VT that details which students have accounts in their system.

#import ID crosswalk from VT
vt_roster = read_csv("vt_roster_all_districts.csv")

Rows: 1496 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): District, School
dbl (2): District ID, Varsity Tutors ID

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# rename vars
vt_roster = vt_roster %>%
  rename(
    district_vt = "District",
    school_vt = "School",
    student_id = "District ID", 
    vt_acct_id = "Varsity Tutors ID"
  )

Add treatment status. Equals 1 if student id appears in roster from VT (N=445). All else equals 0.

merge_vt_penn_unclean = roster %>%
  mutate(
    treat = ifelse(student_id %in% vt_roster$student_id, 1, 0)
  )

table(merge_vt_penn_unclean$treat)


  0   1 
834 445

Inspect Merged Data

Missing Data Summary

Percentage missing for each variable. Observations: All other demographic info is present for all students. Some missing data for test info.

missing_summary = merge_vt_penn_unclean %>%
  summarise(across(
    everything(),
    ~ mean(is.na(.)) * 100
  )) %>%
  pivot_longer(
    cols = everything(),
    names_to = "variable",
    values_to = "percent_missing"
  ) %>%
  arrange(desc(percent_missing))

print(missing_summary)

# A tibble: 17 × 2
   variable      percent_missing
   <chr>                   <dbl>
 1 moy_ela_comp             5.39
 2 moy_ela_date             5.39
 3 moy_math_comp            5.08
 4 moy_math_date            5.08
 5 eoy_ela_comp             3.60
 6 eoy_ela_date             3.60
 7 eoy_math_comp            3.13
 8 eoy_math_date            3.13
 9 student_id               0   
10 grade                    0   
11 school                   0   
12 treat                    0   
13 gender                   0   
14 iep                      0   
15 ell                      0   
16 frpl                     0   
17 race_eth                 0

Percentage of student who have both MOY and EOY data overall

#indicator for having BOTH MOY and EOY scores
merge_vt_penn_test_info = merge_vt_penn_unclean %>%
  mutate(
    both_math = !is.na(moy_math_comp) & !is.na(eoy_math_comp),
    both_ela  = !is.na(moy_ela_comp)  & !is.na(eoy_ela_comp),
    all_tests = both_math & both_ela
  )

#pct across schools
overall_pct = merge_vt_penn_test_info %>%
  summarise(
    total_students = n(),
    students_with_all_tests = sum(all_tests),
    percent_with_all_tests = (students_with_all_tests / total_students) * 100
  ) %>%
  mutate(percent_with_all_tests = round(percent_with_all_tests, 2))

print(overall_pct)

# A tibble: 1 × 3
  total_students students_with_all_tests percent_with_all_tests
           <int>                   <int>                  <dbl>
1           1279                    1194                   93.4

Percentage of student who have both MOY and EOY data by school

by_school_pct = merge_vt_penn_test_info %>%
  group_by(school) %>%
  summarise(
    total_students = n(),
    students_with_all_tests = sum(all_tests),
    percent_with_all_tests = (students_with_all_tests / total_students) * 100
  ) %>%
  mutate(percent_with_all_tests = round(percent_with_all_tests, 2)) %>%
  arrange(desc(percent_with_all_tests))

print(by_school_pct)

# A tibble: 8 × 4
  school            total_students students_with_all_te…¹ percent_with_all_tests
  <chr>                      <int>                  <int>                  <dbl>
1 Colwyn Elementar…             72                     71                   98.6
2 Bell Avenue Elem…            128                    123                   96.1
3 East Lansdowne E…            155                    148                   95.5
4 Park Lane Elemen…            170                    161                   94.7
5 Walnut Street El…            147                    137                   93.2
6 W.B. Evans Eleme…            179                    166                   92.7
7 Ardmore Avenue E…            311                    284                   91.3
8 Aldan Elementary…            117                    104                   88.9
# ℹ abbreviated name: ¹students_with_all_tests

Dummies for test info

both_math: dummy for students with complete math test info across MOY and EOY.

table(merge_vt_penn_test_info$both_math)


FALSE  TRUE 
   76  1203

#complete math: MOY and EOY present
merge_vt_penn_test_info$both_math = ifelse(merge_vt_penn_test_info$both_math, 1, 0)
table(merge_vt_penn_test_info$both_math) #check counts


   0    1 
  76 1203

both_ela: dummy for students with complete ELA test info across MOY and EOY..

table(merge_vt_penn_test_info$both_ela)


FALSE  TRUE 
   83  1196

#complete ELA: MOY and EOY present
merge_vt_penn_test_info$both_ela = ifelse(merge_vt_penn_test_info$both_ela, 1, 0)
table(merge_vt_penn_test_info$both_ela) #check counts


   0    1 
  83 1196

all_tests: dummy for students with both math and ELA test info complete across MOY and EOY.

table(merge_vt_penn_test_info$all_tests)


FALSE  TRUE 
   85  1194

#complete math and ELA: MOY and EOY present
merge_vt_penn_test_info$all_tests = ifelse(merge_vt_penn_test_info$all_tests, 1, 0)
table(merge_vt_penn_test_info$all_tests) #check counts


   0    1 
  85 1194

math_moy_missing_eoy_present: dummy for students who have EOY math test info but MOY test info is missing.

merge_vt_penn_test_info = merge_vt_penn_test_info %>%
  mutate(
    moy_math_present = !is.na(moy_math_comp) & !is.na(moy_math_date),
    eoy_math_present = !is.na(eoy_math_comp) & !is.na(eoy_math_date),
    
    math_moy_missing_eoy_present = case_when(
      !moy_math_present &  eoy_math_present ~ 1,   # moy missing, eoy present
       moy_math_present &  eoy_math_present ~ 0,   # both present
      !moy_math_present & !eoy_math_present ~ NA_real_  # both missing
    )
  )
table(merge_vt_penn_test_info$math_moy_missing_eoy_present)


   0    1 
1203   36

ela_moy_missing_eoy_present: dummy for students who have EOY ELA test info but MOY test info is missing.

merge_vt_penn_test_info = merge_vt_penn_test_info %>%
  mutate(
    moy_ela_present = !is.na(moy_ela_comp) & !is.na(moy_ela_date),
    eoy_ela_present = !is.na(eoy_ela_comp) & !is.na(eoy_ela_date),
    
    ela_moy_missing_eoy_present = case_when(
      !moy_ela_present &  eoy_ela_present ~ 1,   # moy missing, eoy present
       moy_ela_present &  eoy_ela_present ~ 0,   # both present
      !moy_ela_present & !eoy_ela_present ~ NA_real_  # both missing
    )
  )
table(merge_vt_penn_test_info$ela_moy_missing_eoy_present)


   0    1 
1196   37

###Test info summary

Proportion of math test info present (MOY present and EOY present, grouped by treatment status).

summary_math_tests_present = merge_vt_penn_test_info %>%
  mutate(
    moy_math_present = !is.na(moy_math_comp) & !is.na(moy_math_date),
    eoy_math_present = !is.na(eoy_math_comp) & !is.na(eoy_math_date)
  ) %>%
  group_by(treat) %>%
  summarise(
    n_students = n(),
    
    moy_count = sum(moy_math_present),
    moy_prop  = mean(moy_math_present),
    
    eoy_count = sum(eoy_math_present),
    eoy_prop  = mean(eoy_math_present),
    
    .groups = "drop"
  )

# View result
print(summary_math_tests_present)

# A tibble: 2 × 6
  treat n_students moy_count moy_prop eoy_count eoy_prop
  <dbl>      <int>     <int>    <dbl>     <int>    <dbl>
1     0        834       780    0.935       802    0.962
2     1        445       434    0.975       437    0.982

Proportion of ELA test info present (MOY present and EOY present, grouped by treatment status).

summary_ela_tests_present = merge_vt_penn_test_info %>%
  mutate(
    moy_ela_present = !is.na(moy_ela_comp) & !is.na(moy_ela_date),
    eoy_ela_present = !is.na(eoy_ela_comp) & !is.na(eoy_ela_date)
  ) %>%
  group_by(treat) %>%
  summarise(
    n_students = n(),
    
    moy_count = sum(moy_ela_present),
    moy_prop  = mean(moy_ela_present),
    
    eoy_count = sum(eoy_ela_present),
    eoy_prop  = mean(eoy_ela_present),
    
    .groups = "drop"
  )

# View result
print(summary_ela_tests_present)

# A tibble: 2 × 6
  treat n_students moy_count moy_prop eoy_count eoy_prop
  <dbl>      <int>     <int>    <dbl>     <int>    <dbl>
1     0        834       777    0.932       796    0.954
2     1        445       433    0.973       437    0.982

Data Summary

Generate school-level means for MOY ELA and Math assessment data, number of students, plus distribution across demographics.

#create function to streamline calculations
calc_pct = function(x) {
  prop.table(table(x)) * 100
}

#school-level means and counts for TEST data
merge_vt_penn_test_info = merge_vt_penn_test_info %>%
  mutate(across(c(moy_math_comp, #transform all scores to numeric value
                  moy_ela_comp, 
                  eoy_math_comp, 
                  eoy_ela_comp), as.numeric))

school_means = merge_vt_penn_test_info %>%
  group_by(school)%>%
  summarise(
    n_students = n(),
    mean_moy_math = mean(moy_math_comp, na.rm = TRUE),
    mean_moy_ela  = mean(moy_ela_comp, na.rm = TRUE), 
    mean_eoy_math = mean(eoy_math_comp, na.rm = TRUE),
    mean_eoy_ela  = mean(eoy_ela_comp, na.rm = TRUE), 
  )

# DEMOGRAPHICS

# race/ethnicity
race_pct = merge_vt_penn_test_info %>%
  group_by(school, race_eth) %>%
  summarise(n = n(), .groups = "drop") %>%
  group_by(school) %>%
  mutate(pct = (n / sum(n)) * 100) %>%
  select(-n) %>%
  pivot_wider(
    names_from = race_eth,
    values_from = pct,
    names_prefix = "race_"
  )

#gender
gender_pct = merge_vt_penn_test_info %>%
  group_by(school, gender) %>%
  summarise(n = n(), .groups = "drop") %>%
  group_by(school) %>%
  mutate(pct = (n / sum(n)) * 100) %>%
  select(-n) %>%
  pivot_wider(
    names_from = gender,
    values_from = pct,
    names_prefix = "gender_"
  )

#frpl
frpl_pct = merge_vt_penn_test_info %>%
  group_by(school, frpl) %>%
  summarise(n = n(), .groups = "drop") %>%
  group_by(school) %>%
  mutate(pct = (n / sum(n)) * 100) %>%
  select(-n) %>%
  pivot_wider(
    names_from = frpl,
    values_from = pct,
    names_prefix = "frpl_"
  )

#ell
ell_pct = merge_vt_penn_test_info %>%
  group_by(school, ell) %>%
  summarise(n = n(), .groups = "drop") %>%
  group_by(school) %>%
  mutate(pct = (n / sum(n)) * 100) %>%
  select(-n) %>%
  pivot_wider(
    names_from = ell,
    values_from = pct,
    names_prefix = "ell_"
  )

#iep 
iep_pct = merge_vt_penn_test_info %>%
  group_by(school, iep) %>%
  summarise(n = n(), .groups = "drop") %>%
  group_by(school) %>%
  mutate(pct = (n / sum(n)) * 100) %>%
  select(-n) %>%
  pivot_wider(
    names_from = iep,
    values_from = pct,
    names_prefix = "iep_"
  )

# COMBINE summaries
school_summary = school_means %>%
  left_join(race_pct,   by = "school") %>%
  left_join(gender_pct, by = "school") %>%
  left_join(frpl_pct,   by = "school") %>%
  left_join(ell_pct,    by = "school") %>%
  left_join(iep_pct, by = "school")

school_summary = school_summary %>%
  mutate(across(where(is.numeric), ~ round(.x, 2)))

print(school_summary)

# A tibble: 8 × 21
  school        n_students mean_moy_math mean_moy_ela mean_eoy_math mean_eoy_ela
  <chr>              <dbl>         <dbl>        <dbl>         <dbl>        <dbl>
1 Aldan Elemen…        117          201.         198.          204.         198.
2 Ardmore Aven…        311          202.         194.          208.         196.
3 Bell Avenue …        128          204.         194.          211.         194.
4 Colwyn Eleme…         72          203.         199.          211.         202.
5 East Lansdow…        155          206.         202.          214.         205.
6 Park Lane El…        170          202.         195.          207.         193.
7 W.B. Evans E…        179          205.         199.          213.         201.
8 Walnut Stree…        147          199.         194.          205          195.
# ℹ 15 more variables: `race_AMERICAN INDIAN/ALASKAN` <dbl>, race_ASIAN <dbl>,
#   `race_BLACK/AFRICAN AMERICAN` <dbl>, race_HISPANIC <dbl>,
#   `race_NATIVE HAWAIIAN` <dbl>, `race_WHITE/CAUCASIAN` <dbl>,
#   `race_MULTI RACIAL - NON HISPANIC` <dbl>, gender_F <dbl>, gender_M <dbl>,
#   frpl_0 <dbl>, frpl_Y <dbl>, ell_N <dbl>, ell_Y <dbl>, iep_N <dbl>,
#   iep_Y <dbl>

write.csv(school_summary, "test_summary_by_school_vt_penn.csv", row.names = FALSE)

Reformat Data

Create dummy variables

Create numeric binary for gender. Observation: binary variable already present, prime for male dummy coding.

table(merge_vt_penn_test_info$gender)


  F   M 
667 612

merge_vt_penn_test_info = merge_vt_penn_test_info %>%
  mutate(
    male = case_when(
      gender == "M" ~ 1,
      gender == "F" ~ 0,
      TRUE ~ NA_real_   # handles missing or unexpected values
    )
  )
table(merge_vt_penn_test_info$male)


  0   1 
667 612

Create numeric binary for IEP.

table(merge_vt_penn_test_info$iep)


   N    Y 
1009  270

merge_vt_penn_test_info = merge_vt_penn_test_info %>%
  mutate(
    iep = case_when(
      iep == "Y" ~ 1,
      iep == "N" ~ 0,
      TRUE ~ NA_real_   # handles missing or unexpected values
    )
  )
table(merge_vt_penn_test_info$iep)


   0    1 
1009  270

Create numeric binary for ELL.

table(merge_vt_penn_test_info$ell)


   N    Y 
1175  104

merge_vt_penn_test_info = merge_vt_penn_test_info %>%
  mutate(
    ell = case_when(
      ell == "Y" ~ 1,
      ell == "N" ~ 0,
      TRUE ~ NA_real_   # handles missing or unexpected values
    )
  )
table(merge_vt_penn_test_info$ell)


   0    1 
1175  104

Create numeric binary for FRPL.

table(merge_vt_penn_test_info$frpl)


  0   Y 
353 926

merge_vt_penn_test_info = merge_vt_penn_test_info %>%
  mutate(
    frpl = case_when(
      frpl == "Y" ~ 1,
      frpl == "0" ~ 0,
      TRUE ~ NA_real_   # handles missing or unexpected values
    )
  )
table(merge_vt_penn_test_info$frpl)


  0   1 
353 926

Create numeric binaries for each race.

table(merge_vt_penn_test_info$race_eth)


    AMERICAN INDIAN/ALASKAN                       ASIAN 
                         22                          31 
     BLACK/AFRICAN AMERICAN                    HISPANIC 
                       1101                          43 
MULTI RACIAL - NON HISPANIC             NATIVE HAWAIIAN 
                          1                           8 
            WHITE/CAUCASIAN 
                         73

merge_vt_penn_test_info = merge_vt_penn_test_info %>%
  mutate(
    race_A     = ifelse(race_eth == "ASIAN", 1, ifelse(is.na(race_eth), NA, 0)),
    race_B     = ifelse(race_eth == "BLACK/AFRICAN AMERICAN", 1, ifelse(is.na(race_eth), NA, 0)),
    race_H     = ifelse(race_eth == "HISPANIC", 1, ifelse(is.na(race_eth), NA, 0)),
    race_Multi     = ifelse(race_eth == "MULTI RACIAL - NON HISPANIC", 1, ifelse(is.na(race_eth), NA, 0)),
    race_NH     = ifelse(race_eth == "NATIVE HAWAIIAN", 1, ifelse(is.na(race_eth), NA, 0)),
    race_AI     = ifelse(race_eth == "AMERICAN INDIAN/ALASKAN", 1, ifelse(is.na(race_eth), NA, 0)),
    race_W     = ifelse(race_eth == "WHITE/CAUCASIAN", 1, ifelse(is.na(race_eth), NA, 0))
  )
table(merge_vt_penn_test_info$race_B) #quick check coding on race_B


   0    1 
 178 1101

Export files for e2i Coach

Save e2i file with all matched students from the merge, regardless of missing test info.

#select columns ready for e2i
vt_penn_e2i_test_info = merge_vt_penn_test_info %>%
  dplyr::select(student_id, 
                grade, 
                school, 
                moy_math_comp, moy_math_date, 
                moy_ela_comp,moy_ela_date, 
                eoy_math_comp, eoy_math_date,
                eoy_ela_comp, eoy_ela_date,
                male, iep, ell, frpl,
                treat,
                race_A, race_B, race_H, race_Multi, race_NH, race_AI, race_W,
                both_math, both_ela, all_tests, 
                math_moy_missing_eoy_present, ela_moy_missing_eoy_present)

#export e2i with test indicators, all matched students from the merge, regardless of missing test info (all_tests == 1 or 0)
write.csv(vt_penn_e2i_test_info, file = "vt_kent_e2i_all_students.csv", row.names = FALSE)

Save e2i file with only containing matched students from the merge with complete test info.

table(vt_penn_e2i_test_info$all_tests)


   0    1 
  85 1194

#subset: keep only students with complete test info (all_tests == 1)
vt_penn_e2i_all_tests = vt_penn_e2i_test_info %>% filter(all_tests == 1)
print(vt_penn_e2i_all_tests) # N=1194

# A tibble: 1,194 × 28
   student_id grade school moy_math_comp moy_math_date moy_ela_comp moy_ela_date
        <dbl> <dbl> <chr>          <dbl> <chr>                <dbl> <chr>       
 1     203802     6 Colwy…           186 1/9/2026               181 1/7/2026    
 2     203862     6 Colwy…           223 1/8/2026               230 1/7/2026    
 3     204106     6 Colwy…           230 1/13/2026              226 1/7/2026    
 4     204166     6 Colwy…           210 1/8/2026               212 1/7/2026    
 5     204211     6 Colwy…           181 1/8/2026               231 1/7/2026    
 6     204755     5 Colwy…           226 1/6/2026               219 1/7/2026    
 7     204759     5 Colwy…           205 1/6/2026               199 1/7/2026    
 8     204769     5 Colwy…           209 1/6/2026               215 1/7/2026    
 9     204787     5 Colwy…           170 1/6/2026               170 1/7/2026    
10     204843     5 Colwy…           213 1/6/2026               208 1/7/2026    
# ℹ 1,184 more rows
# ℹ 21 more variables: eoy_math_comp <dbl>, eoy_math_date <chr>,
#   eoy_ela_comp <dbl>, eoy_ela_date <chr>, male <dbl>, iep <dbl>, ell <dbl>,
#   frpl <dbl>, treat <dbl>, race_A <dbl>, race_B <dbl>, race_H <dbl>,
#   race_Multi <dbl>, race_NH <dbl>, race_AI <dbl>, race_W <dbl>,
#   both_math <dbl>, both_ela <dbl>, all_tests <dbl>,
#   math_moy_missing_eoy_present <dbl>, ela_moy_missing_eoy_present <dbl>

Count of treatment students among the subset with complete test info.

table(vt_penn_e2i_all_tests$treat)


  0   1 
765 429

#export e2i only containing matched students from the merge with complete test info (all_tests == 1)
write.csv(vt_penn_e2i_all_tests, file = "vt_penn_e2i_complete_tests.csv", row.names = FALSE)