Packages selected for inspecting and cleaning Varsity Tutors, district data from Cumberland schools.
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.2.1 ✔ readr 2.2.0
✔ forcats 1.0.1 ✔ stringr 1.6.0
✔ ggplot2 4.0.3 ✔ tibble 3.3.1
✔ lubridate 1.9.5 ✔ tidyr 1.3.2
✔ purrr 1.2.2
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Import roster.
roster <-read_csv("roster_vt_kent.csv")
Rows: 638 Columns: 18
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (12): Grade, School, MOY i-Ready Math Composite Scaled Score*, MOY i-Rea...
dbl (5): Student Number, Sped, MLE, Low Income, LAP
lgl (1): Treatment
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Inspect data variables. Data file appears to contain all requested elements - except for MLL Level (“Mulit-lingual learner level on a scale of 1 to 6 in SY25-26 based on beginning of year assessment). MLE, or multilingual education status, presents in place of ELL status. Low Income presents in place of Free/Reduced Price Lunch status. Sped, or Special Education, presents in place of IEP status.
colnames(roster)
[1] "Student Number"
[2] "Grade"
[3] "School"
[4] "Treatment"
[5] "MOY i-Ready Math Composite Scaled Score*"
[6] "MOY i-Ready Math Math Test Date*"
[7] "EOY i-Ready Math Composite Scaled Score*"
[8] "EOY i-Ready Math Math Test Date*"
[9] "MOY i-Ready ELA Composite Scaled Score*"
[10] "MOY i-Ready ELA Test Date*"
[11] "EOY i-Ready ELA Composite Scaled Score*"
[12] "EOY i-Ready ELA Test Date*"
[13] "Gender Code"
[14] "Sped"
[15] "MLE"
[16] "Low Income"
[17] "Race/Ethnicity"
[18] "LAP"
Change column headers for roster.
roster = roster %>%rename(student_id ="Student Number",grade ="Grade", school ="School", treat ="Treatment", moy_math_comp ="MOY i-Ready Math Composite Scaled Score*", moy_math_date ="MOY i-Ready Math Math Test Date*", moy_ela_comp ="MOY i-Ready ELA Composite Scaled Score*", moy_ela_date ="MOY i-Ready ELA Test Date*", eoy_math_comp ="EOY i-Ready Math Composite Scaled Score*", eoy_math_date ="EOY i-Ready Math Math Test Date*", eoy_ela_comp ="EOY i-Ready ELA Composite Scaled Score*", eoy_ela_date ="EOY i-Ready ELA Test Date*", gender ="Gender Code", sped ="Sped", mll ="MLE", #multilingual education/multilingual learnerslow_inc ="Low Income", race_eth ="Race/Ethnicity", low_ap ="LAP"#low academic performance )colnames(roster)
Inspect duplicate student records in roster. Observation: for a given duplicate student record, it appears to be the same individual (e.g., same school, same grade, same scores).
# A tibble: 636 × 18
student_id grade school treat moy_math_comp moy_math_date eoy_math_comp
<dbl> <chr> <chr> <lgl> <chr> <chr> <chr>
1 391033 07 Meridian Mi… NA 484 1/6/2026 490
2 412115 08 Meridian Mi… NA 384 1/7/2026 409
3 425285 06 Meridian Mi… NA 468 1/6/2026 #N/A
4 373185 07 Meridian Mi… NA 533 1/7/2026 549
5 394412 06 Meridian Mi… NA 571 1/7/2026 568
6 389860 07 Meridian Mi… NA 530 1/7/2026 549
7 389442 07 Meridian Mi… NA 467 1/6/2026 454
8 384402 07 Meridian Mi… NA 506 1/7/2026 496
9 382167 07 Meridian Mi… NA 481 1/7/2026 477
10 417702 06 Meridian Mi… NA 505 1/6/2026 499
# ℹ 626 more rows
# ℹ 11 more variables: eoy_math_date <chr>, moy_ela_comp <chr>,
# moy_ela_date <chr>, eoy_ela_comp <chr>, eoy_ela_date <chr>, gender <chr>,
# sped <dbl>, mll <dbl>, low_inc <dbl>, race_eth <chr>, low_ap <dbl>
Merge
Insert treatment status
Import roster from VT that details which students have accounts in their system
#import ID crosswalk from VTvt_roster =read_csv("vt_roster_all_districts.csv")
Rows: 1496 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): District, School
dbl (2): District ID, Varsity Tutors ID
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
all_tests: dummy for students with both math and ELA test info complete across MOY and EOY.
table(merge_vt_kent_test_info$all_tests)
FALSE TRUE
43 593
#complete math and ELA: MOY and EOY presentmerge_vt_kent_test_info$all_tests =ifelse(merge_vt_kent_test_info$all_tests, 1, 0)table(merge_vt_kent_test_info$all_tests) #check counts
0 1
43 593
math_moy_missing_eoy_present: dummy for students who have EOY math test info but MOY test info is missing.
Asian
212
Black or African American
101
Hispanic/Latino
149
Native Hawaiian or Other Pacific Islander
14
Two or More Races
50
White
110
merge_vt_kent_test_info = merge_vt_kent_test_info %>%mutate(race_A =ifelse(race_eth =="Asian", 1, ifelse(is.na(race_eth), NA, 0)),race_B =ifelse(race_eth =="Black or African American", 1, ifelse(is.na(race_eth), NA, 0)),race_H =ifelse(race_eth =="Hispanic/Latino", 1, ifelse(is.na(race_eth), NA, 0)),race_Multi =ifelse(race_eth =="Two or More Races", 1, ifelse(is.na(race_eth), NA, 0)),race_P =ifelse(race_eth =="Native Hawaiian or Other Pacific Islander", 1, ifelse(is.na(race_eth), NA, 0)),race_W =ifelse(race_eth =="White", 1, ifelse(is.na(race_eth), NA, 0)) )table(merge_vt_kent_test_info$race_B) #quick check coding on race_B
0 1
535 101
Export files for e2i Coach
Save e2i file with all matched students from the merge, regardless of missing test info.
#select columns ready for e2ivt_kent_e2i_test_info = merge_vt_kent_test_info %>% dplyr::select(student_id, grade, school, moy_math_comp, moy_math_date, moy_ela_comp,moy_ela_date, eoy_math_comp, eoy_math_date, eoy_ela_comp, eoy_ela_date, male, sped, mll, low_inc, low_ap, treat, race_A, race_B, race_H, race_Multi, race_P, race_W, both_math, both_ela, all_tests, math_moy_missing_eoy_present, ela_moy_missing_eoy_present)#export e2i with test indicators, all matched students from the merge, regardless of missing test info (all_tests == 1 or 0)write.csv(vt_kent_e2i_test_info, file ="vt_kent_e2i_all_students.csv", row.names =FALSE)
Save e2i file with only containing matched students from the merge with complete test info.
table(vt_kent_e2i_test_info$all_tests)
0 1
43 593
#subset: keep only students with complete test info (all_tests == 1)vt_kent_e2i_all_tests = vt_kent_e2i_test_info %>%filter(all_tests ==1)print(vt_kent_e2i_all_tests) # N=593
Count of treatment students among the subset with complete test info.
table(vt_kent_e2i_all_tests$treat)
0 1
510 83
#export e2i only containing matched students from the merge with complete test info (all_tests == 1)write.csv(vt_kent_e2i_all_tests, file ="vt_kent_e2i_complete_tests.csv", row.names =FALSE)