Identify duplicate student records within roster file. No duplicates to report.
# frequency table of student_id occurrencesid_counts_roster = roster %>%group_by(student_id) %>%summarise(n =n()) %>%arrange(desc(n))# filter only IDs that appear > 1dupes_roster = id_counts_roster %>%filter(n >1)# view resultsprint(dupes_roster)
# A tibble: 0 × 2
# ℹ 2 variables: student_id <dbl>, n <int>
#export - NA: no duplicates to report#write.csv(dupes_roster, file = "duplicate_ids_roster_vt_penn.csv", row.names = FALSE)
Merge
Insert treatment status
Import roster from VT that details which students have accounts in their system.
#import ID crosswalk from VTvt_roster =read_csv("vt_roster_all_districts.csv")
Rows: 1496 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): District, School
dbl (2): District ID, Varsity Tutors ID
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
all_tests: dummy for students with both math and ELA test info complete across MOY and EOY.
table(merge_vt_penn_test_info$all_tests)
FALSE TRUE
85 1194
#complete math and ELA: MOY and EOY presentmerge_vt_penn_test_info$all_tests =ifelse(merge_vt_penn_test_info$all_tests, 1, 0)table(merge_vt_penn_test_info$all_tests) #check counts
0 1
85 1194
math_moy_missing_eoy_present: dummy for students who have EOY math test info but MOY test info is missing.
Save e2i file with all matched students from the merge, regardless of missing test info.
#select columns ready for e2ivt_penn_e2i_test_info = merge_vt_penn_test_info %>% dplyr::select(student_id, grade, school, moy_math_comp, moy_math_date, moy_ela_comp,moy_ela_date, eoy_math_comp, eoy_math_date, eoy_ela_comp, eoy_ela_date, male, iep, ell, frpl, treat, race_A, race_B, race_H, race_Multi, race_NH, race_AI, race_W, both_math, both_ela, all_tests, math_moy_missing_eoy_present, ela_moy_missing_eoy_present)#export e2i with test indicators, all matched students from the merge, regardless of missing test info (all_tests == 1 or 0)write.csv(vt_penn_e2i_test_info, file ="vt_kent_e2i_all_students.csv", row.names =FALSE)
Save e2i file with only containing matched students from the merge with complete test info.
table(vt_penn_e2i_test_info$all_tests)
0 1
85 1194
#subset: keep only students with complete test info (all_tests == 1)vt_penn_e2i_all_tests = vt_penn_e2i_test_info %>%filter(all_tests ==1)print(vt_penn_e2i_all_tests) # N=1194
Count of treatment students among the subset with complete test info.
table(vt_penn_e2i_all_tests$treat)
0 1
765 429
#export e2i only containing matched students from the merge with complete test info (all_tests == 1)write.csv(vt_penn_e2i_all_tests, file ="vt_penn_e2i_complete_tests.csv", row.names =FALSE)