Using Program Info to standardize program code and title
matching
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
#standardized admitted program titles
PI<-program_info[,c("OLD_BANNER_CODE","PROGRAM_DESC")] #get old banner code and program names
names(PI)[names(PI) == 'OLD_BANNER_CODE'] <- 'Admitted.Program.Code'
data1<-left_join(data,PI, by='Admitted.Program.Code')
data1 <- data1[, c('Cohort', 'Academic.Period', 'ID', 'Transfer.Pathway.ID', 'Previous.Program.Code', 'Previous.Institution.Name', 'Previous.Institution.Type', 'Multiple.Previous.Institution.Count', 'Cumulative.Transfer.Credits.Awarded', 'Admitted.Program.Code', 'PROGRAM_DESC', 'Admitted.GPA', 'Admitted.Academic.Period', 'Current.Credential', 'Current.Program.Code', 'Current.Program.Title', 'Current.GPA', 'Current.Courses.Completed', 'Total.Program.Courses', 'Percent.Complete', 'Credit.Type', 'Registration.Status', 'Gender', 'Birth.Year', 'Immigration.Status', 'Postal.Code', 'Mother.Tongue', 'Semester', 'SZBMTCU_ELEMENT_43_COHORT_START_DATE', 'Current.Program.Length', 'Graduation.Date')]
names(data1)[names(data1) == 'PROGRAM_DESC'] <- 'Admitted.Program.Title'
#standardized current program titles
names(PI)[names(PI) == 'Admitted.Program.Code'] <- 'Current.Program.Code'
data1<-left_join(data1,PI, by='Current.Program.Code')
data1<-data1[,c(2:4,10:15,32,17,23:29,31)]
names(data1)[names(data1) == 'PROGRAM_DESC'] <- 'Current.Program.Title'
Filter for Transfer student SEM2 and Year2 retention and
persistence
# Join remapped FASHION_BA semesters with all data
BA_transfer_programs2<- left_join(BA_transfer_programs,fashion_BA_remapped[,c(1,3,4,10)], by=c("ID", "Academic.Period", "Admitted.Program.Code"))
#replace semesters in FASHION_BA with remapped values
BA_transfer_programs2$Semester <- ifelse(!is.na(BA_transfer_programs2$Mapped_SEM), BA_transfer_programs2$Mapped_SEM, BA_transfer_programs2$Semester)
BA_transfer_programs2<-BA_transfer_programs2[,1:26]
# There are 5 students in FASHION_BA with multiple admission records where there supposed to only be 1
BA_transfer_programs2$Admitted.Academic.Period <- with(BA_transfer_programs2, ifelse(ID %in% c("N01029710", "N01271901", "N01288856", "N01313853", "N01068843"), min(Admitted.Academic.Period), Admitted.Academic.Period))
# colnames(g)[colnames(g) == "Var1"] <- "ID"
# g<-g[g$Freq==2,]
# datata<-data[,c(2,3,10,13,15,28)]
# abc<-left_join(g,datata,by=c("ID"))
# abc <- abc[order(abc$ID, abc$Academic.Period), ]
#5 more students with multiple admission records where there supposed to only be 1
BA_transfer_programs2$Admitted.Academic.Period <- with(BA_transfer_programs2, ifelse(ID %in% c("N00397703", "N00837554", "N01047068", "N01352123", "N01359941"), min(Admitted.Academic.Period), Admitted.Academic.Period))
TransferStudent_RetentionPersistence<- BA_transfer_programs2 %>%
group_by(ID, Admitted.Academic.Period) %>%
filter(min(Semester) > 1) %>%
mutate(Min_Sem=min(Semester))
TransferStudent_RetentionPersistence$PathwayStudent <- "1"
# count how many students entered at each advanced SEM level
c<- unique(TransferStudent_RetentionPersistence[,c(1,2,27)])
table(c$Min_Sem)
##
## 2 3 4 5 6 7 8 PT
## 97 515 334 1245 69 57 21 2
# number of Credit Transfer students who did not start from Semester 1
length(unique(subset(TransferStudent_RetentionPersistence, Transfer.Pathway.ID == "ZZZ")$ID))
## [1] 67
# number of students without Pathway ID who did not start from Semester 1
length(unique(subset(TransferStudent_RetentionPersistence, is.na(Transfer.Pathway.ID))$ID))
## [1] 139
Get a count of transfer students by admitted academic period
# find SEM3 transfer students
SEM3_transfers <- TransferStudent_RetentionPersistence[TransferStudent_RetentionPersistence$Min_Sem=="3",]
SEM3_transfers %>%
group_by(Admitted.Academic.Period) %>%
summarise(CountDistinctID = n_distinct(ID))
## # A tibble: 10 × 2
## Admitted.Academic.Period CountDistinctID
## <int> <int>
## 1 201670 44
## 2 201730 1
## 3 201770 78
## 4 201830 3
## 5 201870 87
## 6 201970 86
## 7 202030 1
## 8 202070 93
## 9 202170 76
## 10 202270 46
# find SEM4 transfer students
SEM4_transfers <- TransferStudent_RetentionPersistence[TransferStudent_RetentionPersistence$Min_Sem=="4",]
SEM4_transfers %>%
group_by(Admitted.Academic.Period) %>%
summarise(CountDistinctID = n_distinct(ID))
## # A tibble: 13 × 2
## Admitted.Academic.Period CountDistinctID
## <int> <int>
## 1 201670 9
## 2 201730 17
## 3 201770 7
## 4 201830 10
## 5 201870 10
## 6 201930 37
## 7 201970 8
## 8 202030 45
## 9 202070 14
## 10 202130 73
## 11 202170 1
## 12 202230 65
## 13 202330 38
# find SEM5 transfer students
SEM5_transfers <- TransferStudent_RetentionPersistence[TransferStudent_RetentionPersistence$Min_Sem=="5",]
SEM5_transfers %>%
group_by(Admitted.Academic.Period) %>%
summarise(CountDistinctID = n_distinct(ID))
## # A tibble: 11 × 2
## Admitted.Academic.Period CountDistinctID
## <int> <int>
## 1 201670 64
## 2 201730 1
## 3 201770 93
## 4 201850 1
## 5 201870 163
## 6 201970 196
## 7 202070 247
## 8 202130 1
## 9 202170 270
## 10 202250 1
## 11 202270 208
SEM3 Transfers
Find SEM3 transfer students by admission cohorts
SEM3_201770 <- SEM3_transfers[SEM3_transfers$Admitted.Academic.Period=="201770",]
SEM3_201830 <- SEM3_transfers[SEM3_transfers$Admitted.Academic.Period=="201830",] #2 students
SEM3_201870 <- SEM3_transfers[SEM3_transfers$Admitted.Academic.Period=="201870",]
SEM3_201930 <- SEM3_transfers[SEM3_transfers$Admitted.Academic.Period=="201930",] #1 student
SEM3_201970 <- SEM3_transfers[SEM3_transfers$Admitted.Academic.Period=="201970",]
SEM3_202030 <- SEM3_transfers[SEM3_transfers$Admitted.Academic.Period=="202030",] #0
SEM3_202070 <- SEM3_transfers[SEM3_transfers$Admitted.Academic.Period=="202070",]
SEM3_202130 <- SEM3_transfers[SEM3_transfers$Admitted.Academic.Period=="202130",] #0
SEM3_202170 <- SEM3_transfers[SEM3_transfers$Admitted.Academic.Period=="202170",]
SEM3_202230 <- SEM3_transfers[SEM3_transfers$Admitted.Academic.Period=="202230",] #0
SEM3_202270 <- SEM3_transfers[SEM3_transfers$Admitted.Academic.Period=="202270",]
Get Non-transfer data
df<-BA_transfer_programs2[,c(1:20)]
Non_Transfer<- df %>%
group_by(ID, Admitted.Academic.Period, Transfer.Pathway.ID) %>%
filter(min(Semester)==1) %>%
mutate(Min_Sem=min(Semester))
Non_Transfer %>%
group_by(Admitted.Academic.Period) %>%
summarise(CountDistinctID = n_distinct(ID))
## # A tibble: 14 × 2
## Admitted.Academic.Period CountDistinctID
## <int> <int>
## 1 201670 965
## 2 201730 141
## 3 201770 1071
## 4 201830 197
## 5 201870 992
## 6 201930 158
## 7 201970 966
## 8 202030 156
## 9 202070 870
## 10 202130 151
## 11 202170 798
## 12 202230 152
## 13 202270 703
## 14 202330 164
Find 201670 Non-transfer cohort who are in SEM3 in 201770
# Select only non-transfer students in SEM3 in 201770
# Non_Transfer_201770_SEM3<- Non_Transfer[Non_Transfer$Admitted.Academic.Period=="201670" & Non_Transfer$Semester>="3",]
subset_201770_SEM3 <- subset(Non_Transfer, Admitted.Academic.Period == 201670 &
Academic.Period == 201770 & Semester == 3 & Admitted.Program.Code == Current.Program.Code)
subset_201770_SEM3$Match<-1
Non_Transfer_201770_SEM3<-left_join(Non_Transfer,subset_201770_SEM3[,c(1,2,22)], by=c("ID","Admitted.Academic.Period"))
Non_Transfer_201770_SEM3<-Non_Transfer_201770_SEM3[Non_Transfer_201770_SEM3$Match==1,]
Non_Transfer_201770_SEM3 <- Non_Transfer_201770_SEM3[!is.na(Non_Transfer_201770_SEM3$ID), ]
Non_Transfer_201770_SEM3<- Non_Transfer_201770_SEM3[Non_Transfer_201770_SEM3$Semester>=3, ]
Non_Transfer_201770_SEM3<- Non_Transfer_201770_SEM3 %>%
group_by(ID, Admitted.Academic.Period) %>%
filter(min(Semester) == 3) %>%
mutate(Min_Sem=min(Semester))
# Add SEM2 persistence
Non_Transfer_201770_SEM3 <- Non_Transfer_201770_SEM3 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Sem2_Persisted = ifelse((Academic.Period == 201770 + 60), 1, 0),
SEM2.GPA = ifelse((Academic.Period == 201770 + 60), Current.GPA[Academic.Period == (201770 + 60)], NA)
)
# Add Year2 persistence
Non_Transfer_201770_SEM3 <- Non_Transfer_201770_SEM3 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Year2_Persisted = ifelse((Academic.Period == 201770 + 100), 1, 0),
Year2.GPA = ifelse((Academic.Period == 201770 + 100), Current.GPA[Academic.Period == (201770 + 100)], NA))
# Add SEM2 retention
Non_Transfer_201770_SEM3 <- Non_Transfer_201770_SEM3 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Sem2_Retained = ifelse((Academic.Period == 201770 + 60) & Admitted.Program.Code==Current.Program.Code, 1, 0))
# Add Year2 retention
Non_Transfer_201770_SEM3 <- Non_Transfer_201770_SEM3 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Year2_Retained = ifelse((Academic.Period == 201770 + 100) & Admitted.Program.Code==Current.Program.Code, 1, 0))
# Non_Transfer_201770_SEM3$PathwayStudent <- "0"
Non_Transfer_201770_SEM3$PathwayStudent <- ifelse(is.na(Non_Transfer_201770_SEM3$Transfer.Pathway.ID), 0, "Credit")
SEM3_201770_AllStudents <- rbind(SEM3_201770, Non_Transfer_201770_SEM3)
SEM3_201770_AllStudents_1<- SEM3_201770_AllStudents[, c(1,2,5,9,12:28)]
OneRowSem3201770 <- SEM3_201770_AllStudents_1 %>%
group_by(ID, Admitted.Academic.Period, PathwayStudent) %>%
summarize(Sem2_Persisted1 = sum(Sem2_Persisted),
Year2_Persisted1 = sum(Year2_Persisted),
Sem2_Retained1 = sum(Sem2_Retained),
Year2_Retained1 = sum(Year2_Retained),
SEM2_GPA1=sum(SEM2.GPA, na.rm = TRUE),
YEAR2_GPA1=sum(Year2.GPA, na.rm=TRUE),
Element43=min(Element43_translated))
## `summarise()` has grouped output by 'ID', 'Admitted.Academic.Period'. You can
## override using the `.groups` argument.
OneRowSem3201770$Comparison_Period<-"201770"
Summary_Sem3_201770<-OneRowSem3201770 %>%
group_by(PathwayStudent) %>%
summarize(Sem2_Persisted2 = sum(Sem2_Persisted1)/n_distinct(ID),
Year2_Persisted2 = sum(Year2_Persisted1)/n_distinct(ID),
Sem2_Retained2 = sum(Sem2_Retained1)/n_distinct(ID),
Year2_Retained2 = sum(Year2_Retained1)/n_distinct(ID))
Summary_Sem3_201770$Comparison_Period<- "201770"
Summary_Sem3_201770
## # A tibble: 3 × 6
## PathwayStudent Sem2_Persisted2 Year2_Persisted2 Sem2_Retained2 Year2_Retained2
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 0 0.920 0.866 0.906 0.846
## 2 1 0.872 0.744 0.846 0.718
## 3 Credit 0.901 0.901 0.889 0.889
## # ℹ 1 more variable: Comparison_Period <chr>
Find 201770 cohort who are in SEM3 in 201870
# Select only non-transfer students in SEM3 in 201870
# Non_Transfer_201870_SEM3<- Non_Transfer[Non_Transfer$Admitted.Academic.Period=="201770" & Non_Transfer$Semester>="3",]
subset_201870_SEM3 <- subset(Non_Transfer, Admitted.Academic.Period == 201770 &
Academic.Period == 201870 & Semester == 3 & Admitted.Program.Code == Current.Program.Code)
subset_201870_SEM3$Match<-1
Non_Transfer_201870_SEM3<-left_join(Non_Transfer,subset_201870_SEM3[,c(1,2,22)], by=c("ID","Admitted.Academic.Period"))
Non_Transfer_201870_SEM3<-Non_Transfer_201870_SEM3[Non_Transfer_201870_SEM3$Match==1,]
Non_Transfer_201870_SEM3 <- Non_Transfer_201870_SEM3[!is.na(Non_Transfer_201870_SEM3$ID), ]
Non_Transfer_201870_SEM3<- Non_Transfer_201870_SEM3[Non_Transfer_201870_SEM3$Semester>=3, ]
Non_Transfer_201870_SEM3<- Non_Transfer_201870_SEM3 %>%
group_by(ID, Admitted.Academic.Period) %>%
filter(min(Semester) == 3) %>%
mutate(Min_Sem=min(Semester))
# Add SEM2 persistence
Non_Transfer_201870_SEM3 <- Non_Transfer_201870_SEM3 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Sem2_Persisted = ifelse((Academic.Period == 201870 + 60), 1, 0),
SEM2.GPA = ifelse((Academic.Period == 201870 + 60), Current.GPA[Academic.Period == (201870 + 60)], NA)
)
# Add Year2 persistence
Non_Transfer_201870_SEM3 <- Non_Transfer_201870_SEM3 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Year2_Persisted = ifelse((Academic.Period == 201870 + 100), 1, 0),
Year2.GPA = ifelse((Academic.Period == 201870 + 100), Current.GPA[Academic.Period == (201870 + 100)], NA))
# Add SEM2 retention
Non_Transfer_201870_SEM3 <- Non_Transfer_201870_SEM3 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Sem2_Retained = ifelse((Academic.Period == 201870 + 60) & Admitted.Program.Code==Current.Program.Code, 1, 0))
# Add Year2 retention
Non_Transfer_201870_SEM3 <- Non_Transfer_201870_SEM3 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Year2_Retained = ifelse((Academic.Period == 201870 + 100) & Admitted.Program.Code==Current.Program.Code, 1, 0))
# Non_Transfer_201870_SEM3$PathwayStudent <- "0"
Non_Transfer_201870_SEM3$PathwayStudent <- ifelse(is.na(Non_Transfer_201870_SEM3$Transfer.Pathway.ID), 0, "Credit")
SEM3_201870_AllStudents <- rbind(SEM3_201870, Non_Transfer_201870_SEM3)
SEM3_201870_AllStudents_1<- SEM3_201870_AllStudents[, c(1,2,5,9,12:28)]
OneRowSem3201870 <- SEM3_201870_AllStudents_1 %>%
group_by(ID, Admitted.Academic.Period, PathwayStudent) %>%
summarize(Sem2_Persisted1 = sum(Sem2_Persisted),
Year2_Persisted1 = sum(Year2_Persisted),
Sem2_Retained1 = sum(Sem2_Retained),
Year2_Retained1 = sum(Year2_Retained),
SEM2_GPA1=sum(SEM2.GPA, na.rm = TRUE),
YEAR2_GPA1=sum(Year2.GPA, na.rm=TRUE),
Element43=min(Element43_translated))
## `summarise()` has grouped output by 'ID', 'Admitted.Academic.Period'. You can
## override using the `.groups` argument.
OneRowSem3201870$Comparison_Period<-"201870"
Summary_Sem3_201870<-OneRowSem3201870 %>%
group_by(PathwayStudent) %>%
summarize(Sem2_Persisted2 = sum(Sem2_Persisted1)/n_distinct(ID),
Year2_Persisted2 = sum(Year2_Persisted1)/n_distinct(ID),
Sem2_Retained2 = sum(Sem2_Retained1)/n_distinct(ID),
Year2_Retained2 = sum(Year2_Retained1)/n_distinct(ID))
Summary_Sem3_201870$Comparison_Period<- "201870"
Find 201870 Non-transfer cohort who are in SEM3 in 201970
# Select only non-transfer students in SEM3 in 201970
# Non_Transfer_201970_SEM3<- Non_Transfer[Non_Transfer$Admitted.Academic.Period=="201870" & Non_Transfer$Semester>="3",]
subset_201970_SEM3 <- subset(Non_Transfer, Admitted.Academic.Period == 201870 &
Academic.Period == 201970 & Semester == 3 & Admitted.Program.Code == Current.Program.Code)
subset_201970_SEM3$Match<-1
Non_Transfer_201970_SEM3<-left_join(Non_Transfer,subset_201970_SEM3[,c(1,2,22)], by=c("ID","Admitted.Academic.Period"))
Non_Transfer_201970_SEM3<-Non_Transfer_201970_SEM3[Non_Transfer_201970_SEM3$Match==1,]
Non_Transfer_201970_SEM3 <- Non_Transfer_201970_SEM3[!is.na(Non_Transfer_201970_SEM3$ID), ]
Non_Transfer_201970_SEM3<- Non_Transfer_201970_SEM3[Non_Transfer_201970_SEM3$Semester>=3, ]
Non_Transfer_201970_SEM3<- Non_Transfer_201970_SEM3 %>%
group_by(ID, Admitted.Academic.Period) %>%
filter(min(Semester) == 3) %>%
mutate(Min_Sem=min(Semester))
# Add SEM2 persistence
Non_Transfer_201970_SEM3 <- Non_Transfer_201970_SEM3 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Sem2_Persisted = ifelse((Academic.Period == 201970 + 60), 1, 0),
SEM2.GPA = ifelse((Academic.Period == 201970 + 60), Current.GPA[Academic.Period == (201970 + 60)], NA)
)
# Add Year2 persistence
Non_Transfer_201970_SEM3 <- Non_Transfer_201970_SEM3 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Year2_Persisted = ifelse((Academic.Period == 201970 + 100), 1, 0),
Year2.GPA = ifelse((Academic.Period == 201970 + 100), Current.GPA[Academic.Period == (201970 + 100)], NA))
# Add SEM2 retention
Non_Transfer_201970_SEM3 <- Non_Transfer_201970_SEM3 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Sem2_Retained = ifelse((Academic.Period == 201970 + 60) & Admitted.Program.Code==Current.Program.Code, 1, 0))
# Add Year2 retention
Non_Transfer_201970_SEM3 <- Non_Transfer_201970_SEM3 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Year2_Retained = ifelse((Academic.Period == 201970 + 100) & Admitted.Program.Code==Current.Program.Code, 1, 0))
# Non_Transfer_201970_SEM3$PathwayStudent <- "0"
Non_Transfer_201970_SEM3$PathwayStudent <- ifelse(is.na(Non_Transfer_201970_SEM3$Transfer.Pathway.ID), 0, "Credit")
SEM3_201970_AllStudents <- rbind(SEM3_201970, Non_Transfer_201970_SEM3)
SEM3_201970_AllStudents_1<- SEM3_201970_AllStudents[, c(1,2,5,9,12:28)]
OneRowSem3201970 <- SEM3_201970_AllStudents_1 %>%
group_by(ID, Admitted.Academic.Period, PathwayStudent) %>%
summarize(Sem2_Persisted1 = sum(Sem2_Persisted),
Year2_Persisted1 = sum(Year2_Persisted),
Sem2_Retained1 = sum(Sem2_Retained),
Year2_Retained1 = sum(Year2_Retained),
SEM2_GPA1=sum(SEM2.GPA, na.rm = TRUE),
YEAR2_GPA1=sum(Year2.GPA, na.rm=TRUE),
Element43=min(Element43_translated))
## `summarise()` has grouped output by 'ID', 'Admitted.Academic.Period'. You can
## override using the `.groups` argument.
OneRowSem3201970$Comparison_Period<-"201970"
Summary_Sem3_201970<-OneRowSem3201970 %>%
group_by(PathwayStudent) %>%
summarize(Sem2_Persisted2 = sum(Sem2_Persisted1)/n_distinct(ID),
Year2_Persisted2 = sum(Year2_Persisted1)/n_distinct(ID),
Sem2_Retained2 = sum(Sem2_Retained1)/n_distinct(ID),
Year2_Retained2 = sum(Year2_Retained1)/n_distinct(ID))
Summary_Sem3_201970$Comparison_Period<- "201970"
Find 201970 Non-transfer cohort who are in SEM3 in 202070
# Select only non-transfer students in SEM3 in 202070
# Non_Transfer_202070_SEM3<- Non_Transfer[Non_Transfer$Admitted.Academic.Period=="201970" & Non_Transfer$Semester>="3",]
subset_202070_SEM3 <- subset(Non_Transfer, Admitted.Academic.Period == 201970 &
Academic.Period == 202070 & Semester == 3 & Admitted.Program.Code == Current.Program.Code)
subset_202070_SEM3$Match<-1
Non_Transfer_202070_SEM3<-left_join(Non_Transfer,subset_202070_SEM3[,c(1,2,22)], by=c("ID","Admitted.Academic.Period"))
Non_Transfer_202070_SEM3<-Non_Transfer_202070_SEM3[Non_Transfer_202070_SEM3$Match==1,]
Non_Transfer_202070_SEM3 <- Non_Transfer_202070_SEM3[!is.na(Non_Transfer_202070_SEM3$ID), ]
Non_Transfer_202070_SEM3<- Non_Transfer_202070_SEM3[Non_Transfer_202070_SEM3$Semester>=3, ]
Non_Transfer_202070_SEM3<- Non_Transfer_202070_SEM3 %>%
group_by(ID, Admitted.Academic.Period) %>%
filter(min(Semester) == 3) %>%
mutate(Min_Sem=min(Semester))
# Add SEM2 persistence
Non_Transfer_202070_SEM3 <- Non_Transfer_202070_SEM3 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Sem2_Persisted = ifelse((Academic.Period == 202070 + 60), 1, 0),
SEM2.GPA = ifelse((Academic.Period == 202070 + 60), Current.GPA[Academic.Period == (202070 + 60)], NA)
)
# Add Year2 persistence
Non_Transfer_202070_SEM3 <- Non_Transfer_202070_SEM3 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Year2_Persisted = ifelse((Academic.Period == 202070 + 100), 1, 0),
Year2.GPA = ifelse((Academic.Period == 202070 + 100), Current.GPA[Academic.Period == (202070 + 100)], NA))
# Add SEM2 retention
Non_Transfer_202070_SEM3 <- Non_Transfer_202070_SEM3 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Sem2_Retained = ifelse((Academic.Period == 202070 + 60) & Admitted.Program.Code==Current.Program.Code, 1, 0))
# Add Year2 retention
Non_Transfer_202070_SEM3 <- Non_Transfer_202070_SEM3 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Year2_Retained = ifelse((Academic.Period == 202070 + 100) & Admitted.Program.Code==Current.Program.Code, 1, 0))
# Non_Transfer_202070_SEM3$PathwayStudent <- "0"
Non_Transfer_202070_SEM3$PathwayStudent <- ifelse(is.na(Non_Transfer_202070_SEM3$Transfer.Pathway.ID), 0, "Credit")
SEM3_202070_AllStudents <- rbind(SEM3_202070, Non_Transfer_202070_SEM3)
SEM3_202070_AllStudents_1<- SEM3_202070_AllStudents[, c(1,2,5,9,12:28)]
OneRowSem3202070 <- SEM3_202070_AllStudents_1 %>%
group_by(ID, Admitted.Academic.Period, PathwayStudent) %>%
summarize(Sem2_Persisted1 = sum(Sem2_Persisted),
Year2_Persisted1 = sum(Year2_Persisted),
Sem2_Retained1 = sum(Sem2_Retained),
Year2_Retained1 = sum(Year2_Retained),
SEM2_GPA1=sum(SEM2.GPA, na.rm = TRUE),
YEAR2_GPA1=sum(Year2.GPA, na.rm=TRUE),
Element43=min(Element43_translated))
## `summarise()` has grouped output by 'ID', 'Admitted.Academic.Period'. You can
## override using the `.groups` argument.
OneRowSem3202070$Comparison_Period<-"202070"
Summary_Sem3_202070<-OneRowSem3202070 %>%
group_by(PathwayStudent) %>%
summarize(Sem2_Persisted2 = sum(Sem2_Persisted1)/n_distinct(ID),
Year2_Persisted2 = sum(Year2_Persisted1)/n_distinct(ID),
Sem2_Retained2 = sum(Sem2_Retained1)/n_distinct(ID),
Year2_Retained2 = sum(Year2_Retained1)/n_distinct(ID))
Summary_Sem3_202070$Comparison_Period<- "202070"
Find 202070 Non-transfer cohort who are in SEM3 in 202170
# Select only non-transfer students in SEM3 in 202170
# Non_Transfer_202170_SEM3<- Non_Transfer[Non_Transfer$Admitted.Academic.Period=="202070" & Non_Transfer$Semester>="3",]
subset_202170_SEM3 <- subset(Non_Transfer, Admitted.Academic.Period == 202070 &
Academic.Period == 202170 & Semester == 3 & Admitted.Program.Code == Current.Program.Code)
subset_202170_SEM3$Match<-1
Non_Transfer_202170_SEM3<-left_join(Non_Transfer,subset_202170_SEM3[,c(1,2,22)], by=c("ID","Admitted.Academic.Period"))
Non_Transfer_202170_SEM3<-Non_Transfer_202170_SEM3[Non_Transfer_202170_SEM3$Match==1,]
Non_Transfer_202170_SEM3 <- Non_Transfer_202170_SEM3[!is.na(Non_Transfer_202170_SEM3$ID), ]
Non_Transfer_202170_SEM3<- Non_Transfer_202170_SEM3[Non_Transfer_202170_SEM3$Semester>=3, ]
Non_Transfer_202170_SEM3<- Non_Transfer_202170_SEM3 %>%
group_by(ID, Admitted.Academic.Period) %>%
filter(min(Semester) == 3) %>%
mutate(Min_Sem=min(Semester))
# Add SEM2 persistence
Non_Transfer_202170_SEM3 <- Non_Transfer_202170_SEM3 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Sem2_Persisted = ifelse((Academic.Period == 202170 + 60), 1, 0),
SEM2.GPA = ifelse((Academic.Period == 202170 + 60), Current.GPA[Academic.Period == (202170 + 60)], NA)
)
# Add Year2 persistence
Non_Transfer_202170_SEM3 <- Non_Transfer_202170_SEM3 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Year2_Persisted = ifelse((Academic.Period == 202170 + 100), 1, 0),
Year2.GPA = ifelse((Academic.Period == 202170 + 100), Current.GPA[Academic.Period == (202170 + 100)], NA))
# Add SEM2 retention
Non_Transfer_202170_SEM3 <- Non_Transfer_202170_SEM3 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Sem2_Retained = ifelse((Academic.Period == 202170 + 60) & Admitted.Program.Code==Current.Program.Code, 1, 0))
# Add Year2 retention
Non_Transfer_202170_SEM3 <- Non_Transfer_202170_SEM3 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Year2_Retained = ifelse((Academic.Period == 202170 + 100) & Admitted.Program.Code==Current.Program.Code, 1, 0))
# Non_Transfer_202170_SEM3$PathwayStudent <- "0"
Non_Transfer_202170_SEM3$PathwayStudent <- ifelse(is.na(Non_Transfer_202170_SEM3$Transfer.Pathway.ID), 0, "Credit")
SEM3_202170_AllStudents <- rbind(SEM3_202170, Non_Transfer_202170_SEM3)
SEM3_202170_AllStudents_1<- SEM3_202170_AllStudents[, c(1,2,5,9,12:28)]
OneRowSem3202170 <- SEM3_202170_AllStudents_1 %>%
group_by(ID, Admitted.Academic.Period, PathwayStudent) %>%
summarize(Sem2_Persisted1 = sum(Sem2_Persisted),
Year2_Persisted1 = sum(Year2_Persisted),
Sem2_Retained1 = sum(Sem2_Retained),
Year2_Retained1 = sum(Year2_Retained),
SEM2_GPA1=sum(SEM2.GPA, na.rm = TRUE),
YEAR2_GPA1=sum(Year2.GPA, na.rm=TRUE),
Element43=min(Element43_translated))
## `summarise()` has grouped output by 'ID', 'Admitted.Academic.Period'. You can
## override using the `.groups` argument.
OneRowSem3202170$Comparison_Period<-"202170"
Summary_Sem3_202170<-OneRowSem3202170 %>%
group_by(PathwayStudent) %>%
summarize(Sem2_Persisted2 = sum(Sem2_Persisted1)/n_distinct(ID),
Year2_Persisted2 = sum(Year2_Persisted1)/n_distinct(ID),
Sem2_Retained2 = sum(Sem2_Retained1)/n_distinct(ID),
Year2_Retained2 = sum(Year2_Retained1)/n_distinct(ID))
Summary_Sem3_202170$Comparison_Period<- "202170"
Find 202170 Non-transfer cohort who are in SEM3 in 202270
# Select only non-transfer students in SEM3 in 202270
# Non_Transfer_202270_SEM3<- Non_Transfer[Non_Transfer$Admitted.Academic.Period=="202170" & Non_Transfer$Semester>="3",]
subset_202270_SEM3 <- subset(Non_Transfer, Admitted.Academic.Period == 202170 &
Academic.Period == 202270 & Semester == 3 & Admitted.Program.Code == Current.Program.Code)
subset_202270_SEM3$Match<-1
Non_Transfer_202270_SEM3<-left_join(Non_Transfer,subset_202270_SEM3[,c(1,2,22)], by=c("ID","Admitted.Academic.Period"))
Non_Transfer_202270_SEM3<-Non_Transfer_202270_SEM3[Non_Transfer_202270_SEM3$Match==1,]
Non_Transfer_202270_SEM3 <- Non_Transfer_202270_SEM3[!is.na(Non_Transfer_202270_SEM3$ID), ]
Non_Transfer_202270_SEM3<- Non_Transfer_202270_SEM3[Non_Transfer_202270_SEM3$Semester>=3, ]
Non_Transfer_202270_SEM3<- Non_Transfer_202270_SEM3 %>%
group_by(ID, Admitted.Academic.Period) %>%
filter(min(Semester) == 3) %>%
mutate(Min_Sem=min(Semester))
# Add SEM2 persistence
Non_Transfer_202270_SEM3 <- Non_Transfer_202270_SEM3 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Sem2_Persisted = ifelse((Academic.Period == 202270 + 60), 1, 0),
SEM2.GPA = ifelse((Academic.Period == 202270 + 60), Current.GPA[Academic.Period == (202270 + 60)], NA)
)
# Add Year2 persistence
Non_Transfer_202270_SEM3 <- Non_Transfer_202270_SEM3 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Year2_Persisted = ifelse((Academic.Period == 202270 + 100), 1, 0),
Year2.GPA = ifelse((Academic.Period == 202270 + 100), Current.GPA[Academic.Period == (202270 + 100)], NA))
# Add SEM2 retention
Non_Transfer_202270_SEM3 <- Non_Transfer_202270_SEM3 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Sem2_Retained = ifelse((Academic.Period == 202270 + 60) & Admitted.Program.Code==Current.Program.Code, 1, 0))
# Add Year2 retention
Non_Transfer_202270_SEM3 <- Non_Transfer_202270_SEM3 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Year2_Retained = ifelse((Academic.Period == 202270 + 100) & Admitted.Program.Code==Current.Program.Code, 1, 0))
# Non_Transfer_202270_SEM3$PathwayStudent <- "0"
Non_Transfer_202270_SEM3$PathwayStudent <- ifelse(is.na(Non_Transfer_202270_SEM3$Transfer.Pathway.ID), 0, "Credit")
SEM3_202270_AllStudents <- rbind(SEM3_202270, Non_Transfer_202270_SEM3)
SEM3_202270_AllStudents_1<- SEM3_202270_AllStudents[, c(1,2,5,9,12:28)]
OneRowSem3202270 <- SEM3_202270_AllStudents_1 %>%
group_by(ID, Admitted.Academic.Period, PathwayStudent) %>%
summarize(Sem2_Persisted1 = sum(Sem2_Persisted),
Year2_Persisted1 = sum(Year2_Persisted),
Sem2_Retained1 = sum(Sem2_Retained),
Year2_Retained1 = sum(Year2_Retained),
SEM2_GPA1=sum(SEM2.GPA, na.rm = TRUE),
YEAR2_GPA1=sum(Year2.GPA, na.rm=TRUE),
Element43=min(Element43_translated))
## `summarise()` has grouped output by 'ID', 'Admitted.Academic.Period'. You can
## override using the `.groups` argument.
OneRowSem3202270$Comparison_Period<-"202270"
Summary_Sem3_202270<-OneRowSem3202270 %>%
group_by(PathwayStudent) %>%
summarize(Sem2_Persisted2 = sum(Sem2_Persisted1)/n_distinct(ID),
Year2_Persisted2 = sum(Year2_Persisted1)/n_distinct(ID),
Sem2_Retained2 = sum(Sem2_Retained1)/n_distinct(ID),
Year2_Retained2 = sum(Year2_Retained1)/n_distinct(ID))
Summary_Sem3_202270$Comparison_Period<- "202270"
summary<-rbind(Summary_Sem3_201770, Summary_Sem3_201870, Summary_Sem3_201970, Summary_Sem3_202070, Summary_Sem3_202170)
library(knitr)
kable(summary)
| 0 |
0.9201597 |
0.8662675 |
0.9061876 |
0.8463074 |
201770 |
| 1 |
0.8717949 |
0.7435897 |
0.8461538 |
0.7179487 |
201770 |
| Credit |
0.9012346 |
0.9012346 |
0.8888889 |
0.8888889 |
201770 |
| 0 |
0.9377224 |
0.8843416 |
0.9270463 |
0.8629893 |
201870 |
| 1 |
0.8965517 |
0.7586207 |
0.8965517 |
0.7471264 |
201870 |
| Credit |
0.9764706 |
0.9529412 |
0.9647059 |
0.9411765 |
201870 |
| 0 |
0.9168174 |
0.8734177 |
0.9150090 |
0.8625678 |
201970 |
| 1 |
0.8720930 |
0.8837209 |
0.8604651 |
0.8720930 |
201970 |
| Credit |
0.9550562 |
0.9662921 |
0.9438202 |
0.9438202 |
201970 |
| 0 |
0.9223301 |
0.8462783 |
0.9174757 |
0.8365696 |
202070 |
| 1 |
0.9462366 |
0.8279570 |
0.9354839 |
0.8172043 |
202070 |
| Credit |
0.9861111 |
0.9305556 |
0.9861111 |
0.9305556 |
202070 |
| 0 |
0.8970588 |
0.8125000 |
0.8878676 |
0.7996324 |
202170 |
| 1 |
0.8026316 |
0.7105263 |
0.8026316 |
0.7105263 |
202170 |
| Credit |
0.9268293 |
0.8292683 |
0.9024390 |
0.8292683 |
202170 |
Get overall SEM3 retention persistence rates - Collapsing all
comparison periods
All_Cohort_SEM3<-rbind(OneRowSem3201770,OneRowSem3201870,OneRowSem3201970,OneRowSem3202070,OneRowSem3202170, OneRowSem3202270)
All_Cohort_SEM3$Transfer_SEM<-"3"
# This include 201770 - 202270 comparison periods
Summary_SEM3_SEM2<-All_Cohort_SEM3[,c(1:3,4,6)] %>%
group_by(PathwayStudent) %>%
summarize(Sem2_Persisted = sum(Sem2_Persisted1)/n_distinct(ID),
Sem2_Retained = sum(Sem2_Retained1)/n_distinct(ID))
# This include 201870 - 202170 comparison periods
Summary_SEM3_Year2<-All_Cohort_SEM3[!((All_Cohort_SEM3$Admitted.Academic.Period=="202170"& All_Cohort_SEM3$PathwayStudent==0)|(All_Cohort_SEM3$Admitted.Academic.Period=="202270"& All_Cohort_SEM3$PathwayStudent==1)),c(1:3,5,7)] %>%
group_by(PathwayStudent) %>%
summarize(Year2_Persisted = sum(Year2_Persisted1)/n_distinct(ID),
Year2_Retained = sum(Year2_Retained1)/n_distinct(ID))
Summary_SEM3_SEM2
## # A tibble: 3 × 3
## PathwayStudent Sem2_Persisted Sem2_Retained
## <chr> <dbl> <dbl>
## 1 0 0.924 0.916
## 2 1 0.883 0.875
## 3 Credit 0.951 0.941
Summary_SEM3_Year2
## # A tibble: 3 × 3
## PathwayStudent Year2_Persisted Year2_Retained
## <chr> <dbl> <dbl>
## 1 0 0.859 0.844
## 2 1 0.794 0.782
## 3 Credit 0.838 0.828
SEM5 Transfers
# find SEM5 transfer students
SEM5_transfers <- TransferStudent_RetentionPersistence[TransferStudent_RetentionPersistence$Min_Sem=="5",]
SEM5_transfers %>%
group_by(Admitted.Academic.Period) %>%
summarise(CountDistinctID = n_distinct(ID))
## # A tibble: 11 × 2
## Admitted.Academic.Period CountDistinctID
## <int> <int>
## 1 201670 64
## 2 201730 1
## 3 201770 93
## 4 201850 1
## 5 201870 163
## 6 201970 196
## 7 202070 247
## 8 202130 1
## 9 202170 270
## 10 202250 1
## 11 202270 208
Find SEM5 transfer students by admission cohorts
SEM5_201770 <- SEM5_transfers[SEM5_transfers$Admitted.Academic.Period=="201770",]
SEM5_201830 <- SEM5_transfers[SEM5_transfers$Admitted.Academic.Period=="201830",]
SEM5_201870 <- SEM5_transfers[SEM5_transfers$Admitted.Academic.Period=="201870",]
SEM5_201930 <- SEM5_transfers[SEM5_transfers$Admitted.Academic.Period=="201930",]
SEM5_201970 <- SEM5_transfers[SEM5_transfers$Admitted.Academic.Period=="201970",]
SEM5_202030 <- SEM5_transfers[SEM5_transfers$Admitted.Academic.Period=="202030",]
SEM5_202070 <- SEM5_transfers[SEM5_transfers$Admitted.Academic.Period=="202070",]
SEM5_202130 <- SEM5_transfers[SEM5_transfers$Admitted.Academic.Period=="202130",]
SEM5_202170 <- SEM5_transfers[SEM5_transfers$Admitted.Academic.Period=="202170",]
SEM5_202230 <- SEM5_transfers[SEM5_transfers$Admitted.Academic.Period=="202230",]
SEM5_202270 <- SEM5_transfers[SEM5_transfers$Admitted.Academic.Period=="202270",]
Identify transfer students with inaccurate Catalog cohort
# Catalog cohort comes from Block transfer report
table(SEM5_201870$Element43_translated)
##
## 201670 201770 201870
## 778 4 7
d1<-SEM5_201870[SEM5_201870$Element43_translated=="201870",]
length(unique(d1$ID)) # number of students with inaccurate catalog cohort
## [1] 3
ONCAT admission period only goes back to 201670, so we don’t have
control group for SEM5 in 201770
Find 201670 Non-transfer cohort who are in SEM5 in 201870
# Select only non-transfer students in SEM5 in 201870
# Non_Transfer_201870_SEM5<- Non_Transfer[Non_Transfer$Admitted.Academic.Period=="201670" & Non_Transfer$Semester>="5",]
subset_201870_SEM5 <- subset(Non_Transfer, Admitted.Academic.Period == 201670 &
Academic.Period == 201870 & Semester == 5 & Admitted.Program.Code == Current.Program.Code)
subset_201870_SEM5$Match<-1
Non_Transfer_201870_SEM5<-left_join(Non_Transfer,subset_201870_SEM5[,c(1,2,22)], by=c("ID","Admitted.Academic.Period"))
Non_Transfer_201870_SEM5<-Non_Transfer_201870_SEM5[Non_Transfer_201870_SEM5$Match==1,]
Non_Transfer_201870_SEM5 <- Non_Transfer_201870_SEM5[!is.na(Non_Transfer_201870_SEM5$ID), ]
Non_Transfer_201870_SEM5<- Non_Transfer_201870_SEM5[Non_Transfer_201870_SEM5$Semester>=5, ]
Non_Transfer_201870_SEM5<- Non_Transfer_201870_SEM5 %>%
group_by(ID, Admitted.Academic.Period) %>%
filter(min(Semester) == 5) %>%
mutate(Min_Sem=min(Semester))
# Add SEM2 persistence
Non_Transfer_201870_SEM5 <- Non_Transfer_201870_SEM5 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Sem2_Persisted = ifelse((Academic.Period == 201870 + 60), 1, 0),
SEM2.GPA = ifelse((Academic.Period == 201870 + 60), Current.GPA[Academic.Period == (201870 + 60)], NA)
)
# Add Year2 persistence
Non_Transfer_201870_SEM5 <- Non_Transfer_201870_SEM5 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Year2_Persisted = ifelse((Academic.Period == 201870 + 100), 1, 0),
Year2.GPA = ifelse((Academic.Period == 201870 + 100), Current.GPA[Academic.Period == (201870 + 100)], NA))
# Add SEM2 retention
Non_Transfer_201870_SEM5 <- Non_Transfer_201870_SEM5 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Sem2_Retained = ifelse((Academic.Period == 201870 + 60) & Admitted.Program.Code==Current.Program.Code, 1, 0))
# Add Year2 retention
Non_Transfer_201870_SEM5 <- Non_Transfer_201870_SEM5 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Year2_Retained = ifelse((Academic.Period == 201870 + 100) & Admitted.Program.Code==Current.Program.Code, 1, 0))
# Non_Transfer_201870_SEM5$PathwayStudent <- "0"
Non_Transfer_201870_SEM5$PathwayStudent <- ifelse(is.na(Non_Transfer_201870_SEM5$Transfer.Pathway.ID), 0, "Credit")
SEM5_201870_AllStudents <- rbind(SEM5_201870, Non_Transfer_201870_SEM5)
SEM5_201870_AllStudents_1<- SEM5_201870_AllStudents[, c(1,2,5,9,12:28)]
OneRowSEM5201870 <- SEM5_201870_AllStudents_1 %>%
group_by(ID, Admitted.Academic.Period, PathwayStudent) %>%
summarize(Sem2_Persisted1 = sum(Sem2_Persisted),
Year2_Persisted1 = sum(Year2_Persisted),
Sem2_Retained1 = sum(Sem2_Retained),
Year2_Retained1 = sum(Year2_Retained),
SEM2_GPA1=sum(SEM2.GPA, na.rm = TRUE),
YEAR2_GPA1=sum(Year2.GPA, na.rm=TRUE),
Element43=min(Element43_translated))
## `summarise()` has grouped output by 'ID', 'Admitted.Academic.Period'. You can
## override using the `.groups` argument.
OneRowSEM5201870$Comparison_Period<-"201870"
Summary_SEM5_201870<-OneRowSEM5201870 %>%
group_by(PathwayStudent) %>%
summarize(Sem2_Persisted2 = sum(Sem2_Persisted1)/n_distinct(ID),
Year2_Persisted2 = sum(Year2_Persisted1)/n_distinct(ID),
Sem2_Retained2 = sum(Sem2_Retained1)/n_distinct(ID),
Year2_Retained2 = sum(Year2_Retained1)/n_distinct(ID))
Summary_SEM5_201870$Comparison_Period<- "201870"
Find 201770 Non-transfer cohort who are in SEM5 in 201970
# Select only non-transfer students in SEM5 in 201970
# Non_Transfer_201970_SEM5<- Non_Transfer[Non_Transfer$Admitted.Academic.Period=="201770" & Non_Transfer$Semester>="5",]
subset_201970_SEM5 <- subset(Non_Transfer, Admitted.Academic.Period == 201770 &
Academic.Period == 201970 & Semester == 5 & Admitted.Program.Code == Current.Program.Code)
subset_201970_SEM5$Match<-1
Non_Transfer_201970_SEM5<-left_join(Non_Transfer,subset_201970_SEM5[,c(1,2,22)], by=c("ID","Admitted.Academic.Period"))
Non_Transfer_201970_SEM5<-Non_Transfer_201970_SEM5[Non_Transfer_201970_SEM5$Match==1,]
Non_Transfer_201970_SEM5 <- Non_Transfer_201970_SEM5[!is.na(Non_Transfer_201970_SEM5$ID), ]
Non_Transfer_201970_SEM5<- Non_Transfer_201970_SEM5[Non_Transfer_201970_SEM5$Semester>=5, ]
Non_Transfer_201970_SEM5<- Non_Transfer_201970_SEM5 %>%
group_by(ID, Admitted.Academic.Period) %>%
filter(min(Semester) == 5) %>%
mutate(Min_Sem=min(Semester))
# Add SEM2 persistence
Non_Transfer_201970_SEM5 <- Non_Transfer_201970_SEM5 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Sem2_Persisted = ifelse((Academic.Period == 201970 + 60), 1, 0),
SEM2.GPA = ifelse((Academic.Period == 201970 + 60), Current.GPA[Academic.Period == (201970 + 60)], NA)
)
# Add Year2 persistence
Non_Transfer_201970_SEM5 <- Non_Transfer_201970_SEM5 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Year2_Persisted = ifelse((Academic.Period == 201970 + 100), 1, 0),
Year2.GPA = ifelse((Academic.Period == 201970 + 100), Current.GPA[Academic.Period == (201970 + 100)], NA))
# Add SEM2 retention
Non_Transfer_201970_SEM5 <- Non_Transfer_201970_SEM5 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Sem2_Retained = ifelse((Academic.Period == 201970 + 60) & Admitted.Program.Code==Current.Program.Code, 1, 0))
# Add Year2 retention
Non_Transfer_201970_SEM5 <- Non_Transfer_201970_SEM5 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Year2_Retained = ifelse((Academic.Period == 201970 + 100) & Admitted.Program.Code==Current.Program.Code, 1, 0))
# Non_Transfer_201970_SEM5$PathwayStudent <- "0"
Non_Transfer_201970_SEM5$PathwayStudent <- ifelse(is.na(Non_Transfer_201970_SEM5$Transfer.Pathway.ID), 0, "Credit")
SEM5_201970_AllStudents <- rbind(SEM5_201970, Non_Transfer_201970_SEM5)
SEM5_201970_AllStudents_1<- SEM5_201970_AllStudents[, c(1,2,5,9,12:28)]
OneRowSEM5201970 <- SEM5_201970_AllStudents_1 %>%
group_by(ID, Admitted.Academic.Period, PathwayStudent) %>%
summarize(Sem2_Persisted1 = sum(Sem2_Persisted),
Year2_Persisted1 = sum(Year2_Persisted),
Sem2_Retained1 = sum(Sem2_Retained),
Year2_Retained1 = sum(Year2_Retained),
SEM2_GPA1=sum(SEM2.GPA, na.rm = TRUE),
YEAR2_GPA1=sum(Year2.GPA, na.rm=TRUE),
Element43=min(Element43_translated))
## `summarise()` has grouped output by 'ID', 'Admitted.Academic.Period'. You can
## override using the `.groups` argument.
OneRowSEM5201970$Comparison_Period<-"201970"
Summary_SEM5_201970<-OneRowSEM5201970 %>%
group_by(PathwayStudent) %>%
summarize(Sem2_Persisted2 = sum(Sem2_Persisted1)/n_distinct(ID),
Year2_Persisted2 = sum(Year2_Persisted1)/n_distinct(ID),
Sem2_Retained2 = sum(Sem2_Retained1)/n_distinct(ID),
Year2_Retained2 = sum(Year2_Retained1)/n_distinct(ID))
Summary_SEM5_201970$Comparison_Period<- "201970"
Find 201870 Non-transfer cohort who are in SEM5 in 202070
# Select only non-transfer students in SEM5 in 202070
# Non_Transfer_202070_SEM5<- Non_Transfer[Non_Transfer$Admitted.Academic.Period=="201870" & Non_Transfer$Semester>="5",]
subset_202070_SEM5 <- subset(Non_Transfer, Admitted.Academic.Period == 201870 &
Academic.Period == 202070 & Semester == 5 & Admitted.Program.Code == Current.Program.Code)
subset_202070_SEM5$Match<-1
Non_Transfer_202070_SEM5<-left_join(Non_Transfer,subset_202070_SEM5[,c(1,2,22)], by=c("ID","Admitted.Academic.Period"))
Non_Transfer_202070_SEM5<-Non_Transfer_202070_SEM5[Non_Transfer_202070_SEM5$Match==1,]
Non_Transfer_202070_SEM5 <- Non_Transfer_202070_SEM5[!is.na(Non_Transfer_202070_SEM5$ID), ]
Non_Transfer_202070_SEM5<- Non_Transfer_202070_SEM5[Non_Transfer_202070_SEM5$Semester>=5, ]
Non_Transfer_202070_SEM5<- Non_Transfer_202070_SEM5 %>%
group_by(ID, Admitted.Academic.Period) %>%
filter(min(Semester) == 5) %>%
mutate(Min_Sem=min(Semester))
# Add SEM2 persistence
Non_Transfer_202070_SEM5 <- Non_Transfer_202070_SEM5 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Sem2_Persisted = ifelse((Academic.Period == 202070 + 60), 1, 0),
SEM2.GPA = ifelse((Academic.Period == 202070 + 60), Current.GPA[Academic.Period == (202070 + 60)], NA)
)
# Add Year2 persistence
Non_Transfer_202070_SEM5 <- Non_Transfer_202070_SEM5 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Year2_Persisted = ifelse((Academic.Period == 202070 + 100), 1, 0),
Year2.GPA = ifelse((Academic.Period == 202070 + 100), Current.GPA[Academic.Period == (202070 + 100)], NA))
# Add SEM2 retention
Non_Transfer_202070_SEM5 <- Non_Transfer_202070_SEM5 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Sem2_Retained = ifelse((Academic.Period == 202070 + 60) & Admitted.Program.Code==Current.Program.Code, 1, 0))
# Add Year2 retention
Non_Transfer_202070_SEM5 <- Non_Transfer_202070_SEM5 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Year2_Retained = ifelse((Academic.Period == 202070 + 100) & Admitted.Program.Code==Current.Program.Code, 1, 0))
# Non_Transfer_202070_SEM5$PathwayStudent <- "0"
Non_Transfer_202070_SEM5$PathwayStudent <- ifelse(is.na(Non_Transfer_202070_SEM5$Transfer.Pathway.ID), 0, "Credit")
SEM5_202070_AllStudents <- rbind(SEM5_202070, Non_Transfer_202070_SEM5)
SEM5_202070_AllStudents_1<- SEM5_202070_AllStudents[, c(1,2,5,9,12:28)]
OneRowSEM5202070 <- SEM5_202070_AllStudents_1 %>%
group_by(ID, Admitted.Academic.Period, PathwayStudent) %>%
summarize(Sem2_Persisted1 = sum(Sem2_Persisted),
Year2_Persisted1 = sum(Year2_Persisted),
Sem2_Retained1 = sum(Sem2_Retained),
Year2_Retained1 = sum(Year2_Retained),
SEM2_GPA1=sum(SEM2.GPA, na.rm = TRUE),
YEAR2_GPA1=sum(Year2.GPA, na.rm=TRUE),
Element43=min(Element43_translated))
## `summarise()` has grouped output by 'ID', 'Admitted.Academic.Period'. You can
## override using the `.groups` argument.
OneRowSEM5202070$Comparison_Period<-"202070"
Summary_SEM5_202070<-OneRowSEM5202070 %>%
group_by(PathwayStudent) %>%
summarize(Sem2_Persisted2 = sum(Sem2_Persisted1)/n_distinct(ID),
Year2_Persisted2 = sum(Year2_Persisted1)/n_distinct(ID),
Sem2_Retained2 = sum(Sem2_Retained1)/n_distinct(ID),
Year2_Retained2 = sum(Year2_Retained1)/n_distinct(ID))
Summary_SEM5_202070$Comparison_Period<- "202070"
Find 201970 Non-transfer cohort who are in SEM5 in 202170
# Select only non-transfer students in SEM5 in 202170
# Non_Transfer_202170_SEM5<- Non_Transfer[Non_Transfer$Admitted.Academic.Period=="201970" & Non_Transfer$Semester>="5",]
subset_202170_SEM5 <- subset(Non_Transfer, Admitted.Academic.Period == 201970 &
Academic.Period == 202170 & Semester == 5 & Admitted.Program.Code == Current.Program.Code)
subset_202170_SEM5$Match<-1
Non_Transfer_202170_SEM5<-left_join(Non_Transfer,subset_202170_SEM5[,c(1,2,22)], by=c("ID","Admitted.Academic.Period"))
Non_Transfer_202170_SEM5<-Non_Transfer_202170_SEM5[Non_Transfer_202170_SEM5$Match==1,]
Non_Transfer_202170_SEM5 <- Non_Transfer_202170_SEM5[!is.na(Non_Transfer_202170_SEM5$ID), ]
Non_Transfer_202170_SEM5<- Non_Transfer_202170_SEM5[Non_Transfer_202170_SEM5$Semester>=5, ]
Non_Transfer_202170_SEM5<- Non_Transfer_202170_SEM5 %>%
group_by(ID, Admitted.Academic.Period) %>%
filter(min(Semester) == 5) %>%
mutate(Min_Sem=min(Semester))
# Add SEM2 persistence
Non_Transfer_202170_SEM5 <- Non_Transfer_202170_SEM5 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Sem2_Persisted = ifelse((Academic.Period == 202170 + 60), 1, 0),
SEM2.GPA = ifelse((Academic.Period == 202170 + 60), Current.GPA[Academic.Period == (202170 + 60)], NA)
)
# Add Year2 persistence
Non_Transfer_202170_SEM5 <- Non_Transfer_202170_SEM5 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Year2_Persisted = ifelse((Academic.Period == 202170 + 100), 1, 0),
Year2.GPA = ifelse((Academic.Period == 202170 + 100), Current.GPA[Academic.Period == (202170 + 100)], NA))
# Add SEM2 retention
Non_Transfer_202170_SEM5 <- Non_Transfer_202170_SEM5 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Sem2_Retained = ifelse((Academic.Period == 202170 + 60) & Admitted.Program.Code==Current.Program.Code, 1, 0))
# Add Year2 retention
Non_Transfer_202170_SEM5 <- Non_Transfer_202170_SEM5 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Year2_Retained = ifelse((Academic.Period == 202170 + 100) & Admitted.Program.Code==Current.Program.Code, 1, 0))
# Non_Transfer_202170_SEM5$PathwayStudent <- "0"
Non_Transfer_202170_SEM5$PathwayStudent <- ifelse(is.na(Non_Transfer_202170_SEM5$Transfer.Pathway.ID), 0, "Credit")
SEM5_202170_AllStudents <- rbind(SEM5_202170, Non_Transfer_202170_SEM5)
SEM5_202170_AllStudents_1<- SEM5_202170_AllStudents[, c(1,2,5,9,12:28)]
OneRowSEM5202170 <- SEM5_202170_AllStudents_1 %>%
group_by(ID, Admitted.Academic.Period, PathwayStudent) %>%
summarize(Sem2_Persisted1 = sum(Sem2_Persisted),
Year2_Persisted1 = sum(Year2_Persisted),
Sem2_Retained1 = sum(Sem2_Retained),
Year2_Retained1 = sum(Year2_Retained),
SEM2_GPA1=sum(SEM2.GPA, na.rm = TRUE),
YEAR2_GPA1=sum(Year2.GPA, na.rm=TRUE),
Element43=min(Element43_translated))
## `summarise()` has grouped output by 'ID', 'Admitted.Academic.Period'. You can
## override using the `.groups` argument.
OneRowSEM5202170$Comparison_Period<-"202170"
Summary_SEM5_202170<-OneRowSEM5202170 %>%
group_by(PathwayStudent) %>%
summarize(Sem2_Persisted2 = sum(Sem2_Persisted1)/n_distinct(ID),
Year2_Persisted2 = sum(Year2_Persisted1)/n_distinct(ID),
Sem2_Retained2 = sum(Sem2_Retained1)/n_distinct(ID),
Year2_Retained2 = sum(Year2_Retained1)/n_distinct(ID))
Summary_SEM5_202170$Comparison_Period<- "202170"
Find 202070 Non-transfer cohort who are in SEM5 in 202270
# Select only non-transfer students in SEM5 in 202270
# Non_Transfer_202270_SEM5<- Non_Transfer[Non_Transfer$Admitted.Academic.Period=="202070" & Non_Transfer$Semester>="5",]
subset_202270_SEM5 <- subset(Non_Transfer, Admitted.Academic.Period == 202070 &
Academic.Period == 202270 & Semester == 5 & Admitted.Program.Code == Current.Program.Code)
subset_202270_SEM5$Match<-1
Non_Transfer_202270_SEM5<-left_join(Non_Transfer,subset_202270_SEM5[,c(1,2,22)], by=c("ID","Admitted.Academic.Period"))
Non_Transfer_202270_SEM5<-Non_Transfer_202270_SEM5[Non_Transfer_202270_SEM5$Match==1,]
Non_Transfer_202270_SEM5 <- Non_Transfer_202270_SEM5[!is.na(Non_Transfer_202270_SEM5$ID), ]
Non_Transfer_202270_SEM5<- Non_Transfer_202270_SEM5[Non_Transfer_202270_SEM5$Semester>=5, ]
Non_Transfer_202270_SEM5<- Non_Transfer_202270_SEM5 %>%
group_by(ID, Admitted.Academic.Period) %>%
filter(min(Semester) == 5) %>%
mutate(Min_Sem=min(Semester))
# Add SEM2 persistence
Non_Transfer_202270_SEM5 <- Non_Transfer_202270_SEM5 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Sem2_Persisted = ifelse((Academic.Period == 202270 + 60), 1, 0),
SEM2.GPA = ifelse((Academic.Period == 202270 + 60), Current.GPA[Academic.Period == (202270 + 60)], NA)
)
# Add Year2 persistence
Non_Transfer_202270_SEM5 <- Non_Transfer_202270_SEM5 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Year2_Persisted = ifelse((Academic.Period == 202270 + 100), 1, 0),
Year2.GPA = ifelse((Academic.Period == 202270 + 100), Current.GPA[Academic.Period == (202270 + 100)], NA))
# Add SEM2 retention
Non_Transfer_202270_SEM5 <- Non_Transfer_202270_SEM5 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Sem2_Retained = ifelse((Academic.Period == 202270 + 60) & Admitted.Program.Code==Current.Program.Code, 1, 0))
# Add Year2 retention
Non_Transfer_202270_SEM5 <- Non_Transfer_202270_SEM5 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Year2_Retained = ifelse((Academic.Period == 202270 + 100) & Admitted.Program.Code==Current.Program.Code, 1, 0))
# Non_Transfer_202270_SEM5$PathwayStudent <- "0"
Non_Transfer_202270_SEM5$PathwayStudent <- ifelse(is.na(Non_Transfer_202270_SEM5$Transfer.Pathway.ID), 0, "Credit")
SEM5_202270_AllStudents <- rbind(SEM5_202270, Non_Transfer_202270_SEM5)
SEM5_202270_AllStudents_1<- SEM5_202270_AllStudents[, c(1,2,5,9,12:28)]
OneRowSEM5202270 <- SEM5_202270_AllStudents_1 %>%
group_by(ID, Admitted.Academic.Period, PathwayStudent) %>%
summarize(Sem2_Persisted1 = sum(Sem2_Persisted),
Year2_Persisted1 = sum(Year2_Persisted),
Sem2_Retained1 = sum(Sem2_Retained),
Year2_Retained1 = sum(Year2_Retained),
SEM2_GPA1=sum(SEM2.GPA, na.rm = TRUE),
YEAR2_GPA1=sum(Year2.GPA, na.rm=TRUE),
Element43=min(Element43_translated))
## `summarise()` has grouped output by 'ID', 'Admitted.Academic.Period'. You can
## override using the `.groups` argument.
OneRowSEM5202270$Comparison_Period<-"202270"
Summary_SEM5_202270<-OneRowSEM5202270 %>%
group_by(PathwayStudent) %>%
summarize(Sem2_Persisted2 = sum(Sem2_Persisted1)/n_distinct(ID),
Year2_Persisted2 = sum(Year2_Persisted1)/n_distinct(ID),
Sem2_Retained2 = sum(Sem2_Retained1)/n_distinct(ID),
Year2_Retained2 = sum(Year2_Retained1)/n_distinct(ID))
Summary_SEM5_202270$Comparison_Period<- "202270"
summary1<-rbind(Summary_SEM5_201870, Summary_SEM5_201970, Summary_SEM5_202070, Summary_SEM5_202170, Summary_SEM5_202270)
library(knitr)
kable(summary1)
| 0 |
0.9805353 |
0.9391727 |
0.9732360 |
0.9318735 |
201870 |
| 1 |
0.9018405 |
0.8527607 |
0.8957055 |
0.8466258 |
201870 |
| Credit |
0.9411765 |
0.9117647 |
0.9411765 |
0.9117647 |
201870 |
| 0 |
0.9772727 |
0.9566116 |
0.9752066 |
0.9545455 |
201970 |
| 1 |
0.9030612 |
0.8469388 |
0.9030612 |
0.8469388 |
201970 |
| Credit |
1.0000000 |
0.9358974 |
1.0000000 |
0.9358974 |
201970 |
| 0 |
0.9476987 |
0.9309623 |
0.9476987 |
0.9309623 |
202070 |
| 1 |
0.9190283 |
0.8704453 |
0.9190283 |
0.8704453 |
202070 |
| Credit |
0.9879518 |
0.8795181 |
0.9879518 |
0.8795181 |
202070 |
| 0 |
0.9464286 |
0.9206349 |
0.9464286 |
0.9166667 |
202170 |
| 1 |
0.8555556 |
0.7185185 |
0.8555556 |
0.7185185 |
202170 |
| Credit |
0.9016393 |
0.8360656 |
0.9016393 |
0.8360656 |
202170 |
| 0 |
0.9505882 |
0.0000000 |
0.9505882 |
0.0000000 |
202270 |
| 1 |
0.9134615 |
0.0000000 |
0.9134615 |
0.0000000 |
202270 |
| Credit |
0.9117647 |
0.0000000 |
0.9117647 |
0.0000000 |
202270 |
Get overall SEM5 retention persistence rates - Collapsing all
comparison periods
All_Cohort_SEM5<-rbind(OneRowSEM5201870,OneRowSEM5201970,OneRowSEM5202070,OneRowSEM5202170,OneRowSEM5202270)
All_Cohort_SEM5$Transfer_SEM<-"5"
# This include 201870 - 202270 comparison periods
Summary_SEM5_SEM2<-All_Cohort_SEM5[,c(1:3,4,6)] %>%
group_by(PathwayStudent) %>%
summarize(Sem2_Persisted = sum(Sem2_Persisted1)/n_distinct(ID),
Sem2_Retained = sum(Sem2_Retained1)/n_distinct(ID))
# This include 201870 - 202170 comparison periods
Summary_SEM5_Year2<-All_Cohort_SEM5[!((All_Cohort_SEM5$Admitted.Academic.Period=="202070"& All_Cohort_SEM5$PathwayStudent==0)|(All_Cohort_SEM5$Admitted.Academic.Period=="202270"& All_Cohort_SEM5$PathwayStudent==1)),c(1:3,5,7)] %>%
group_by(PathwayStudent) %>%
summarize(Year2_Persisted = sum(Year2_Persisted1)/n_distinct(ID),
Year2_Retained = sum(Year2_Retained1)/n_distinct(ID))
Summary_SEM5_SEM2
## # A tibble: 3 × 3
## PathwayStudent Sem2_Persisted Sem2_Retained
## <chr> <dbl> <dbl>
## 1 0 0.960 0.958
## 2 1 0.899 0.898
## 3 Credit 0.957 0.957
Summary_SEM5_Year2
## # A tibble: 3 × 3
## PathwayStudent Year2_Persisted Year2_Retained
## <chr> <dbl> <dbl>
## 1 0 0.937 0.933
## 2 1 0.816 0.815
## 3 Credit 0.799 0.799
SEM4 Transfers
# find SEM4 transfer students
SEM4_transfers <- TransferStudent_RetentionPersistence[TransferStudent_RetentionPersistence$Min_Sem=="4",]
SEM4_transfers %>%
group_by(Admitted.Academic.Period) %>%
summarise(CountDistinctID = n_distinct(ID))
## # A tibble: 13 × 2
## Admitted.Academic.Period CountDistinctID
## <int> <int>
## 1 201670 9
## 2 201730 17
## 3 201770 7
## 4 201830 10
## 5 201870 10
## 6 201930 37
## 7 201970 8
## 8 202030 45
## 9 202070 14
## 10 202130 73
## 11 202170 1
## 12 202230 65
## 13 202330 38
Find SEM4 transfer students by admission cohorts
SEM4_201830 <- SEM4_transfers[SEM4_transfers$Admitted.Academic.Period=="201830",]
SEM4_201870 <- SEM4_transfers[SEM4_transfers$Admitted.Academic.Period=="201870",]
SEM4_201930 <- SEM4_transfers[SEM4_transfers$Admitted.Academic.Period=="201930",]
SEM4_201970 <- SEM4_transfers[SEM4_transfers$Admitted.Academic.Period=="201970",]
SEM4_202030 <- SEM4_transfers[SEM4_transfers$Admitted.Academic.Period=="202030",]
SEM4_202070 <- SEM4_transfers[SEM4_transfers$Admitted.Academic.Period=="202070",]
SEM4_202130 <- SEM4_transfers[SEM4_transfers$Admitted.Academic.Period=="202130",]
SEM4_202170 <- SEM4_transfers[SEM4_transfers$Admitted.Academic.Period=="202170",]
SEM4_202230 <- SEM4_transfers[SEM4_transfers$Admitted.Academic.Period=="202230",]
SEM4_202270 <- SEM4_transfers[SEM4_transfers$Admitted.Academic.Period=="202270",]
Identify transfer students with inaccurate Catalog cohort
# Catalog cohort comes from Block transfer report
table(SEM4_202230$Element43_translated)
##
## 202070 202230
## 191 7
d2<-SEM4_202230[SEM4_202230$Element43_translated=="202230",]
length(unique(d2$ID)) # number of students with inaccurate catalog cohort
## [1] 2
Find 201670 Non-transfer cohort who are in SEM4 in 201830
# Select only non-transfer students in SEM4 in 201830
# Non_Transfer_201830_SEM4<- Non_Transfer[Non_Transfer$Admitted.Academic.Period=="201670" & Non_Transfer$Semester>="4",]
subset_201830_SEM4 <- subset(Non_Transfer, Admitted.Academic.Period == 201670 &
Academic.Period == 201830 & Semester == 4 & Admitted.Program.Code == Current.Program.Code)
subset_201830_SEM4$Match<-1
Non_Transfer_201830_SEM4<-left_join(Non_Transfer,subset_201830_SEM4[,c(1,2,22)], by=c("ID","Admitted.Academic.Period"))
Non_Transfer_201830_SEM4<-Non_Transfer_201830_SEM4[Non_Transfer_201830_SEM4$Match==1,]
Non_Transfer_201830_SEM4 <- Non_Transfer_201830_SEM4[!is.na(Non_Transfer_201830_SEM4$ID), ]
Non_Transfer_201830_SEM4<- Non_Transfer_201830_SEM4[Non_Transfer_201830_SEM4$Semester>=4, ]
Non_Transfer_201830_SEM4<- Non_Transfer_201830_SEM4 %>%
group_by(ID, Admitted.Academic.Period) %>%
filter(min(Semester) == 4) %>%
mutate(Min_Sem=min(Semester))
# Add SEM2 persistence
Non_Transfer_201830_SEM4 <- Non_Transfer_201830_SEM4 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Sem2_Persisted = ifelse((Academic.Period == 201830 + 40), 1, 0),
SEM2.GPA = ifelse((Academic.Period == 201830 + 40), Current.GPA[Academic.Period == (201830 + 40)], NA)
)
# Add Year2 persistence
Non_Transfer_201830_SEM4 <- Non_Transfer_201830_SEM4 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Year2_Persisted = ifelse((Academic.Period == 201830 + 100), 1, 0),
Year2.GPA = ifelse((Academic.Period == 201830 + 100), Current.GPA[Academic.Period == (201830 + 100)], NA))
# Add SEM2 retention
Non_Transfer_201830_SEM4 <- Non_Transfer_201830_SEM4 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Sem2_Retained = ifelse((Academic.Period == 201830 + 40) & Admitted.Program.Code==Current.Program.Code, 1, 0))
# Add Year2 retention
Non_Transfer_201830_SEM4 <- Non_Transfer_201830_SEM4 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Year2_Retained = ifelse((Academic.Period == 201830 + 100) & Admitted.Program.Code==Current.Program.Code, 1, 0))
# Non_Transfer_201830_SEM4$PathwayStudent <- "0"
Non_Transfer_201830_SEM4$PathwayStudent <- ifelse(is.na(Non_Transfer_201830_SEM4$Transfer.Pathway.ID), 0, "Credit")
SEM4_201830_AllStudents <- rbind(SEM4_201830, Non_Transfer_201830_SEM4)
SEM4_201830_AllStudents_1<- SEM4_201830_AllStudents[, c(1,2,5,9,12:28)]
OneRowSEM4201830 <- SEM4_201830_AllStudents_1 %>%
group_by(ID, Admitted.Academic.Period, PathwayStudent) %>%
summarize(Sem2_Persisted1 = sum(Sem2_Persisted),
Year2_Persisted1 = sum(Year2_Persisted),
Sem2_Retained1 = sum(Sem2_Retained),
Year2_Retained1 = sum(Year2_Retained),
SEM2_GPA1=sum(SEM2.GPA, na.rm = TRUE),
YEAR2_GPA1=sum(Year2.GPA, na.rm=TRUE),
Element43=min(Element43_translated))
## `summarise()` has grouped output by 'ID', 'Admitted.Academic.Period'. You can
## override using the `.groups` argument.
OneRowSEM4201830$Comparison_Period<-"201830"
Summary_SEM4_201830<-OneRowSEM4201830 %>%
group_by(PathwayStudent) %>%
summarize(Sem2_Persisted2 = sum(Sem2_Persisted1)/n_distinct(ID),
Year2_Persisted2 = sum(Year2_Persisted1)/n_distinct(ID),
Sem2_Retained2 = sum(Sem2_Retained1)/n_distinct(ID),
Year2_Retained2 = sum(Year2_Retained1)/n_distinct(ID))
Summary_SEM4_201830$Comparison_Period<- "201830"
Find 201770 Non-transfer cohort who are in SEM4 in 201930
# Select only non-transfer students in SEM4 in 201930
# Non_Transfer_201930_SEM4<- Non_Transfer[Non_Transfer$Admitted.Academic.Period=="201770" & Non_Transfer$Semester>="4",]
subset_201930_SEM4 <- subset(Non_Transfer, Admitted.Academic.Period == 201770 &
Academic.Period == 201930 & Semester == 4 & Admitted.Program.Code == Current.Program.Code)
subset_201930_SEM4$Match<-1
Non_Transfer_201930_SEM4<-left_join(Non_Transfer,subset_201930_SEM4[,c(1,2,22)], by=c("ID","Admitted.Academic.Period"))
Non_Transfer_201930_SEM4<-Non_Transfer_201930_SEM4[Non_Transfer_201930_SEM4$Match==1,]
Non_Transfer_201930_SEM4 <- Non_Transfer_201930_SEM4[!is.na(Non_Transfer_201930_SEM4$ID), ]
Non_Transfer_201930_SEM4<- Non_Transfer_201930_SEM4[Non_Transfer_201930_SEM4$Semester>=4, ]
Non_Transfer_201930_SEM4<- Non_Transfer_201930_SEM4 %>%
group_by(ID, Admitted.Academic.Period) %>%
filter(min(Semester) == 4) %>%
mutate(Min_Sem=min(Semester))
# Add SEM2 persistence
Non_Transfer_201930_SEM4 <- Non_Transfer_201930_SEM4 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Sem2_Persisted = ifelse((Academic.Period == 201930 + 40), 1, 0),
SEM2.GPA = ifelse((Academic.Period == 201930 + 40), Current.GPA[Academic.Period == (201930 + 40)], NA)
)
# Add Year2 persistence
Non_Transfer_201930_SEM4 <- Non_Transfer_201930_SEM4 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Year2_Persisted = ifelse((Academic.Period == 201930 + 100), 1, 0),
Year2.GPA = ifelse((Academic.Period == 201930 + 100), Current.GPA[Academic.Period == (201930 + 100)], NA))
# Add SEM2 retention
Non_Transfer_201930_SEM4 <- Non_Transfer_201930_SEM4 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Sem2_Retained = ifelse((Academic.Period == 201930 + 40) & Admitted.Program.Code==Current.Program.Code, 1, 0))
# Add Year2 retention
Non_Transfer_201930_SEM4 <- Non_Transfer_201930_SEM4 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Year2_Retained = ifelse((Academic.Period == 201930 + 100) & Admitted.Program.Code==Current.Program.Code, 1, 0))
# Non_Transfer_201930_SEM4$PathwayStudent <- "0"
Non_Transfer_201930_SEM4$PathwayStudent <- ifelse(is.na(Non_Transfer_201930_SEM4$Transfer.Pathway.ID), 0, "Credit")
SEM4_201930_AllStudents <- rbind(SEM4_201930, Non_Transfer_201930_SEM4)
SEM4_201930_AllStudents_1<- SEM4_201930_AllStudents[, c(1,2,5,9,12:28)]
OneRowSEM4201930 <- SEM4_201930_AllStudents_1 %>%
group_by(ID, Admitted.Academic.Period, PathwayStudent) %>%
summarize(Sem2_Persisted1 = sum(Sem2_Persisted),
Year2_Persisted1 = sum(Year2_Persisted),
Sem2_Retained1 = sum(Sem2_Retained),
Year2_Retained1 = sum(Year2_Retained),
SEM2_GPA1=sum(SEM2.GPA, na.rm = TRUE),
YEAR2_GPA1=sum(Year2.GPA, na.rm=TRUE),
Element43=min(Element43_translated))
## `summarise()` has grouped output by 'ID', 'Admitted.Academic.Period'. You can
## override using the `.groups` argument.
OneRowSEM4201930$Comparison_Period<-"201930"
Summary_SEM4_201930<-OneRowSEM4201930 %>%
group_by(PathwayStudent) %>%
summarize(Sem2_Persisted2 = sum(Sem2_Persisted1)/n_distinct(ID),
Year2_Persisted2 = sum(Year2_Persisted1)/n_distinct(ID),
Sem2_Retained2 = sum(Sem2_Retained1)/n_distinct(ID),
Year2_Retained2 = sum(Year2_Retained1)/n_distinct(ID))
Summary_SEM4_201930$Comparison_Period<- "201930"
Find 201870 Non-transfer cohort who are in SEM4 in 202030
# Select only non-transfer students in SEM4 in 202030
# Non_Transfer_202030_SEM4<- Non_Transfer[Non_Transfer$Admitted.Academic.Period=="201870" & Non_Transfer$Semester>="4",]
subset_202030_SEM4 <- subset(Non_Transfer, Admitted.Academic.Period == 201870 &
Academic.Period == 202030 & Semester == 4 & Admitted.Program.Code == Current.Program.Code)
subset_202030_SEM4$Match<-1
Non_Transfer_202030_SEM4<-left_join(Non_Transfer,subset_202030_SEM4[,c(1,2,22)], by=c("ID","Admitted.Academic.Period"))
Non_Transfer_202030_SEM4<-Non_Transfer_202030_SEM4[Non_Transfer_202030_SEM4$Match==1,]
Non_Transfer_202030_SEM4 <- Non_Transfer_202030_SEM4[!is.na(Non_Transfer_202030_SEM4$ID), ]
Non_Transfer_202030_SEM4<- Non_Transfer_202030_SEM4[Non_Transfer_202030_SEM4$Semester>=4, ]
Non_Transfer_202030_SEM4<- Non_Transfer_202030_SEM4 %>%
group_by(ID, Admitted.Academic.Period) %>%
filter(min(Semester) == 4) %>%
mutate(Min_Sem=min(Semester))
# Add SEM2 persistence
Non_Transfer_202030_SEM4 <- Non_Transfer_202030_SEM4 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Sem2_Persisted = ifelse((Academic.Period == 202030 + 40), 1, 0),
SEM2.GPA = ifelse((Academic.Period == 202030 + 40), Current.GPA[Academic.Period == (202030 + 40)], NA)
)
# Add Year2 persistence
Non_Transfer_202030_SEM4 <- Non_Transfer_202030_SEM4 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Year2_Persisted = ifelse((Academic.Period == 202030 + 100), 1, 0),
Year2.GPA = ifelse((Academic.Period == 202030 + 100), Current.GPA[Academic.Period == (202030 + 100)], NA))
# Add SEM2 retention
Non_Transfer_202030_SEM4 <- Non_Transfer_202030_SEM4 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Sem2_Retained = ifelse((Academic.Period == 202030 + 40) & Admitted.Program.Code==Current.Program.Code, 1, 0))
# Add Year2 retention
Non_Transfer_202030_SEM4 <- Non_Transfer_202030_SEM4 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Year2_Retained = ifelse((Academic.Period == 202030 + 100) & Admitted.Program.Code==Current.Program.Code, 1, 0))
# Non_Transfer_202030_SEM4$PathwayStudent <- "0"
Non_Transfer_202030_SEM4$PathwayStudent <- ifelse(is.na(Non_Transfer_202030_SEM4$Transfer.Pathway.ID), 0, "Credit")
SEM4_202030_AllStudents <- rbind(SEM4_202030, Non_Transfer_202030_SEM4)
SEM4_202030_AllStudents_1<- SEM4_202030_AllStudents[, c(1,2,5,9,12:28)]
OneRowSEM4202030 <- SEM4_202030_AllStudents_1 %>%
group_by(ID, Admitted.Academic.Period, PathwayStudent) %>%
summarize(Sem2_Persisted1 = sum(Sem2_Persisted),
Year2_Persisted1 = sum(Year2_Persisted),
Sem2_Retained1 = sum(Sem2_Retained),
Year2_Retained1 = sum(Year2_Retained),
SEM2_GPA1=sum(SEM2.GPA, na.rm = TRUE),
YEAR2_GPA1=sum(Year2.GPA, na.rm=TRUE),
Element43=min(Element43_translated))
## `summarise()` has grouped output by 'ID', 'Admitted.Academic.Period'. You can
## override using the `.groups` argument.
OneRowSEM4202030$Comparison_Period<-"202030"
Summary_SEM4_202030<-OneRowSEM4202030 %>%
group_by(PathwayStudent) %>%
summarize(Sem2_Persisted2 = sum(Sem2_Persisted1)/n_distinct(ID),
Year2_Persisted2 = sum(Year2_Persisted1)/n_distinct(ID),
Sem2_Retained2 = sum(Sem2_Retained1)/n_distinct(ID),
Year2_Retained2 = sum(Year2_Retained1)/n_distinct(ID))
Summary_SEM4_202030$Comparison_Period<- "202030"
Find 201970 Non-transfer cohort who are in SEM4 in 202130
# Select only non-transfer students in SEM4 in 202130
# Non_Transfer_202130_SEM4<- Non_Transfer[Non_Transfer$Admitted.Academic.Period=="201970" & Non_Transfer$Semester>="4",]
subset_202130_SEM4 <- subset(Non_Transfer, Admitted.Academic.Period == 201970 &
Academic.Period == 202130 & Semester == 4 & Admitted.Program.Code == Current.Program.Code)
subset_202130_SEM4$Match<-1
Non_Transfer_202130_SEM4<-left_join(Non_Transfer,subset_202130_SEM4[,c(1,2,22)], by=c("ID","Admitted.Academic.Period"))
Non_Transfer_202130_SEM4<-Non_Transfer_202130_SEM4[Non_Transfer_202130_SEM4$Match==1,]
Non_Transfer_202130_SEM4 <- Non_Transfer_202130_SEM4[!is.na(Non_Transfer_202130_SEM4$ID), ]
Non_Transfer_202130_SEM4<- Non_Transfer_202130_SEM4[Non_Transfer_202130_SEM4$Semester>=4, ]
Non_Transfer_202130_SEM4<- Non_Transfer_202130_SEM4 %>%
group_by(ID, Admitted.Academic.Period) %>%
filter(min(Semester) == 4) %>%
mutate(Min_Sem=min(Semester))
# Add SEM2 persistence
Non_Transfer_202130_SEM4 <- Non_Transfer_202130_SEM4 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Sem2_Persisted = ifelse((Academic.Period == 202130 + 40), 1, 0),
SEM2.GPA = ifelse((Academic.Period == 202130 + 40), Current.GPA[Academic.Period == (202130 + 40)], NA)
)
# Add Year2 persistence
Non_Transfer_202130_SEM4 <- Non_Transfer_202130_SEM4 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Year2_Persisted = ifelse((Academic.Period == 202130 + 100), 1, 0),
Year2.GPA = ifelse((Academic.Period == 202130 + 100), Current.GPA[Academic.Period == (202130 + 100)], NA))
# Add SEM2 retention
Non_Transfer_202130_SEM4 <- Non_Transfer_202130_SEM4 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Sem2_Retained = ifelse((Academic.Period == 202130 + 40) & Admitted.Program.Code==Current.Program.Code, 1, 0))
# Add Year2 retention
Non_Transfer_202130_SEM4 <- Non_Transfer_202130_SEM4 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Year2_Retained = ifelse((Academic.Period == 202130 + 100) & Admitted.Program.Code==Current.Program.Code, 1, 0))
# Non_Transfer_202130_SEM4$PathwayStudent <- "0"
Non_Transfer_202130_SEM4$PathwayStudent <- ifelse(is.na(Non_Transfer_202130_SEM4$Transfer.Pathway.ID), 0, "Credit")
SEM4_202130_AllStudents <- rbind(SEM4_202130, Non_Transfer_202130_SEM4)
SEM4_202130_AllStudents_1<- SEM4_202130_AllStudents[, c(1,2,5,9,12:28)]
OneRowSEM4202130 <- SEM4_202130_AllStudents_1 %>%
group_by(ID, Admitted.Academic.Period, PathwayStudent) %>%
summarize(Sem2_Persisted1 = sum(Sem2_Persisted),
Year2_Persisted1 = sum(Year2_Persisted),
Sem2_Retained1 = sum(Sem2_Retained),
Year2_Retained1 = sum(Year2_Retained),
SEM2_GPA1=sum(SEM2.GPA, na.rm = TRUE),
YEAR2_GPA1=sum(Year2.GPA, na.rm=TRUE),
Element43=min(Element43_translated))
## `summarise()` has grouped output by 'ID', 'Admitted.Academic.Period'. You can
## override using the `.groups` argument.
OneRowSEM4202130$Comparison_Period<-"202130"
Summary_SEM4_202130<-OneRowSEM4202130 %>%
group_by(PathwayStudent) %>%
summarize(Sem2_Persisted2 = sum(Sem2_Persisted1)/n_distinct(ID),
Year2_Persisted2 = sum(Year2_Persisted1)/n_distinct(ID),
Sem2_Retained2 = sum(Sem2_Retained1)/n_distinct(ID),
Year2_Retained2 = sum(Year2_Retained1)/n_distinct(ID))
Summary_SEM4_202130$Comparison_Period<- "202130"
Find 202070 Non-transfer cohort who are in SEM4 in 202230
# Select only non-transfer students in SEM4 in 202230
# Non_Transfer_202230_SEM4<- Non_Transfer[Non_Transfer$Admitted.Academic.Period=="202070" & Non_Transfer$Semester>="4",]
subset_202230_SEM4 <- subset(Non_Transfer, Admitted.Academic.Period == 202070 &
Academic.Period == 202230 & Semester == 4 & Admitted.Program.Code == Current.Program.Code)
subset_202230_SEM4$Match<-1
Non_Transfer_202230_SEM4<-left_join(Non_Transfer,subset_202230_SEM4[,c(1,2,22)], by=c("ID","Admitted.Academic.Period"))
Non_Transfer_202230_SEM4<-Non_Transfer_202230_SEM4[Non_Transfer_202230_SEM4$Match==1,]
Non_Transfer_202230_SEM4 <- Non_Transfer_202230_SEM4[!is.na(Non_Transfer_202230_SEM4$ID), ]
Non_Transfer_202230_SEM4<- Non_Transfer_202230_SEM4[Non_Transfer_202230_SEM4$Semester>=4, ]
Non_Transfer_202230_SEM4<- Non_Transfer_202230_SEM4 %>%
group_by(ID, Admitted.Academic.Period) %>%
filter(min(Semester) == 4) %>%
mutate(Min_Sem=min(Semester))
# Add SEM2 persistence
Non_Transfer_202230_SEM4 <- Non_Transfer_202230_SEM4 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Sem2_Persisted = ifelse((Academic.Period == 202230 + 40), 1, 0),
SEM2.GPA = ifelse((Academic.Period == 202230 + 40), Current.GPA[Academic.Period == (202230 + 40)], NA)
)
# Add Year2 persistence
Non_Transfer_202230_SEM4 <- Non_Transfer_202230_SEM4 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Year2_Persisted = ifelse((Academic.Period == 202230 + 100), 1, 0),
Year2.GPA = ifelse((Academic.Period == 202230 + 100), Current.GPA[Academic.Period == (202230 + 100)], NA))
# Add SEM2 retention
Non_Transfer_202230_SEM4 <- Non_Transfer_202230_SEM4 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Sem2_Retained = ifelse((Academic.Period == 202230 + 40) & Admitted.Program.Code==Current.Program.Code, 1, 0))
# Add Year2 retention
Non_Transfer_202230_SEM4 <- Non_Transfer_202230_SEM4 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(Year2_Retained = ifelse((Academic.Period == 202230 + 100) & Admitted.Program.Code==Current.Program.Code, 1, 0))
# Non_Transfer_202230_SEM4$PathwayStudent <- "0"
Non_Transfer_202230_SEM4$PathwayStudent <- ifelse(is.na(Non_Transfer_202230_SEM4$Transfer.Pathway.ID), 0, "Credit")
SEM4_202230_AllStudents <- rbind(SEM4_202230, Non_Transfer_202230_SEM4)
SEM4_202230_AllStudents_1<- SEM4_202230_AllStudents[, c(1,2,5,9,12:28)]
OneRowSEM4202230 <- SEM4_202230_AllStudents_1 %>%
group_by(ID, Admitted.Academic.Period, PathwayStudent) %>%
summarize(Sem2_Persisted1 = sum(Sem2_Persisted),
Year2_Persisted1 = sum(Year2_Persisted),
Sem2_Retained1 = sum(Sem2_Retained),
Year2_Retained1 = sum(Year2_Retained),
SEM2_GPA1=sum(SEM2.GPA, na.rm = TRUE),
YEAR2_GPA1=sum(Year2.GPA, na.rm=TRUE),
Element43=min(Element43_translated))
## `summarise()` has grouped output by 'ID', 'Admitted.Academic.Period'. You can
## override using the `.groups` argument.
OneRowSEM4202230$Comparison_Period<-"202230"
Summary_SEM4_202230<-OneRowSEM4202230 %>%
group_by(PathwayStudent) %>%
summarize(Sem2_Persisted2 = sum(Sem2_Persisted1)/n_distinct(ID),
Year2_Persisted2 = sum(Year2_Persisted1)/n_distinct(ID),
Sem2_Retained2 = sum(Sem2_Retained1)/n_distinct(ID),
Year2_Retained2 = sum(Year2_Retained1)/n_distinct(ID))
Summary_SEM4_202230$Comparison_Period<- "202230"
summary2<-rbind(Summary_SEM4_201830, Summary_SEM4_201930, Summary_SEM4_202030, Summary_SEM4_202130, Summary_SEM4_202230)
library(knitr)
kable(summary2)
| 0 |
0.9139073 |
0.9094923 |
0.9028698 |
0.8940397 |
201830 |
| 1 |
0.8000000 |
0.7000000 |
0.8000000 |
0.7000000 |
201830 |
| Credit |
0.9577465 |
0.9154930 |
0.9577465 |
0.9154930 |
201830 |
| 0 |
0.9349904 |
0.9292543 |
0.9235182 |
0.9158700 |
201930 |
| 1 |
0.8648649 |
0.8378378 |
0.8648649 |
0.8378378 |
201930 |
| Credit |
0.9506173 |
0.9506173 |
0.9506173 |
0.9506173 |
201930 |
| 0 |
0.9388560 |
0.9013807 |
0.9309665 |
0.8934911 |
202030 |
| 1 |
0.8888889 |
0.8444444 |
0.8888889 |
0.8444444 |
202030 |
| Credit |
1.0000000 |
0.9879518 |
0.9879518 |
0.9759036 |
202030 |
| 0 |
0.8926056 |
0.8626761 |
0.8873239 |
0.8609155 |
202130 |
| 1 |
0.8767123 |
0.8219178 |
0.8630137 |
0.8082192 |
202130 |
| Credit |
0.9130435 |
0.8550725 |
0.9130435 |
0.8550725 |
202130 |
| 0 |
0.8856549 |
0.8690229 |
0.8814969 |
0.8627859 |
202230 |
| 1 |
0.8923077 |
0.9230769 |
0.8923077 |
0.9230769 |
202230 |
| Credit |
0.9166667 |
0.8611111 |
0.9166667 |
0.8611111 |
202230 |
Get overall SEM4 retention persistence rates - Collapsing all
comparison periods
All_Cohort_SEM4<-rbind(OneRowSEM4201830,OneRowSEM4201930,OneRowSEM4202030,OneRowSEM4202130, OneRowSEM4202230)
All_Cohort_SEM4$Transfer_SEM<-"4"
# This include 201830 - 202230 comparison periods
Summary_SEM4<-All_Cohort_SEM4 %>%
group_by(PathwayStudent) %>%
summarize(Sem2_Persisted = sum(Sem2_Persisted1)/n_distinct(ID),
Sem2_Retained = sum(Sem2_Retained1)/n_distinct(ID),
Year2_Persisted = sum(Year2_Persisted1)/n_distinct(ID),
Year2_Retained = sum(Year2_Retained1)/n_distinct(ID))
Summary_SEM4
## # A tibble: 3 × 5
## PathwayStudent Sem2_Persisted Sem2_Retained Year2_Persisted Year2_Retained
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 0 0.914 0.906 0.895 0.886
## 2 1 0.878 0.874 0.852 0.848
## 3 Credit 0.953 0.95 0.924 0.921
Prepare final data for modelling
All_Cohort<- rbind(All_Cohort_SEM3,All_Cohort_SEM4,All_Cohort_SEM5)
colnames(All_Cohort)[colnames(All_Cohort) == "Transfer_SEM"] <- "Comparison_SEM"
a<-as.data.frame(table(All_Cohort$ID))
a<-a[order(a$Freq, decreasing = TRUE), ]
# some students in df3 don't exist in All_Cohort because they didn't persist into at least SEM3
# b<-as.data.frame(table(df3$ID))
# b<-b[order(b$Freq, decreasing = TRUE), ]
# change multiple Immigration status, postal code, mother tongue to the latest value
BA_transfer_programs3 <- BA_transfer_programs2 %>%
group_by(ID, Admitted.Academic.Period) %>%
mutate(largest_period = max(Academic.Period, na.rm = TRUE)) %>%
mutate(Immigration.Status = ifelse(Academic.Period == largest_period, Immigration.Status, Immigration.Status[which.max(Academic.Period)]),
Postal.Code = ifelse(Academic.Period == largest_period, Postal.Code, Postal.Code[which.max(Academic.Period)]),
Mother.Tongue = ifelse(Academic.Period == largest_period, Mother.Tongue, Mother.Tongue[which.max(Academic.Period)])) %>%
ungroup() %>%
select(-largest_period)
# for each student change Gender "Not Reported" to the available value
df2<-BA_transfer_programs3 %>%
mutate(Gender = na_if(Gender, "Not Reported"))
df3 <- df2 %>%
group_by(ID) %>%
mutate(Gender = ifelse(is.na(Gender), Gender[!is.na(Gender)][1], Gender))
df3 <- df3 %>%
group_by(ID) %>%
mutate(Birth.Year = ifelse(is.na(Birth.Year), Birth.Year[!is.na(Birth.Year)][1], Birth.Year))
# resolve gender transitioning issues
df3$Gender[df3$ID == "N01037047"] <- "Female"
df3$Gender[df3$ID == "N01070542"] <- "Male"
df3$Gender[df3$ID == "N01185978"] <- "Male"
df3$Gender[df3$ID == "N01226236"] <- "Male"
df3$Gender[df3$ID == "N01331096"] <- "Female"
df3$Gender[df3$ID == "N01470508"] <- "Female"
df3$Gender[df3$ID == "N00650738"] <- "Male"
df3$Gender[df3$ID == "N01210934"] <- "Male"
All_df4<- left_join(All_Cohort[,c(1:9,11,12)], unique(df3[,c(1,2,4,5,7,12:16,18,19)]), by=c("ID","Admitted.Academic.Period"))
## Warning in left_join(All_Cohort[, c(1:9, 11, 12)], unique(df3[, c(1, 2, : Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 1 of `x` matches multiple rows in `y`.
## ℹ Row 8698 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
## "many-to-many"` to silence this warning.
multiple_Element43<-as.data.frame(table(All_df4[All_df4$PathwayStudent==1,]$ID))
multiple_Element43<-multiple_Element43[order(multiple_Element43$Freq, decreasing = TRUE), ]
Calculate Graduation at 100%
#Find catalog cohort for transfer students based on their Min semester
Add_Cohort<- All_df4 %>%
mutate(Calculated_Cohort = case_when(
PathwayStudent == 0 ~ Admitted.Academic.Period,
PathwayStudent == "Credit" ~ Admitted.Academic.Period,
PathwayStudent == 1 & Comparison_SEM %in% c(3, 5) ~ Admitted.Academic.Period - ((as.numeric(Comparison_SEM) - 1) / 2) * 100,
Comparison_SEM == 4 ~ Admitted.Academic.Period - 160,
TRUE ~ NA_real_ # If none of the conditions match, set cohort as NA
))
Add_Cohort$Supposed_Grad_Period <- as.numeric(Add_Cohort$Calculated_Cohort)+360
All_df5 <- Add_Cohort %>%
mutate(Grad_at_100 = ifelse(is.na(Graduation.Date)|Supposed_Grad_Period < Graduation.Date, 0, 1))
# All_df5$Element43_translated <- ifelse(
# substring(All_df5$SZBMTCU_ELEMENT_43_COHORT_START_DATE, nchar(All_df5$SZBMTCU_ELEMENT_43_COHORT_START_DATE) - 1) == "09",
# paste0(substr(All_df5$SZBMTCU_ELEMENT_43_COHORT_START_DATE, 1, 4), "70"),
# ifelse(
# substring(All_df5$SZBMTCU_ELEMENT_43_COHORT_START_DATE, nchar(All_df5$SZBMTCU_ELEMENT_43_COHORT_START_DATE) - 1) == "01",
# paste0(substr(All_df5$SZBMTCU_ELEMENT_43_COHORT_START_DATE, 1, 4), "30"),
# NA
# )
# )
All_df5<-All_df5[,c(1,22,2,21,23,3:19,24)] #remove Element43 column to get one row per student
All_df5<-unique(All_df5)
No_Sem1_No_PathwayCode_IDs <- All_df5$ID[is.na(All_df5$Transfer.Pathway.ID) & All_df5$PathwayStudent == 1] #70
No_Sem1_CreditTransfer <- All_df5$ID[!is.na(All_df5$Transfer.Pathway.ID) & All_df5$Transfer.Pathway.ID == "ZZZ" & All_df5$PathwayStudent == 1] #41
Sem1_BlockTransfer <- All_df5$ID[(!is.na(All_df5$Transfer.Pathway.ID) & All_df5$Transfer.Pathway.ID != "ZZZ") & All_df5$PathwayStudent == 0] #0
All_df5_2<-All_df5[All_df5$PathwayStudent==1,]
# 21 transfer students have multiple Admitted.Academic.Period where there supposed to only be 1
g<-as.data.frame(table(All_df5_2$ID))
g<-g[order(g$Freq, decreasing = TRUE), ]
# Check how many students there are for each SEM level comparison
Cohort_SEM_Count <- All_df5 %>%
group_by(Comparison_SEM, PathwayStudent) %>%
summarize(Count = n_distinct(ID))
## `summarise()` has grouped output by 'Comparison_SEM'. You can override using
## the `.groups` argument.
kable(Cohort_SEM_Count)
| 3 |
0 |
3259 |
| 3 |
1 |
463 |
| 3 |
Credit |
407 |
| 4 |
0 |
2529 |
| 4 |
1 |
230 |
| 4 |
Credit |
340 |
| 5 |
0 |
2302 |
| 5 |
1 |
1081 |
| 5 |
Credit |
324 |
Graph retention and persistence across the Comparison semesters
Get a summary of Sem2 retention persistence for each Comparison_SEM
level
Summary_All <-All_df5 %>%
group_by(PathwayStudent, Comparison_SEM) %>%
summarize(Sem2_Persisted2 = sum(Sem2_Persisted1)/n_distinct(ID),
Sem2_Retained2 = sum(Sem2_Retained1)/n_distinct(ID))
## `summarise()` has grouped output by 'PathwayStudent'. You can override using
## the `.groups` argument.
kable(Summary_All)
| 0 |
3 |
0.9235962 |
0.9159251 |
| 0 |
4 |
0.9141953 |
0.9062871 |
| 0 |
5 |
0.9600348 |
0.9582971 |
| 1 |
3 |
0.8833693 |
0.8747300 |
| 1 |
4 |
0.8782609 |
0.8739130 |
| 1 |
5 |
0.9000925 |
0.8991674 |
| Credit |
3 |
0.9508600 |
0.9410319 |
| Credit |
4 |
0.9529412 |
0.9500000 |
| Credit |
5 |
0.9567901 |
0.9567901 |
Graph Sem2 Persistence
library(ggplot2)
ggplot(Summary_All, aes(x = Comparison_SEM, y = Sem2_Persisted2, group = PathwayStudent, color = PathwayStudent)) +
geom_line() +
labs(x = "Comparison Semester", y = "SEM2 Persistence") +
scale_color_manual(values = c("red", "blue","dark green"))

Graph SEM2 Retention
library(ggplot2)
ggplot(Summary_All, aes(x = Comparison_SEM, y = Sem2_Retained2, group = PathwayStudent, color = PathwayStudent)) +
geom_line() +
labs(x = "Comparison Semester", y = "SEM2 Retention") +
scale_color_manual(values = c("red", "blue", "dark green"))

Get a summary of Year2 retention persistence for each Comparison_SEM
level
Summary_All2 <-All_df5[All_df5$Comparison_Period!="202270",] %>%
group_by(PathwayStudent, Comparison_SEM) %>%
summarize(Year2_Persisted2 = sum(Year2_Persisted1)/n_distinct(ID),
Year2_Retained2 = sum(Year2_Retained1)/n_distinct(ID))
## `summarise()` has grouped output by 'PathwayStudent'. You can override using
## the `.groups` argument.
kable(Summary_All2)
| 0 |
3 |
0.8585348 |
0.8437387 |
| 0 |
4 |
0.8948201 |
0.8861210 |
| 0 |
5 |
0.9366010 |
0.9334044 |
| 1 |
3 |
0.7937650 |
0.7817746 |
| 1 |
4 |
0.8521739 |
0.8478261 |
| 1 |
5 |
0.8171429 |
0.8160000 |
| Credit |
3 |
0.9266304 |
0.9157609 |
| Credit |
4 |
0.9235294 |
0.9205882 |
| Credit |
5 |
0.8931034 |
0.8931034 |
Graph Year2 Persistence
ggplot(Summary_All2, aes(x = Comparison_SEM, y = Year2_Persisted2, group = PathwayStudent, color = PathwayStudent)) +
geom_line() +
labs(x = "Comparison Semester", y = "Year2 Persistence") +
scale_color_manual(values = c("red", "blue","dark green"))

Graph Year2 Retention
ggplot(Summary_All2, aes(x = Comparison_SEM, y = Year2_Retained2, group = PathwayStudent, color = PathwayStudent)) +
geom_line() +
labs(x = "Comparison Semester", y = "Year2 Retention") +
scale_color_manual(values = c("red", "blue","dark green"))

Get a summary of Graduation at 100% for each Comparison_SEM
level
Summary_All_Grad <-All_df5[All_df5$Supposed_Grad_Period<202350,] %>%
group_by(PathwayStudent, Comparison_SEM) %>%
summarize(Grad_at_100 = sum(Grad_at_100)/n_distinct(ID))
## `summarise()` has grouped output by 'PathwayStudent'. You can override using
## the `.groups` argument.
kable(Summary_All_Grad)
| 0 |
3 |
0.5605925 |
| 0 |
4 |
0.6095656 |
| 0 |
5 |
0.6654235 |
| 1 |
3 |
0.4721408 |
| 1 |
4 |
0.5090909 |
| 1 |
5 |
0.5965714 |
| Credit |
3 |
0.6544343 |
| Credit |
4 |
0.6809211 |
| Credit |
5 |
0.7068966 |
# All_df5<- All_df5[,c(1:7,17,8:15)]
Graph Graduation at 100%
ggplot(Summary_All_Grad, aes(x = Comparison_SEM, y = Grad_at_100, group = PathwayStudent, color = PathwayStudent)) +
geom_line() +
labs(x = "Comparison Semester", y = "Graduation at 100%") +
scale_color_manual(values = c("red", "blue","dark green"))
