Tailor termClassInfo table
tailor.termClassInfo <- function(termClassInfo) {
termClassInfoTailored <- select(termClassInfo, STDNT_ID, TERM_CD, TERM_SHORT_DES,SBJCT_CD,
CATLG_NBR, CLASS_SCTN_CD, CLASS_ENRL_TOTAL_NBR,
GRD_PNTS_PER_UNIT_NBR, EXCL_CLASS_CUM_GPA)
names(termClassInfoTailored) <- c("STUDENT", "TERM_NBR", "TERM_NAME","SUBJECT", "CATALOG_NBR",
"CLASS_SCTN_NBR", "NBR_STUDENTS", "GRADE", "GPAO")
return(termClassInfoTailored)
}
Tailor studentInfo table
tailor.studentInfo <- function(studentInfo) {
studentInfoTailored <- select(studentInfo, STDNT_ID, STDNT_GNDR_CD, STDNT_GNDR_SHORT_DES)
names(studentInfoTailored) <- c("STUDENT", "GENDER_NBR", "GENDER")
return(studentInfoTailored)
}
Merge termClassInfo and studentInfo tables by student
merge.tables <- function(table1,table2) {
allInfo <- merge(table1, table2, by = "STUDENT")
}
Tailor merged table based on GPAO, Grade, Term (Fall 2005 onward). Add new variable for grade anomaly
tailor.allInfo <- function(allInfo) {
allInfoTailored <- filter(allInfo, GPAO <= 4.000 & GPAO > 0.000 &
GRADE <= 4.000 & GRADE > 0.000 &
TERM_NBR >= 1560) %>% mutate(GRADE_ANOMALY = GRADE - GPAO)
}
Create intro physics table
table.introPhysics <- function(allInfoTailored) {
introPhysics <- filter (allInfoTailored, SUBJECT == "PHYSICS" &
(CATALOG_NBR == 135 | CATALOG_NBR == 140) &
(CLASS_SCTN_NBR == 100 | CLASS_SCTN_NBR == 200 |
CLASS_SCTN_NBR == 300| CLASS_SCTN_NBR == 400))
}
Find average grade anomaly
avg.anomaly <- function(classTable, Gender) {
genderTable <- filter(classTable, GENDER_NBR == Gender)
avgGenderAnomaly = mean(genderTable$GRADE_ANOMALY)
}
Find average GPD
avg.GPD <- function(femAnomaly, maleAnomaly) {
avgGPD = femAnomaly - maleAnomaly
}
Bootstrap grade anomaly by gender and class section (CSP or not)
bootstrap.method <- function(classTable, Gender) {
genderTable <- filter(classTable, GENDER_NBR == Gender)
resampleAnomaly <- c()
error <- c()
for (j in 1:1000) {
resampleAnomaly <- c(resampleAnomaly, mean(sample(genderTable$GRADE_ANOMALY,
size = nrow(genderTable), replace = TRUE)))
}
return(resampleAnomaly)
}
Calculate error on grade anomaly
error.anomaly <- function(resampleAnomaly, classTable, Gender) {
genderTable <- filter(classTable, GENDER_NBR == Gender)
classSize = nrow(genderTable)
errorAnomaly = sd(resampleAnomaly)/sqrt(classSize)
}
Calculate error on GPD
error.GPD <- function(resampleFemAnomalyError, resampleMaleAnomalyError) {
errorGPD = sqrt((resampleFemAnomalyError*resampleFemAnomalyError +
resampleMaleAnomalyError*resampleMaleAnomalyError))
}
Create phys140GPD table, which shows GPD per semester, per class, with error
phys140.GPD <- function(introPhysics) {
phys140GPD <- data.frame()
for (i in unique(introPhysics$TERM_NBR)) {
Classtable <- filter(introPhysics, CATALOG_NBR == 140 & TERM_NBR == i)
smallClass <- filter(Classtable, NBR_STUDENTS <= 35)
bigClass <- filter(Classtable, NBR_STUDENTS > 35)
# Find avg gpd for small and big classes
smallClassGPD = avg.GPD(avg.anomaly(smallClass, 1), avg.anomaly(smallClass, 2))
bigClassGPD = avg.GPD(avg.anomaly(bigClass, 1), avg.anomaly(bigClass, 2))
######################################################
# BOOTSTRAP
resampleSmallGPD = bootstrap.method(smallClass, 1) - bootstrap.method(smallClass, 2)
resampleBigGPD = bootstrap.method(bigClass, 1) - bootstrap.method(bigClass, 2)
# Error on resampled values
smallClassErrorGPD = error.GPD(error.anomaly(bootstrap.method(smallClass, 1),
smallClass, 1),
error.anomaly(bootstrap.method(smallClass, 2),
smallClass, 2))
bigClassErrorGPD = error.GPD(error.anomaly(bootstrap.method(bigClass, 1),
bigClass, 1),
error.anomaly(bootstrap.method(bigClass, 2),
bigClass, 2))
# Add each new term's GPD and error to phys140GPD table
phys140GPD <- na.omit(rbind(phys140GPD, data.frame("TERM_NAME" = Classtable$TERM_NAME,
"TERM" = i,
"SMALL_CLASS_GPD" = smallClassGPD,
"SMALL_CLASS_ERROR" = smallClassErrorGPD,
"BIG_CLASS_GPD" = bigClassGPD,
"BIG_CLASS_ERROR" = bigClassErrorGPD)))
phys140GPD <- subset(phys140GPD,!duplicated(phys140GPD$TERM))
}
return(phys140GPD)
}