Tailor termClassInfo table
tailor.termClassInfo <- function(termClassInfo) {
  termClassInfoTailored <- select(termClassInfo, STDNT_ID, TERM_CD, TERM_SHORT_DES,SBJCT_CD, 
                                  CATLG_NBR, CLASS_SCTN_CD, CLASS_ENRL_TOTAL_NBR, 
                                  GRD_PNTS_PER_UNIT_NBR, EXCL_CLASS_CUM_GPA)
  names(termClassInfoTailored) <- c("STUDENT", "TERM_NBR", "TERM_NAME","SUBJECT", "CATALOG_NBR", 
                                    "CLASS_SCTN_NBR", "NBR_STUDENTS", "GRADE", "GPAO")
  return(termClassInfoTailored)
}
Tailor studentInfo table
tailor.studentInfo <- function(studentInfo) {
  studentInfoTailored <- select(studentInfo, STDNT_ID, STDNT_GNDR_CD, STDNT_GNDR_SHORT_DES)
  names(studentInfoTailored) <- c("STUDENT", "GENDER_NBR", "GENDER")
  return(studentInfoTailored)
}
Merge termClassInfo and studentInfo tables by student
merge.tables <- function(table1,table2) {
   allInfo <- merge(table1, table2, by = "STUDENT")
}
Tailor merged table based on GPAO, Grade, Term (Fall 2005 onward). Add new variable for grade anomaly
tailor.allInfo <- function(allInfo) {
  allInfoTailored <- filter(allInfo, GPAO <= 4.000 & GPAO > 0.000 & 
                              GRADE <= 4.000 & GRADE > 0.000 & 
                              TERM_NBR >= 1560) %>% mutate(GRADE_ANOMALY = GRADE - GPAO)
}
Create intro physics table
table.introPhysics <- function(allInfoTailored) {
  introPhysics <- filter (allInfoTailored, SUBJECT == "PHYSICS" & 
                            (CATALOG_NBR == 135 | CATALOG_NBR == 140) &
                            (CLASS_SCTN_NBR == 100 | CLASS_SCTN_NBR == 200 |
                               CLASS_SCTN_NBR == 300| CLASS_SCTN_NBR == 400))
}

Find average grade anomaly
avg.anomaly <- function(classTable, Gender) {
  genderTable <- filter(classTable, GENDER_NBR == Gender)
  avgGenderAnomaly = mean(genderTable$GRADE_ANOMALY)
}
Find average GPD
avg.GPD <- function(femAnomaly, maleAnomaly) {
  avgGPD = femAnomaly - maleAnomaly
}
Bootstrap grade anomaly by gender and class section (CSP or not)
bootstrap.method <- function(classTable, Gender) {
  genderTable <- filter(classTable, GENDER_NBR == Gender)
  resampleAnomaly <- c()
  error <- c()
  for (j in 1:1000) {
    resampleAnomaly <- c(resampleAnomaly, mean(sample(genderTable$GRADE_ANOMALY,
                                                      size = nrow(genderTable), replace = TRUE)))
  }
  return(resampleAnomaly)
}
Calculate error on grade anomaly
error.anomaly <- function(resampleAnomaly, classTable, Gender) {
  genderTable <- filter(classTable, GENDER_NBR == Gender)
  classSize = nrow(genderTable)
  errorAnomaly = sd(resampleAnomaly)/sqrt(classSize)
}
Calculate error on GPD
error.GPD <- function(resampleFemAnomalyError, resampleMaleAnomalyError) {
  errorGPD = sqrt((resampleFemAnomalyError*resampleFemAnomalyError +
                     resampleMaleAnomalyError*resampleMaleAnomalyError))
}
Create phys140GPD table, which shows GPD per semester, per class, with error
phys140.GPD <- function(introPhysics) {
  phys140GPD <- data.frame()
  for (i in unique(introPhysics$TERM_NBR)) {
    Classtable <- filter(introPhysics, CATALOG_NBR == 140 & TERM_NBR == i)
    smallClass <- filter(Classtable, NBR_STUDENTS <= 35)
    bigClass <- filter(Classtable, NBR_STUDENTS > 35)
  
    # Find avg gpd for small and big classes
    smallClassGPD = avg.GPD(avg.anomaly(smallClass, 1), avg.anomaly(smallClass, 2))
    bigClassGPD = avg.GPD(avg.anomaly(bigClass, 1), avg.anomaly(bigClass, 2))
    
    ######################################################
    # BOOTSTRAP
    resampleSmallGPD = bootstrap.method(smallClass, 1) - bootstrap.method(smallClass, 2)
    resampleBigGPD = bootstrap.method(bigClass, 1) - bootstrap.method(bigClass, 2)

    # Error on resampled values
    smallClassErrorGPD = error.GPD(error.anomaly(bootstrap.method(smallClass, 1),
                                                 smallClass, 1), 
                                      error.anomaly(bootstrap.method(smallClass, 2),
                                                    smallClass, 2))
    bigClassErrorGPD = error.GPD(error.anomaly(bootstrap.method(bigClass, 1),
                                               bigClass, 1),
                                    error.anomaly(bootstrap.method(bigClass, 2),
                                                  bigClass, 2))
    
    # Add each new term's GPD and error to phys140GPD table
    phys140GPD <- na.omit(rbind(phys140GPD, data.frame("TERM_NAME" = Classtable$TERM_NAME,
                                                       "TERM" = i, 
                                                       "SMALL_CLASS_GPD" = smallClassGPD,
                                                       "SMALL_CLASS_ERROR" = smallClassErrorGPD,
                                                       "BIG_CLASS_GPD" = bigClassGPD, 
                                                       "BIG_CLASS_ERROR" = bigClassErrorGPD)))
    
    phys140GPD <- subset(phys140GPD,!duplicated(phys140GPD$TERM))
    
  }
  return(phys140GPD)
}