---
title: "VA Intervention GPD Results - Physics 140 2018"
output: html_notebook
---

```{r, echo = FALSE, message = FALSE}
library(tidyverse)
library(RColorBrewer)
library(boot)
```

```{r, echo = FALSE}
# Shared ggplot2 theme added to every figure in this notebook.
theme_pub <- theme(
  # Title and base text
  plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
  text = element_text(),
  # Backgrounds: no panel fill, white outline around the whole plot
  panel.background = element_blank(),
  plot.background = element_rect(colour = "white"),
  # Axes
  axis.title = element_text(face = "bold", size = 14),
  axis.title.y = element_text(angle = 90, vjust = 2, size = 12),
  axis.title.x = element_text(vjust = -0.2, size = 12),
  axis.text = element_text(size = 10),
  axis.line = element_line(colour = "black"),
  axis.ticks = element_line(),
  # Grid: light major lines only
  panel.grid.major = element_line(colour = "grey90"),
  panel.grid.minor = element_blank(),
  # Legend: compact, horizontal, below the plot
  legend.key = element_rect(colour = NA),
  legend.position = "bottom",
  legend.direction = "horizontal",
  legend.key.size = unit(0.2, "cm"),
  legend.title = element_text(face = "italic"),
  # Outer margins in mm
  plot.margin = unit(c(10, 5, 5, 5), "mm"),
  # Facet strips
  strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
  strip.text = element_text(face = "bold"),
  aspect.ratio = 0.65
)
```

### Comparing gendered performance differences between Winter 2018 and Fall 2018 terms.

```{r, echo = FALSE, message = FALSE, warning = FALSE}
# allData[[1]]..[[3]]: Fall 2018 midterms 1-3; allData[[4]]..[[6]]: Winter 2018
# midterms 1-3. Each score file drops rows whose first_name is "0" and has its
# student_id converted to numeric.
allData <- local({
  read_scores <- function(path) {
    scores <- read_csv(path) %>%
      filter(first_name != "0")
    scores$student_id <- as.numeric(scores$student_id)
    scores
  }
  lapply(
    c(
      "Midterm1_scores_FA18_P140.csv",
      "Midterm2_scores_FA18_P140.csv",
      "Midterm3_scores_FA18_P140.csv",
      "Midterm1_scores_WN18_P140.csv",
      "Midterm2_scores_WN18_P140.csv",
      "Midterm3_scores_WN18_P140.csv"
    ),
    read_scores
  )
})

# allData[[7]]: Fall 2018 demographics (columns 1 and 3 of the file).
allData[[7]] <- setNames(
  read_csv("Demographics_FA18_P140.csv")[c(1, 3)],
  c("student_id", "Gender")
)
# allData[[8]]: Winter 2018 demographics (columns 4 and 8 of the file); its
# lowercase `gender` column still holds the file's original labels.
allData[[8]] <- setNames(
  read_csv("Demographics_WN18_P140.csv")[c(4, 8)],
  c("student_id", "gender")
)
```

```{r, echo = FALSE}
# Recode the Winter 2018 gender labels ("Male"/"Female") to "M"/"F" so they
# can be filtered with the same codes as the Fall 2018 `Gender` column.
# Any other label, including NA, becomes NA.
# match() is vectorised and NA-safe; a scalar `if` inside a row loop errors on
# a missing gender and on an empty table, and can leave the vector shorter
# than the table when the trailing rows match neither label.
Gender <- c("M", "F")[match(allData[[8]]$gender, c("Male", "Female"))]
allData[[8]]$Gender <- Gender
```

```{r, echo = FALSE}
# Add leading zeros to student ID numbers.
#
# Args:
#   mytable: a data frame with a numeric `student_id` column.
#
# Returns:
#   `mytable` with an extra character column `studentID`. IDs with 6 or 7
#   digits are left-padded with zeros to 8 characters; every other ID is
#   written out as plain digits; NA IDs stay NA.
#
# The column is always character, so tables padded separately can be joined on
# `studentID` without a type mismatch, and IDs are formatted with sprintf() so
# values such as 100000 never turn into "1e+05".
addZeros <- function(mytable) {
  ids <- mytable$student_id
  id_text <- sprintf("%.0f", ids)

  # Only 6- and 7-digit IDs are padded; shorter or longer IDs are left alone.
  needs_padding <- !is.na(ids) & nchar(id_text) %in% c(6L, 7L)
  id_text[needs_padding] <- sprintf("%08.0f", ids[needs_padding])

  # sprintf() renders NA as the string "NA"; restore a real missing value.
  id_text[is.na(ids)] <- NA_character_

  mytable$studentID <- id_text
  mytable
}
```

```{r, echo = FALSE}
# Build one exam table: the exam's scores left-joined to gender by padded
# student ID, then tagged with the midterm number, term and course.
#   scores       - one exam's score table; after addZeros() column 39 is the
#                  padded ID and column 5 is the score column that is kept
#   demographics - demographics table for the same term
#   demo_cols    - positions of (studentID, Gender) in the padded demographics
#   midterm_no   - midterm number stored in `Midterm`
#   term_label   - term name stored in `Term`
join_scores_with_gender <- function(scores, demographics, demo_cols,
                                    midterm_no, term_label) {
  exam <- addZeros(scores)[c(39, 5)]
  gender <- addZeros(demographics)[demo_cols]
  left_join(exam, gender, by = "studentID") %>%
    mutate(Midterm = midterm_no, Term = term_label, Course = "P140")
}

# Fall 2018: demographics are allData[[7]] (studentID in column 3, Gender in 2).
Mid1_FA18_P140 <- join_scores_with_gender(
  allData[[1]], allData[[7]], c(3, 2), 1, "Fall 2018"
)
Mid2_FA18_P140 <- join_scores_with_gender(
  allData[[2]], allData[[7]], c(3, 2), 2, "Fall 2018"
)
Mid3_FA18_P140 <- join_scores_with_gender(
  allData[[3]], allData[[7]], c(3, 2), 3, "Fall 2018"
)

# Winter 2018: demographics are allData[[8]] (studentID in column 4, Gender in 3).
Mid1_WN18_P140 <- join_scores_with_gender(
  allData[[4]], allData[[8]], c(4, 3), 1, "Winter 2018"
)
Mid2_WN18_P140 <- join_scores_with_gender(
  allData[[5]], allData[[8]], c(4, 3), 2, "Winter 2018"
)
Mid3_WN18_P140 <- join_scores_with_gender(
  allData[[6]], allData[[8]], c(4, 3), 3, "Winter 2018"
)
```

```{r, echo = FALSE}
# Statistic passed to boot(): the mean of the elements of `d` selected by the
# resample index vector `i`.
bootfunc <- function(d, i) {
  mean(d[i])
}
```

```{r, echo = FALSE}
# Bootstrap standard error of the mean `reg score` for each gender and exam:
# the standard deviation of 10000 resampled means.
# Order of the 12 values:
#   1-3   female, Fall 2018 midterms 1-3
#   4-6   female, Winter 2018 midterms 1-3
#   7-9   male,   Fall 2018 midterms 1-3
#   10-12 male,   Winter 2018 midterms 1-3
demoError <- local({
  exam_tables <- list(
    Mid1_FA18_P140, Mid2_FA18_P140, Mid3_FA18_P140,
    Mid1_WN18_P140, Mid2_WN18_P140, Mid3_WN18_P140
  )
  boot_se <- function(exam_table, gender_code) {
    scores <- filter(exam_table, Gender == gender_code)$`reg score`
    sd(boot(scores, bootfunc, 10000)$t)
  }
  c(
    vapply(exam_tables, boot_se, numeric(1), gender_code = "F"),
    vapply(exam_tables, boot_se, numeric(1), gender_code = "M")
  )
})
```

```{r, echo = FALSE}
# Summary table of mean exam score and its bootstrap error, one row per
# gender x term x exam. Rows follow the order of demoError: female Fall 1-3,
# female Winter 1-3, male Fall 1-3, male Winter 1-3.
# Built with tibble() and named columns; data_frame() is deprecated, and naming
# the columns up front removes the positional colnames<- step.
PerformDiff <- local({
  exam_tables <- list(
    Mid1_FA18_P140, Mid2_FA18_P140, Mid3_FA18_P140,
    Mid1_WN18_P140, Mid2_WN18_P140, Mid3_WN18_P140
  )
  mean_score <- function(exam_table, gender_code) {
    mean(filter(exam_table, Gender == gender_code)$`reg score`)
  }
  mean_scores <- c(
    vapply(exam_tables, mean_score, numeric(1), gender_code = "F"),
    vapply(exam_tables, mean_score, numeric(1), gender_code = "M")
  )
  term_labels <- rep(rep(c("Fall 2018", "Winter 2018"), each = 3), times = 2)

  tibble(
    Gender = rep(c("F", "M"), each = 6),
    Exam = rep(c("1", "2", "3"), times = 4),
    # Ordered so that Winter 2018 comes before Fall 2018.
    Term = ordered(term_labels, levels = c("Winter 2018", "Fall 2018")),
    Course = "P140",
    `Mean Exam Score` = mean_scores,
    Error = demoError
  )
})
PerformDiff
```

```{r, echo = FALSE}
# Mean exam score per exam, coloured by gender, one panel per term.
# Error bars span the mean +/- the bootstrap error stored in `Error`.
ggplot(
  data = PerformDiff,
  mapping = aes(x = Exam, y = `Mean Exam Score`, color = Gender)
) +
  geom_point(position = position_dodge(width = 0.2)) +
  geom_errorbar(
    mapping = aes(
      ymin = `Mean Exam Score` - Error,
      ymax = `Mean Exam Score` + Error
    ),
    width = 0.3,
    position = position_dodge(width = 0.2)
  ) +
  facet_wrap(~Term) +
  labs(
    title = "Exam Performance by Gender and Term for Physics 140",
    color = ""
  ) +
  scale_color_brewer(palette = "Set2") +
  theme_pub
```

#### GPD = Average_Female_Score - Average_Male_Score
##### This GPD calculation only looks at raw scores; it does not make any BTE measurements.
```{r, echo = FALSE}
# Gendered performance difference (female mean minus male mean of `reg score`)
# for each exam: Winter 2018 midterms 1-3, then Fall 2018 midterms 1-3.
GPD_P140 <- local({
  score_gap <- function(exam_table) {
    female_mean <- mean(filter(exam_table, Gender == "F")$`reg score`)
    male_mean <- mean(filter(exam_table, Gender == "M")$`reg score`)
    female_mean - male_mean
  }
  vapply(
    list(
      Mid1_WN18_P140, Mid2_WN18_P140, Mid3_WN18_P140,
      Mid1_FA18_P140, Mid2_FA18_P140, Mid3_FA18_P140
    ),
    score_gap,
    numeric(1)
  )
})
```

```{r, echo = FALSE}
# Error on each GPD: the female and male errors combined in quadrature.
# demoError holds female values in 1-6 and male values in 7-12, each stored
# Fall-first; the index vectors reorder them to Winter 2018 exams 1-3 followed
# by Fall 2018 exams 1-3, matching GPD_P140.
GPD_error_P140 <- sqrt(
  demoError[c(4:6, 1:3)]^2 + demoError[c(10:12, 7:9)]^2
)
```

```{r, echo = FALSE}
# One row per exam and term with the GPD and its error; rows are Winter 2018
# exams 1-3 followed by Fall 2018 exams 1-3, matching GPD_P140.
# Built with tibble() and named columns; data_frame() is deprecated, and naming
# the columns up front removes the positional colnames<- step.
GPD_table <- tibble(
  Exam = rep(c("1", "2", "3"), times = 2),
  # Ordered so that Winter 2018 comes before Fall 2018.
  Term = ordered(
    rep(c("Winter 2018", "Fall 2018"), each = 3),
    levels = c("Winter 2018", "Fall 2018")
  ),
  Course = "P140",
  GPD = GPD_P140,
  Error = GPD_error_P140
)
GPD_table
```

```{r, echo = FALSE}
# GPD per exam, coloured by term. Error bars span GPD +/- the `Error` column.
ggplot(
  data = GPD_table,
  mapping = aes(x = Exam, y = GPD, color = Term)
) +
  geom_point(position = position_dodge(width = 0.2)) +
  geom_errorbar(
    mapping = aes(ymin = GPD - Error, ymax = GPD + Error),
    width = 0.3,
    position = position_dodge(width = 0.2)
  ) +
  labs(
    title = "Gendered Performance Difference by Exam and Term for Physics 140",
    color = ""
  ) +
  scale_color_brewer(palette = "Set1") +
  theme_pub
```



