1. Pre-processing (for semesters 1 and 2)

# install.packages("devtools")
# devtools::install_github("jrosen48/jmRtools")
Sys.setenv(TZ='America/Detroit')

library(jmRtools)
library(readxl)
library(tidyverse)
library(lubridate)
# Gradebook export; it is reshaped later, in the gradebook-processing section.
RR_Course_Data <- read_csv("RR_Course_Data.csv")

# Pre-survey for the Fall, 2015 and Spring, 2016 semesters.
CS1 <- read_csv("data/CS1.csv")

# Keep only rows that answered item 1 and that do not carry one of the
# excluded usernames or an unfilled "@X@...@X@" template placeholder.
# `!=` is used on purpose: rows whose username / course ID is NA are dropped too.
# must revisit
CS1_ss <- CS1 %>%
    dplyr::filter(!is.na(Q1MaincellgroupRow1)) %>%
    dplyr::filter(opdata_username != "_49147_1",
                  opdata_username != "_93993_1",
                  opdata_username != "_80624_1",
                  opdata_username != "@X@user.pk_string@X@",
                  opdata_username != "") %>%
    dplyr::filter(opdata_CourseID != "@X@course.course_id@X@")

# Sort, then add the reverse-coded item and the three composite scores.
# Item 7 is reverse-coded (1<->5, 2<->4); item 4 is neither recoded nor used.
ps12 <- CS1_ss %>%
    dplyr::arrange(opdata_username, opdata_CourseID, StartDate) %>%
    mutate(Q1MaincellgroupRow7_rc = car::recode(Q1MaincellgroupRow7, "1=5; 2=4; 5=1; 4=2"),
           # interest: items 1, 8, 10, 5
           int = (Q1MaincellgroupRow1 + Q1MaincellgroupRow8 + Q1MaincellgroupRow10 + Q1MaincellgroupRow5) / 4,
           # utility value: items 2, 6, 9
           uv = (Q1MaincellgroupRow2 + Q1MaincellgroupRow6 + Q1MaincellgroupRow9) / 3,
           # perceived competence: item 3 and reverse-coded item 7
           percomp = (Q1MaincellgroupRow3 + Q1MaincellgroupRow7_rc) / 2)

# Course IDs look like "<subject>-<semester>-<section>"; a missing piece becomes NA.
course_parts <- str_split(ps12$opdata_CourseID, "-")

ps12_f <- ps12 %>%
    mutate(subject = map_chr(course_parts, function(parts) parts[1]),
           semester = map_chr(course_parts, function(parts) parts[2]),
           section = map_chr(course_parts, function(parts) parts[3])) %>%
    select(student_ID = opdata_username,
           course_ID = opdata_CourseID,
           subject, semester, section,
           int, uv, percomp) %>%
    # strip the first character and the last two, e.g. "_49147_1" -> "49147"
    mutate(student_ID = str_sub(student_ID, start = 2L, end = -3L)) %>%
    arrange(student_ID)

# Presumably the post-survey for the Fall, 2015 and Spring, 2016 semesters: every
# score derived here is named post_* (the earlier comment said "pre-survey" - confirm).
CS2 <- read_csv("data/CS2.csv")

# Item 7 is reverse-coded (1<->5, 2<->4) before it enters the composite.
CS2$Q1MaincellgroupRow7_rc <- car::recode(CS2$Q1MaincellgroupRow7, "1=5; 2=4; 5=1; 4=2")
CS2$post_int <- (CS2$Q1MaincellgroupRow1 + CS2$Q1MaincellgroupRow8 + CS2$Q1MaincellgroupRow10 + CS2$Q1MaincellgroupRow5) / 4
CS2$post_uv <- (CS2$Q1MaincellgroupRow2 + CS2$Q1MaincellgroupRow6 + CS2$Q1MaincellgroupRow9) / 3 # item 4 is the one not used in any composite
CS2$post_percomp <- (CS2$Q1MaincellgroupRow3 + CS2$Q1MaincellgroupRow7_rc) / 2
CS2$date <- lubridate::ymd_hm(CS2$CompletedDate, tz = "America/Detroit")

# Oldest response first, so distinct() below keeps each student's earliest row.
CS2 <- arrange(CS2, date)

# Unfilled template placeholders must be removed BEFORE the username is trimmed:
# str_sub(., 2, -3) turns "@X@user.pk_string@X@" into "X@user.pk_string@", so a
# comparison against the untrimmed placeholder on student_ID can never match.
placeholder_usernames <- c("@X@user.pk_string@X@", "@X@course.course_id@X@")

CS2 <- CS2 %>%
    filter(!opdata_username %in% placeholder_usernames) %>%
    # strip the first character and the last two, e.g. "_49147_1" -> "49147"
    mutate(student_ID = str_sub(opdata_username, start = 2L, end = -3L)) %>%
    select(student_ID, contains("post"), date)

# Drop rows with any missing value (ID, post score, or date).
CS2 <- CS2[complete.cases(CS2), ]

# Same excluded accounts as in the pre-survey, here in trimmed form.
CS2 <- filter(CS2,
              student_ID != "49147",
              student_ID != "93993",
              student_ID != "80624",
              student_ID != "")

CS2 <- distinct(CS2, student_ID, .keep_all = TRUE)
CS2 <- select(CS2, -date)
CS2 <- arrange(CS2, student_ID)

# Attach post scores to the pre-survey table. The key is student_ID only, so a
# student enrolled in several courses receives the same post scores on each row.
ps12_f <- left_join(ps12_f, CS2, by = "student_ID")

2. Pre-processing (for semester 3)

# Semester 3 pre-survey.
ps3 <- read_excel("~/Dropbox/1_Research/utility_value_intervention_online_science/CS_1_7_13_17.xls")

# Item "61" is reverse-coded (1<->5, 2<->4) before it enters perceived competence.
ps3$Q1MaincellgroupRow61_rc <- car::recode(ps3$Q1MaincellgroupRow61, "1=5; 2=4; 5=1; 4=2")

# Composite scores, following the item mapping of the earlier hand-written formulas:
#   int     = (Row01 + Row71 + Row91 + Row41) / 4
#   uv      = (Row11 + Row51 + Row81) / 3
#   percomp = (Row21 + Row61_rc) / 2
# composite_mean_maker() comes from jmRtools; presumably it returns the row-wise
# mean of the named columns - confirm its NA handling against the package.
# percomp previously repeated the three uv items, which made it a copy of uv.
ps3 <- ps3 %>% 
    mutate(int = composite_mean_maker(ps3, Q1MaincellgroupRow01, Q1MaincellgroupRow71, Q1MaincellgroupRow91, Q1MaincellgroupRow41),
           uv = composite_mean_maker(ps3, Q1MaincellgroupRow11, Q1MaincellgroupRow51, Q1MaincellgroupRow81),
           percomp = composite_mean_maker(ps3, Q1MaincellgroupRow21, Q1MaincellgroupRow61_rc)) %>% 
    # drop rows where the course-ID template placeholder was never filled in
    filter(opdata_CourseID != "@X@course.course_id@X@") %>% 
    # course IDs look like "<subject>-<semester>-<section>"; keep the original column too
    separate(opdata_CourseID, c("subject", "semester", "section"), sep = "-", remove = FALSE) 

# NOTE(review): unlike semesters 1 and 2, student_ID is not trimmed with str_sub()
# here - confirm the username format in this export matches the gradebook IDs.
ps3_f <- select(ps3,
                student_ID = opdata_username,
                course_ID = opdata_CourseID,
                subject, semester, section,
                int, uv, percomp)

# Semester 3 post-survey.
df2 <- read_excel("~/Dropbox/1_Research/utility_value_intervention_online_science/CS_2_7_13_17.xls")
df2$post_int <- (df2$Q2MaincellgroupRow01 + df2$Q2MaincellgroupRow71 + df2$Q2MaincellgroupRow91 + df2$Q2MaincellgroupRow41) / 4
df2$post_uv <- (df2$Q2MaincellgroupRow11 + df2$Q2MaincellgroupRow51 + df2$Q2MaincellgroupRow81) / 3
# NOTE(review): single item only; the other surveys average this item with a
# reverse-coded second item - confirm whether a "Row61" item should be included.
df2$post_percomp <- (df2$Q2MaincellgroupRow21)

df2 <- mutate(df2, date = lubridate::mdy_hm(CompletedDate, tz = "America/Detroit"))

# Oldest response first, so distinct() below keeps each student's earliest row.
df2 <- arrange(df2, date)

df2 <- select(df2, student_ID = opdata_username, contains("post"), date)
# De-duplicate per student. A plain distinct() while `date` is still present only
# removes fully identical rows, so repeat submissions would fan out in the join.
df2 <- distinct(df2, student_ID, .keep_all = TRUE)
df2 <- select(df2, -date)

# Attach the post scores to the semester-3 pre-survey table, mirroring what is
# done for semesters 1 and 2. The merged table is what gets stacked into `d`
# later, so the result has to land in ps3_f; previously it was stored only in
# df2 and the semester-3 post scores never reached the analysis data.
ps3_f <- left_join(ps3_f, df2, by = "student_ID")
df2 <- ps3_f

3. Merging and processing merged data

# Pre-survey scores only, with identical columns from both sources.
# student_ID is now selected for semester 3 as well; it was previously omitted,
# which left NA IDs on every semester-3 row of the stacked table.
ps12s <- dplyr::select(ps12_f, student_ID, course_ID, subject, semester, section, int, uv, percomp)
ps3s <- dplyr::select(ps3_f, student_ID, course_ID, subject, semester, section, int, uv, percomp)

x <- bind_rows(ps12s, ps3s)
x <- as_tibble(x)

# Full analysis table: all three semesters, with every column each source provides.
d <- bind_rows(ps12_f, ps3_f)

# treatment vs. control by course section
# https://docs.google.com/document/d/1g52pl-0JyEO26bFEJ9aE295dL7oZSOU1wrVXvVbd2lg/edit

# Sections coded 1 (intervention).
treatment_sections <- c(
    # Fall 15
    "AnPhA-S116-01", "BioA-S116-01",
    "FrScA-S116-01", "FrScA-S116-03",
    "OcnA-S116-01", "OcnA-S116-03",
    "PhysA-S116-01",
    # Spring 16
    "AnPhA-S216-02",
    "FrScA-S216-02", "FrScA-S216-04",
    "OcnA-S216-02",
    # Spring 17
    "AnPhA-S217-01",
    # NOTE(review): every other biology ID is spelled "BioA-..."; if the real ID is
    # "BioA-S217-01" this entry never matches and the section is coded 0 - confirm.
    "Bio-S217-01",
    "FrScA-S217-01", "FrScA-S217-03",
    "OcnA-S217-02", "OcnA-S217-03"
)

# Sections coded 0 (control).
# "OcnA-S116-02" and "AnPhA-S217-02" replace two entries that repeated the "-01"
# ID of a treatment section and therefore could never match. The resulting codes
# are unchanged, because unmatched sections already defaulted to 0.
control_sections <- c(
    # Fall 15
    "AnPhA-S116-02", "BioA-T116-01",
    "FrScA-S116-02", "FrScA-S116-04", "FrScA-T116-01",
    "OcnA-S116-02", "OcnA-T116-01",
    "PhysA-T116-01",
    # Spring 16
    "AnPhA-S216-01", "BioA-S216-01",
    "FrScA-S216-01", "FrScA-S216-03",
    "OcnA-S216-01", "PhysA-S216-01",
    # Spring 17
    "AnPhA-S217-02",
    "FrScA-S217-02", "FrScA-S217-02.",
    "OcnA-S217-01", "PhysA-S217-01"
)

d <- mutate(d,
            intervention_dummy = case_when(
                course_ID %in% treatment_sections ~ 1,
                course_ID %in% control_sections ~ 0,
                # NOTE(review): any section not listed above (or an NA course_ID) is
                # silently coded as control; consider NA_real_ once the lists are verified.
                TRUE ~ 0
            ))

# From here on the pre-survey composites carry a pre_ prefix, matching post_*.
d <- rename(d, pre_int = int, pre_uv = uv, pre_percomp = percomp)

4. Processing all gradebook data

# Semester 3 gradebook: rename the ID columns, keep Item_Position through
# last_access_date, and correct the misspelled category column name.
x <- read_csv("RR_S3.csv") %>%
    select(course_ID = Course_ID, student_ID = CU_Pk1, Item_Position:last_access_date) %>%
    rename(Grade_Category = Grade_Catagory)

# Semester 1/2 gradebook (read near the top of the script): same ID renaming,
# keeping Gradebook_Item through last_access_date.
RR_Course_Data <- RR_Course_Data %>%
    select(course_ID = CourseSectionOrigID, student_ID = Bb_UserPK, Gradebook_Item:last_access_date)

# Stack both gradebooks, semesters 1/2 first.
xx <- bind_rows(RR_Course_Data, x)
# write_csv(RR_Course_Data, "s12_gradebook_data.csv")

5. Merging self-report and gradebook data (not run yet)

# Coerce the join key to character on both sides so the types agree.
d <- mutate(d, student_ID = as.character(student_ID))
xx <- mutate(xx, student_ID = as.character(student_ID))

# One row per survey row x gradebook row for the same student.
# NOTE(review): both tables also carry course_ID; joining on student_ID alone
# yields course_ID.x / course_ID.y and pairs a student's survey row with their
# gradebook rows from every course - confirm this is intended.
df <- d %>%
    left_join(xx, by = "student_ID")

6. Pre-post analysis

Using multilevel models with a random intercept for each course.

6.0. Looking first at n’s

# Number of rows in each condition (0 = control, 1 = intervention).
count(d, intervention_dummy)
## # A tibble: 2 x 2
##   intervention_dummy     n
##                <dbl> <int>
## 1                  0   380
## 2                  1   429
# Same count, restricted to rows with no missing value in any column.
d %>% 
    filter(complete.cases(d)) %>% 
    count(intervention_dummy)
## # A tibble: 2 x 2
##   intervention_dummy     n
##                <dbl> <int>
## 1                  0     8
## 2                  1   110

6A. Just looking at pre-post changes in interest and UV

# Post interest on condition only, random intercept per course.
fit_post_int <- lme4::lmer(post_int ~ intervention_dummy + (1 | course_ID), data = d)
sjPlot::sjt.lmer(fit_post_int)
    post_int
    B CI p
Fixed Parts
(Intercept)   4.51 3.71 – 5.31 <.001
intervention_dummy   -0.85 -1.80 – 0.11 .096
Random Parts
σ2   0.705
τ00, course_ID   0.370
Ncourse_ID   12
ICCcourse_ID   0.344
Observations   121
R2 / Ω02   .262 / .252
# Post utility value on condition only, random intercept per course.
fit_post_uv <- lme4::lmer(post_uv ~ intervention_dummy + (1 | course_ID), data = d)
sjPlot::sjt.lmer(fit_post_uv)
    post_uv
    B CI p
Fixed Parts
(Intercept)   3.86 3.03 – 4.70 <.001
intervention_dummy   -0.72 -1.70 – 0.27 .166
Random Parts
σ2   0.853
τ00, course_ID   0.354
Ncourse_ID   12
ICCcourse_ID   0.293
Observations   121
R2 / Ω02   .205 / .187

6B. With pre-values added

# Post interest on condition, adjusting for pre interest; random intercept per course.
fit_post_int_pre <- lme4::lmer(post_int ~ pre_int + intervention_dummy + (1 | course_ID), data = d)
sjPlot::sjt.lmer(fit_post_int_pre)
    post_int
    B CI p
Fixed Parts
(Intercept)   1.24 0.13 – 2.35 .031
pre_int   0.73 0.52 – 0.94 <.001
intervention_dummy   -0.43 -1.06 – 0.21 .194
Random Parts
σ2   0.535
τ00, course_ID   0.078
Ncourse_ID   12
ICCcourse_ID   0.127
Observations   120
R2 / Ω02   .421 / .420
# Post utility value on condition, adjusting for pre utility value; random intercept per course.
fit_post_uv_pre <- lme4::lmer(post_uv ~ pre_uv + intervention_dummy + (1 | course_ID), data = d)
sjPlot::sjt.lmer(fit_post_uv_pre)
    post_uv
    B CI p
Fixed Parts
(Intercept)   0.75 -0.20 – 1.71 .126
pre_uv   0.80 0.61 – 0.98 <.001
intervention_dummy   -0.22 -0.91 – 0.46 .525
Random Parts
σ2   0.556
τ00, course_ID   0.115
Ncourse_ID   12
ICCcourse_ID   0.172
Observations   118
R2 / Ω02   .457 / .456

6C. With all variables added

# Post utility value on condition, adjusting for all three pre-survey composites;
# random intercept per course.
fit_post_uv_all <- lme4::lmer(post_uv ~ pre_uv + pre_int + pre_percomp + intervention_dummy + (1 | course_ID), data = d)
sjPlot::sjt.lmer(fit_post_uv_all)
    post_uv
    B CI p
Fixed Parts
(Intercept)   0.54 -0.72 – 1.81 .400
pre_uv   0.76 0.51 – 1.00 <.001
pre_int   0.04 -0.26 – 0.34 .802
pre_percomp   0.05 -0.23 – 0.32 .750
intervention_dummy   -0.20 -0.90 – 0.50 .580
Random Parts
σ2   0.565
τ00, course_ID   0.118
Ncourse_ID   12
ICCcourse_ID   0.173
Observations   118
R2 / Ω02   .459 / .458