Workspace doc

https://docs.google.com/document/d/1g52pl-0JyEO26bFEJ9aE295dL7oZSOU1wrVXvVbd2lg/edit

Next steps

  • look at measurement of interest and value
  • add dummy for intervention

Overall takeaways

  • UV and interest seem to need to be in separate models, and so does perceived competence
  • Task value: its components (UV and interest) and perceived competence seem to need to be in separate models
  • Using just the control data, it seems important to run the models separately

Items

In general…

  1. I think this course is an interesting subject. (Int)
  2. What I am learning in this class is relevant to my life. (UV)
  3. I consider this topic to be one of my best subjects. (PC)
  4. I am not interested in this course. (Int - Rev)
  5. I think I will like learning about this topic. (Int)
  6. I think what we are studying in this course is useful for me to know. (UV)
  7. I don’t feel comfortable when it comes to answering questions in this area. (PC - Rev)
  8. I think this subject is interesting. (Int)
  9. I find the content of this course to be personally meaningful. (UV)
  10. I’ve always wanted to learn more about this subject. (Int)

Int: 1, 4, 5, 8, 10 UV: 2, 6, 9 PC: 3, 7

1. Pre-processing (for semesters 1 and 2)

# One-time setup: jmRtools is installed from GitHub via devtools.
# install.packages("devtools")
# devtools::install_github("jrosen48/jmRtools")
# Pin the session time zone; the survey timestamps below are parsed with
# tz = "America/Detroit" as well.
Sys.setenv(TZ='America/Detroit')

library(jmRtools)
library(readxl)
library(tidyverse)
library(lubridate)
# Course/gradebook export; it is reshaped in the gradebook section further down.
RR_Course_Data <- read_csv("RR_Course_Data.csv")

# Pre-survey responses for the Fall 2015 and Spring 2016 semesters.
CS1 <- read_csv("data/CS1.csv")

# Keep rows that answered the first item, then drop three specific usernames,
# unresolved "@X@...@X@" template placeholders, and blank usernames.
# TODO: the original author flagged this exclusion list as "must revisit".
CS1_ss <- CS1 %>%
    dplyr::filter(!is.na(Q1MaincellgroupRow1)) %>%
    dplyr::filter(opdata_username != "_49147_1",
                  opdata_username != "_93993_1",
                  opdata_username != "_80624_1",
                  opdata_username != "@X@user.pk_string@X@",
                  opdata_username != "",
                  opdata_CourseID != "@X@course.course_id@X@")

# Sort so that each student's responses are grouped and in start-date order.
ps12 <- CS1_ss %>%
    dplyr::arrange(opdata_username, opdata_CourseID, StartDate)

# Item key -- Int: 1, 4, 5, 8, 10 | UV: 2, 6, 9 | PC: 3, 7
# Items 4 and 7 are negatively worded, so they are reverse-scored
# (1 <-> 5, 2 <-> 4; a response of 3 is left unchanged).
ps12 <- ps12 %>%
    mutate(Q1MaincellgroupRow4_rc = car::recode(Q1MaincellgroupRow4, "1=5; 2=4; 5=1; 4=2"),
           Q1MaincellgroupRow7_rc = car::recode(Q1MaincellgroupRow7, "1=5; 2=4; 5=1; 4=2")) %>%
    # Short item aliases (q4 and q7 are the reverse-scored versions).
    mutate(q1 = Q1MaincellgroupRow1,
           q2 = Q1MaincellgroupRow2,
           q3 = Q1MaincellgroupRow3,
           q4 = Q1MaincellgroupRow4_rc,
           q5 = Q1MaincellgroupRow5,
           q6 = Q1MaincellgroupRow6,
           q7 = Q1MaincellgroupRow7_rc,
           q8 = Q1MaincellgroupRow8,
           q9 = Q1MaincellgroupRow9,
           q10 = Q1MaincellgroupRow10) %>%
    # Scale scores are plain item means, so any missing item makes the score NA.
    # NOTE(review): tv averages 7 items here (item 4 is left out), while the
    # semester-3 code averages 8 items including item 4 -- confirm which
    # definition is intended before comparing tv across semesters.
    mutate(int = (q1 + q4 + q8 + q10 + q5) / 5,
           uv = (q2 + q6 + q9) / 3,
           percomp = (q3 + q7) / 2,
           tv = (q1 + q8 + q10 + q5 + q2 + q6 + q9) / 7)

# Build the tidy semester 1-2 pre-survey table: one row per student.
ps12_f <- ps12 %>%
    # Course IDs are split on "-" into subject, semester and section;
    # a missing piece becomes NA.
    mutate(id_parts = str_split(opdata_CourseID, "-"),
           subject = map_chr(id_parts, function(p) p[1]),
           semester = map_chr(id_parts, function(p) p[2]),
           section = map_chr(id_parts, function(p) p[3]),
           date = ymd_hm(CompletedDate, tz = "America/Detroit")) %>%
    select(student_ID = opdata_username,
           course_ID = opdata_CourseID,
           subject, semester, section,
           int, uv, percomp, tv,
           q1:q10,
           date) %>%
    # Drop the first character and the last two characters of the username
    # (e.g. "_49147_1" -> "49147").
    mutate(student_ID = str_sub(student_ID, start = 2L, end = -3L)) %>%
    # Sort by completion date so that distinct() keeps each student's
    # earliest response.
    arrange(student_ID, date) %>%
    distinct(student_ID, .keep_all = TRUE)

# Survey for the Fall 2015 and Spring 2016 semesters that is scored into the
# post_* composites below, i.e. the post-survey (CS1 is the pre-survey).
CS2 <- read_csv("data/CS2.csv")

# Drop the unresolved username placeholder while the username is still raw.
# Once str_sub() has trimmed it, "@X@user.pk_string@X@" becomes
# "X@user.pk_string@" and a comparison against the raw text never matches.
# NOTE(review): the original also compared student_ID against
# "@X@course.course_id@X@", which cannot match a username; the pre-survey
# filters that placeholder on opdata_CourseID. If CS2 has that column, filter
# on it here as well.
CS2 <- filter(CS2, opdata_username != "@X@user.pk_string@X@")

# Reverse-score the negatively worded items (4 and 7).
CS2$Q1MaincellgroupRow4_rc <- car::recode(CS2$Q1MaincellgroupRow4, "1=5; 2=4; 5=1; 4=2")
CS2$Q1MaincellgroupRow7_rc <- car::recode(CS2$Q1MaincellgroupRow7, "1=5; 2=4; 5=1; 4=2")

# Post-survey composites. post_int and post_tv leave out item 4.
# (Original author's open question: "dropped 7 (is this supposed to be dropped 4?)")
CS2$post_int <- (CS2$Q1MaincellgroupRow1 + CS2$Q1MaincellgroupRow8 + CS2$Q1MaincellgroupRow10 + CS2$Q1MaincellgroupRow5) / 4
CS2$post_uv <- (CS2$Q1MaincellgroupRow2 + CS2$Q1MaincellgroupRow6 + CS2$Q1MaincellgroupRow9) / 3
CS2$post_tv <- (CS2$Q1MaincellgroupRow1 + CS2$Q1MaincellgroupRow8 + CS2$Q1MaincellgroupRow10 + CS2$Q1MaincellgroupRow5 + CS2$Q1MaincellgroupRow2 + CS2$Q1MaincellgroupRow6 + CS2$Q1MaincellgroupRow9) / 7
CS2$post_percomp <- (CS2$Q1MaincellgroupRow3 + CS2$Q1MaincellgroupRow7_rc) / 2

CS2$date <- lubridate::ymd_hm(CS2$CompletedDate, tz = "America/Detroit")

# Trim the username the same way as the pre-survey ("_49147_1" -> "49147")
# and keep only the ID, the post_* composites and the completion date.
CS2 <- CS2 %>%
    mutate(student_ID = str_sub(opdata_username, start = 2L, end = -3L)) %>%
    select(student_ID, contains("post"), date)

# Keep only fully scored responses.
CS2 <- CS2[complete.cases(CS2), ]

# Same three excluded students as in the pre-survey, plus blank IDs.
CS2 <- filter(CS2,
              student_ID != "49147",
              student_ID != "93993",
              student_ID != "80624",
              student_ID != "")

# One row per student: the earliest complete post-survey response.
CS2 <- arrange(CS2, student_ID, date)
CS2 <- distinct(CS2, student_ID, .keep_all = TRUE)

# Both tables carry a `date` column, so the join yields date.x (pre) and
# date.y (post).
ps12_f <- left_join(ps12_f, CS2, by = "student_ID")

ps12_f <- left_join(ps12_f, CS2, by = "student_ID")

2. Pre-processing (for semester 3)

# Semester-3 pre-survey export. In this file the item columns are named
# Q1MaincellgroupRow01 (item 1) through Q1MaincellgroupRow91 (item 10);
# see the q1..q10 aliases below for the full mapping.
ps3 <- read_excel("~/Dropbox/1_Research/utility_value_intervention_online_science/CS_1_7_13_17.xls")

# Reverse-score the negatively worded items: Row31 is item 4, Row61 is item 7.
ps3$Q1MaincellgroupRow31_rc <- car::recode(ps3$Q1MaincellgroupRow31, "1=5; 2=4; 5=1; 4=2")
ps3$Q1MaincellgroupRow61_rc <- car::recode(ps3$Q1MaincellgroupRow61, "1=5; 2=4; 5=1; 4=2")

# Earlier hand-computed composites, superseded by composite_mean_maker() below.
# ps3$int <- (ps3$Q1MaincellgroupRow01 + ps3$Q1MaincellgroupRow71 + ps3$Q1MaincellgroupRow91+ ps3$Q1MaincellgroupRow41) / 4
# ps3$uv <- (ps3$Q1MaincellgroupRow11 + ps3$Q1MaincellgroupRow51+ ps3$Q1MaincellgroupRow81) / 3 # dropped 7
# ps3$percomp <- (ps3$Q1MaincellgroupRow21 + ps3$Q1MaincellgroupRow61_rc) / 2

# Int: 1, 4, 5, 8, 10
# UV: 2, 6, 9
# PC: 3, 7

# composite_mean_maker() is not defined in this file; presumably it returns the
# row-wise mean of the listed columns -- confirm its NA handling, since the
# semester 1-2 composites are plain sums divided by the item count.
# Note that the helper is passed the global `ps3` (which already has the _rc
# columns), not the data flowing through the pipe.
ps3 <- ps3 %>% 
    mutate(int = composite_mean_maker(ps3, Q1MaincellgroupRow01, Q1MaincellgroupRow31_rc, Q1MaincellgroupRow41, Q1MaincellgroupRow71, Q1MaincellgroupRow91),
           uv = composite_mean_maker(ps3, Q1MaincellgroupRow11, Q1MaincellgroupRow51, Q1MaincellgroupRow81),
           percomp = composite_mean_maker(ps3, Q1MaincellgroupRow21, Q1MaincellgroupRow61_rc),
           # tv combines the five interest items (incl. reverse-scored item 4) and the three UV items.
           tv = composite_mean_maker(ps3, Q1MaincellgroupRow01, Q1MaincellgroupRow31_rc, Q1MaincellgroupRow41, Q1MaincellgroupRow71, Q1MaincellgroupRow91,Q1MaincellgroupRow11, Q1MaincellgroupRow51, Q1MaincellgroupRow81),
           # Short item aliases, matching the q1..q10 names used for semesters 1-2.
           q1 = Q1MaincellgroupRow01,
           q2 = Q1MaincellgroupRow11,
           q3 = Q1MaincellgroupRow21,
           q4 = Q1MaincellgroupRow31_rc, 
           q5 = Q1MaincellgroupRow41,
           q6 = Q1MaincellgroupRow51,
           q7 = Q1MaincellgroupRow61_rc,
           q8 = Q1MaincellgroupRow71,
           q9 = Q1MaincellgroupRow81,
           q10 = Q1MaincellgroupRow91
    ) %>% 
    # Drop rows whose course ID is still the unresolved template placeholder.
    filter(opdata_CourseID != "@X@course.course_id@X@") %>% 
    # Split "<subject>-<semester>-<section>" while keeping the original column.
    separate(opdata_CourseID, c("subject", "semester", "section"), sep = "-", remove = F) 

ps3$date <- ymd_hm(ps3$CompletedDate, tz = "America/Detroit")

# Same column layout as the semester 1-2 table (ps12_f) before its post-survey join.
ps3_f <- select(ps3,
                student_ID = opdata_username,
                course_ID = opdata_CourseID,
                subject, semester, section,
                int, uv, percomp, tv,
                q1:q10, 
                date)

# One row per student: the earliest response by completion date.
ps3_f <- ps3_f %>% arrange(student_ID, date) %>% distinct(student_ID, .keep_all = T)

# Semester-3 post-survey: score the post_* composites, keep one response per
# student, and attach the scores to the semester-3 pre-survey rows.
df2 <- read_excel("~/Dropbox/1_Research/utility_value_intervention_online_science/CS_2_7_13_17.xls")

df2 <- df2 %>%
    mutate(
        # Item columns are named Q2MaincellgroupRow01 .. Row91 in this file.
        # post_int and post_tv leave out item 4 (Row31), as in semesters 1-2.
        post_int = (Q2MaincellgroupRow01 + Q2MaincellgroupRow71 + Q2MaincellgroupRow91 + Q2MaincellgroupRow41) / 4,
        post_uv = (Q2MaincellgroupRow11 + Q2MaincellgroupRow51 + Q2MaincellgroupRow81) / 3,
        # NOTE(review): only item 3 is used here; the semester 1-2 post score
        # also averages in reverse-scored item 7 -- confirm this is intended.
        post_percomp = Q2MaincellgroupRow21,
        post_tv = (Q2MaincellgroupRow01 + Q2MaincellgroupRow71 + Q2MaincellgroupRow91 + Q2MaincellgroupRow41 + Q2MaincellgroupRow11 + Q2MaincellgroupRow51 + Q2MaincellgroupRow81) / 7,
        # This export is parsed month-day-year, unlike the other survey files.
        date = lubridate::mdy_hm(CompletedDate, tz = "America/Detroit")
    ) %>%
    select(student_ID = opdata_username, contains("post"), date) %>%
    # Keep each student's earliest response so the join below cannot duplicate
    # pre-survey rows (distinct() on whole rows does not guarantee that).
    arrange(student_ID, date) %>%
    distinct(student_ID, .keep_all = TRUE) %>%
    select(-date)

# Join on the raw username, then trim it. The result must be stored back in
# ps3_f: the original assigned it to a misspelled `pd3_f`, so the semester-3
# post scores never reached the merged data.
ps3_f <- left_join(ps3_f, df2, by = "student_ID")
ps3_f <- mutate(ps3_f,
                student_ID = str_sub(student_ID, start = 2, end = -3))

3. Merging and processing merged data

# Slim pre-survey tables with identical columns for both cohorts.
# student_ID is kept for semester 3 too; the original dropped it there, which
# left NA IDs in the stacked table.
ps12s <- dplyr::select(ps12_f, student_ID, course_ID, subject, semester, section, int, uv, percomp, tv, q1:q10)
ps3s <- dplyr::select(ps3_f, student_ID, course_ID, subject, semester, section, int, uv, percomp, tv, q1:q10)

# Stacked pre-survey data only.
x <- as_tibble(bind_rows(ps12s, ps3s))

# Full analysis table: all columns from both cohorts, stacked row-wise.
d <- bind_rows(ps12_f, ps3_f)

# treatment vs. control for sems 1 and 2
# https://docs.google.com/document/d/1g52pl-0JyEO26bFEJ9aE295dL7oZSOU1wrVXvVbd2lg/edit

# Code each course section as treatment (1) or control (0).
# case_when() uses the first matching condition. The original listed
# "OcnA-S116-01" and "AnPhA-S217-01" twice (once as 1, once as 0), so the second
# entries were unreachable; they are now the "-02" sections they were evidently
# meant to be. Those sections already received 0 from the fallback, so the
# resulting values are unchanged.
# NOTE(review): any course_ID not listed here silently falls through to 0
# (control). "Bio-S217-01" is the only Bio code without the "A" suffix --
# confirm it matches the data, otherwise that section is coded as control.
d <- mutate(d,
            intervention_dummy = case_when(
                # Fall 15
                course_ID == "AnPhA-S116-01" ~ 1,
                course_ID == "AnPhA-S116-02" ~ 0,
                course_ID == "BioA-S116-01" ~ 1,
                course_ID == "BioA-T116-01" ~ 0,
                course_ID == "FrScA-S116-01" ~ 1,
                course_ID == "FrScA-S116-02" ~ 0,
                course_ID == "FrScA-S116-03" ~ 1,
                course_ID == "FrScA-S116-04" ~ 0,
                course_ID == "FrScA-T116-01" ~ 0,
                course_ID == "OcnA-S116-01" ~ 1,
                course_ID == "OcnA-S116-02" ~ 0,
                course_ID == "OcnA-S116-03" ~ 1,
                course_ID == "OcnA-T116-01" ~ 0,
                course_ID == "PhysA-S116-01" ~ 1,
                course_ID == "PhysA-T116-01" ~ 0,

                # Spring 16
                course_ID == "AnPhA-S216-01" ~ 0,
                course_ID == "AnPhA-S216-02" ~ 1,
                course_ID == "BioA-S216-01" ~ 0,
                course_ID == "FrScA-S216-01" ~ 0,
                course_ID == "FrScA-S216-02" ~ 1,
                course_ID == "FrScA-S216-03" ~ 0,
                course_ID == "FrScA-S216-04" ~ 1,
                course_ID == "OcnA-S216-01" ~ 0,
                course_ID == "OcnA-S216-02" ~ 1,
                course_ID == "PhysA-S216-01" ~ 0,

                # Spring 17
                course_ID == "AnPhA-S217-01" ~ 1,
                course_ID == "AnPhA-S217-02" ~ 0,
                course_ID == "Bio-S217-01" ~ 1,
                course_ID == "FrScA-S217-01" ~ 1,
                course_ID == "FrScA-S217-02" ~ 0,
                # Same section, recorded with a trailing period.
                course_ID == "FrScA-S217-02." ~ 0,
                course_ID == "FrScA-S217-03" ~ 1,
                course_ID == "OcnA-S217-01" ~ 0,
                course_ID == "OcnA-S217-02" ~ 1,
                course_ID == "OcnA-S217-03" ~ 1,
                course_ID == "PhysA-S217-01" ~ 0,
                TRUE ~ 0
            ))

# Mark the pre-survey composites as such, to pair with the post_* columns.
d <- rename(d, pre_int = int, pre_uv = uv, pre_percomp = percomp, pre_tv = tv)
# date.x / date.y come from the semester 1-2 pre/post join.
d <- select(d, -date.y, -date.x)

3.5 CFA

BIC is lower for m1 (three factors) than for m2 (two factors); the difference in fit was also tested with a chi-square difference test.

library(lavaan)

# Int: 1, 4, 5, 8, 10
# UV: 2, 6, 9
# PC: 3, 7

# Three-factor CFA: interest, utility value, perceived competence.
# The factor names now follow the item key (Int: 1, 4, 5, 8, 10; UV: 2, 6, 9;
# PC: 3, 7). The original had `uv` and `int` swapped, so in the printed output
# below (produced with the old labels) "uv" is the interest factor and "int"
# is the utility-value factor. Fit statistics are unaffected by the relabel.
m1 <- '
    int =~ q1 + q4 + q5 + q8 + q10
    uv =~ q2 + q6 + q9
    pc =~ q3 + q7
'

out1 <- sem(m1, data = d)
summary(out1, fit.measures = TRUE, standardized = TRUE)
## lavaan 0.6-2 ended normally after 35 iterations
## 
##   Optimization method                           NLMINB
##   Number of free parameters                         23
## 
##                                                   Used       Total
##   Number of observations                           640         662
## 
##   Estimator                                         ML
##   Model Fit Test Statistic                     156.377
##   Degrees of freedom                                32
##   P-value (Chi-square)                           0.000
## 
## Model test baseline model:
## 
##   Minimum Function Test Statistic             2949.434
##   Degrees of freedom                                45
##   P-value                                        0.000
## 
## User model versus baseline model:
## 
##   Comparative Fit Index (CFI)                    0.957
##   Tucker-Lewis Index (TLI)                       0.940
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)              -6210.618
##   Loglikelihood unrestricted model (H1)      -6132.430
## 
##   Number of free parameters                         23
##   Akaike (AIC)                               12467.237
##   Bayesian (BIC)                             12569.850
##   Sample-size adjusted Bayesian (BIC)        12496.827
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.078
##   90 Percent Confidence Interval          0.066  0.090
##   P-value RMSEA <= 0.05                          0.000
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.041
## 
## Parameter Estimates:
## 
##   Information                                 Expected
##   Information saturated (h1) model          Structured
##   Standard Errors                             Standard
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   uv =~                                                                 
##     q1                1.000                               0.529    0.802
##     q4                0.979    0.057   17.102    0.000    0.518    0.652
##     q5                0.994    0.046   21.624    0.000    0.526    0.789
##     q8                1.050    0.044   23.708    0.000    0.555    0.849
##     q10               1.285    0.061   21.204    0.000    0.680    0.776
##   int =~                                                                
##     q2                1.000                               0.719    0.773
##     q6                0.856    0.049   17.492    0.000    0.615    0.767
##     q9                0.949    0.056   17.029    0.000    0.682    0.739
##   pc =~                                                                 
##     q3                1.000                               0.549    0.644
##     q7                0.709    0.073    9.736    0.000    0.389    0.457
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   uv ~~                                                                 
##     int               0.233    0.022   10.433    0.000    0.612    0.612
##     pc                0.247    0.022   11.154    0.000    0.852    0.852
##   int ~~                                                                
##     pc                0.316    0.031   10.250    0.000    0.799    0.799
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .q1                0.155    0.011   14.232    0.000    0.155    0.357
##    .q4                0.363    0.022   16.418    0.000    0.363    0.575
##    .q5                0.168    0.012   14.563    0.000    0.168    0.378
##    .q8                0.119    0.009   12.633    0.000    0.119    0.279
##    .q10               0.304    0.021   14.826    0.000    0.304    0.397
##    .q2                0.347    0.029   12.033    0.000    0.347    0.402
##    .q6                0.266    0.022   12.280    0.000    0.266    0.412
##    .q9                0.388    0.029   13.192    0.000    0.388    0.454
##    .q3                0.425    0.039   10.817    0.000    0.425    0.585
##    .q7                0.574    0.036   16.063    0.000    0.574    0.791
##     uv                0.280    0.024   11.830    0.000    1.000    1.000
##     int               0.517    0.049   10.524    0.000    1.000    1.000
##     pc                0.302    0.045    6.636    0.000    1.000    1.000
# Two-factor alternative: interest and utility-value items load on a single
# task-value factor; perceived competence stays separate.
m2 <- '
    tv =~ q1 + q4 + q5 + q8 + q10 + q2 + q6 + q9
    pc =~ q3 + q7
'

out2 <- sem(model = m2, data = d)
summary(out2,
        fit.measures = TRUE,
        standardized = TRUE)
## lavaan 0.6-2 ended normally after 32 iterations
## 
##   Optimization method                           NLMINB
##   Number of free parameters                         21
## 
##                                                   Used       Total
##   Number of observations                           640         662
## 
##   Estimator                                         ML
##   Model Fit Test Statistic                     496.942
##   Degrees of freedom                                34
##   P-value (Chi-square)                           0.000
## 
## Model test baseline model:
## 
##   Minimum Function Test Statistic             2949.434
##   Degrees of freedom                                45
##   P-value                                        0.000
## 
## User model versus baseline model:
## 
##   Comparative Fit Index (CFI)                    0.841
##   Tucker-Lewis Index (TLI)                       0.789
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)              -6380.901
##   Loglikelihood unrestricted model (H1)      -6132.430
## 
##   Number of free parameters                         21
##   Akaike (AIC)                               12803.801
##   Bayesian (BIC)                             12897.492
##   Sample-size adjusted Bayesian (BIC)        12830.818
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.146
##   90 Percent Confidence Interval          0.135  0.157
##   P-value RMSEA <= 0.05                          0.000
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.080
## 
## Parameter Estimates:
## 
##   Information                                 Expected
##   Information saturated (h1) model          Structured
##   Standard Errors                             Standard
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   tv =~                                                                 
##     q1                1.000                               0.520    0.788
##     q4                0.991    0.059   16.832    0.000    0.515    0.648
##     q5                0.991    0.048   20.751    0.000    0.515    0.773
##     q8                1.030    0.046   22.267    0.000    0.535    0.819
##     q10               1.319    0.063   21.084    0.000    0.685    0.783
##     q2                0.875    0.071   12.279    0.000    0.455    0.489
##     q6                0.877    0.061   14.465    0.000    0.456    0.568
##     q9                1.049    0.069   15.103    0.000    0.545    0.590
##   pc =~                                                                 
##     q3                1.000                               0.544    0.638
##     q7                0.723    0.075    9.586    0.000    0.393    0.462
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   tv ~~                                                                 
##     pc                0.256    0.022   11.427    0.000    0.908    0.908
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .q1                0.164    0.011   14.736    0.000    0.164    0.379
##    .q4                0.366    0.022   16.508    0.000    0.366    0.580
##    .q5                0.179    0.012   15.044    0.000    0.179    0.403
##    .q8                0.141    0.010   13.979    0.000    0.141    0.330
##    .q10               0.297    0.020   14.848    0.000    0.297    0.387
##    .q2                0.657    0.038   17.292    0.000    0.657    0.761
##    .q6                0.437    0.026   16.986    0.000    0.437    0.678
##    .q9                0.557    0.033   16.875    0.000    0.557    0.652
##    .q3                0.431    0.040   10.908    0.000    0.431    0.593
##    .q7                0.570    0.036   15.909    0.000    0.570    0.787
##     tv                0.270    0.023   11.561    0.000    1.000    1.000
##     pc                0.295    0.045    6.523    0.000    1.000    1.000
# Chi-square difference test between the three-factor (out1) and two-factor
# (out2) models.
anova(out1, out2)
## Chi Square Difference Test
## 
##      Df   AIC   BIC  Chisq Chisq diff Df diff Pr(>Chisq)    
## out1 32 12467 12570 156.38                                  
## out2 34 12804 12898 496.94     340.56       2  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

4. Processing all gradebook data

# Semester-3 gradebook export, renamed to the shared ID columns; the misspelled
# Grade_Catagory column is corrected so it lines up when the files are stacked.
x <- read_csv("RR_S3.csv") %>%
    select(course_ID = Course_ID,
           student_ID = CU_Pk1,
           Item_Position:last_access_date) %>%
    rename(Grade_Category = Grade_Catagory)

# Semester 1-2 gradebook export, reduced to the same ID columns.
RR_Course_Data <- RR_Course_Data %>%
    select(course_ID = CourseSectionOrigID,
           student_ID = Bb_UserPK,
           Gradebook_Item:last_access_date)

# All gradebook rows across the three semesters.
xx <- bind_rows(RR_Course_Data, x)
# write_csv(RR_Course_Data, "s12_gradebook_data.csv")
# write_csv(RR_Course_Data, "s12_gradebook_data.csv")

5. Merging self-report and gradebook data (not run yet)

# Give both tables a character student_ID so the join keys have the same type.
d <- mutate(d, student_ID = as.character(student_ID))
xx <- mutate(xx, student_ID = as.character(student_ID))

# NOTE(review): both tables also carry course_ID, so this join produces
# course_ID.x / course_ID.y, and it yields one row per matching gradebook row
# for each student -- confirm that is what is wanted.
df <- left_join(d, xx, by = "student_ID")

Processing trace data

library(readxl)
# Per-student course records for semesters 1-2 (time spent comes from a
# separate file) and for semester 3 (a single spreadsheet).
ts_12 <- read_csv("RR_Minutes.csv")

td_12 <- read_csv("RR_Course_Data.csv")
td_3 <- read_excel("Ranelluci Study Data Pull Request.xlsx")

# The course data file repeats student-level fields on every row, so
# distinct() collapses the repeats.
td_12 <- td_12 %>%
    select(student_ID = Bb_UserPK, course_ID = CourseSectionOrigID,
           gender = Gender, enrollment_reason = EnrollmentReason,
           enrollment_status = EnrollmentStatus,
           final_grade = FinalGradeCEMS) %>%
    distinct()

ts_12 <- ts_12 %>%
    select(student_ID = Bb_UserPK,
           course_ID = CourseSectionOrigID,
           time_spent = TimeSpent)

# Join keys are spelled out instead of relying on dplyr's natural join over
# all shared column names; the two shared columns are exactly these.
td_12 <- left_join(td_12, ts_12, by = c("student_ID", "course_ID"))

# Semester 3 already has time spent in the same sheet; final_grade is coerced
# to numeric before the two cohorts are stacked.
td_3 <- td_3 %>%
    select(student_ID = CEMS_Bb_UserPK,
           course_ID = Section_ID,
           gender = Gender,
           enrollment_reason = EnrollmentReason,
           enrollment_status = EnrollmentStatus,
           final_grade = Final_Grade,
           time_spent = `Sum of time spent in course`) %>%
    mutate(final_grade = as.numeric(final_grade))

trace_data <- bind_rows(td_12, td_3)

Merging trace data with other data

# Convert the survey IDs to integer before joining to the trace data;
# IDs that are not purely numeric become NA (with a warning).
d$student_ID <- as.integer(d$student_ID)

# Join keys are spelled out instead of relying on dplyr's natural join; these
# are the two columns the tables share.
d <- left_join(d, trace_data, by = c("student_ID", "course_ID"))

# Export table: IDs and course fields, pre-survey composites, post-survey
# composites, and the trace variables.
# NOTE(review): intervention_dummy and gender are not exported -- confirm that
# is deliberate.
dd <- select(d, student_ID:pre_tv, post_int:post_percomp, enrollment_reason, enrollment_status, final_grade, time_spent)
write_csv(dd, "online-science-data.csv")

6. Pre-post analysis

Using multi-level models by course.

6.0 Looking first at n’s

# Number of students per condition (0 = control, 1 = treatment).
count(d, intervention_dummy)
## # A tibble: 2 x 2
##   intervention_dummy     n
##                <dbl> <int>
## 1                  0   304
## 2                  1   358
# The same count restricted to rows with no missing value in any column.
# NOTE(review): the recorded output below is empty. A likely cause is that `d`
# still carries columns that are NA for a whole cohort (e.g. `date` for
# semesters 1-2), so no row is complete -- check only the analysis columns
# instead.
count(d[complete.cases(d), ], intervention_dummy)
## # A tibble: 0 x 2
## # ... with 2 variables: intervention_dummy <dbl>, n <int>

6A. Just looking at pre-post changes in interest and UV and grades

# Post-survey interest on the intervention dummy, random intercept per course.
sjPlot::sjt.lmer(
    lme4::lmer(
        post_int ~ intervention_dummy + (1 | course_ID),
        data = d
    )
)
    post_int
    B CI p
Fixed Parts
(Intercept)   4.29 3.35 – 5.23 <.001
intervention_dummy   -0.62 -1.67 – 0.44 .270
Random Parts
σ2   0.762
τ00, course_ID   0.286
Ncourse_ID   11
ICCcourse_ID   0.273
Observations   95
R2 / Ω02   .018 / .286
# Post-survey utility value on the intervention dummy, random intercept per course.
sjPlot::sjt.lmer(
    lme4::lmer(
        post_uv ~ intervention_dummy + (1 | course_ID),
        data = d
    )
)
    post_uv
    B CI p
Fixed Parts
(Intercept)   3.75 2.81 – 4.69 <.001
intervention_dummy   -0.47 -1.51 – 0.56 .381
Random Parts
σ2   0.879
τ00, course_ID   0.204
Ncourse_ID   11
ICCcourse_ID   0.189
Observations   95
R2 / Ω02   .010 / .197
# Final grade on the intervention dummy, random intercept per course.
sjPlot::sjt.lmer(
    lme4::lmer(
        final_grade ~ intervention_dummy + (1 | course_ID),
        data = d
    )
)
    final_grade
    B CI p
Fixed Parts
(Intercept)   76.07 72.69 – 79.45 <.001
intervention_dummy   1.83 -2.81 – 6.47 .454
Random Parts
σ2   442.712
τ00, course_ID   15.906
Ncourse_ID   36
ICCcourse_ID   0.035
Observations   634
R2 / Ω02   .002 / .036

6B. With pre-values added (pre per-comp for final grades)

# Post-survey interest, controlling for pre-survey interest.
sjPlot::sjt.lmer(
    lme4::lmer(
        post_int ~ pre_int + intervention_dummy + (1 | course_ID),
        data = d
    )
)
    post_int
    B CI p
Fixed Parts
(Intercept)   0.68 -0.61 – 1.96 .317
pre_int   0.80 0.56 – 1.03 <.001
intervention_dummy   -0.17 -0.89 – 0.55 .643
Random Parts
σ2   0.582
τ00, course_ID   0.021
Ncourse_ID   11
ICCcourse_ID   0.035
Observations   94
R2 / Ω02   .328 / .352
# Post-survey utility value, controlling for pre-survey utility value.
sjPlot::sjt.lmer(
    lme4::lmer(
        post_uv ~ pre_uv + intervention_dummy + (1 | course_ID),
        data = d
    )
)
    post_uv
    B CI p
Fixed Parts
(Intercept)   0.77 -0.37 – 1.91 .195
pre_uv   0.76 0.54 – 0.98 <.001
intervention_dummy   -0.14 -0.97 – 0.70 .756
Random Parts
σ2   0.596
τ00, course_ID   0.120
Ncourse_ID   11
ICCcourse_ID   0.168
Observations   94
R2 / Ω02   .321 / .435
# Final grade, controlling for pre-survey perceived competence.
sjPlot::sjt.lmer(
    lme4::lmer(
        final_grade ~ pre_percomp + intervention_dummy + (1 | course_ID),
        data = d
    )
)
    final_grade
    B CI p
Fixed Parts
(Intercept)   65.08 55.64 – 74.53 <.001
pre_percomp   3.06 0.63 – 5.48 .014
intervention_dummy   1.51 -3.11 – 6.13 .534
Random Parts
σ2   438.364
τ00, course_ID   15.461
Ncourse_ID   36
ICCcourse_ID   0.034
Observations   627
R2 / Ω02   .011 / .045

6C. With pre-values added + interactions with pre perceived competence

# Post-survey interest with an intervention x pre perceived competence
# interaction, controlling for pre-survey interest.
sjPlot::sjt.lmer(
    lme4::lmer(
        post_int ~ pre_int + intervention_dummy*pre_percomp + (1 | course_ID),
        data = d
    )
)
    post_int
    B CI p
Fixed Parts
(Intercept)   0.12 -4.17 – 4.41 .955
pre_int   0.71 0.41 – 1.01 <.001
intervention_dummy   0.21 -4.23 – 4.65 .928
pre_percomp   0.23 -0.85 – 1.31 .679
intervention_dummy:pre_percomp   -0.08 -1.16 – 1.00 .882
Random Parts
σ2   0.588
τ00, course_ID   0.025
Ncourse_ID   11
ICCcourse_ID   0.041
Observations   94
R2 / Ω02   .329 / .356
# Post-survey utility value with an intervention x pre perceived competence
# interaction, controlling for pre-survey utility value.
sjPlot::sjt.lmer(
    lme4::lmer(
        post_uv ~ pre_uv + intervention_dummy*pre_percomp + (1 | course_ID),
        data = d
    )
)
    post_uv
    B CI p
Fixed Parts
(Intercept)   0.63 -4.05 – 5.31 .793
pre_uv   0.70 0.45 – 0.95 <.001
intervention_dummy   -0.35 -5.11 – 4.40 .886
pre_percomp   0.09 -1.03 – 1.22 .874
intervention_dummy:pre_percomp   0.07 -1.08 – 1.22 .907
Random Parts
σ2   0.601
τ00, course_ID   0.123
Ncourse_ID   11
ICCcourse_ID   0.170
Observations   94
R2 / Ω02   .321 / .437
# Final grade with an intervention x pre perceived competence interaction,
# controlling for pre-survey utility value.
sjPlot::sjt.lmer(
    lme4::lmer(
        final_grade ~ pre_uv + intervention_dummy*pre_percomp + (1 | course_ID),
        data = d
    )
)
    final_grade
    B CI p
Fixed Parts
(Intercept)   61.31 47.11 – 75.51 <.001
pre_uv   0.05 -2.66 – 2.75 .973
intervention_dummy   9.09 -9.24 – 27.41 .333
pre_percomp   4.05 0.28 – 7.82 .036
intervention_dummy:pre_percomp   -2.02 -6.89 – 2.85 .417
Random Parts
σ2   436.011
τ00, course_ID   16.184
Ncourse_ID   36
ICCcourse_ID   0.036
Observations   624
R2 / Ω02   .012 / .047

6D. With all variables added

# Post-survey utility value on all three pre-survey composites plus the
# intervention dummy.
sjPlot::sjt.lmer(
    lme4::lmer(
        post_uv ~ pre_uv + pre_int + pre_percomp + intervention_dummy + (1 | course_ID),
        data = d
    )
)
    post_uv
    B CI p
Fixed Parts
(Intercept)   0.28 -1.21 – 1.77 .718
pre_uv   0.69 0.43 – 0.96 <.001
pre_int   0.08 -0.26 – 0.41 .669
pre_percomp   0.10 -0.24 – 0.44 .564
intervention_dummy   -0.07 -0.92 – 0.77 .872
Random Parts
σ2   0.607
τ00, course_ID   0.110
Ncourse_ID   11
ICCcourse_ID   0.153
Observations   93
R2 / Ω02   .328 / .431
# Post-survey interest on all three pre-survey composites plus the
# intervention dummy.
sjPlot::sjt.lmer(
    lme4::lmer(
        post_int ~ pre_uv + pre_int + pre_percomp + intervention_dummy + (1 | course_ID),
        data = d
    )
)
    post_int
    B CI p
Fixed Parts
(Intercept)   0.29 -1.10 – 1.69 .686
pre_uv   0.19 -0.06 – 0.44 .151
pre_int   0.63 0.32 – 0.95 <.001
pre_percomp   0.09 -0.24 – 0.42 .597
intervention_dummy   -0.13 -0.87 – 0.61 .733
Random Parts
σ2   0.578
τ00, course_ID   0.028
Ncourse_ID   11
ICCcourse_ID   0.046
Observations   93
R2 / Ω02   .341 / .372
# Final grade on all three pre-survey composites plus the intervention dummy.
sjPlot::sjt.lmer(
    lme4::lmer(
        final_grade ~ pre_uv + pre_int + pre_percomp + intervention_dummy + (1 | course_ID),
        data = d
    )
)
    final_grade
    B CI p
Fixed Parts
(Intercept)   53.26 40.00 – 66.51 <.001
pre_uv   -1.55 -4.46 – 1.36 .304
pre_int   5.75 1.98 – 9.53 .003
pre_percomp   1.08 -2.01 – 4.16 .494
intervention_dummy   1.74 -2.92 – 6.40 .478
Random Parts
σ2   434.151
τ00, course_ID   15.909
Ncourse_ID   36
ICCcourse_ID   0.035
Observations   616
R2 / Ω02   .026 / .061