Workspace doc

https://docs.google.com/document/d/1g52pl-0JyEO26bFEJ9aE295dL7oZSOU1wrVXvVbd2lg/edit

Next steps

look at measurement of interest and value
add dummy for intervention

Overall takeaways

UV, interest, and perceived competence each seem to need to be in separate models.
Task value: its components (UV and interest), along with perceived competence, seem to need to be in separate models rather than combined.
Using just the control data, it seems important to run the models separately.

Items

In general…

I think this course is an interesting subject. (Int)
What I am learning in this class is relevant to my life. (UV)
I consider this topic to be one of my best subjects. (PC)
I am not interested in this course. (Int - Rev)
I think I will like learning about this topic. (Int)
I think what we are studying in this course is useful for me to know. (UV)
I don’t feel comfortable when it comes to answering questions in this area. (PC - Rev)
I think this subject is interesting. (Int)
I find the content of this course to be personally meaningful. (UV)
I’ve always wanted to learn more about this subject. (Int)

Int: 1, 4, 5, 8, 10 UV: 2, 6, 9 PC: 3, 7

1. Pre-processing (for semesters 1 and 2)

# install.packages("devtools")
# devtools::install_github("jrosen48/jmRtools")
Sys.setenv(TZ='America/Detroit')

library(jmRtools)
library(readxl)
library(tidyverse)
library(lubridate)

# Course data export; reshaped later in the gradebook section.
RR_Course_Data <- read_csv("RR_Course_Data.csv")

# Pre-survey for the Fall 2015 and Spring 2016 semesters.
CS1 <- read_csv("data/CS1.csv")

# Keep rows that answered the first item, and drop specific user IDs plus
# unresolved "@X@...@X@" template placeholders. Each `!=` test also drops rows
# where the compared column is NA.
# TODO (carried over from the original): this exclusion list must be revisited.
CS1_ss <- CS1 %>%
    dplyr::filter(
        !is.na(Q1MaincellgroupRow1),
        opdata_username != "_49147_1",
        opdata_username != "_93993_1",
        opdata_username != "@X@user.pk_string@X@",
        opdata_username != "_80624_1",
        opdata_CourseID != "@X@course.course_id@X@",
        opdata_username != ""
    )

# Order by user, course, then start time.
ps12 <- CS1_ss %>%
    dplyr::arrange(opdata_username, opdata_CourseID, StartDate)

# Item key -- Int: 1, 4, 5, 8, 10 | UV: 2, 6, 9 | PC: 3, 7
# Items 4 and 7 are reverse-scored (1<->5, 2<->4; 3 is left unchanged).
ps12 <- ps12 %>%
    mutate(
        Q1MaincellgroupRow4_rc = car::recode(Q1MaincellgroupRow4, "1=5; 2=4; 5=1; 4=2"),
        Q1MaincellgroupRow7_rc = car::recode(Q1MaincellgroupRow7, "1=5; 2=4; 5=1; 4=2")
    ) %>%
    # Short item aliases; q4 and q7 are the reverse-scored versions.
    # Keep q1..q10 adjacent: later code selects them with `q1:q10`.
    mutate(
        q1 = Q1MaincellgroupRow1,
        q2 = Q1MaincellgroupRow2,
        q3 = Q1MaincellgroupRow3,
        q4 = Q1MaincellgroupRow4_rc,
        q5 = Q1MaincellgroupRow5,
        q6 = Q1MaincellgroupRow6,
        q7 = Q1MaincellgroupRow7_rc,
        q8 = Q1MaincellgroupRow8,
        q9 = Q1MaincellgroupRow9,
        q10 = Q1MaincellgroupRow10
    ) %>%
    # Scale scores are plain item means, so any missing item yields NA.
    # NOTE(review): `tv` averages 7 items and leaves out reverse-scored item 4,
    # whereas the semester-3 `tv` composite includes it (8 items) -- confirm
    # which item set is intended.
    mutate(
        int = (q1 + q4 + q8 + q10 + q5) / 5,
        uv = (q2 + q6 + q9) / 3,
        percomp = (q3 + q7) / 2,
        tv = (q1 + q8 + q10 + q5 + q2 + q6 + q9) / 7
    )

# Split each course ID on "-" and use the first three pieces as
# subject, semester, and section (a missing piece becomes NA).
course_parts <- str_split(ps12$opdata_CourseID, "-")

ps12_f <- ps12 %>%
    mutate(
        subject = map_chr(course_parts, `[`, 1),
        semester = map_chr(course_parts, `[`, 2),
        section = map_chr(course_parts, `[`, 3),
        date = ymd_hm(CompletedDate, tz = "America/Detroit")
    ) %>%
    select(
        student_ID = opdata_username,
        course_ID = opdata_CourseID,
        subject, semester, section,
        int, uv, percomp, tv,
        q1:q10,
        date
    ) %>%
    # Strip the first character and the last two characters of the username
    # (e.g. "_49147_1" -> "49147").
    mutate(student_ID = str_sub(student_ID, start = 2L, end = -3L)) %>%
    # Keep only the earliest-dated response per student.
    arrange(student_ID, date) %>%
    distinct(student_ID, .keep_all = TRUE)

# Survey export CS2 for the Fall 2015 and Spring 2016 semesters. It supplies the
# post_* scores, i.e. this is the POST-survey (the original comment said "pre").
CS2 <- read_csv("data/CS2.csv")

# Reverse-score items 4 and 7 (1<->5, 2<->4; 3 is left unchanged).
CS2$Q1MaincellgroupRow4_rc <- car::recode(CS2$Q1MaincellgroupRow4, "1=5; 2=4; 5=1; 4=2")
CS2$Q1MaincellgroupRow7_rc <- car::recode(CS2$Q1MaincellgroupRow7, "1=5; 2=4; 5=1; 4=2")

# NOTE(review): post_int and post_tv leave out reverse-scored item 4 even though
# the pre-survey `int` score includes it; the original author also asked whether
# item 4 is the one meant to be dropped. Confirm the intended item set.
CS2$post_int <- (CS2$Q1MaincellgroupRow1 + CS2$Q1MaincellgroupRow8 + CS2$Q1MaincellgroupRow10 + CS2$Q1MaincellgroupRow5) / 4
CS2$post_uv <- (CS2$Q1MaincellgroupRow2 + CS2$Q1MaincellgroupRow6 + CS2$Q1MaincellgroupRow9) / 3
CS2$post_tv <- (CS2$Q1MaincellgroupRow1 + CS2$Q1MaincellgroupRow8 + CS2$Q1MaincellgroupRow10 + CS2$Q1MaincellgroupRow5 + CS2$Q1MaincellgroupRow2 + CS2$Q1MaincellgroupRow6 + CS2$Q1MaincellgroupRow9) / 7
CS2$post_percomp <- (CS2$Q1MaincellgroupRow3 + CS2$Q1MaincellgroupRow7_rc) / 2

CS2$date <- lubridate::ymd_hm(CS2$CompletedDate, tz = "America/Detroit")

CS2 <- CS2 %>%
    # The unresolved-template placeholder has to be tested on the RAW username:
    # once trimmed by str_sub() it no longer equals the literal, so the previous
    # post-trim comparison could never match.
    filter(opdata_username != "@X@user.pk_string@X@") %>%
    mutate(student_ID = str_sub(opdata_username, start = 2L, end = -3L)) %>%
    select(student_ID, contains("post"), date)

# Only fully observed post-survey rows are kept.
CS2 <- CS2[complete.cases(CS2), ]

# Same excluded users as in the pre-survey, expressed as trimmed IDs.
# (The former comparison of student_ID with the course-ID placeholder was
# dropped: a student ID can never equal it.)
CS2 <- filter(CS2,
              student_ID != "49147",
              student_ID != "93993",
              student_ID != "80624",
              student_ID != "")

# Earliest-dated complete response per student.
CS2 <- arrange(CS2, student_ID, date)
CS2 <- distinct(CS2, student_ID, .keep_all = TRUE)

# Both tables carry `date`, so the join yields date.x (pre) and date.y (post).
ps12_f <- left_join(ps12_f, CS2, by = "student_ID")

2. Pre-processing (for semester 3)

# Semester-3 survey export. Item columns here are named Row01, Row11, ..., Row91
# and are mapped to q1..q10 below.
ps3 <- read_excel("~/Dropbox/1_Research/utility_value_intervention_online_science/CS_1_7_13_17.xls")

# Reverse-score items 4 (Row31) and 7 (Row61): 1<->5, 2<->4; 3 is left unchanged.
ps3$Q1MaincellgroupRow31_rc <- car::recode(ps3$Q1MaincellgroupRow31, "1=5; 2=4; 5=1; 4=2")
ps3$Q1MaincellgroupRow61_rc <- car::recode(ps3$Q1MaincellgroupRow61, "1=5; 2=4; 5=1; 4=2")

# Earlier hand-computed composites, superseded by composite_mean_maker() below:
# ps3$int <- (ps3$Q1MaincellgroupRow01 + ps3$Q1MaincellgroupRow71 + ps3$Q1MaincellgroupRow91+ ps3$Q1MaincellgroupRow41) / 4
# ps3$uv <- (ps3$Q1MaincellgroupRow11 + ps3$Q1MaincellgroupRow51+ ps3$Q1MaincellgroupRow81) / 3 # dropped 7
# ps3$percomp <- (ps3$Q1MaincellgroupRow21 + ps3$Q1MaincellgroupRow61_rc) / 2

# Item key -- Int: 1, 4, 5, 8, 10 | UV: 2, 6, 9 | PC: 3, 7

# Composites are built from the full `ps3` table, so this step must run
# before the placeholder rows are filtered out.
# Keep q1..q10 adjacent: later code selects them with `q1:q10`.
ps3 <- mutate(
    ps3,
    int = composite_mean_maker(ps3, Q1MaincellgroupRow01, Q1MaincellgroupRow31_rc, Q1MaincellgroupRow41, Q1MaincellgroupRow71, Q1MaincellgroupRow91),
    uv = composite_mean_maker(ps3, Q1MaincellgroupRow11, Q1MaincellgroupRow51, Q1MaincellgroupRow81),
    percomp = composite_mean_maker(ps3, Q1MaincellgroupRow21, Q1MaincellgroupRow61_rc),
    tv = composite_mean_maker(ps3, Q1MaincellgroupRow01, Q1MaincellgroupRow31_rc, Q1MaincellgroupRow41, Q1MaincellgroupRow71, Q1MaincellgroupRow91,Q1MaincellgroupRow11, Q1MaincellgroupRow51, Q1MaincellgroupRow81),
    q1 = Q1MaincellgroupRow01,
    q2 = Q1MaincellgroupRow11,
    q3 = Q1MaincellgroupRow21,
    q4 = Q1MaincellgroupRow31_rc,
    q5 = Q1MaincellgroupRow41,
    q6 = Q1MaincellgroupRow51,
    q7 = Q1MaincellgroupRow61_rc,
    q8 = Q1MaincellgroupRow71,
    q9 = Q1MaincellgroupRow81,
    q10 = Q1MaincellgroupRow91
)

# Drop rows whose course ID is an unresolved template placeholder.
ps3 <- filter(ps3, opdata_CourseID != "@X@course.course_id@X@")

# Split the course ID on "-" into subject / semester / section, keeping the original column.
ps3 <- separate(ps3, opdata_CourseID, c("subject", "semester", "section"), sep = "-", remove = FALSE)

ps3$date <- ymd_hm(ps3$CompletedDate, tz = "America/Detroit")

# Analysis columns for semester 3, reduced to the earliest-dated response per student.
ps3_f <- ps3 %>%
    select(
        student_ID = opdata_username,
        course_ID = opdata_CourseID,
        subject, semester, section,
        int, uv, percomp, tv,
        q1:q10,
        date
    ) %>%
    arrange(student_ID, date) %>%
    distinct(student_ID, .keep_all = TRUE)

# Second survey export for semester 3; supplies the post_* scores.
df2 <- read_excel("~/Dropbox/1_Research/utility_value_intervention_online_science/CS_2_7_13_17.xls")

# NOTE(review): as in semesters 1-2, post_int / post_tv leave out item 4 (Row31).
df2$post_int <- (df2$Q2MaincellgroupRow01 + df2$Q2MaincellgroupRow71 + df2$Q2MaincellgroupRow91 + df2$Q2MaincellgroupRow41) / 4
df2$post_uv <- (df2$Q2MaincellgroupRow11 + df2$Q2MaincellgroupRow51 + df2$Q2MaincellgroupRow81) / 3
# NOTE(review): post_percomp uses item 3 (Row21) only, whereas semesters 1-2
# average items 3 and reverse-scored 7 -- confirm whether Row61 should be included.
df2$post_percomp <- (df2$Q2MaincellgroupRow21)
df2$post_tv <- (df2$Q2MaincellgroupRow01 + df2$Q2MaincellgroupRow71 + df2$Q2MaincellgroupRow91 + df2$Q2MaincellgroupRow41 + df2$Q2MaincellgroupRow11 + df2$Q2MaincellgroupRow51 + df2$Q2MaincellgroupRow81) / 7

# This file's CompletedDate is parsed as month-day-year (the pre-survey uses ymd_hm).
df2 <- mutate(df2, date = lubridate::mdy_hm(CompletedDate, tz = "America/Detroit"))

# One row per student (earliest-dated response), mirroring the semester 1-2
# post-survey handling, so the join below cannot duplicate pre-survey rows.
df2 <- df2 %>%
    select(student_ID = opdata_username, contains("post"), date) %>%
    arrange(student_ID, date) %>%
    distinct(student_ID, .keep_all = TRUE) %>%
    select(-date)

# Fix: the join result used to be assigned to the misspelled `pd3_f`, which is
# never used afterwards, so semester-3 post scores never reached the merged
# data. The join runs on the raw username, i.e. before student_ID is trimmed.
# NOTE: model output rendered before this fix does not include semester-3
# post scores; re-run the analysis sections to refresh it.
ps3_f <- left_join(ps3_f, df2, by = "student_ID")

# Strip the first character and the last two characters of the username.
ps3_f <- mutate(ps3_f,
                student_ID = str_sub(student_ID, start = 2, end = -3))

3. Merging and processing merged data

# Pre-survey columns only, stacked across all three semesters.
# Fix: `ps3s` previously left out student_ID, so semester-3 rows of `x`
# had no student identifier, unlike the semester 1-2 rows.
ps12s <- dplyr::select(ps12_f, student_ID, course_ID, subject, semester, section, int, uv, percomp, tv, q1:q10)
ps3s <- dplyr::select(ps3_f, student_ID, course_ID, subject, semester, section, int, uv, percomp, tv, q1:q10)

x <- as_tibble(bind_rows(ps12s, ps3s))

# Full merged table (pre scores plus whatever post columns each cohort carries);
# this is the table used by the rest of the analysis.
d <- bind_rows(ps12_f, ps3_f)

# treatment vs. control for sems 1 and 2
# https://docs.google.com/document/d/1g52pl-0JyEO26bFEJ9aE295dL7oZSOU1wrVXvVbd2lg/edit

# Code each course section as treatment (1) or control (0).
# Fixes: two conditions were exact duplicates of the line above them
# ("OcnA-S116-01" and "AnPhA-S217-01"), so their `~ 0` branches could never
# fire; following the section numbering they are now "-02". Results are
# unchanged, because unmatched sections already fall through to `TRUE ~ 0`.
# NOTE(review): `TRUE ~ 0` silently codes any unlisted or misspelled course_ID
# (and NA) as control -- consider NA_real_ so such rows surface.
d <- mutate(d,
            intervention_dummy = case_when(
                # Fall 15
                course_ID == "AnPhA-S116-01" ~ 1,
                course_ID == "AnPhA-S116-02" ~ 0,
                course_ID == "BioA-S116-01" ~ 1,
                course_ID == "BioA-T116-01" ~ 0,
                course_ID == "FrScA-S116-01" ~ 1,
                course_ID == "FrScA-S116-02" ~ 0,
                course_ID == "FrScA-S116-03" ~ 1,
                course_ID == "FrScA-S116-04" ~ 0,
                course_ID == "FrScA-T116-01" ~ 0,
                course_ID == "OcnA-S116-01" ~ 1,
                course_ID == "OcnA-S116-02" ~ 0,
                course_ID == "OcnA-S116-03" ~ 1,
                course_ID == "OcnA-T116-01" ~ 0,
                course_ID == "PhysA-S116-01" ~ 1,
                course_ID == "PhysA-T116-01" ~ 0,
                
                # Spring 16
                course_ID == "AnPhA-S216-01" ~ 0,
                course_ID == "AnPhA-S216-02" ~ 1,
                course_ID == "BioA-S216-01" ~ 0,
                course_ID == "FrScA-S216-01" ~ 0,
                course_ID == "FrScA-S216-02" ~ 1,
                course_ID == "FrScA-S216-03" ~ 0,
                course_ID == "FrScA-S216-04" ~ 1,
                course_ID == "OcnA-S216-01" ~ 0,
                course_ID == "OcnA-S216-02" ~ 1,
                course_ID == "PhysA-S216-01" ~ 0,
                
                # Spring 17
                course_ID == "AnPhA-S217-01" ~ 1,
                course_ID == "AnPhA-S217-02" ~ 0,
                # NOTE(review): every other biology section is spelled "BioA-";
                # if the data uses "BioA-S217-01" this treatment section falls
                # through to control -- verify against the data.
                course_ID == "Bio-S217-01" ~ 1,
                course_ID == "FrScA-S217-01" ~ 1,
                course_ID == "FrScA-S217-02" ~ 0,
                # Variant with a trailing period, kept exactly as in the original.
                course_ID == "FrScA-S217-02." ~ 0,
                course_ID == "FrScA-S217-03" ~ 1,
                course_ID == "OcnA-S217-01" ~ 0,
                course_ID == "OcnA-S217-02" ~ 1,
                course_ID == "OcnA-S217-03" ~ 1,
                course_ID == "PhysA-S217-01" ~ 0,
                TRUE ~ 0
            ))

# Mark the baseline scale scores as pre-survey values.
d <- rename(d, pre_int = int, pre_uv = uv, pre_percomp = percomp, pre_tv = tv)
# Drop the two survey timestamps created by the semester 1-2 pre/post join.
d <- select(d, -date.y, -date.x)

3.5 CFA

AIC and BIC are both lower for m1 (three factors) than for m2 (two factors), and the chi-square difference test below also favors m1.

library(lavaan)

# Int: 1, 4, 5, 8, 10
# UV: 2, 6, 9
# PC: 3, 7

# Three-factor CFA: interest, utility value, perceived competence.
# Fix: the `uv` and `int` labels were swapped relative to the item key
# (Int: 1, 4, 5, 8, 10; UV: 2, 6, 9). Renaming latent factors does not change
# fit or estimates, only which label each block of loadings carries.
# NOTE: the summary rendered below was produced with the old labels, so its
# `uv` and `int` rows are interchanged until the document is re-knit.
m1 <- '
    int =~ q1 + q4 + q5 + q8 + q10
    uv =~ q2 + q6 + q9
    pc =~ q3 + q7
'

out1 <- sem(m1, data = d)
summary(out1, fit.measures = TRUE, standardized = TRUE)

## lavaan 0.6-2 ended normally after 35 iterations
## 
##   Optimization method                           NLMINB
##   Number of free parameters                         23
## 
##                                                   Used       Total
##   Number of observations                           640         662
## 
##   Estimator                                         ML
##   Model Fit Test Statistic                     156.377
##   Degrees of freedom                                32
##   P-value (Chi-square)                           0.000
## 
## Model test baseline model:
## 
##   Minimum Function Test Statistic             2949.434
##   Degrees of freedom                                45
##   P-value                                        0.000
## 
## User model versus baseline model:
## 
##   Comparative Fit Index (CFI)                    0.957
##   Tucker-Lewis Index (TLI)                       0.940
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)              -6210.618
##   Loglikelihood unrestricted model (H1)      -6132.430
## 
##   Number of free parameters                         23
##   Akaike (AIC)                               12467.237
##   Bayesian (BIC)                             12569.850
##   Sample-size adjusted Bayesian (BIC)        12496.827
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.078
##   90 Percent Confidence Interval          0.066  0.090
##   P-value RMSEA <= 0.05                          0.000
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.041
## 
## Parameter Estimates:
## 
##   Information                                 Expected
##   Information saturated (h1) model          Structured
##   Standard Errors                             Standard
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   uv =~                                                                 
##     q1                1.000                               0.529    0.802
##     q4                0.979    0.057   17.102    0.000    0.518    0.652
##     q5                0.994    0.046   21.624    0.000    0.526    0.789
##     q8                1.050    0.044   23.708    0.000    0.555    0.849
##     q10               1.285    0.061   21.204    0.000    0.680    0.776
##   int =~                                                                
##     q2                1.000                               0.719    0.773
##     q6                0.856    0.049   17.492    0.000    0.615    0.767
##     q9                0.949    0.056   17.029    0.000    0.682    0.739
##   pc =~                                                                 
##     q3                1.000                               0.549    0.644
##     q7                0.709    0.073    9.736    0.000    0.389    0.457
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   uv ~~                                                                 
##     int               0.233    0.022   10.433    0.000    0.612    0.612
##     pc                0.247    0.022   11.154    0.000    0.852    0.852
##   int ~~                                                                
##     pc                0.316    0.031   10.250    0.000    0.799    0.799
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .q1                0.155    0.011   14.232    0.000    0.155    0.357
##    .q4                0.363    0.022   16.418    0.000    0.363    0.575
##    .q5                0.168    0.012   14.563    0.000    0.168    0.378
##    .q8                0.119    0.009   12.633    0.000    0.119    0.279
##    .q10               0.304    0.021   14.826    0.000    0.304    0.397
##    .q2                0.347    0.029   12.033    0.000    0.347    0.402
##    .q6                0.266    0.022   12.280    0.000    0.266    0.412
##    .q9                0.388    0.029   13.192    0.000    0.388    0.454
##    .q3                0.425    0.039   10.817    0.000    0.425    0.585
##    .q7                0.574    0.036   16.063    0.000    0.574    0.791
##     uv                0.280    0.024   11.830    0.000    1.000    1.000
##     int               0.517    0.049   10.524    0.000    1.000    1.000
##     pc                0.302    0.045    6.636    0.000    1.000    1.000

# Two-factor alternative: the interest and utility-value items load on one
# task-value factor; perceived competence stays separate.
m2 <- '
    tv =~ q1 + q4 + q5 + q8 + q10 + q2 + q6 + q9
    pc =~ q3 + q7
'

out2 <- sem(model = m2, data = d)
summary(
    out2,
    fit.measures = TRUE,
    standardized = TRUE
)

## lavaan 0.6-2 ended normally after 32 iterations
## 
##   Optimization method                           NLMINB
##   Number of free parameters                         21
## 
##                                                   Used       Total
##   Number of observations                           640         662
## 
##   Estimator                                         ML
##   Model Fit Test Statistic                     496.942
##   Degrees of freedom                                34
##   P-value (Chi-square)                           0.000
## 
## Model test baseline model:
## 
##   Minimum Function Test Statistic             2949.434
##   Degrees of freedom                                45
##   P-value                                        0.000
## 
## User model versus baseline model:
## 
##   Comparative Fit Index (CFI)                    0.841
##   Tucker-Lewis Index (TLI)                       0.789
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)              -6380.901
##   Loglikelihood unrestricted model (H1)      -6132.430
## 
##   Number of free parameters                         21
##   Akaike (AIC)                               12803.801
##   Bayesian (BIC)                             12897.492
##   Sample-size adjusted Bayesian (BIC)        12830.818
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.146
##   90 Percent Confidence Interval          0.135  0.157
##   P-value RMSEA <= 0.05                          0.000
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.080
## 
## Parameter Estimates:
## 
##   Information                                 Expected
##   Information saturated (h1) model          Structured
##   Standard Errors                             Standard
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   tv =~                                                                 
##     q1                1.000                               0.520    0.788
##     q4                0.991    0.059   16.832    0.000    0.515    0.648
##     q5                0.991    0.048   20.751    0.000    0.515    0.773
##     q8                1.030    0.046   22.267    0.000    0.535    0.819
##     q10               1.319    0.063   21.084    0.000    0.685    0.783
##     q2                0.875    0.071   12.279    0.000    0.455    0.489
##     q6                0.877    0.061   14.465    0.000    0.456    0.568
##     q9                1.049    0.069   15.103    0.000    0.545    0.590
##   pc =~                                                                 
##     q3                1.000                               0.544    0.638
##     q7                0.723    0.075    9.586    0.000    0.393    0.462
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   tv ~~                                                                 
##     pc                0.256    0.022   11.427    0.000    0.908    0.908
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .q1                0.164    0.011   14.736    0.000    0.164    0.379
##    .q4                0.366    0.022   16.508    0.000    0.366    0.580
##    .q5                0.179    0.012   15.044    0.000    0.179    0.403
##    .q8                0.141    0.010   13.979    0.000    0.141    0.330
##    .q10               0.297    0.020   14.848    0.000    0.297    0.387
##    .q2                0.657    0.038   17.292    0.000    0.657    0.761
##    .q6                0.437    0.026   16.986    0.000    0.437    0.678
##    .q9                0.557    0.033   16.875    0.000    0.557    0.652
##    .q3                0.431    0.040   10.908    0.000    0.431    0.593
##    .q7                0.570    0.036   15.909    0.000    0.570    0.787
##     tv                0.270    0.023   11.561    0.000    1.000    1.000
##     pc                0.295    0.045    6.523    0.000    1.000    1.000

# Chi-square difference test comparing the three-factor fit (out1) with the two-factor fit (out2).
anova(out1, out2)

## Chi Square Difference Test
## 
##      Df   AIC   BIC  Chisq Chisq diff Df diff Pr(>Chisq)    
## out1 32 12467 12570 156.38                                  
## out2 34 12804 12898 496.94     340.56       2  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

4. Processing all gradebook data

# Second gradebook file: harmonise the ID column names and correct the
# misspelled Grade_Catagory column.
x <- read_csv("RR_S3.csv") %>%
    select(course_ID = Course_ID, student_ID = CU_Pk1, Item_Position:last_access_date) %>%
    rename(Grade_Category = Grade_Catagory)

# First gradebook file (loaded at the top of the script), same ID renaming.
RR_Course_Data <- RR_Course_Data %>%
    select(course_ID = CourseSectionOrigID, student_ID = Bb_UserPK, Gradebook_Item:last_access_date)

# Stack both gradebook sources.
xx <- bind_rows(RR_Course_Data, x)
# write_csv(RR_Course_Data, "s12_gradebook_data.csv")

5. Merging self-report and gradebook data (not run yet)

# Give both tables a character student_ID so the join key types agree.
d <- mutate(d, student_ID = as.character(student_ID))
xx <- mutate(xx, student_ID = as.character(student_ID))

# Attach gradebook rows to the self-report data by student.
# NOTE(review): both tables also carry course_ID, which is not part of the key
# here and will come out suffixed (.x / .y).
df <- d %>%
    left_join(xx, by = "student_ID")

Processing trace data

library(readxl)
# Time-spent file and course data file for semesters 1-2, plus the data pull for semester 3.
ts_12 <- read_csv("RR_Minutes.csv")

td_12 <- read_csv("RR_Course_Data.csv")
td_3 <- read_excel("Ranelluci Study Data Pull Request.xlsx")

# One row per distinct combination of the student-level fields below.
td_12 <- td_12 %>% 
    select(student_ID = Bb_UserPK, course_ID = CourseSectionOrigID,
           gender = Gender, enrollment_reason = EnrollmentReason, 
           enrollment_status = EnrollmentStatus,
           final_grade = FinalGradeCEMS) %>% 
    distinct()

ts_12 <- ts_12 %>% 
    select(student_ID = Bb_UserPK,
           course_ID = CourseSectionOrigID,
           time_spent = TimeSpent)

# Join keys are now spelled out. They are the only columns the two tables
# share, so the matches are the same as with the previous implicit join.
# NOTE(review): ts_12 is not de-duplicated; if it holds several rows per
# student and course, this join repeats the td_12 row -- verify.
td_12 <- left_join(td_12, ts_12, by = c("student_ID", "course_ID"))

# Semester 3 arrives with time spent already included; final_grade is
# converted to numeric so it can be stacked with td_12.
td_3 <- td_3 %>% 
    select(student_ID = CEMS_Bb_UserPK, 
           course_ID = Section_ID,
           gender = Gender,
           enrollment_reason = EnrollmentReason,
           enrollment_status = EnrollmentStatus,
           final_grade = Final_Grade,
           time_spent = `Sum of time spent in course`) %>% 
    mutate(final_grade = as.numeric(final_grade))

trace_data <- bind_rows(td_12, td_3)

Merging trace data with other data

# Convert student_ID to integer before joining with the trace data;
# IDs that are not numeric become NA.
d$student_ID <- as.integer(d$student_ID)

# Join keys are now spelled out. student_ID and course_ID are the only columns
# `d` and `trace_data` share, so the matches are the same as with the previous
# implicit join.
d <- left_join(d, trace_data, by = c("student_ID", "course_ID"))

# Export the analysis columns: IDs through the pre scores, the post scores,
# and the enrollment, grade, and time-spent fields.
dd <- select(d, student_ID:pre_tv, post_int:post_percomp, enrollment_reason, enrollment_status, final_grade, time_spent)
write_csv(dd, "online-science-data.csv")

6. Pre-post analysis

Using multi-level models by course.

60. Looking first at n’s

# Rows per condition (0 = control, 1 = treatment).
count(d, intervention_dummy)

## # A tibble: 2 x 2
##   intervention_dummy     n
##                <dbl> <int>
## 1                  0   304
## 2                  1   358

# Same count, restricted to rows with no missing value in any column.
d %>%
    filter(complete.cases(.)) %>%
    count(intervention_dummy)

## # A tibble: 0 x 2
## # ... with 2 variables: intervention_dummy <dbl>, n <int>

6A. Just looking at pre-post changes in interest and UV and grades

# 6A: post interest on the intervention dummy, with a random intercept per course.
sjPlot::sjt.lmer(
    lme4::lmer(
        formula = post_int ~ intervention_dummy + (1 | course_ID),
        data = d
    )
)

	post_int
	B	CI	p
Fixed Parts
(Intercept)	4.29	3.35 – 5.23	<.001
intervention_dummy	-0.62	-1.67 – 0.44	.270
Random Parts
σ²	0.762
τ_{00, course_ID}	0.286
N_{course_ID}	11
ICC_{course_ID}	0.273
Observations	95
R² / Ω₀²	.018 / .286

# 6A: post utility value on the intervention dummy, with a random intercept per course.
sjPlot::sjt.lmer(
    lme4::lmer(
        formula = post_uv ~ intervention_dummy + (1 | course_ID),
        data = d
    )
)

	post_uv
	B	CI	p
Fixed Parts
(Intercept)	3.75	2.81 – 4.69	<.001
intervention_dummy	-0.47	-1.51 – 0.56	.381
Random Parts
σ²	0.879
τ_{00, course_ID}	0.204
N_{course_ID}	11
ICC_{course_ID}	0.189
Observations	95
R² / Ω₀²	.010 / .197

# 6A: final grade on the intervention dummy, with a random intercept per course.
sjPlot::sjt.lmer(
    lme4::lmer(
        formula = final_grade ~ intervention_dummy + (1 | course_ID),
        data = d
    )
)

	final_grade
	B	CI	p
Fixed Parts
(Intercept)	76.07	72.69 – 79.45	<.001
intervention_dummy	1.83	-2.81 – 6.47	.454
Random Parts
σ²	442.712
τ_{00, course_ID}	15.906
N_{course_ID}	36
ICC_{course_ID}	0.035
Observations	634
R² / Ω₀²	.002 / .036

6B. With pre-values added (pre per-comp for final grades)

# 6B: post interest, adjusting for pre interest; random intercept per course.
sjPlot::sjt.lmer(
    lme4::lmer(
        formula = post_int ~ pre_int + intervention_dummy + (1 | course_ID),
        data = d
    )
)

	post_int
	B	CI	p
Fixed Parts
(Intercept)	0.68	-0.61 – 1.96	.317
pre_int	0.80	0.56 – 1.03	<.001
intervention_dummy	-0.17	-0.89 – 0.55	.643
Random Parts
σ²	0.582
τ_{00, course_ID}	0.021
N_{course_ID}	11
ICC_{course_ID}	0.035
Observations	94
R² / Ω₀²	.328 / .352

# 6B: post utility value, adjusting for pre utility value; random intercept per course.
sjPlot::sjt.lmer(
    lme4::lmer(
        formula = post_uv ~ pre_uv + intervention_dummy + (1 | course_ID),
        data = d
    )
)

	post_uv
	B	CI	p
Fixed Parts
(Intercept)	0.77	-0.37 – 1.91	.195
pre_uv	0.76	0.54 – 0.98	<.001
intervention_dummy	-0.14	-0.97 – 0.70	.756
Random Parts
σ²	0.596
τ_{00, course_ID}	0.120
N_{course_ID}	11
ICC_{course_ID}	0.168
Observations	94
R² / Ω₀²	.321 / .435

# 6B: final grade, adjusting for pre perceived competence; random intercept per course.
sjPlot::sjt.lmer(
    lme4::lmer(
        formula = final_grade ~ pre_percomp + intervention_dummy + (1 | course_ID),
        data = d
    )
)

	final_grade
	B	CI	p
Fixed Parts
(Intercept)	65.08	55.64 – 74.53	<.001
pre_percomp	3.06	0.63 – 5.48	.014
intervention_dummy	1.51	-3.11 – 6.13	.534
Random Parts
σ²	438.364
τ_{00, course_ID}	15.461
N_{course_ID}	36
ICC_{course_ID}	0.034
Observations	627
R² / Ω₀²	.011 / .045

6C. With pre-values added + interactions with pre perceived competence

# 6C: post interest with pre interest plus the intervention x pre perceived
# competence interaction (main effects included); random intercept per course.
sjPlot::sjt.lmer(
    lme4::lmer(
        formula = post_int ~ pre_int + intervention_dummy * pre_percomp + (1 | course_ID),
        data = d
    )
)

	post_int
	B	CI	p
Fixed Parts
(Intercept)	0.12	-4.17 – 4.41	.955
pre_int	0.71	0.41 – 1.01	<.001
intervention_dummy	0.21	-4.23 – 4.65	.928
pre_percomp	0.23	-0.85 – 1.31	.679
intervention_dummy:pre_percomp	-0.08	-1.16 – 1.00	.882
Random Parts
σ²	0.588
τ_{00, course_ID}	0.025
N_{course_ID}	11
ICC_{course_ID}	0.041
Observations	94
R² / Ω₀²	.329 / .356

# 6C: post utility value with pre utility value plus the intervention x pre
# perceived competence interaction (main effects included); random intercept per course.
sjPlot::sjt.lmer(
    lme4::lmer(
        formula = post_uv ~ pre_uv + intervention_dummy * pre_percomp + (1 | course_ID),
        data = d
    )
)

	post_uv
	B	CI	p
Fixed Parts
(Intercept)	0.63	-4.05 – 5.31	.793
pre_uv	0.70	0.45 – 0.95	<.001
intervention_dummy	-0.35	-5.11 – 4.40	.886
pre_percomp	0.09	-1.03 – 1.22	.874
intervention_dummy:pre_percomp	0.07	-1.08 – 1.22	.907
Random Parts
σ²	0.601
τ_{00, course_ID}	0.123
N_{course_ID}	11
ICC_{course_ID}	0.170
Observations	94
R² / Ω₀²	.321 / .437

# 6C: final grade with pre utility value plus the intervention x pre perceived
# competence interaction (main effects included); random intercept per course.
# NOTE(review): pre_uv is the extra covariate here, matching the post_uv model
# in this section -- confirm it is intended for the final-grade model.
sjPlot::sjt.lmer(
    lme4::lmer(
        formula = final_grade ~ pre_uv + intervention_dummy * pre_percomp + (1 | course_ID),
        data = d
    )
)

	final_grade
	B	CI	p
Fixed Parts
(Intercept)	61.31	47.11 – 75.51	<.001
pre_uv	0.05	-2.66 – 2.75	.973
intervention_dummy	9.09	-9.24 – 27.41	.333
pre_percomp	4.05	0.28 – 7.82	.036
intervention_dummy:pre_percomp	-2.02	-6.89 – 2.85	.417
Random Parts
σ²	436.011
τ_{00, course_ID}	16.184
N_{course_ID}	36
ICC_{course_ID}	0.036
Observations	624
R² / Ω₀²	.012 / .047

6D. With all variables added

# 6D: post utility value on all three pre scores and the intervention dummy;
# random intercept per course.
sjPlot::sjt.lmer(
    lme4::lmer(
        formula = post_uv ~ pre_uv + pre_int + pre_percomp + intervention_dummy + (1 | course_ID),
        data = d
    )
)

	post_uv
	B	CI	p
Fixed Parts
(Intercept)	0.28	-1.21 – 1.77	.718
pre_uv	0.69	0.43 – 0.96	<.001
pre_int	0.08	-0.26 – 0.41	.669
pre_percomp	0.10	-0.24 – 0.44	.564
intervention_dummy	-0.07	-0.92 – 0.77	.872
Random Parts
σ²	0.607
τ_{00, course_ID}	0.110
N_{course_ID}	11
ICC_{course_ID}	0.153
Observations	93
R² / Ω₀²	.328 / .431

# 6D: post interest on all three pre scores and the intervention dummy;
# random intercept per course.
sjPlot::sjt.lmer(
    lme4::lmer(
        formula = post_int ~ pre_uv + pre_int + pre_percomp + intervention_dummy + (1 | course_ID),
        data = d
    )
)

	post_int
	B	CI	p
Fixed Parts
(Intercept)	0.29	-1.10 – 1.69	.686
pre_uv	0.19	-0.06 – 0.44	.151
pre_int	0.63	0.32 – 0.95	<.001
pre_percomp	0.09	-0.24 – 0.42	.597
intervention_dummy	-0.13	-0.87 – 0.61	.733
Random Parts
σ²	0.578
τ_{00, course_ID}	0.028
N_{course_ID}	11
ICC_{course_ID}	0.046
Observations	93
R² / Ω₀²	.341 / .372

# 6D: final grade on all three pre scores and the intervention dummy;
# random intercept per course.
sjPlot::sjt.lmer(
    lme4::lmer(
        formula = final_grade ~ pre_uv + pre_int + pre_percomp + intervention_dummy + (1 | course_ID),
        data = d
    )
)

	final_grade
	B	CI	p
Fixed Parts
(Intercept)	53.26	40.00 – 66.51	<.001
pre_uv	-1.55	-4.46 – 1.36	.304
pre_int	5.75	1.98 – 9.53	.003
pre_percomp	1.08	-2.01 – 4.16	.494
intervention_dummy	1.74	-2.92 – 6.40	.478
Random Parts
σ²	434.151
τ_{00, course_ID}	15.909
N_{course_ID}	36
ICC_{course_ID}	0.035
Observations	616
R² / Ω₀²	.026 / .061

New Analysis for MVS UV Study

Joshua Rosenberg

12/21/2017

Workspace doc

Next steps

Overall takeaways

1. Pre-processing (for semesters 1 and 2)

2. Pre-processing (for semester 3)

3. Merging and processing merged data

3.5 CFA

4. Processing all gradebook data

5. Merging self-report and gradebook data (not run yet)

Processing trace data

Merging trace data with other data

6. Pre-post analysis

60. Looking first at n’s

6A. Just looking at pre-post changes in interest and UV and grades

6B. With pre-values added (pre per-comp for final grades)

6C. With pre-values added + interactions with pre perceived competence

6D. With all variables added