Loading Packages

library(dplyr)
library(tidyverse)
library(sjstats)
library(jmRtools)
library(MuMIn)
library(psych)
library(here)
library(car)
# install.packages("chemometrics")
library(chemometrics)

Loading Data

# Register the lab volume with `here`.
# NOTE(review): per the here documentation, set_here() writes a `.here` marker
# file and the new root is only picked up once here is reloaded -- confirm the
# paths below resolve against the intended root in this session.
here::set_here("/Volumes/SCHMIDTLAB/PSE")

# Read the three SciMo source tables from Data/SciMo/ under the project root.
SciMo_esm <- readr::read_csv(
  file = here::here("Data", "SciMo", "Sci-Mo-esm.csv")
)
SciMo_student_survey <- readr::read_csv(
  file = here::here("Data", "SciMo", "Sci-Mo-student-survey.csv")
)
SciMo_video_value <- readr::read_csv(
  file = here::here("Data", "SciMo", "scimo_value_final.csv")
)

Fixing Joining Variables

# Build response_date in the ESM dataset.
# Expand the short year code: 8 becomes 2008, every other non-missing value 2009.
SciMo_esm$year <- ifelse(SciMo_esm$year == 8, 2008, 2009)
# Paste year-month-day, parse it as a Date, and store it as "YYYY-MM-DD" text.
SciMo_esm$response_date <- format(
  as.Date(
    with(SciMo_esm, paste(year, month, day, sep = "-")),
    format = "%Y-%m-%d"
  ),
  "%Y-%m-%d"
)

# Rename the key columns of the video value dataset
# (new name on the left, original name on the right).
SciMo_video_value <- SciMo_video_value %>%
  rename(response_date = date, teacher_ID = teacher)

# Re-express response_date (parsed as month/day/year) as "YYYY-MM-DD" text.
SciMo_video_value$response_date <- format(
  as.Date(SciMo_video_value$response_date, format = "%m/%d/%Y"),
  "%Y-%m-%d"
)

Creating New Signal Variables in ESM Dataset

# Map each (pager, signal) pair to a signal_value of 1-4; any pair not listed
# below falls through to 4.
# NOTE(review): the `pager == 2 & signal == 4` and `pager == 4 & signal == 2`
# branches break the pattern of the other branches -- confirm they are intended.
SciMo_esm$signal_value <- with(
  SciMo_esm,
  ifelse(pager == 1 & signal == 1, 1,
  ifelse(pager == 2 & signal == 1, 2,
  ifelse(pager == 1 & signal == 2, 3,
  ifelse(pager == 2 & signal == 4, 4,
  ifelse(pager == 3 & signal == 1, 1,
  ifelse(pager == 4 & signal == 2, 2,
  ifelse(pager == 3 & signal == 2, 3, 4)))))))
)

Creating Predictor Variables

# Dummy for female: 1 when gender is coded 2, 0 otherwise (NA stays NA).
SciMo_student_survey[["female"]] <- with(
  SciMo_student_survey,
  ifelse(gender == 2, 1, 0)
)

# Dummy for minority status: 0 when race is coded 4 or 1 (white/asian),
# 1 otherwise (NA stays NA).
SciMo_student_survey[["minority"]] <- with(
  SciMo_student_survey,
  ifelse(race == 4 | race == 1, 0, 1)
)

# Overall value sum = utility + attainment + intrinsic sums.
# This must run before the rename below, which retires high_utility_sum.
SciMo_video_value[["ov_sum"]] <- with(
  SciMo_video_value,
  high_utility_sum + high_attainment_sum + high_intrinsic_sum
)

# Expose the utility value sum under its short name.
SciMo_video_value <- SciMo_video_value %>%
  rename(uv_sum = high_utility_sum)

# Perceived competence: composite of the confident1 and capable1 survey items.
SciMo_student_survey[["perceived_comp"]] <- jmRtools::composite_mean_maker(
  SciMo_student_survey,
  confident1,
  capable1
)

# Add the formal-setting indicator (1 for every row of this dataset) and
# rename the instructional practice variable to `activity`.
SciMo_esm <- SciMo_esm %>%
  mutate(form_set = 1) %>%
  rename(activity = instructional_practice)

# Creating class activity variable, same as in JRST paper: collapse the raw
# activity codes into named categories (code 18 becomes missing).
SciMo_esm$act_re <- car::recode(SciMo_esm$activity,
                            "1 = 'Lecture';
                                              c(2, 3) = 'Individual Work';
                                              c(4, 5) = 'Group Work';
                                              c(6, 8, 7) = 'Quiz and Test';
                                              c(9) = 'Discussion';
                                              c(11, 10) = 'Presentation';
                                              c(12, 13) = 'Video';
                                              c(15, 14, 16) = 'Laboratory';
                                              c(17) = 'Non-instructional';
                                              c(18) = NA")

# Fold Discussion, Non-instructional, Presentation, Video and Group Work into
# a single "Other" category. `%in%` never returns NA, so rows with a missing
# category take the `no` branch and remain NA.
SciMo_esm$act_re <- ifelse(
  SciMo_esm$act_re %in% c("Discussion", "Non-instructional", "Presentation", "Video", "Group Work"),
  "Other",
  SciMo_esm$act_re
)

Creating Engagement Variables

# Engagement composites, each built from two ESM items:
#   behavioral = conc + hardwk, cognitive = imp_fut + imp_y,
#   affective  = enjoy + interest.
SciMo_esm[["beh_enga"]] <- jmRtools::composite_mean_maker(
  SciMo_esm, conc, hardwk
)
SciMo_esm[["cog_enga"]] <- jmRtools::composite_mean_maker(
  SciMo_esm, imp_fut, imp_y
)
SciMo_esm[["aff_enga"]] <- jmRtools::composite_mean_maker(
  SciMo_esm, enjoy, interest
)

Descriptive Statistics

# Descriptive statistics per dataset; the variable names (row names of the
# describe() output) are moved into a "Variable" column.
ds_scimo_esm <- SciMo_esm %>%
  select("grade", "form_set", "activity", "beh_enga", "cog_enga", "aff_enga") %>%
  describe() %>%
  rownames_to_column(var = "Variable")

ds_scimo_survey <- SciMo_student_survey %>%
  select("female", "minority") %>%
  describe() %>%
  rownames_to_column(var = "Variable")

ds_scimo_value <- SciMo_video_value %>%
  select("ov_sum", "uv_sum") %>%
  describe() %>%
  rownames_to_column(var = "Variable")

# Stack the three summaries into one table.
ds_all_scimo <- bind_rows(ds_scimo_esm, ds_scimo_survey, ds_scimo_value)

# Frequency tables for the raw and recoded activity variables.
lapply(X = SciMo_esm[, c("activity", "act_re")], FUN = table)
## $activity
## 
##   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18 
## 557 447 224 250  21 226  23 439  79  72 241  74  82 199 763  61 322   5 
## 
## $act_re
## 
## Individual Work      Laboratory         Lecture           Other 
##             671            1023             557            1141 
##   Quiz and Test 
##             688
# Frequency tables for the two survey dummy variables.
lapply(X = SciMo_student_survey[, c("female", "minority")], FUN = table)
## $female
## 
##   0   1 
## 129 115 
## 
## $minority
## 
##   0   1 
##  95 146
# Frequency tables for the overall and utility value sums.
lapply(X = SciMo_video_value[, c("ov_sum", "uv_sum")], FUN = table)
## $ov_sum
## 
##   0   1   2   3   4   5   6   7   9  10  11 
## 316  94  30  24   6   4   1   2   1   1   1 
## 
## $uv_sum
## 
##   0   1   2   3   4   5   6   7   8   9  10 
## 327  92  27  19   6   3   1   2   1   1   1

Joining datasets

# Drop teacher_ID from the survey before merging; the second join below uses
# teacher_ID as a key.
SciMo_student_survey <- SciMo_student_survey %>%
  select(-'teacher_ID')

# ESM responses <- student survey (by student) <- video value codes
# (by date, teacher and signal).
scimo_merged <- SciMo_esm %>%
  left_join(SciMo_student_survey, by = "stud_ID") %>%
  left_join(
    SciMo_video_value,
    by = c("response_date", "teacher_ID", "signal_value")
  )

Remove duplicate

# Flag the duplicated response: student "AXR050594", 10/20/2008, second beeper.
# `year` was recoded from 8 to 2008 when response_date was built, so the mask
# must compare against 2008; comparing against 8 matched no rows. `%in%` keeps
# the mask free of NA, so the negated subset below cannot produce NA rows.
to_remove <- with(
  scimo_merged,
  stud_ID %in% "AXR050594" & month %in% 10 & day %in% 20 &
    year %in% 2008 & signal %in% 2
)
# NOTE: the recorded output below predates this fix (it shows zero matches and
# an unchanged row count); re-run to refresh it.
table(to_remove)
## to_remove
## FALSE 
##  4136
nrow(scimo_merged)
## [1] 4136
scimo_merged <- scimo_merged[!to_remove, ]
nrow(scimo_merged) # - 1 case
## [1] 4136

Detecting outliers

# Replace univariate outliers in `x` with NA using 1.5 * IQR fences.
#
# Args:
#   x:     numeric vector.
#   na.rm: drop missing values when computing the quartiles and the IQR.
#   ...:   further arguments forwarded to quantile() only (not to IQR()).
#
# Returns:
#   A copy of `x` in which values below Q1 - 1.5 * IQR or above
#   Q3 + 1.5 * IQR are set to NA; existing NA values stay NA.
uv_outlier_detector <- function(x, na.rm = TRUE, ...) {
    qnt <- quantile(x, probs = c(.25, .75), na.rm = na.rm, ...)
    H <- 1.5 * IQR(x, na.rm = na.rm)
    y <- x
    # One mask for both fences; NA comparisons leave the (already NA) value NA.
    y[x < (qnt[1] - H) | x > (qnt[2] + H)] <- NA
    y
}

# Apply uv_outlier_detector() to every column of `data` and return whatever
# sapply() assembles from the per-column results.
remove_uv_out_func <- function(data) {
    sapply(data, uv_outlier_detector)
}

# Detect multivariate outliers with chemometrics::Moutlier().
#
# Args:
#   data: the columns to screen jointly.
#
# Returns:
#   The positions where Moutlier()'s `md` exceeds its `cutoff` (computed with
#   quantile = 0.99) when there is at least one; otherwise `data` unchanged.
#
# NOTE(review): the call recorded later in this file returned `data` while the
# input contained all-NA rows -- confirm Moutlier() yields usable distances
# when values are missing, rather than NA distances that never exceed cutoff.
remove_mv_out_func <- function(data) {
    mvout <- chemometrics::Moutlier(data, quantile = 0.99, plot = FALSE)
    the_index <- which(mvout$md > mvout$cutoff)
    # which() returns positive integer positions, so "any outlier" is a length
    # check; calling any() on integers coerces them and raises a warning.
    if (length(the_index) > 0) {
        return(the_index)
    }
    data
}

Univariate outliers

# For each engagement composite, tabulate missing values before and after
# outlier screening; equal counts mean no value was flagged as an outlier.
scimo_merged$beh_enga %>% is.na() %>% table()
## .
## FALSE  TRUE 
##  4103    33
scimo_merged$beh_enga %>% uv_outlier_detector() %>% is.na() %>% table()
## .
## FALSE  TRUE 
##  4103    33
scimo_merged$cog_enga %>% is.na() %>% table()
## .
## FALSE  TRUE 
##  3997   139
# Column name fixed: this previously read `co_enga`, which does not exist and
# produced the empty table recorded below. Re-run to refresh that output.
scimo_merged$cog_enga %>% uv_outlier_detector() %>% is.na() %>% table()
## < table of extent 0 >
scimo_merged$aff_enga %>% is.na() %>% table()
## .
## FALSE  TRUE 
##  4107    29
scimo_merged$aff_enga %>% uv_outlier_detector() %>% is.na() %>% table()
## .
## FALSE  TRUE 
##  4107    29

Multivariate outliers

# Screen the three engagement composites jointly for multivariate outliers.
remove_mv_out_func(
    select(scimo_merged, beh_enga, aff_enga, cog_enga)
)
## # A tibble: 4,136 x 3
##    beh_enga aff_enga cog_enga
##       <dbl>    <dbl>    <dbl>
##  1      2.5      1.5      2.5
##  2      3        2.5      2  
##  3      2.5      2        2  
##  4      2        2        2  
##  5      2.5      1.5      2.5
##  6      1        1        1  
##  7     NA       NA       NA  
##  8     NA       NA       NA  
##  9      0        0        0  
## 10      1        0.5      0  
## # ... with 4,126 more rows