Loading Packages
library(dplyr)
library(tidyverse)
library(sjstats)
library(jmRtools)
library(MuMIn)
library(psych)
library(here)
library(car)
# install.packages("chemometrics")
library(chemometrics)
Loading Data
# Point the `here` package at the project folder on the lab volume.
# NOTE(review): per the here documentation, set_here() writes a `.here` marker
# file and may not change the root used by here::here() until the R session is
# restarted -- confirm the paths below resolve as intended.
here::set_here("/Volumes/SCHMIDTLAB/PSE")

# Read the three SciMo source tables from <project root>/Data/SciMo.
SciMo_esm <- here::here("Data", "SciMo", "Sci-Mo-esm.csv") %>%
  read_csv()
SciMo_student_survey <- here::here("Data", "SciMo", "Sci-Mo-student-survey.csv") %>%
  read_csv()
SciMo_video_value <- here::here("Data", "SciMo", "scimo_value_final.csv") %>%
  read_csv()
Fixing Joining Variables
# ESM dataset: build a "YYYY-MM-DD" character response_date to join on.
# Year recode: 8 becomes 2008; every other non-missing value becomes 2009.
SciMo_esm$year <- with(SciMo_esm, ifelse(year == 8, 2008, 2009))
# Paste year-month-day together, parse as a Date, then store the ISO-formatted string.
SciMo_esm$response_date <- with(SciMo_esm, paste(year, month, day, sep = "-")) %>%
  as.Date("%Y-%m-%d") %>%
  format("%Y-%m-%d")

# Video value dataset: rename the key columns to match the ESM data
# (rename() takes new_name = old_name).
SciMo_video_value <- SciMo_video_value %>%
  rename(response_date = date, teacher_ID = teacher)
# Parse response_date with the month/day/year pattern and store it in the same
# "YYYY-MM-DD" string form used for the ESM data.
SciMo_video_value$response_date <- SciMo_video_value$response_date %>%
  as.Date(format = "%m/%d/%Y") %>%
  format("%Y-%m-%d")
Creating New Signal Variables in ESM Dataset
# Collapse each (pager, signal) pair into one signal_value code.
# Rules are tested top to bottom; any pair that matches none of them gets 4.
# NOTE(review): `pager == 2 & signal == 4` and `pager == 4 & signal == 2` do not
# follow the pattern of the other rules -- confirm against the codebook.
SciMo_esm$signal_value <- with(
  SciMo_esm,
  ifelse(pager == 1 & signal == 1, 1,
    ifelse(pager == 2 & signal == 1, 2,
      ifelse(pager == 1 & signal == 2, 3,
        ifelse(pager == 2 & signal == 4, 4,
          ifelse(pager == 3 & signal == 1, 1,
            ifelse(pager == 4 & signal == 2, 2,
              ifelse(pager == 3 & signal == 2, 3, 4)
            )
          )
        )
      )
    )
  )
)
Creating Predictor Variables
# Female dummy: 1 when gender is coded 2, otherwise 0
SciMo_student_survey$female <- with(SciMo_student_survey, ifelse(gender == 2, 1, 0))

# Minority dummy: 0 when race is coded 1 or 4 (white/asian), otherwise 1
SciMo_student_survey$minority <- with(
  SciMo_student_survey,
  ifelse(race == 1 | race == 4, 0, 1)
)

# Overall value = utility + attainment + intrinsic sums (video value dataset)
SciMo_video_value$ov_sum <- with(
  SciMo_video_value,
  high_utility_sum + high_attainment_sum + high_intrinsic_sum
)

# The utility value sum is referred to as uv_sum from here on
SciMo_video_value <- SciMo_video_value %>%
  rename(uv_sum = high_utility_sum)

# Perceived competence composite built from the confident1 and capable1 items
# (presumably a row-wise mean -- confirm in jmRtools)
SciMo_student_survey$perceived_comp <- jmRtools::composite_mean_maker(
  SciMo_student_survey, confident1, capable1
)

# Every ESM record is marked as a formal setting (form_set = 1), and the
# instructional practice column is referred to as `activity` from here on
SciMo_esm <- SciMo_esm %>%
  mutate(form_set = 1) %>%
  rename(activity = instructional_practice)
# Class activity categories, same as in the JRST paper: map the numeric
# activity codes to labels (code 18 becomes NA).
SciMo_esm$act_re <- car::recode(
  SciMo_esm$activity,
  "1 = 'Lecture';
c(2, 3) = 'Individual Work';
c(4, 5) = 'Group Work';
c(6, 8, 7) = 'Quiz and Test';
c(9) = 'Discussion';
c(11, 10) = 'Presentation';
c(12, 13) = 'Video';
c(15, 14, 16) = 'Laboratory';
c(17) = 'Non-instructional';
c(18) = NA"
)
# Fold the smaller categories into a single "Other" level; all remaining
# values (including NA) are carried over unchanged.
SciMo_esm$act_re <- ifelse(
  SciMo_esm$act_re %in%
    c("Discussion", "Non-instructional", "Presentation", "Video", "Group Work"),
  "Other",
  SciMo_esm$act_re
)
Creating Engagement Variables
# Engagement composites, each built from two ESM items via
# jmRtools::composite_mean_maker() (presumably a row-wise mean -- confirm in jmRtools).
# Behavioral: conc + hardwk
SciMo_esm[["beh_enga"]] <- jmRtools::composite_mean_maker(SciMo_esm, conc, hardwk)
# Cognitive: imp_fut + imp_y
SciMo_esm[["cog_enga"]] <- jmRtools::composite_mean_maker(SciMo_esm, imp_fut, imp_y)
# Affective: enjoy + interest
SciMo_esm[["aff_enga"]] <- jmRtools::composite_mean_maker(SciMo_esm, enjoy, interest)
Descriptive Statistics
# Summary statistics (describe()) for the analysis variables of each dataset;
# the variable names arrive as row names and are moved into a "Variable" column.
ds_scimo_esm <- SciMo_esm %>%
  select("grade", "form_set", "activity", "beh_enga", "cog_enga", "aff_enga") %>%
  describe() %>%
  rownames_to_column(var = "Variable")

ds_scimo_survey <- SciMo_student_survey %>%
  select("female", "minority") %>%
  describe() %>%
  rownames_to_column(var = "Variable")

ds_scimo_value <- SciMo_video_value %>%
  select("ov_sum", "uv_sum") %>%
  describe() %>%
  rownames_to_column(var = "Variable")

# One combined table with the rows of all three summaries
ds_all_scimo <- bind_rows(ds_scimo_esm, ds_scimo_survey, ds_scimo_value)

# Frequency tables for the raw and collapsed activity codes
SciMo_esm %>%
  select("activity", "act_re") %>%
  lapply(table)
## $activity
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
## 557 447 224 250 21 226 23 439 79 72 241 74 82 199 763 61 322 5
##
## $act_re
##
## Individual Work Laboratory Lecture Other
## 671 1023 557 1141
## Quiz and Test
## 688
# Frequency tables for the two student-level dummy variables
SciMo_student_survey %>%
  select("female", "minority") %>%
  lapply(table)
## $female
##
## 0 1
## 129 115
##
## $minority
##
## 0 1
## 95 146
# Frequency tables for the overall and utility value sums
SciMo_video_value %>%
  select("ov_sum", "uv_sum") %>%
  lapply(table)
## $ov_sum
##
## 0 1 2 3 4 5 6 7 9 10 11
## 316 94 30 24 6 4 1 2 1 1 1
##
## $uv_sum
##
## 0 1 2 3 4 5 6 7 8 9 10
## 327 92 27 19 6 3 1 2 1 1 1
Joining datasets
# Drop teacher_ID from the survey so the join below does not bring in a second copy.
SciMo_student_survey <- SciMo_student_survey %>%
  select(-'teacher_ID')

# Attach student-level survey data by student, then the video value codes by
# date, teacher, and signal. Left joins keep every ESM record.
scimo_merged <- SciMo_esm %>%
  left_join(SciMo_student_survey, by = "stud_ID") %>%
  left_join(SciMo_video_value, by = c("response_date", "teacher_ID", "signal_value"))
# Flag the one response to drop: student "AXR050594", 20 October 2008, second beeper.
# `year` was recoded to four digits earlier in this script (8 -> 2008), so it has
# to be compared with 2008; comparing with 8 matched no rows and removed nothing.
# `%in% TRUE` turns any NA comparison into FALSE so the flag is safe to use as a
# row index.
to_remove <- (
  scimo_merged$stud_ID == "AXR050594" &
    scimo_merged$month == 10 &
    scimo_merged$day == 20 &
    scimo_merged$year == 2008 &
    scimo_merged$signal == 2
) %in% TRUE
table(to_remove)
## to_remove
## FALSE
## 4136
# Row count before the flagged case is dropped
scimo_merged %>% nrow()
## [1] 4136
# Drop the flagged row(s). `%in% TRUE` treats an NA flag as "keep": indexing
# rows with an NA logical would otherwise insert an all-NA row into the data.
scimo_merged <- scimo_merged[!(to_remove %in% TRUE), ]
nrow(scimo_merged) # - 1 case
## [1] 4136
# Replace univariate outliers with NA using 1.5 * IQR fences.
#
# Arguments:
#   x      numeric vector to screen.
#   na.rm  drop missing values when computing the quartiles (default TRUE).
#   ...    further arguments forwarded to quantile() (e.g. `type`).
#
# Returns a copy of `x` (same length) in which every value below
# Q1 - 1.5 * IQR or above Q3 + 1.5 * IQR is set to NA. Values that were
# already NA stay NA.
#
# Provenance: adapted from a Stack Overflow answer (original source not recorded).
uv_outlier_detector <- function(x, na.rm = TRUE, ...) {
  quartiles <- unname(quantile(x, probs = c(0.25, 0.75), na.rm = na.rm, ...))
  # Take the IQR from the quartiles just computed so that options passed via
  # `...` (such as `type`) apply to both the quartiles and the fence width.
  fence_width <- 1.5 * (quartiles[2] - quartiles[1])
  screened <- x
  screened[x < (quartiles[1] - fence_width)] <- NA
  screened[x > (quartiles[2] + fence_width)] <- NA
  screened
}
# Run uv_outlier_detector() over every column of `data`.
# Returns the per-column results as simplified by sapply().
remove_uv_out_func <- function(data) {
  sapply(X = data, FUN = uv_outlier_detector)
}
# Screen `data` for multivariate outliers with chemometrics::Moutlier()
# (quantile = 0.99, plotting disabled).
#
# Arguments:
#   data  data frame / matrix of the variables to screen together.
#
# Returns:
#   - the positions of the rows whose `md` distance exceeds the cutoff, when
#     at least one such row exists;
#   - otherwise `data` itself, unchanged.
#
# NOTE(review): despite its name, this function never removes rows, and its
# return type depends on whether outliers were found. Callers must handle both
# shapes; consider returning the filtered data in every case.
remove_mv_out_func <- function(data) {
  mvout <- chemometrics::Moutlier(data, quantile = 0.99, plot = FALSE)
  # which() drops NA comparisons, so only rows with a known distance are flagged
  the_index <- which(mvout$md > mvout$cutoff)
  # Test the number of flagged rows directly rather than coercing the integer
  # positions to logical (which triggers a coercion warning)
  if (length(the_index) > 0) {
    the_index
  } else {
    data
  }
}
# Count missing (TRUE) vs. non-missing (FALSE) values of beh_enga
scimo_merged$beh_enga %>% is.na() %>% table()
## .
## FALSE TRUE
## 4103 33
# Same count after uv_outlier_detector() has set univariate outliers to NA;
# any increase in TRUE over the raw count is the number of outliers flagged
scimo_merged$beh_enga %>% uv_outlier_detector() %>% is.na() %>% table()
## .
## FALSE TRUE
## 4103 33
# Count missing (TRUE) vs. non-missing (FALSE) values of cog_enga
scimo_merged$cog_enga %>% is.na() %>% table()
## .
## FALSE TRUE
## 3997 139
# Missing-value count for cog_enga after uv_outlier_detector() has set
# univariate outliers to NA. The column is `cog_enga`; `co_enga` does not
# exist, which is why this check used to print an empty table.
scimo_merged$cog_enga %>% uv_outlier_detector() %>% is.na() %>% table()
## < table of extent 0 >
# Count missing (TRUE) vs. non-missing (FALSE) values of aff_enga
scimo_merged$aff_enga %>% is.na() %>% table()
## .
## FALSE TRUE
## 4107 29
# Same count after uv_outlier_detector() has set univariate outliers to NA;
# any increase in TRUE over the raw count is the number of outliers flagged
scimo_merged$aff_enga %>% uv_outlier_detector() %>% is.na() %>% table()
## .
## FALSE TRUE
## 4107 29
# Multivariate outlier screen across the three engagement composites.
# Prints either the flagged row positions or, when none are flagged, the
# selected columns unchanged.
remove_mv_out_func(
  select(scimo_merged, beh_enga, aff_enga, cog_enga)
)
## # A tibble: 4,136 x 3
## beh_enga aff_enga cog_enga
## <dbl> <dbl> <dbl>
## 1 2.5 1.5 2.5
## 2 3 2.5 2
## 3 2.5 2 2
## 4 2 2 2
## 5 2.5 1.5 2.5
## 6 1 1 1
## 7 NA NA NA
## 8 NA NA NA
## 9 0 0 0
## 10 1 0.5 0
## # ... with 4,126 more rows