This is a sandbox for exploring the data, in preparation for adding to the results section.
This document is organized around two sets of model specifications:
mclust
MPlus
Both are accessed through the tidyLPA
package.
mclust
For models fit using mclust
, eight model specifications are tested, where "all five" refers to the variables for cognitive (learning), behavioral (hard working), and affective (enjoy) engagement together with challenge and competence, and "three" refers to just the three engagement measures. Note that the prior regularizes the parameter estimates so that they are less likely to be extreme (and therefore less likely to lead to convergence problems, though possibly less meaningful / interpretable).
MPlus
library(tidyverse)
library(lmerTest)
library(lme4)
library(corrr)
library(jmRtools)
library(tidyLPA)
# Read each STEM-IE CSV export from the mounted lab volume into its own
# data frame (one file per data source).
esm <- read_csv("/Volumes/SCHMIDTLAB/PSE/data/STEM-IE/STEM-IE-esm.csv")
pre_survey_data_processed <- read_csv("/Volumes/SCHMIDTLAB/PSE/data/STEM-IE/STEM-IE-pre-survey.csv")
post_survey_data_partially_processed <- read_csv("/Volumes/SCHMIDTLAB/PSE/data/STEM-IE/STEM-IE-post-survey.csv")
video <- read_csv("/Volumes/SCHMIDTLAB/PSE/data/STEM-IE/STEM-IE-video.csv")
pqa <- read_csv("/Volumes/SCHMIDTLAB/PSE/data/STEM-IE/STEM-IE-pqa.csv")
attendance <- read_csv("/Volumes/SCHMIDTLAB/PSE/data/STEM-IE/STEM-IE-attendance.csv")
class_data <- read_csv("/Volumes/SCHMIDTLAB/PSE/data/STEM-IE/STEM-IE-class-video.csv")
demographics <- read_csv("/Volumes/SCHMIDTLAB/PSE/data/STEM-IE/STEM-IE-demographics.csv")
# NOTE(review): this path spells the directory "Data" while every other path
# uses "data" -- confirm it resolves on a case-sensitive file system.
pm <- read_csv("/Volumes/SCHMIDTLAB/PSE/Data/STEM-IE/STEM-IE-program-match.csv")
# save.image("~/desktop/sandbox-01.Rdata")
# Restore a saved workspace image (presumably written by the commented-out
# save.image() call above -- confirm). Objects in the image that share a name
# with a data frame read above replace it.
load("~/desktop/sandbox-01.Rdata")
# Attendance: align the ID column's name and type with the other tables, then
# keep only the proportion of scheduled days each participant attended.
attendance <- attendance %>%
  rename(participant_ID = ParticipantID) %>%
  mutate(
    prop_attend = DaysAttended / DaysScheduled,
    participant_ID = as.integer(participant_ID)
  ) %>%
  select(participant_ID, prop_attend)

# Drop participant 7187 and attach attendance. The join key is spelled out so
# the join cannot silently pick up any other column the two tables share.
demographics <- demographics %>%
  filter(participant_ID != 7187) %>%
  left_join(attendance, by = "participant_ID")

# Row-wise composite of four ESM engagement items.
esm$overall_engagement <- jmRtools::composite_mean_maker(esm, hard_working, concentrating, enjoy, interest)

# Assemble the analysis data frame, one row per ESM response.
df <- left_join(esm, pre_survey_data_processed, by = "participant_ID") # df & pre-survey
df <- left_join(df, video, by = c("program_ID", "response_date", "sociedad_class", "signal_number")) # df & video
df <- left_join(df, demographics, by = c("participant_ID", "program_ID")) # df and demographics
# Build composite PQA scores by summing their component items, and give the
# individual STEM practice items (stem_sb_*) descriptive aliases.
pqa <- pqa %>%
  mutate(
    # Youth-development subscales.
    active = active_part_1 + active_part_2,
    ho_thinking = ho_thinking_1 + ho_thinking_2 + ho_thinking_3,
    belonging = belonging_1 + belonging_2,
    agency = agency_1 + agency_2 + agency_3 + agency_4,
    # Overall score: the sum of every item in the four subscales above.
    youth_development_overall =
      active_part_1 + active_part_2 +
      ho_thinking_1 + ho_thinking_2 + ho_thinking_3 +
      belonging_1 + belonging_2 +
      agency_1 + agency_2 + agency_3 + agency_4,
    # STEM practice aliases and composites.
    making_observations = stem_sb_8,
    data_modeling = stem_sb_2 + stem_sb_3 + stem_sb_9,
    interpreting_communicating = stem_sb_6,
    generating_data = stem_sb_4,
    asking_questions = stem_sb_1,
    # Total across all nine STEM practice items.
    stem_sb =
      stem_sb_1 + stem_sb_2 + stem_sb_3 +
      stem_sb_4 + stem_sb_5 + stem_sb_6 +
      stem_sb_7 + stem_sb_8 + stem_sb_9
  )
# pqa <- rename(pqa, sixth_math_sociedad = sixth_math)
# pqa <- rename(pqa, seventh_math_sociedad = seventh_math)
# pqa <- rename(pqa, eighth_math_sociedad = eighth_math)
# pqa <- rename(pqa, dance_sociedad = dance)
# pqa <- rename(pqa, robotics_sociedad = robotics)
# Prepare the PQA table's join keys and merge it into df.
pqa <- pqa %>%
  mutate(
    # Collapse the class indicator columns into one label. The checks run in
    # this order, so the first indicator equal to 1 wins; rows matching none
    # get NA.
    sociedad_class = ifelse(
      eighth_math == 1, "8th Math",
      ifelse(
        seventh_math == 1, "7th Math",
        ifelse(
          sixth_math == 1, "6th Math",
          ifelse(
            robotics == 1, "Robotics",
            ifelse(dance == 1, "Dance", NA)
          )
        )
      )
    )
  ) %>%
  # Use the same key names as df.
  rename(
    program_ID = SiteIDNumeric,
    response_date = resp_date,
    signal_number = signal
  ) %>%
  # Convert the program key to character before joining on it.
  mutate(program_ID = as.character(program_ID))

df <- left_join(
  df, pqa,
  by = c("response_date", "program_ID", "signal_number", "sociedad_class")
)
# Three-level activity factor: the two activities of interest keep their
# labels, everything else (including missing codes) becomes "Other", and
# "Other" is moved to the front so it is the first level.
df <- mutate(
  df,
  youth_activity_three = fct_relevel(
    case_when(
      youth_activity_rc == "Creating Product" ~ "Creating Product",
      youth_activity_rc == "Basic Skills Activity" ~ "Basic Skills Activity",
      TRUE ~ "Other"
    ),
    "Other"
  )
)
# <!-- Cognitive engagement As you were signaled, were you learning anything or getting better at something? -->
# <!-- Behavioral engagement As you were signaled, how hard were you working? -->
# <!-- Affective engagement As you were signaled, did you enjoy what you are doing? -->
# <!-- Perceived challenge As you were signaled, how challenging was the main activity? -->
# <!-- Perceived competence As you were signaled, were you good at the main activity? -->
library(jmRtools)
# Give the nine STEM practice items readable names, then derive the analysis
# variables in a single mutate (columns are created in the order listed).
df <- df %>%
  rename(
    ssb_predict = stem_sb_1,
    ssb_model = stem_sb_2,
    ssb_analyze = stem_sb_3,
    ssb_measure = stem_sb_4,
    ssb_tools = stem_sb_5,
    ssb_precision = stem_sb_6,
    ssb_vocabulary = stem_sb_7,
    ssb_classification = stem_sb_8,
    ssb_symbols = stem_sb_9
  ) %>%
  mutate(
    # Momentary engagement, challenge and competence (one ESM item each).
    dm_cog_eng = learning,
    dm_beh_eng = hard_working,
    dm_aff_eng = enjoy,
    dm_challenge = challenge,
    dm_competence = good_at,
    # Practice indicators; dm_gen and dm_mod are 1 when either of their two
    # component codes is 1, otherwise 0.
    dm_ask = ssb_predict,
    dm_obs = ssb_classification,
    dm_gen = ifelse(ssb_measure == 1 | ssb_precision == 1, 1, 0),
    dm_mod = ifelse(ssb_model == 1 | ssb_analyze == 1, 1, 0),
    dm_com = ssb_symbols,
    # Two-item averages for each engagement dimension.
    ov_cog_eng = (important + future_goals) / 2,
    ov_beh_eng = (hard_working + concentrating) / 2,
    ov_aff_eng = (enjoy + interest) / 2
  )
# Per-program totals of each ssb_* code in the response-level data.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
out <- df %>%
  group_by(program_ID) %>%
  select(contains("ssb")) %>%
  summarize_all(sum, na.rm = TRUE)

# Overall total of each STEM practice item in the PQA data, divided by 236.
# NOTE(review): 236 is hard-coded -- presumably the number of coded PQA
# segments; confirm, or derive it from the data.
out1 <- pqa %>%
  select(contains("stem"), -sum_stem_sb, -stem_sb) %>%
  summarize_all(sum, na.rm = TRUE) / 236
# Relabel with the ssb_* names; this relies on the two column sets being in
# the same order.
names(out1) <- df %>% select(contains("ssb")) %>% names()

# Per-program totals of each STEM practice item in the PQA data, relabelled to
# match `out` (again relying on matching column order).
pqa_out <- pqa %>%
  group_by(program_ID) %>%
  select(contains("stem"), -sum_stem_sb, -stem_sb) %>%
  summarize_all(sum, na.rm = TRUE)
names(pqa_out) <- names(out)
# Compare candidate profile solutions on the five dm_* variables, first
# without and then with the prior.
df %>%
  select(dm_cog_eng, dm_beh_eng, dm_aff_eng, dm_challenge, dm_competence) %>%
  compare_solutions(contains("dm"))

# TRUE is spelled out because T is an ordinary variable that can be reassigned.
df %>%
  select(dm_cog_eng, dm_beh_eng, dm_aff_eng, dm_challenge, dm_competence) %>%
  compare_solutions(contains("dm"), prior_control = TRUE)

# Same comparison through the MPlus interface, on the raw variables.
out_df1 <- compare_solutions_mplus(df,
                                   dm_cog_eng, dm_beh_eng, dm_aff_eng, dm_challenge, dm_competence,
                                   n_processors = 8)
## n_profiles model_1 model_2 model_3
## 1 2 39916.157 38423.27 Convergence problem
## 2 3 39082.592 38049.88 Convergence problem
## 3 4 38616.439 37741.96 Convergence problem
## 4 5 38161.673 37490.66 Convergence problem
## 5 6 37718.88 37076.46 Convergence problem
## 6 7 37900.884 36874.38 Convergence problem
## 7 8 37443.138 37045.28 Convergence problem
## 8 9 Convergence problem 36561.57 Convergence problem
## 9 10 Convergence problem 36328.91 Convergence problem
## model_4 model_5 model_6
## 1 Convergence problem Convergence problem Convergence problem
## 2 Convergence problem Convergence problem Convergence problem
## 3 Convergence problem Convergence problem Convergence problem
## 4 Convergence problem Convergence problem Convergence problem
## 5 Convergence problem Convergence problem Convergence problem
## 6 Convergence problem Convergence problem Convergence problem
## 7 Convergence problem Convergence problem Convergence problem
## 8 Convergence problem Convergence problem Convergence problem
## 9 Convergence problem Convergence problem Convergence problem
# estimate_profiles_mplus(df,
# dm_cog_eng, dm_beh_eng, dm_aff_eng, dm_challenge, dm_competence, model = 2, n_profiles=2)
# Center a numeric vector on its mean and, when it has non-zero spread, divide
# by its standard deviation. Missing values are ignored when computing the
# mean and SD and stay missing in the result.
#
# x: a numeric vector.
# Returns a numeric vector the same length as x. If the SD is zero or cannot
# be computed (fewer than two non-missing values), the vector is only centered.
center_and_scale_vector <- function(x) {
  centered <- x - mean(x, na.rm = TRUE)
  spread <- stats::sd(x, na.rm = TRUE)
  # sd() returns NA with fewer than two non-missing values; testing NA == 0
  # inside `if` would raise an error, so check for NA first.
  if (is.na(spread) || spread == 0) {
    return(centered)
  }
  centered / spread
}
# Standardized copy of the five profile variables: each column is passed
# through center_and_scale_vector().
df_ss <- mutate_all(
  select(df, dm_cog_eng, dm_beh_eng, dm_aff_eng, dm_challenge, dm_competence),
  center_and_scale_vector
)

# MPlus model comparison on the standardized variables.
out_df2 <- compare_solutions_mplus(
  df_ss,
  dm_cog_eng, dm_beh_eng, dm_aff_eng, dm_challenge, dm_competence,
  n_processors = 8
)
## n_profiles model_1 model_2 model_3
## 1 2 38639.754 37146.86 Convergence problem
## 2 3 37806.188 36773.47 Convergence problem
## 3 4 37340.035 36465.56 Convergence problem
## 4 5 36885.269 36214.25 Convergence problem
## 5 6 36442.477 35800.05 Convergence problem
## 6 7 36624.481 35597.97 Convergence problem
## 7 8 36166.735 35768.87 Convergence problem
## 8 9 Convergence problem 35285.17 Convergence problem
## 9 10 Convergence problem 35052.50 Convergence problem
## model_4 model_5 model_6
## 1 Convergence problem Convergence problem Convergence problem
## 2 Convergence problem Convergence problem Convergence problem
## 3 Convergence problem Convergence problem Convergence problem
## 4 Convergence problem Convergence problem Convergence problem
## 5 Convergence problem Convergence problem Convergence problem
## 6 Convergence problem Convergence problem Convergence problem
## 7 Convergence problem Convergence problem Convergence problem
## 8 Convergence problem Convergence problem Convergence problem
## 9 Convergence problem Convergence problem Convergence problem
# Raw variables, with st_iterations and starts set explicitly.
out_df3 <- compare_solutions_mplus(
  df,
  dm_cog_eng, dm_beh_eng, dm_aff_eng, dm_challenge, dm_competence,
  st_iterations = 50,
  starts = c(500, 50)
)
## n_profiles model_1 model_2 model_3
## 1 2 39916.157 38423.266 Convergence problem
## 2 3 39082.592 LL not replicated Convergence problem
## 3 4 Convergence problem LL not replicated Convergence problem
## 4 5 Convergence problem LL not replicated Convergence problem
## 5 6 Convergence problem LL not replicated Convergence problem
## 6 7 Convergence problem LL not replicated Convergence problem
## 7 8 Convergence problem Convergence problem Convergence problem
## 8 9 Convergence problem Convergence problem Convergence problem
## 9 10 Convergence problem Convergence problem Convergence problem
## model_4 model_5 model_6
## 1 Convergence problem Convergence problem Convergence problem
## 2 Convergence problem Convergence problem Convergence problem
## 3 Convergence problem Convergence problem Convergence problem
## 4 Convergence problem Convergence problem Convergence problem
## 5 Convergence problem Convergence problem Convergence problem
## 6 Convergence problem Convergence problem Convergence problem
## 7 Convergence problem Convergence problem Convergence problem
## 8 Convergence problem Convergence problem Convergence problem
## 9 Convergence problem Convergence problem Convergence problem
# Standardized variables, with st_iterations and starts set explicitly.
out_df4 <- compare_solutions_mplus(
  df_ss,
  dm_cog_eng, dm_beh_eng, dm_aff_eng, dm_challenge, dm_competence,
  st_iterations = 50,
  starts = c(500, 50)
)
## n_profiles model_1 model_2 model_3
## 1 2 38639.754 37146.863 Convergence problem
## 2 3 37806.188 LL not replicated Convergence problem
## 3 4 Convergence problem LL not replicated Convergence problem
## 4 5 Convergence problem LL not replicated Convergence problem
## 5 6 Convergence problem LL not replicated Convergence problem
## 6 7 Convergence problem LL not replicated Convergence problem
## 7 8 Convergence problem Convergence problem Convergence problem
## 8 9 Convergence problem Convergence problem Convergence problem
## 9 10 Convergence problem Convergence problem Convergence problem
## model_4 model_5 model_6
## 1 Convergence problem Convergence problem Convergence problem
## 2 Convergence problem Convergence problem Convergence problem
## 3 Convergence problem Convergence problem Convergence problem
## 4 Convergence problem Convergence problem Convergence problem
## 5 Convergence problem Convergence problem Convergence problem
## 6 Convergence problem Convergence problem Convergence problem
## 7 Convergence problem Convergence problem Convergence problem
## 8 Convergence problem Convergence problem Convergence problem
## 9 Convergence problem Convergence problem Convergence problem
# Same start settings as the two runs above, plus an explicit convergence
# criterion, on the raw variables...
out_df5 <- compare_solutions_mplus(
  df,
  dm_cog_eng, dm_beh_eng, dm_aff_eng, dm_challenge, dm_competence,
  st_iterations = 50,
  starts = c(500, 50),
  convergence_criterion = 1e-7
)

# ...and on the standardized variables.
out_df6 <- compare_solutions_mplus(
  df_ss,
  dm_cog_eng, dm_beh_eng, dm_aff_eng, dm_challenge, dm_competence,
  st_iterations = 50,
  starts = c(500, 50),
  convergence_criterion = 1e-7
)