Engineering Success: Identifying Key Predictors of GPA Trajectories Through Latent Class Trajectory Modeling

Author

Heather Perkins, Justin C. Major, John Chen, Edward J. Berger, Allison Godwin

Presentation

Code

Load Libraries

Code
# library(LCTMtools)
library(lcmm)
library(ggplot2)
library(tidyr)
library(dplyr)
library(psych)
library(DT)
# library(VIM)
library(grid)
library(car)
library(moments)
library(modi)
library(nnet)
library(kableExtra)
library(stringr)

Load Data

Code
# data for LCTM
load(file = "uniscreen_output.RData")

# data for LCTM plots
dfc <- read.csv(file="classes_wide.csv", header = T)
postprob <- read.csv(file="classes_pp.csv", header=T)
postprob$probfin <- postprob[cbind(seq_len(nrow(postprob)), postprob$class + 2)]
dfc <- merge(dfc, subset(postprob, select=-c(2:8)), by="altid")

# data for regression
load(file="finaldata.RData")

GPA Plot

Code
time_levels <- 1:12

time_labels <- c("Fall '17", "Winter '18", "Spring '18", "Summer '18",
                 "Fall '18", "Winter '19", "Spring '19", "Summer '19",
                 "Fall '19", "Winter '20", "Spring '20", "Summer '20")

df1$time <- factor(df1$time, levels = time_levels, labels = time_labels)
df1$time_num <- as.numeric(df1$time)

gpa <- ggplot(df1, aes(x = time, y = gpa, color = time_num, fill = time_num)) +
  geom_jitter(width = .05, height = .05, alpha = .5, size = 2) +
  stat_summary(fun = mean, geom = "crossbar", width = 0.4, linewidth = .4) +
  geom_violin(alpha = 0.15, linewidth = .5, scale = "width", width = .6) +
  labs(y = "GPA", x = "Academic Term") +
  scale_color_gradientn(colors = c("#149ED5", "#5C9CCD", "#9696B7",
                                   "#C28D97", "#DE816B", "#E97235")) +
  scale_fill_gradientn(colors = c("#149ED5", "#5C9CCD", "#9696B7",
                                  "#C28D97", "#DE816B", "#E97235")) +
  theme_bw(base_size = 10) +
  theme(
    legend.position = "none",
    panel.grid.minor = element_blank(),
    axis.title.y = element_text(size = 13),
    axis.title.x = element_text(size = 13),
    axis.text.y  = element_text(size = 9),
    strip.text   = element_text(size = 12, lineheight = 0.9),
    plot.background   = element_rect(fill = "transparent", color = NA),
    legend.background = element_rect(fill = "transparent", color = NA),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 11)
  )

gpa

Code
# ggsave("pres/gpa.png", plot = gpa, bg = "transparent",
#        width = 6.75, height = 5.33, dpi = 300)

LCTM Code (Commented Out)

Code
# # descriptives ------------------------------------------------------------
# table(df1$time)
# 
# df1$time <- as.character(df1$time)
# df1$time[df1$time == "fall17"] <- 1
# df1$time[df1$time == "winter18"] <- 2
# df1$time[df1$time == "spring18"] <- 3
# df1$time[df1$time == "summer18"] <- 4
# df1$time[df1$time == "fall18"] <- 5
# df1$time[df1$time == "winter19"] <- 6
# df1$time[df1$time == "spring19"] <- 7
# df1$time[df1$time == "summer19"] <- 8
# df1$time[df1$time == "fall19"] <- 9
# df1$time[df1$time == "winter20"] <- 10
# df1$time[df1$time == "spring20"] <- 11
# df1$time[df1$time == "summer20"] <- 12
# df1$time <- as.numeric(df1$time)
# df1$UID <- as.factor(df1$UID)
# df1$altid <- as.numeric(df1$UID)
# str(df1)
# 
# describe(df)
# 
# # determine fixed or random effects ---------------------------------------
# model1 <- hlme(fixed = gpa~1+time+I(time^2),
#                random = ~-1,
#                subject = "altid",
#                ng = 1,
#                nwg = FALSE,
#                data = data.frame(df1))
# 
# rp <- residualplot_step1( model1,
#                           nameofoutcome="gpa",  nameofage = "time",
#                           data = df1,
#                           ylimit=c(-3,3))
# 
# rp + xlab("Semester")
# 
# # residual profile can be approximated by a flat line, so a random intercept, slope, or quadratic term should be considered instead of a fixed intercept/slope/term
# 
# # determine the number of classes -----------------------------------------
# # test the models with different numbers of classes
# # initial values are specified using gridsearch - hlme is run for 30 times from 100 random vectors, and the estimation procedure is finalized for the initial value that produced the best log-likelihood
# 
# set.seed(100)
# model1_v1 <- hlme(fixed = gpa~1+time+I(time^2),
#                   random = ~ 1 + time,
#                   subject = "altid",
#                   ng = 1,
#                   nwg = FALSE,
#                   data = data.frame(df1))
# 
# model2_grid <- gridsearch(hlme(fixed = gpa ~ 1 + time + I(time^2),
#                                random = ~ 1 + time,
#                                subject = "altid",
#                                data = data.frame(df1),
#                                ng = 2,
#                                mixture = ~1+time+I(time^2)),
#                           rep=100,
#                           maxiter=30,
#                           minit=model1_v1)
# 
# model3_grid <- gridsearch(hlme(fixed = gpa~1+time+I(time^2),
#                                 random = ~ 1 + time,
#                                 subject = "altid",
#                                 data = data.frame(df1),
#                                 ng = 3,
#                                 mixture = ~1+time+I(time^2)),
#                            rep=100,
#                            maxiter=30,
#                            minit=model1_v1)
# 
# model4_grid <- gridsearch(hlme(fixed = gpa~1+time+I(time^2),
#                                random = ~ 1 + time,
#                                subject = "altid",
#                                data = data.frame(df1),
#                                ng = 4,
#                                mixture = ~1+time+I(time^2)),
#                           rep=100,
#                           maxiter=30,
#                           minit=model1_v1)
# 
# model5_grid <- gridsearch(hlme(fixed = gpa~1+time+I(time^2),
#                                random = ~ 1 + time,
#                                subject = "altid",
#                                data = data.frame(df1),
#                                ng = 5,
#                                mixture = ~1+time+I(time^2)),
#                           rep=100,
#                           maxiter=30,
#                           minit=model1_v1)
# 
# model6_grid <- gridsearch(hlme(fixed = gpa~1+time+I(time^2),
#                                random = ~ 1 + time,
#                                subject = "altid",
#                                data = data.frame(df1),
#                                ng = 6,
#                                mixture = ~1+time+I(time^2)),
#                           rep=100,
#                           maxiter=30,
#                           minit=model1_v1)
# 
# model7_grid <- gridsearch(hlme(fixed = gpa~1+time+I(time^2),
#                                random = ~ 1 + time,
#                                subject = "altid",
#                                data = data.frame(df1),
#                                ng = 7,
#                                mixture = ~1+time+I(time^2)),
#                           rep=100,
#                           maxiter=30,
#                           minit=model1_v1)
# #
# # # once all models have been tested, remove all non-model output (e.g., anything not labeled model# or model#_grid) and save as an RData file
# # # this will prevent having to re-run the models, and since there is an element of randomness in the process, will preserve the output
# #
# # rm(df, df1, rp)
# # save.image(file = "uniscreen_output.RData")

Fit Indices

Code
# choose the best model ---------------------------------------------------
# choose the best model by comparing the fit indices and other output
  # npm = number of parameters - This corresponds to the additional class-specific
  # parameters: the proportion of the class, the two Weibull parameters, and the three fixed
  # effects for the quadratic trajectory (intercept, time and time squared).
  # AIC, BIC, SABIC
  # entropy
  # %class - identification of spurious groups


# The IC statistics take both model fit and complexity into consideration. Lower values indicate better trade-off between model  fit  and  complexity.
# However, differences  in  the  penalty  functions  (e.g.,  penalizing  model  complexity)  of  different  model  selection  indices  result  in  inconsistent class solutions (i.e., different indices may favor different  class  solutions).  Previous  research  on  mixture  model  estimation  also  suggests  sample  size  plays  a  role  in  penalty calculations. First,  as  N  approaches  infinity,  penalty  /  N  should  become  closer to zero. Second, as N approaches infinity, log(N) / pen-alty should become closer to zero...Note that the CAIC, BIC, and SABIC fulfill these two conditions.
# AIC  aims  at  finding  the  model  that  minimizes  the  Kullback–Leibler  (K-L)  criterion,  selecting  an  approximate  model,  and  providing  better  predictions  of  the  population  parameters
# On the contrary, BIC targets the “true” underlying model with the highest posterior probability
# Chen, Q., Luo, W., Palardy, G. J., Glaman, R., & McEnturff, A. (2017). The efficacy of common fit indices for enumerating classes in growth mixture models when nested data structure is ignored: A Monte Carlo study. Sage Open, 7(1), 2158244017700459.

# The posterior classification can be used to assess the goodness-of-fit of the model (for the
# selection of the number of latent classes for instance) and the discrimination of the latent
# classes.  Many indicators can be derived from it (Proust-Limaet al.2014).  The package
# lcmm provides two indicators in the function postprob

# The proportion of subjects classified in each latent class with a posterior probability
# above 0.7, 0.8 and 0.9.  This indicates the proportion of subjects not ambiguously
# classified in each latent class.

# The posterior classification table as defined in Table 3 which computes the mean of the
# posterior probabilities of belonging to the latent class among the subjects classified
# a posteriori in each latent class. A perfect classification would provide ones in the 
# diagonal and zeros elsewhere. In practice, high diagonal terms indicate a good
# discrimination of the population.

tab <- summarytable(
  model1, model2_grid, model3_grid, model4_grid, model5_grid,
  model6_grid, model7_grid,
  which = c("G", "loglik", "conv", "npm", "AIC", "BIC", "SABIC",
            "entropy", "%class"),
  display = F)

brks_a <- quantile(tab[,5], probs = seq(.05, .95, .05), na.rm = TRUE)
clrs_a <- round(seq(240, 40, length.out = length(brks_a) + 1), 0) %>%
  {paste0("rgb(255,", ., ",", ., ")")}

brks_b <- quantile(tab[,6], probs = seq(.05, .95, .05), na.rm = TRUE)
clrs_b <- round(seq(240, 40, length.out = length(brks_a) + 1), 0) %>%
  {paste0("rgb(255,", ., ",", ., ")")}

brks_s <- quantile(tab[,7], probs = seq(.05, .95, .05), na.rm = TRUE)
clrs_s <- round(seq(240, 40, length.out = length(brks_a) + 1), 0) %>%
  {paste0("rgb(255,", ., ",", ., ")")}

# lighter shade = lower value = better model
datatable(tab) %>%
  formatRound(columns = c(1:7), digits = 0) %>%
  formatRound(columns = c(8:ncol(tab)), digits = 2) %>%
  formatStyle("AIC", backgroundColor = styleInterval(brks_a, clrs_a)) %>%
  formatStyle("BIC", backgroundColor = styleInterval(brks_b, clrs_b))%>%
  formatStyle("SABIC", backgroundColor = styleInterval(brks_s, clrs_s))
Code
# postprob(model6_grid)

# re-test effects structure and terms
# residualplot_step1( model6_grid, 
#                     nameofoutcome="gpa",  nameofage = "time",
#                     data = df1,
#                     ylimit=c(-3,3))

# model6_grid2 <- gridsearch(hlme(fixed = gpa~1+time+I(time^3),
#                                random = ~ 1 + time,
#                                subject = "altid",
#                                data = data.frame(df1),
#                                ng = 6,
#                                mixture = ~1+time+I(time^3)),
#                           rep=100,
#                           maxiter=30,
#                           minit=model1_v1)
# 
# tab <- summarytable(
#   model6_grid, model6_grid2,
#   which = c("G", "loglik", "conv", "npm", "AIC", "BIC", "SABIC",
#             "entropy", "%class"))
# 
# brks_a <- quantile(tab[,5], probs = seq(.05, .95, .05), na.rm = TRUE)
# clrs_a <- round(seq(240, 40, length.out = length(brks_a) + 1), 0) %>%
#   {paste0("rgb(255,", ., ",", ., ")")}
# 
# brks_b <- quantile(tab[,6], probs = seq(.05, .95, .05), na.rm = TRUE)
# clrs_b <- round(seq(240, 40, length.out = length(brks_a) + 1), 0) %>%
#   {paste0("rgb(255,", ., ",", ., ")")}
# 
# brks_s <- quantile(tab[,7], probs = seq(.05, .95, .05), na.rm = TRUE)
# clrs_s <- round(seq(240, 40, length.out = length(brks_a) + 1), 0) %>%
#   {paste0("rgb(255,", ., ",", ., ")")}
# 
# # lighter shade = lower value = better model
# datatable(tab) %>%
#   formatRound(columns = c(1:7), digits = 0) %>%
#   formatRound(columns = c(8:ncol(tab)), digits = 2) %>%
#   formatStyle("AIC", backgroundColor = styleInterval(brks_a, clrs_a)) %>%
#   formatStyle("BIC", backgroundColor = styleInterval(brks_b, clrs_b))%>%
#   formatStyle("SABIC", backgroundColor = styleInterval(brks_s, clrs_s))

Visualize Solution

Code
classes_6fac <- as.data.frame(model6_grid$pprob[,1:2])

postprob <- model6_grid$pprob
postprob <- round(postprob, 4)
# write.csv(postprob, file="classes_pp.csv", row.names = F)

df_6fac <- merge(df1, classes_6fac, by = "altid")

df_6fac$classn[df_6fac$class == "1"] <- "Class 5, Consistently Struggling, n = 43"
df_6fac$classn[df_6fac$class == "2"] <- "Class 2, Steadily Improving, n = 331"
df_6fac$classn[df_6fac$class == "3"] <- "Class 6, Consistently At-Risk, n = 25"
df_6fac$classn[df_6fac$class == "4"] <- "Class 3, Sharply Improving, n = 69"
df_6fac$classn[df_6fac$class == "5"] <- "Class 1, Consistently High Performers, n = 370"
df_6fac$classn[df_6fac$class == "6"] <- "Class 4, Starting to Struggle, n = 45"

df_6fac$classn <- as.factor(df_6fac$classn)

ggplot(df_6fac, aes(x=time, y=gpa, group=altid, color=classn)) +
  geom_line(alpha = .25) +
  geom_smooth(aes(group=class), method="loess", size=2, se=T, span = 1) +
  xlab("Time") + ylab("GPA") + labs(color = "Class Assignment")

Code
# 
# ggplot(df_6fac, aes(x=time, y=gpa, group=altid, color=classn)) +
#   geom_line(alpha = .25) +
#   geom_smooth(aes(group=class), method="loess", size=2, se=T, span = 1) +
#   xlab("Time") + ylab("GPA") + labs(color = "Class Assignment") + facet_grid(cols = vars(classn))

# write.csv(df_6fac, file="classes_long.csv", row.names = F)
# write.csv(df6, file="classes_wide.csv", row.names = F)

LCTM Plots

All Participants - Facets

Code
dfc$classn[dfc$class == 5] <- "Consistently High Performers, n = 370"
dfc$classn[dfc$class == 2] <- "Steadily Improving, n = 331"
dfc$classn[dfc$class == 4] <- "Sharply Improving, n = 69"
dfc$classn[dfc$class == 6] <- "Starting to Struggle, n = 45"
dfc$classn[dfc$class == 1] <- "Consistently Struggling, n = 43"
dfc$classn[dfc$class == 3] <- "Consistently At-Risk, n = 25"

df_long <- dfc %>%
  pivot_longer(
    cols = starts_with("X"),
    names_to = "term",
    values_to = "gpa"
  ) %>%
  mutate(
    term_num = as.integer(sub("^X", "", term)),
    classn = factor(classn)
  ) %>%
  filter(!is.na(gpa))

df_long$classn <- factor(
  df_long$classn,
  levels = c(
    "Consistently High Performers, n = 370",
    "Steadily Improving, n = 331",
    "Sharply Improving, n = 69",
    "Starting to Struggle, n = 45",
    "Consistently Struggling, n = 43",
    "Consistently At-Risk, n = 25"
  )
)

# class-by-term mean trajectory
df_mean <- df_long %>%
  group_by(classn, term_num) %>%
  summarize(mean_gpa = mean(gpa, na.rm = TRUE), .groups = "drop")

lctm <- ggplot(df_long, aes(x = term_num, y = gpa, group = UID,
                    color = classn, alpha = classn)) +
  geom_line() +
  geom_smooth(
    data = df_mean,
    aes(x = term_num, y = mean_gpa, group = 1),
    method = "loess",
    se = FALSE,
    linewidth = 1.2,
    span = 1,
    alpha = 1
  ) +
  facet_wrap(
    ~ classn,
    nrow = 1,
    labeller = labeller(classn = label_wrap_gen(width = 10))
  ) +
  scale_color_manual(
    values = c(
      "#149ED5", "#5C9CCD", "#9696B7",
      "#C28D97", "#DE816B", "#E97235"
    )
  ) +
  scale_alpha_manual(
    values = c(
      "Consistently High Performers, n = 370" = 0.025,
      "Steadily Improving, n = 331" = 0.025,
      "Sharply Improving, n = 69" = 0.075,
      "Starting to Struggle, n = 45" = 0.10,
      "Consistently Struggling, n = 43" = 0.10,
      "Consistently At-Risk, n = 25" = 0.20
    )
  ) +
  labs(
    x = "Time",
    y = "Average GPA"
  ) +
  theme_bw() +
  theme(
    text = element_text(size = 9),
    axis.title.x = element_text(size = 9),
    axis.title.y = element_text(size = 9),
    strip.text = element_text(size = 8, lineheight = 1),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    legend.position = "none"
  )


lctm <- lctm + theme(
  axis.title.y = element_text(size = 11),
  axis.title.x = element_text(size = 11),
  axis.text.y  = element_text(size = 9),
  legend.title = element_text(size = 11, lineheight = 0.9),
  legend.text  = element_text(size = 10, lineheight = 0.9),
  strip.text   = element_text(size = 11, lineheight = 0.9),
  plot.background   = element_rect(fill = "transparent", color = NA),
  legend.background = element_rect(fill = "transparent", color = NA))

lctm

Code
# ggsave("pres/lctm1.png", plot = lctm, bg = "transparent",
#        width = 9, height = 5, dpi = 300)

All Participants - No Facets

Code
df_long$term_label <- factor(df_long$term_num, levels = time_levels, labels = time_labels)

df_long$classn_short <- factor(gsub(", n = [0-9]+", "", df_long$classn),
                                levels = gsub(", n = [0-9]+", "", levels(df_long$classn)))

lctm2 <- ggplot(df_long, aes(x = term_label, y = gpa, group = UID,
                    color = classn_short, alpha = classn_short)) +
  geom_line() +
  geom_smooth(
    data = df_mean,
    aes(x = term_num, y = mean_gpa, group = classn, color = gsub(", n = [0-9]+", "", classn)),
    method = "loess",
    se = FALSE,
    linewidth = 1.2,
    span = 1,
    alpha = 1
  ) +
  scale_color_manual(
    name = "Trajectory Group",
    values = c(
      "#149ED5", "#5C9CCD", "#9696B7",
      "#C28D97", "#DE816B", "#E97235"
    )
  ) +
  scale_alpha_manual(
    guide = "none",
    values = c(
      "Consistently High Performers" = 0.025,
      "Steadily Improving" = 0.025,
      "Sharply Improving" = 0.075,
      "Starting to Struggle" = 0.10,
      "Consistently Struggling" = 0.10,
      "Consistently At-Risk" = 0.20
    )
  ) +
  labs(x = "Time", y = "Average GPA") +
  theme_bw() +
  theme(
    text = element_text(size = 9),
    axis.title.x = element_text(size = 11),
    axis.title.y = element_text(size = 11),
    axis.text.y  = element_text(size = 9),
    axis.text.x = element_text(size = 9, angle = 45, hjust = 1),
    axis.ticks.x = element_line(color = "black"),
    legend.position = "bottom",
    legend.direction = "horizontal",
    legend.title = element_text(size = 11, lineheight = 0.9),
    legend.text  = element_text(size = 10, lineheight = 0.9),
    plot.background   = element_rect(fill = "transparent", color = NA),
    legend.background = element_rect(fill = "transparent", color = NA)
  ) +
  guides(color = guide_legend(nrow = 2, override.aes = list(alpha = 1, linewidth = 1.5))) +
  scale_color_manual(
    name = "Trajectory Group",
    values = c(
      "Consistently High Performers" = "#149ED5",
      "Steadily Improving"           = "#5C9CCD",
      "Sharply Improving"            = "#9696B7",
      "Starting to Struggle"         = "#C28D97",
      "Consistently Struggling"      = "#DE816B",
      "Consistently At-Risk"         = "#E97235"
    )
  )

lctm2

Code
# ggsave("pres/lctm2.png", plot = lctm2, bg = "transparent",
#        width = 9, height = 5, dpi = 300)

High-Certainty Participants

Posterior probability = 95% or higher

Code
df_long2 <- subset(df_long, probfin > .9499)

df_long2$classn <- factor(df_long2$classn,
                          levels = c("Consistently High Performers, n = 370",
                                     "Steadily Improving, n = 331",
                                     "Sharply Improving, n = 69",
                                     "Starting to Struggle, n = 45",
                                     "Consistently Struggling, n = 43",
                                     "Consistently At-Risk, n = 25"),
                          labels = c("Consistently High Performers, n = 199",
                                     "Steadily Improving, n = 47",
                                     "Sharply Improving, n = 20",
                                     "Starting to Struggle, n = 13",
                                     "Consistently Struggling, n = 19",
                                     "Consistently At-Risk, n = 9"))

df_mean2 <- df_long2 %>%
  group_by(classn, term_num) %>%
  summarize(mean_gpa = mean(gpa, na.rm = TRUE), .groups = "drop")

lctm3 <- ggplot(df_long2, aes(x = term_num, y = gpa, group = UID,
                            color = classn, alpha = classn)) +
  geom_line() +
  geom_smooth(
    data = df_mean2,
    aes(x = term_num, y = mean_gpa, group = 1),
    method = "loess",
    se = FALSE,
    linewidth = 1.2,
    span = 1,
    alpha = 1
  ) +
  facet_wrap(
    ~ classn,
    nrow = 1,
    labeller = labeller(classn = label_wrap_gen(width = 10))
  ) +
  scale_color_manual(
    values = c(
      "#149ED5", "#5C9CCD", "#9696B7",
      "#C28D97", "#DE816B", "#E97235"
    )
  ) +
  scale_alpha_manual(
    values = c(
      "Consistently High Performers, n = 199" = 0.05,
      "Steadily Improving, n = 47" = 0.1,
      "Sharply Improving, n = 20" = 0.2,
      "Starting to Struggle, n = 13" = 0.20,
      "Consistently Struggling, n = 19" = 0.20,
      "Consistently At-Risk, n = 9" = 0.30
    )
  ) +
  labs(
    x = "Time",
    y = "Average GPA"
  ) +
  theme_bw() +
  theme(
    text = element_text(size = 9),
    axis.title.x = element_text(size = 9),
    axis.title.y = element_text(size = 9),
    strip.text = element_text(size = 8, lineheight = 1),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    legend.position = "none"
  )


lctm3 <- lctm3 + theme(
  axis.title.y = element_text(size = 11),
  axis.title.x = element_text(size = 11),
  axis.text.y  = element_text(size = 9),
  legend.title = element_text(size = 11, lineheight = 0.9),
  legend.text  = element_text(size = 10, lineheight = 0.9),
  strip.text   = element_text(size = 11, lineheight = 0.9),
  plot.background   = element_rect(fill = "transparent", color = NA),
  legend.background = element_rect(fill = "transparent", color = NA)
)

lctm3

Regression

Block 1

Code
df2 <- df

df2$class_rn2 <- df2$class_rn
df2$class_rn2[df2$class_rn == "5 Consistently Struggling (n = 43)"] <- "5 Consistently Struggling & At-Risk (n = 68)"
df2$class_rn2[df2$class_rn == "6 Consistently At-Risk (n = 25)"] <- "5 Consistently Struggling & At-Risk (n = 68)"

# set baselines
df2$class_rn2 <- as.factor(df2$class_rn2)
df2 <- subset(df2, race_cats != "noresponse")
df2$race_cats <- as.factor(df2$race_cats)
df2$gen_cats <- as.factor(df2$gen_cats)
df2$class_rn2 <- relevel(df2$class_rn2, ref = "2 Steadily Improving (n = 331)")
df2$race_cats <- relevel(df2$race_cats, ref = "white")
df2$gen_cats <- relevel(df2$gen_cats, ref="cmal")

test <- multinom(class_rn2 ~ race_cats + gen_cats + ACT_MathEquivalent + ACT_ReadEquivalent, data = df2)
# weights:  40 (28 variable)
initial  value 1380.897729 
iter  10 value 1071.744419
iter  20 value 1045.337396
iter  30 value 1045.079659
final  value 1045.079129 
converged
Code
output <- summary(test)

# coefficient = log odds of being in classif moving from reference group/baseline to indicated group1 
z <- output$coefficients^2 / output$standard.errors^2
p <- (1 - pnorm(abs(z), 0, 1)) * 2

# build tidy output per class
class_names <- rownames(output$coefficients)
col_names <- rep(c("OR", "p"), length(class_names))

tableout <- do.call(cbind, lapply(seq_along(class_names), function(i) {
  df <- data.frame(
    OR = round(exp(output$coefficients[i, ]), 3),
    p  = round(p[i, ], 3)
  )
  colnames(df) <- paste0(class_names[i], c("_OR", "_p"))
  df
}))

kable(tableout, format = "html", digits = 3, col.names = col_names) |>
  kable_styling(bootstrap_options = c("striped", "condensed"),
                full_width = TRUE, font_size = 12) |>
  add_header_above(c(" " = 1,
                     "Consistently High Performers" = 2,
                     "Sharply Improving" = 2,
                     "Starting to Struggle" = 2,
                     "Consistently Struggling & At-Risk" = 2)) |>
  column_spec(1, bold = TRUE)
Consistently High Performers
Sharply Improving
Starting to Struggle
Consistently Struggling & At-Risk
OR p OR p OR p OR p
(Intercept) 0.009 0.000 1.513 0.932 0.629 0.939 0.005 0.000
race_catsasian/mideast 0.480 0.000 1.373 0.360 0.433 0.006 0.819 0.798
race_catsbiracial 0.726 0.121 0.451 0.108 0.502 0.235 2.011 0.001
race_catsblack/ind/hislatin 0.678 0.193 1.462 0.469 0.807 0.894 3.719 0.000
gen_catsmarg 1.162 0.461 0.887 0.878 0.799 0.720 0.614 0.052
ACT_MathEquivalent 1.162 0.000 0.901 0.000 0.952 0.380 1.095 0.001
ACT_ReadEquivalent 1.007 0.910 1.040 0.226 1.009 0.967 1.025 0.672

Block 2

Code
block2 <- cbind.data.frame(df2$b5n, df2$b5e, df2$b5a, df2$b5c, df2$b5o, df2$grit, df2$mindset, df2$mindful, df2$mean, df2$grat, df2$selfc)
block2s <- data.frame(scale(block2, center = T, scale = T))

test2 <- multinom(df2$class_rn2 ~ df2$race_cats + df2$gen_cats + df2$ACT_MathEquivalent + df2$ACT_ReadEquivalent +
                    block2s$df2.b5n + block2s$df2.b5e + block2s$df2.b5a + block2s$df2.b5c + block2s$df2.b5o +
                    + block2s$df2.grit + block2s$df2.mindset + block2s$df2.mindful + block2s$df2.mean + block2s$df2.grat + block2s$df2.selfc)
# weights:  95 (72 variable)
initial  value 1380.897729 
iter  10 value 1050.152500
iter  20 value 1020.295562
iter  30 value 1005.666542
iter  40 value 999.294082
iter  50 value 998.715482
iter  60 value 998.676113
iter  70 value 998.671555
final  value 998.671385 
converged
Code
output <- summary(test2)

# coefficient = log odds of being in classif moving from reference group/baseline to indicated group1 
z <- output$coefficients^2 / output$standard.errors^2
p <- (1 - pnorm(abs(z), 0, 1)) * 2

# build tidy output per class
class_names <- rownames(output$coefficients)
col_names <- rep(c("OR", "p"), length(class_names))

tableout <- do.call(cbind, lapply(seq_along(class_names), function(i) {
  df <- data.frame(
    OR = round(exp(output$coefficients[i, ]), 3),
    p  = round(p[i, ], 3)
  )
  colnames(df) <- paste0(class_names[i], c("_OR", "_p"))
  df
}))

kable(tableout, format = "html", digits = 3, col.names = col_names) |>
  kable_styling(bootstrap_options = c("striped", "condensed"),
                full_width = TRUE, font_size = 12) |>
  add_header_above(c(" " = 1,
                     "Consistently High Performers" = 2,
                     "Sharply Improving" = 2,
                     "Starting to Struggle" = 2,
                     "Consistently Struggling & At-Risk" = 2)) |>
  column_spec(1, bold = TRUE)
Consistently High Performers
Sharply Improving
Starting to Struggle
Consistently Struggling & At-Risk
OR p OR p OR p OR p
(Intercept) 0.007 0.000 2.641 0.677 0.719 0.971 0.006 0.000
df2$race_catsasian/mideast 0.478 0.000 1.350 0.444 0.434 0.008 0.760 0.648
df2$race_catsbiracial 0.734 0.170 0.437 0.092 0.469 0.162 1.887 0.012
df2$race_catsblack/ind/hislatin 0.593 0.025 1.389 0.626 0.921 0.985 4.773 0.000
df2$gen_catsmarg 0.988 0.997 1.007 1.000 0.937 0.979 0.706 0.401
df2$ACT_MathEquivalent 1.177 0.000 0.880 0.000 0.944 0.239 1.078 0.025
df2$ACT_ReadEquivalent 1.002 0.990 1.037 0.359 1.007 0.978 1.030 0.589
block2s$df2.b5n 1.186 0.001 0.724 0.000 0.953 0.944 1.064 0.873
block2s$df2.b5e 0.911 0.224 1.237 0.040 0.886 0.647 1.031 0.965
block2s$df2.b5a 0.972 0.911 1.383 0.000 1.068 0.891 0.988 0.995
block2s$df2.b5c 0.876 0.039 1.110 0.678 1.103 0.790 0.924 0.816
block2s$df2.b5o 0.928 0.452 1.139 0.458 1.513 0.000 1.187 0.202
block2s$df2.grit 0.998 1.000 1.047 0.931 0.816 0.241 1.031 0.971
block2s$df2.mindset 1.012 0.984 0.843 0.234 0.846 0.381 0.816 0.081
block2s$df2.mindful 1.187 0.002 0.628 0.000 0.809 0.195 0.930 0.847
block2s$df2.mean 1.104 0.239 0.760 0.002 1.000 1.000 0.955 0.934
block2s$df2.grat 1.169 0.006 0.784 0.013 0.955 0.951 0.697 0.000
block2s$df2.selfc 0.765 0.000 1.093 0.784 1.049 0.954 1.363 0.003

Block 3

Code
block3 <- cbind.data.frame(df2$test, df2$tse, df2$perce, df2$percss, df2$stressch, df2$stressco, df2$stressf, df2$stressr, df2$stresssupp)
block3s <- data.frame(scale(block3, center = T, scale = T))

test3 <- multinom(df2$class_rn2 ~ df2$race_cats + df2$gen_cats + df2$ACT_MathEquivalent + df2$ACT_ReadEquivalent +
                    block2s$df2.b5n + block2s$df2.b5e + block2s$df2.b5a + block2s$df2.b5c + block2s$df2.b5o +
                    + block2s$df2.grit + block2s$df2.mindset + block2s$df2.mindful + 
                    block2s$df2.mean + block2s$df2.grat + block2s$df2.selfc +
                    block3s$df2.test + block3s$df2.tse + block3s$df2.perce + block3s$df2.percss + block3s$df2.stressch +
                    block3s$df2.stressco + block3s$df2.stressf + block3s$df2.stressr + block3s$df2.stresssupp)
# weights:  140 (108 variable)
initial  value 1380.897729 
iter  10 value 1018.835981
iter  20 value 992.378700
iter  30 value 969.965385
iter  40 value 962.225970
iter  50 value 959.980901
iter  60 value 959.632663
iter  70 value 959.599424
iter  80 value 959.593535
iter  90 value 959.592953
final  value 959.592931 
converged
Code
output <- summary(test3)

# coefficient = log odds of being in classif moving from reference group/baseline to indicated group1 
z <- output$coefficients^2 / output$standard.errors^2
p <- (1 - pnorm(abs(z), 0, 1)) * 2

# build tidy output per class
class_names <- rownames(output$coefficients)
col_names <- rep(c("OR", "p"), length(class_names))

tableout <- do.call(cbind, lapply(seq_along(class_names), function(i) {
  df <- data.frame(
    OR = round(exp(output$coefficients[i, ]), 3),
    p  = round(p[i, ], 3)
  )
  colnames(df) <- paste0(class_names[i], c("_OR", "_p"))
  df
}))

kable(tableout, format = "html", digits = 3, col.names = col_names) |>
  kable_styling(bootstrap_options = c("striped", "condensed"),
                full_width = TRUE, font_size = 12) |>
  add_header_above(c(" " = 1,
                     "Consistently High Performers" = 2,
                     "Sharply Improving" = 2,
                     "Starting to Struggle" = 2,
                     "Consistently Struggling & At-Risk" = 2)) |>
  column_spec(1, bold = TRUE)
Consistently High Performers
Sharply Improving
Starting to Struggle
Consistently Struggling & At-Risk
OR p OR p OR p OR p
(Intercept) 0.016 0.000 2.342 0.768 0.878 0.996 0.004 0.000
df2$race_catsasian/mideast 0.536 0.000 1.474 0.235 0.447 0.017 0.698 0.454
df2$race_catsbiracial 0.740 0.221 0.444 0.123 0.477 0.192 1.858 0.023
df2$race_catsblack/ind/hislatin 0.657 0.170 1.418 0.597 0.940 0.992 4.485 0.000
df2$gen_catsmarg 0.987 0.997 0.975 0.996 0.918 0.968 0.827 0.824
df2$ACT_MathEquivalent 1.153 0.000 0.881 0.000 0.936 0.141 1.085 0.012
df2$ACT_ReadEquivalent 0.996 0.975 1.036 0.417 1.009 0.967 1.034 0.502
block2s$df2.b5n 1.352 0.000 0.685 0.000 1.026 0.987 1.185 0.332
block2s$df2.b5e 0.856 0.003 1.233 0.072 0.840 0.378 1.046 0.930
block2s$df2.b5a 0.990 0.991 1.277 0.044 1.104 0.775 1.059 0.893
block2s$df2.b5c 0.857 0.009 1.133 0.585 1.095 0.821 0.949 0.922
block2s$df2.b5o 0.903 0.205 1.164 0.341 1.545 0.000 1.206 0.156
block2s$df2.grit 0.896 0.212 1.096 0.770 0.763 0.059 0.984 0.993
block2s$df2.mindset 1.083 0.444 0.808 0.090 0.835 0.340 0.773 0.009
block2s$df2.mindful 1.016 0.982 0.671 0.000 0.758 0.042 0.943 0.907
block2s$df2.mean 1.051 0.792 0.749 0.001 0.968 0.977 0.937 0.873
block2s$df2.grat 1.114 0.247 0.788 0.036 0.952 0.948 0.760 0.002
block2s$df2.selfc 0.792 0.000 1.104 0.775 1.068 0.919 1.397 0.003
block3s$df2.test 0.746 0.000 1.206 0.268 0.918 0.854 1.312 0.025
block3s$df2.tse 1.241 0.000 0.793 0.080 1.015 0.996 0.912 0.784
block3s$df2.perce 0.966 0.901 1.113 0.674 0.879 0.647 0.886 0.600
block3s$df2.percss 1.141 0.093 0.838 0.258 1.233 0.265 1.041 0.954
block3s$df2.stressch 0.713 0.000 1.130 0.649 0.823 0.344 1.064 0.906
block3s$df2.stressco 1.190 0.000 1.211 0.154 0.833 0.303 0.984 0.992
block3s$df2.stressf 0.847 0.008 0.702 0.000 1.024 0.990 0.823 0.229
block3s$df2.stressr 1.042 0.893 1.093 0.844 0.987 0.997 0.687 0.000
block3s$df2.stresssupp 1.214 0.000 1.272 0.033 1.210 0.283 0.885 0.560

Block 4

Code
block4 <- cbind.data.frame(df2$idi, df2$idpc, df2$idr, df2$bel, df2$ftpc, df2$ftpe, df2$ftpi, df2$ftpp, df2$ftpv)
block4s <- data.frame(scale(block4, center = T, scale = T))

test4 <- multinom(df2$class_rn2 ~ df2$race_cats + df2$gen_cats + df2$ACT_MathEquivalent + df2$ACT_ReadEquivalent +
                    block2s$df2.b5n + block2s$df2.b5e + block2s$df2.b5a + block2s$df2.b5c + block2s$df2.b5o +
                    + block2s$df2.grit + block2s$df2.mindset + block2s$df2.mindful + 
                    block2s$df2.mean + block2s$df2.grat + block2s$df2.selfc +
                    block3s$df2.test + block3s$df2.tse + block3s$df2.perce + block3s$df2.percss + block3s$df2.stressch +
                    block3s$df2.stressco + block3s$df2.stressf + block3s$df2.stressr + block3s$df2.stresssupp +
                    block4s$df2.idi + block4s$df2.idpc + block4s$df2.idr + block4s$df2.bel + block4s$df2.ftpc + block4s$df2.ftpe +
                    block4s$df2.ftpi + block4s$df2.ftpp + block4s$df2.ftpv)
# weights:  185 (144 variable)
initial  value 1380.897729 
iter  10 value 1026.161838
iter  20 value 992.101426
iter  30 value 953.358682
iter  40 value 946.018296
iter  50 value 941.495887
iter  60 value 940.305037
iter  70 value 940.080322
iter  80 value 940.049116
iter  90 value 940.042298
iter 100 value 940.041573
final  value 940.041573 
stopped after 100 iterations
Code
output <- summary(test4)

# coefficient = log odds of being in classif moving from reference group/baseline to indicated group1 
z <- output$coefficients^2 / output$standard.errors^2
p <- (1 - pnorm(abs(z), 0, 1)) * 2

# build tidy output per class
class_names <- rownames(output$coefficients)
col_names <- rep(c("OR", "p"), length(class_names))

tableout <- do.call(cbind, lapply(seq_along(class_names), function(i) {
  df <- data.frame(
    OR = round(exp(output$coefficients[i, ]), 3),
    p  = round(p[i, ], 3)
  )
  colnames(df) <- paste0(class_names[i], c("_OR", "_p"))
  df
}))

kable(tableout, format = "html", digits = 3, col.names = col_names) |>
  kable_styling(bootstrap_options = c("striped", "condensed"),
                full_width = TRUE, font_size = 12) |>
  add_header_above(c(" " = 1,
                     "Consistently High Performers" = 2,
                     "Sharply Improving" = 2,
                     "Starting to Struggle" = 2,
                     "Consistently Struggling & At-Risk" = 2)) |>
  column_spec(1, bold = TRUE)
Consistently High Performers
Sharply Improving
Starting to Struggle
Consistently Struggling & At-Risk
OR p OR p OR p OR p
(Intercept) 0.013 0.000 1.206 0.990 0.229 0.526 0.002 0.000
df2$race_catsasian/mideast 0.514 0.000 1.623 0.081 0.384 0.001 0.663 0.344
df2$race_catsbiracial 0.716 0.142 0.482 0.237 0.437 0.116 1.884 0.021
df2$race_catsblack/ind/hislatin 0.609 0.065 1.914 0.086 0.893 0.974 4.186 0.000
df2$gen_catsmarg 0.978 0.992 1.037 0.993 0.739 0.646 0.766 0.681
df2$ACT_MathEquivalent 1.159 0.000 0.881 0.000 0.967 0.716 1.102 0.001
df2$ACT_ReadEquivalent 0.999 0.997 1.050 0.160 1.021 0.836 1.044 0.293
block2s$df2.b5n 1.338 0.000 0.664 0.000 0.948 0.946 1.173 0.411
block2s$df2.b5e 0.847 0.001 1.255 0.058 0.770 0.063 1.003 1.000
block2s$df2.b5a 0.984 0.976 1.311 0.024 1.102 0.792 1.032 0.968
block2s$df2.b5c 0.864 0.024 1.167 0.455 1.141 0.655 0.940 0.899
block2s$df2.b5o 0.913 0.375 1.281 0.045 1.802 0.000 1.213 0.189
block2s$df2.grit 0.884 0.129 1.047 0.945 0.759 0.059 0.989 0.997
block2s$df2.mindset 1.048 0.800 0.795 0.082 0.759 0.047 0.735 0.000
block2s$df2.mindful 1.010 0.992 0.626 0.000 0.767 0.074 0.981 0.991
block2s$df2.mean 1.104 0.342 0.765 0.016 1.096 0.848 0.993 0.999
block2s$df2.grat 1.110 0.411 0.968 0.977 1.171 0.644 0.812 0.233
block2s$df2.selfc 0.786 0.000 1.159 0.556 1.032 0.983 1.344 0.029
block3s$df2.test 0.779 0.000 1.157 0.534 0.973 0.987 1.402 0.002
block3s$df2.tse 1.243 0.000 0.786 0.072 1.069 0.918 0.918 0.822
block3s$df2.perce 0.960 0.872 1.247 0.092 0.885 0.714 0.860 0.450
block3s$df2.percss 1.140 0.104 0.851 0.377 1.275 0.158 1.055 0.925
block3s$df2.stressch 0.721 0.000 1.184 0.393 0.853 0.547 1.057 0.928
block3s$df2.stressco 1.179 0.001 1.296 0.017 0.838 0.366 0.969 0.970
block3s$df2.stressf 0.845 0.008 0.652 0.000 0.988 0.997 0.818 0.236
block3s$df2.stressr 1.038 0.915 1.056 0.944 1.007 0.999 0.689 0.000
block3s$df2.stresssupp 1.202 0.000 1.290 0.027 1.155 0.567 0.847 0.311
block4s$df2.idi 0.972 0.968 1.049 0.968 1.003 1.000 0.912 0.883
block4s$df2.idpc 0.997 1.000 1.092 0.901 0.789 0.521 1.154 0.756
block4s$df2.idr 0.869 0.097 0.752 0.018 0.877 0.702 0.985 0.995
block4s$df2.bel 0.963 0.954 1.125 0.829 0.647 0.035 0.668 0.014
block4s$df2.ftpc 0.943 0.710 1.175 0.336 0.794 0.095 0.817 0.090
block4s$df2.ftpe 1.162 0.174 0.837 0.523 1.159 0.742 1.067 0.926
block4s$df2.ftpi 1.123 0.295 0.564 0.000 0.940 0.940 1.133 0.681
block4s$df2.ftpp 0.914 0.648 1.154 0.704 1.123 0.861 1.155 0.718
block4s$df2.ftpv 0.889 0.082 0.955 0.927 0.898 0.700 0.820 0.108

Regression Plots

Code
df2 <- df

df2$class_rn2 <- df2$class_rn
df2$class_rn2[df2$class_rn == "5 Consistently Struggling (n = 43)"] <- "5 Consistently Struggling & At-Risk (n = 68)"
df2$class_rn2[df2$class_rn == "6 Consistently At-Risk (n = 25)"] <- "5 Consistently Struggling & At-Risk (n = 68)"

df2$class_rn2 <- factor(df2$class_rn2,
                       levels = sort(unique(df2$class_rn2)),
                       labels = gsub("^[0-9]+\\s*", "", sort(unique(df2$class_rn2))))

Race

Code
df_sub <- df2[df2$class_rn2 %in% c(
  "Consistently High Performers (n = 370)",
  "Steadily Improving (n = 331)",
  "Sharply Improving (n = 69)",
  "Starting to Struggle (n = 45)",
  "Consistently Struggling & At-Risk (n = 68)"
), ]

df_sub2 <- df_sub[df_sub$race_cats %in% c("asian/mideast", "white", "biracial", "black/ind/hislatin"), ]

df_sub2$race_group <- factor(df_sub2$race_cats,
                             levels = c("white", "asian/mideast", "biracial", "black/ind/hislatin"),
                             labels = c("White", "Asian/Middle Eastern", "Biracial", "Black/Indigenous/Hispanic/Latino"))

df_sub2$class_rn2 <- factor(df_sub2$class_rn2,
                            levels = c("Steadily Improving (n = 331)",
                                       "Consistently High Performers (n = 370)",
                                       "Sharply Improving (n = 69)",
                                       "Starting to Struggle (n = 45)",
                                       "Consistently Struggling & At-Risk (n = 68)"))

race_all <- ggplot(df_sub2, aes(x = class_rn2, fill = race_group)) +
  geom_bar(position = "fill", width = 0.7) +
  geom_vline(xintercept = 1.5, linetype = "dashed", color = "grey40") +
  labs(y = "Proportion", x = NULL, fill = NULL) +
  scale_y_continuous(labels = scales::percent) +
  theme_bw(base_size = 10) +
  theme(
    legend.position = "bottom",
    panel.grid.minor = element_blank()
  ) +
  scale_fill_manual(values = c("White" = "#E97235",
                               "Asian/Middle Eastern" = "#C28D97",
                               "Biracial" = "#5C9CCD",
                               "Black/Indigenous/Hispanic/Latino" = "#0E6ED8")) +
  scale_x_discrete(labels = function(x) str_wrap(x, width = 10)) +
  theme(
    axis.title.y = element_text(size = 11),
    axis.title.x = element_text(size = 11),
    axis.text.y  = element_text(size = 9),
    axis.text.x  = element_text(size = 10),
    legend.title = element_text(size = 10, lineheight = 0.9),
    legend.text  = element_text(size = 10, lineheight = 0.9),
    strip.text   = element_text(size = 11, lineheight = 0.9),
    plot.background   = element_rect(fill = "transparent", color = NA),
    legend.background = element_rect(fill = "transparent", color = NA)
  )

race_all

Code
# ggsave("pres/race_all.png", plot = race_all, bg = "transparent",
#        width = 6.75, height = 5.33, dpi = 300)

Standardized Math Test Scores

Code
df2$ACT_MathEquivalent_s <- scale(df2$ACT_MathEquivalent, center = T, scale = T)

block2 <- cbind.data.frame(df2$b5n, df2$b5e, df2$b5a, df2$b5c, df2$b5o, df2$grit, df2$mindset, df2$mindful, df2$mean, df2$grat, df2$selfc)
block2s <- data.frame(scale(block2, center = T, scale = T))
block3 <- cbind.data.frame(df2$test, df2$tse, df2$perce, df2$percss, df2$stressch, df2$stressco, df2$stressf, df2$stressr, df2$stresssupp)
block3s <- data.frame(scale(block3, center = T, scale = T))
block4 <- cbind.data.frame(df2$idi, df2$idpc, df2$idr, df2$bel, df2$ftpc, df2$ftpe, df2$ftpi, df2$ftpp, df2$ftpv)
block4s <- data.frame(scale(block4, center = T, scale = T))

# residualize ACT_MathEquivalent against all other predictors
math_resid_model <- lm(df2$ACT_MathEquivalent_s ~ df2$race_cats + df2$gen_cats + df2$ACT_ReadEquivalent +
                         block2s$df2.b5n + block2s$df2.b5e + block2s$df2.b5a + block2s$df2.b5c + block2s$df2.b5o +
                         block2s$df2.grit + block2s$df2.mindset + block2s$df2.mindful +
                         block2s$df2.mean + block2s$df2.grat + block2s$df2.selfc +
                         block3s$df2.test + block3s$df2.tse + block3s$df2.perce + block3s$df2.percss + block3s$df2.stressch +
                         block3s$df2.stressco + block3s$df2.stressf + block3s$df2.stressr + block3s$df2.stresssupp +
                         block4s$df2.idi + block4s$df2.idpc + block4s$df2.idr + block4s$df2.bel + block4s$df2.ftpc + block4s$df2.ftpe +
                         block4s$df2.ftpi + block4s$df2.ftpp + block4s$df2.ftpv)

df2$math_resid <- NA
df2$math_resid[as.integer(names(resid(math_resid_model)))] <- resid(math_resid_model)

df2$class_rn2 <- factor(df2$class_rn2,
                        levels = c("Steadily Improving (n = 331)",
                                   "Consistently High Performers (n = 370)",
                                   "Sharply Improving (n = 69)",
                                   "Starting to Struggle (n = 45)",
                                   "Consistently Struggling & At-Risk (n = 68)"))

vio_math <- ggplot(df2, aes(x = class_rn2, y = math_resid, color = class_rn2, fill = class_rn2)) +
  geom_jitter(width = .05, height = .05, alpha = .5, size = 2) +
  stat_summary(fun = mean, geom = "crossbar", width = 0.4,
               linewidth = .4) +
  geom_violin(alpha = 0.15, linewidth = .5, scale = "width", width = .6) +
  geom_vline(xintercept = 1.5, linetype = "dashed", color = "grey40") +
  labs(y = "Standardized Math Score (Centered and Scaled)", x = NULL) +
  scale_x_discrete(labels = function(x) str_wrap(x, width = 10)) +
  scale_color_manual(values = c("#149ED5", "#5C9CCD", "#9696B7",
                                "#C28D97", "#DE816B", "#E97235")) +
  scale_fill_manual(values = c("#149ED5", "#5C9CCD", "#9696B7",
                               "#C28D97", "#DE816B", "#E97235")) +
  theme_bw(base_size = 10) +
  theme(
    legend.position = "none",
    panel.grid.minor = element_blank(),
    axis.title.y = element_text(size = 11),
    axis.title.x = element_text(size = 11),
    axis.text.y  = element_text(size = 9),
    axis.text.x  = element_text(size = 10),
    legend.title = element_text(size = 10, lineheight = 0.9),
    legend.text  = element_text(size = 9, lineheight = 0.9),
    strip.text   = element_text(size = 11, lineheight = 0.9),
    plot.background   = element_rect(fill = "transparent", color = NA),
    legend.background = element_rect(fill = "transparent", color = NA)
  )

vio_math

Code
# ggsave("pres/vio_math.png", plot = vio_math, bg = "transparent",
#        width = 6.75, height = 5.33, dpi = 300)

Test Anxiety

Code
# residualize test anxiety against all other predictors
test_resid_model <- lm(block3s$df2.test ~ df2$race_cats + df2$gen_cats + df2$ACT_MathEquivalent_s + df2$ACT_ReadEquivalent +
                         block2s$df2.b5n + block2s$df2.b5e + block2s$df2.b5a + block2s$df2.b5c + block2s$df2.b5o +
                         block2s$df2.grit + block2s$df2.mindset + block2s$df2.mindful +
                         block2s$df2.mean + block2s$df2.grat + block2s$df2.selfc +
                         block3s$df2.tse + block3s$df2.perce + block3s$df2.percss + block3s$df2.stressch +
                         block3s$df2.stressco + block3s$df2.stressf + block3s$df2.stressr + block3s$df2.stresssupp +
                         block4s$df2.idi + block4s$df2.idpc + block4s$df2.idr + block4s$df2.bel + block4s$df2.ftpc + block4s$df2.ftpe +
                         block4s$df2.ftpi + block4s$df2.ftpp + block4s$df2.ftpv)

df2$test_resid <- NA
df2$test_resid[as.integer(names(resid(test_resid_model)))] <- resid(test_resid_model)

vio_test <- ggplot(df2, aes(x = class_rn2, y = test_resid, color = class_rn2, fill = class_rn2)) +
  geom_jitter(width = .05, height = .05, alpha = .5, size = 2) +
  stat_summary(fun = mean, geom = "crossbar", width = 0.4,
               linewidth = .4) +
  geom_violin(alpha = 0.15, linewidth = .5, scale = "width", width = .6) +
  geom_vline(xintercept = 1.5, linetype = "dashed", color = "grey40") +
  labs(y = "Test Anxiety (Centered and Scaled)", x = NULL) +
  scale_x_discrete(labels = function(x) str_wrap(x, width = 10)) +
  scale_color_manual(values = c("#149ED5", "#5C9CCD", "#9696B7",
                                "#C28D97", "#DE816B", "#E97235")) +
  scale_fill_manual(values = c("#149ED5", "#5C9CCD", "#9696B7",
                               "#C28D97", "#DE816B", "#E97235")) +
  theme_bw(base_size = 10) +
  theme(
    legend.position = "none",
    panel.grid.minor = element_blank(),
    axis.title.y = element_text(size = 11),
    axis.title.x = element_text(size = 11),
    axis.text.y  = element_text(size = 9),
    axis.text.x  = element_text(size = 10),
    legend.title = element_text(size = 10, lineheight = 0.9),
    legend.text  = element_text(size = 9, lineheight = 0.9),
    strip.text   = element_text(size = 11, lineheight = 0.9),
    plot.background   = element_rect(fill = "transparent", color = NA),
    legend.background = element_rect(fill = "transparent", color = NA)
  )

vio_test

Code
# ggsave("pres/vio_test.png", plot = vio_test, bg = "transparent",
#        width = 6.75, height = 5.33, dpi = 300)

Neuroticism

Code
neuro_resid_model <- lm(block2s$df2.b5n ~ df2$race_cats + df2$gen_cats + df2$ACT_MathEquivalent_s + df2$ACT_ReadEquivalent +
                          block2s$df2.b5e + block2s$df2.b5a + block2s$df2.b5c + block2s$df2.b5o +
                          block2s$df2.grit + block2s$df2.mindset + block2s$df2.mindful +
                          block2s$df2.mean + block2s$df2.grat + block2s$df2.selfc +
                          block3s$df2.test + block3s$df2.tse + block3s$df2.perce + block3s$df2.percss + block3s$df2.stressch +
                          block3s$df2.stressco + block3s$df2.stressf + block3s$df2.stressr + block3s$df2.stresssupp +
                          block4s$df2.idi + block4s$df2.idpc + block4s$df2.idr + block4s$df2.bel + block4s$df2.ftpc + block4s$df2.ftpe +
                          block4s$df2.ftpi + block4s$df2.ftpp + block4s$df2.ftpv)

df2$neuro_resid <- NA
df2$neuro_resid[as.integer(names(resid(neuro_resid_model)))] <- resid(neuro_resid_model)

vio_neuro <- ggplot(df2, aes(x = class_rn2, y = neuro_resid, color = class_rn2, fill = class_rn2)) +
  geom_jitter(width = .05, height = .05, alpha = .5, size = 2) +
  stat_summary(fun = mean, geom = "crossbar", width = 0.4,
               linewidth = .4) +
  geom_violin(alpha = 0.15, linewidth = .5, scale = "width", width = .6) +
  geom_vline(xintercept = 1.5, linetype = "dashed", color = "grey40") +
  labs(y = "Neuroticism (Centered and Scaled)", x = NULL) +
  scale_x_discrete(labels = function(x) str_wrap(x, width = 10)) +
  scale_color_manual(values = c("#149ED5", "#5C9CCD", "#9696B7",
                                "#C28D97", "#DE816B", "#E97235")) +
  scale_fill_manual(values = c("#149ED5", "#5C9CCD", "#9696B7",
                               "#C28D97", "#DE816B", "#E97235")) +
  theme_bw(base_size = 10) +
  theme(
    legend.position = "none",
    panel.grid.minor = element_blank(),
    axis.title.y = element_text(size = 11),
    axis.title.x = element_text(size = 11),
    axis.text.y  = element_text(size = 9),
    axis.text.x  = element_text(size = 10),
    legend.title = element_text(size = 10, lineheight = 0.9),
    legend.text  = element_text(size = 9, lineheight = 0.9),
    strip.text   = element_text(size = 11, lineheight = 0.9),
    plot.background   = element_rect(fill = "transparent", color = NA),
    legend.background = element_rect(fill = "transparent", color = NA)
  )

vio_neuro

Code
# ggsave("pres/vio_neuro.png", plot = vio_neuro, bg = "transparent",
#        width = 6.75, height = 5.33, dpi = 300)

Belonging

Code
bel_resid_model <- lm(block4s$df2.bel ~ df2$race_cats + df2$gen_cats + df2$ACT_MathEquivalent_s + df2$ACT_ReadEquivalent +
                        block2s$df2.b5n + block2s$df2.b5e + block2s$df2.b5a + block2s$df2.b5c + block2s$df2.b5o +
                        block2s$df2.grit + block2s$df2.mindset + block2s$df2.mindful +
                        block2s$df2.mean + block2s$df2.grat + block2s$df2.selfc +
                        block3s$df2.test + block3s$df2.tse + block3s$df2.perce + block3s$df2.percss + block3s$df2.stressch +
                        block3s$df2.stressco + block3s$df2.stressf + block3s$df2.stressr + block3s$df2.stresssupp +
                        block4s$df2.idi + block4s$df2.idpc + block4s$df2.idr + block4s$df2.ftpc + block4s$df2.ftpe +
                        block4s$df2.ftpi + block4s$df2.ftpp + block4s$df2.ftpv)

df2$bel_resid <- NA
df2$bel_resid[as.integer(names(resid(bel_resid_model)))] <- resid(bel_resid_model)

vio_bel <- ggplot(df2, aes(x = class_rn2, y = bel_resid, color = class_rn2, fill = class_rn2)) +
  geom_jitter(width = .05, height = .05, alpha = .5, size = 2) +
  stat_summary(fun = mean, geom = "crossbar", width = 0.4,
               linewidth = .4) +
  geom_violin(alpha = 0.15, linewidth = .5, scale = "width", width = .6) +
  geom_vline(xintercept = 1.5, linetype = "dashed", color = "grey40") +
  labs(y = "Belonging (Centered and Scaled)", x = NULL) +
  scale_x_discrete(labels = function(x) str_wrap(x, width = 10)) +
  scale_color_manual(values = c("#149ED5", "#5C9CCD", "#9696B7",
                                "#C28D97", "#DE816B", "#E97235")) +
  scale_fill_manual(values = c("#149ED5", "#5C9CCD", "#9696B7",
                               "#C28D97", "#DE816B", "#E97235")) +
  theme_bw(base_size = 10) +
  theme(
    legend.position = "none",
    panel.grid.minor = element_blank(),
    axis.title.y = element_text(size = 11),
    axis.title.x = element_text(size = 11),
    axis.text.y  = element_text(size = 9),
    axis.text.x  = element_text(size = 10),
    legend.title = element_text(size = 10, lineheight = 0.9),
    legend.text  = element_text(size = 9, lineheight = 0.9),
    strip.text   = element_text(size = 11, lineheight = 0.9),
    plot.background   = element_rect(fill = "transparent", color = NA),
    legend.background = element_rect(fill = "transparent", color = NA)
  )

vio_bel

Code
# ggsave("pres/vio_bel.png", plot = vio_bel, bg = "transparent",
#        width = 6.75, height = 5.33, dpi = 300)