Science, Engineering, and Research Identities in Graduate Engineering Education: A Latent Profile Analysis

Author

Heather Perkins, Matthew Bahnson, Marissa A. Tsugawa, & Adam Kirn

Load Libraries

Code
library(psych) #for describe()
library(corrplot) #for corrplot()
library(nFactors) #for factor analysis
library(naniar) #for gg_miss_upset()
library(mclust) #for LPA
library(kableExtra) #for tables
library(tidyr) #for graphs
library(ggplot2) #for graphs
library(Rmisc) #for summarySE()
library(reshape2) #for data transformation
library(fmsb) #for spider plot
library(dplyr) #for data transformation

library(stringr)
library(sjPlot)

Load Data

Code
# Read the raw survey export.
import <- read.csv(file = "finaldata.csv")

# limited to finished and doctoral students
keep_row <- import$Q71 == 1 & import$Finished == 1
import2 <- import[keep_row & !is.na(keep_row), ]

# Keep the X column plus every column whose name matches the Q3.1, Q4.1
# and Q5.1 patterns (in that order).
item_cols <- c(
  grep("Q3.1", colnames(import)),
  grep("Q4.1", colnames(import)),
  grep("Q5.1", colnames(import))
)
d <- import2[, c(match("X", colnames(import2)), item_cols), drop = FALSE]

# Only d is needed from here on.
rm(import, import2, keep_row, item_cols)

Calculate Identity Scores

Check Missing Data

Code
# MCAR test on every column except the first (X).
mcar_test(d[, -1, drop = FALSE])
# A tibble: 1 x 4
  statistic    df p.value missing.patterns
      <dbl> <dbl>   <dbl>            <int>
1      544.   514   0.178               14

Check Cronbach’s Alpha

Code
# Cronbach's alpha for each block of items, in the order Q3, Q5, Q4.
alpha_results <- lapply(c("Q3", "Q5", "Q4"), function(prefix) {
  psych::alpha(d[, grep(prefix, colnames(d)), drop = FALSE])
})

# Raw alpha of each block, rounded to three decimals.
vapply(alpha_results, function(res) round(res$total$raw_alpha, 3), numeric(1))
[1] 0.913 0.935 0.943

Calculate Composites

Code
# Composite scores: the mean of the listed items of each block.
#   s <- Q3.1_2 ... Q3.1_15 (14 items)
#   r <- Q5.1_2 ... Q5.1_16 (15 items)
#   e <- Q4.1_2 ... Q4.1_14 (13 items)
# rowMeans() ties the denominator to the number of items actually listed.
# The e composite previously summed these 13 items but divided by 17, which
# shrank every raw e score by a factor of 13/17.
# As with the explicit sums, a row with any missing item gets NA.
s_items <- paste0("Q3.1_", 2:15)
r_items <- paste0("Q5.1_", 2:16)
e_items <- paste0("Q4.1_", 2:14)

# as.numeric() drops the row names that rowMeans() attaches.
d$s <- as.numeric(rowMeans(d[, s_items]))

d$r <- as.numeric(rowMeans(d[, r_items]))

d$e <- as.numeric(rowMeans(d[, e_items]))

Check for Outliers

Code
# Complete cases on the first column of d plus the three composites.
d1 <- na.omit(d[, c(1, match(c("s", "r", "e"), colnames(d))), drop = FALSE])

# Mahalanobis distance of every row from the centroid of (s, r, e).
scores <- d1[-1]
m_dist <- mahalanobis(scores, center = colMeans(scores), cov = cov(scores))
d1$MD <- round(m_dist, 1)
rm(scores)
plot(d1$MD)
describe(m_dist)
   vars    n mean   sd median trimmed  mad  min   max range skew kurtosis  se
X1    1 1160    3 3.47   2.09    2.32 1.59 0.02 33.43 33.41 3.67    19.63 0.1
Code
# Outlier cutoff: the .999 quantile of a chi-square distribution whose
# degrees of freedom equal the number of variables that entered the
# Mahalanobis distance (s, r, e -> df = 3).
# ncol(d1) - 1 was used before, but by this point d1 also holds the MD
# column, so it evaluated to 4 and set the cutoff too high.
# NOTE: the name `cut` shadows base::cut(); function calls to cut() still
# resolve to the base function.
n_dist_vars <- sum(colnames(d1) %in% c("s", "r", "e"))
cut <- qchisq(.999, df = n_dist_vars)
abline(h = cut, col = "red")

Code
# TRUE for rows whose (rounded) distance exceeds the cutoff, FALSE otherwise.
d1$outlier <- !is.na(d1$MD) & d1$MD > cut
table(d1$outlier)

FALSE  TRUE 
 1149    11 
Code
# Keep only the rows that were not flagged as outliers.
d1 <- d1[which(!d1$outlier), , drop = FALSE]

# Rows of the full item-level data whose X value survived the screening.
d2 <- d[d$X %in% d1$X, , drop = FALSE]

Create Standardized Variables

Code
# z-score each composite; as.vector() strips the matrix returned by scale().
for (score in c("s", "r", "e")) {
  d2[[paste0(score, "_std")]] <-
    as.vector(scale(d2[[score]], center = TRUE, scale = TRUE))
}
rm(score)

Create Final Dataframe for LPA

Code
# Restore the saved workspace. load() overwrites any session object that
# shares a name with an object stored in the file.
# NOTE(review): presumably this supplies the fitted objects used below
# (output, lrt, BIC, icl, m1-m9), whose fitting calls are commented out,
# and possibly d2 itself - confirm against the saved image.
load(file = "LPAv6.RData")

# Select by column name rather than by position (previously 68:70 and
# 59:61), so the result does not depend on the column layout of d2.
# NOTE(review): assumes positions 68:70 / 59:61 of d2 are the columns named
# s_std, r_std, e_std / s, r, e - verify once against colnames(d2).
d_std <- d2[, c("X", "s_std", "r_std", "e_std")]
d_raw <- d2[, c("X", "s", "r", "e")]

Run Latent Profile Analysis

Entropy Plot

Code
#entropy plot - elbow indicates number of classes that should be considered
# Drop the first column (X) so only the standardized scores remain.
d <- d_std[, -1, drop = FALSE]
# output <- clustCombi(data = d)
output2 <- clustCombiOptim(output, plot = TRUE)

Code
# Print the sequence of component merges stored in `output`.
summary(output)
----------------------------------------------------
Combining Gaussian mixture components for clustering 
----------------------------------------------------

Mclust model name: VII 
Number of components: 8 

Combining steps:

  Step | Classes combined at this step | Class labels after this step
-------|-------------------------------|-----------------------------
   0   |              ---              | 1 2 3 4 5 6 7 8 
   1   |             1 & 6             | 1 2 3 4 5 7 8 
   2   |             1 & 5             | 1 2 3 4 7 8 
   3   |             1 & 3             | 1 2 4 7 8 
   4   |             1 & 2             | 1 4 7 8 
   5   |             1 & 7             | 1 4 8 
   6   |             1 & 4             | 1 8 
   7   |             1 & 8             | 1 
Code
# Entropy plot of the combined solutions, "normalized" variant.
entPlot(output$MclustOutput$z, output$combiM, abc = c("normalized"))

Code
# Entropy plot of the combined solutions, "standard" variant.
entPlot(output$MclustOutput$z, output$combiM, abc = c("standard"))

Calculate LRT Scores

Code
# Calculate and print LRT scores.
# BLRT (bootstrap likelihood ratio test): compares the same model with
# different numbers of classes. A significant result indicates that the model
# with more classes fits better than the one with fewer, so it should be
# significant.
# lrt <- mclustBootstrapLRT(d, mclust.options("emModelNames"))
print(lrt)
------------------------------------------------------------- 
Bootstrap sequential LRT for the number of mixture components 
------------------------------------------------------------- 
Model        = EII 
Replications = 999 
                 LRTS bootstrap p-value
1 vs 2   4.504357e+02             0.001
2 vs 3   7.954769e+01             0.001
3 vs 4   1.784418e+02             0.001
4 vs 5   7.649047e+01             0.001
5 vs 6   5.237173e+01             0.001
6 vs 7   5.490722e+01             0.001
7 vs 8   4.971794e+01             0.001
8 vs 9   1.254765e-03             0.803

Calculate and Plot BIC Scores

Code
# Calculate and plot BIC scores.
# Sample-adjusted BIC: in the literature the lowest BIC indicates the best
# fitting model (specific to Mplus); in R, higher BIC is better.
# BIC <- mclustBIC(d, G = 1:9, mclust.options("emModelNames"))
plot.mclustBIC(BIC)

Calculate and Plot ICL Scores

Code
# Calculate and plot ICL scores.
# ICL is the BIC penalized by the estimated mean entropy; it is not well
# suited to datasets with overlapping classes. Higher ICL is better.
# icl <- mclustICL(d, G = 1:9, mclust.options("emModelNames"))
plot.mclustICL(icl)

Create Fit Indices Table

Code
# # calculate stuff for table
# n <- 1
# p <- 1
# q <- 1
# for (i in 1:9) {
#   #run model
#   varname <- paste("m",n,sep="")
#   m <- Mclust(d, G = n, mclust.options("emModelNames"))
#   assign(varname,m)
#   #extract ll
#   varname3 <- paste("logl",n,sep="")
#   ll <- eval(parse(text=paste("m",n,"$loglik",sep = "")))
#   assign(varname3,ll)
#   #calc free parameters
#   #dimension of the data = number of original variables in model
#   #EII 1
#   #VII G
#   #EEI p
#   #free covariance parameters, VEI p + G - 1
#   #EVI pG - G + 1
#   #VVI pG
#   #EEE p(p + 1)/2
#   #free covariance parameters, EEV Gp(p + 1)/2 - (G - 1)p
#   #free covariance parameters, VEV Gp(p + 1)/2 - (G - 1)(p - 1)
#   #VVV Gp(p + 1)/2
#   #EVE p(p + 1)/2 + (G - 1)(p - 1)
#   #VVE p(p + 1)/2 + (G - 1)p
#   #VEE p(p + 1)/2 + (G - 1)
#   #EVV Gp(p + 1)/2 - (G - 1)
#   if (m$modelName == "EII") {
#     freepar <- 1
#   } else if (m$modelName == "VII") {
#     freepar <- m$G
#   } else if (m$modelName == "EEI") {
#     freepar <- m$d
#   } else if (m$modelName == "VEI") {
#     freepar <- m$d + m$G - 1
#   } else if (m$modelName == "EVI") {
#     freepar <- m$d*m$G - m$G + 1
#   } else if (m$modelName == "VVI") {
#     freepar <- m$d*m$G
#   } else if (m$modelName == "EEE") {
#     freepar <- m$d*(m$d + 1)/2
#   } else if (m$modelName == "EEV") {
#     freepar <- m$G*m$d*(m$d + 1)/2 - (m$G - 1)*m$d
#   } else if (m$modelName == "VEV") {
#     freepar <- m$G*m$d*(m$d + 1)/2 - (m$G - 1)*(m$d - 1)
#   } else if (m$modelName == "VVV") {
#     freepar <- m$G*m$d*(m$d + 1)/2
#   } else if (m$modelName == "XXX") {
#     freepar <- "-"
#   } else if (m$modelName == "EVE") {
#     freepar <- m$d*(m$d + 1)/2 + (m$G - 1)*(m$d - 1)
#   } else if (m$modelName == "VVE") {
#     freepar <- m$d*(m$d + 1)/2 + (m$G - 1)*m$d
#   } else if (m$modelName == "VEE") {
#     freepar <- m$d*(m$d + 1)/2 + (m$G - 1)
#   } else if (m$modelName == "EVV") {
#     freepar <- m$G*m$d*(m$d + 1)/2 - (m$G - 1)
#   } else {
#     freepar <- 999
#   }
#   varname4 <- paste("freep",n,sep="")
#   assign(varname4,freepar)
#   #cal post prob
#   m <- eval(parse(text=paste("m",n,sep="")))
#   uncer <- m$uncertainty
#   posterior_prob <- 1 - round(uncer, 5)
#   #cal entropy
#   ent <- round(mean(posterior_prob), 3)
#   varname2 <- paste("ent",n,sep="")
#   assign(varname2, ent)
#   #AIC <- (2 * mcm$df - 2 * mcm$loglik)
#   varname5 <- paste("aic",n,sep="")
#   aic <- (2 * m$df - 2 * m$loglik)
#   assign(varname5,aic)
#   #CAIC <- (((log(mcm$n) + 1) * mcm$df) - 2 * mcm$loglik)
#   varname6 <- paste("caic",n,sep="")
#   caic <- (((log(m$n) + 1) * m$df) - 2 * m$loglik)
#   assign(varname6,caic)
#   #SABIC <- (mcm$df * log((mcm$n + 2)/24) - 2 * mcm$loglik)
#   varname7 <- paste("sabic",n,sep="")
#   sabic <- (m$df * log((m$n + 2)/24) - 2 * m$loglik)
#   assign(varname7, sabic)
#   #iterate
#   n <- n+1
#   p <- p+1
#   #grab group sizes
#   grps <- table(eval(parse(text=paste("m",q,"$classification",sep=""))))
#   tg <- as.list(sort(unique(eval(parse(text=paste("m",q,"$classification",sep=""))))))
#   t <- 1
#   #calculate group percents
#   for (i in tg) {
#     perc <- grps[[t]] / (eval(parse(text=paste("m",i,"$n[1]",sep=""))))
#     varname <- paste("m",q,"perc",t,sep="")
#     assign(varname,perc)
#     t <- t + 1
#   }
#   #combine all group percents into one vector
#   mp <- mapply(get, grep(paste("m",q,"perc",sep=""), ls(), value=T))
#   varname <- paste("allperc",q,sep="")
#   assign(varname,mp)
#   q <- q + 1
# }

#throw stuff together for the table
# One row per solution (1 to 9 profiles), one column per fit index.
proflist <- sprintf("Profiles %d", 1:9)
models <- list(m1, m2, m3, m4, m5, m6, m7, m8, m9)

# Model name of each solution. Rows 4-9 previously repeated m2$modelName,
# so the table showed m2's model for every later solution.
modlist <- vapply(models, function(m) m$modelName, character(1))

#fix BLRT columns
# Row k holds the (k-1)-vs-k comparison, so row 1 has no test ("-").
# Every available comparison is reported; the last one (8 vs 9) used to be
# replaced by "-". Rows without a stored comparison stay "-".
blrt_column <- function(values) {
  col <- rep("-", length(proflist))
  k <- min(length(values), length(proflist) - 1)
  col[1 + seq_len(k)] <- values[seq_len(k)]
  col
}
lrtobs <- blrt_column(round(lrt$obs, digits = 2))
lrtp <- blrt_column(lrt$p.value)

# BIC and ICL of the model actually reported in each row, so these columns
# match the Model and LL columns. BIC[1:9] and icl[1:9] took the first nine
# cells of each matrix in column order, i.e. the first model column only.
bicval <- round(vapply(models, function(m) m$bic, numeric(1)), digits = 2)
iclval <- round(vapply(models, function(m) mclust::icl(m), numeric(1)),
                digits = 2)

# Per-solution statistics stored in the workspace as <name>1 ... <name>9.
listent <- round(rbind(ent1,ent2,ent3,ent4,ent5,ent6,ent7,ent8,ent9), digits = 2)
listlog <- round(rbind(logl1,logl2,logl3,logl4,logl5,logl6,logl7,logl8,logl9), digits = 2)
listfreep <- rbind(freep1,freep2,freep3,freep4,freep5,freep6,freep7,freep8,freep9)
listaic <- round(rbind(aic1,aic2,aic3,aic4,aic5,aic6,aic7,aic8,aic9), digits = 2)
listcaic <- round(rbind(caic1,caic2,caic3,caic4,caic5,caic6,caic7,caic8,caic9), digits = 2)
listsabic <- round(rbind(sabic1,sabic2,sabic3,sabic4,sabic5,sabic6,sabic7,sabic8,sabic9), digits = 2)

# Number of profiles in each solution that hold less than 5% of the sample.
listperc <- rbind(
  sum(allperc1 < .05),
  sum(allperc2 < .05),
  sum(allperc3 < .05),
  sum(allperc4 < .05),
  sum(allperc5 < .05),
  sum(allperc6 < .05),
  sum(allperc7 < .05),
  sum(allperc8 < .05),
  sum(allperc9 < .05))

indicestab <- cbind(modlist,listlog,listfreep,listaic,listcaic,bicval,listsabic,iclval,lrtobs,lrtp,listent,listperc)
colnames(indicestab) <- c("Model","LL","#FP","AIC","CAIC","BIC","SABIC","ICL","BLRTS","BLRTS p","Entropy","<5%")
rownames(indicestab) <- proflist

indicestab %>%
    kbl() %>%
    kable_styling(font_size = 11)
Model LL #FP AIC CAIC BIC SABIC ICL BLRTS BLRTS p Entropy <5%
Profiles 1 XXX -4519.04 - 9056.07 9110.37 -9671.07 9072.78 -9671.07 - - 1 0
Profiles 2 VEV -4285.58 10 8605.16 8707.71 -9248.77 8636.72 -9541 450.44 0.001 0.98 0
Profiles 3 VEV -4218.65 14 8487.3 8638.12 -9197.35 8533.71 -9535.1 79.55 0.001 0.84 0
Profiles 4 VEV -4128.84 18 8323.68 8522.75 -9047.04 8384.94 -9529.21 178.44 0.001 0.77 0
Profiles 5 VEV -4123.64 5 8295.29 8440.07 -8998.68 8339.84 -9456.41 76.49 0.001 0.85 1
Profiles 6 VEV -4099.49 6 8256.98 8431.93 -8974.44 8310.82 -9407.92 52.37 0.001 0.84 2
Profiles 7 VEV -4080.04 7 8228.07 8433.18 -8947.66 8291.19 -9448.65 54.91 0.001 0.79 2
Profiles 8 VEV -4045.21 8 8168.42 8403.69 -8926.07 8240.82 -9508.31 49.72 0.001 0.8 3
Profiles 9 VEV -4009.91 46 8181.82 8670.46 -8954.2 8332.18 -9648.95 - - 0.76 2

Save LPA Output

Code
# save.image(file="LPAv6.RData")

Visualize Best LPA Models

Code
# Print the summary of a fitted mixture model and draw diagnostic plots of
# its classes, using the standardized scores in the global data frame d_std.
#
# orig: a fitted model with $classification and $uncertainty components;
#       it is also passed to summary.Mclust().
#
# Prints, in order: the model summary, a bar chart of class means with
# standard-error bars, density plots by variable and by class, one jitter
# plot per class highlighting its members, and the density of the posterior
# probabilities. The global d_std is not modified (a local copy is used).
# Returns NULL invisibly.
viz2 <- function(orig){
  mcm <- orig
  print(summary.Mclust(mcm))
  d_std$class <- mcm$classification

  df_long <- gather(d_std, key = "variable", value = "value", s_std, r_std, e_std)
  # summarySE() builds the table of means and standard errors for the plot
  summ <- summarySE(df_long, measurevar = "value",
                    groupvars = c("variable", "class"), na.rm = TRUE)

  # Create a barplot of means for each column by class
  means <- ggplot(summ, aes(x = class, y = value, fill = variable)) +
    geom_bar(stat = "summary", fun = "mean", position = "dodge") +
    geom_errorbar(aes(ymin = value - se, ymax = value + se),
                  width = .2,
                  position = position_dodge(.9))
  print(means)

  df_long$class <- as.factor(df_long$class)

  # Layers shared by all density plots. linewidth replaces the size
  # argument, which is deprecated for lines.
  density_layers <- list(
    geom_density(alpha = 0.4, bw = .20),
    geom_vline(xintercept = 0, linewidth = .5, color = "black"),
    xlim(-4.5, 2)
  )

  dist <- ggplot(df_long, aes(x = value, fill = class)) +
    density_layers +
    facet_wrap(~variable)
  print(dist)

  # One figure per variable, with one panel per class.
  for (v in unique(df_long$variable)) {
    print(v)
    print(ggplot(subset(df_long, variable == v), aes(x = value, fill = class)) +
            density_layers +
            facet_wrap(~class))
  }

  # One figure per class, highlighting that class's members. The mask is
  # built from df_long itself; it was previously built from d_std, which
  # has a third as many rows and matched only through vector recycling.
  for (cls in levels(df_long$class)) {
    df_long$class_spec <- as.factor(as.integer(df_long$class == cls))

    scatter <- ggplot(df_long, aes(x = variable, y = value, color = class_spec)) +
      geom_jitter()
    print(paste("Class", cls))
    print(scatter)
  }

  # Density of the posterior probabilities (1 - uncertainty). plot() draws
  # as a side effect and returns NULL, so its result is not printed.
  posterior_prob <- 1 - round(mcm$uncertainty, 5)
  plot(density(posterior_prob))
  invisible(NULL)
}

Five-Class Model

Code
# Summary and diagnostic plots for the model stored in m5.
viz2(m5)
---------------------------------------------------- 
Gaussian finite mixture model fitted by EM algorithm 
---------------------------------------------------- 

Mclust VII (spherical, varying volume) model with 5 components: 

 log-likelihood    n df       BIC       ICL
      -4123.645 1133 24 -8416.073 -8812.255

Clustering table:
  1   2   3   4   5 
226 179 630  65  33 

[1] "s_std"

[1] "r_std"

[1] "e_std"

[1] "Class 1"

[1] "Class 2"

[1] "Class 3"

[1] "Class 4"

[1] "Class 5"

NULL

Six-Class Model

Code
# Summary and diagnostic plots for the model stored in m6.
viz2(m6)
---------------------------------------------------- 
Gaussian finite mixture model fitted by EM algorithm 
---------------------------------------------------- 

Mclust VII (spherical, varying volume) model with 6 components: 

 log-likelihood    n df      BIC       ICL
      -4099.492 1133 29 -8402.93 -8845.879

Clustering table:
  1   2   3   4   5   6 
231 198 554  78  39  33 

[1] "s_std"

[1] "r_std"

[1] "e_std"

[1] "Class 1"

[1] "Class 2"

[1] "Class 3"

[1] "Class 4"

[1] "Class 5"

[1] "Class 6"

NULL
Code
# classes <- cbind.data.frame(
#   subset(d_fin, select=c(X, s_std,r_std,e_std)),
#   subset(d_raw, select=c(s,r,e)),
#   subset(d_fin, select=c(class,class_fin,pp)))
# 
# write.csv(classes, file="classes.csv", row.names = F)
# write.csv(classes, file="classes2.csv", row.names = F)

# Start from an empty workspace.
rm(list = ls())

# Read the file with the standardized scores and final class assignment,
# and keep only the columns needed for the profile plots.
df <- read.csv(file = "auxvar.csv", header = TRUE)
lpa_df <- df[, c("R_IDs", "s_std", "r_std", "e_std", "class_fin")]

Final LPA Model

Violin & Scatterplot

Code
# Long format: one row per participant and identity variable.
df_long <- gather(lpa_df, key = "variable", value = "value", s_std, r_std, e_std)

# Replace the column codes with display names.
domain_labels <- c(
  s_std = "Scientist Identity",
  r_std = "Researcher Identity",
  e_std = "Engineering Identity"
)
df_long$variable <- unname(domain_labels[df_long$variable])

# Display label for each class code.
class_labels <- c(
  "1" = "Dispersed GEI, n = 231",
  "2" = "Strong GEI, n = 198",
  "3" = "Moderate GEI, n = 554",
  "4" = "Mixed, Primarily Scientist, n = 78",
  "5" = "Mixed, Primarily Engineer, n = 39",
  "6" = "Maximum GEI, n = 33"
)

# Factor levels follow the panel order: Dispersed, Moderate, Strong, then
# the two Mixed classes and Maximum.
panel_order <- c("1", "3", "2", "4", "5", "6")
df_long$class_fin <- factor(as.character(df_long$class_fin),
                            levels = panel_order,
                            labels = unname(class_labels[panel_order]))

# Violin + jitter plot of the three identity scores, one panel per class.
# All scale, guide and theme settings are applied in a single chain. Where a
# theme element was previously set twice, only the final value is kept.
lpa <- ggplot(df_long, aes(x = variable, y = value, color = factor(variable))) +
  geom_violin(fill = "transparent", color = "black", alpha = 0, linewidth = .25) +
  # crossbar marks the mean within each violin
  stat_summary(fun = mean, geom = "crossbar", width = 0.5,
               middle.linewidth = 2, color = "black", linewidth = .4) +
  geom_jitter(width = .1, height = .1, alpha = .25, size = 1.5) +
  facet_wrap(~ class_fin, nrow = 1, labeller = label_wrap_gen(width = 12)) +
  scale_color_viridis_d(option = "D", end = 0.9) +
  # integer breaks from the floor of the lower limit up to 3
  scale_y_continuous(
    breaks = function(x) seq(floor(min(x)), ceiling(3), by = 1)
  ) +
  labs(x = NULL, y = "Identity Score", color = "Identity Domain:") +
  guides(
    color = guide_legend(override.aes = list(size = 3, alpha = 1), nrow = 1)
  ) +
  theme_bw(base_size = 10) +
  theme(
    # the x axis is identified by the colour legend instead of tick labels
    axis.text.x  = element_blank(),
    axis.ticks.x = element_blank(),
    axis.title.y = element_text(size = 11),
    axis.text.y  = element_text(size = 9),

    # legend at bottom, horizontal
    legend.position   = "bottom",
    legend.direction  = "horizontal",
    legend.box        = "horizontal",
    legend.title      = element_text(size = 11, lineheight = 0.9),
    legend.text       = element_text(size = 10, lineheight = 0.9),
    legend.key.height = unit(0.9, "lines"),
    legend.key.width  = unit(1.2, "lines"),

    strip.text        = element_text(size = 10, lineheight = 0.9),
    panel.grid.minor  = element_blank(),

    # transparent backgrounds
    plot.background   = element_rect(fill = "transparent", color = NA),
    legend.background = element_rect(fill = "transparent", color = NA)
  )

lpa

Code
# ggsave("pres/lpa_vio.png", plot = lpa, bg = "transparent",
#        width = 8, height = 5.5, dpi = 300)

Dispersed GEI Difference Plots

Code
# order of sub-constructs within each participant
domain_order <- c("Engineering Identity", "Scientist Identity", "Researcher Identity")

# prep data: dispersed group only, ordered by engineering identity
in_dispersed <- df_long$class_fin == "Dispersed GEI, n = 231"
dispersed <- df_long[in_dispersed, ]
rm(in_dispersed)

# Rank participants by their engineering identity score (lowest = rank 1).
is_eng <- dispersed$variable == "Engineering Identity"
eng_order <- dispersed[is_eng, c("R_IDs", "value")]
rm(is_eng)
eng_order <- eng_order[order(eng_order$value), ]
eng_order$participant_rank <- seq_len(nrow(eng_order))

dispersed <- merge(dispersed,
                   eng_order[, c("R_IDs", "participant_rank")],
                   by = "R_IDs")
dispersed$variable <- factor(dispersed$variable, levels = domain_order)
# Position of each row when ordered by participant rank, then by domain.
dispersed$reveal_order <-
  3 * (dispersed$participant_rank - 1) + as.integer(dispersed$variable)

library(reshape)
# One row per participant, one value.<domain> column per identity domain.
dispersed_wide <- reshape(dispersed[, c("R_IDs", "variable", "value")],
                          direction = "wide",
                          idvar = "R_IDs",
                          timevar = "variable")

# Spread of each participant's three scores (largest minus smallest).
dispersed_wide$range <- apply(dispersed_wide[, -1], 1,
                              function(scores) max(scores) - min(scores))

# sort descending
dispersed_wide <- dispersed_wide[order(-dispersed_wide$range), ]
head(dispersed_wide)
    R_IDs value.Scientist Identity value.Engineering Identity
157 31714                 1.559984                 -3.7043428
457 69101                 1.321765                 -3.2481202
625 90025                -1.060419                 -0.6248398
238 43265                 1.083547                 -3.2481202
391 61935                -5.110133                 -1.4232295
448 68305                 1.202656                 -3.1340645
    value.Researcher Identity    range
157                  1.208237 5.264327
457                  1.208237 4.569886
625                 -5.127389 4.502549
238                  1.208237 4.456357
391                 -0.757992 4.352141
448                  1.099002 4.336721
Code
# Split participants into six bins of the rank of their range
# (rank 1 = largest range).
group_labels <- c("Group 1 (Largest Differences)",
                  paste("Group", 2:5),
                  "Group 6 (Smallest Differences)")
dispersed_wide$group <- cut(rank(-dispersed_wide$range),
                            breaks = 6,
                            labels = group_labels)

# Attach each participant's group and range to the long-format rows.
dispersed_sub <- merge(
  dispersed[dispersed$R_IDs %in% dispersed_wide$R_IDs, ],
  dispersed_wide[, c("R_IDs", "group", "range")],
  by = "R_IDs"
)

dispersed_sub$variable <- factor(dispersed_sub$variable,
                                  levels = c("Engineering Identity",
                                             "Researcher Identity",
                                             "Scientist Identity"))

# identify top 5 most and least dispersed within the full dispersed group
# (the first and last five rows of dispersed_wide)
top5_ids <- head(dispersed_wide$R_IDs, 5)
bot5_ids <- tail(dispersed_wide$R_IDs, 5)

# "high" takes precedence over "low" if an ID appears in both sets.
highlight <- rep("none", nrow(dispersed_sub))
highlight[dispersed_sub$R_IDs %in% bot5_ids] <- "low"
highlight[dispersed_sub$R_IDs %in% top5_ids] <- "high"
dispersed_sub$highlight <- highlight
rm(highlight, group_labels)

# Profile plot: one line per participant across the three identity domains,
# one panel per group; highlighted participants get heavier, darker lines.
disp <- ggplot(dispersed_sub, aes(x = variable, y = value, color = factor(variable))) +
  geom_point(alpha = .6, size = 2) +
  geom_line(aes(group = R_IDs, alpha = highlight, linewidth = highlight),
            color = "grey30") +
  scale_alpha_manual(
    values = c("high" = .9, "low" = .9, "none" = .25),
    guide = "none"
  ) +
  scale_linewidth_manual(
    values = c("high" = .8, "low" = .8, "none" = .2),
    guide = "none"
  ) +
  scale_color_viridis_d(option = "D", end = 0.9) +
  # integer breaks from the floor of the lower limit up to 3
  scale_y_continuous(
    breaks = function(x) seq(floor(min(x)), ceiling(3), by = 1)
  ) +
  facet_wrap(~ group, nrow = 2) +
  labs(
    title = "Differences in Domain Scores Among Dispersed GEI Participants",
    x = NULL,
    y = "Identity Score",
    color = "Identity Domain:"
  ) +
  theme_bw(base_size = 10) +
  theme(
    axis.text.x       = element_blank(),
    axis.ticks.x      = element_blank(),
    axis.title.y      = element_text(size = 11),
    axis.text.y       = element_text(size = 9),
    legend.position   = "bottom",
    legend.direction  = "horizontal",
    legend.title      = element_text(size = 11, lineheight = 0.9),
    legend.text       = element_text(size = 10, lineheight = 0.9),
    strip.text        = element_text(size = 10, lineheight = 0.9),
    panel.grid.minor  = element_blank(),
    plot.background   = element_rect(fill = "transparent", color = NA),
    legend.background = element_rect(fill = "transparent", color = NA)
  )

disp

Code
# ggsave("pres/disp.png", plot = disp, bg = "transparent",
#        width = 8, height = 5.5, dpi = 300)

Auxiliary Analyses

Code
# Start from an empty workspace.
rm(list = ls())

# Blank and whitespace-only cells are read as missing.
import <- read.csv(file = "finaldata.csv", header = TRUE,
                   na.strings = c("", " ", "NA"))

# Source column -> short analysis name.
aux_cols <- c(
  R_IDs            = "R_IDs",
  Major            = "Major",
  Start.Year.Month = "start",
  Q19.3            = "progress",
  Q19.16           = "gender",
  Q19.16_7_TEXT    = "gendertxt",
  Q19.13           = "race",
  Q19.13_8_TEXT    = "racetxt",
  Q13.1_1          = "dadp",
  Q13.1_2          = "itp",
  Q19.11           = "intl"
)
df <- import[, names(aux_cols)]
colnames(df) <- unname(aux_cols)
rm(aux_cols)

# Add R_IDs to the saved class assignments by matching on X.
classes <- read.csv(file = "classes.csv", header = TRUE)
classes <- merge(classes, import[, c("R_IDs", "X")], by = "X")

# Swap class labels 2 and 3. Both masks come from the untouched backup
# column, so the second assignment cannot undo the first.
classes$class_bu <- classes$class
was_two <- classes$class_bu == "2"
was_three <- classes$class_bu == "3"
classes$class[was_two] <- "3"
classes$class[was_three] <- "2"
rm(was_two, was_three)

Clean Major

Code
# Frequency of each raw Major value, with a row for missing values.
table(df$Major, useNA = "always")

                       Acoustic Engineering 
                                          2 
                      Aerospace Engineering 
                                         50 
    Agricultural and Biological Engineering 
                                         47 
                     Biological Engineering 
                                          2 
                                 Biomedical 
                                        128 
                     Biomedical Engineering 
                                          3 
      Chemical and Biomolecular Engineering 
                                         18 
         Chemical and Materials Engineering 
                                          2 
                       Chemical Engineering 
                                        170 
        Civil and Environmental Engineering 
                                         18 
                          Civil Engineering 
                                        142 
                       Computer Engineering 
                                         33 
                           Computer Science 
                                         93 
           Computer Science and Engineering 
                                         21 
               Computer Science Engineering 
                                          4 
          Electrical & Computer Engineering 
                                         62 
      Electrical and Electronic Engineering 
                                          2 
                     Electrical Engineering 
                                        200 
Electrical Engineering and Computer Science 
                                          2 
                                Engineering 
                                          2 
                      Engineering Education 
                                          1 
                     Engineering Management 
                                          4 
                        Engineering Physics 
                                          5 
                              Environmental 
                                         51 
                  Environmental Engineering 
                                          1 
                                        Geo 
                                          6 
                                 Industrial 
                                         40 
           Material Science and Engineering 
                                        185 
                     Mechanical Engineering 
                                        260 
                        Nuclear Engineering 
                                         59 
                          Ocean Engineering 
                                         11 
                          Other Engineering 
                                          3 
                                  Petroleum 
                                         17 
                        Systems Engineering 
                                         10 
                                       <NA> 
                                        100 
Code
# Raw Major value -> collapsed category. Values not listed here (including
# "Other Engineering" itself and missing values) are carried over unchanged.
major_recode <- c(
  "Acoustic Engineering"                        = "Other Engineering", # n = 2
  "Engineering"                                 = "Other Engineering", # n = 2
  "Engineering Education"                       = "Other Engineering", # n = 1
  "Engineering Management"                      = "Other Engineering", # n = 4
  "Engineering Physics"                         = "Other Engineering", # n = 5
  "Geo"                                         = "Other Engineering", # n = 6
  "Ocean Engineering"                           = "Other Engineering",
  "Petroleum"                                   = "Other Engineering",
  "Systems Engineering"                         = "Other Engineering",

  "Biological Engineering"                      = "Bio Engineering",
  "Biomedical"                                  = "Bio Engineering",
  "Biomedical Engineering"                      = "Bio Engineering",

  "Chemical and Biomolecular Engineering"       = "Chemical Engineering",
  "Chemical and Materials Engineering"          = "Chemical Engineering",

  "Civil and Environmental Engineering"         = "Civil Engineering",

  "Computer Science Engineering"                = "Computer Science and Engineering",

  "Electrical & Computer Engineering"           = "Elec Engineering",
  "Electrical and Electronic Engineering"       = "Elec Engineering",
  "Electrical Engineering"                      = "Elec Engineering",
  "Electrical Engineering and Computer Science" = "Elec Engineering",

  "Environmental"                               = "Environmental Engineering",

  "Industrial"                                  = "Industrial Engineering"
)

df$maj_fin <- df$Major
needs_recode <- df$Major %in% names(major_recode)
df$maj_fin[needs_recode] <-
  unname(major_recode[as.character(df$Major[needs_recode])])
rm(needs_recode, major_recode)

# Cross-tabulate raw against collapsed values to check the mapping.
table(df$Major, df$maj_fin, useNA = "always")
                                             
                                              Aerospace Engineering
  Acoustic Engineering                                            0
  Aerospace Engineering                                          50
  Agricultural and Biological Engineering                         0
  Biological Engineering                                          0
  Biomedical                                                      0
  Biomedical Engineering                                          0
  Chemical and Biomolecular Engineering                           0
  Chemical and Materials Engineering                              0
  Chemical Engineering                                            0
  Civil and Environmental Engineering                             0
  Civil Engineering                                               0
  Computer Engineering                                            0
  Computer Science                                                0
  Computer Science and Engineering                                0
  Computer Science Engineering                                    0
  Electrical & Computer Engineering                               0
  Electrical and Electronic Engineering                           0
  Electrical Engineering                                          0
  Electrical Engineering and Computer Science                     0
  Engineering                                                     0
  Engineering Education                                           0
  Engineering Management                                          0
  Engineering Physics                                             0
  Environmental                                                   0
  Environmental Engineering                                       0
  Geo                                                             0
  Industrial                                                      0
  Material Science and Engineering                                0
  Mechanical Engineering                                          0
  Nuclear Engineering                                             0
  Ocean Engineering                                               0
  Other Engineering                                               0
  Petroleum                                                       0
  Systems Engineering                                             0
  <NA>                                                            0
                                             
                                              Agricultural and Biological Engineering
  Acoustic Engineering                                                              0
  Aerospace Engineering                                                             0
  Agricultural and Biological Engineering                                          47
  Biological Engineering                                                            0
  Biomedical                                                                        0
  Biomedical Engineering                                                            0
  Chemical and Biomolecular Engineering                                             0
  Chemical and Materials Engineering                                                0
  Chemical Engineering                                                              0
  Civil and Environmental Engineering                                               0
  Civil Engineering                                                                 0
  Computer Engineering                                                              0
  Computer Science                                                                  0
  Computer Science and Engineering                                                  0
  Computer Science Engineering                                                      0
  Electrical & Computer Engineering                                                 0
  Electrical and Electronic Engineering                                             0
  Electrical Engineering                                                            0
  Electrical Engineering and Computer Science                                       0
  Engineering                                                                       0
  Engineering Education                                                             0
  Engineering Management                                                            0
  Engineering Physics                                                               0
  Environmental                                                                     0
  Environmental Engineering                                                         0
  Geo                                                                               0
  Industrial                                                                        0
  Material Science and Engineering                                                  0
  Mechanical Engineering                                                            0
  Nuclear Engineering                                                               0
  Ocean Engineering                                                                 0
  Other Engineering                                                                 0
  Petroleum                                                                         0
  Systems Engineering                                                               0
  <NA>                                                                              0
                                             
                                              Bio Engineering
  Acoustic Engineering                                      0
  Aerospace Engineering                                     0
  Agricultural and Biological Engineering                   0
  Biological Engineering                                    2
  Biomedical                                              128
  Biomedical Engineering                                    3
  Chemical and Biomolecular Engineering                     0
  Chemical and Materials Engineering                        0
  Chemical Engineering                                      0
  Civil and Environmental Engineering                       0
  Civil Engineering                                         0
  Computer Engineering                                      0
  Computer Science                                          0
  Computer Science and Engineering                          0
  Computer Science Engineering                              0
  Electrical & Computer Engineering                         0
  Electrical and Electronic Engineering                     0
  Electrical Engineering                                    0
  Electrical Engineering and Computer Science               0
  Engineering                                               0
  Engineering Education                                     0
  Engineering Management                                    0
  Engineering Physics                                       0
  Environmental                                             0
  Environmental Engineering                                 0
  Geo                                                       0
  Industrial                                                0
  Material Science and Engineering                          0
  Mechanical Engineering                                    0
  Nuclear Engineering                                       0
  Ocean Engineering                                         0
  Other Engineering                                         0
  Petroleum                                                 0
  Systems Engineering                                       0
  <NA>                                                      0
                                             
                                              Chemical Engineering
  Acoustic Engineering                                           0
  Aerospace Engineering                                          0
  Agricultural and Biological Engineering                        0
  Biological Engineering                                         0
  Biomedical                                                     0
  Biomedical Engineering                                         0
  Chemical and Biomolecular Engineering                         18
  Chemical and Materials Engineering                             2
  Chemical Engineering                                         170
  Civil and Environmental Engineering                            0
  Civil Engineering                                              0
  Computer Engineering                                           0
  Computer Science                                               0
  Computer Science and Engineering                               0
  Computer Science Engineering                                   0
  Electrical & Computer Engineering                              0
  Electrical and Electronic Engineering                          0
  Electrical Engineering                                         0
  Electrical Engineering and Computer Science                    0
  Engineering                                                    0
  Engineering Education                                          0
  Engineering Management                                         0
  Engineering Physics                                            0
  Environmental                                                  0
  Environmental Engineering                                      0
  Geo                                                            0
  Industrial                                                     0
  Material Science and Engineering                               0
  Mechanical Engineering                                         0
  Nuclear Engineering                                            0
  Ocean Engineering                                              0
  Other Engineering                                              0
  Petroleum                                                      0
  Systems Engineering                                            0
  <NA>                                                           0
                                             
                                              Civil Engineering
  Acoustic Engineering                                        0
  Aerospace Engineering                                       0
  Agricultural and Biological Engineering                     0
  Biological Engineering                                      0
  Biomedical                                                  0
  Biomedical Engineering                                      0
  Chemical and Biomolecular Engineering                       0
  Chemical and Materials Engineering                          0
  Chemical Engineering                                        0
  Civil and Environmental Engineering                        18
  Civil Engineering                                         142
  Computer Engineering                                        0
  Computer Science                                            0
  Computer Science and Engineering                            0
  Computer Science Engineering                                0
  Electrical & Computer Engineering                           0
  Electrical and Electronic Engineering                       0
  Electrical Engineering                                      0
  Electrical Engineering and Computer Science                 0
  Engineering                                                 0
  Engineering Education                                       0
  Engineering Management                                      0
  Engineering Physics                                         0
  Environmental                                               0
  Environmental Engineering                                   0
  Geo                                                         0
  Industrial                                                  0
  Material Science and Engineering                            0
  Mechanical Engineering                                      0
  Nuclear Engineering                                         0
  Ocean Engineering                                           0
  Other Engineering                                           0
  Petroleum                                                   0
  Systems Engineering                                         0
  <NA>                                                        0
                                             
                                              Computer Engineering
  Acoustic Engineering                                           0
  Aerospace Engineering                                          0
  Agricultural and Biological Engineering                        0
  Biological Engineering                                         0
  Biomedical                                                     0
  Biomedical Engineering                                         0
  Chemical and Biomolecular Engineering                          0
  Chemical and Materials Engineering                             0
  Chemical Engineering                                           0
  Civil and Environmental Engineering                            0
  Civil Engineering                                              0
  Computer Engineering                                          33
  Computer Science                                               0
  Computer Science and Engineering                               0
  Computer Science Engineering                                   0
  Electrical & Computer Engineering                              0
  Electrical and Electronic Engineering                          0
  Electrical Engineering                                         0
  Electrical Engineering and Computer Science                    0
  Engineering                                                    0
  Engineering Education                                          0
  Engineering Management                                         0
  Engineering Physics                                            0
  Environmental                                                  0
  Environmental Engineering                                      0
  Geo                                                            0
  Industrial                                                     0
  Material Science and Engineering                               0
  Mechanical Engineering                                         0
  Nuclear Engineering                                            0
  Ocean Engineering                                              0
  Other Engineering                                              0
  Petroleum                                                      0
  Systems Engineering                                            0
  <NA>                                                           0
                                             
                                              Computer Science
  Acoustic Engineering                                       0
  Aerospace Engineering                                      0
  Agricultural and Biological Engineering                    0
  Biological Engineering                                     0
  Biomedical                                                 0
  Biomedical Engineering                                     0
  Chemical and Biomolecular Engineering                      0
  Chemical and Materials Engineering                         0
  Chemical Engineering                                       0
  Civil and Environmental Engineering                        0
  Civil Engineering                                          0
  Computer Engineering                                       0
  Computer Science                                          93
  Computer Science and Engineering                           0
  Computer Science Engineering                               0
  Electrical & Computer Engineering                          0
  Electrical and Electronic Engineering                      0
  Electrical Engineering                                     0
  Electrical Engineering and Computer Science                0
  Engineering                                                0
  Engineering Education                                      0
  Engineering Management                                     0
  Engineering Physics                                        0
  Environmental                                              0
  Environmental Engineering                                  0
  Geo                                                        0
  Industrial                                                 0
  Material Science and Engineering                           0
  Mechanical Engineering                                     0
  Nuclear Engineering                                        0
  Ocean Engineering                                          0
  Other Engineering                                          0
  Petroleum                                                  0
  Systems Engineering                                        0
  <NA>                                                       0
                                             
                                              Computer Science and Engineering
  Acoustic Engineering                                                       0
  Aerospace Engineering                                                      0
  Agricultural and Biological Engineering                                    0
  Biological Engineering                                                     0
  Biomedical                                                                 0
  Biomedical Engineering                                                     0
  Chemical and Biomolecular Engineering                                      0
  Chemical and Materials Engineering                                         0
  Chemical Engineering                                                       0
  Civil and Environmental Engineering                                        0
  Civil Engineering                                                          0
  Computer Engineering                                                       0
  Computer Science                                                           0
  Computer Science and Engineering                                          21
  Computer Science Engineering                                               4
  Electrical & Computer Engineering                                          0
  Electrical and Electronic Engineering                                      0
  Electrical Engineering                                                     0
  Electrical Engineering and Computer Science                                0
  Engineering                                                                0
  Engineering Education                                                      0
  Engineering Management                                                     0
  Engineering Physics                                                        0
  Environmental                                                              0
  Environmental Engineering                                                  0
  Geo                                                                        0
  Industrial                                                                 0
  Material Science and Engineering                                           0
  Mechanical Engineering                                                     0
  Nuclear Engineering                                                        0
  Ocean Engineering                                                          0
  Other Engineering                                                          0
  Petroleum                                                                  0
  Systems Engineering                                                        0
  <NA>                                                                       0
                                             
                                              Elec Engineering
  Acoustic Engineering                                       0
  Aerospace Engineering                                      0
  Agricultural and Biological Engineering                    0
  Biological Engineering                                     0
  Biomedical                                                 0
  Biomedical Engineering                                     0
  Chemical and Biomolecular Engineering                      0
  Chemical and Materials Engineering                         0
  Chemical Engineering                                       0
  Civil and Environmental Engineering                        0
  Civil Engineering                                          0
  Computer Engineering                                       0
  Computer Science                                           0
  Computer Science and Engineering                           0
  Computer Science Engineering                               0
  Electrical & Computer Engineering                         62
  Electrical and Electronic Engineering                      2
  Electrical Engineering                                   200
  Electrical Engineering and Computer Science                2
  Engineering                                                0
  Engineering Education                                      0
  Engineering Management                                     0
  Engineering Physics                                        0
  Environmental                                              0
  Environmental Engineering                                  0
  Geo                                                        0
  Industrial                                                 0
  Material Science and Engineering                           0
  Mechanical Engineering                                     0
  Nuclear Engineering                                        0
  Ocean Engineering                                          0
  Other Engineering                                          0
  Petroleum                                                  0
  Systems Engineering                                        0
  <NA>                                                       0
                                             
                                              Environmental Engineering
  Acoustic Engineering                                                0
  Aerospace Engineering                                               0
  Agricultural and Biological Engineering                             0
  Biological Engineering                                              0
  Biomedical                                                          0
  Biomedical Engineering                                              0
  Chemical and Biomolecular Engineering                               0
  Chemical and Materials Engineering                                  0
  Chemical Engineering                                                0
  Civil and Environmental Engineering                                 0
  Civil Engineering                                                   0
  Computer Engineering                                                0
  Computer Science                                                    0
  Computer Science and Engineering                                    0
  Computer Science Engineering                                        0
  Electrical & Computer Engineering                                   0
  Electrical and Electronic Engineering                               0
  Electrical Engineering                                              0
  Electrical Engineering and Computer Science                         0
  Engineering                                                         0
  Engineering Education                                               0
  Engineering Management                                              0
  Engineering Physics                                                 0
  Environmental                                                      51
  Environmental Engineering                                           1
  Geo                                                                 0
  Industrial                                                          0
  Material Science and Engineering                                    0
  Mechanical Engineering                                              0
  Nuclear Engineering                                                 0
  Ocean Engineering                                                   0
  Other Engineering                                                   0
  Petroleum                                                           0
  Systems Engineering                                                 0
  <NA>                                                                0
                                             
                                              Industrial Engineering
  Acoustic Engineering                                             0
  Aerospace Engineering                                            0
  Agricultural and Biological Engineering                          0
  Biological Engineering                                           0
  Biomedical                                                       0
  Biomedical Engineering                                           0
  Chemical and Biomolecular Engineering                            0
  Chemical and Materials Engineering                               0
  Chemical Engineering                                             0
  Civil and Environmental Engineering                              0
  Civil Engineering                                                0
  Computer Engineering                                             0
  Computer Science                                                 0
  Computer Science and Engineering                                 0
  Computer Science Engineering                                     0
  Electrical & Computer Engineering                                0
  Electrical and Electronic Engineering                            0
  Electrical Engineering                                           0
  Electrical Engineering and Computer Science                      0
  Engineering                                                      0
  Engineering Education                                            0
  Engineering Management                                           0
  Engineering Physics                                              0
  Environmental                                                    0
  Environmental Engineering                                        0
  Geo                                                              0
  Industrial                                                      40
  Material Science and Engineering                                 0
  Mechanical Engineering                                           0
  Nuclear Engineering                                              0
  Ocean Engineering                                                0
  Other Engineering                                                0
  Petroleum                                                        0
  Systems Engineering                                              0
  <NA>                                                             0
                                             
                                              Material Science and Engineering
  Acoustic Engineering                                                       0
  Aerospace Engineering                                                      0
  Agricultural and Biological Engineering                                    0
  Biological Engineering                                                     0
  Biomedical                                                                 0
  Biomedical Engineering                                                     0
  Chemical and Biomolecular Engineering                                      0
  Chemical and Materials Engineering                                         0
  Chemical Engineering                                                       0
  Civil and Environmental Engineering                                        0
  Civil Engineering                                                          0
  Computer Engineering                                                       0
  Computer Science                                                           0
  Computer Science and Engineering                                           0
  Computer Science Engineering                                               0
  Electrical & Computer Engineering                                          0
  Electrical and Electronic Engineering                                      0
  Electrical Engineering                                                     0
  Electrical Engineering and Computer Science                                0
  Engineering                                                                0
  Engineering Education                                                      0
  Engineering Management                                                     0
  Engineering Physics                                                        0
  Environmental                                                              0
  Environmental Engineering                                                  0
  Geo                                                                        0
  Industrial                                                                 0
  Material Science and Engineering                                         185
  Mechanical Engineering                                                     0
  Nuclear Engineering                                                        0
  Ocean Engineering                                                          0
  Other Engineering                                                          0
  Petroleum                                                                  0
  Systems Engineering                                                        0
  <NA>                                                                       0
                                             
                                              Mechanical Engineering
  Acoustic Engineering                                             0
  Aerospace Engineering                                            0
  Agricultural and Biological Engineering                          0
  Biological Engineering                                           0
  Biomedical                                                       0
  Biomedical Engineering                                           0
  Chemical and Biomolecular Engineering                            0
  Chemical and Materials Engineering                               0
  Chemical Engineering                                             0
  Civil and Environmental Engineering                              0
  Civil Engineering                                                0
  Computer Engineering                                             0
  Computer Science                                                 0
  Computer Science and Engineering                                 0
  Computer Science Engineering                                     0
  Electrical & Computer Engineering                                0
  Electrical and Electronic Engineering                            0
  Electrical Engineering                                           0
  Electrical Engineering and Computer Science                      0
  Engineering                                                      0
  Engineering Education                                            0
  Engineering Management                                           0
  Engineering Physics                                              0
  Environmental                                                    0
  Environmental Engineering                                        0
  Geo                                                              0
  Industrial                                                       0
  Material Science and Engineering                                 0
  Mechanical Engineering                                         260
  Nuclear Engineering                                              0
  Ocean Engineering                                                0
  Other Engineering                                                0
  Petroleum                                                        0
  Systems Engineering                                              0
  <NA>                                                             0
                                             
                                              Nuclear Engineering
  Acoustic Engineering                                          0
  Aerospace Engineering                                         0
  Agricultural and Biological Engineering                       0
  Biological Engineering                                        0
  Biomedical                                                    0
  Biomedical Engineering                                        0
  Chemical and Biomolecular Engineering                         0
  Chemical and Materials Engineering                            0
  Chemical Engineering                                          0
  Civil and Environmental Engineering                           0
  Civil Engineering                                             0
  Computer Engineering                                          0
  Computer Science                                              0
  Computer Science and Engineering                              0
  Computer Science Engineering                                  0
  Electrical & Computer Engineering                             0
  Electrical and Electronic Engineering                         0
  Electrical Engineering                                        0
  Electrical Engineering and Computer Science                   0
  Engineering                                                   0
  Engineering Education                                         0
  Engineering Management                                        0
  Engineering Physics                                           0
  Environmental                                                 0
  Environmental Engineering                                     0
  Geo                                                           0
  Industrial                                                    0
  Material Science and Engineering                              0
  Mechanical Engineering                                        0
  Nuclear Engineering                                          59
  Ocean Engineering                                             0
  Other Engineering                                             0
  Petroleum                                                     0
  Systems Engineering                                           0
  <NA>                                                          0
                                             
                                              Other Engineering <NA>
  Acoustic Engineering                                        2    0
  Aerospace Engineering                                       0    0
  Agricultural and Biological Engineering                     0    0
  Biological Engineering                                      0    0
  Biomedical                                                  0    0
  Biomedical Engineering                                      0    0
  Chemical and Biomolecular Engineering                       0    0
  Chemical and Materials Engineering                          0    0
  Chemical Engineering                                        0    0
  Civil and Environmental Engineering                         0    0
  Civil Engineering                                           0    0
  Computer Engineering                                        0    0
  Computer Science                                            0    0
  Computer Science and Engineering                            0    0
  Computer Science Engineering                                0    0
  Electrical & Computer Engineering                           0    0
  Electrical and Electronic Engineering                       0    0
  Electrical Engineering                                      0    0
  Electrical Engineering and Computer Science                 0    0
  Engineering                                                 2    0
  Engineering Education                                       1    0
  Engineering Management                                      4    0
  Engineering Physics                                         5    0
  Environmental                                               0    0
  Environmental Engineering                                   0    0
  Geo                                                         6    0
  Industrial                                                  0    0
  Material Science and Engineering                            0    0
  Mechanical Engineering                                      0    0
  Nuclear Engineering                                         0    0
  Ocean Engineering                                          11    0
  Other Engineering                                           3    0
  Petroleum                                                  17    0
  Systems Engineering                                        10    0
  <NA>                                                        0  100
Code
# Frequency table of df$maj_fin; missing values are shown as their own <NA> cell
table(df$maj_fin, useNA = "always")

                  Aerospace Engineering Agricultural and Biological Engineering 
                                     50                                      47 
                        Bio Engineering                    Chemical Engineering 
                                    133                                     190 
                      Civil Engineering                    Computer Engineering 
                                    160                                      33 
                       Computer Science        Computer Science and Engineering 
                                     93                                      25 
                       Elec Engineering               Environmental Engineering 
                                    266                                      52 
                 Industrial Engineering        Material Science and Engineering 
                                     40                                     185 
                 Mechanical Engineering                     Nuclear Engineering 
                                    260                                      59 
                      Other Engineering                                    <NA> 
                                     61                                     100 

Clean Start Date

Code
# Survey closed 7/25/2018, so 2018 is used as the reference year below.

# Split df$start on the first space into "year" and "month" pieces
# (presumably stored as "<year> <month>" -- confirm against the survey export).
spl <- colsplit(df$start, pattern = " ", names = c("year", "month"))
spl[["year"]] <- as.numeric(spl[["year"]])

# Difference in calendar years between the start year and 2018;
# the month component is not used.
spl[["years"]] <- 2018 - spl[["year"]]
df[["years"]] <- spl[["years"]]

Clean Milestones

Code
# Raw milestone responses (comma-separated multi-select codes), with <NA> counted
table(df$progress, useNA="always")

    1   1,2 1,2,3   1,3   1,4     2   2,3   2,4     3     4  <NA> 
  398   254    41     1     3    36     4     3     9   803   202 
Code
# Response codes (multiple selections possible, stored comma-separated):
#   1 = "Comprehensive Exam (Written or Oral)", 2 = "Dissertation Proposal",
#   3 = "Dissertation Defense", 4 = "Not applicable"
# Each response pattern is mapped to one value: the highest-numbered
# milestone (1-3) that was selected, or 0 when only "Not applicable" was
# chosen. Patterns not listed here, and missing responses, stay NA.
prog_lookup <- c(
  "1"     = 1,
  "1,4"   = 1,
  "2"     = 2,
  "1,2"   = 2,
  "2,4"   = 2,
  "3"     = 3,
  "1,3"   = 3,
  "2,3"   = 3,
  "1,2,3" = 3,
  "4"     = 0
)
df$prog_fin <- unname(prog_lookup[as.character(df$progress)])
table(df$prog_fin, useNA = "always")

   0    1    2    3 <NA> 
 803  401  293   55  202 

Clean Gender

Code
# Raw gender responses (comma-separated multi-select codes), with <NA> counted
table(df$gender, useNA = "always")

            1 1,2,3,4,5,6,7           1,3         1,3,5           1,4 
          541             1             3             1             1 
          1,5           1,6             2           2,3         2,3,5 
            1            23           987             5             1 
          2,5           2,6           2,7             3             5 
            1            33             6             2             1 
            6             7          <NA> 
            1            12           134 
Code
# Response codes (multiple selections possible, stored comma-separated):
#   1 = "Female", 2 = "Male", 3 = "Genderqueer", 4 = "Agender",
#   5 = "Transgender", 6 = "Cisgender",
#   7 = "A gender not listed" (write-in response optional)
# Collapse the response patterns into three groups. Any pattern not named
# below (e.g. "5" or "6" alone) and missing responses stay NA.
female_codes <- c("1", "1,6", "1,5")
male_codes <- c("2", "2,6", "2,5")
gq_codes <- c("1,3,5", "2,3,5", "3", "4", "7", "2,3", "1,3", "2,7", "1,4")

df$gen_fin <- NA
df$gen_fin[df$gender %in% female_codes] <- "female"
df$gen_fin[df$gender %in% male_codes] <- "male"
df$gen_fin[df$gender %in% gq_codes] <- "gq"

# Cross-tabulate the recode against the raw patterns as a check
table(df$gen_fin, df$gender, useNA = "always")
        
           1 1,2,3,4,5,6,7 1,3 1,3,5 1,4 1,5 1,6   2 2,3 2,3,5 2,5 2,6 2,7   3
  female 541             0   0     0   0   1  23   0   0     0   0   0   0   0
  gq       0             0   3     1   1   0   0   0   5     1   0   0   6   2
  male     0             0   0     0   0   0   0 987   0     0   1  33   0   0
  <NA>     0             1   0     0   0   0   0   0   0     0   0   0   0   0
        
           5   6   7 <NA>
  female   0   0   0    0
  gq       0   0  12    0
  male     0   0   0    0
  <NA>     1   1   0  134
Code
# Group sizes after the recode, with <NA> counted
table(df$gen_fin, useNA = "always")

female     gq   male   <NA> 
   565     31   1021    137 
Code
# List the free-text gender write-ins so they can be screened by hand
table(df$gendertxt)

                                                                                                      A zorplon from Sagitarius-7 
                                                                                                                                1 
                                                                                                                       Alpha Male 
                                                                                                                                1 
                                                                                        An ether sniffing robot sex toy on wheels 
                                                                                                                                1 
                                                                                                                Attack helicopter 
                                                                                                                                1 
                                                                                                                Attack Helicopter 
                                                                                                                                1 
                                                                                                                      Hairdresser 
                                                                                                                                1 
                                                                                                                            HOKIE 
                                                                                                                                1 
                                                                                                                       Non-binary 
                                                                                                                                1 
                                                                                                                     Not relevant 
                                                                                                                                1 
                                                                                                                     Not Relevant 
                                                                                                                                1 
                                                                                                                      questioning 
                                                                                                                                1 
                                                                                                                  Sexist question 
                                                                                                                                1 
                                                                                                       there are only two genders 
                                                                                                                                1 
                                                                                             there are only two genders fuck off  
                                                                                                                                1 
This is a silly option. Your biology determines this. That's science, that's fact. A better option is "what I feel my gender is". 
                                                                                                                                1 
                                                                                                                           Turtle 
                                                                                                                                1 
                                                                                 XX, XY, XO, XXY, XXX, or XYY the rest is made up 
                                                                                                                                1 
Code
# Legitimate write-in responses: "Non-binary" only; that respondent is already
# coded "gq" through their checkbox selection. All other write-ins are treated
# as invalid and set to NA. Without this step those respondents keep whatever
# group their checkbox pattern gave them (patterns "7" and "2,7" are coded
# "gq" by the recode above), so joke/hostile answers would count as "gq".
gen_writein <- trimws(as.character(df$gendertxt))

# Blank or missing write-ins are left alone; only non-empty text that is not
# the accepted response invalidates the recode.
gen_invalid <- !is.na(gen_writein) & nzchar(gen_writein) &
  gen_writein != "Non-binary"
df$gen_fin[gen_invalid] <- NA

Clean Race

Code
# Raw race/ethnicity responses (comma-separated multi-select codes), with <NA> counted
table(df$race, useNA = "always")

      1,2       1,3     1,3,4     1,3,7 1,4,5,7,8     1,4,7       1,7         2 
        1         1         1         1         1         3         4       474 
  2,3,5,7       2,4   2,4,6,7     2,4,7     2,5,7       2,6     2,6,7       2,7 
        1         3         1         3         2         1         1        30 
      2,8         3       3,4       3,6       3,7       3,8         4       4,7 
        1        37         2         1         2         1        53        36 
    4,7,8       4,8         5       5,7         6       6,7         7       7,8 
        1         2        49        21         1         1       846         2 
        8      <NA> 
       25       145 
Code
# Response codes (multiple selections possible, stored comma-separated):
#   1 = "American Indian or Alaska Native", 2 = "Asian",
#   3 = "Black or African American",
#   4 = "Hispanic, Latino/Latina/Latinx, or Spanish origin",
#   5 = "Middle Eastern or North African",
#   6 = "Native Hawaiian or Other Pacific Islander", 7 = "White",
#   8 = "Another race or ethnicity not listed above" (write-in response optional)
race_labels <- c(
  "1" = "American Indian or Alaska Native",
  "2" = "Asian",
  "3" = "Black or African American",
  "4" = "Hispanic, Latino/Latina/Latinx, or Spanish origin",
  "5" = "Middle Eastern or North African",
  "6" = "Native Hawaiian or Other Pacific Islander",
  "7" = "White",
  "8" = "Another race or ethnicity not listed above"
)
race_raw <- as.character(df$race)

# Single selections get their own label. Code 1 is mapped as well, even though
# no respondent in the tabulation above selected it alone; without the mapping
# such a respondent would be labelled "Bi/Multiracial".
df$race_fin <- unname(race_labels[race_raw])

# Only responses with more than one selection (i.e. containing a comma) are
# Bi/Multiracial. Missing or unrecognised responses stay NA rather than being
# swept into this group by a blanket default.
df$race_fin[grepl(",", race_raw, fixed = TRUE)] <- "Bi/Multiracial"

# df$race_fin_temp <- 1
# df$race_fin_temp[!is.na(df$race_fin)] <- 2
# table(df$race, df$race_fin_temp)

# List the free-text race write-ins so they can be screened by hand
table(df$racetxt)

                                                                                                                                                                                                                                                                                                                                                                              -- 
                                                                                                                                                                                                                                                                                                                                                                               1 
                                                                                                                                                                                                                                                                                                                                                              AS THE DRIVEN SNOW 
                                                                                                                                                                                                                                                                                                                                                                               1 
                                                                                                                                                                                                                                                                                                                                                                           Asian 
                                                                                                                                                                                                                                                                                                                                                                               1 
                                                                                                                                                                                                                                                                                                                                                                          Basque 
                                                                                                                                                                                                                                                                                                                                                                               1 
                                                                                                                                                                                                                                                                                                                                                            Black African/French 
                                                                                                                                                                                                                                                                                                                                                                               1 
                                                                                                                                                                                                                                                                                                                                                           Caucasian (Pakistani) 
                                                                                                                                                                                                                                                                                                                                                                               1 
                                                                                                                                                                                                                                                                                                                                                         Demographics are racist 
                                                                                                                                                                                                                                                                                                                                                                               1 
                                                                                                                                                                                                                                                                                                                                                                       Earthling 
                                                                                                                                                                                                                                                                                                                                                                               1 
                                                                                                                                                                                                                                                                                                                                                                           HOKIE 
                                                                                                                                                                                                                                                                                                                                                                               1 
I highlighted groups that are closely related to mine throughout history/migration. If places are going to put "white", put european american as well. It's only being consistent. Speaking of which, hispanics are europeans, caucasian, white etc. This is all silly. Race/ethnicity is a personal identification, not a tool or weapon which this question aims to use it as. 
                                                                                                                                                                                                                                                                                                                                                                               1 
                                                                                                                                                                                                                                                                                                                      I identify as a race and I don't want to divulge that info 
                                                                                                                                                                                                                                                                                                                                                                               1 
                                                                                                                                                                                                                                                                                                                                                                          Indian 
                                                                                                                                                                                                                                                                                                                                                                               2 
                                                                                                                                                                                                                                                                                                                                                                         Iranian 
                                                                                                                                                                                                                                                                                                                                                                               1 
                                                                                                                                                                                                                                                                                                                                                                       Israelite 
                                                                                                                                                                                                                                                                                                                                                                               1 
                                                                                                                                                                                                                                                                                                                                                                        Nepalese 
                                                                                                                                                                                                                                                                                                                                                                               1 
                                                                                                                                                                                                                                                                                                                                                                    Not relevant 
                                                                                                                                                                                                                                                                                                                                                                               1 
                                                                                                                                                                                                                                                                                                                                                           South American Indian 
                                                                                                                                                                                                                                                                                                                                                                               1 
                                                                                                                                                                                                                                                                                                                                                                     south asian 
                                                                                                                                                                                                                                                                                                                                                                               1 
                                                                                                                                                                                                                                                                                                                                                                     South asian 
                                                                                                                                                                                                                                                                                                                                                                               1 
                                                                                                                                                                                                                                                                                                                                                                     South Asian 
                                                                                                                                                                                                                                                                                                                                                                               2 
                                                                                                                                                                                                                                                                                                                                                                  southern asian 
                                                                                                                                                                                                                                                                                                                                                                               1 
                                                                                                                                                                                                                                                                                                                                                        this question is racist  
                                                                                                                                                                                                                                                                                                                                                                               1 
                                                                                                                                                                                                                                                                                                                                                                    Unaffiliated 
                                                                                                                                                                                                                                                                                                                                                                               1 
                                                                                                                                                                                                                                                                                                                                                                         unknown 
                                                                                                                                                                                                                                                                                                                                                                               1 
Code
# Legitimate write-in entries: "Basque", "Black African/French",
# "Caucasian (Pakistani)", "Indian", "Iranian", "Israelite", "Nepalese",
# "South American Indian", "south asian", "South asian", "South Asian",
# "southern asian"
#
# Of these, only the write-ins listed below are recoded (to "Asian"); every
# other respondent keeps the label derived from their checkbox selection.
asian_writeins <- c(
  "Indian",
  "Nepalese",
  "south asian",
  "South asian",
  "South Asian",
  "southern asian"
)
df$race_fin[df$racetxt %in% asian_writeins] <- "Asian"

table(df$race_fin, useNA = "always")

       Another race or ethnicity not listed above 
                                               18 
                                            Asian 
                                              482 
                                   Bi/Multiracial 
                                              123 
                        Black or African American 
                                               37 
Hispanic, Latino/Latina/Latinx, or Spanish origin 
                                               53 
                  Middle Eastern or North African 
                                               49 
        Native Hawaiian or Other Pacific Islander 
                                                1 
                                            White 
                                              846 
                                             <NA> 
                                              145 
Code
# Collapsed race/ethnicity variable: start from race_fin, then fold the five
# smaller categories into a single "URM" group. Asian, Bi/Multiracial, White
# and NA pass through unchanged.
df$race_fin2 <- df$race_fin
df$race_fin2[df$race_fin %in% c(
  "Another race or ethnicity not listed above",
  "Native Hawaiian or Other Pacific Islander",
  "Black or African American",
  "Hispanic, Latino/Latina/Latinx, or Spanish origin",
  "Middle Eastern or North African"
)] <- "URM"
table(df$race_fin2, useNA = "always")

         Asian Bi/Multiracial            URM          White           <NA> 
           482            123            158            846            145 

Clean Country

Code
table(df$intl, useNA = "always")

   1    2 <NA> 
1073  573  108 
Code
# Label international status: code 1 -> "domestic", any other non-missing
# code -> "international", missing stays NA.
df$intl_fin <- ifelse(df$intl == 1, "domestic", "international")

table(df$intl_fin, useNA = "always")

     domestic international          <NA> 
         1073           573           108 

Clean DADP and ITP

Code
table(df$dadp, useNA = "always")

   1    2    3    4    5 <NA> 
 271  441  382  402  222   36 
Code
table(df$itp, useNA = "always")

   1    2    3    4    5 <NA> 
  18   22   69  221 1389   35 
Code
corr.test(df$dadp, df$itp)
Call:corr.test(x = df$dadp, y = df$itp)
Correlation matrix 
[1] -0.11
Sample Size 
[1] 1718
These are the unadjusted probability values.
  The probability values  adjusted for multiple tests are in the p.adj object. 
[1] 0

 To see confidence intervals of the correlations, print with the short=FALSE option
Code
# Reverse-code itp (1 <-> 5, 2 <-> 4, 3 stays 3). Values outside 1..5 and
# missing responses become NA, exactly as with element-wise assignment.
df$itp_rc <- c(5, 4, 3, 2, 1)[match(df$itp, 1:5)]
# Two-item composite: mean of dadp and reverse-coded itp (NA if either is NA).
df$uncer <- (df$dadp + df$itp_rc) / 2

Finalize Dataset

Code
names(df)
 [1] "R_IDs"     "Major"     "start"     "progress"  "gender"    "gendertxt"
 [7] "race"      "racetxt"   "dadp"      "itp"       "intl"      "maj_fin"  
[13] "years"     "prog_fin"  "gen_fin"   "race_fin"  "race_fin2" "intl_fin" 
[19] "itp_rc"    "uncer"    
Code
# Keep only the cleaned auxiliary variables plus the respondent ID.
df2 <- df[, c("R_IDs", "maj_fin", "years", "prog_fin", "gen_fin", "race_fin",
              "race_fin2", "intl_fin", "dadp", "itp", "uncer"), drop = FALSE]
# Inner join on R_IDs: only respondents present in both df2 and classes remain.
d <- merge(df2, classes, by = "R_IDs")
# Drop intermediate objects now that the analysis data set `d` exists.
rm(import, spl, df, df2, classes)

# Optional export of the merged data (left disabled):
# write.csv(d, file="auxvar.csv", row.names = FALSE)

Demographics

Code
table(d$prog_fin, useNA = "always")

   0    1    2    3 <NA> 
 324  373  286   51   99 
Code
prop.table(table(d$prog_fin, useNA = "always"))

         0          1          2          3       <NA> 
0.28596646 0.32921447 0.25242718 0.04501324 0.08737864 
Code
table(d$gen_fin, useNA = "always")

female     gq   male   <NA> 
   398     15    692     28 
Code
prop.table(table(d$gen_fin, useNA = "always"))

    female         gq       male       <NA> 
0.35127979 0.01323919 0.61076787 0.02471315 
Code
table(d$race_fin, useNA = "always")

       Another race or ethnicity not listed above 
                                               10 
                                            Asian 
                                              315 
                                   Bi/Multiracial 
                                               90 
                        Black or African American 
                                               23 
Hispanic, Latino/Latina/Latinx, or Spanish origin 
                                               36 
                  Middle Eastern or North African 
                                               44 
        Native Hawaiian or Other Pacific Islander 
                                                1 
                                            White 
                                              581 
                                             <NA> 
                                               33 
Code
prop.table(table(d$race_fin, useNA = "always"))

       Another race or ethnicity not listed above 
                                     0.0088261253 
                                            Asian 
                                     0.2780229479 
                                   Bi/Multiracial 
                                     0.0794351280 
                        Black or African American 
                                     0.0203000883 
Hispanic, Latino/Latina/Latinx, or Spanish origin 
                                     0.0317740512 
                  Middle Eastern or North African 
                                     0.0388349515 
        Native Hawaiian or Other Pacific Islander 
                                     0.0008826125 
                                            White 
                                     0.5127978817 
                                             <NA> 
                                     0.0291262136 
Code
table(d$race_fin2, useNA = "always")

         Asian Bi/Multiracial            URM          White           <NA> 
           315             90            114            581             33 
Code
table(d$intl_fin, useNA = "always")

     domestic international          <NA> 
          718           405            10 
Code
prop.table(table(d$intl_fin, useNA = "always"))

     domestic international          <NA> 
  0.633715799   0.357458076   0.008826125 

Chi-Square Analyses

Discipline

Code
table(d$maj_fin, useNA = "always")

                  Aerospace Engineering Agricultural and Biological Engineering 
                                     28                                      27 
                        Bio Engineering                    Chemical Engineering 
                                    102                                     149 
                      Civil Engineering                    Computer Engineering 
                                    100                                      19 
                       Computer Science        Computer Science and Engineering 
                                     55                                      21 
                       Elec Engineering               Environmental Engineering 
                                    157                                      28 
                 Industrial Engineering        Material Science and Engineering 
                                     29                                     162 
                 Mechanical Engineering                     Nuclear Engineering 
                                    165                                      44 
                      Other Engineering                                    <NA> 
                                     34                                      13 
Code
# Collapse the discipline variable into broader groups for the chi-square test.
d$maj_fin2 <- d$maj_fin

# Computer Engineering and Computer Science join "Computer Science and Engineering".
d$maj_fin2[d$maj_fin %in% c("Computer Engineering", "Computer Science")] <-
  "Computer Science and Engineering"

# Agricultural and Biological Engineering joins "Bio Engineering".
d$maj_fin2[d$maj_fin %in% "Agricultural and Biological Engineering"] <-
  "Bio Engineering"

# Small disciplines and missing majors are pooled into "Other Engineering".
d$maj_fin2[
  d$maj_fin %in% c("Aerospace Engineering", "Environmental Engineering",
                   "Industrial Engineering", "Nuclear Engineering") |
    is.na(d$maj_fin)
] <- "Other Engineering"

# Cross-tab of original vs. collapsed categories to verify the mapping.
table(d$maj_fin, d$maj_fin2, useNA = "always")
                                         
                                          Bio Engineering Chemical Engineering
  Aerospace Engineering                                 0                    0
  Agricultural and Biological Engineering              27                    0
  Bio Engineering                                     102                    0
  Chemical Engineering                                  0                  149
  Civil Engineering                                     0                    0
  Computer Engineering                                  0                    0
  Computer Science                                      0                    0
  Computer Science and Engineering                      0                    0
  Elec Engineering                                      0                    0
  Environmental Engineering                             0                    0
  Industrial Engineering                                0                    0
  Material Science and Engineering                      0                    0
  Mechanical Engineering                                0                    0
  Nuclear Engineering                                   0                    0
  Other Engineering                                     0                    0
  <NA>                                                  0                    0
                                         
                                          Civil Engineering
  Aerospace Engineering                                   0
  Agricultural and Biological Engineering                 0
  Bio Engineering                                         0
  Chemical Engineering                                    0
  Civil Engineering                                     100
  Computer Engineering                                    0
  Computer Science                                        0
  Computer Science and Engineering                        0
  Elec Engineering                                        0
  Environmental Engineering                               0
  Industrial Engineering                                  0
  Material Science and Engineering                        0
  Mechanical Engineering                                  0
  Nuclear Engineering                                     0
  Other Engineering                                       0
  <NA>                                                    0
                                         
                                          Computer Science and Engineering
  Aerospace Engineering                                                  0
  Agricultural and Biological Engineering                                0
  Bio Engineering                                                        0
  Chemical Engineering                                                   0
  Civil Engineering                                                      0
  Computer Engineering                                                  19
  Computer Science                                                      55
  Computer Science and Engineering                                      21
  Elec Engineering                                                       0
  Environmental Engineering                                              0
  Industrial Engineering                                                 0
  Material Science and Engineering                                       0
  Mechanical Engineering                                                 0
  Nuclear Engineering                                                    0
  Other Engineering                                                      0
  <NA>                                                                   0
                                         
                                          Elec Engineering
  Aerospace Engineering                                  0
  Agricultural and Biological Engineering                0
  Bio Engineering                                        0
  Chemical Engineering                                   0
  Civil Engineering                                      0
  Computer Engineering                                   0
  Computer Science                                       0
  Computer Science and Engineering                       0
  Elec Engineering                                     157
  Environmental Engineering                              0
  Industrial Engineering                                 0
  Material Science and Engineering                       0
  Mechanical Engineering                                 0
  Nuclear Engineering                                    0
  Other Engineering                                      0
  <NA>                                                   0
                                         
                                          Material Science and Engineering
  Aerospace Engineering                                                  0
  Agricultural and Biological Engineering                                0
  Bio Engineering                                                        0
  Chemical Engineering                                                   0
  Civil Engineering                                                      0
  Computer Engineering                                                   0
  Computer Science                                                       0
  Computer Science and Engineering                                       0
  Elec Engineering                                                       0
  Environmental Engineering                                              0
  Industrial Engineering                                                 0
  Material Science and Engineering                                     162
  Mechanical Engineering                                                 0
  Nuclear Engineering                                                    0
  Other Engineering                                                      0
  <NA>                                                                   0
                                         
                                          Mechanical Engineering
  Aerospace Engineering                                        0
  Agricultural and Biological Engineering                      0
  Bio Engineering                                              0
  Chemical Engineering                                         0
  Civil Engineering                                            0
  Computer Engineering                                         0
  Computer Science                                             0
  Computer Science and Engineering                             0
  Elec Engineering                                             0
  Environmental Engineering                                    0
  Industrial Engineering                                       0
  Material Science and Engineering                             0
  Mechanical Engineering                                     165
  Nuclear Engineering                                          0
  Other Engineering                                            0
  <NA>                                                         0
                                         
                                          Other Engineering <NA>
  Aerospace Engineering                                  28    0
  Agricultural and Biological Engineering                 0    0
  Bio Engineering                                         0    0
  Chemical Engineering                                    0    0
  Civil Engineering                                       0    0
  Computer Engineering                                    0    0
  Computer Science                                        0    0
  Computer Science and Engineering                        0    0
  Elec Engineering                                        0    0
  Environmental Engineering                              28    0
  Industrial Engineering                                 29    0
  Material Science and Engineering                        0    0
  Mechanical Engineering                                  0    0
  Nuclear Engineering                                    44    0
  Other Engineering                                      34    0
  <NA>                                                   13    0
Code
table(d$maj_fin2, useNA = "always")

                 Bio Engineering             Chemical Engineering 
                             129                              149 
               Civil Engineering Computer Science and Engineering 
                             100                               95 
                Elec Engineering Material Science and Engineering 
                             157                              162 
          Mechanical Engineering                Other Engineering 
                             165                              176 
                            <NA> 
                               0 
Code
# Convert the collapsed discipline variable to a factor before cross-tabulating.
d$maj_fin2 <- as.factor(d$maj_fin2)

# Pearson chi-square test of independence: collapsed discipline x class (d$class).
# NOTE(review): several observed cells printed below are 0-2, so some expected
# counts are probably below 5 and chisq.test() may warn that the approximation
# is unreliable -- confirm before interpreting the p-value.
chiout <- chisq.test(d$maj_fin2, d$class)
chiout

    Pearson's Chi-squared test

data:  d$maj_fin2 and d$class
X-squared = 83.451, df = 35, p-value = 7.788e-06
Code
chiout$observed
                                  d$class
d$maj_fin2                          1  2  3  4  5  6
  Bio Engineering                  23 63 26 12  1  4
  Chemical Engineering             27 66 35 12  2  7
  Civil Engineering                19 59 13  2  6  1
  Computer Science and Engineering 20 46 16  8  4  1
  Elec Engineering                 36 71 28  6  9  7
  Material Science and Engineering 34 80 19 26  2  1
  Mechanical Engineering           33 76 37  0 11  8
  Other Engineering                39 93 24 12  4  4
Code
chiout$stdres
                                  d$class
d$maj_fin2                                   1           2           3
  Bio Engineering                  -0.76632958 -0.01436740  0.85125079
  Chemical Engineering             -0.73720274 -1.20570018  2.07434150
  Civil Engineering                -0.36089782  2.11671464 -1.23429794
  Computer Science and Engineering  0.16790022 -0.09690145 -0.16990277
  Elec Engineering                  0.85165847 -0.99218269  0.12750361
  Material Science and Engineering  0.20451787  0.13366525 -2.08075393
  Mechanical Engineering           -0.13395615 -0.78846217  1.81086296
  Other Engineering                 0.63444081  1.13895670 -1.45937256
                                  d$class
d$maj_fin2                                   4           5           6
  Bio Engineering                   1.15224878 -1.76503822  0.13499964
  Chemical Engineering              0.60491992 -1.50868838  1.39063837
  Civil Engineering                -2.02036770  1.46933969 -1.19116080
  Computer Science and Engineering  0.61803981  0.42915979 -1.12632892
  Elec Engineering                 -1.63306619  1.69597826  1.24113665
  Material Science and Engineering  4.97681996 -1.66485458 -1.87666165
  Mechanical Engineering           -3.77868178  2.45791909  1.59981798
  Other Engineering                -0.03774010 -0.92595575 -0.54928788
Code
# Long-format copy of the discipline x class standardized residuals:
# one row per table cell, with columns discipline, class, sr.
sr_di <- setNames(as.data.frame(chiout$stdres), c("discipline", "class", "sr"))

Race/Ethnicity

Code
# Make the collapsed race/ethnicity variable a factor with "White" as the
# first (reference) level.
d$race_fin2 <- relevel(as.factor(d$race_fin2), ref = "White")

# Disabled alternative coding that would fold Bi/Multiracial into URM:
# d$race_fin3 <- d$race_fin2
# d$race_fin3[d$race_fin == "Bi/Multiracial"] <- "URM"
# d$race_fin3 <- droplevels(d$race_fin3)

# Pearson chi-square test of independence: race/ethnicity x class.
chiout <- chisq.test(table(d$race_fin2, d$class))
chiout

    Pearson's Chi-squared test

data:  table(d$race_fin2, d$class)
X-squared = 23.989, df = 15, p-value = 0.06529
Code
chiout$observed
                
                   1   2   3   4   5   6
  White          102 286 110  50  15  18
  Asian           80 151  41  17  17   9
  Bi/Multiracial  15  47  16   7   2   3
  URM             25  58  22   4   4   1
Code
chiout$stdres
                
                           1           2           3           4           5
  White          -2.29585560 -0.03316766  1.62893368  2.07117563 -1.67706174
  Asian           2.72986170 -0.56152680 -2.32024014 -1.38666511  2.23443253
  Bi/Multiracial -0.86710142  0.58409099  0.15641425  0.26494185 -0.66806724
  URM             0.49115918  0.36192342  0.63272688 -1.57388305  0.03348577
                
                           6
  White           0.59356084
  Asian           0.04946184
  Bi/Multiracial  0.30818740
  URM            -1.32268471
Code
# Long-format copy of the race/ethnicity x class standardized residuals:
# one row per table cell, with columns raceethnicity, class, sr.
sr_re <- setNames(as.data.frame(chiout$stdres), c("raceethnicity", "class", "sr"))

Gender

Code
table(d$gen_fin)

female     gq   male 
   398     15    692 
Code
# Two-level gender variable: "female" and "gq" are pooled into "fgq";
# "male" and NA are left as they are. Stored as a factor.
d$gen_fin2 <- as.factor(
  replace(d$gen_fin, d$gen_fin %in% c("gq", "female"), "fgq")
)
table(d$gen_fin2)

 fgq male 
 413  692 
Code
# Ensure gen_fin2 is a factor and put "male" first as the reference level.
d$gen_fin2 <- relevel(as.factor(d$gen_fin2), ref = "male")

# Pearson chi-square test of independence: gender (male vs. fgq) x class.
chiout <- chisq.test(table(d$gen_fin2, d$class))
chiout

    Pearson's Chi-squared test

data:  table(d$gen_fin2, d$class)
X-squared = 5.5919, df = 5, p-value = 0.348
Code
chiout$observed
      
         1   2   3   4   5   6
  male 137 333 130  45  25  22
  fgq   87 212  60  33  12   9
Code
chiout$stdres
      
                1          2          3          4          5          6
  male -0.5071185 -1.0326826  1.8149150 -0.9339242  0.6321686  0.9739402
  fgq   0.5071185  1.0326826 -1.8149150  0.9339242 -0.6321686 -0.9739402
Code
# Long-format copy of the gender x class standardized residuals:
# one row per table cell, with columns gender, class, sr.
sr_ge <- setNames(as.data.frame(chiout$stdres), c("gender", "class", "sr"))

Degree Progress

Code
table(d$prog_fin)

  0   1   2   3 
324 373 286  51 
Code
# Treat the degree-progress code (0-3) as categorical from here on.
d$prog_fin <- factor(d$prog_fin)

# Pearson chi-square test of independence: degree progress x class.
chiout <- chisq.test(table(d$prog_fin, d$class))
chiout

    Pearson's Chi-squared test

data:  table(d$prog_fin, d$class)
X-squared = 28.679, df = 15, p-value = 0.01769
Code
chiout$observed
   
      1   2   3   4   5   6
  0  62 173  55  14  11   9
  1  89 172  63  23  17   9
  2  53 139  48  32   5   9
  3   5  24  11   4   5   2
Code
chiout$stdres
   
              1           2           3           4           5           6
  0 -0.58253899  1.85338691 -0.08228287 -2.32267728 -0.32325106 -0.03534427
  1  2.19415971 -1.45775355 -0.14615610 -0.84279759  1.13311296 -0.57317542
  2 -0.83244866 -0.21007906 -0.17671955  3.20493329 -2.03623281  0.41212118
  3 -1.89841666 -0.30338656  0.86543503  0.22393674  2.38587841  0.49548634
Code
# Long-format copy of the degree-progress x class standardized residuals:
# one row per table cell, with columns degreeprog, class, sr.
sr_pr <- setNames(as.data.frame(chiout$stdres), c("degreeprog", "class", "sr"))

International Status

Code
table(d$intl_fin, useNA = "always")

     domestic international          <NA> 
          718           405            10 
Code
# Treat international status (domestic / international) as a factor.
d$intl_fin <- factor(d$intl_fin)

# Pearson chi-square test of independence: international status x class.
chiout <- chisq.test(table(d$intl_fin, d$class))
chiout

    Pearson's Chi-squared test

data:  table(d$intl_fin, d$class)
X-squared = 19.381, df = 5, p-value = 0.001632
Code
chiout$observed
               
                  1   2   3   4   5   6
  domestic      131 359 124  63  18  23
  international  98 193  69  15  20  10
Code
chiout$stdres
               
                          1           2           3           4           5
  domestic      -2.37730893  0.75502373  0.09945113  3.20951548 -2.16377895
  international  2.37730893 -0.75502373 -0.09945113 -3.20951548  2.16377895
               
                          6
  domestic       0.69956457
  international -0.69956457
Code
# Long-format copy of the international-status x class standardized residuals:
# one row per table cell, with columns intl_status, class, sr.
sr_in <- setNames(as.data.frame(chiout$stdres), c("intl_status", "class", "sr"))

# Follow-up: is international status associated with (collapsed) discipline?
# This overwrites chiout, so sr_in must be saved first (as done above).
chiout <- chisq.test(table(d$intl_fin, d$maj_fin2))
chiout

    Pearson's Chi-squared test

data:  table(d$intl_fin, d$maj_fin2)
X-squared = 122.59, df = 7, p-value < 2.2e-16
Code
chiout$observed
               
                Bio Engineering Chemical Engineering Civil Engineering
  domestic                  100                  111                46
  international              28                   37                54
               
                Computer Science and Engineering Elec Engineering
  domestic                                    47               66
  international                               48               91
               
                Material Science and Engineering Mechanical Engineering
  domestic                                   140                     87
  international                               21                     76
               
                Other Engineering
  domestic                    121
  international                50
Code
chiout$stdres
               
                Bio Engineering Chemical Engineering Civil Engineering
  domestic             3.551637             3.008327         -3.913496
  international       -3.551637            -3.008327          3.913496
               
                Computer Science and Engineering Elec Engineering
  domestic                             -3.068171        -6.160828
  international                         3.068171         6.160828
               
                Material Science and Engineering Mechanical Engineering
  domestic                              6.572379              -3.037176
  international                        -6.572379               3.037176
               
                Other Engineering
  domestic               2.018462
  international         -2.018462
Code
# Follow-up: Pearson chi-square test of independence between international
# status and the collapsed race/ethnicity variable (overwrites chiout).
chiout <- chisq.test(table(d$intl_fin, d$race_fin2))
chiout

    Pearson's Chi-squared test

data:  table(d$intl_fin, d$race_fin2)
X-squared = 473.22, df = 3, p-value < 2.2e-16
Code
chiout$observed
               
                White Asian Bi/Multiracial URM
  domestic        528    63             65  48
  international    53   252             25  66
Code
chiout$stdres
               
                     White      Asian Bi/Multiracial        URM
  domestic       19.649575 -19.258756       1.695918  -5.144093
  international -19.649575  19.258756      -1.695918   5.144093

Regression Analyses

Model 1

Code
# --- Sample restriction ---------------------------------------------------
# Drop respondents with degree-progress code 3; subset() also drops rows
# where prog_fin is NA because the comparison evaluates to NA.
d <- subset(d, prog_fin != 3)

# "Other Engineering" is set to missing so those rows are excluded from the model.
d$maj_fin2[d$maj_fin2 %in% "Other Engineering"] <- NA

# --- Reference levels for the categorical predictors ------------------------
d$maj_fin2  <- relevel(d$maj_fin2,  ref = "Mechanical Engineering")
d$race_fin2 <- relevel(d$race_fin2, ref = "White")
d$intl_fin  <- relevel(d$intl_fin,  ref = "domestic")
d$gen_fin2  <- relevel(d$gen_fin2,  ref = "male")
d$prog_fin  <- relevel(d$prog_fin,  ref = "0")

# --- Descriptive labels for the class codes 1..6 ----------------------------
# Codes outside 1..6 (or NA) map to NA. "moderate" is the reference level.
d$class_fin <- unname(
  c("1" = "disorganized",
    "2" = "moderate",
    "3" = "strong",
    "4" = "mle",
    "5" = "mls",
    "6" = "max")[as.character(d$class)]
)
d$class_fin <- relevel(as.factor(d$class_fin), ref = "moderate")

# --- Model 1: demographic/program predictors only ---------------------------
# na.exclude keeps residuals/fitted values aligned with the rows of d.
regout1 <- lm(uncer ~ maj_fin2 + gen_fin2 + race_fin2 + intl_fin + prog_fin, data = d, na.action = na.exclude)
plot_model(regout1, type="diag")
[[1]]


[[2]]


[[3]]


[[4]]

Code
# Normal Q-Q plot of the raw outcome (uncer) with a reference line.
# NOTE(review): this inspects the outcome itself rather than the model
# residuals -- confirm that is the intended check.
qqnorm(d$uncer)
qqline(d$uncer)

Code
plot(regout1, 4)

Code
plot(regout1, 5)

Code
plot(regout1, 3)

Code
summary(regout1)

Call:
lm(formula = uncer ~ maj_fin2 + gen_fin2 + race_fin2 + intl_fin + 
    prog_fin, data = d, na.action = na.exclude)

Residuals:
     Min       1Q   Median       3Q      Max 
-1.56448 -0.60809  0.03452  0.54057  2.66023 

Coefficients:
                                          Estimate Std. Error t value Pr(>|t|)
(Intercept)                               2.144976   0.083162  25.793  < 2e-16
maj_fin2Bio Engineering                   0.048421   0.096266   0.503 0.615104
maj_fin2Chemical Engineering              0.089741   0.091700   0.979 0.328054
maj_fin2Civil Engineering                 0.014319   0.101946   0.140 0.888337
maj_fin2Computer Science and Engineering  0.136315   0.102758   1.327 0.185029
maj_fin2Elec Engineering                  0.025970   0.089742   0.289 0.772365
maj_fin2Material Science and Engineering  0.163696   0.094330   1.735 0.083057
gen_fin2fgq                              -0.003499   0.054631  -0.064 0.948947
race_fin2Asian                            0.040529   0.075925   0.534 0.593623
race_fin2Bi/Multiracial                   0.268740   0.102873   2.612 0.009159
race_fin2URM                              0.059797   0.092915   0.644 0.520038
intl_fininternational                    -0.245993   0.072755  -3.381 0.000757
prog_fin1                                 0.154261   0.061821   2.495 0.012784
prog_fin2                                -0.050636   0.067105  -0.755 0.450723
                                            
(Intercept)                              ***
maj_fin2Bio Engineering                     
maj_fin2Chemical Engineering                
maj_fin2Civil Engineering                   
maj_fin2Computer Science and Engineering    
maj_fin2Elec Engineering                    
maj_fin2Material Science and Engineering .  
gen_fin2fgq                                 
race_fin2Asian                              
race_fin2Bi/Multiracial                  ** 
race_fin2URM                                
intl_fininternational                    ***
prog_fin1                                *  
prog_fin2                                   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.7362 on 809 degrees of freedom
  (160 observations deleted due to missingness)
Multiple R-squared:  0.05074,   Adjusted R-squared:  0.03548 
F-statistic: 3.326 on 13 and 809 DF,  p-value: 5.613e-05
Code
# Regression table for Model 1: estimates with standard errors and test
# statistics, confidence intervals hidden. TRUE/FALSE are spelled out because
# T and F are ordinary variables that can be reassigned.
tab_model(regout1, show.ci = FALSE, show.se = TRUE, show.stat = TRUE)
  uncer
Predictors Estimates std. Error Statistic p
(Intercept) 2.14 0.08 25.79 <0.001
maj fin2 [Bio
Engineering]
0.05 0.10 0.50 0.615
maj fin2 [Chemical
Engineering]
0.09 0.09 0.98 0.328
maj fin2 [Civil
Engineering]
0.01 0.10 0.14 0.888
maj fin2 [Computer
Science and Engineering]
0.14 0.10 1.33 0.185
maj fin2 [Elec
Engineering]
0.03 0.09 0.29 0.772
maj fin2 [Material
Science and Engineering]
0.16 0.09 1.74 0.083
gen fin2 [fgq] -0.00 0.05 -0.06 0.949
race fin2 [Asian] 0.04 0.08 0.53 0.594
race fin2
[Bi/Multiracial]
0.27 0.10 2.61 0.009
race fin2 [URM] 0.06 0.09 0.64 0.520
intl fin [international] -0.25 0.07 -3.38 0.001
prog fin [1] 0.15 0.06 2.50 0.013
prog fin [2] -0.05 0.07 -0.75 0.451
Observations 823
R2 / R2 adjusted 0.051 / 0.035
Code
plot_model(regout1)

Model 2

Code
# Model 2: Model 1 predictors plus class membership (class_fin, reference
# level "moderate"). Same data and NA handling as Model 1 so the two fits
# can be compared directly.
regout2 <- lm(uncer ~ class_fin + maj_fin2 + gen_fin2 + race_fin2 + intl_fin + prog_fin, data = d, na.action = na.exclude)
# Diagnostic plots for the fitted model.
plot_model(regout2, type="diag")
[[1]]


[[2]]


[[3]]


[[4]]

Code
plot(regout2, 4)

Code
plot(regout2, 5)

Code
plot(regout2, 3)

Code
summary(regout2)

Call:
lm(formula = uncer ~ class_fin + maj_fin2 + gen_fin2 + race_fin2 + 
    intl_fin + prog_fin, data = d, na.action = na.exclude)

Residuals:
     Min       1Q   Median       3Q      Max 
-1.67864 -0.49771  0.02514  0.52788  2.33980 

Coefficients:
                                          Estimate Std. Error t value Pr(>|t|)
(Intercept)                               2.219025   0.083063  26.715  < 2e-16
class_findisorganized                     0.309858   0.064940   4.771 2.17e-06
class_finmax                             -0.331029   0.158673  -2.086 0.037272
class_finmle                             -0.352733   0.101492  -3.475 0.000537
class_finmls                             -0.203343   0.138255  -1.471 0.141743
class_finstrong                          -0.367676   0.068329  -5.381 9.73e-08
maj_fin2Bio Engineering                   0.065662   0.092259   0.712 0.476853
maj_fin2Chemical Engineering              0.108568   0.087724   1.238 0.216223
maj_fin2Civil Engineering                -0.010904   0.097353  -0.112 0.910846
maj_fin2Computer Science and Engineering  0.123246   0.098208   1.255 0.209864
maj_fin2Elec Engineering                  0.005229   0.085579   0.061 0.951295
maj_fin2Material Science and Engineering  0.137365   0.091699   1.498 0.134524
gen_fin2fgq                              -0.014320   0.052132  -0.275 0.783631
race_fin2Asian                           -0.013215   0.072759  -0.182 0.855927
race_fin2Bi/Multiracial                   0.280523   0.098037   2.861 0.004327
race_fin2URM                              0.035375   0.088487   0.400 0.689426
intl_fininternational                    -0.253635   0.069372  -3.656 0.000273
prog_fin1                                 0.144530   0.059044   2.448 0.014585
prog_fin2                                -0.036032   0.064008  -0.563 0.573639
                                            
(Intercept)                              ***
class_findisorganized                    ***
class_finmax                             *  
class_finmle                             ***
class_finmls                                
class_finstrong                          ***
maj_fin2Bio Engineering                     
maj_fin2Chemical Engineering                
maj_fin2Civil Engineering                   
maj_fin2Computer Science and Engineering    
maj_fin2Elec Engineering                    
maj_fin2Material Science and Engineering    
gen_fin2fgq                                 
race_fin2Asian                              
race_fin2Bi/Multiracial                  ** 
race_fin2URM                                
intl_fininternational                    ***
prog_fin1                                *  
prog_fin2                                   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.7005 on 804 degrees of freedom
  (160 observations deleted due to missingness)
Multiple R-squared:  0.1458,    Adjusted R-squared:  0.1267 
F-statistic: 7.623 on 18 and 804 DF,  p-value: < 2.2e-16
Code
# Regression table for Model 2: estimates with standard errors and test
# statistics, confidence intervals hidden. TRUE/FALSE are spelled out because
# T and F are ordinary variables that can be reassigned.
tab_model(regout2, show.ci = FALSE, show.se = TRUE, show.stat = TRUE)
  uncer
Predictors Estimates std. Error Statistic p
(Intercept) 2.22 0.08 26.71 <0.001
class fin [disorganized] 0.31 0.06 4.77 <0.001
class fin [max] -0.33 0.16 -2.09 0.037
class fin [mle] -0.35 0.10 -3.48 0.001
class fin [mls] -0.20 0.14 -1.47 0.142
class fin [strong] -0.37 0.07 -5.38 <0.001
maj fin2 [Bio
Engineering]
0.07 0.09 0.71 0.477
maj fin2 [Chemical
Engineering]
0.11 0.09 1.24 0.216
maj fin2 [Civil
Engineering]
-0.01 0.10 -0.11 0.911
maj fin2 [Computer
Science and Engineering]
0.12 0.10 1.25 0.210
maj fin2 [Elec
Engineering]
0.01 0.09 0.06 0.951
maj fin2 [Material
Science and Engineering]
0.14 0.09 1.50 0.135
gen fin2 [fgq] -0.01 0.05 -0.27 0.784
race fin2 [Asian] -0.01 0.07 -0.18 0.856
race fin2
[Bi/Multiracial]
0.28 0.10 2.86 0.004
race fin2 [URM] 0.04 0.09 0.40 0.689
intl fin [international] -0.25 0.07 -3.66 <0.001
prog fin [1] 0.14 0.06 2.45 0.015
prog fin [2] -0.04 0.06 -0.56 0.574
Observations 823
R2 / R2 adjusted 0.146 / 0.127
Code
# Default sjPlot coefficient plot for the regout2 model.
plot_model(model = regout2)

ANOVA

Code
# Nested-model F test: regout1 (covariates only) against regout2 (covariates
# plus class_fin).
anova(regout1, regout2)
Analysis of Variance Table

Model 1: uncer ~ maj_fin2 + gen_fin2 + race_fin2 + intl_fin + prog_fin
Model 2: uncer ~ class_fin + maj_fin2 + gen_fin2 + race_fin2 + intl_fin + 
    prog_fin
  Res.Df    RSS Df Sum of Sq      F    Pr(>F)    
1    809 438.48                                  
2    804 394.58  5    43.907 17.893 < 2.2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Visualizations

Chi Square Plot - Discipline

Code
# Clear every object from the workspace, then reload the data used by the
# figures from CSV.
# NOTE(review): rm(list = ls()) removes objects only; attached packages and
# options are untouched, so this is not a fully clean session.
rm(list = ls())

# TRUE is spelled out because T is an ordinary, reassignable variable.
df <- read.csv(file = "auxvar.csv", header = TRUE)
lpa_df <- subset(df, select = c(R_IDs, s_std, r_std, e_std, class_fin))
un_df <- read.csv(file = "un_df2.csv", header = TRUE)

# Working copy for the discipline figure; tabulate the raw majors, including
# missing values.
d <- df
table(d$maj_fin, useNA = "always")

                  Aerospace Engineering Agricultural and Biological Engineering 
                                     28                                      27 
                        Bio Engineering                    Chemical Engineering 
                                    102                                     149 
                      Civil Engineering                    Computer Engineering 
                                    100                                      19 
                       Computer Science        Computer Science and Engineering 
                                     55                                      21 
                       Elec Engineering               Environmental Engineering 
                                    157                                      28 
                 Industrial Engineering        Material Science and Engineering 
                                     29                                     162 
                 Mechanical Engineering                     Nuclear Engineering 
                                    165                                      44 
                      Other Engineering                                    <NA> 
                                     34                                      13 
Code
# Collapse the reported majors into broader discipline groups. Every rule is
# keyed on the original maj_fin column, so the order of the rules is irrelevant.
d$maj_fin2 <- d$maj_fin

# Computing-related majors share one group.
d$maj_fin2[d$maj_fin %in% c("Computer Engineering", "Computer Science")] <-
  "Computer Science and Engineering"

# Agricultural and biological engineering joins bio engineering.
d$maj_fin2[d$maj_fin %in% "Agricultural and Biological Engineering"] <-
  "Bio Engineering"

# Aerospace, environmental, industrial and nuclear majors, plus respondents
# with a missing major, are folded into "Other Engineering".
d$maj_fin2[
  is.na(d$maj_fin) |
    d$maj_fin %in% c("Aerospace Engineering", "Environmental Engineering",
                     "Industrial Engineering", "Nuclear Engineering")
] <- "Other Engineering"

# Expand the abbreviated label.
d$maj_fin2[d$maj_fin %in% "Elec Engineering"] <- "Electrical Engineering"

# Cross-check each original major against the group it was assigned to.
table(d$maj_fin, d$maj_fin2, useNA = "always")
                                         
                                          Bio Engineering Chemical Engineering
  Aerospace Engineering                                 0                    0
  Agricultural and Biological Engineering              27                    0
  Bio Engineering                                     102                    0
  Chemical Engineering                                  0                  149
  Civil Engineering                                     0                    0
  Computer Engineering                                  0                    0
  Computer Science                                      0                    0
  Computer Science and Engineering                      0                    0
  Elec Engineering                                      0                    0
  Environmental Engineering                             0                    0
  Industrial Engineering                                0                    0
  Material Science and Engineering                      0                    0
  Mechanical Engineering                                0                    0
  Nuclear Engineering                                   0                    0
  Other Engineering                                     0                    0
  <NA>                                                  0                    0
                                         
                                          Civil Engineering
  Aerospace Engineering                                   0
  Agricultural and Biological Engineering                 0
  Bio Engineering                                         0
  Chemical Engineering                                    0
  Civil Engineering                                     100
  Computer Engineering                                    0
  Computer Science                                        0
  Computer Science and Engineering                        0
  Elec Engineering                                        0
  Environmental Engineering                               0
  Industrial Engineering                                  0
  Material Science and Engineering                        0
  Mechanical Engineering                                  0
  Nuclear Engineering                                     0
  Other Engineering                                       0
  <NA>                                                    0
                                         
                                          Computer Science and Engineering
  Aerospace Engineering                                                  0
  Agricultural and Biological Engineering                                0
  Bio Engineering                                                        0
  Chemical Engineering                                                   0
  Civil Engineering                                                      0
  Computer Engineering                                                  19
  Computer Science                                                      55
  Computer Science and Engineering                                      21
  Elec Engineering                                                       0
  Environmental Engineering                                              0
  Industrial Engineering                                                 0
  Material Science and Engineering                                       0
  Mechanical Engineering                                                 0
  Nuclear Engineering                                                    0
  Other Engineering                                                      0
  <NA>                                                                   0
                                         
                                          Electrical Engineering
  Aerospace Engineering                                        0
  Agricultural and Biological Engineering                      0
  Bio Engineering                                              0
  Chemical Engineering                                         0
  Civil Engineering                                            0
  Computer Engineering                                         0
  Computer Science                                             0
  Computer Science and Engineering                             0
  Elec Engineering                                           157
  Environmental Engineering                                    0
  Industrial Engineering                                       0
  Material Science and Engineering                             0
  Mechanical Engineering                                       0
  Nuclear Engineering                                          0
  Other Engineering                                            0
  <NA>                                                         0
                                         
                                          Material Science and Engineering
  Aerospace Engineering                                                  0
  Agricultural and Biological Engineering                                0
  Bio Engineering                                                        0
  Chemical Engineering                                                   0
  Civil Engineering                                                      0
  Computer Engineering                                                   0
  Computer Science                                                       0
  Computer Science and Engineering                                       0
  Elec Engineering                                                       0
  Environmental Engineering                                              0
  Industrial Engineering                                                 0
  Material Science and Engineering                                     162
  Mechanical Engineering                                                 0
  Nuclear Engineering                                                    0
  Other Engineering                                                      0
  <NA>                                                                   0
                                         
                                          Mechanical Engineering
  Aerospace Engineering                                        0
  Agricultural and Biological Engineering                      0
  Bio Engineering                                              0
  Chemical Engineering                                         0
  Civil Engineering                                            0
  Computer Engineering                                         0
  Computer Science                                             0
  Computer Science and Engineering                             0
  Elec Engineering                                             0
  Environmental Engineering                                    0
  Industrial Engineering                                       0
  Material Science and Engineering                             0
  Mechanical Engineering                                     165
  Nuclear Engineering                                          0
  Other Engineering                                            0
  <NA>                                                         0
                                         
                                          Other Engineering <NA>
  Aerospace Engineering                                  28    0
  Agricultural and Biological Engineering                 0    0
  Bio Engineering                                         0    0
  Chemical Engineering                                    0    0
  Civil Engineering                                       0    0
  Computer Engineering                                    0    0
  Computer Science                                        0    0
  Computer Science and Engineering                        0    0
  Elec Engineering                                        0    0
  Environmental Engineering                              28    0
  Industrial Engineering                                 29    0
  Material Science and Engineering                        0    0
  Mechanical Engineering                                  0    0
  Nuclear Engineering                                    44    0
  Other Engineering                                      34    0
  <NA>                                                   13    0
Code
# Counts per value of maj_fin2; useNA = "always" adds an explicit <NA> column
# so any remaining missing values are visible.
table(d$maj_fin2, useNA = "always")

                 Bio Engineering             Chemical Engineering 
                             129                              149 
               Civil Engineering Computer Science and Engineering 
                             100                               95 
          Electrical Engineering Material Science and Engineering 
                             157                              162 
          Mechanical Engineering                Other Engineering 
                             165                              176 
                            <NA> 
                               0 
Code
# Treat the collapsed discipline as a categorical variable.
d$maj_fin2 <- factor(d$maj_fin2)

# Convert the numeric profile codes 1-6 to a labelled factor; the label order
# fixes the order of the bars and of the legend in the figures.
d$class <- factor(
  d$class,
  levels = 1:6,
  labels = c(
    "Dispersed",
    "Moderate",
    "Strong",
    "Primarily Scientist",
    "Primarily Engineer",
    "Maximum"
  )
)

# Profile sizes after labelling.
table(d$class)

          Dispersed            Moderate              Strong Primarily Scientist 
                231                 554                 198                  78 
 Primarily Engineer             Maximum 
                 39                  33 
Code
# Chi-square test of independence between discipline and identity profile.
chiout <- chisq.test(d$maj_fin2, d$class)

# Standardized residuals, reshaped to long format: one row per
# discipline-by-profile cell.
resid_mat <- chiout$stdres
df_resid <- as.data.frame(as.table(resid_mat))
names(df_resid) <- c("maj_fin2", "class", "std_resid")

# Keep only the disciplines in which at least one profile has |residual| > 2.
df_resid_filt <- ungroup(
  filter(group_by(df_resid, maj_fin2), any(abs(std_resid) > 2))
)

# Base figure: one panel per retained discipline, one bar per profile, with
# dashed reference lines at -2 and 2.
plotout <- ggplot(df_resid_filt, aes(x = class, y = std_resid, fill = class)) +
  geom_col(width = 0.7) +
  geom_hline(yintercept = c(-2, 2), linetype = "dashed", linewidth = 0.3) +
  facet_wrap(~ maj_fin2, ncol = 4, labeller = label_wrap_gen(width = 20)) +
  labs(x = NULL, y = "Standardized Residuals", fill = "Class") +
  theme_bw() +
  scale_fill_viridis_d(option = "D", end = 0.9) +
  theme(
    # the fill legend identifies the profiles, so x-axis text is suppressed
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.title.y = element_text(size = 9),
    axis.text.y = element_text(size = 7),
    strip.text = element_text(size = 8, lineheight = 1),
    legend.position = "bottom",
    legend.direction = "horizontal",
    legend.text = element_text(size = 8),
    legend.title = element_text(size = 9),
    legend.key.width = unit(0.7, "lines"),
    legend.key.height = unit(0.7, "lines")
  ) +
  guides(fill = guide_legend(nrow = 1, override.aes = list(size = 2)))

# Final variant: larger text and transparent plot/legend backgrounds.
disc <- plotout +
  theme(
    axis.title.y = element_text(size = 11),
    axis.text.y = element_text(size = 9),
    strip.text = element_text(size = 10, lineheight = 0.9),
    legend.title = element_text(size = 11, lineheight = 0.9),
    legend.text = element_text(size = 10, lineheight = 0.9),
    plot.background = element_rect(fill = "transparent", color = NA),
    legend.background = element_rect(fill = "transparent", color = NA)
  )

disc

Code
# ggsave("pres/disc.png", plot = disc, bg = "transparent",
#        width = 8, height = 2.75, dpi = 300)

Chi Square Plot - International Status

Code
# Keep the two columns needed for this figure and drop rows where either one
# is missing.
d <- na.omit(df[, c("intl_fin", "class")])
table(d$intl_fin, useNA = "always")

     domestic international          <NA> 
          718           405             0 
Code
# Treat international status as a categorical variable.
d$intl_fin <- factor(d$intl_fin)

# Convert the numeric profile codes 1-6 to a labelled factor; the label order
# fixes the order of the bars and of the legend in the figure.
d$class <- factor(
  d$class,
  levels = 1:6,
  labels = c(
    "Dispersed",
    "Moderate",
    "Strong",
    "Primarily Scientist",
    "Primarily Engineer",
    "Maximum"
  )
)

# Profile sizes after labelling.
table(d$class)

          Dispersed            Moderate              Strong Primarily Scientist 
                229                 552                 193                  78 
 Primarily Engineer             Maximum 
                 38                  33 
Code
# Chi-square test of independence between international status and profile,
# run on the NA-filtered working copy d.
chiout <- chisq.test(d$intl_fin, d$class)

# Cross-tabulate the original majors by international status. This uses the
# full data frame df, not the filtered copy d.
table(df$maj_fin, df$intl_fin)
                                         
                                          domestic international
  Aerospace Engineering                         22             6
  Agricultural and Biological Engineering       15            12
  Bio Engineering                               85            16
  Chemical Engineering                         111            37
  Civil Engineering                             46            54
  Computer Engineering                           6            13
  Computer Science                              28            27
  Computer Science and Engineering              13             8
  Elec Engineering                              66            91
  Environmental Engineering                     19             9
  Industrial Engineering                        17            12
  Material Science and Engineering             140            21
  Mechanical Engineering                        87            76
  Nuclear Engineering                           38             6
  Other Engineering                             20            14
Code
# Major-by-international-status test on the full data; the result is stored
# but not printed or plotted in this chunk.
chiout2 <- chisq.test(df$maj_fin, df$intl_fin)
# chiout2
# chiout2$stdres

# Standardized residuals from the status-by-profile test (chiout), reshaped to
# long format: one row per status-by-profile cell.
resid_mat <- chiout$stdres
df_resid <- as.data.frame(as.table(resid_mat))
names(df_resid) <- c("intl_fin", "class", "std_resid")

# No |residual| > 2 filter is applied here: every level of intl_fin is plotted.

# Base figure: one panel per status, one bar per profile, with dashed
# reference lines at -2 and 2.
plotout <- ggplot(df_resid, aes(x = class, y = std_resid, fill = class)) +
  geom_col(width = 0.7) +
  geom_hline(yintercept = c(-2, 2), linetype = "dashed", linewidth = 0.3) +
  facet_wrap(~ intl_fin, ncol = 4, labeller = label_wrap_gen(width = 20)) +
  labs(x = NULL, y = "Standardized Residuals", fill = "Class") +
  theme_bw() +
  scale_fill_viridis_d(option = "D", end = 0.9) +
  theme(
    # the fill legend identifies the profiles, so x-axis text is suppressed
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.title.y = element_text(size = 9),
    axis.text.y = element_text(size = 7),
    strip.text = element_text(size = 8, lineheight = 1),
    legend.position = "bottom",
    legend.direction = "horizontal",
    legend.text = element_text(size = 8),
    legend.title = element_text(size = 9),
    legend.key.width = unit(0.7, "lines"),
    legend.key.height = unit(0.7, "lines")
  ) +
  guides(fill = guide_legend(nrow = 1, override.aes = list(size = 2)))

# Final variant: larger text and transparent plot/legend backgrounds.
intl <- plotout +
  theme(
    axis.title.y = element_text(size = 11),
    axis.text.y = element_text(size = 9),
    strip.text = element_text(size = 10, lineheight = 0.9),
    legend.title = element_text(size = 11, lineheight = 0.9),
    legend.text = element_text(size = 10, lineheight = 0.9),
    plot.background = element_rect(fill = "transparent", color = NA),
    legend.background = element_rect(fill = "transparent", color = NA)
  )

intl

Code
# ggsave("pres/intl.png", plot = intl, bg = "transparent",
#        width = 8, height = 2.75, dpi = 300)

Chi Square Plot - Progress

Code
# Start again from a copy of the full data frame and tabulate the raw
# prog_fin codes (table() omits missing values by default).
d <- df
table(d$prog_fin)

  0   1   2   3 
324 373 286  51 
Code
# Convert the numeric milestone codes 0-3 to a labelled factor; the label
# order fixes the order of the panels in the figure.
d$prog_fin <- factor(
  d$prog_fin,
  levels = 0:3,
  labels = c(
    "No Milestones",
    "First Milestone (Comprehensive Exam)",
    "Second Milestone (Dissertation Proposal)",
    "All Milestones (Dissertation Defense)"
  )
)

# Convert the numeric profile codes 1-6 to a labelled factor; the label order
# fixes the order of the bars and of the legend in the figure.
d$class <- factor(
  d$class,
  levels = 1:6,
  labels = c(
    "Dispersed",
    "Moderate",
    "Strong",
    "Primarily Scientist",
    "Primarily Engineer",
    "Maximum"
  )
)

# Profile sizes after labelling.
table(d$class)

          Dispersed            Moderate              Strong Primarily Scientist 
                231                 554                 198                  78 
 Primarily Engineer             Maximum 
                 39                  33 
Code
# Chi-square test of independence on the milestone-by-profile contingency
# table.
chiout <- chisq.test(table(d$prog_fin, d$class))

# Standardized residuals, reshaped to long format: one row per
# milestone-by-profile cell.
resid_mat <- chiout$stdres
df_resid <- as.data.frame(as.table(resid_mat))
names(df_resid) <- c("pred", "class", "std_resid")

# Base figure: one panel per milestone level, one bar per profile, with dashed
# reference lines at -2 and 2.
plotout <- ggplot(df_resid, aes(x = class, y = std_resid, fill = class)) +
  geom_col(width = 0.7) +
  geom_hline(yintercept = c(-2, 2), linetype = "dashed", linewidth = 0.3) +
  facet_wrap(~ pred, ncol = 4, labeller = label_wrap_gen(width = 25)) +
  labs(x = NULL, y = "Standardized Residuals", fill = "Class") +
  theme_bw() +
  scale_fill_viridis_d(option = "D", end = 0.9) +
  theme(
    # the fill legend identifies the profiles, so x-axis text is suppressed
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.title.y = element_text(size = 9),
    axis.text.y = element_text(size = 7),
    strip.text = element_text(size = 8, lineheight = 1),
    legend.position = "bottom",
    legend.direction = "horizontal",
    legend.text = element_text(size = 8),
    legend.title = element_text(size = 9),
    legend.key.width = unit(0.7, "lines"),
    legend.key.height = unit(0.7, "lines")
  ) +
  guides(fill = guide_legend(nrow = 1, override.aes = list(size = 2)))

# Final variant: larger text and transparent plot/legend backgrounds.
mile <- plotout +
  theme(
    axis.title.y = element_text(size = 11),
    axis.text.y = element_text(size = 9),
    strip.text = element_text(size = 10, lineheight = 0.9),
    legend.title = element_text(size = 11, lineheight = 0.9),
    legend.text = element_text(size = 10, lineheight = 0.9),
    plot.background = element_rect(fill = "transparent", color = NA),
    legend.background = element_rect(fill = "transparent", color = NA)
  )

mile

Code
# ggsave("pres/mile.png", plot = mile, bg = "transparent",
#        width = 8, height = 2.75, dpi = 300)

Regression Plot - Degree Uncertainty

Code
# Replace the short profile codes stored in un_df with the display labels used
# by the other figures.
# NOTE(review): "mle" is displayed as "Primarily Scientist" and "mls" as
# "Primarily Engineer"; confirm this matches the profile definitions.
un_df$class_fin[un_df$class_fin == "disorganized"] <- "Dispersed"
un_df$class_fin[un_df$class_fin == "strong"] <- "Strong"
un_df$class_fin[un_df$class_fin == "moderate"] <- "Moderate"
un_df$class_fin[un_df$class_fin == "mle"] <- "Primarily Scientist"
un_df$class_fin[un_df$class_fin == "mls"] <- "Primarily Engineer"
un_df$class_fin[un_df$class_fin == "max"] <- "Maximum"

# Fix the display order of the profiles on the x axis.
un_df$class_fin <- factor(
  un_df$class_fin,
  levels = c(
    "Dispersed",
    "Moderate",
    "Strong",
    "Primarily Scientist",
    "Primarily Engineer",
    "Maximum"
  )
)

# Drop rows with any missing value, attach prog_fin from df by respondent ID,
# and exclude respondents coded 3 on prog_fin. subset() also drops rows whose
# prog_fin is NA.
un_df2 <- na.omit(un_df)
un_df3 <- merge(un_df2, subset(df, select = c(R_IDs, prog_fin)), by = "R_IDs")
un_df4 <- subset(un_df3, prog_fin != 3)

# Boxplot of degree uncertainty by profile with the raw observations jittered
# on top.
uncer <- ggplot(un_df4, aes(x = class_fin, y = uncer, color = class_fin)) +
  # Every observation is already drawn by geom_jitter(); hiding the boxplot's
  # own outlier points keeps outliers from being plotted twice.
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(width = .20, height = .20, alpha = .20) +
  labs(
    y = "Degree Uncertainty",
    x = NULL,
    color = "GEI Profile"
  ) +
  theme_bw(base_size = 12) +
  scale_color_viridis_d(option = "D", end = 0.9) +
  theme(
    # angled x-axis labels identify the profiles
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 11),
    axis.ticks.x = element_blank(),
    axis.title.y = element_text(size = 12),
    axis.text.y = element_text(size = 7),

    # The legend is hidden; the remaining legend settings only take effect if
    # legend.position is changed.
    legend.position = "none",
    legend.direction = "horizontal",
    legend.box = "horizontal",
    legend.title = element_text(size = 12, lineheight = 0.9),
    legend.text = element_text(size = 11, lineheight = 0.9),
    legend.key.width = unit(0.7, "lines"),
    legend.key.height = unit(0.7, "lines"),

    strip.text = element_text(size = 11, lineheight = 0.9),
    plot.background = element_rect(fill = "transparent", color = NA),
    legend.background = element_rect(fill = "transparent", color = NA)
  ) +
  guides(
    color = guide_legend(
      nrow = 1,
      override.aes = list(alpha = 1, size = 2)
    )
  )

uncer

Code
# ggsave("pres/uncer.png", plot = uncer, bg = "transparent",
#        width = 6, height = 6, dpi = 300)

# describeBy(un_df2, group = "class_fin")