Info & Notes

Original: Kevin Kaufman-Ortiz and Allison Godwin (June 7, 2022)

Revised: Heather Perkins (October 10, 2022)

Step 1: Import

Data imported, race/ethnicity and gender variables cleaned, pre/post dataframes created.

#UBelong Exploratory analysis for Pre- Post- Surveys 


# install packages --------------------------------------------------------
#install.packages("readxl")
#install.packages("stargazer")
#install.packages("car")
#install.packages("Hmisc")
#install.packages("psych")
#install.packages("moments")
#install.packages("nFactors")
#install.packages("GPArotation")
#install.packages("Rmisc")
#install.packages("rstatix")
#install.packages("tidyverse")
#install.packages("ggpubr")
#install.packages("reshape2")
#install.packages("DescTools")


# load libraries ----------------------------------------------------------
library("readxl")
library("stargazer")
library("car")
library("Hmisc")
library("psych")
library("moments")
library("nFactors")
library("GPArotation")
library("Rmisc")
library("rstatix")
library("tidyverse")
library("ggpubr")
library("reshape2")
library("DescTools")
library(readxl)
library(ids)
library(MASS)
library(afex)
library(emmeans)
library(ggbeeswarm)
library(effects)

# require(devtools)
# install_version("parameters", version = "0.19.0", repos = "http://cran.us.r-project.org")
library(sjPlot)

# import data -------------------------------------------------------------
spring2022 <- read.csv("data/final-UBelong_ENGR 132_Spring 2022_prepostgradescondinstitutional_deid.csv")

# assign UID
# spring2022$UID <- ids::random_id(nrow(spring2022), 4)
# write.csv(spring2022, file="data/final-UBelong_ENGR 132_Spring 2022_prepostgradescondinstitutional_deid.csv")

# identify important variables

# id = UID
# instructor = Instructor
# section = Section
# control/treatment = Treatment
# pre/post = Time
# gender = DEMOgen
# BLI = DEMOmin
  # black, american indian, mexican, central american, puerto rican, other latinx, native hawaiian/pacific islander
  # NA is coded as 0 (majority)
# MATLABGradePercent
# ProjectGradePercent


# basic recoding and cleaning ---------------------------------------------
# transform -99 to NA
spring2022[spring2022 == -99] <- NA

# remove bad responses
# table(spring2022$CHECKpass, useNA = "always")
spring2022_2 <- subset(spring2022, CHECKpass == 1 | is.na(CHECKpass))
spring2022_3 <- subset(spring2022_2, CHECKpass_2 == 1 | is.na(CHECKpass_2))
spring2022 <- spring2022_3

# recode gender
# table(spring2022$DEMOgen)
# 1   2   3   4   5 
# 328 131   3   1   6 

#Gender is currently coded to have 5 values as follows: (1) Man, (2) Woman, (3) Non-binary / gender-queer, (4) Not listed above, (5) Prefer not to respond
#Not listed above was mostly attack helicopters so they got put as NA
#June 24, 2022, we decided as a team that we recognize we do not want to erase the experiences of NonBinary folk
spring2022$DEMOgen_orig <- spring2022$DEMOgen
spring2022$DEMOgen [spring2022$DEMOgen == 1] <- "man"  #Recoding to 0 = male (1)
spring2022$DEMOgen [spring2022$DEMOgen == 2 ] <- "woman"  #Recoding to 1 = Female (2)
spring2022$DEMOgen [spring2022$DEMOgen == 3 | spring2022$DEMOgen == 4 | spring2022$DEMOgen == 5] <- "nb"  #Recoding to NA = NonBinary, Not listed, Prefer not to respond (3, 4, 5)

# table(spring2022$DEMOgen)
# man    nb woman 
# 328    10   131

# creating race variables
# Race is coded with many variables, need a minoritized variable
# bli = black, american indian, mexican, central american, puerto rican, other latinx, native hawaiian/pacific islander
spring2022$DEMOmin <- "majority"
spring2022$DEMOmin[spring2022$DEMOblack == 1] <- "bli"
spring2022$DEMOmin[spring2022$DEMOai == 1] <- "bli"
spring2022$DEMOmin[spring2022$DEMOmex == 1] <- "bli"
spring2022$DEMOmin[spring2022$DEMOca == 1] <- "bli"
spring2022$DEMOmin[spring2022$DEMOsa == 1] <- "bli"
spring2022$DEMOmin[spring2022$DEMOpr == 1] <- "bli"
spring2022$DEMOmin[spring2022$DEMOol == 1] <- "bli"
spring2022$DEMOmin[spring2022$DEMOpi == 1] <- "bli"

# srm = used in other places
spring2022$DEMOmin2 <- "majority"
spring2022$DEMOmin2[spring2022$DEMOblack == 1] <- "srm"
spring2022$DEMOmin2[spring2022$DEMOai == 1] <- "srm"
spring2022$DEMOmin2[spring2022$DEMOsea == 1] <- "srm"
spring2022$DEMOmin2[spring2022$DEMOmex == 1] <- "srm"
spring2022$DEMOmin2[spring2022$DEMOca == 1] <- "srm"
spring2022$DEMOmin2[spring2022$DEMOsa == 1] <- "srm"
spring2022$DEMOmin2[spring2022$DEMOpr == 1] <- "srm"
spring2022$DEMOmin2[spring2022$DEMOol == 1] <- "srm"
spring2022$DEMOmin2[spring2022$DEMOpi == 1] <- "srm"
spring2022$DEMOmin2[spring2022$DEMOgen == "woman"] <- "srm"
# table(spring2022$DEMOmin2, useNA = "always")

# rg = three categories, bli, majority men, and majority women/nb
spring2022$rg <- NA
spring2022$rg[spring2022$DEMOmin == "bli"] <- "bli"
spring2022$rg[spring2022$DEMOmin == "majority" & spring2022$DEMOgen == "woman"] <- "majority woman & nb"
spring2022$rg[spring2022$DEMOmin == "majority" & spring2022$DEMOgen == "nb"] <- "majority woman & nb"
spring2022$rg[spring2022$DEMOmin == "majority" & spring2022$DEMOgen == "man"] <- "majority man"
# spring2022$rg[spring2022$DEMOmin == "majority" & is.na(spring2022$DEMOgen)] <- "majority man & NA"

# demographics and numbers
# table(spring2022$Treatment, useNA = "always")
# table(spring2022$DEMOgen, spring2022$DEMOmin, useNA = "always")
# table(spring2022$rg, useNA = "always")

# table(spring2022$DEMOmin, useNA = "always")
# table(spring2022$DEMOmin, spring2022$DEMOblack, useNA = "always")
# table(spring2022$DEMOmin, spring2022$DEMOai, useNA = "always")
# table(spring2022$DEMOmin, spring2022$DEMOmex, useNA = "always")
# table(spring2022$DEMOmin, spring2022$DEMOca, useNA = "always")
# table(spring2022$DEMOmin, spring2022$DEMOpr, useNA = "always")
# table(spring2022$DEMOmin, spring2022$DEMOsa, useNA = "always")
# table(spring2022$DEMOmin, spring2022$DEMOol, useNA = "always")
# table(spring2022$DEMOmin, spring2022$DEMOpi, useNA = "always")


# breaking out pre and post dataframes ------------------------------------
names(spring2022)
##   [1] "X"                          "EndDate"                   
##   [3] "Progress"                   "Duration__in_seconds"      
##   [5] "Finished"                   "Major01"                   
##   [7] "Major02"                    "MSdisc01"                  
##   [9] "MSdisc02"                   "MSdisc03"                  
##  [11] "MSdisc04"                   "MSdisc01rev"               
##  [13] "MSdisc02rev"                "MSdisc03rev"               
##  [15] "MSdisc04rev"                "MSdisc05"                  
##  [17] "MSdisc06"                   "MSdisc07"                  
##  [19] "CNEBclass01"                "CNEBclass02"               
##  [21] "CNEBclass03"                "CNEBclass04"               
##  [23] "CNEBclass05"                "CNEBclass01rev"            
##  [25] "CNEBclass02rev"             "CNEBclass04rev"            
##  [27] "CNEBclass05rev"             "CNEBclass06"               
##  [29] "LBdisc01"                   "LBdisc02"                  
##  [31] "LBdisc03"                   "LBdisc04"                  
##  [33] "LBdisc05"                   "LBdisc06"                  
##  [35] "LBdisc01rev"                "LBdisc05rev"               
##  [37] "LBdisc06rev"                "EEgen01"                   
##  [39] "EEgen02"                    "EEgen03"                   
##  [41] "EEgen04"                    "EEgen01rev"                
##  [43] "EEgen02rev"                 "EEgen03rev"                
##  [45] "EEgen04rev"                 "FASdisc01"                 
##  [47] "ENGdisc01"                  "ENGdisc02"                 
##  [49] "ENGdisc03"                  "ENGdisc03rev"              
##  [51] "FASdisc02"                  "FASdisc03"                 
##  [53] "FASdisc03rev"               "CNHSclass01"               
##  [55] "CNHSclass02"                "CNHSclass03"               
##  [57] "CNSWclass01"                "CNSWclass02"               
##  [59] "CNSWclass03"                "CNSWclass04"               
##  [61] "CNSWclass05"                "CNSWclass05rev"            
##  [63] "CNHSclass04"                "CNHSclass05"               
##  [65] "CNHSclass06"                "BELclass01"                
##  [67] "BELclass02"                 "BELclass03"                
##  [69] "BELclass04"                 "BELclass02rev"             
##  [71] "IPdisc01"                   "IPdisc02"                  
##  [73] "IPdisc03"                   "IPdisc04"                  
##  [75] "IPdisc04rev"                "CHECK"                     
##  [77] "GSdisc01"                   "GSdisc02"                  
##  [79] "GSdisc03"                   "GSdisc04"                  
##  [81] "GSdisc05"                   "RSdisc01"                  
##  [83] "RSdisc02"                   "RSdisc03"                  
##  [85] "RSdisc04"                   "RSdisc05"                  
##  [87] "CAgen01"                    "CAgen02"                   
##  [89] "CAgen03"                    "CAgen04"                   
##  [91] "CAgen05"                    "CAgen06"                   
##  [93] "CAgen07"                    "CAgen04rev"                
##  [95] "CAgen07rev"                 "Egrade01"                  
##  [97] "DEMOblack"                  "DEMOai"                    
##  [99] "DEMOme"                     "DEMOea"                    
## [101] "DEMOsea"                    "DEMOip"                    
## [103] "DEMOoa"                     "DEMOmex"                   
## [105] "DEMOca"                     "DEMOpr"                    
## [107] "DEMOol"                     "DEMOpi"                    
## [109] "DEMOw"                      "DEMOnl"                    
## [111] "DEMOpnr"                    "DEMOoadet"                 
## [113] "DEMOoldet"                  "DEMOnldet"                 
## [115] "DEMOlatinx"                 "DEMOasian"                 
## [117] "DEMOamerasian"              "DEMOsrm"                   
## [119] "DEMOmisc"                   "DEMOgen"                   
## [121] "DEMOgendet"                 "Woman"                     
## [123] "Nonbinary"                  "DEMOsexor"                 
## [125] "DEMOhetdum"                 "DEMOsexordet"              
## [127] "DEMOtrans"                  "DEMOtransdum"              
## [129] "DEMOpardeg"                 "DEMOpardum"                
## [131] "DEMOfamdum"                 "firstgen"                  
## [133] "DEMOfamdeg"                 "DEMOintern"                
## [135] "DEMOintdum"                 "DEMOdyslex"                
## [137] "DEMOadhd"                   "DEMOautism"                
## [139] "DEMOphysdis"                "DEMOchronic"               
## [141] "DEMOpsych"                  "DEMOdisnl"                 
## [143] "DEMOdisnldet"               "DEMOtransfer"              
## [145] "DEMOpell"                   "DEMOopen"                  
## [147] "Instructor"                 "Section"                   
## [149] "Discipline"                 "Course"                    
## [151] "Institution"                "Survey"                    
## [153] "Semester"                   "CHECKpass"                 
## [155] "RaceSum"                    "DEMOw1"                    
## [157] "DEMOblack1"                 "DEMOlatinx1"               
## [159] "DEMOasian1"                 "DEMOmisc1"                 
## [161] "DEMOmulti"                  "EEgen_mean"                
## [163] "CNEBclass_mean"             "BELclass_mean"             
## [165] "LBdisc_mean"                "MSdisc_mean"               
## [167] "FASdisc_mean"               "ENGdisc_mean"              
## [169] "CNHSclass_mean"             "CNSWclass_mean"            
## [171] "IPdisc_mean"                "GSdisc_mean"               
## [173] "RSdisc_mean"                "CAgen_mean"                
## [175] "PreEngagement_factor"       "EndDate_2"                 
## [177] "Progress_2"                 "Duration__in_seconds_2"    
## [179] "Finished_2"                 "Major01_2"                 
## [181] "Major02_2"                  "MSdisc01_2"                
## [183] "MSdisc02_2"                 "MSdisc03_2"                
## [185] "MSdisc04_2"                 "MSdisc01rev_2"             
## [187] "MSdisc02rev_2"              "MSdisc03rev_2"             
## [189] "MSdisc04rev_2"              "MSdisc05_2"                
## [191] "MSdisc06_2"                 "MSdisc07_2"                
## [193] "CNEBclass01_2"              "CNEBclass02_2"             
## [195] "CNEBclass03_2"              "CNEBclass04_2"             
## [197] "CNEBclass05_2"              "CNEBclass01rev_2"          
## [199] "CNEBclass02rev_2"           "CNEBclass04rev_2"          
## [201] "CNEBclass05rev_2"           "CNEBclass06_2"             
## [203] "LBdisc01_2"                 "LBdisc02_2"                
## [205] "LBdisc03_2"                 "LBdisc04_2"                
## [207] "LBdisc05_2"                 "LBdisc06_2"                
## [209] "LBdisc01rev_2"              "LBdisc05rev_2"             
## [211] "LBdisc06rev_2"              "EEgen01_2"                 
## [213] "EEgen02_2"                  "EEgen03_2"                 
## [215] "EEgen04_2"                  "EEgen01rev_2"              
## [217] "EEgen02rev_2"               "EEgen03rev_2"              
## [219] "EEgen04rev_2"               "FASdisc01_2"               
## [221] "ENGdisc01_2"                "ENGdisc02_2"               
## [223] "ENGdisc03_2"                "ENGdisc03rev_2"            
## [225] "FASdisc02_2"                "FASdisc03_2"               
## [227] "FASdisc03rev_2"             "CNHSclass01_2"             
## [229] "CNHSclass02_2"              "CNHSclass03_2"             
## [231] "CNSWclass01_2"              "CNSWclass02_2"             
## [233] "CNSWclass03_2"              "CNSWclass04_2"             
## [235] "CNSWclass05_2"              "CNSWclass05rev_2"          
## [237] "CNHSclass04_2"              "CNHSclass05_2"             
## [239] "CNHSclass06_2"              "BELclass01_2"              
## [241] "BELclass02_2"               "BELclass03_2"              
## [243] "BELclass04_2"               "BELclass02rev_2"           
## [245] "IPdisc01_2"                 "IPdisc02_2"                
## [247] "IPdisc03_2"                 "IPdisc04_2"                
## [249] "IPdisc04rev_2"              "CHECK1_2"                  
## [251] "GSdisc01_2"                 "GSdisc02_2"                
## [253] "GSdisc03_2"                 "GSdisc04_2"                
## [255] "GSdisc05_2"                 "RSdisc01_2"                
## [257] "RSdisc02_2"                 "RSdisc03_2"                
## [259] "RSdisc04_2"                 "RSdisc05_2"                
## [261] "CAgen01_2"                  "CAgen02_2"                 
## [263] "CAgen03_2"                  "CAgen04_2"                 
## [265] "CAgen05_2"                  "CAgen06_2"                 
## [267] "CAgen07_2"                  "CAgen04rev_2"              
## [269] "CAgen07rev_2"               "Egrade01_2"                
## [271] "CHECKpass_2"                "DEMOsa"                    
## [273] "EEgen_mean_2"               "CNEBclass_mean_2"          
## [275] "BELclass_mean_2"            "LBdisc_mean_2"             
## [277] "MSdisc_mean_2"              "FASdisc_mean_2"            
## [279] "ENGdisc_mean_2"             "CNHSclass_mean_2"          
## [281] "CNSWclass_mean_2"           "IPdisc_mean_2"             
## [283] "GSdisc_mean_2"              "RSdisc_mean_2"             
## [285] "CAgen_mean_2"               "PostEngagement_factor"     
## [287] "pre_post_merge"             "section"                   
## [289] "FinalGradePercent"          "MATLABGradePercent"        
## [291] "ProjectGradePercent"        "AttendanceGradePercent"    
## [293] "grades_merge"               "Term"                      
## [295] "Treatment"                  "condition_merge"           
## [297] "GENDER_DESC"                "REPORTING_ETHNICITY"       
## [299] "RESIDENCY_DESC"             "FirstGeneration"           
## [301] "CURRENT_TIME_STATUS_DESC"   "DEGREE_DESC"               
## [303] "COLLEGE_DESC"               "MAJOR_DESC"                
## [305] "SAT2_RW"                    "SAT2_Math"                 
## [307] "ACTEnglish"                 "ACTMath"                   
## [309] "ACTReading"                 "ACTScience"                
## [311] "ACTCompACT"                 "TOEFLListening"            
## [313] "TOEFLReading"               "TOEFLSpeaking"             
## [315] "TOEFLTotal"                 "TOEFLWriting"              
## [317] "HSGPA"                      "SCHOOL_RANK"               
## [319] "COURSE_SECTION_NUMBER"      "FINAL_GRADE"               
## [321] "Overall_GPA"                "Term_CREDITS_ATTEMPTED"    
## [323] "Term_GPA"                   "email_institutional_merge" 
## [325] "institutional_survey_merge" "coursegrade"               
## [327] "UID"                        "DEMOgen_orig"              
## [329] "DEMOmin"                    "DEMOmin2"                  
## [331] "rg"
head(subset(spring2022, select=c(ProjectGradePercent, MATLABGradePercent, ProjectGradePercent, AttendanceGradePercent)))
##   ProjectGradePercent MATLABGradePercent ProjectGradePercent.1
## 1           0.9633333          1.0000000             0.9633333
## 3           0.9255555          1.0000000             0.9255555
## 4           0.9600000          0.9866667             0.9600000
## 5           0.8272222          0.9533333             0.8272222
## 6           0.9633333          1.0000000             0.9633333
## 7           0.9319444          0.7493846             0.9319444
##   AttendanceGradePercent
## 1                     NA
## 3                   0.70
## 4                     NA
## 5                     NA
## 6                   0.95
## 7                     NA
pre <- subset(spring2022, select=c(UID, Treatment, DEMOgen, DEMOmin, DEMOmin2, rg, 1:175, MATLABGradePercent, ProjectGradePercent))
post <- subset(spring2022, select=c(UID, Treatment, DEMOgen, DEMOmin, DEMOmin2, rg, 176:286, MATLABGradePercent, ProjectGradePercent))

PreBelongingna <- na.omit(subset(pre, select=c(BELclass01, BELclass02rev, BELclass03, BELclass04)))
PostBelongingna <- na.omit(subset(post, select=c(BELclass01_2, BELclass02rev_2, BELclass03_2, BELclass04_2)))

Step 2: Assumption Checking

Create Functions

# correlation matrix function ---------------------------------------------
  #create function to conduct cor matrix with *** for significance levels
  corstars <- function(x){
    require(Hmisc)
    x <- as.matrix(x)
    R <- rcorr(x)$r
    p <- rcorr(x)$P
    ## define notions for significance levels; spacing is important.
    mystars <- ifelse(p < .001, "***", ifelse(p < .01, "** ", ifelse(p < .05, "* ", " ")))
    ## trunctuate the matrix that holds the correlations to two decimal
    R <- format(round(cbind(rep(-1.11, ncol(x)), R), 2))[,-1]
    ## build a new matrix that includes the correlations with their apropriate stars
    Rnew <- matrix(paste(R, mystars, sep=""), ncol=ncol(x))
    diag(Rnew) <- paste(diag(R), " ", sep="")
    rownames(Rnew) <- colnames(x)
    colnames(Rnew) <- paste(colnames(x), "", sep="")
    ## remove upper triangle
    Rnew <- as.matrix(Rnew)
    Rnew[upper.tri(Rnew, diag = TRUE)] <- ""
    Rnew <- as.data.frame(Rnew)
    ## remove last column and return the matrix (which is now a data frame)
    Rnew <- cbind(Rnew[1:length(Rnew)-1])
    return(Rnew)
  }


# kmo function ------------------------------------------------------------
  # There is no R command to run KMO test. G.Jay Kerns from Youngstown University created a fuction to do this test
  # http://tolstoy.newcastle.edu.au/R/e2/help/07/08/22816.html
  # KMO Kaiser-Meyer-Olkin Measure of Sampling Adequacy
  kmo = function( data ){
    X <- cor(as.matrix(data))
    iX <- ginv(X)
    S2 <- diag(diag((iX^-1)))
    AIS <- S2%*%iX%*%S2                      # anti-image covariance matrix
    IS <- X+AIS-2*S2                         # image covariance matrix
    Dai <- sqrt(diag(diag(AIS)))
    IR <- ginv(Dai)%*%IS%*%ginv(Dai)         # image correlation matrix
    AIR <- ginv(Dai)%*%AIS%*%ginv(Dai)       # anti-image correlation matrix
    a <- apply((AIR - diag(diag(AIR)))^2, 2, sum)
    AA <- sum(a)
    b <- apply((X - diag(nrow(X)))^2, 2, sum)
    BB <- sum(b)
    MSA <- b/(b+a)                        # indiv. measures of sampling adequacy
    AIR <- AIR-diag(nrow(AIR))+diag(MSA)  # Examine the anti-image of the
    # correlation matrix. That is the
    # negative of the partial correlations,
    # partialling out all other variables.
    kmo <- BB/(AA+BB)                     # overall KMO statistic
    # Reporting the conclusion
    if (kmo >= 0.00 && kmo < 0.50){
      test <- 'The KMO test yields a degree of common variance unacceptable for FA.'
    } else if (kmo >= 0.50 && kmo < 0.60){
      test <- 'The KMO test yields a degree of common variance miserable.'
    } else if (kmo >= 0.60 && kmo < 0.70){
      test <- 'The KMO test yields a degree of common variance mediocre.'
    } else if (kmo >= 0.70 && kmo < 0.80){
      test <- 'The KMO test yields a degree of common variance middling.'
    } else if (kmo >= 0.80 && kmo < 0.90){
      test <- 'The KMO test yields a degree of common variance meritorious.'
    } else {
      test <- 'The KMO test yields a degree of common variance marvelous.'     }
    ans <- list( overall = kmo,
                 report = test,
                 individual = MSA,
                 AIS = AIS,
                 AIR = AIR )
    return(ans)
  } # end of kmo()

Checking Correlations

# checking correlations for target variables ------------------------------
corstars(subset(pre, select=c(BELclass01, BELclass02rev, BELclass03, BELclass04))) #all correlations statistically significant
##               BELclass01 BELclass02rev BELclass03
## BELclass01                                       
## BELclass02rev    0.64***                         
## BELclass03       0.57***       0.52***           
## BELclass04       0.57***       0.51***    0.71***
corstars(subset(post, select=c(BELclass01_2, BELclass02rev_2, BELclass03_2, BELclass04_2))) #all correlations statistically significant 
##                 BELclass01_2 BELclass02rev_2 BELclass03_2
## BELclass01_2                                             
## BELclass02rev_2      0.64***                             
## BELclass03_2         0.59***         0.56***             
## BELclass04_2         0.56***         0.51***      0.72***

Examining Skew & Kurtosis

# examining skew and kurtosis ---------------------------------------------
#In a psychologists perspective. If it a published scale that has been tested, it is good to keep it and transform it
# all items within range of -2 and 2, if not in this range, remove them. Maximum likelihood
sapply(PreBelongingna, FUN = skewness)
##    BELclass01 BELclass02rev    BELclass03    BELclass04 
##   0.006386369  -0.221506098  -0.162112144   0.345292957
sapply(PostBelongingna, FUN = skewness)
##    BELclass01_2 BELclass02rev_2    BELclass03_2    BELclass04_2 
##      -0.4747835      -0.5677938      -0.2949721      -0.6963178
# all items with kurtosis less than 7
sapply(PreBelongingna, FUN = kurtosis)
##    BELclass01 BELclass02rev    BELclass03    BELclass04 
##      4.168645      3.941443      4.589422      4.044981
sapply(PostBelongingna, FUN = kurtosis)
##    BELclass01_2 BELclass02rev_2    BELclass03_2    BELclass04_2 
##        4.608295        4.309433        4.526886        6.149700

Check Barlett’s & KMO

# check barlett's and kmo -------------------------------------------------
# This makes sure it is not TOO correlated. As long as it is significant, I'm good. Make sure to NA OMIT
# If significant, it does not resemble an identity matrix
cortest.bartlett(cor(PreBelongingna), n = nrow(PreBelongingna))
## $chisq
## [1] 749.7921
## 
## $p.value
## [1] 1.080976e-158
## 
## $df
## [1] 6
cortest.bartlett(cor(PostBelongingna), n = nrow(PostBelongingna))
## $chisq
## [1] 605.6985
## 
## $p.value
## [1] 1.375762e-127
## 
## $df
## [1] 6
# Run the KMO command on the data
# If KMO is larger than a certain number:
    # 0.00 to 0.49   unacceptable.
    # to 0.59   miserable.
    # 0.60 to 0.69 mediocre.
    # 0.70 to 0.79   middling.
    # 0.80 to 0.89   meritorious.
    # 0.90 to 1.00   marvelous.
#Do we have a good sample size to run a good factor analysis
kmooutput <- kmo(PreBelongingna)
kmooutput$overall
## [1] 0.7735774
# [1] 0.7735774

kmooutput <- kmo(PostBelongingna)
kmooutput$overall
## [1] 0.7770663
# [1] 0.7770663
# We have meritorious data

Running EFA

Belonging Pre

# running EFA -------------------------------------------------------------
# Rule of thumb: Retain the number of components above the elbow
#Summarizing EFA results: TLI is ideally above 0.95 with minimum acceptable values at 0.90 and RMSEA should be less than 0.08.
#I am not sure how to interpret uniqueness... I found online that the higher the number, the more unique they are. 1-uniqueness = communality. But are there any thresholds?
# entering raw data and extracting (n) number of factors (determined in the previous step), 
# with promax rotation

#Pre Belonging EFA
ev <- eigen(cor(PreBelongingna)) # get eigenvalues
ap <- parallel(subject = nrow(PreBelongingna), var = ncol(PreBelongingna),
               rep = 100,cent = .05)
nS <- nScree(x = ev$values, aparallel = ap$eigen$qevpea)
plotnScree(nS) 

fit <- factanal(PreBelongingna, 1, rotation="promax") 
print(fit, digits = 3, cutoff = 0.3, sort = TRUE) 
## 
## Call:
## factanal(x = PreBelongingna, factors = 1, rotation = "promax")
## 
## Uniquenesses:
##    BELclass01 BELclass02rev    BELclass03    BELclass04 
##         0.447         0.530         0.331         0.343 
## 
## Loadings:
## [1] 0.743 0.685 0.818 0.811
## 
##                Factor1
## SS loadings      2.348
## Proportion Var   0.587
## 
## Test of the hypothesis that 1 factor is sufficient.
## The chi square statistic is 50.96 on 2 degrees of freedom.
## The p-value is 8.6e-12
# All loadings appear well over 0.32, looks good! 

fa(PreBelongingna, nfactors=1, rotate="promax", oblique.scores = TRUE, fm = "ml" )
## Factor Analysis using method =  ml
## Call: fa(r = PreBelongingna, nfactors = 1, rotate = "promax", fm = "ml", 
##     oblique.scores = TRUE)
## Standardized loadings (pattern matrix) based upon correlation matrix
##                ML1   h2   u2 com
## BELclass01    0.74 0.55 0.45   1
## BELclass02rev 0.69 0.47 0.53   1
## BELclass03    0.82 0.67 0.33   1
## BELclass04    0.81 0.66 0.34   1
## 
##                 ML1
## SS loadings    2.35
## Proportion Var 0.59
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## The degrees of freedom for the null model are  6  and the objective function was  1.78 with Chi Square of  749.79
## The degrees of freedom for the model are 2  and the objective function was  0.12 
## 
## The root mean square of the residuals (RMSR) is  0.07 
## The df corrected root mean square of the residuals is  0.11 
## 
## The harmonic number of observations is  424 with the empirical chi square  21.71  with prob <  1.9e-05 
## The total number of observations was  424  with Likelihood Chi Square =  50.96  with prob <  8.6e-12 
## 
## Tucker Lewis Index of factoring reliability =  0.802
## RMSEA index =  0.24  and the 90 % confidence intervals are  0.186 0.3
## BIC =  38.86
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy             
##                                                    ML1
## Correlation of (regression) scores with factors   0.93
## Multiple R square of scores with factors          0.86
## Minimum correlation of possible factor scores     0.72
# Tucker Lewis Index of factoring reliability =  0.802
# RMSEA index =  0.24
# indicate good fit

Belonging Post

#Post Belonging EFA
ev <- eigen(cor(PostBelongingna)) # get eigenvalues
ap <- parallel(subject = nrow(PostBelongingna), var = ncol(PostBelongingna),
               rep = 100,cent = .05)
nS <- nScree(x = ev$values, aparallel = ap$eigen$qevpea)
plotnScree(nS)

#Post Belonging
fit <- factanal(PostBelongingna, 1, rotation="promax") 
print(fit, digits = 3, cutoff = 0.3, sort = TRUE) 
## 
## Call:
## factanal(x = PostBelongingna, factors = 1, rotation = "promax")
## 
## Uniquenesses:
##    BELclass01_2 BELclass02rev_2    BELclass03_2    BELclass04_2 
##           0.456           0.517           0.281           0.351 
## 
## Loadings:
## [1] 0.738 0.695 0.848 0.806
## 
##                Factor1
## SS loadings      2.395
## Proportion Var   0.599
## 
## Test of the hypothesis that 1 factor is sufficient.
## The chi square statistic is 38.26 on 2 degrees of freedom.
## The p-value is 4.92e-09
# All loadings appear over 0.32, looks good! 
fa(PostBelongingna, nfactors=1, rotate="promax", oblique.scores = TRUE, fm = "ml" )
## Factor Analysis using method =  ml
## Call: fa(r = PostBelongingna, nfactors = 1, rotate = "promax", fm = "ml", 
##     oblique.scores = TRUE)
## Standardized loadings (pattern matrix) based upon correlation matrix
##                  ML1   h2   u2 com
## BELclass01_2    0.74 0.54 0.46   1
## BELclass02rev_2 0.69 0.48 0.52   1
## BELclass03_2    0.85 0.72 0.28   1
## BELclass04_2    0.81 0.65 0.35   1
## 
##                 ML1
## SS loadings    2.39
## Proportion Var 0.60
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## The degrees of freedom for the null model are  6  and the objective function was  1.86 with Chi Square of  605.7
## The degrees of freedom for the model are 2  and the objective function was  0.12 
## 
## The root mean square of the residuals (RMSR) is  0.06 
## The df corrected root mean square of the residuals is  0.11 
## 
## The harmonic number of observations is  329 with the empirical chi square  15.99  with prob <  0.00034 
## The total number of observations was  329  with Likelihood Chi Square =  38.26  with prob <  4.9e-09 
## 
## Tucker Lewis Index of factoring reliability =  0.818
## RMSEA index =  0.235  and the 90 % confidence intervals are  0.174 0.303
## BIC =  26.67
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy             
##                                                    ML1
## Correlation of (regression) scores with factors   0.93
## Multiple R square of scores with factors          0.87
## Minimum correlation of possible factor scores     0.73
# Tucker Lewis Index of factoring reliability =  0.818
# RMSEA index =  0.235
# indicate marginal fit

Cronbach’s Alphas

# cronbach's alphas -------------------------------------------------------
#For the future, it's good to check alpha for overall scale and for individual factors
psych::alpha(PreBelongingna) #0.85
## 
## Reliability analysis   
## Call: psych::alpha(x = PreBelongingna)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd median_r
##       0.85      0.85    0.82      0.59 5.7 0.012  3.2 0.44     0.57
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.82  0.85  0.87
## Duhachek  0.82  0.85  0.87
## 
##  Reliability if an item is dropped:
##               raw_alpha std.alpha G6(smc) average_r S/N alpha se  var.r med.r
## BELclass01         0.80      0.81    0.75      0.58 4.1    0.017 0.0130  0.52
## BELclass02rev      0.83      0.83    0.77      0.62 4.8    0.015 0.0066  0.57
## BELclass03         0.80      0.80    0.73      0.57 4.0    0.017 0.0043  0.57
## BELclass04         0.80      0.80    0.74      0.58 4.1    0.017 0.0037  0.57
## 
##  Item statistics 
##                 n raw.r std.r r.cor r.drop mean   sd
## BELclass01    424  0.84  0.84  0.76   0.70  3.2 0.52
## BELclass02rev 424  0.82  0.80  0.70   0.64  3.1 0.57
## BELclass03    424  0.84  0.84  0.78   0.70  3.1 0.53
## BELclass04    424  0.83  0.84  0.77   0.70  3.2 0.48
## 
## Non missing response frequency for each item
##                  1    2    3    4 miss
## BELclass01    0.00 0.04 0.71 0.24    0
## BELclass02rev 0.01 0.09 0.68 0.22    0
## BELclass03    0.01 0.06 0.72 0.21    0
## BELclass04    0.00 0.03 0.74 0.23    0
psych::alpha(PostBelongingna) #0.85
## 
## Reliability analysis   
## Call: psych::alpha(x = PostBelongingna)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd median_r
##       0.85      0.86    0.83       0.6 5.9 0.013  3.1 0.49     0.58
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.83  0.85  0.88
## Duhachek  0.83  0.85  0.88
## 
##  Reliability if an item is dropped:
##                 raw_alpha std.alpha G6(smc) average_r S/N alpha se  var.r med.r
## BELclass01_2         0.81      0.82    0.76      0.60 4.4    0.019 0.0128  0.56
## BELclass02rev_2      0.83      0.83    0.78      0.63 5.0    0.016 0.0074  0.59
## BELclass03_2         0.80      0.80    0.73      0.57 4.0    0.019 0.0046  0.56
## BELclass04_2         0.82      0.82    0.75      0.60 4.5    0.017 0.0018  0.59
## 
##  Item statistics 
##                   n raw.r std.r r.cor r.drop mean   sd
## BELclass01_2    329  0.84  0.84  0.76   0.70  3.1 0.59
## BELclass02rev_2 329  0.82  0.81  0.71   0.66  3.1 0.63
## BELclass03_2    329  0.85  0.86  0.81   0.74  3.1 0.55
## BELclass04_2    329  0.83  0.84  0.77   0.69  3.1 0.56
## 
## Non missing response frequency for each item
##                    1    2    3    4 miss
## BELclass01_2    0.02 0.08 0.68 0.22    0
## BELclass02rev_2 0.02 0.10 0.66 0.22    0
## BELclass03_2    0.01 0.10 0.72 0.18    0
## BELclass04_2    0.02 0.05 0.74 0.19    0

Step 3: Composites & Check

Create Composites

# create composite variables ----------------------------------------------
# do not have notes on why BELclass02 was dropped
pre$FBelong <- (pre$BELclass01 + pre$BELclass03 + pre$BELclass04)/3
post$FBelong <- (post$BELclass01 + post$BELclass03 + post$BELclass04)/3

Create New Dataframe

# subset needed variables into new long dataset
d <- rbind.data.frame(
  cbind(subset(pre, select=c(UID, Treatment, DEMOgen, DEMOmin, DEMOmin2, rg, FBelong, MATLABGradePercent, ProjectGradePercent)), Time = "0pre"),
  cbind(subset(post, select=c(UID, Treatment, DEMOgen, DEMOmin, DEMOmin2, rg, FBelong, MATLABGradePercent, ProjectGradePercent)), Time = "1post"))

# Make sure data are factors
d$Treatment <- as.factor(d$Treatment)
d$DEMOgen <- as.factor(d$DEMOgen)
d$DEMOmin <- as.factor(d$DEMOmin)
d$DEMOmin2 <- as.factor(d$DEMOmin2)
d$rg <- as.factor(d$rg)
d$Time <- as.factor(d$Time)
d$UID <- as.factor(d$UID)

d <- subset(d, !is.na(Treatment))

Examine QQ-plots and Trim

Belonging

# Examination of residuals demonstrated some normality challenges. Trim
qqPlot(d$FBelong)

## [1] 1011 1127
# trimming
upper_quantile <- quantile(d$FBelong, 0.98, na.rm = TRUE)
lower_quantile <- quantile(d$FBelong, 0.02, na.rm = TRUE)
# where 99th percentile becomes NA
d$FBelong[d$FBelong > upper_quantile] <- NA
d$FBelong[d$FBelong < lower_quantile] <- NA
qqPlot(d$FBelong)

## [1] 44 60
res.aovB <- aov(FBelong ~ Treatment*Time, data = d)
plot(res.aovB, 1)

plot(res.aovB, 2) # some trimming may be needed

res.aovB2 <- aov(FBelong ~ Treatment*Time*DEMOgen, data = d)
plot(res.aovB2, 1)

plot(res.aovB2, 2) # some trimming may be needed

res.aovB3 <- aov(FBelong ~ Treatment*Time*DEMOmin, data = d)
plot(res.aovB3, 1)

plot(res.aovB3, 2) # some trimming may be needed

Examining Skew & Kurtosis

# assumption checking -----------------------------------------------------
# Normality
moments::skewness(d$FBelong, na.rm = TRUE)
## [1] 0.4862195
moments::skewness(subset(d, select=c(FBelong), Time == "0pre"), na.rm=T)
##   FBelong 
## 0.5280985
moments::skewness(subset(d, select=c(FBelong), Time == "1post"), na.rm=T)
##   FBelong 
## 0.4462114
moments::kurtosis(d$FBelong, na.rm = TRUE)
## [1] 3.3805
moments::kurtosis(subset(d, select=c(FBelong), Time == "0pre"), na.rm=T)
##  FBelong 
## 3.321822
moments::kurtosis(subset(d, select=c(FBelong), Time == "1post"), na.rm=T)
##  FBelong 
## 3.441133
moments::skewness(d$MATLABGradePercent, na.rm = TRUE)
## [1] -3.18743
moments::kurtosis(d$MATLABGradePercent, na.rm = TRUE)
## [1] 14.70525
hist(d$FBelong, breaks=5)

hist(d$MATLABGradePercent, breaks=100)

Mauchly’s test of sphericity

Not needed since within-subjects variable only has two levels.

Create Extra Dataframes

# Wide Dataframe
temp1 <- subset(d, Time == "0pre")
temp2 <- subset(d, Time == "1post", select=c(UID, FBelong))
d2 <- merge(temp1, temp2, by = "UID", suffixes = c(".pre",".post"))
d2$FBelong.pre <- scale(d2$FBelong.pre, center = T)

Step 4: Analyses

Used afex package in R for ANOVAs. Automatically uses appropriate orthogonal contrasts for factors and uses Type III sums of squares. (https://www.r-bloggers.com/2017/06/anova-in-r-afex-may-be-the-solution-you-are-looking-for/)

!RM-ANOVA - Belonging #1

Time x Treatment X DEMOmin (BLI x Majority)

Results

out <- aov_ez(id = "UID", dv = "FBelong", data=d, between=c("Treatment","DEMOmin"), within="Time", observed=c("DEMOmin"), anova_table = list(es="pes"))
## Contrasts set to contr.sum for the following variables: Treatment, DEMOmin
nice(out)
## Anova Table (Type 3 tests)
## 
## Response: FBelong
##                   Effect     df  MSE      F   pes p.value
## 1              Treatment 1, 261 0.32   0.22 <.001    .643
## 2                DEMOmin 1, 261 0.32   0.45  .002    .504
## 3      Treatment:DEMOmin 1, 261 0.32   0.97  .004    .326
## 4                   Time 1, 261 0.07   1.04  .004    .310
## 5         Treatment:Time 1, 261 0.07   0.54  .002    .465
## 6           DEMOmin:Time 1, 261 0.07   0.63  .002    .427
## 7 Treatment:DEMOmin:Time 1, 261 0.07 2.99 +  .011    .085
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1

Default Plot

afex_plot(out, x = "Time", trace = "Treatment", panel = "DEMOmin")
## Warning: Panel(s) show a mixed within-between-design.
## Error bars do not allow comparisons across all means.
## Suppress error bars with: error = "none"

Refined Plot

tgc <- summarySE(d, measurevar="FBelong", groupvars=c("Treatment","DEMOmin","Time"), na.rm = T)
tgc
##   Treatment  DEMOmin  Time   N  FBelong        sd         se         ci
## 1         0      bli  0pre  13 3.333333 0.5270463 0.14617634 0.31849088
## 2         0      bli 1post   6 3.166667 0.6236096 0.25458754 0.65443810
## 3         0 majority  0pre 149 3.118568 0.4028312 0.03300122 0.06521446
## 4         0 majority 1post 129 3.129199 0.4512907 0.03973393 0.07862038
## 5         1      bli  0pre  34 3.156863 0.4122313 0.07069708 0.14383430
## 6         1      bli 1post  23 3.159420 0.4910262 0.10238604 0.21233565
## 7         1 majority  0pre 225 3.204444 0.4439630 0.02959754 0.05832523
## 8         1 majority 1post 163 3.112474 0.4177490 0.03272063 0.06461394
levels(tgc$Time) <- c("0Pre","1Post")
levels(tgc$Treatment) <- c("Control","Intervention")
levels(tgc$DEMOmin) <- c("BLI","White & Asian")

pd <- position_dodge(.5) # move them .05 to the left and right

ggplot(tgc, aes(x=Time, y=FBelong, group=Treatment, linetype=Treatment)) + 
  geom_errorbar(aes(ymin=FBelong-ci, ymax=FBelong+ci), width=.2, position=pd) +
  geom_line(position=pd) +
  geom_point(position=pd) +
  facet_grid(~DEMOmin) +
  ylim(1,4) + theme_bw() +
  ggtitle("Pre and Post Effects of Intervention on Majority and BLI Students") +
  xlab("Timepoint") + ylab("Belonging Score")

Regression - MATLab Grade

Treatment x rg (BLI, Majority Men, Majority Women & NB)

Results

d2$Treatment <- relevel(d2$Treatment, ref="0")
d2$DEMOmin <- relevel(d2$DEMOmin, ref="majority")

table(round(d2$MATLABGradePercent, digits = 2))
## 
##    0 0.06 0.07 0.23 0.29 0.33 0.34 0.36 0.39  0.4 0.47 0.49  0.5 0.51 0.54 0.55 
##    1    1    1    1    1    1    3    2    1    1    2    1    1    1    1    1 
## 0.56 0.57  0.6 0.61 0.62 0.63 0.64 0.65 0.66 0.67 0.68 0.69  0.7 0.71 0.72 0.73 
##    1    2    3    1    3    1    1    2    1    1    3    4    3    5    3    2 
## 0.74 0.75 0.76 0.77 0.79  0.8 0.81 0.82 0.83 0.84 0.85 0.86 0.87 0.88 0.89  0.9 
##    5    2    1    2    6    2    1    1    3    5    3    5    3    3    5   10 
## 0.91 0.92 0.93 0.94 0.95 0.96 0.97 0.98 0.99    1 
##    4    6    9   10   16   11   14   18   24  379
d2$MATLABGradePercent_cat <- round(d2$MATLABGradePercent, digits = 2)
d2$MATLABGradePercent_cat <- 1
d2$MATLABGradePercent_cat[d2$MATLABGradePercent < .71] <- 0
d2$MATLABGradePercent_cat[is.na(d2$MATLABGradePercent)] <- NA
table(d2$MATLABGradePercent_cat, useNA = "always")
## 
##    0    1 <NA> 
##   49  555   37
model <- glm(MATLABGradePercent_cat~ProjectGradePercent + Treatment + DEMOmin + Treatment*DEMOmin, family="binomial", data=d2)
summary(model)
## 
## Call:
## glm(formula = MATLABGradePercent_cat ~ ProjectGradePercent + 
##     Treatment + DEMOmin + Treatment * DEMOmin, family = "binomial", 
##     data = d2)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4719   0.3275   0.3499   0.3914   1.2602  
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           -1.87927    1.21292  -1.549 0.121290    
## ProjectGradePercent    4.88640    1.32424   3.690 0.000224 ***
## Treatment1             0.02698    0.34172   0.079 0.937068    
## DEMOminbli            -1.95219    0.60766  -3.213 0.001315 ** 
## Treatment1:DEMOminbli  1.61145    0.84622   1.904 0.056873 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 340.06  on 603  degrees of freedom
## Residual deviance: 311.82  on 599  degrees of freedom
##   (37 observations deleted due to missingness)
## AIC: 321.82
## 
## Number of Fisher Scoring iterations: 5
tab_model(model, p.style = "numeric_stars", transform = NULL)
  MATLABGradePercent_cat
Predictors Log-Odds CI p
(Intercept) -1.88 -4.73 – 0.17 0.121
ProjectGradePercent 4.89 *** 2.63 – 7.99 <0.001
Treatment [1] 0.03 -0.65 – 0.70 0.937
DEMOmin [bli] -1.95 ** -3.12 – -0.69 0.001
Treatment [1] * DEMOmin
[bli]
1.61 -0.04 – 3.33 0.057
Observations 604
R2 Tjur 0.092
  • p<0.05   ** p<0.01   *** p<0.001
with(model, null.deviance - deviance)
## [1] 28.24937
with(model, df.null - df.residual)
## [1] 4
with(model, pchisq(null.deviance - deviance, df.null - df.residual, lower.tail = FALSE))
## [1] 1.11023e-05
logLik(model)
## 'log Lik.' -155.9076 (df=5)

Log-Odds

So the exponentiated constant gives you the baseline odds, the exponentiated coefficients of the main effects give you the odds ratios when the other variable equals 0, and the exponentiated coefficient of the interaction terms tells you the ratio by wich the odds ratio changes.

summary(model)
## 
## Call:
## glm(formula = MATLABGradePercent_cat ~ ProjectGradePercent + 
##     Treatment + DEMOmin + Treatment * DEMOmin, family = "binomial", 
##     data = d2)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4719   0.3275   0.3499   0.3914   1.2602  
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           -1.87927    1.21292  -1.549 0.121290    
## ProjectGradePercent    4.88640    1.32424   3.690 0.000224 ***
## Treatment1             0.02698    0.34172   0.079 0.937068    
## DEMOminbli            -1.95219    0.60766  -3.213 0.001315 ** 
## Treatment1:DEMOminbli  1.61145    0.84622   1.904 0.056873 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 340.06  on 603  degrees of freedom
## Residual deviance: 311.82  on 599  degrees of freedom
##   (37 observations deleted due to missingness)
## AIC: 321.82
## 
## Number of Fisher Scoring iterations: 5
exp(coef(model))
##           (Intercept)   ProjectGradePercent            Treatment1 
##             0.1527015           132.4753884             1.0273483 
##            DEMOminbli Treatment1:DEMOminbli 
##             0.1419631             5.0100846
confint(model)
## Waiting for profiling to be done...
##                             2.5 %     97.5 %
## (Intercept)           -4.73302471  0.1724902
## ProjectGradePercent    2.63253885  7.9905110
## Treatment1            -0.64928357  0.7002987
## DEMOminbli            -3.11644515 -0.6893222
## Treatment1:DEMOminbli -0.04220871  3.3268940
library(epiDisplay)
## Warning: package 'epiDisplay' was built under R version 4.1.3
## Loading required package: foreign
## Loading required package: nnet
## 
## Attaching package: 'epiDisplay'
## The following objects are masked from 'package:psych':
## 
##     alpha, cs, lookup
## The following object is masked from 'package:ggplot2':
## 
##     alpha
## The following object is masked from 'package:lattice':
## 
##     dotplot
logistic.display(model)
## 
## Logistic regression predicting MATLABGradePercent_cat 
##  
##                                  crude OR(95%CI)        adj. OR(95%CI)        
## ProjectGradePercent (cont. var.) 125.98 (9.19,1727.87)  132.48 (9.88,1775.53) 
##                                                                               
## Treatment: 1 vs 0                1.26 (0.7,2.27)        1.03 (0.53,2.01)      
##                                                                               
## DEMOmin: bli vs majority         0.37 (0.17,0.82)       0.14 (0.04,0.47)      
##                                                                               
## Treatment1:DEMOminbli            -                      5.01 (0.95,26.31)     
##                                                                               
##                                  P(Wald's test) P(LR-test)
## ProjectGradePercent (cont. var.) < 0.001        < 0.001   
##                                                           
## Treatment: 1 vs 0                0.937          1         
##                                                           
## DEMOmin: bli vs majority         0.001          1         
##                                                           
## Treatment1:DEMOminbli            0.057          0.056     
##                                                           
## Log-likelihood = -155.9076
## No. of observations = 604
## AIC value = 321.8153
exp(1.41)/(1+exp(1.41))
## [1] 0.8037659
# https://stackoverflow.com/questions/56810063/how-can-i-convert-logs-to-odds-ratio-and-logs-to-probabilities-for-logistic-regr

Nagelkerke’s R2

library(fmsb)
## 
## Attaching package: 'fmsb'
## The following objects are masked from 'package:DescTools':
## 
##     CronbachAlpha, VIF
NagelkerkeR2(model)
## $N
## [1] 604
## 
## $R2
## [1] 0.1061373

Correct Classifications

glm_probs <- data.frame(probs = predict(model, type="response"))
glm_pred <- glm_probs %>%
  mutate(pred = ifelse(probs>.5, 1, 0))
glm_pred <- cbind(orig=model$model$MATLABGradePercent_cat, glm_pred)

glm_pred %>% 
  count(pred, orig) %>%
  spread(orig, n, fill = 0)
##   pred  0   1
## 1    0  4   1
## 2    1 45 554
glm_pred %>%
  summarize(score = mean(pred == orig))
##       score
## 1 0.9238411

Check Multicollinearity

car::vif(model)
## ProjectGradePercent           Treatment             DEMOmin   Treatment:DEMOmin 
##            1.009898            1.198177            2.085300            2.302348
# https://stats.stackexchange.com/questions/70679/which-variance-inflation-factor-should-i-be-using-textgvif-or-textgvif
# https://stats.stackexchange.com/questions/430412/vif-for-categorical-variable-with-more-than-2-categories

Default Plots

plot(allEffects(model))

plot_model(model, type="eff")
## $ProjectGradePercent

## 
## $Treatment

## 
## $DEMOmin

plot_model(model, type="int")

tgc <- summarySE(d2, measurevar="MATLABGradePercent_cat", groupvars=c("Treatment","DEMOmin"), na.rm = T)
tgc
##   Treatment  DEMOmin   N MATLABGradePercent_cat        sd         se         ci
## 1         0 majority 273              0.9230769 0.2669587 0.01615708 0.03180882
## 2         0      bli  15              0.6666667 0.4879500 0.12598816 0.27021772
## 3         1 majority 279              0.9318996 0.2523707 0.01510904 0.02974266
## 4         1      bli  37              0.8918919 0.3148001 0.05175282 0.10495958
.89-.66
## [1] 0.23

Step 5: Sample Information

Numbers

table(spring2022$Treatment, useNA = "always")
## 
##    0    1 <NA> 
##  307  334   13
table(d2$Treatment, useNA = "always")
## 
##    0    1 <NA> 
##  307  334    0

Race/Ethnicity

table(d2$DEMOmin, useNA = "always")
## 
## majority      bli     <NA> 
##      589       52        0
# bli = black, american indian, mexican, central american, puerto rican, other latinx, native hawaiian/pacific islander
table(spring2022$DEMOblack, useNA = "always")
## 
##    0    1 <NA> 
##  595   11   48
table(spring2022$DEMOai, useNA = "always")
## 
##    0    1 <NA> 
##  466    5  183
table(spring2022$DEMOme, useNA = "always")
## 
##    0    1 <NA> 
##  457   14  183
table(spring2022$DEMOea, useNA = "always")
## 
##    0    1 <NA> 
##  430   41  183
table(spring2022$DEMOsea, useNA = "always")
## 
##    0    1 <NA> 
##  453   18  183
table(spring2022$DEMOip, useNA = "always")
## 
##    0    1 <NA> 
##  417   54  183
table(spring2022$DEMOoa, useNA = "always")
## 
##    0    1 <NA> 
##  467    4  183
table(spring2022$DEMOmex, useNA = "always")
## 
##    0    1 <NA> 
##  460   11  183
table(spring2022$DEMOca, useNA = "always")
## 
##    0    1 <NA> 
##  463    8  183
table(spring2022$DEMOpr, useNA = "always")
## 
##    0    1 <NA> 
##  470    1  183
table(spring2022$DEMOol, useNA = "always")
## 
##    0    1 <NA> 
##  455   16  183
table(spring2022$DEMOpi, useNA = "always")
## 
##    0    1 <NA> 
##  468    3  183
table(spring2022$DEMOw, useNA = "always")
## 
##    0    1 <NA> 
##  173  433   48
table(spring2022$DEMOnl, useNA = "always")
## 
##    0    1 <NA> 
##  470    1  183
table(spring2022$DEMOpnr, useNA = "always")
## 
##    0    1 <NA> 
##  464    7  183

Gender Identity

#Gender is currently coded to have 5 values as follows: (1) Man, (2) Woman, (3) Non-binary / gender-queer, (4) Not listed above, (5) Prefer not to respond
#Not listed above was mostly attack helicopters so they got put as NA
table(spring2022$DEMOgen_orig, useNA = "always")
## 
##    1    2    3    4    5 <NA> 
##  328  131    3    1    6  185