Info & Notes

Original: Kevin Kaufman-Ortiz and Allison Godwin (June 7, 2022)

Revised: Heather Perkins (October 10, 2022)

Step 1: Import

Data imported, race/ethnicity and gender variables cleaned, pre/post dataframes created.

#UBelong Exploratory analysis for Pre- Post- Surveys 


# install packages --------------------------------------------------------
#install.packages("readxl")
#install.packages("stargazer")
#install.packages("car")
#install.packages("Hmisc")
#install.packages("psych")
#install.packages("moments")
#install.packages("nFactors")
#install.packages("GPArotation")
#install.packages("Rmisc")
#install.packages("rstatix")
#install.packages("tidyverse")
#install.packages("ggpubr")
#install.packages("reshape2")
#install.packages("DescTools")


# load libraries ----------------------------------------------------------
library("readxl")
library("stargazer")
library("car")
library("Hmisc")
library("psych")
library("moments")
library("nFactors")
library("GPArotation")
library("Rmisc")
library("rstatix")
library("tidyverse")
library("ggpubr")
library("reshape2")
library("DescTools")
library(readxl)
library(ids)
library(MASS)
library(afex)
library(emmeans)
library(ggbeeswarm)


# import data -------------------------------------------------------------
spring2022 <- read.csv("data/final-UBelong_ENGR 132_Spring 2022_prepostgradescondinstitutional_deid.csv")

# assign UID
# spring2022$UID <- ids::random_id(nrow(spring2022), 4)
# write.csv(spring2022, file="data/final-UBelong_ENGR 132_Spring 2022_prepostgradescondinstitutional_deid.csv")

# identify important variables

# id = UID
# instructor = Instructor
# section = Section
# control/treatment = Treatment
# pre/post = Time
# gender = DEMOgen
# BLI = DEMOmin
  # black, american indian, mexican, central american, puerto rican, other latinx, native hawaiian/pacific islander
  # NA is coded as 0 (majority)
# MATLABGradePercent
# FinalGradePercent


# basic recoding and cleaning ---------------------------------------------
# transform -99 to NA
spring2022[spring2022 == -99] <- NA

# remove bad responses
# table(spring2022$CHECKpass, useNA = "always")
spring2022_2 <- subset(spring2022, CHECKpass == 1 | is.na(CHECKpass))
spring2022_3 <- subset(spring2022_2, CHECKpass_2 == 1 | is.na(CHECKpass_2))
spring2022 <- spring2022_3

# recode gender
# table(spring2022$DEMOgen)
# 1   2   3   4   5 
# 328 131   3   1   6 

#Gender is currently coded to have 5 values as follows: (1) Man, (2) Woman, (3) Non-binary / gender-queer, (4) Not listed above, (5) Prefer not to respond
#Not listed above was mostly attack helicopters so they got put as NA
#June 24, 2022, we decided as a team that we recognize we do not want to erase the experiences of NonBinary folk
spring2022$DEMOgen [spring2022$DEMOgen == 1] <- "man"  #Recoding to 0 = male (1)
spring2022$DEMOgen [spring2022$DEMOgen == 2 ] <- "woman"  #Recoding to 1 = Female (2)
spring2022$DEMOgen [spring2022$DEMOgen == 3 | spring2022$DEMOgen == 4 | spring2022$DEMOgen == 5] <- "nb"  #Recoding to NA = NonBinary, Not listed, Prefer not to respond (3, 4, 5)

# table(spring2022$DEMOgen)
# man    nb woman 
# 328    10   131

# creating race variables
# Race is coded with many variables, need a minoritized variable
# bli = black, american indian, mexican, central american, puerto rican, other latinx, native hawaiian/pacific islander
spring2022$DEMOmin <- "majority"
spring2022$DEMOmin[spring2022$DEMOblack == 1] <- "bli"
spring2022$DEMOmin[spring2022$DEMOai == 1] <- "bli"
spring2022$DEMOmin[spring2022$DEMOmex == 1] <- "bli"
spring2022$DEMOmin[spring2022$DEMOca == 1] <- "bli"
spring2022$DEMOmin[spring2022$DEMOsa == 1] <- "bli"
spring2022$DEMOmin[spring2022$DEMOpr == 1] <- "bli"
spring2022$DEMOmin[spring2022$DEMOol == 1] <- "bli"
spring2022$DEMOmin[spring2022$DEMOpi == 1] <- "bli"

# srm = used in other places
spring2022$DEMOmin2 <- "majority"
spring2022$DEMOmin2[spring2022$DEMOblack == 1] <- "srm"
spring2022$DEMOmin2[spring2022$DEMOai == 1] <- "srm"
spring2022$DEMOmin2[spring2022$DEMOsea == 1] <- "srm"
spring2022$DEMOmin2[spring2022$DEMOmex == 1] <- "srm"
spring2022$DEMOmin2[spring2022$DEMOca == 1] <- "srm"
spring2022$DEMOmin2[spring2022$DEMOsa == 1] <- "srm"
spring2022$DEMOmin2[spring2022$DEMOpr == 1] <- "srm"
spring2022$DEMOmin2[spring2022$DEMOol == 1] <- "srm"
spring2022$DEMOmin2[spring2022$DEMOpi == 1] <- "srm"
spring2022$DEMOmin2[spring2022$DEMOgen == "woman"] <- "srm"
# table(spring2022$DEMOmin2, useNA = "always")

# rg = three categories, bli, majority men, and majority women/nb
spring2022$rg <- NA
spring2022$rg[spring2022$DEMOmin == "bli"] <- "bli"
spring2022$rg[spring2022$DEMOmin == "majority" & spring2022$DEMOgen == "woman"] <- "majority woman & nb"
spring2022$rg[spring2022$DEMOmin == "majority" & spring2022$DEMOgen == "nb"] <- "majority woman & nb"
spring2022$rg[spring2022$DEMOmin == "majority" & spring2022$DEMOgen == "man"] <- "majority man"
# spring2022$rg[spring2022$DEMOmin == "majority" & is.na(spring2022$DEMOgen)] <- "majority man & NA"

# demographics and numbers
# table(spring2022$Treatment, useNA = "always")
# table(spring2022$DEMOgen, spring2022$DEMOmin, useNA = "always")
# table(spring2022$rg, useNA = "always")

# table(spring2022$DEMOmin, useNA = "always")
# table(spring2022$DEMOmin, spring2022$DEMOblack, useNA = "always")
# table(spring2022$DEMOmin, spring2022$DEMOai, useNA = "always")
# table(spring2022$DEMOmin, spring2022$DEMOmex, useNA = "always")
# table(spring2022$DEMOmin, spring2022$DEMOca, useNA = "always")
# table(spring2022$DEMOmin, spring2022$DEMOpr, useNA = "always")
# table(spring2022$DEMOmin, spring2022$DEMOsa, useNA = "always")
# table(spring2022$DEMOmin, spring2022$DEMOol, useNA = "always")
# table(spring2022$DEMOmin, spring2022$DEMOpi, useNA = "always")


# breaking out pre and post dataframes ------------------------------------
# names(spring2022)
pre <- subset(spring2022, select=c(UID, Treatment, DEMOgen, DEMOmin, DEMOmin2, rg, 1:175, MATLABGradePercent, FinalGradePercent))
post <- subset(spring2022, select=c(UID, Treatment, DEMOgen, DEMOmin, DEMOmin2, rg, 176:286, MATLABGradePercent, FinalGradePercent))

PreBelongingna <- na.omit(subset(pre, select=c(BELclass01, BELclass02rev, BELclass03, BELclass04)))
PostBelongingna <- na.omit(subset(post, select=c(BELclass01_2, BELclass02rev_2, BELclass03_2, BELclass04_2)))

Step 2: Assumption Checking

Create Functions

# correlation matrix function ---------------------------------------------
  #create function to conduct cor matrix with *** for significance levels
  corstars <- function(x){
    require(Hmisc)
    x <- as.matrix(x)
    R <- rcorr(x)$r
    p <- rcorr(x)$P
    ## define notions for significance levels; spacing is important.
    mystars <- ifelse(p < .001, "***", ifelse(p < .01, "** ", ifelse(p < .05, "* ", " ")))
    ## trunctuate the matrix that holds the correlations to two decimal
    R <- format(round(cbind(rep(-1.11, ncol(x)), R), 2))[,-1]
    ## build a new matrix that includes the correlations with their apropriate stars
    Rnew <- matrix(paste(R, mystars, sep=""), ncol=ncol(x))
    diag(Rnew) <- paste(diag(R), " ", sep="")
    rownames(Rnew) <- colnames(x)
    colnames(Rnew) <- paste(colnames(x), "", sep="")
    ## remove upper triangle
    Rnew <- as.matrix(Rnew)
    Rnew[upper.tri(Rnew, diag = TRUE)] <- ""
    Rnew <- as.data.frame(Rnew)
    ## remove last column and return the matrix (which is now a data frame)
    Rnew <- cbind(Rnew[1:length(Rnew)-1])
    return(Rnew)
  }


# kmo function ------------------------------------------------------------
  # There is no R command to run KMO test. G.Jay Kerns from Youngstown University created a fuction to do this test
  # http://tolstoy.newcastle.edu.au/R/e2/help/07/08/22816.html
  # KMO Kaiser-Meyer-Olkin Measure of Sampling Adequacy
  kmo = function( data ){
    X <- cor(as.matrix(data))
    iX <- ginv(X)
    S2 <- diag(diag((iX^-1)))
    AIS <- S2%*%iX%*%S2                      # anti-image covariance matrix
    IS <- X+AIS-2*S2                         # image covariance matrix
    Dai <- sqrt(diag(diag(AIS)))
    IR <- ginv(Dai)%*%IS%*%ginv(Dai)         # image correlation matrix
    AIR <- ginv(Dai)%*%AIS%*%ginv(Dai)       # anti-image correlation matrix
    a <- apply((AIR - diag(diag(AIR)))^2, 2, sum)
    AA <- sum(a)
    b <- apply((X - diag(nrow(X)))^2, 2, sum)
    BB <- sum(b)
    MSA <- b/(b+a)                        # indiv. measures of sampling adequacy
    AIR <- AIR-diag(nrow(AIR))+diag(MSA)  # Examine the anti-image of the
    # correlation matrix. That is the
    # negative of the partial correlations,
    # partialling out all other variables.
    kmo <- BB/(AA+BB)                     # overall KMO statistic
    # Reporting the conclusion
    if (kmo >= 0.00 && kmo < 0.50){
      test <- 'The KMO test yields a degree of common variance unacceptable for FA.'
    } else if (kmo >= 0.50 && kmo < 0.60){
      test <- 'The KMO test yields a degree of common variance miserable.'
    } else if (kmo >= 0.60 && kmo < 0.70){
      test <- 'The KMO test yields a degree of common variance mediocre.'
    } else if (kmo >= 0.70 && kmo < 0.80){
      test <- 'The KMO test yields a degree of common variance middling.'
    } else if (kmo >= 0.80 && kmo < 0.90){
      test <- 'The KMO test yields a degree of common variance meritorious.'
    } else {
      test <- 'The KMO test yields a degree of common variance marvelous.'     }
    ans <- list( overall = kmo,
                 report = test,
                 individual = MSA,
                 AIS = AIS,
                 AIR = AIR )
    return(ans)
  } # end of kmo()

Checking Correlations

# checking correlations for target variables ------------------------------
corstars(subset(pre, select=c(BELclass01, BELclass02rev, BELclass03, BELclass04))) #all correlations statistically significant

##               BELclass01 BELclass02rev BELclass03
## BELclass01                                       
## BELclass02rev    0.64***                         
## BELclass03       0.57***       0.52***           
## BELclass04       0.57***       0.51***    0.71***

corstars(subset(post, select=c(BELclass01_2, BELclass02rev_2, BELclass03_2, BELclass04_2))) #all correlations statistically significant

##                 BELclass01_2 BELclass02rev_2 BELclass03_2
## BELclass01_2                                             
## BELclass02rev_2      0.64***                             
## BELclass03_2         0.59***         0.56***             
## BELclass04_2         0.56***         0.51***      0.72***

Examining Skew & Kurtosis

# examining skew and kurtosis ---------------------------------------------
#In a psychologists perspective. If it a published scale that has been tested, it is good to keep it and transform it
# all items within range of -2 and 2, if not in this range, remove them. Maximum likelihood
sapply(PreBelongingna, FUN = skewness)

##    BELclass01 BELclass02rev    BELclass03    BELclass04 
##   0.006386369  -0.221506098  -0.162112144   0.345292957

sapply(PostBelongingna, FUN = skewness)

##    BELclass01_2 BELclass02rev_2    BELclass03_2    BELclass04_2 
##      -0.4747835      -0.5677938      -0.2949721      -0.6963178

# all items with kurtosis less than 7
sapply(PreBelongingna, FUN = kurtosis)

##    BELclass01 BELclass02rev    BELclass03    BELclass04 
##      4.168645      3.941443      4.589422      4.044981

sapply(PostBelongingna, FUN = kurtosis)

##    BELclass01_2 BELclass02rev_2    BELclass03_2    BELclass04_2 
##        4.608295        4.309433        4.526886        6.149700

Check Barlett’s & KMO

# check barlett's and kmo -------------------------------------------------
# This makes sure it is not TOO correlated. As long as it is significant, I'm good. Make sure to NA OMIT
# If significant, it does not resemble an identity matrix
cortest.bartlett(cor(PreBelongingna), n = nrow(PreBelongingna))

## $chisq
## [1] 749.7921
## 
## $p.value
## [1] 1.080976e-158
## 
## $df
## [1] 6

cortest.bartlett(cor(PostBelongingna), n = nrow(PostBelongingna))

## $chisq
## [1] 605.6985
## 
## $p.value
## [1] 1.375762e-127
## 
## $df
## [1] 6

# Run the KMO command on the data
# If KMO is larger than a certain number:
    # 0.00 to 0.49   unacceptable.
    # to 0.59   miserable.
    # 0.60 to 0.69 mediocre.
    # 0.70 to 0.79   middling.
    # 0.80 to 0.89   meritorious.
    # 0.90 to 1.00   marvelous.
#Do we have a good sample size to run a good factor analysis
kmooutput <- kmo(PreBelongingna)
kmooutput$overall

## [1] 0.7735774

# [1] 0.7735774

kmooutput <- kmo(PostBelongingna)
kmooutput$overall

## [1] 0.7770663

# [1] 0.7770663
# We have meritorious data

Running EFA

Belonging Pre

# running EFA -------------------------------------------------------------
# Rule of thumb: Retain the number of components above the elbow
#Summarizing EFA results: TLI is ideally above 0.95 with minimum acceptable values at 0.90 and RMSEA should be less than 0.08.
#I am not sure how to interpret uniqueness... I found online that the higher the number, the more unique they are. 1-uniqueness = communality. But are there any thresholds?
# entering raw data and extracting (n) number of factors (determined in the previous step), 
# with promax rotation

#Pre Belonging EFA
ev <- eigen(cor(PreBelongingna)) # get eigenvalues
ap <- parallel(subject = nrow(PreBelongingna), var = ncol(PreBelongingna),
               rep = 100,cent = .05)
nS <- nScree(x = ev$values, aparallel = ap$eigen$qevpea)
plotnScree(nS)

fit <- factanal(PreBelongingna, 1, rotation="promax") 
print(fit, digits = 3, cutoff = 0.3, sort = TRUE)

## 
## Call:
## factanal(x = PreBelongingna, factors = 1, rotation = "promax")
## 
## Uniquenesses:
##    BELclass01 BELclass02rev    BELclass03    BELclass04 
##         0.447         0.530         0.331         0.343 
## 
## Loadings:
## [1] 0.743 0.685 0.818 0.811
## 
##                Factor1
## SS loadings      2.348
## Proportion Var   0.587
## 
## Test of the hypothesis that 1 factor is sufficient.
## The chi square statistic is 50.96 on 2 degrees of freedom.
## The p-value is 8.6e-12

# All loadings appear well over 0.32, looks good! 

fa(PreBelongingna, nfactors=1, rotate="promax", oblique.scores = TRUE, fm = "ml" )

## Factor Analysis using method =  ml
## Call: fa(r = PreBelongingna, nfactors = 1, rotate = "promax", fm = "ml", 
##     oblique.scores = TRUE)
## Standardized loadings (pattern matrix) based upon correlation matrix
##                ML1   h2   u2 com
## BELclass01    0.74 0.55 0.45   1
## BELclass02rev 0.69 0.47 0.53   1
## BELclass03    0.82 0.67 0.33   1
## BELclass04    0.81 0.66 0.34   1
## 
##                 ML1
## SS loadings    2.35
## Proportion Var 0.59
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## The degrees of freedom for the null model are  6  and the objective function was  1.78 with Chi Square of  749.79
## The degrees of freedom for the model are 2  and the objective function was  0.12 
## 
## The root mean square of the residuals (RMSR) is  0.07 
## The df corrected root mean square of the residuals is  0.11 
## 
## The harmonic number of observations is  424 with the empirical chi square  21.71  with prob <  1.9e-05 
## The total number of observations was  424  with Likelihood Chi Square =  50.96  with prob <  8.6e-12 
## 
## Tucker Lewis Index of factoring reliability =  0.802
## RMSEA index =  0.24  and the 90 % confidence intervals are  0.186 0.3
## BIC =  38.86
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy             
##                                                    ML1
## Correlation of (regression) scores with factors   0.93
## Multiple R square of scores with factors          0.86
## Minimum correlation of possible factor scores     0.72

# Tucker Lewis Index of factoring reliability =  0.802
# RMSEA index =  0.24
# indicate good fit

Belonging Post

#Post Belonging EFA
ev <- eigen(cor(PostBelongingna)) # get eigenvalues
ap <- parallel(subject = nrow(PostBelongingna), var = ncol(PostBelongingna),
               rep = 100,cent = .05)
nS <- nScree(x = ev$values, aparallel = ap$eigen$qevpea)
plotnScree(nS)

#Post Belonging
fit <- factanal(PostBelongingna, 1, rotation="promax") 
print(fit, digits = 3, cutoff = 0.3, sort = TRUE)

## 
## Call:
## factanal(x = PostBelongingna, factors = 1, rotation = "promax")
## 
## Uniquenesses:
##    BELclass01_2 BELclass02rev_2    BELclass03_2    BELclass04_2 
##           0.456           0.517           0.281           0.351 
## 
## Loadings:
## [1] 0.738 0.695 0.848 0.806
## 
##                Factor1
## SS loadings      2.395
## Proportion Var   0.599
## 
## Test of the hypothesis that 1 factor is sufficient.
## The chi square statistic is 38.26 on 2 degrees of freedom.
## The p-value is 4.92e-09

# All loadings appear over 0.32, looks good! 
fa(PostBelongingna, nfactors=1, rotate="promax", oblique.scores = TRUE, fm = "ml" )

## Factor Analysis using method =  ml
## Call: fa(r = PostBelongingna, nfactors = 1, rotate = "promax", fm = "ml", 
##     oblique.scores = TRUE)
## Standardized loadings (pattern matrix) based upon correlation matrix
##                  ML1   h2   u2 com
## BELclass01_2    0.74 0.54 0.46   1
## BELclass02rev_2 0.69 0.48 0.52   1
## BELclass03_2    0.85 0.72 0.28   1
## BELclass04_2    0.81 0.65 0.35   1
## 
##                 ML1
## SS loadings    2.39
## Proportion Var 0.60
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## The degrees of freedom for the null model are  6  and the objective function was  1.86 with Chi Square of  605.7
## The degrees of freedom for the model are 2  and the objective function was  0.12 
## 
## The root mean square of the residuals (RMSR) is  0.06 
## The df corrected root mean square of the residuals is  0.11 
## 
## The harmonic number of observations is  329 with the empirical chi square  15.99  with prob <  0.00034 
## The total number of observations was  329  with Likelihood Chi Square =  38.26  with prob <  4.9e-09 
## 
## Tucker Lewis Index of factoring reliability =  0.818
## RMSEA index =  0.235  and the 90 % confidence intervals are  0.174 0.303
## BIC =  26.67
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy             
##                                                    ML1
## Correlation of (regression) scores with factors   0.93
## Multiple R square of scores with factors          0.87
## Minimum correlation of possible factor scores     0.73

# Tucker Lewis Index of factoring reliability =  0.818
# RMSEA index =  0.235
# indicate marginal fit

Cronbach’s Alphas

# cronbach's alphas -------------------------------------------------------
#For the future, it's good to check alpha for overall scale and for individual factors
psych::alpha(PreBelongingna) #0.85

## 
## Reliability analysis   
## Call: psych::alpha(x = PreBelongingna)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd median_r
##       0.85      0.85    0.82      0.59 5.7 0.012  3.2 0.44     0.57
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.82  0.85  0.87
## Duhachek  0.82  0.85  0.87
## 
##  Reliability if an item is dropped:
##               raw_alpha std.alpha G6(smc) average_r S/N alpha se  var.r med.r
## BELclass01         0.80      0.81    0.75      0.58 4.1    0.017 0.0130  0.52
## BELclass02rev      0.83      0.83    0.77      0.62 4.8    0.015 0.0066  0.57
## BELclass03         0.80      0.80    0.73      0.57 4.0    0.017 0.0043  0.57
## BELclass04         0.80      0.80    0.74      0.58 4.1    0.017 0.0037  0.57
## 
##  Item statistics 
##                 n raw.r std.r r.cor r.drop mean   sd
## BELclass01    424  0.84  0.84  0.76   0.70  3.2 0.52
## BELclass02rev 424  0.82  0.80  0.70   0.64  3.1 0.57
## BELclass03    424  0.84  0.84  0.78   0.70  3.1 0.53
## BELclass04    424  0.83  0.84  0.77   0.70  3.2 0.48
## 
## Non missing response frequency for each item
##                  1    2    3    4 miss
## BELclass01    0.00 0.04 0.71 0.24    0
## BELclass02rev 0.01 0.09 0.68 0.22    0
## BELclass03    0.01 0.06 0.72 0.21    0
## BELclass04    0.00 0.03 0.74 0.23    0

psych::alpha(PostBelongingna) #0.85

## 
## Reliability analysis   
## Call: psych::alpha(x = PostBelongingna)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd median_r
##       0.85      0.86    0.83       0.6 5.9 0.013  3.1 0.49     0.58
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.83  0.85  0.88
## Duhachek  0.83  0.85  0.88
## 
##  Reliability if an item is dropped:
##                 raw_alpha std.alpha G6(smc) average_r S/N alpha se  var.r med.r
## BELclass01_2         0.81      0.82    0.76      0.60 4.4    0.019 0.0128  0.56
## BELclass02rev_2      0.83      0.83    0.78      0.63 5.0    0.016 0.0074  0.59
## BELclass03_2         0.80      0.80    0.73      0.57 4.0    0.019 0.0046  0.56
## BELclass04_2         0.82      0.82    0.75      0.60 4.5    0.017 0.0018  0.59
## 
##  Item statistics 
##                   n raw.r std.r r.cor r.drop mean   sd
## BELclass01_2    329  0.84  0.84  0.76   0.70  3.1 0.59
## BELclass02rev_2 329  0.82  0.81  0.71   0.66  3.1 0.63
## BELclass03_2    329  0.85  0.86  0.81   0.74  3.1 0.55
## BELclass04_2    329  0.83  0.84  0.77   0.69  3.1 0.56
## 
## Non missing response frequency for each item
##                    1    2    3    4 miss
## BELclass01_2    0.02 0.08 0.68 0.22    0
## BELclass02rev_2 0.02 0.10 0.66 0.22    0
## BELclass03_2    0.01 0.10 0.72 0.18    0
## BELclass04_2    0.02 0.05 0.74 0.19    0

Step 3: Composites & Check

Create Composites

# create composite variables ----------------------------------------------
# do not have notes on why BELclass02 was dropped
pre$FBelong <- (pre$BELclass01 + pre$BELclass03 + pre$BELclass04)/3
post$FBelong <- (post$BELclass01 + post$BELclass03 + post$BELclass04)/3

Create New Dataframe

# subset needed variables into new long dataset
d <- rbind.data.frame(
  cbind(subset(pre, select=c(UID, Treatment, DEMOgen, DEMOmin, DEMOmin2, rg, FBelong, MATLABGradePercent, FinalGradePercent)), Time = "0pre"),
  cbind(subset(post, select=c(UID, Treatment, DEMOgen, DEMOmin, DEMOmin2, rg, FBelong, MATLABGradePercent, FinalGradePercent)), Time = "1post"))

# Make sure data are factors
d$Treatment <- as.factor(d$Treatment)
d$DEMOgen <- as.factor(d$DEMOgen)
d$DEMOmin <- as.factor(d$DEMOmin)
d$DEMOmin2 <- as.factor(d$DEMOmin2)
d$rg <- as.factor(d$rg)
d$Time <- as.factor(d$Time)
d$UID <- as.factor(d$UID)

d <- subset(d, !is.na(Treatment))

Examining Skew & Kurtosis

# assumption checking -----------------------------------------------------
# Normality
moments::skewness(d$FBelong, na.rm = TRUE) #0.01937707

## [1] -0.05792312

moments::kurtosis(d$FBelong, na.rm = TRUE) #4.859645

## [1] 4.870226

moments::skewness(d$MATLABGradePercent, na.rm = TRUE) #[1] -3.266513

## [1] -3.18743

moments::kurtosis(d$MATLABGradePercent, na.rm = TRUE) #15.84626

## [1] 14.70525

moments::skewness(d$FinalGradePercent, na.rm = TRUE) #[1] -3.367032

## [1] -4.326676

moments::kurtosis(d$FinalGradePercent, na.rm = TRUE) #26.30512

## [1] 32.84907

hist(d$FBelong, breaks=5)

hist(d$MATLABGradePercent, breaks=100)

hist(d$FinalGradePercent, breaks=100)

Examine QQ-plots and Trim

# Examination of residuals demonstrated some normality challenges. Trim
qqPlot(d$FBelong)

## [1] 1011 1127

# trimming
upper_quantile <- quantile(d$FBelong, 0.98, na.rm = TRUE)
lower_quantile <- quantile(d$FBelong, 0.02, na.rm = TRUE)
# where 99th percentile becomes NA
d$FBelong[d$FBelong > upper_quantile] <- NA
d$FBelong[d$FBelong < lower_quantile] <- NA
qqPlot(d$FBelong)

## [1] 44 60

res.aovB <- aov(FBelong ~ Treatment*Time, data = d)
plot(res.aovB, 1)

plot(res.aovB, 2) # some trimming may be needed

res.aovB2 <- aov(FBelong ~ Treatment*Time*DEMOgen, data = d)
plot(res.aovB2, 1)

plot(res.aovB2, 2) # some trimming may be needed

res.aovB3 <- aov(FBelong ~ Treatment*Time*DEMOmin, data = d)
plot(res.aovB3, 1)

plot(res.aovB3, 2) # some trimming may be needed

Mauchly’s test of sphericity

Not needed since within-subjects variable only has two levels.

Create Extra Dataframes

# Wide Dataframe
temp1 <- subset(d, Time == "0pre")
temp2 <- subset(d, Time == "1post", select=c(UID, FBelong))
d2 <- merge(temp1, temp2, by = "UID", suffixes = c(".pre",".post"))
d2$FBelong.pre <- scale(d2$FBelong.pre, center = T)

# Dataframe with dichotimized pre-test belonging
d3 <- subset(d2, !is.na(FBelong.pre))
# d3$FBelong.pre <- scale(d3$FBelong.pre, center = T)
# table(d3$FBelong.pre, useNA = "always")
hist(d3$FBelong.pre)

d3$bel_cut[d3$FBelong.pre < 0] <- "low"
d3$bel_cut[d3$FBelong.pre > 0] <- "high"

# dataframe limited to low-belongers
d4 <- subset(d3, bel_cut == "low")

Step 4: Analyses

Used afex package in R for ANOVAs. Automatically uses appropriate orthogonal contrasts for factors and uses Type III sums of squares. (https://www.r-bloggers.com/2017/06/anova-in-r-afex-may-be-the-solution-you-are-looking-for/)

!RM-ANOVA - Belonging #1

Time x Treatment X DEMOmin (BLI x Majority)

Results

out <- aov_ez(id = "UID", dv = "FBelong", data=d, between=c("Treatment","DEMOmin"), within="Time", observed=c("DEMOmin"), anova_table = list(es="pes"))

## Contrasts set to contr.sum for the following variables: Treatment, DEMOmin

nice(out)

## Anova Table (Type 3 tests)
## 
## Response: FBelong
##                   Effect     df  MSE      F   pes p.value
## 1              Treatment 1, 261 0.32   0.22 <.001    .643
## 2                DEMOmin 1, 261 0.32   0.45  .002    .504
## 3      Treatment:DEMOmin 1, 261 0.32   0.97  .004    .326
## 4                   Time 1, 261 0.07   1.04  .004    .310
## 5         Treatment:Time 1, 261 0.07   0.54  .002    .465
## 6           DEMOmin:Time 1, 261 0.07   0.63  .002    .427
## 7 Treatment:DEMOmin:Time 1, 261 0.07 2.99 +  .011    .085
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1

Default Plot

afex_plot(out, x = "Time", trace = "Treatment", panel = "DEMOmin")

## Warning: Panel(s) show a mixed within-between-design.
## Error bars do not allow comparisons across all means.
## Suppress error bars with: error = "none"

Refined Plot

tgc <- summarySE(d, measurevar="FBelong", groupvars=c("Treatment","DEMOmin","Time"), na.rm = T)
tgc

##   Treatment  DEMOmin  Time   N  FBelong        sd         se         ci
## 1         0      bli  0pre  13 3.333333 0.5270463 0.14617634 0.31849088
## 2         0      bli 1post   6 3.166667 0.6236096 0.25458754 0.65443810
## 3         0 majority  0pre 149 3.118568 0.4028312 0.03300122 0.06521446
## 4         0 majority 1post 129 3.129199 0.4512907 0.03973393 0.07862038
## 5         1      bli  0pre  34 3.156863 0.4122313 0.07069708 0.14383430
## 6         1      bli 1post  23 3.159420 0.4910262 0.10238604 0.21233565
## 7         1 majority  0pre 225 3.204444 0.4439630 0.02959754 0.05832523
## 8         1 majority 1post 163 3.112474 0.4177490 0.03272063 0.06461394

levels(tgc$Time) <- c("0Pre","1Post")
levels(tgc$Treatment) <- c("Control","Intervention")
levels(tgc$DEMOmin) <- c("BLI","White & Asian")

pd <- position_dodge(.5) # move them .05 to the left and right

ggplot(tgc, aes(x=Time, y=FBelong, group=Treatment, linetype=Treatment)) + 
  geom_errorbar(aes(ymin=FBelong-ci, ymax=FBelong+ci), width=.2, position=pd) +
  geom_line(position=pd) +
  geom_point(position=pd) +
  facet_grid(~DEMOmin) +
  ylim(1,4) + theme_bw() +
  ggtitle("Pre and Post Effects of Intervention on Majority and BLI Students") +
  xlab("Timepoint") + ylab("Belonging Score")

Pairwise Comparisons

emmeans(out, specs = c("Time", "Treatment", "DEMOmin"))

##  Time   Treatment DEMOmin  emmean     SE  df lower.CL upper.CL
##  X0pre  0         bli        3.40 0.1997 261     3.01     3.79
##  X1post 0         bli        3.20 0.1931 261     2.82     3.58
##  X0pre  1         bli        3.14 0.0952 261     2.95     3.32
##  X1post 1         bli        3.17 0.0920 261     2.99     3.35
##  X0pre  0         majority   3.11 0.0466 261     3.02     3.20
##  X1post 0         majority   3.15 0.0450 261     3.06     3.24
##  X0pre  1         majority   3.21 0.0370 261     3.14     3.29
##  X1post 1         majority   3.16 0.0357 261     3.08     3.23
## 
## Confidence level used: 0.95

pairs(emmeans(out, specs = c("Time", "Treatment", "DEMOmin")))

##  contrast                                                estimate     SE  df
##  X0pre Treatment0 bli - X1post Treatment0 bli              0.2000 0.1632 261
##  X0pre Treatment0 bli - X0pre Treatment1 bli               0.2636 0.2212 261
##  X0pre Treatment0 bli - X1post Treatment1 bli              0.2333 0.2199 261
##  X0pre Treatment0 bli - X0pre Treatment0 majority          0.2877 0.2051 261
##  X0pre Treatment0 bli - X1post Treatment0 majority         0.2514 0.2047 261
##  X0pre Treatment0 bli - X0pre Treatment1 majority          0.1877 0.2031 261
##  X0pre Treatment0 bli - X1post Treatment1 majority         0.2447 0.2029 261
##  X1post Treatment0 bli - X0pre Treatment1 bli              0.0636 0.2153 261
##  X1post Treatment0 bli - X1post Treatment1 bli             0.0333 0.2139 261
##  X1post Treatment0 bli - X0pre Treatment0 majority         0.0877 0.1986 261
##  X1post Treatment0 bli - X1post Treatment0 majority        0.0514 0.1982 261
##  X1post Treatment0 bli - X0pre Treatment1 majority        -0.0123 0.1966 261
##  X1post Treatment0 bli - X1post Treatment1 majority        0.0447 0.1963 261
##  X0pre Treatment1 bli - X1post Treatment1 bli             -0.0303 0.0778 261
##  X0pre Treatment1 bli - X0pre Treatment0 majority          0.0240 0.1060 261
##  X0pre Treatment1 bli - X1post Treatment0 majority        -0.0122 0.1053 261
##  X0pre Treatment1 bli - X0pre Treatment1 majority         -0.0760 0.1021 261
##  X0pre Treatment1 bli - X1post Treatment1 majority        -0.0189 0.1017 261
##  X1post Treatment1 bli - X0pre Treatment0 majority         0.0543 0.1031 261
##  X1post Treatment1 bli - X1post Treatment0 majority        0.0181 0.1025 261
##  X1post Treatment1 bli - X0pre Treatment1 majority        -0.0457 0.0992 261
##  X1post Treatment1 bli - X1post Treatment1 majority        0.0114 0.0987 261
##  X0pre Treatment0 majority - X1post Treatment0 majority   -0.0362 0.0380 261
##  X0pre Treatment0 majority - X0pre Treatment1 majority    -0.1000 0.0594 261
##  X0pre Treatment0 majority - X1post Treatment1 majority   -0.0429 0.0587 261
##  X1post Treatment0 majority - X0pre Treatment1 majority   -0.0638 0.0582 261
##  X1post Treatment0 majority - X1post Treatment1 majority  -0.0067 0.0575 261
##  X0pre Treatment1 majority - X1post Treatment1 majority    0.0571 0.0302 261
##  t.ratio p.value
##    1.226  0.9237
##    1.192  0.9338
##    1.061  0.9641
##    1.403  0.8554
##    1.228  0.9229
##    0.924  0.9835
##    1.206  0.9295
##    0.296  1.0000
##    0.156  1.0000
##    0.442  0.9998
##    0.260  1.0000
##   -0.063  1.0000
##    0.228  1.0000
##   -0.390  0.9999
##    0.227  1.0000
##   -0.116  1.0000
##   -0.744  0.9955
##   -0.186  1.0000
##    0.527  0.9995
##    0.177  1.0000
##   -0.460  0.9998
##    0.116  1.0000
##   -0.953  0.9804
##   -1.682  0.6986
##   -0.732  0.9960
##   -1.095  0.9574
##   -0.117  1.0000
##    1.890  0.5588
## 
## P value adjustment: tukey method for comparing a family of 8 estimates

RM-ANOVA - Belonging #2

Time x Treatment x rg (BLI, Majority Men, Majority Women & NB)

Results

out <- aov_ez(id = "UID", dv = "FBelong", data=d, between=c("Treatment","rg"), within="Time", observed=c("rg"), anova_table = list(es="pes"))

## Contrasts set to contr.sum for the following variables: Treatment, rg

nice(out)

## Anova Table (Type 3 tests)
## 
## Response: FBelong
##              Effect     df  MSE    F   pes p.value
## 1         Treatment 1, 257 0.32 0.14 <.001    .711
## 2                rg 2, 257 0.32 1.38  .011    .252
## 3      Treatment:rg 2, 257 0.32 1.24  .010    .290
## 4              Time 1, 257 0.07 0.87  .003    .352
## 5    Treatment:Time 1, 257 0.07 0.01 <.001    .943
## 6           rg:Time 2, 257 0.07 0.35  .003    .702
## 7 Treatment:rg:Time 2, 257 0.07 1.84  .014    .161
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1

Default Plot

afex_plot(out, x = "Time", trace = "Treatment", panel = "rg")

## Warning: Panel(s) show a mixed within-between-design.
## Error bars do not allow comparisons across all means.
## Suppress error bars with: error = "none"

Pairwise Comparisons

emmeans(out, specs = c("Time", "Treatment", "rg"))

##  Time   Treatment rg                  emmean     SE  df lower.CL upper.CL
##  X0pre  0         bli                   3.40 0.1995 257     3.01     3.79
##  X1post 0         bli                   3.20 0.1925 257     2.82     3.58
##  X0pre  1         bli                   3.14 0.0951 257     2.95     3.32
##  X1post 1         bli                   3.17 0.0918 257     2.99     3.35
##  X0pre  0         majority man          3.12 0.0581 257     3.01     3.24
##  X1post 0         majority man          3.15 0.0560 257     3.04     3.26
##  X0pre  1         majority man          3.26 0.0463 257     3.17     3.35
##  X1post 1         majority man          3.22 0.0446 257     3.13     3.30
##  X0pre  0         majority woman & nb   3.08 0.0789 257     2.93     3.24
##  X1post 0         majority woman & nb   3.16 0.0761 257     3.01     3.31
##  X0pre  1         majority woman & nb   3.12 0.0619 257     3.00     3.24
##  X1post 1         majority woman & nb   3.04 0.0597 257     2.93     3.16
## 
## Confidence level used: 0.95

pairs(emmeans(out, specs = c("Time", "Treatment", "rg")))

##  contrast                                                                     
##  X0pre Treatment0 bli - X1post Treatment0 bli                                 
##  X0pre Treatment0 bli - X0pre Treatment1 bli                                  
##  X0pre Treatment0 bli - X1post Treatment1 bli                                 
##  X0pre Treatment0 bli - X0pre Treatment0 majority man                         
##  X0pre Treatment0 bli - X1post Treatment0 majority man                        
##  X0pre Treatment0 bli - X0pre Treatment1 majority man                         
##  X0pre Treatment0 bli - X1post Treatment1 majority man                        
##  X0pre Treatment0 bli - X0pre Treatment0 majority woman & nb                  
##  X0pre Treatment0 bli - X1post Treatment0 majority woman & nb                 
##  X0pre Treatment0 bli - X0pre Treatment1 majority woman & nb                  
##  X0pre Treatment0 bli - X1post Treatment1 majority woman & nb                 
##  X1post Treatment0 bli - X0pre Treatment1 bli                                 
##  X1post Treatment0 bli - X1post Treatment1 bli                                
##  X1post Treatment0 bli - X0pre Treatment0 majority man                        
##  X1post Treatment0 bli - X1post Treatment0 majority man                       
##  X1post Treatment0 bli - X0pre Treatment1 majority man                        
##  X1post Treatment0 bli - X1post Treatment1 majority man                       
##  X1post Treatment0 bli - X0pre Treatment0 majority woman & nb                 
##  X1post Treatment0 bli - X1post Treatment0 majority woman & nb                
##  X1post Treatment0 bli - X0pre Treatment1 majority woman & nb                 
##  X1post Treatment0 bli - X1post Treatment1 majority woman & nb                
##  X0pre Treatment1 bli - X1post Treatment1 bli                                 
##  X0pre Treatment1 bli - X0pre Treatment0 majority man                         
##  X0pre Treatment1 bli - X1post Treatment0 majority man                        
##  X0pre Treatment1 bli - X0pre Treatment1 majority man                         
##  X0pre Treatment1 bli - X1post Treatment1 majority man                        
##  X0pre Treatment1 bli - X0pre Treatment0 majority woman & nb                  
##  X0pre Treatment1 bli - X1post Treatment0 majority woman & nb                 
##  X0pre Treatment1 bli - X0pre Treatment1 majority woman & nb                  
##  X0pre Treatment1 bli - X1post Treatment1 majority woman & nb                 
##  X1post Treatment1 bli - X0pre Treatment0 majority man                        
##  X1post Treatment1 bli - X1post Treatment0 majority man                       
##  X1post Treatment1 bli - X0pre Treatment1 majority man                        
##  X1post Treatment1 bli - X1post Treatment1 majority man                       
##  X1post Treatment1 bli - X0pre Treatment0 majority woman & nb                 
##  X1post Treatment1 bli - X1post Treatment0 majority woman & nb                
##  X1post Treatment1 bli - X0pre Treatment1 majority woman & nb                 
##  X1post Treatment1 bli - X1post Treatment1 majority woman & nb                
##  X0pre Treatment0 majority man - X1post Treatment0 majority man               
##  X0pre Treatment0 majority man - X0pre Treatment1 majority man                
##  X0pre Treatment0 majority man - X1post Treatment1 majority man               
##  X0pre Treatment0 majority man - X0pre Treatment0 majority woman & nb         
##  X0pre Treatment0 majority man - X1post Treatment0 majority woman & nb        
##  X0pre Treatment0 majority man - X0pre Treatment1 majority woman & nb         
##  X0pre Treatment0 majority man - X1post Treatment1 majority woman & nb        
##  X1post Treatment0 majority man - X0pre Treatment1 majority man               
##  X1post Treatment0 majority man - X1post Treatment1 majority man              
##  X1post Treatment0 majority man - X0pre Treatment0 majority woman & nb        
##  X1post Treatment0 majority man - X1post Treatment0 majority woman & nb       
##  X1post Treatment0 majority man - X0pre Treatment1 majority woman & nb        
##  X1post Treatment0 majority man - X1post Treatment1 majority woman & nb       
##  X0pre Treatment1 majority man - X1post Treatment1 majority man               
##  X0pre Treatment1 majority man - X0pre Treatment0 majority woman & nb         
##  X0pre Treatment1 majority man - X1post Treatment0 majority woman & nb        
##  X0pre Treatment1 majority man - X0pre Treatment1 majority woman & nb         
##  X0pre Treatment1 majority man - X1post Treatment1 majority woman & nb        
##  X1post Treatment1 majority man - X0pre Treatment0 majority woman & nb        
##  X1post Treatment1 majority man - X1post Treatment0 majority woman & nb       
##  X1post Treatment1 majority man - X0pre Treatment1 majority woman & nb        
##  X1post Treatment1 majority man - X1post Treatment1 majority woman & nb       
##  X0pre Treatment0 majority woman & nb - X1post Treatment0 majority woman & nb 
##  X0pre Treatment0 majority woman & nb - X0pre Treatment1 majority woman & nb  
##  X0pre Treatment0 majority woman & nb - X1post Treatment1 majority woman & nb 
##  X1post Treatment0 majority woman & nb - X0pre Treatment1 majority woman & nb 
##  X1post Treatment0 majority woman & nb - X1post Treatment1 majority woman & nb
##  X0pre Treatment1 majority woman & nb - X1post Treatment1 majority woman & nb 
##  estimate     SE  df t.ratio p.value
##   0.20000 0.1637 257   1.222  0.9868
##   0.26364 0.2210 257   1.193  0.9891
##   0.23333 0.2196 257   1.063  0.9959
##   0.27571 0.2078 257   1.327  0.9749
##   0.25311 0.2072 257   1.222  0.9868
##   0.14194 0.2048 257   0.693  0.9999
##   0.18495 0.2044 257   0.905  0.9990
##   0.31667 0.2145 257   1.476  0.9458
##   0.24375 0.2135 257   1.142  0.9924
##   0.27821 0.2089 257   1.332  0.9742
##   0.35513 0.2082 257   1.705  0.8643
##   0.06364 0.2147 257   0.296  1.0000
##   0.03333 0.2133 257   0.156  1.0000
##   0.07571 0.2011 257   0.377  1.0000
##   0.05311 0.2005 257   0.265  1.0000
##  -0.05806 0.1980 257  -0.293  1.0000
##  -0.01505 0.1976 257  -0.076  1.0000
##   0.11667 0.2080 257   0.561  1.0000
##   0.04375 0.2070 257   0.211  1.0000
##   0.07821 0.2022 257   0.387  1.0000
##   0.15513 0.2015 257   0.770  0.9998
##  -0.03030 0.0780 257  -0.388  1.0000
##   0.01207 0.1114 257   0.108  1.0000
##  -0.01053 0.1104 257  -0.095  1.0000
##  -0.12170 0.1058 257  -1.151  0.9919
##  -0.07869 0.1051 257  -0.749  0.9998
##   0.05303 0.1235 257   0.429  1.0000
##  -0.01989 0.1218 257  -0.163  1.0000
##   0.01457 0.1134 257   0.128  1.0000
##   0.09149 0.1123 257   0.815  0.9996
##   0.04237 0.1086 257   0.390  1.0000
##   0.01977 0.1075 257   0.184  1.0000
##  -0.09140 0.1028 257  -0.889  0.9992
##  -0.04839 0.1021 257  -0.474  1.0000
##   0.08333 0.1210 257   0.689  0.9999
##   0.01042 0.1192 257   0.087  1.0000
##   0.04487 0.1107 257   0.405  1.0000
##   0.12179 0.1095 257   1.113  0.9939
##  -0.02260 0.0477 257  -0.474  1.0000
##  -0.13377 0.0742 257  -1.802  0.8157
##  -0.09076 0.0732 257  -1.239  0.9853
##   0.04096 0.0979 257   0.418  1.0000
##  -0.03196 0.0957 257  -0.334  1.0000
##   0.00250 0.0848 257   0.029  1.0000
##   0.07942 0.0833 257   0.954  0.9984
##  -0.11117 0.0727 257  -1.530  0.9310
##  -0.06816 0.0716 257  -0.951  0.9985
##   0.06356 0.0967 257   0.657  1.0000
##  -0.00936 0.0945 257  -0.099  1.0000
##   0.02510 0.0835 257   0.301  1.0000
##   0.10202 0.0819 257   1.246  0.9846
##   0.04301 0.0380 257   1.133  0.9929
##   0.17473 0.0914 257   1.911  0.7512
##   0.10181 0.0890 257   1.143  0.9923
##   0.13627 0.0772 257   1.764  0.8357
##   0.21319 0.0755 257   2.823  0.1766
##   0.13172 0.0906 257   1.454  0.9513
##   0.05880 0.0882 257   0.667  1.0000
##   0.09326 0.0763 257   1.223  0.9868
##   0.17018 0.0745 257   2.283  0.4912
##  -0.07292 0.0647 257  -1.127  0.9932
##  -0.03846 0.1002 257  -0.384  1.0000
##   0.03846 0.0989 257   0.389  1.0000
##   0.03446 0.0981 257   0.351  1.0000
##   0.11138 0.0967 257   1.152  0.9919
##   0.07692 0.0508 257   1.515  0.9352
## 
## P value adjustment: tukey method for comparing a family of 12 estimates

ANCOVA - Final Grade

Treatment x DEMOmin (BLI X Majority)

out <- aov_ez(id = "UID", dv = "FinalGradePercent", data=d2, between=c("Treatment","DEMOmin"), observed=c("DEMOmin"), anova_table = list(es="pes"))

## Warning: Missing values for following ID(s):
## 066bf253, 07cd36e8, 103835b7, 11a109de, 163cd192, 19f253f7, 28164b98, 2b3256e1, 2df65c18, 3d8e33cd, 55d91e27, 57e50784, 5c4f7e0e, 62a6e123, 632fe4b0, 64285c43, 6682f8ec, 66d35e35, 67e77d96, 69ff8dd3, 754ec1be, 83efb1cc, 881ac70b, 88699c14, 925ffb4c, 9a8d5ad4, b98abba9, bacb5cad, bb2d714a, c3109248, ce453266, d2b2cff0, e965f476, eaab6818, f469ac4e, fb672a6e, fc4c2da2
## Removing those cases from the analysis.

## Contrasts set to contr.sum for the following variables: Treatment, DEMOmin

nice(out)

## Anova Table (Type 3 tests)
## 
## Response: FinalGradePercent
##              Effect     df  MSE      F   pes p.value
## 1         Treatment 1, 600 0.01   0.13 <.001    .716
## 2           DEMOmin 1, 600 0.01 3.16 +  .005    .076
## 3 Treatment:DEMOmin 1, 600 0.01   0.02 <.001    .890
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1

!!ANCOVA - MATLab Grade

Treatment x rg (BLI, Majority Men, Majority Women & NB)

Results

out <- aov_ez(id = "UID", dv = "MATLABGradePercent", data=d2, between=c("Treatment","rg"), observed=c("rg"), anova_table = list(es="pes"))

## Contrasts set to contr.sum for the following variables: Treatment, rg

nice(out)

## Anova Table (Type 3 tests)
## 
## Response: MATLABGradePercent
##         Effect     df  MSE        F  pes p.value
## 1    Treatment 1, 451 0.01   4.08 * .009    .044
## 2           rg 2, 451 0.01 7.61 *** .033   <.001
## 3 Treatment:rg 2, 451 0.01   3.25 * .014    .040
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1

Default Plots

afex_plot(out, x = "Treatment")

## NOTE: Results may be misleading due to involvement in interactions

afex_plot(out, x = "rg")

## NOTE: Results may be misleading due to involvement in interactions

afex_plot(out, x = "Treatment", trace = "rg")

Refined Plot

tgc2 <- summarySE(subset(d2, !is.na(rg)), measurevar="MATLABGradePercent", groupvars=c("Treatment","rg"), na.rm = T)
tgc2

##   Treatment                  rg   N MATLABGradePercent         sd          se
## 1         0                 bli  15          0.8343282 0.21151872 0.054613900
## 2         0        majority man 119          0.9375260 0.14742820 0.013514721
## 3         0 majority woman & nb  56          0.9687582 0.06243506 0.008343236
## 4         1                 bli  37          0.9216334 0.14770019 0.024281762
## 5         1        majority man 160          0.9602997 0.09483373 0.007497265
## 6         1 majority woman & nb  70          0.9500308 0.11702724 0.013987431
##           ci
## 1 0.11713517
## 2 0.02676283
## 3 0.01672022
## 4 0.04924570
## 5 0.01480707
## 6 0.02790416

levels(tgc2$Treatment) <- c("Control","Intervention")
levels(tgc2$rg) <- c("BLI Men and Women","White & Asian Men","White & Asian Women & Non-Binary")

ggplot(tgc2, aes(x=Treatment, y=MATLABGradePercent, fill=rg)) + 
  geom_bar(position=position_dodge(), stat="identity", color="black") +
  geom_errorbar(aes(ymin=MATLABGradePercent-se, ymax=MATLABGradePercent+se),
                width=.2,
                position=position_dodge(.9)) +
  xlab("Condition") + ylab("MATLab Grade") + labs(fill="Race/Gender")

Pairwise Comparisons

emmeans(out, specs = c("Treatment", "rg"))

##  Treatment rg                  emmean      SE  df lower.CL upper.CL
##  0         bli                  0.834 0.03115 451    0.773    0.896
##  1         bli                  0.922 0.01984 451    0.883    0.961
##  0         majority man         0.938 0.01106 451    0.916    0.959
##  1         majority man         0.960 0.00954 451    0.942    0.979
##  0         majority woman & nb  0.969 0.01612 451    0.937    1.000
##  1         majority woman & nb  0.950 0.01442 451    0.922    0.978
## 
## Confidence level used: 0.95

pairs(emmeans(out, specs = c("Treatment", "rg")))

##  contrast                                                        estimate
##  Treatment0 bli - Treatment1 bli                                 -0.08731
##  Treatment0 bli - Treatment0 majority man                        -0.10320
##  Treatment0 bli - Treatment1 majority man                        -0.12597
##  Treatment0 bli - Treatment0 majority woman & nb                 -0.13443
##  Treatment0 bli - Treatment1 majority woman & nb                 -0.11570
##  Treatment1 bli - Treatment0 majority man                        -0.01589
##  Treatment1 bli - Treatment1 majority man                        -0.03867
##  Treatment1 bli - Treatment0 majority woman & nb                 -0.04712
##  Treatment1 bli - Treatment1 majority woman & nb                 -0.02840
##  Treatment0 majority man - Treatment1 majority man               -0.02277
##  Treatment0 majority man - Treatment0 majority woman & nb        -0.03123
##  Treatment0 majority man - Treatment1 majority woman & nb        -0.01250
##  Treatment1 majority man - Treatment0 majority woman & nb        -0.00846
##  Treatment1 majority man - Treatment1 majority woman & nb         0.01027
##  Treatment0 majority woman & nb - Treatment1 majority woman & nb  0.01873
##      SE  df t.ratio p.value
##  0.0369 451  -2.364  0.1713
##  0.0331 451  -3.122  0.0234
##  0.0326 451  -3.866  0.0018
##  0.0351 451  -3.832  0.0020
##  0.0343 451  -3.370  0.0105
##  0.0227 451  -0.700  0.9819
##  0.0220 451  -1.757  0.4950
##  0.0256 451  -1.844  0.4388
##  0.0245 451  -1.158  0.8565
##  0.0146 451  -1.559  0.6259
##  0.0196 451  -1.597  0.6007
##  0.0182 451  -0.688  0.9832
##  0.0187 451  -0.452  0.9976
##  0.0173 451   0.594  0.9914
##  0.0216 451   0.866  0.9544
## 
## P value adjustment: tukey method for comparing a family of 6 estimates

ANCOVA - Belonging #1

Treatment x DEMOmin (BLI x Majority) C: Pre-Test Belonging

Results

out <- aov_ez(id = "UID", dv = "FBelong.post", data=d2, between=c("DEMOmin","Treatment"), covariate=c("FBelong.pre"), factorize = F, anova_table = list(es="pes"))

## Contrasts set to contr.sum for the following variables: DEMOmin, Treatment

nice(out)

## Anova Table (Type 3 tests)
## 
## Response: FBelong.post
##              Effect     df  MSE          F   pes p.value
## 1           DEMOmin 1, 260 0.11       0.18 <.001    .672
## 2         Treatment 1, 260 0.11       0.21 <.001    .646
## 3       FBelong.pre 1, 260 0.11 195.74 ***  .429   <.001
## 4 DEMOmin:Treatment 1, 260 0.11       1.28  .005    .259
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1

Default Plot

afex_plot(out, x = "Treatment", trace ="DEMOmin")

?ANCOVA - Belonging #2

Treatment x DEMOmin (BLI x Majority) x bel_cut (High x Low Belonging)

Results

out <- aov_ez(id = "UID", dv = "FBelong.post", data=d3, between=c("DEMOmin","Treatment","bel_cut"), factorize = F, anova_table = list(es="pes"))

## Contrasts set to contr.sum for the following variables: DEMOmin, Treatment, bel_cut

nice(out)

## Anova Table (Type 3 tests)
## 
## Response: FBelong.post
##                      Effect     df  MSE         F   pes p.value
## 1                   DEMOmin 1, 257 0.12      0.31  .001    .575
## 2                 Treatment 1, 257 0.12    4.51 *  .017    .035
## 3                   bel_cut 1, 257 0.12 64.87 ***  .202   <.001
## 4         DEMOmin:Treatment 1, 257 0.12    4.39 *  .017    .037
## 5           DEMOmin:bel_cut 1, 257 0.12  10.62 **  .040    .001
## 6         Treatment:bel_cut 1, 257 0.12      0.00 <.001    .949
## 7 DEMOmin:Treatment:bel_cut 1, 257 0.12      0.49  .002    .483
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1

Default Plots

afex_plot(out, x = "Treatment", trace ="DEMOmin", panel = "bel_cut")

afex_plot(out, x ="DEMOmin", trace = "bel_cut")

## NOTE: Results may be misleading due to involvement in interactions

Refined Plot

d3$bel_cut <- as.factor(d3$bel_cut)
tgc2 <- summarySE(d3, measurevar="FBelong.post", groupvars=c("Treatment","DEMOmin","bel_cut"), na.rm = T)
tgc2

##   Treatment  DEMOmin bel_cut  N FBelong.post        sd         se         ci
## 1         0      bli    high  3     3.666667 0.3333333 0.19245009 0.82804590
## 2         0      bli     low  2     2.500000 0.2357023 0.16666667 2.11770079
## 3         0 majority    high 27     3.432099 0.4697227 0.09039818 0.18581612
## 4         0 majority     low 65     3.030769 0.3713857 0.04606473 0.09202487
## 5         1      bli    high  4     4.000000 0.0000000 0.00000000 0.00000000
## 6         1      bli     low 18     2.981481 0.3327882 0.07843894 0.16549169
## 7         1 majority    high 51     3.496732 0.4588515 0.06425206 0.12905405
## 8         1 majority     low 95     2.971930 0.2100499 0.02155066 0.04278934

levels(tgc2$DEMOmin) <- c("BLI","Majority")
levels(tgc2$bel_cut) <- c("High Pre-Test Belonging","Low Pre-Test Belonging")

ggplot(tgc2, aes(x=DEMOmin, y=FBelong.post, fill=Treatment, panel=bel_cut)) + 
  geom_bar(position=position_dodge(), stat="identity", color="black") +
  geom_errorbar(aes(ymin=FBelong.post-se, ymax=FBelong.post+se),
                width=.2,
                position=position_dodge(.9)) +
  xlab("Condition") + ylab("Post-Test Belonging Score") +
  facet_wrap(~bel_cut)

?ANCOVA - Belonging #3

Treatment X DEMOmin (BLI x Majority) Limited to low-belongers (pre-test belonging score below the mean)

Results

out <- aov_ez(id = "UID", dv = "FBelong.post", data=d4, between=c("DEMOmin","Treatment"), factorize = F, anova_table = list(es="pes"))

## Contrasts set to contr.sum for the following variables: DEMOmin, Treatment

nice(out)

## Anova Table (Type 3 tests)
## 
## Response: FBelong.post
##              Effect     df  MSE      F  pes p.value
## 1           DEMOmin 1, 176 0.08 5.51 * .030    .020
## 2         Treatment 1, 176 0.08 3.63 + .020    .059
## 3 DEMOmin:Treatment 1, 176 0.08 5.93 * .033    .016
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1

Default Plot

afex_plot(out, x = "Treatment", trace ="DEMOmin")

Refined Plot

tgc3 <- summarySE(d4, measurevar="FBelong.post", groupvars=c("Treatment","DEMOmin"), na.rm = T)
tgc3

##   Treatment  DEMOmin  N FBelong.post        sd         se         ci
## 1         0      bli  2     2.500000 0.2357023 0.16666667 2.11770079
## 2         0 majority 65     3.030769 0.3713857 0.04606473 0.09202487
## 3         1      bli 18     2.981481 0.3327882 0.07843894 0.16549169
## 4         1 majority 95     2.971930 0.2100499 0.02155066 0.04278934

levels(tgc3$DEMOmin) <- c("BLI","Majority")
levels(tgc3$Treatment) <- c("Control","Intervention")

ggplot(tgc3, aes(x=DEMOmin, y=FBelong.post, fill=Treatment)) + 
  geom_bar(position=position_dodge(), stat="identity", color="black") +
  geom_errorbar(aes(ymin=FBelong.post-se, ymax=FBelong.post+se),
                width=.2,
                position=position_dodge(.9)) +
  ylim(0,4) +
  xlab("Race/Ethnicity") + ylab("Post-Test Belonging Score")

Pairwise Comparisons

emmeans(out, specs = c("Treatment", "DEMOmin"))

##  Treatment DEMOmin  emmean     SE  df lower.CL upper.CL
##  0         bli        2.50 0.2058 176     2.09     2.91
##  1         bli        2.98 0.0686 176     2.85     3.12
##  0         majority   3.03 0.0361 176     2.96     3.10
##  1         majority   2.97 0.0299 176     2.91     3.03
## 
## Confidence level used: 0.95

pairs(emmeans(out, specs = c("Treatment", "DEMOmin")))

##  contrast                                  estimate     SE  df t.ratio p.value
##  Treatment0 bli - Treatment1 bli           -0.48148 0.2170 176  -2.219  0.1220
##  Treatment0 bli - Treatment0 majority      -0.53077 0.2090 176  -2.540  0.0574
##  Treatment0 bli - Treatment1 majority      -0.47193 0.2080 176  -2.269  0.1093
##  Treatment1 bli - Treatment0 majority      -0.04929 0.0775 176  -0.636  0.9204
##  Treatment1 bli - Treatment1 majority       0.00955 0.0748 176   0.128  0.9993
##  Treatment0 majority - Treatment1 majority  0.05884 0.0469 176   1.256  0.5923
## 
## P value adjustment: tukey method for comparing a family of 4 estimates

Logistic Regression - MATLab

MATLab Grade ~ Treatment * DEMOmin (BLI x Majority)

temp1 <- subset(d, Time == "0pre")
temp2 <- subset(d, Time == "1post", select=c(UID, FBelong))
d2 <- merge(temp1, temp2, by = "UID", suffixes = c(".pre",".post"))

d2$mgrade <- d2$MATLABGradePercent
d2$mgrade_cat[d2$mgrade < 1] <- 0
d2$mgrade_cat[d2$mgrade == 1] <- 1
# table(d2$mgrade_cat, d2$Treatment, d2$DEMOmin)
# str(d2)
d2$mgrade_cat <- as.factor(d2$mgrade_cat)

mylogit <- glm(mgrade_cat ~ DEMOmin * Treatment, data = d2, family = "binomial")
summary(mylogit)

## 
## Call:
## glm(formula = mgrade_cat ~ DEMOmin * Treatment, family = "binomial", 
##     data = d2)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.419  -1.355   0.954   1.010   1.482  
## 
## Coefficients:
##                            Estimate Std. Error z value Pr(>|z|)  
## (Intercept)                 -0.6931     0.5477  -1.266   0.2057  
## DEMOminmajority              1.1017     0.5615   1.962   0.0498 *
## Treatment1                   0.9651     0.6404   1.507   0.1318  
## DEMOminmajority:Treatment1  -0.8224     0.6640  -1.239   0.2155  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 809.12  on 603  degrees of freedom
## Residual deviance: 803.38  on 600  degrees of freedom
##   (37 observations deleted due to missingness)
## AIC: 811.38
## 
## Number of Fisher Scoring iterations: 4

ENG 132 Analysis

Heather Perkins

2022-10-21

Info & Notes

Step 1: Import

Step 2: Assumption Checking

Create Functions

Checking Correlations

Examining Skew & Kurtosis

Check Barlett’s & KMO

Running EFA

Belonging Pre

Belonging Post

Cronbach’s Alphas

Step 3: Composites & Check

Create Composites

Create New Dataframe

Examining Skew & Kurtosis

Examine QQ-plots and Trim

Mauchly’s test of sphericity

Create Extra Dataframes

Step 4: Analyses

!RM-ANOVA - Belonging #1

Results

Default Plot

Refined Plot

Pairwise Comparisons

RM-ANOVA - Belonging #2

Results

Default Plot

Pairwise Comparisons

ANCOVA - Final Grade

!!ANCOVA - MATLab Grade

Results

Default Plots

Refined Plot

Pairwise Comparisons

ANCOVA - Belonging #1

Results

Default Plot

?ANCOVA - Belonging #2

Results

Default Plots

Refined Plot

?ANCOVA - Belonging #3

Results

Default Plot

Refined Plot

Pairwise Comparisons

Logistic Regression - MATLab