Original: Kevin Kaufman-Ortiz and Allison Godwin (June 7, 2022)
Revised: Heather Perkins (October 10, 2022)
Data imported, race/ethnicity and gender variables cleaned, pre/post dataframes created.
#UBelong Exploratory analysis for Pre- Post- Surveys
# install packages --------------------------------------------------------
#install.packages("readxl")
#install.packages("stargazer")
#install.packages("car")
#install.packages("Hmisc")
#install.packages("psych")
#install.packages("moments")
#install.packages("nFactors")
#install.packages("GPArotation")
#install.packages("Rmisc")
#install.packages("rstatix")
#install.packages("tidyverse")
#install.packages("ggpubr")
#install.packages("reshape2")
#install.packages("DescTools")
# load libraries ----------------------------------------------------------
library("readxl")
library("stargazer")
library("car")
library("Hmisc")
library("psych")
library("moments")
library("nFactors")
library("GPArotation")
library("Rmisc")
library("rstatix")
library("tidyverse")
library("ggpubr")
library("reshape2")
library("DescTools")
library(readxl)
library(ids)
library(MASS)
library(afex)
library(emmeans)
library(ggbeeswarm)
# import data -------------------------------------------------------------
spring2022 <- read.csv("data/final-UBelong_ENGR 132_Spring 2022_prepostgradescondinstitutional_deid.csv")
# assign UID
# spring2022$UID <- ids::random_id(nrow(spring2022), 4)
# write.csv(spring2022, file="data/final-UBelong_ENGR 132_Spring 2022_prepostgradescondinstitutional_deid.csv")
# identify important variables
# id = UID
# instructor = Instructor
# section = Section
# control/treatment = Treatment
# pre/post = Time
# gender = DEMOgen
# BLI = DEMOmin
# black, american indian, mexican, central american, puerto rican, other latinx, native hawaiian/pacific islander
# NA is coded as 0 (majority)
# MATLABGradePercent
# FinalGradePercent
# basic recoding and cleaning ---------------------------------------------
# transform -99 to NA
spring2022[spring2022 == -99] <- NA
# remove bad responses
# table(spring2022$CHECKpass, useNA = "always")
spring2022_2 <- subset(spring2022, CHECKpass == 1 | is.na(CHECKpass))
spring2022_3 <- subset(spring2022_2, CHECKpass_2 == 1 | is.na(CHECKpass_2))
spring2022 <- spring2022_3
# recode gender
# table(spring2022$DEMOgen)
# 1 2 3 4 5
# 328 131 3 1 6
#Gender is currently coded to have 5 values as follows: (1) Man, (2) Woman, (3) Non-binary / gender-queer, (4) Not listed above, (5) Prefer not to respond
#Not listed above was mostly attack helicopters so they got put as NA
#June 24, 2022, we decided as a team that we recognize we do not want to erase the experiences of NonBinary folk
spring2022$DEMOgen [spring2022$DEMOgen == 1] <- "man" #Recoding to 0 = male (1)
spring2022$DEMOgen [spring2022$DEMOgen == 2 ] <- "woman" #Recoding to 1 = Female (2)
spring2022$DEMOgen [spring2022$DEMOgen == 3 | spring2022$DEMOgen == 4 | spring2022$DEMOgen == 5] <- "nb" #Recoding to NA = NonBinary, Not listed, Prefer not to respond (3, 4, 5)
# table(spring2022$DEMOgen)
# man nb woman
# 328 10 131
# creating race variables
# Race is coded with many variables, need a minoritized variable
# bli = black, american indian, mexican, central american, puerto rican, other latinx, native hawaiian/pacific islander
spring2022$DEMOmin <- "majority"
spring2022$DEMOmin[spring2022$DEMOblack == 1] <- "bli"
spring2022$DEMOmin[spring2022$DEMOai == 1] <- "bli"
spring2022$DEMOmin[spring2022$DEMOmex == 1] <- "bli"
spring2022$DEMOmin[spring2022$DEMOca == 1] <- "bli"
spring2022$DEMOmin[spring2022$DEMOsa == 1] <- "bli"
spring2022$DEMOmin[spring2022$DEMOpr == 1] <- "bli"
spring2022$DEMOmin[spring2022$DEMOol == 1] <- "bli"
spring2022$DEMOmin[spring2022$DEMOpi == 1] <- "bli"
# srm = used in other places
spring2022$DEMOmin2 <- "majority"
spring2022$DEMOmin2[spring2022$DEMOblack == 1] <- "srm"
spring2022$DEMOmin2[spring2022$DEMOai == 1] <- "srm"
spring2022$DEMOmin2[spring2022$DEMOsea == 1] <- "srm"
spring2022$DEMOmin2[spring2022$DEMOmex == 1] <- "srm"
spring2022$DEMOmin2[spring2022$DEMOca == 1] <- "srm"
spring2022$DEMOmin2[spring2022$DEMOsa == 1] <- "srm"
spring2022$DEMOmin2[spring2022$DEMOpr == 1] <- "srm"
spring2022$DEMOmin2[spring2022$DEMOol == 1] <- "srm"
spring2022$DEMOmin2[spring2022$DEMOpi == 1] <- "srm"
spring2022$DEMOmin2[spring2022$DEMOgen == "woman"] <- "srm"
# table(spring2022$DEMOmin2, useNA = "always")
# rg = three categories, bli, majority men, and majority women/nb
spring2022$rg <- NA
spring2022$rg[spring2022$DEMOmin == "bli"] <- "bli"
spring2022$rg[spring2022$DEMOmin == "majority" & spring2022$DEMOgen == "woman"] <- "majority woman & nb"
spring2022$rg[spring2022$DEMOmin == "majority" & spring2022$DEMOgen == "nb"] <- "majority woman & nb"
spring2022$rg[spring2022$DEMOmin == "majority" & spring2022$DEMOgen == "man"] <- "majority man"
# spring2022$rg[spring2022$DEMOmin == "majority" & is.na(spring2022$DEMOgen)] <- "majority man & NA"
# demographics and numbers
# table(spring2022$Treatment, useNA = "always")
# table(spring2022$DEMOgen, spring2022$DEMOmin, useNA = "always")
# table(spring2022$rg, useNA = "always")
# table(spring2022$DEMOmin, useNA = "always")
# table(spring2022$DEMOmin, spring2022$DEMOblack, useNA = "always")
# table(spring2022$DEMOmin, spring2022$DEMOai, useNA = "always")
# table(spring2022$DEMOmin, spring2022$DEMOmex, useNA = "always")
# table(spring2022$DEMOmin, spring2022$DEMOca, useNA = "always")
# table(spring2022$DEMOmin, spring2022$DEMOpr, useNA = "always")
# table(spring2022$DEMOmin, spring2022$DEMOsa, useNA = "always")
# table(spring2022$DEMOmin, spring2022$DEMOol, useNA = "always")
# table(spring2022$DEMOmin, spring2022$DEMOpi, useNA = "always")
# breaking out pre and post dataframes ------------------------------------
# names(spring2022)
pre <- subset(spring2022, select=c(UID, Treatment, DEMOgen, DEMOmin, DEMOmin2, rg, 1:175, MATLABGradePercent, FinalGradePercent))
post <- subset(spring2022, select=c(UID, Treatment, DEMOgen, DEMOmin, DEMOmin2, rg, 176:286, MATLABGradePercent, FinalGradePercent))
PreBelongingna <- na.omit(subset(pre, select=c(BELclass01, BELclass02rev, BELclass03, BELclass04)))
PostBelongingna <- na.omit(subset(post, select=c(BELclass01_2, BELclass02rev_2, BELclass03_2, BELclass04_2)))
# correlation matrix function ---------------------------------------------
#create function to conduct cor matrix with *** for significance levels
corstars <- function(x){
require(Hmisc)
x <- as.matrix(x)
R <- rcorr(x)$r
p <- rcorr(x)$P
## define notions for significance levels; spacing is important.
mystars <- ifelse(p < .001, "***", ifelse(p < .01, "** ", ifelse(p < .05, "* ", " ")))
## trunctuate the matrix that holds the correlations to two decimal
R <- format(round(cbind(rep(-1.11, ncol(x)), R), 2))[,-1]
## build a new matrix that includes the correlations with their apropriate stars
Rnew <- matrix(paste(R, mystars, sep=""), ncol=ncol(x))
diag(Rnew) <- paste(diag(R), " ", sep="")
rownames(Rnew) <- colnames(x)
colnames(Rnew) <- paste(colnames(x), "", sep="")
## remove upper triangle
Rnew <- as.matrix(Rnew)
Rnew[upper.tri(Rnew, diag = TRUE)] <- ""
Rnew <- as.data.frame(Rnew)
## remove last column and return the matrix (which is now a data frame)
Rnew <- cbind(Rnew[1:length(Rnew)-1])
return(Rnew)
}
# kmo function ------------------------------------------------------------
# There is no R command to run KMO test. G.Jay Kerns from Youngstown University created a fuction to do this test
# http://tolstoy.newcastle.edu.au/R/e2/help/07/08/22816.html
# KMO Kaiser-Meyer-Olkin Measure of Sampling Adequacy
kmo = function( data ){
X <- cor(as.matrix(data))
iX <- ginv(X)
S2 <- diag(diag((iX^-1)))
AIS <- S2%*%iX%*%S2 # anti-image covariance matrix
IS <- X+AIS-2*S2 # image covariance matrix
Dai <- sqrt(diag(diag(AIS)))
IR <- ginv(Dai)%*%IS%*%ginv(Dai) # image correlation matrix
AIR <- ginv(Dai)%*%AIS%*%ginv(Dai) # anti-image correlation matrix
a <- apply((AIR - diag(diag(AIR)))^2, 2, sum)
AA <- sum(a)
b <- apply((X - diag(nrow(X)))^2, 2, sum)
BB <- sum(b)
MSA <- b/(b+a) # indiv. measures of sampling adequacy
AIR <- AIR-diag(nrow(AIR))+diag(MSA) # Examine the anti-image of the
# correlation matrix. That is the
# negative of the partial correlations,
# partialling out all other variables.
kmo <- BB/(AA+BB) # overall KMO statistic
# Reporting the conclusion
if (kmo >= 0.00 && kmo < 0.50){
test <- 'The KMO test yields a degree of common variance unacceptable for FA.'
} else if (kmo >= 0.50 && kmo < 0.60){
test <- 'The KMO test yields a degree of common variance miserable.'
} else if (kmo >= 0.60 && kmo < 0.70){
test <- 'The KMO test yields a degree of common variance mediocre.'
} else if (kmo >= 0.70 && kmo < 0.80){
test <- 'The KMO test yields a degree of common variance middling.'
} else if (kmo >= 0.80 && kmo < 0.90){
test <- 'The KMO test yields a degree of common variance meritorious.'
} else {
test <- 'The KMO test yields a degree of common variance marvelous.' }
ans <- list( overall = kmo,
report = test,
individual = MSA,
AIS = AIS,
AIR = AIR )
return(ans)
} # end of kmo()
# checking correlations for target variables ------------------------------
corstars(subset(pre, select=c(BELclass01, BELclass02rev, BELclass03, BELclass04))) #all correlations statistically significant
## BELclass01 BELclass02rev BELclass03
## BELclass01
## BELclass02rev 0.64***
## BELclass03 0.57*** 0.52***
## BELclass04 0.57*** 0.51*** 0.71***
corstars(subset(post, select=c(BELclass01_2, BELclass02rev_2, BELclass03_2, BELclass04_2))) #all correlations statistically significant
## BELclass01_2 BELclass02rev_2 BELclass03_2
## BELclass01_2
## BELclass02rev_2 0.64***
## BELclass03_2 0.59*** 0.56***
## BELclass04_2 0.56*** 0.51*** 0.72***
# examining skew and kurtosis ---------------------------------------------
#In a psychologists perspective. If it a published scale that has been tested, it is good to keep it and transform it
# all items within range of -2 and 2, if not in this range, remove them. Maximum likelihood
sapply(PreBelongingna, FUN = skewness)
## BELclass01 BELclass02rev BELclass03 BELclass04
## 0.006386369 -0.221506098 -0.162112144 0.345292957
sapply(PostBelongingna, FUN = skewness)
## BELclass01_2 BELclass02rev_2 BELclass03_2 BELclass04_2
## -0.4747835 -0.5677938 -0.2949721 -0.6963178
# all items with kurtosis less than 7
sapply(PreBelongingna, FUN = kurtosis)
## BELclass01 BELclass02rev BELclass03 BELclass04
## 4.168645 3.941443 4.589422 4.044981
sapply(PostBelongingna, FUN = kurtosis)
## BELclass01_2 BELclass02rev_2 BELclass03_2 BELclass04_2
## 4.608295 4.309433 4.526886 6.149700
# check barlett's and kmo -------------------------------------------------
# This makes sure it is not TOO correlated. As long as it is significant, I'm good. Make sure to NA OMIT
# If significant, it does not resemble an identity matrix
cortest.bartlett(cor(PreBelongingna), n = nrow(PreBelongingna))
## $chisq
## [1] 749.7921
##
## $p.value
## [1] 1.080976e-158
##
## $df
## [1] 6
cortest.bartlett(cor(PostBelongingna), n = nrow(PostBelongingna))
## $chisq
## [1] 605.6985
##
## $p.value
## [1] 1.375762e-127
##
## $df
## [1] 6
# Run the KMO command on the data
# If KMO is larger than a certain number:
# 0.00 to 0.49 unacceptable.
# to 0.59 miserable.
# 0.60 to 0.69 mediocre.
# 0.70 to 0.79 middling.
# 0.80 to 0.89 meritorious.
# 0.90 to 1.00 marvelous.
#Do we have a good sample size to run a good factor analysis
kmooutput <- kmo(PreBelongingna)
kmooutput$overall
## [1] 0.7735774
# [1] 0.7735774
kmooutput <- kmo(PostBelongingna)
kmooutput$overall
## [1] 0.7770663
# [1] 0.7770663
# We have meritorious data
# running EFA -------------------------------------------------------------
# Rule of thumb: Retain the number of components above the elbow
#Summarizing EFA results: TLI is ideally above 0.95 with minimum acceptable values at 0.90 and RMSEA should be less than 0.08.
#I am not sure how to interpret uniqueness... I found online that the higher the number, the more unique they are. 1-uniqueness = communality. But are there any thresholds?
# entering raw data and extracting (n) number of factors (determined in the previous step),
# with promax rotation
#Pre Belonging EFA
ev <- eigen(cor(PreBelongingna)) # get eigenvalues
ap <- parallel(subject = nrow(PreBelongingna), var = ncol(PreBelongingna),
rep = 100,cent = .05)
nS <- nScree(x = ev$values, aparallel = ap$eigen$qevpea)
plotnScree(nS)
fit <- factanal(PreBelongingna, 1, rotation="promax")
print(fit, digits = 3, cutoff = 0.3, sort = TRUE)
##
## Call:
## factanal(x = PreBelongingna, factors = 1, rotation = "promax")
##
## Uniquenesses:
## BELclass01 BELclass02rev BELclass03 BELclass04
## 0.447 0.530 0.331 0.343
##
## Loadings:
## [1] 0.743 0.685 0.818 0.811
##
## Factor1
## SS loadings 2.348
## Proportion Var 0.587
##
## Test of the hypothesis that 1 factor is sufficient.
## The chi square statistic is 50.96 on 2 degrees of freedom.
## The p-value is 8.6e-12
# All loadings appear well over 0.32, looks good!
fa(PreBelongingna, nfactors=1, rotate="promax", oblique.scores = TRUE, fm = "ml" )
## Factor Analysis using method = ml
## Call: fa(r = PreBelongingna, nfactors = 1, rotate = "promax", fm = "ml",
## oblique.scores = TRUE)
## Standardized loadings (pattern matrix) based upon correlation matrix
## ML1 h2 u2 com
## BELclass01 0.74 0.55 0.45 1
## BELclass02rev 0.69 0.47 0.53 1
## BELclass03 0.82 0.67 0.33 1
## BELclass04 0.81 0.66 0.34 1
##
## ML1
## SS loadings 2.35
## Proportion Var 0.59
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## The degrees of freedom for the null model are 6 and the objective function was 1.78 with Chi Square of 749.79
## The degrees of freedom for the model are 2 and the objective function was 0.12
##
## The root mean square of the residuals (RMSR) is 0.07
## The df corrected root mean square of the residuals is 0.11
##
## The harmonic number of observations is 424 with the empirical chi square 21.71 with prob < 1.9e-05
## The total number of observations was 424 with Likelihood Chi Square = 50.96 with prob < 8.6e-12
##
## Tucker Lewis Index of factoring reliability = 0.802
## RMSEA index = 0.24 and the 90 % confidence intervals are 0.186 0.3
## BIC = 38.86
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy
## ML1
## Correlation of (regression) scores with factors 0.93
## Multiple R square of scores with factors 0.86
## Minimum correlation of possible factor scores 0.72
# Tucker Lewis Index of factoring reliability = 0.802
# RMSEA index = 0.24
# indicate good fit
#Post Belonging EFA
ev <- eigen(cor(PostBelongingna)) # get eigenvalues
ap <- parallel(subject = nrow(PostBelongingna), var = ncol(PostBelongingna),
rep = 100,cent = .05)
nS <- nScree(x = ev$values, aparallel = ap$eigen$qevpea)
plotnScree(nS)
#Post Belonging
fit <- factanal(PostBelongingna, 1, rotation="promax")
print(fit, digits = 3, cutoff = 0.3, sort = TRUE)
##
## Call:
## factanal(x = PostBelongingna, factors = 1, rotation = "promax")
##
## Uniquenesses:
## BELclass01_2 BELclass02rev_2 BELclass03_2 BELclass04_2
## 0.456 0.517 0.281 0.351
##
## Loadings:
## [1] 0.738 0.695 0.848 0.806
##
## Factor1
## SS loadings 2.395
## Proportion Var 0.599
##
## Test of the hypothesis that 1 factor is sufficient.
## The chi square statistic is 38.26 on 2 degrees of freedom.
## The p-value is 4.92e-09
# All loadings appear over 0.32, looks good!
fa(PostBelongingna, nfactors=1, rotate="promax", oblique.scores = TRUE, fm = "ml" )
## Factor Analysis using method = ml
## Call: fa(r = PostBelongingna, nfactors = 1, rotate = "promax", fm = "ml",
## oblique.scores = TRUE)
## Standardized loadings (pattern matrix) based upon correlation matrix
## ML1 h2 u2 com
## BELclass01_2 0.74 0.54 0.46 1
## BELclass02rev_2 0.69 0.48 0.52 1
## BELclass03_2 0.85 0.72 0.28 1
## BELclass04_2 0.81 0.65 0.35 1
##
## ML1
## SS loadings 2.39
## Proportion Var 0.60
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## The degrees of freedom for the null model are 6 and the objective function was 1.86 with Chi Square of 605.7
## The degrees of freedom for the model are 2 and the objective function was 0.12
##
## The root mean square of the residuals (RMSR) is 0.06
## The df corrected root mean square of the residuals is 0.11
##
## The harmonic number of observations is 329 with the empirical chi square 15.99 with prob < 0.00034
## The total number of observations was 329 with Likelihood Chi Square = 38.26 with prob < 4.9e-09
##
## Tucker Lewis Index of factoring reliability = 0.818
## RMSEA index = 0.235 and the 90 % confidence intervals are 0.174 0.303
## BIC = 26.67
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy
## ML1
## Correlation of (regression) scores with factors 0.93
## Multiple R square of scores with factors 0.87
## Minimum correlation of possible factor scores 0.73
# Tucker Lewis Index of factoring reliability = 0.818
# RMSEA index = 0.235
# indicate marginal fit
# cronbach's alphas -------------------------------------------------------
#For the future, it's good to check alpha for overall scale and for individual factors
psych::alpha(PreBelongingna) #0.85
##
## Reliability analysis
## Call: psych::alpha(x = PreBelongingna)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.85 0.85 0.82 0.59 5.7 0.012 3.2 0.44 0.57
##
## 95% confidence boundaries
## lower alpha upper
## Feldt 0.82 0.85 0.87
## Duhachek 0.82 0.85 0.87
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## BELclass01 0.80 0.81 0.75 0.58 4.1 0.017 0.0130 0.52
## BELclass02rev 0.83 0.83 0.77 0.62 4.8 0.015 0.0066 0.57
## BELclass03 0.80 0.80 0.73 0.57 4.0 0.017 0.0043 0.57
## BELclass04 0.80 0.80 0.74 0.58 4.1 0.017 0.0037 0.57
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## BELclass01 424 0.84 0.84 0.76 0.70 3.2 0.52
## BELclass02rev 424 0.82 0.80 0.70 0.64 3.1 0.57
## BELclass03 424 0.84 0.84 0.78 0.70 3.1 0.53
## BELclass04 424 0.83 0.84 0.77 0.70 3.2 0.48
##
## Non missing response frequency for each item
## 1 2 3 4 miss
## BELclass01 0.00 0.04 0.71 0.24 0
## BELclass02rev 0.01 0.09 0.68 0.22 0
## BELclass03 0.01 0.06 0.72 0.21 0
## BELclass04 0.00 0.03 0.74 0.23 0
psych::alpha(PostBelongingna) #0.85
##
## Reliability analysis
## Call: psych::alpha(x = PostBelongingna)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.85 0.86 0.83 0.6 5.9 0.013 3.1 0.49 0.58
##
## 95% confidence boundaries
## lower alpha upper
## Feldt 0.83 0.85 0.88
## Duhachek 0.83 0.85 0.88
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## BELclass01_2 0.81 0.82 0.76 0.60 4.4 0.019 0.0128 0.56
## BELclass02rev_2 0.83 0.83 0.78 0.63 5.0 0.016 0.0074 0.59
## BELclass03_2 0.80 0.80 0.73 0.57 4.0 0.019 0.0046 0.56
## BELclass04_2 0.82 0.82 0.75 0.60 4.5 0.017 0.0018 0.59
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## BELclass01_2 329 0.84 0.84 0.76 0.70 3.1 0.59
## BELclass02rev_2 329 0.82 0.81 0.71 0.66 3.1 0.63
## BELclass03_2 329 0.85 0.86 0.81 0.74 3.1 0.55
## BELclass04_2 329 0.83 0.84 0.77 0.69 3.1 0.56
##
## Non missing response frequency for each item
## 1 2 3 4 miss
## BELclass01_2 0.02 0.08 0.68 0.22 0
## BELclass02rev_2 0.02 0.10 0.66 0.22 0
## BELclass03_2 0.01 0.10 0.72 0.18 0
## BELclass04_2 0.02 0.05 0.74 0.19 0
# create composite variables ----------------------------------------------
# do not have notes on why BELclass02 was dropped
pre$FBelong <- (pre$BELclass01 + pre$BELclass03 + pre$BELclass04)/3
post$FBelong <- (post$BELclass01 + post$BELclass03 + post$BELclass04)/3
# subset needed variables into new long dataset
d <- rbind.data.frame(
cbind(subset(pre, select=c(UID, Treatment, DEMOgen, DEMOmin, DEMOmin2, rg, FBelong, MATLABGradePercent, FinalGradePercent)), Time = "0pre"),
cbind(subset(post, select=c(UID, Treatment, DEMOgen, DEMOmin, DEMOmin2, rg, FBelong, MATLABGradePercent, FinalGradePercent)), Time = "1post"))
# Make sure data are factors
d$Treatment <- as.factor(d$Treatment)
d$DEMOgen <- as.factor(d$DEMOgen)
d$DEMOmin <- as.factor(d$DEMOmin)
d$DEMOmin2 <- as.factor(d$DEMOmin2)
d$rg <- as.factor(d$rg)
d$Time <- as.factor(d$Time)
d$UID <- as.factor(d$UID)
d <- subset(d, !is.na(Treatment))
# assumption checking -----------------------------------------------------
# Normality
moments::skewness(d$FBelong, na.rm = TRUE) #0.01937707
## [1] -0.05792312
moments::kurtosis(d$FBelong, na.rm = TRUE) #4.859645
## [1] 4.870226
moments::skewness(d$MATLABGradePercent, na.rm = TRUE) #[1] -3.266513
## [1] -3.18743
moments::kurtosis(d$MATLABGradePercent, na.rm = TRUE) #15.84626
## [1] 14.70525
moments::skewness(d$FinalGradePercent, na.rm = TRUE) #[1] -3.367032
## [1] -4.326676
moments::kurtosis(d$FinalGradePercent, na.rm = TRUE) #26.30512
## [1] 32.84907
hist(d$FBelong, breaks=5)
hist(d$MATLABGradePercent, breaks=100)
hist(d$FinalGradePercent, breaks=100)
# Examination of residuals demonstrated some normality challenges. Trim
qqPlot(d$FBelong)
## [1] 1011 1127
# trimming
upper_quantile <- quantile(d$FBelong, 0.98, na.rm = TRUE)
lower_quantile <- quantile(d$FBelong, 0.02, na.rm = TRUE)
# where 99th percentile becomes NA
d$FBelong[d$FBelong > upper_quantile] <- NA
d$FBelong[d$FBelong < lower_quantile] <- NA
qqPlot(d$FBelong)
## [1] 44 60
res.aovB <- aov(FBelong ~ Treatment*Time, data = d)
plot(res.aovB, 1)
plot(res.aovB, 2) # some trimming may be needed
res.aovB2 <- aov(FBelong ~ Treatment*Time*DEMOgen, data = d)
plot(res.aovB2, 1)
plot(res.aovB2, 2) # some trimming may be needed
res.aovB3 <- aov(FBelong ~ Treatment*Time*DEMOmin, data = d)
plot(res.aovB3, 1)
plot(res.aovB3, 2) # some trimming may be needed
Not needed since within-subjects variable only has two levels.
# Wide Dataframe
temp1 <- subset(d, Time == "0pre")
temp2 <- subset(d, Time == "1post", select=c(UID, FBelong))
d2 <- merge(temp1, temp2, by = "UID", suffixes = c(".pre",".post"))
d2$FBelong.pre <- scale(d2$FBelong.pre, center = T)
# Dataframe with dichotimized pre-test belonging
d3 <- subset(d2, !is.na(FBelong.pre))
# d3$FBelong.pre <- scale(d3$FBelong.pre, center = T)
# table(d3$FBelong.pre, useNA = "always")
hist(d3$FBelong.pre)
d3$bel_cut[d3$FBelong.pre < 0] <- "low"
d3$bel_cut[d3$FBelong.pre > 0] <- "high"
# dataframe limited to low-belongers
d4 <- subset(d3, bel_cut == "low")
Used afex package in R for ANOVAs. Automatically uses appropriate orthogonal contrasts for factors and uses Type III sums of squares. (https://www.r-bloggers.com/2017/06/anova-in-r-afex-may-be-the-solution-you-are-looking-for/)
Time x Treatment X DEMOmin (BLI x Majority)
out <- aov_ez(id = "UID", dv = "FBelong", data=d, between=c("Treatment","DEMOmin"), within="Time", observed=c("DEMOmin"), anova_table = list(es="pes"))
## Contrasts set to contr.sum for the following variables: Treatment, DEMOmin
nice(out)
## Anova Table (Type 3 tests)
##
## Response: FBelong
## Effect df MSE F pes p.value
## 1 Treatment 1, 261 0.32 0.22 <.001 .643
## 2 DEMOmin 1, 261 0.32 0.45 .002 .504
## 3 Treatment:DEMOmin 1, 261 0.32 0.97 .004 .326
## 4 Time 1, 261 0.07 1.04 .004 .310
## 5 Treatment:Time 1, 261 0.07 0.54 .002 .465
## 6 DEMOmin:Time 1, 261 0.07 0.63 .002 .427
## 7 Treatment:DEMOmin:Time 1, 261 0.07 2.99 + .011 .085
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1
afex_plot(out, x = "Time", trace = "Treatment", panel = "DEMOmin")
## Warning: Panel(s) show a mixed within-between-design.
## Error bars do not allow comparisons across all means.
## Suppress error bars with: error = "none"
tgc <- summarySE(d, measurevar="FBelong", groupvars=c("Treatment","DEMOmin","Time"), na.rm = T)
tgc
## Treatment DEMOmin Time N FBelong sd se ci
## 1 0 bli 0pre 13 3.333333 0.5270463 0.14617634 0.31849088
## 2 0 bli 1post 6 3.166667 0.6236096 0.25458754 0.65443810
## 3 0 majority 0pre 149 3.118568 0.4028312 0.03300122 0.06521446
## 4 0 majority 1post 129 3.129199 0.4512907 0.03973393 0.07862038
## 5 1 bli 0pre 34 3.156863 0.4122313 0.07069708 0.14383430
## 6 1 bli 1post 23 3.159420 0.4910262 0.10238604 0.21233565
## 7 1 majority 0pre 225 3.204444 0.4439630 0.02959754 0.05832523
## 8 1 majority 1post 163 3.112474 0.4177490 0.03272063 0.06461394
levels(tgc$Time) <- c("0Pre","1Post")
levels(tgc$Treatment) <- c("Control","Intervention")
levels(tgc$DEMOmin) <- c("BLI","White & Asian")
pd <- position_dodge(.5) # move them .05 to the left and right
ggplot(tgc, aes(x=Time, y=FBelong, group=Treatment, linetype=Treatment)) +
geom_errorbar(aes(ymin=FBelong-ci, ymax=FBelong+ci), width=.2, position=pd) +
geom_line(position=pd) +
geom_point(position=pd) +
facet_grid(~DEMOmin) +
ylim(1,4) + theme_bw() +
ggtitle("Pre and Post Effects of Intervention on Majority and BLI Students") +
xlab("Timepoint") + ylab("Belonging Score")
emmeans(out, specs = c("Time", "Treatment", "DEMOmin"))
## Time Treatment DEMOmin emmean SE df lower.CL upper.CL
## X0pre 0 bli 3.40 0.1997 261 3.01 3.79
## X1post 0 bli 3.20 0.1931 261 2.82 3.58
## X0pre 1 bli 3.14 0.0952 261 2.95 3.32
## X1post 1 bli 3.17 0.0920 261 2.99 3.35
## X0pre 0 majority 3.11 0.0466 261 3.02 3.20
## X1post 0 majority 3.15 0.0450 261 3.06 3.24
## X0pre 1 majority 3.21 0.0370 261 3.14 3.29
## X1post 1 majority 3.16 0.0357 261 3.08 3.23
##
## Confidence level used: 0.95
pairs(emmeans(out, specs = c("Time", "Treatment", "DEMOmin")))
## contrast estimate SE df
## X0pre Treatment0 bli - X1post Treatment0 bli 0.2000 0.1632 261
## X0pre Treatment0 bli - X0pre Treatment1 bli 0.2636 0.2212 261
## X0pre Treatment0 bli - X1post Treatment1 bli 0.2333 0.2199 261
## X0pre Treatment0 bli - X0pre Treatment0 majority 0.2877 0.2051 261
## X0pre Treatment0 bli - X1post Treatment0 majority 0.2514 0.2047 261
## X0pre Treatment0 bli - X0pre Treatment1 majority 0.1877 0.2031 261
## X0pre Treatment0 bli - X1post Treatment1 majority 0.2447 0.2029 261
## X1post Treatment0 bli - X0pre Treatment1 bli 0.0636 0.2153 261
## X1post Treatment0 bli - X1post Treatment1 bli 0.0333 0.2139 261
## X1post Treatment0 bli - X0pre Treatment0 majority 0.0877 0.1986 261
## X1post Treatment0 bli - X1post Treatment0 majority 0.0514 0.1982 261
## X1post Treatment0 bli - X0pre Treatment1 majority -0.0123 0.1966 261
## X1post Treatment0 bli - X1post Treatment1 majority 0.0447 0.1963 261
## X0pre Treatment1 bli - X1post Treatment1 bli -0.0303 0.0778 261
## X0pre Treatment1 bli - X0pre Treatment0 majority 0.0240 0.1060 261
## X0pre Treatment1 bli - X1post Treatment0 majority -0.0122 0.1053 261
## X0pre Treatment1 bli - X0pre Treatment1 majority -0.0760 0.1021 261
## X0pre Treatment1 bli - X1post Treatment1 majority -0.0189 0.1017 261
## X1post Treatment1 bli - X0pre Treatment0 majority 0.0543 0.1031 261
## X1post Treatment1 bli - X1post Treatment0 majority 0.0181 0.1025 261
## X1post Treatment1 bli - X0pre Treatment1 majority -0.0457 0.0992 261
## X1post Treatment1 bli - X1post Treatment1 majority 0.0114 0.0987 261
## X0pre Treatment0 majority - X1post Treatment0 majority -0.0362 0.0380 261
## X0pre Treatment0 majority - X0pre Treatment1 majority -0.1000 0.0594 261
## X0pre Treatment0 majority - X1post Treatment1 majority -0.0429 0.0587 261
## X1post Treatment0 majority - X0pre Treatment1 majority -0.0638 0.0582 261
## X1post Treatment0 majority - X1post Treatment1 majority -0.0067 0.0575 261
## X0pre Treatment1 majority - X1post Treatment1 majority 0.0571 0.0302 261
## t.ratio p.value
## 1.226 0.9237
## 1.192 0.9338
## 1.061 0.9641
## 1.403 0.8554
## 1.228 0.9229
## 0.924 0.9835
## 1.206 0.9295
## 0.296 1.0000
## 0.156 1.0000
## 0.442 0.9998
## 0.260 1.0000
## -0.063 1.0000
## 0.228 1.0000
## -0.390 0.9999
## 0.227 1.0000
## -0.116 1.0000
## -0.744 0.9955
## -0.186 1.0000
## 0.527 0.9995
## 0.177 1.0000
## -0.460 0.9998
## 0.116 1.0000
## -0.953 0.9804
## -1.682 0.6986
## -0.732 0.9960
## -1.095 0.9574
## -0.117 1.0000
## 1.890 0.5588
##
## P value adjustment: tukey method for comparing a family of 8 estimates
Time x Treatment x rg (BLI, Majority Men, Majority Women & NB)
out <- aov_ez(id = "UID", dv = "FBelong", data=d, between=c("Treatment","rg"), within="Time", observed=c("rg"), anova_table = list(es="pes"))
## Contrasts set to contr.sum for the following variables: Treatment, rg
nice(out)
## Anova Table (Type 3 tests)
##
## Response: FBelong
## Effect df MSE F pes p.value
## 1 Treatment 1, 257 0.32 0.14 <.001 .711
## 2 rg 2, 257 0.32 1.38 .011 .252
## 3 Treatment:rg 2, 257 0.32 1.24 .010 .290
## 4 Time 1, 257 0.07 0.87 .003 .352
## 5 Treatment:Time 1, 257 0.07 0.01 <.001 .943
## 6 rg:Time 2, 257 0.07 0.35 .003 .702
## 7 Treatment:rg:Time 2, 257 0.07 1.84 .014 .161
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1
afex_plot(out, x = "Time", trace = "Treatment", panel = "rg")
## Warning: Panel(s) show a mixed within-between-design.
## Error bars do not allow comparisons across all means.
## Suppress error bars with: error = "none"
emmeans(out, specs = c("Time", "Treatment", "rg"))
## Time Treatment rg emmean SE df lower.CL upper.CL
## X0pre 0 bli 3.40 0.1995 257 3.01 3.79
## X1post 0 bli 3.20 0.1925 257 2.82 3.58
## X0pre 1 bli 3.14 0.0951 257 2.95 3.32
## X1post 1 bli 3.17 0.0918 257 2.99 3.35
## X0pre 0 majority man 3.12 0.0581 257 3.01 3.24
## X1post 0 majority man 3.15 0.0560 257 3.04 3.26
## X0pre 1 majority man 3.26 0.0463 257 3.17 3.35
## X1post 1 majority man 3.22 0.0446 257 3.13 3.30
## X0pre 0 majority woman & nb 3.08 0.0789 257 2.93 3.24
## X1post 0 majority woman & nb 3.16 0.0761 257 3.01 3.31
## X0pre 1 majority woman & nb 3.12 0.0619 257 3.00 3.24
## X1post 1 majority woman & nb 3.04 0.0597 257 2.93 3.16
##
## Confidence level used: 0.95
pairs(emmeans(out, specs = c("Time", "Treatment", "rg")))
## contrast
## X0pre Treatment0 bli - X1post Treatment0 bli
## X0pre Treatment0 bli - X0pre Treatment1 bli
## X0pre Treatment0 bli - X1post Treatment1 bli
## X0pre Treatment0 bli - X0pre Treatment0 majority man
## X0pre Treatment0 bli - X1post Treatment0 majority man
## X0pre Treatment0 bli - X0pre Treatment1 majority man
## X0pre Treatment0 bli - X1post Treatment1 majority man
## X0pre Treatment0 bli - X0pre Treatment0 majority woman & nb
## X0pre Treatment0 bli - X1post Treatment0 majority woman & nb
## X0pre Treatment0 bli - X0pre Treatment1 majority woman & nb
## X0pre Treatment0 bli - X1post Treatment1 majority woman & nb
## X1post Treatment0 bli - X0pre Treatment1 bli
## X1post Treatment0 bli - X1post Treatment1 bli
## X1post Treatment0 bli - X0pre Treatment0 majority man
## X1post Treatment0 bli - X1post Treatment0 majority man
## X1post Treatment0 bli - X0pre Treatment1 majority man
## X1post Treatment0 bli - X1post Treatment1 majority man
## X1post Treatment0 bli - X0pre Treatment0 majority woman & nb
## X1post Treatment0 bli - X1post Treatment0 majority woman & nb
## X1post Treatment0 bli - X0pre Treatment1 majority woman & nb
## X1post Treatment0 bli - X1post Treatment1 majority woman & nb
## X0pre Treatment1 bli - X1post Treatment1 bli
## X0pre Treatment1 bli - X0pre Treatment0 majority man
## X0pre Treatment1 bli - X1post Treatment0 majority man
## X0pre Treatment1 bli - X0pre Treatment1 majority man
## X0pre Treatment1 bli - X1post Treatment1 majority man
## X0pre Treatment1 bli - X0pre Treatment0 majority woman & nb
## X0pre Treatment1 bli - X1post Treatment0 majority woman & nb
## X0pre Treatment1 bli - X0pre Treatment1 majority woman & nb
## X0pre Treatment1 bli - X1post Treatment1 majority woman & nb
## X1post Treatment1 bli - X0pre Treatment0 majority man
## X1post Treatment1 bli - X1post Treatment0 majority man
## X1post Treatment1 bli - X0pre Treatment1 majority man
## X1post Treatment1 bli - X1post Treatment1 majority man
## X1post Treatment1 bli - X0pre Treatment0 majority woman & nb
## X1post Treatment1 bli - X1post Treatment0 majority woman & nb
## X1post Treatment1 bli - X0pre Treatment1 majority woman & nb
## X1post Treatment1 bli - X1post Treatment1 majority woman & nb
## X0pre Treatment0 majority man - X1post Treatment0 majority man
## X0pre Treatment0 majority man - X0pre Treatment1 majority man
## X0pre Treatment0 majority man - X1post Treatment1 majority man
## X0pre Treatment0 majority man - X0pre Treatment0 majority woman & nb
## X0pre Treatment0 majority man - X1post Treatment0 majority woman & nb
## X0pre Treatment0 majority man - X0pre Treatment1 majority woman & nb
## X0pre Treatment0 majority man - X1post Treatment1 majority woman & nb
## X1post Treatment0 majority man - X0pre Treatment1 majority man
## X1post Treatment0 majority man - X1post Treatment1 majority man
## X1post Treatment0 majority man - X0pre Treatment0 majority woman & nb
## X1post Treatment0 majority man - X1post Treatment0 majority woman & nb
## X1post Treatment0 majority man - X0pre Treatment1 majority woman & nb
## X1post Treatment0 majority man - X1post Treatment1 majority woman & nb
## X0pre Treatment1 majority man - X1post Treatment1 majority man
## X0pre Treatment1 majority man - X0pre Treatment0 majority woman & nb
## X0pre Treatment1 majority man - X1post Treatment0 majority woman & nb
## X0pre Treatment1 majority man - X0pre Treatment1 majority woman & nb
## X0pre Treatment1 majority man - X1post Treatment1 majority woman & nb
## X1post Treatment1 majority man - X0pre Treatment0 majority woman & nb
## X1post Treatment1 majority man - X1post Treatment0 majority woman & nb
## X1post Treatment1 majority man - X0pre Treatment1 majority woman & nb
## X1post Treatment1 majority man - X1post Treatment1 majority woman & nb
## X0pre Treatment0 majority woman & nb - X1post Treatment0 majority woman & nb
## X0pre Treatment0 majority woman & nb - X0pre Treatment1 majority woman & nb
## X0pre Treatment0 majority woman & nb - X1post Treatment1 majority woman & nb
## X1post Treatment0 majority woman & nb - X0pre Treatment1 majority woman & nb
## X1post Treatment0 majority woman & nb - X1post Treatment1 majority woman & nb
## X0pre Treatment1 majority woman & nb - X1post Treatment1 majority woman & nb
## estimate SE df t.ratio p.value
## 0.20000 0.1637 257 1.222 0.9868
## 0.26364 0.2210 257 1.193 0.9891
## 0.23333 0.2196 257 1.063 0.9959
## 0.27571 0.2078 257 1.327 0.9749
## 0.25311 0.2072 257 1.222 0.9868
## 0.14194 0.2048 257 0.693 0.9999
## 0.18495 0.2044 257 0.905 0.9990
## 0.31667 0.2145 257 1.476 0.9458
## 0.24375 0.2135 257 1.142 0.9924
## 0.27821 0.2089 257 1.332 0.9742
## 0.35513 0.2082 257 1.705 0.8643
## 0.06364 0.2147 257 0.296 1.0000
## 0.03333 0.2133 257 0.156 1.0000
## 0.07571 0.2011 257 0.377 1.0000
## 0.05311 0.2005 257 0.265 1.0000
## -0.05806 0.1980 257 -0.293 1.0000
## -0.01505 0.1976 257 -0.076 1.0000
## 0.11667 0.2080 257 0.561 1.0000
## 0.04375 0.2070 257 0.211 1.0000
## 0.07821 0.2022 257 0.387 1.0000
## 0.15513 0.2015 257 0.770 0.9998
## -0.03030 0.0780 257 -0.388 1.0000
## 0.01207 0.1114 257 0.108 1.0000
## -0.01053 0.1104 257 -0.095 1.0000
## -0.12170 0.1058 257 -1.151 0.9919
## -0.07869 0.1051 257 -0.749 0.9998
## 0.05303 0.1235 257 0.429 1.0000
## -0.01989 0.1218 257 -0.163 1.0000
## 0.01457 0.1134 257 0.128 1.0000
## 0.09149 0.1123 257 0.815 0.9996
## 0.04237 0.1086 257 0.390 1.0000
## 0.01977 0.1075 257 0.184 1.0000
## -0.09140 0.1028 257 -0.889 0.9992
## -0.04839 0.1021 257 -0.474 1.0000
## 0.08333 0.1210 257 0.689 0.9999
## 0.01042 0.1192 257 0.087 1.0000
## 0.04487 0.1107 257 0.405 1.0000
## 0.12179 0.1095 257 1.113 0.9939
## -0.02260 0.0477 257 -0.474 1.0000
## -0.13377 0.0742 257 -1.802 0.8157
## -0.09076 0.0732 257 -1.239 0.9853
## 0.04096 0.0979 257 0.418 1.0000
## -0.03196 0.0957 257 -0.334 1.0000
## 0.00250 0.0848 257 0.029 1.0000
## 0.07942 0.0833 257 0.954 0.9984
## -0.11117 0.0727 257 -1.530 0.9310
## -0.06816 0.0716 257 -0.951 0.9985
## 0.06356 0.0967 257 0.657 1.0000
## -0.00936 0.0945 257 -0.099 1.0000
## 0.02510 0.0835 257 0.301 1.0000
## 0.10202 0.0819 257 1.246 0.9846
## 0.04301 0.0380 257 1.133 0.9929
## 0.17473 0.0914 257 1.911 0.7512
## 0.10181 0.0890 257 1.143 0.9923
## 0.13627 0.0772 257 1.764 0.8357
## 0.21319 0.0755 257 2.823 0.1766
## 0.13172 0.0906 257 1.454 0.9513
## 0.05880 0.0882 257 0.667 1.0000
## 0.09326 0.0763 257 1.223 0.9868
## 0.17018 0.0745 257 2.283 0.4912
## -0.07292 0.0647 257 -1.127 0.9932
## -0.03846 0.1002 257 -0.384 1.0000
## 0.03846 0.0989 257 0.389 1.0000
## 0.03446 0.0981 257 0.351 1.0000
## 0.11138 0.0967 257 1.152 0.9919
## 0.07692 0.0508 257 1.515 0.9352
##
## P value adjustment: tukey method for comparing a family of 12 estimates
Treatment x DEMOmin (BLI X Majority)
out <- aov_ez(id = "UID", dv = "FinalGradePercent", data=d2, between=c("Treatment","DEMOmin"), observed=c("DEMOmin"), anova_table = list(es="pes"))
## Warning: Missing values for following ID(s):
## 066bf253, 07cd36e8, 103835b7, 11a109de, 163cd192, 19f253f7, 28164b98, 2b3256e1, 2df65c18, 3d8e33cd, 55d91e27, 57e50784, 5c4f7e0e, 62a6e123, 632fe4b0, 64285c43, 6682f8ec, 66d35e35, 67e77d96, 69ff8dd3, 754ec1be, 83efb1cc, 881ac70b, 88699c14, 925ffb4c, 9a8d5ad4, b98abba9, bacb5cad, bb2d714a, c3109248, ce453266, d2b2cff0, e965f476, eaab6818, f469ac4e, fb672a6e, fc4c2da2
## Removing those cases from the analysis.
## Contrasts set to contr.sum for the following variables: Treatment, DEMOmin
nice(out)
## Anova Table (Type 3 tests)
##
## Response: FinalGradePercent
## Effect df MSE F pes p.value
## 1 Treatment 1, 600 0.01 0.13 <.001 .716
## 2 DEMOmin 1, 600 0.01 3.16 + .005 .076
## 3 Treatment:DEMOmin 1, 600 0.01 0.02 <.001 .890
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1
Treatment x rg (BLI, Majority Men, Majority Women & NB)
out <- aov_ez(id = "UID", dv = "MATLABGradePercent", data=d2, between=c("Treatment","rg"), observed=c("rg"), anova_table = list(es="pes"))
## Contrasts set to contr.sum for the following variables: Treatment, rg
nice(out)
## Anova Table (Type 3 tests)
##
## Response: MATLABGradePercent
## Effect df MSE F pes p.value
## 1 Treatment 1, 451 0.01 4.08 * .009 .044
## 2 rg 2, 451 0.01 7.61 *** .033 <.001
## 3 Treatment:rg 2, 451 0.01 3.25 * .014 .040
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1
afex_plot(out, x = "Treatment")
## NOTE: Results may be misleading due to involvement in interactions
afex_plot(out, x = "rg")
## NOTE: Results may be misleading due to involvement in interactions
afex_plot(out, x = "Treatment", trace = "rg")
tgc2 <- summarySE(subset(d2, !is.na(rg)), measurevar="MATLABGradePercent", groupvars=c("Treatment","rg"), na.rm = T)
tgc2
## Treatment rg N MATLABGradePercent sd se
## 1 0 bli 15 0.8343282 0.21151872 0.054613900
## 2 0 majority man 119 0.9375260 0.14742820 0.013514721
## 3 0 majority woman & nb 56 0.9687582 0.06243506 0.008343236
## 4 1 bli 37 0.9216334 0.14770019 0.024281762
## 5 1 majority man 160 0.9602997 0.09483373 0.007497265
## 6 1 majority woman & nb 70 0.9500308 0.11702724 0.013987431
## ci
## 1 0.11713517
## 2 0.02676283
## 3 0.01672022
## 4 0.04924570
## 5 0.01480707
## 6 0.02790416
levels(tgc2$Treatment) <- c("Control","Intervention")
levels(tgc2$rg) <- c("BLI Men and Women","White & Asian Men","White & Asian Women & Non-Binary")
ggplot(tgc2, aes(x=Treatment, y=MATLABGradePercent, fill=rg)) +
geom_bar(position=position_dodge(), stat="identity", color="black") +
geom_errorbar(aes(ymin=MATLABGradePercent-se, ymax=MATLABGradePercent+se),
width=.2,
position=position_dodge(.9)) +
xlab("Condition") + ylab("MATLab Grade") + labs(fill="Race/Gender")
emmeans(out, specs = c("Treatment", "rg"))
## Treatment rg emmean SE df lower.CL upper.CL
## 0 bli 0.834 0.03115 451 0.773 0.896
## 1 bli 0.922 0.01984 451 0.883 0.961
## 0 majority man 0.938 0.01106 451 0.916 0.959
## 1 majority man 0.960 0.00954 451 0.942 0.979
## 0 majority woman & nb 0.969 0.01612 451 0.937 1.000
## 1 majority woman & nb 0.950 0.01442 451 0.922 0.978
##
## Confidence level used: 0.95
pairs(emmeans(out, specs = c("Treatment", "rg")))
## contrast estimate
## Treatment0 bli - Treatment1 bli -0.08731
## Treatment0 bli - Treatment0 majority man -0.10320
## Treatment0 bli - Treatment1 majority man -0.12597
## Treatment0 bli - Treatment0 majority woman & nb -0.13443
## Treatment0 bli - Treatment1 majority woman & nb -0.11570
## Treatment1 bli - Treatment0 majority man -0.01589
## Treatment1 bli - Treatment1 majority man -0.03867
## Treatment1 bli - Treatment0 majority woman & nb -0.04712
## Treatment1 bli - Treatment1 majority woman & nb -0.02840
## Treatment0 majority man - Treatment1 majority man -0.02277
## Treatment0 majority man - Treatment0 majority woman & nb -0.03123
## Treatment0 majority man - Treatment1 majority woman & nb -0.01250
## Treatment1 majority man - Treatment0 majority woman & nb -0.00846
## Treatment1 majority man - Treatment1 majority woman & nb 0.01027
## Treatment0 majority woman & nb - Treatment1 majority woman & nb 0.01873
## SE df t.ratio p.value
## 0.0369 451 -2.364 0.1713
## 0.0331 451 -3.122 0.0234
## 0.0326 451 -3.866 0.0018
## 0.0351 451 -3.832 0.0020
## 0.0343 451 -3.370 0.0105
## 0.0227 451 -0.700 0.9819
## 0.0220 451 -1.757 0.4950
## 0.0256 451 -1.844 0.4388
## 0.0245 451 -1.158 0.8565
## 0.0146 451 -1.559 0.6259
## 0.0196 451 -1.597 0.6007
## 0.0182 451 -0.688 0.9832
## 0.0187 451 -0.452 0.9976
## 0.0173 451 0.594 0.9914
## 0.0216 451 0.866 0.9544
##
## P value adjustment: tukey method for comparing a family of 6 estimates
Treatment x DEMOmin (BLI x Majority) C: Pre-Test Belonging
out <- aov_ez(id = "UID", dv = "FBelong.post", data=d2, between=c("DEMOmin","Treatment"), covariate=c("FBelong.pre"), factorize = F, anova_table = list(es="pes"))
## Contrasts set to contr.sum for the following variables: DEMOmin, Treatment
nice(out)
## Anova Table (Type 3 tests)
##
## Response: FBelong.post
## Effect df MSE F pes p.value
## 1 DEMOmin 1, 260 0.11 0.18 <.001 .672
## 2 Treatment 1, 260 0.11 0.21 <.001 .646
## 3 FBelong.pre 1, 260 0.11 195.74 *** .429 <.001
## 4 DEMOmin:Treatment 1, 260 0.11 1.28 .005 .259
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1
afex_plot(out, x = "Treatment", trace ="DEMOmin")
Treatment x DEMOmin (BLI x Majority) x bel_cut (High x Low Belonging)
out <- aov_ez(id = "UID", dv = "FBelong.post", data=d3, between=c("DEMOmin","Treatment","bel_cut"), factorize = F, anova_table = list(es="pes"))
## Contrasts set to contr.sum for the following variables: DEMOmin, Treatment, bel_cut
nice(out)
## Anova Table (Type 3 tests)
##
## Response: FBelong.post
## Effect df MSE F pes p.value
## 1 DEMOmin 1, 257 0.12 0.31 .001 .575
## 2 Treatment 1, 257 0.12 4.51 * .017 .035
## 3 bel_cut 1, 257 0.12 64.87 *** .202 <.001
## 4 DEMOmin:Treatment 1, 257 0.12 4.39 * .017 .037
## 5 DEMOmin:bel_cut 1, 257 0.12 10.62 ** .040 .001
## 6 Treatment:bel_cut 1, 257 0.12 0.00 <.001 .949
## 7 DEMOmin:Treatment:bel_cut 1, 257 0.12 0.49 .002 .483
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1
afex_plot(out, x = "Treatment", trace ="DEMOmin", panel = "bel_cut")
afex_plot(out, x ="DEMOmin", trace = "bel_cut")
## NOTE: Results may be misleading due to involvement in interactions
d3$bel_cut <- as.factor(d3$bel_cut)
tgc2 <- summarySE(d3, measurevar="FBelong.post", groupvars=c("Treatment","DEMOmin","bel_cut"), na.rm = T)
tgc2
## Treatment DEMOmin bel_cut N FBelong.post sd se ci
## 1 0 bli high 3 3.666667 0.3333333 0.19245009 0.82804590
## 2 0 bli low 2 2.500000 0.2357023 0.16666667 2.11770079
## 3 0 majority high 27 3.432099 0.4697227 0.09039818 0.18581612
## 4 0 majority low 65 3.030769 0.3713857 0.04606473 0.09202487
## 5 1 bli high 4 4.000000 0.0000000 0.00000000 0.00000000
## 6 1 bli low 18 2.981481 0.3327882 0.07843894 0.16549169
## 7 1 majority high 51 3.496732 0.4588515 0.06425206 0.12905405
## 8 1 majority low 95 2.971930 0.2100499 0.02155066 0.04278934
levels(tgc2$DEMOmin) <- c("BLI","Majority")
levels(tgc2$bel_cut) <- c("High Pre-Test Belonging","Low Pre-Test Belonging")
ggplot(tgc2, aes(x=DEMOmin, y=FBelong.post, fill=Treatment, panel=bel_cut)) +
geom_bar(position=position_dodge(), stat="identity", color="black") +
geom_errorbar(aes(ymin=FBelong.post-se, ymax=FBelong.post+se),
width=.2,
position=position_dodge(.9)) +
xlab("Condition") + ylab("Post-Test Belonging Score") +
facet_wrap(~bel_cut)
Treatment X DEMOmin (BLI x Majority) Limited to low-belongers (pre-test belonging score below the mean)
out <- aov_ez(id = "UID", dv = "FBelong.post", data=d4, between=c("DEMOmin","Treatment"), factorize = F, anova_table = list(es="pes"))
## Contrasts set to contr.sum for the following variables: DEMOmin, Treatment
nice(out)
## Anova Table (Type 3 tests)
##
## Response: FBelong.post
## Effect df MSE F pes p.value
## 1 DEMOmin 1, 176 0.08 5.51 * .030 .020
## 2 Treatment 1, 176 0.08 3.63 + .020 .059
## 3 DEMOmin:Treatment 1, 176 0.08 5.93 * .033 .016
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1
afex_plot(out, x = "Treatment", trace ="DEMOmin")
tgc3 <- summarySE(d4, measurevar="FBelong.post", groupvars=c("Treatment","DEMOmin"), na.rm = T)
tgc3
## Treatment DEMOmin N FBelong.post sd se ci
## 1 0 bli 2 2.500000 0.2357023 0.16666667 2.11770079
## 2 0 majority 65 3.030769 0.3713857 0.04606473 0.09202487
## 3 1 bli 18 2.981481 0.3327882 0.07843894 0.16549169
## 4 1 majority 95 2.971930 0.2100499 0.02155066 0.04278934
levels(tgc3$DEMOmin) <- c("BLI","Majority")
levels(tgc3$Treatment) <- c("Control","Intervention")
ggplot(tgc3, aes(x=DEMOmin, y=FBelong.post, fill=Treatment)) +
geom_bar(position=position_dodge(), stat="identity", color="black") +
geom_errorbar(aes(ymin=FBelong.post-se, ymax=FBelong.post+se),
width=.2,
position=position_dodge(.9)) +
ylim(0,4) +
xlab("Race/Ethnicity") + ylab("Post-Test Belonging Score")
emmeans(out, specs = c("Treatment", "DEMOmin"))
## Treatment DEMOmin emmean SE df lower.CL upper.CL
## 0 bli 2.50 0.2058 176 2.09 2.91
## 1 bli 2.98 0.0686 176 2.85 3.12
## 0 majority 3.03 0.0361 176 2.96 3.10
## 1 majority 2.97 0.0299 176 2.91 3.03
##
## Confidence level used: 0.95
pairs(emmeans(out, specs = c("Treatment", "DEMOmin")))
## contrast estimate SE df t.ratio p.value
## Treatment0 bli - Treatment1 bli -0.48148 0.2170 176 -2.219 0.1220
## Treatment0 bli - Treatment0 majority -0.53077 0.2090 176 -2.540 0.0574
## Treatment0 bli - Treatment1 majority -0.47193 0.2080 176 -2.269 0.1093
## Treatment1 bli - Treatment0 majority -0.04929 0.0775 176 -0.636 0.9204
## Treatment1 bli - Treatment1 majority 0.00955 0.0748 176 0.128 0.9993
## Treatment0 majority - Treatment1 majority 0.05884 0.0469 176 1.256 0.5923
##
## P value adjustment: tukey method for comparing a family of 4 estimates
MATLab Grade ~ Treatment * DEMOmin (BLI x Majority)
temp1 <- subset(d, Time == "0pre")
temp2 <- subset(d, Time == "1post", select=c(UID, FBelong))
d2 <- merge(temp1, temp2, by = "UID", suffixes = c(".pre",".post"))
d2$mgrade <- d2$MATLABGradePercent
d2$mgrade_cat[d2$mgrade < 1] <- 0
d2$mgrade_cat[d2$mgrade == 1] <- 1
# table(d2$mgrade_cat, d2$Treatment, d2$DEMOmin)
# str(d2)
d2$mgrade_cat <- as.factor(d2$mgrade_cat)
mylogit <- glm(mgrade_cat ~ DEMOmin * Treatment, data = d2, family = "binomial")
summary(mylogit)
##
## Call:
## glm(formula = mgrade_cat ~ DEMOmin * Treatment, family = "binomial",
## data = d2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.419 -1.355 0.954 1.010 1.482
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.6931 0.5477 -1.266 0.2057
## DEMOminmajority 1.1017 0.5615 1.962 0.0498 *
## Treatment1 0.9651 0.6404 1.507 0.1318
## DEMOminmajority:Treatment1 -0.8224 0.6640 -1.239 0.2155
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 809.12 on 603 degrees of freedom
## Residual deviance: 803.38 on 600 degrees of freedom
## (37 observations deleted due to missingness)
## AIC: 811.38
##
## Number of Fisher Scoring iterations: 4