Original: Kevin Kaufman-Ortiz and Allison Godwin (June 7, 2022)
Revised: Heather Perkins (October 10, 2022)
Data imported, race/ethnicity and gender variables cleaned, pre/post dataframes created.
# UBelong Exploratory Analysis for Pre/Post Surveys
# install packages --------------------------------------------------------
#install.packages("readxl")
#install.packages("stargazer")
#install.packages("car")
#install.packages("Hmisc")
#install.packages("psych")
#install.packages("moments")
#install.packages("nFactors")
#install.packages("GPArotation")
#install.packages("Rmisc")
#install.packages("rstatix")
#install.packages("tidyverse")
#install.packages("ggpubr")
#install.packages("reshape2")
#install.packages("DescTools")
# load libraries ----------------------------------------------------------
library("readxl")
library("stargazer")
library("car")
library("Hmisc")
library("psych")
library("moments")
library("nFactors")
library("GPArotation")
library("Rmisc")
library("rstatix")
library("tidyverse")
library("ggpubr")
library("reshape2")
library("DescTools")
library(ids)
library(MASS)
library(afex)
library(emmeans)
library(ggbeeswarm)
library(effects)
# require(devtools)
# install_version("parameters", version = "0.19.0", repos = "http://cran.us.r-project.org")
library(sjPlot)
# import data -------------------------------------------------------------
spring2022 <- read.csv("data/final-UBelong_ENGR 132_Spring 2022_prepostgradescondinstitutional_deid.csv")
# assign UID
# spring2022$UID <- ids::random_id(nrow(spring2022), 4)
# write.csv(spring2022, file="data/final-UBelong_ENGR 132_Spring 2022_prepostgradescondinstitutional_deid.csv")
# identify important variables
# id = UID
# instructor = Instructor
# section = Section
# control/treatment = Treatment
# pre/post = Time
# gender = DEMOgen
# BLI = DEMOmin
# black, american indian, mexican, central american, puerto rican, other latinx, native hawaiian/pacific islander
# NA is coded as 0 (majority)
# MATLABGradePercent
# ProjectGradePercent
# basic recoding and cleaning ---------------------------------------------
# transform -99 to NA
spring2022[spring2022 == -99] <- NA
# remove bad responses
# table(spring2022$CHECKpass, useNA = "always")
spring2022_2 <- subset(spring2022, CHECKpass == 1 | is.na(CHECKpass))
spring2022_3 <- subset(spring2022_2, CHECKpass_2 == 1 | is.na(CHECKpass_2))
spring2022 <- spring2022_3
# recode gender
# table(spring2022$DEMOgen)
# 1 2 3 4 5
# 328 131 3 1 6
#Gender is coded with 5 values: (1) Man, (2) Woman, (3) Non-binary / gender-queer, (4) Not listed above, (5) Prefer not to respond
#"Not listed above" responses were mostly non-serious joke answers and were originally set to NA
#June 24, 2022: the team decided not to erase the experiences of non-binary respondents, so codes 3, 4, and 5 are grouped as "nb" below
spring2022$DEMOgen_orig <- spring2022$DEMOgen
spring2022$DEMOgen[spring2022$DEMOgen == 1] <- "man"   # 1 = man
spring2022$DEMOgen[spring2022$DEMOgen == 2] <- "woman" # 2 = woman
spring2022$DEMOgen[spring2022$DEMOgen == 3 | spring2022$DEMOgen == 4 | spring2022$DEMOgen == 5] <- "nb" # 3, 4, 5 = nb (non-binary, not listed, prefer not to respond)
# table(spring2022$DEMOgen)
# man nb woman
# 328 10 131
# creating race variables
# Race is coded with many variables, need a minoritized variable
# bli = black, american indian, mexican, central american, puerto rican, other latinx, native hawaiian/pacific islander
spring2022$DEMOmin <- "majority"
spring2022$DEMOmin[spring2022$DEMOblack == 1] <- "bli"
spring2022$DEMOmin[spring2022$DEMOai == 1] <- "bli"
spring2022$DEMOmin[spring2022$DEMOmex == 1] <- "bli"
spring2022$DEMOmin[spring2022$DEMOca == 1] <- "bli"
spring2022$DEMOmin[spring2022$DEMOsa == 1] <- "bli"
spring2022$DEMOmin[spring2022$DEMOpr == 1] <- "bli"
spring2022$DEMOmin[spring2022$DEMOol == 1] <- "bli"
spring2022$DEMOmin[spring2022$DEMOpi == 1] <- "bli"
# srm = broader minoritized grouping used in other analyses (adds Southeast Asian respondents and women to the bli categories)
spring2022$DEMOmin2 <- "majority"
spring2022$DEMOmin2[spring2022$DEMOblack == 1] <- "srm"
spring2022$DEMOmin2[spring2022$DEMOai == 1] <- "srm"
spring2022$DEMOmin2[spring2022$DEMOsea == 1] <- "srm"
spring2022$DEMOmin2[spring2022$DEMOmex == 1] <- "srm"
spring2022$DEMOmin2[spring2022$DEMOca == 1] <- "srm"
spring2022$DEMOmin2[spring2022$DEMOsa == 1] <- "srm"
spring2022$DEMOmin2[spring2022$DEMOpr == 1] <- "srm"
spring2022$DEMOmin2[spring2022$DEMOol == 1] <- "srm"
spring2022$DEMOmin2[spring2022$DEMOpi == 1] <- "srm"
spring2022$DEMOmin2[spring2022$DEMOgen == "woman"] <- "srm"
# table(spring2022$DEMOmin2, useNA = "always")
# rg = three categories, bli, majority men, and majority women/nb
spring2022$rg <- NA
spring2022$rg[spring2022$DEMOmin == "bli"] <- "bli"
spring2022$rg[spring2022$DEMOmin == "majority" & spring2022$DEMOgen == "woman"] <- "majority woman & nb"
spring2022$rg[spring2022$DEMOmin == "majority" & spring2022$DEMOgen == "nb"] <- "majority woman & nb"
spring2022$rg[spring2022$DEMOmin == "majority" & spring2022$DEMOgen == "man"] <- "majority man"
# spring2022$rg[spring2022$DEMOmin == "majority" & is.na(spring2022$DEMOgen)] <- "majority man & NA"
# demographics and numbers
# table(spring2022$Treatment, useNA = "always")
# table(spring2022$DEMOgen, spring2022$DEMOmin, useNA = "always")
# table(spring2022$rg, useNA = "always")
# table(spring2022$DEMOmin, useNA = "always")
# table(spring2022$DEMOmin, spring2022$DEMOblack, useNA = "always")
# table(spring2022$DEMOmin, spring2022$DEMOai, useNA = "always")
# table(spring2022$DEMOmin, spring2022$DEMOmex, useNA = "always")
# table(spring2022$DEMOmin, spring2022$DEMOca, useNA = "always")
# table(spring2022$DEMOmin, spring2022$DEMOpr, useNA = "always")
# table(spring2022$DEMOmin, spring2022$DEMOsa, useNA = "always")
# table(spring2022$DEMOmin, spring2022$DEMOol, useNA = "always")
# table(spring2022$DEMOmin, spring2022$DEMOpi, useNA = "always")
# breaking out pre and post dataframes ------------------------------------
names(spring2022)
## [1] "X" "EndDate"
## [3] "Progress" "Duration__in_seconds"
## [5] "Finished" "Major01"
## [7] "Major02" "MSdisc01"
## [9] "MSdisc02" "MSdisc03"
## [11] "MSdisc04" "MSdisc01rev"
## [13] "MSdisc02rev" "MSdisc03rev"
## [15] "MSdisc04rev" "MSdisc05"
## [17] "MSdisc06" "MSdisc07"
## [19] "CNEBclass01" "CNEBclass02"
## [21] "CNEBclass03" "CNEBclass04"
## [23] "CNEBclass05" "CNEBclass01rev"
## [25] "CNEBclass02rev" "CNEBclass04rev"
## [27] "CNEBclass05rev" "CNEBclass06"
## [29] "LBdisc01" "LBdisc02"
## [31] "LBdisc03" "LBdisc04"
## [33] "LBdisc05" "LBdisc06"
## [35] "LBdisc01rev" "LBdisc05rev"
## [37] "LBdisc06rev" "EEgen01"
## [39] "EEgen02" "EEgen03"
## [41] "EEgen04" "EEgen01rev"
## [43] "EEgen02rev" "EEgen03rev"
## [45] "EEgen04rev" "FASdisc01"
## [47] "ENGdisc01" "ENGdisc02"
## [49] "ENGdisc03" "ENGdisc03rev"
## [51] "FASdisc02" "FASdisc03"
## [53] "FASdisc03rev" "CNHSclass01"
## [55] "CNHSclass02" "CNHSclass03"
## [57] "CNSWclass01" "CNSWclass02"
## [59] "CNSWclass03" "CNSWclass04"
## [61] "CNSWclass05" "CNSWclass05rev"
## [63] "CNHSclass04" "CNHSclass05"
## [65] "CNHSclass06" "BELclass01"
## [67] "BELclass02" "BELclass03"
## [69] "BELclass04" "BELclass02rev"
## [71] "IPdisc01" "IPdisc02"
## [73] "IPdisc03" "IPdisc04"
## [75] "IPdisc04rev" "CHECK"
## [77] "GSdisc01" "GSdisc02"
## [79] "GSdisc03" "GSdisc04"
## [81] "GSdisc05" "RSdisc01"
## [83] "RSdisc02" "RSdisc03"
## [85] "RSdisc04" "RSdisc05"
## [87] "CAgen01" "CAgen02"
## [89] "CAgen03" "CAgen04"
## [91] "CAgen05" "CAgen06"
## [93] "CAgen07" "CAgen04rev"
## [95] "CAgen07rev" "Egrade01"
## [97] "DEMOblack" "DEMOai"
## [99] "DEMOme" "DEMOea"
## [101] "DEMOsea" "DEMOip"
## [103] "DEMOoa" "DEMOmex"
## [105] "DEMOca" "DEMOpr"
## [107] "DEMOol" "DEMOpi"
## [109] "DEMOw" "DEMOnl"
## [111] "DEMOpnr" "DEMOoadet"
## [113] "DEMOoldet" "DEMOnldet"
## [115] "DEMOlatinx" "DEMOasian"
## [117] "DEMOamerasian" "DEMOsrm"
## [119] "DEMOmisc" "DEMOgen"
## [121] "DEMOgendet" "Woman"
## [123] "Nonbinary" "DEMOsexor"
## [125] "DEMOhetdum" "DEMOsexordet"
## [127] "DEMOtrans" "DEMOtransdum"
## [129] "DEMOpardeg" "DEMOpardum"
## [131] "DEMOfamdum" "firstgen"
## [133] "DEMOfamdeg" "DEMOintern"
## [135] "DEMOintdum" "DEMOdyslex"
## [137] "DEMOadhd" "DEMOautism"
## [139] "DEMOphysdis" "DEMOchronic"
## [141] "DEMOpsych" "DEMOdisnl"
## [143] "DEMOdisnldet" "DEMOtransfer"
## [145] "DEMOpell" "DEMOopen"
## [147] "Instructor" "Section"
## [149] "Discipline" "Course"
## [151] "Institution" "Survey"
## [153] "Semester" "CHECKpass"
## [155] "RaceSum" "DEMOw1"
## [157] "DEMOblack1" "DEMOlatinx1"
## [159] "DEMOasian1" "DEMOmisc1"
## [161] "DEMOmulti" "EEgen_mean"
## [163] "CNEBclass_mean" "BELclass_mean"
## [165] "LBdisc_mean" "MSdisc_mean"
## [167] "FASdisc_mean" "ENGdisc_mean"
## [169] "CNHSclass_mean" "CNSWclass_mean"
## [171] "IPdisc_mean" "GSdisc_mean"
## [173] "RSdisc_mean" "CAgen_mean"
## [175] "PreEngagement_factor" "EndDate_2"
## [177] "Progress_2" "Duration__in_seconds_2"
## [179] "Finished_2" "Major01_2"
## [181] "Major02_2" "MSdisc01_2"
## [183] "MSdisc02_2" "MSdisc03_2"
## [185] "MSdisc04_2" "MSdisc01rev_2"
## [187] "MSdisc02rev_2" "MSdisc03rev_2"
## [189] "MSdisc04rev_2" "MSdisc05_2"
## [191] "MSdisc06_2" "MSdisc07_2"
## [193] "CNEBclass01_2" "CNEBclass02_2"
## [195] "CNEBclass03_2" "CNEBclass04_2"
## [197] "CNEBclass05_2" "CNEBclass01rev_2"
## [199] "CNEBclass02rev_2" "CNEBclass04rev_2"
## [201] "CNEBclass05rev_2" "CNEBclass06_2"
## [203] "LBdisc01_2" "LBdisc02_2"
## [205] "LBdisc03_2" "LBdisc04_2"
## [207] "LBdisc05_2" "LBdisc06_2"
## [209] "LBdisc01rev_2" "LBdisc05rev_2"
## [211] "LBdisc06rev_2" "EEgen01_2"
## [213] "EEgen02_2" "EEgen03_2"
## [215] "EEgen04_2" "EEgen01rev_2"
## [217] "EEgen02rev_2" "EEgen03rev_2"
## [219] "EEgen04rev_2" "FASdisc01_2"
## [221] "ENGdisc01_2" "ENGdisc02_2"
## [223] "ENGdisc03_2" "ENGdisc03rev_2"
## [225] "FASdisc02_2" "FASdisc03_2"
## [227] "FASdisc03rev_2" "CNHSclass01_2"
## [229] "CNHSclass02_2" "CNHSclass03_2"
## [231] "CNSWclass01_2" "CNSWclass02_2"
## [233] "CNSWclass03_2" "CNSWclass04_2"
## [235] "CNSWclass05_2" "CNSWclass05rev_2"
## [237] "CNHSclass04_2" "CNHSclass05_2"
## [239] "CNHSclass06_2" "BELclass01_2"
## [241] "BELclass02_2" "BELclass03_2"
## [243] "BELclass04_2" "BELclass02rev_2"
## [245] "IPdisc01_2" "IPdisc02_2"
## [247] "IPdisc03_2" "IPdisc04_2"
## [249] "IPdisc04rev_2" "CHECK1_2"
## [251] "GSdisc01_2" "GSdisc02_2"
## [253] "GSdisc03_2" "GSdisc04_2"
## [255] "GSdisc05_2" "RSdisc01_2"
## [257] "RSdisc02_2" "RSdisc03_2"
## [259] "RSdisc04_2" "RSdisc05_2"
## [261] "CAgen01_2" "CAgen02_2"
## [263] "CAgen03_2" "CAgen04_2"
## [265] "CAgen05_2" "CAgen06_2"
## [267] "CAgen07_2" "CAgen04rev_2"
## [269] "CAgen07rev_2" "Egrade01_2"
## [271] "CHECKpass_2" "DEMOsa"
## [273] "EEgen_mean_2" "CNEBclass_mean_2"
## [275] "BELclass_mean_2" "LBdisc_mean_2"
## [277] "MSdisc_mean_2" "FASdisc_mean_2"
## [279] "ENGdisc_mean_2" "CNHSclass_mean_2"
## [281] "CNSWclass_mean_2" "IPdisc_mean_2"
## [283] "GSdisc_mean_2" "RSdisc_mean_2"
## [285] "CAgen_mean_2" "PostEngagement_factor"
## [287] "pre_post_merge" "section"
## [289] "FinalGradePercent" "MATLABGradePercent"
## [291] "ProjectGradePercent" "AttendanceGradePercent"
## [293] "grades_merge" "Term"
## [295] "Treatment" "condition_merge"
## [297] "GENDER_DESC" "REPORTING_ETHNICITY"
## [299] "RESIDENCY_DESC" "FirstGeneration"
## [301] "CURRENT_TIME_STATUS_DESC" "DEGREE_DESC"
## [303] "COLLEGE_DESC" "MAJOR_DESC"
## [305] "SAT2_RW" "SAT2_Math"
## [307] "ACTEnglish" "ACTMath"
## [309] "ACTReading" "ACTScience"
## [311] "ACTCompACT" "TOEFLListening"
## [313] "TOEFLReading" "TOEFLSpeaking"
## [315] "TOEFLTotal" "TOEFLWriting"
## [317] "HSGPA" "SCHOOL_RANK"
## [319] "COURSE_SECTION_NUMBER" "FINAL_GRADE"
## [321] "Overall_GPA" "Term_CREDITS_ATTEMPTED"
## [323] "Term_GPA" "email_institutional_merge"
## [325] "institutional_survey_merge" "coursegrade"
## [327] "UID" "DEMOgen_orig"
## [329] "DEMOmin" "DEMOmin2"
## [331] "rg"
head(subset(spring2022, select=c(ProjectGradePercent, MATLABGradePercent, AttendanceGradePercent)))
##   ProjectGradePercent MATLABGradePercent AttendanceGradePercent
## 1           0.9633333          1.0000000                     NA
## 3           0.9255555          1.0000000                   0.70
## 4           0.9600000          0.9866667                     NA
## 5           0.8272222          0.9533333                     NA
## 6           0.9633333          1.0000000                   0.95
## 7           0.9319444          0.7493846                     NA
pre <- subset(spring2022, select=c(UID, Treatment, DEMOgen, DEMOmin, DEMOmin2, rg, 1:175, MATLABGradePercent, ProjectGradePercent))
post <- subset(spring2022, select=c(UID, Treatment, DEMOgen, DEMOmin, DEMOmin2, rg, 176:286, MATLABGradePercent, ProjectGradePercent))
PreBelongingna <- na.omit(subset(pre, select=c(BELclass01, BELclass02rev, BELclass03, BELclass04)))
PostBelongingna <- na.omit(subset(post, select=c(BELclass01_2, BELclass02rev_2, BELclass03_2, BELclass04_2)))
# correlation matrix function ---------------------------------------------
#create function to conduct cor matrix with *** for significance levels
corstars <- function(x){
  require(Hmisc)
  x <- as.matrix(x)
  R <- rcorr(x)$r
  p <- rcorr(x)$P
  ## define notation for significance levels; spacing is important.
  mystars <- ifelse(p < .001, "***", ifelse(p < .01, "** ", ifelse(p < .05, "* ", " ")))
  ## truncate the matrix that holds the correlations to two decimals
  R <- format(round(cbind(rep(-1.11, ncol(x)), R), 2))[,-1]
  ## build a new matrix that combines the correlations with their appropriate stars
  Rnew <- matrix(paste(R, mystars, sep=""), ncol=ncol(x))
  diag(Rnew) <- paste(diag(R), " ", sep="")
  rownames(Rnew) <- colnames(x)
  colnames(Rnew) <- paste(colnames(x), "", sep="")
  ## remove the upper triangle
  Rnew <- as.matrix(Rnew)
  Rnew[upper.tri(Rnew, diag = TRUE)] <- ""
  Rnew <- as.data.frame(Rnew)
  ## remove the last column and return the matrix (which is now a data frame)
  Rnew <- cbind(Rnew[1:length(Rnew)-1])
  return(Rnew)
}
# kmo function ------------------------------------------------------------
# Base R has no built-in command for the KMO test. G. Jay Kerns of Youngstown State University created a function for it:
# http://tolstoy.newcastle.edu.au/R/e2/help/07/08/22816.html
# KMO = Kaiser-Meyer-Olkin Measure of Sampling Adequacy
kmo = function( data ){
  X <- cor(as.matrix(data))
  iX <- ginv(X)
  S2 <- diag(diag((iX^-1)))
  AIS <- S2%*%iX%*%S2                      # anti-image covariance matrix
  IS <- X+AIS-2*S2                         # image covariance matrix
  Dai <- sqrt(diag(diag(AIS)))
  IR <- ginv(Dai)%*%IS%*%ginv(Dai)         # image correlation matrix
  AIR <- ginv(Dai)%*%AIS%*%ginv(Dai)       # anti-image correlation matrix
  a <- apply((AIR - diag(diag(AIR)))^2, 2, sum)
  AA <- sum(a)
  b <- apply((X - diag(nrow(X)))^2, 2, sum)
  BB <- sum(b)
  MSA <- b/(b+a)                           # individual measures of sampling adequacy
  # Examine the anti-image of the correlation matrix, i.e. the negative of the
  # partial correlations, partialling out all other variables.
  AIR <- AIR-diag(nrow(AIR))+diag(MSA)
  kmo <- BB/(AA+BB)                        # overall KMO statistic
  # Reporting the conclusion
  if (kmo >= 0.00 && kmo < 0.50){
    test <- 'The KMO test yields an unacceptable degree of common variance for FA.'
  } else if (kmo >= 0.50 && kmo < 0.60){
    test <- 'The KMO test yields a miserable degree of common variance.'
  } else if (kmo >= 0.60 && kmo < 0.70){
    test <- 'The KMO test yields a mediocre degree of common variance.'
  } else if (kmo >= 0.70 && kmo < 0.80){
    test <- 'The KMO test yields a middling degree of common variance.'
  } else if (kmo >= 0.80 && kmo < 0.90){
    test <- 'The KMO test yields a meritorious degree of common variance.'
  } else {
    test <- 'The KMO test yields a marvelous degree of common variance.'
  }
  ans <- list( overall = kmo,
               report = test,
               individual = MSA,
               AIS = AIS,
               AIR = AIR )
  return(ans)
} # end of kmo()
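As a cross-check, more recent versions of the psych package (already loaded above) provide a built-in KMO() function; a minimal sketch, not evaluated here:
psych::KMO(PreBelongingna)$MSA    # overall KMO; should agree with kmo(PreBelongingna)$overall computed below
psych::KMO(PreBelongingna)$MSAi   # per-item measures of sampling adequacy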
# checking correlations for target variables ------------------------------
corstars(subset(pre, select=c(BELclass01, BELclass02rev, BELclass03, BELclass04))) #all correlations statistically significant
## BELclass01 BELclass02rev BELclass03
## BELclass01
## BELclass02rev 0.64***
## BELclass03 0.57*** 0.52***
## BELclass04 0.57*** 0.51*** 0.71***
corstars(subset(post, select=c(BELclass01_2, BELclass02rev_2, BELclass03_2, BELclass04_2))) #all correlations statistically significant
## BELclass01_2 BELclass02rev_2 BELclass03_2
## BELclass01_2
## BELclass02rev_2 0.64***
## BELclass03_2 0.59*** 0.56***
## BELclass04_2 0.56*** 0.51*** 0.72***
# examining skew and kurtosis ---------------------------------------------
#From a psychologist's perspective: if the scale is published and has been previously validated, it is better to keep the items and transform them than to drop them.
# Skewness for all items should fall within -2 to 2; items outside this range are candidates for removal. Maximum likelihood estimation assumes approximate normality.
sapply(PreBelongingna, FUN = skewness)
## BELclass01 BELclass02rev BELclass03 BELclass04
## 0.006386369 -0.221506098 -0.162112144 0.345292957
sapply(PostBelongingna, FUN = skewness)
## BELclass01_2 BELclass02rev_2 BELclass03_2 BELclass04_2
## -0.4747835 -0.5677938 -0.2949721 -0.6963178
# all items with kurtosis less than 7
sapply(PreBelongingna, FUN = kurtosis)
## BELclass01 BELclass02rev BELclass03 BELclass04
## 4.168645 3.941443 4.589422 4.044981
sapply(PostBelongingna, FUN = kurtosis)
## BELclass01_2 BELclass02rev_2 BELclass03_2 BELclass04_2
## 4.608295 4.309433 4.526886 6.149700
# check barlett's and kmo -------------------------------------------------
# Bartlett's test checks that the items are correlated enough to factor (i.e., the correlation matrix is not an identity matrix). A significant result is what we want; make sure to use NA-omitted data.
# If significant, the correlation matrix does not resemble an identity matrix
cortest.bartlett(cor(PreBelongingna), n = nrow(PreBelongingna))
## $chisq
## [1] 749.7921
##
## $p.value
## [1] 1.080976e-158
##
## $df
## [1] 6
cortest.bartlett(cor(PostBelongingna), n = nrow(PostBelongingna))
## $chisq
## [1] 605.6985
##
## $p.value
## [1] 1.375762e-127
##
## $df
## [1] 6
# Run the KMO command on the data
# KMO interpretation thresholds:
# 0.00 to 0.49 unacceptable.
# 0.50 to 0.59 miserable.
# 0.60 to 0.69 mediocre.
# 0.70 to 0.79 middling.
# 0.80 to 0.89 meritorious.
# 0.90 to 1.00 marvelous.
# Do we have adequate sampling to run a factor analysis?
kmooutput <- kmo(PreBelongingna)
kmooutput$overall
## [1] 0.7735774
kmooutput <- kmo(PostBelongingna)
kmooutput$overall
## [1] 0.7770663
# Both KMO values are about 0.77, which falls in the middling range above - adequate to proceed
# running EFA -------------------------------------------------------------
# Rule of thumb: retain the number of components above the elbow of the scree plot
# Summarizing EFA results: TLI is ideally above 0.95, with 0.90 as the minimum acceptable value, and RMSEA should be less than 0.08.
# Uniqueness is the proportion of an item's variance not explained by the factor; communality = 1 - uniqueness. Lower uniqueness (higher communality) is better, and communalities above roughly 0.40 are commonly treated as adequate.
# entering raw data and extracting (n) number of factors (determined in the previous step),
# with promax rotation
#Pre Belonging EFA
ev <- eigen(cor(PreBelongingna)) # get eigenvalues
ap <- parallel(subject = nrow(PreBelongingna), var = ncol(PreBelongingna),
rep = 100,cent = .05)
nS <- nScree(x = ev$values, aparallel = ap$eigen$qevpea)
plotnScree(nS)
fit <- factanal(PreBelongingna, 1, rotation="promax")
print(fit, digits = 3, cutoff = 0.3, sort = TRUE)
##
## Call:
## factanal(x = PreBelongingna, factors = 1, rotation = "promax")
##
## Uniquenesses:
## BELclass01 BELclass02rev BELclass03 BELclass04
## 0.447 0.530 0.331 0.343
##
## Loadings:
## [1] 0.743 0.685 0.818 0.811
##
## Factor1
## SS loadings 2.348
## Proportion Var 0.587
##
## Test of the hypothesis that 1 factor is sufficient.
## The chi square statistic is 50.96 on 2 degrees of freedom.
## The p-value is 8.6e-12
# All loadings appear well over 0.32, looks good!
fa(PreBelongingna, nfactors=1, rotate="promax", oblique.scores = TRUE, fm = "ml" )
## Factor Analysis using method = ml
## Call: fa(r = PreBelongingna, nfactors = 1, rotate = "promax", fm = "ml",
## oblique.scores = TRUE)
## Standardized loadings (pattern matrix) based upon correlation matrix
## ML1 h2 u2 com
## BELclass01 0.74 0.55 0.45 1
## BELclass02rev 0.69 0.47 0.53 1
## BELclass03 0.82 0.67 0.33 1
## BELclass04 0.81 0.66 0.34 1
##
## ML1
## SS loadings 2.35
## Proportion Var 0.59
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## The degrees of freedom for the null model are 6 and the objective function was 1.78 with Chi Square of 749.79
## The degrees of freedom for the model are 2 and the objective function was 0.12
##
## The root mean square of the residuals (RMSR) is 0.07
## The df corrected root mean square of the residuals is 0.11
##
## The harmonic number of observations is 424 with the empirical chi square 21.71 with prob < 1.9e-05
## The total number of observations was 424 with Likelihood Chi Square = 50.96 with prob < 8.6e-12
##
## Tucker Lewis Index of factoring reliability = 0.802
## RMSEA index = 0.24 and the 90 % confidence intervals are 0.186 0.3
## BIC = 38.86
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy
## ML1
## Correlation of (regression) scores with factors 0.93
## Multiple R square of scores with factors 0.86
## Minimum correlation of possible factor scores 0.72
# Tucker Lewis Index of factoring reliability = 0.802
# RMSEA index = 0.24
# Both fall short of the thresholds noted above (TLI >= 0.90, RMSEA <= 0.08), so absolute fit is poor even though the loadings are strong; these indices can be unstable with only four items (df = 2).
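If the uniqueness question above comes up again, the fitted psych::fa object exposes these quantities directly; a small sketch (the object name efa_pre is illustrative, not part of the original script):
efa_pre <- fa(PreBelongingna, nfactors = 1, rotate = "promax", oblique.scores = TRUE, fm = "ml")
efa_pre$TLI          # Tucker-Lewis Index (target >= 0.90, ideally >= 0.95)
efa_pre$RMSEA        # RMSEA point estimate and confidence limits (target <= 0.08)
efa_pre$communality  # communality = 1 - uniqueness for each item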
#Post Belonging EFA
ev <- eigen(cor(PostBelongingna)) # get eigenvalues
ap <- parallel(subject = nrow(PostBelongingna), var = ncol(PostBelongingna),
rep = 100,cent = .05)
nS <- nScree(x = ev$values, aparallel = ap$eigen$qevpea)
plotnScree(nS)
#Post Belonging
fit <- factanal(PostBelongingna, 1, rotation="promax")
print(fit, digits = 3, cutoff = 0.3, sort = TRUE)
##
## Call:
## factanal(x = PostBelongingna, factors = 1, rotation = "promax")
##
## Uniquenesses:
## BELclass01_2 BELclass02rev_2 BELclass03_2 BELclass04_2
## 0.456 0.517 0.281 0.351
##
## Loadings:
## [1] 0.738 0.695 0.848 0.806
##
## Factor1
## SS loadings 2.395
## Proportion Var 0.599
##
## Test of the hypothesis that 1 factor is sufficient.
## The chi square statistic is 38.26 on 2 degrees of freedom.
## The p-value is 4.92e-09
# All loadings appear over 0.32, looks good!
fa(PostBelongingna, nfactors=1, rotate="promax", oblique.scores = TRUE, fm = "ml" )
## Factor Analysis using method = ml
## Call: fa(r = PostBelongingna, nfactors = 1, rotate = "promax", fm = "ml",
## oblique.scores = TRUE)
## Standardized loadings (pattern matrix) based upon correlation matrix
## ML1 h2 u2 com
## BELclass01_2 0.74 0.54 0.46 1
## BELclass02rev_2 0.69 0.48 0.52 1
## BELclass03_2 0.85 0.72 0.28 1
## BELclass04_2 0.81 0.65 0.35 1
##
## ML1
## SS loadings 2.39
## Proportion Var 0.60
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## The degrees of freedom for the null model are 6 and the objective function was 1.86 with Chi Square of 605.7
## The degrees of freedom for the model are 2 and the objective function was 0.12
##
## The root mean square of the residuals (RMSR) is 0.06
## The df corrected root mean square of the residuals is 0.11
##
## The harmonic number of observations is 329 with the empirical chi square 15.99 with prob < 0.00034
## The total number of observations was 329 with Likelihood Chi Square = 38.26 with prob < 4.9e-09
##
## Tucker Lewis Index of factoring reliability = 0.818
## RMSEA index = 0.235 and the 90 % confidence intervals are 0.174 0.303
## BIC = 26.67
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy
## ML1
## Correlation of (regression) scores with factors 0.93
## Multiple R square of scores with factors 0.87
## Minimum correlation of possible factor scores 0.73
# Tucker Lewis Index of factoring reliability = 0.818
# RMSEA index = 0.235
# As with the pre-survey, these fall short of the usual thresholds (TLI >= 0.90, RMSEA <= 0.08), so absolute fit is poor despite strong loadings.
# cronbach's alphas -------------------------------------------------------
#For the future, it's good to check alpha for overall scale and for individual factors
psych::alpha(PreBelongingna) #0.85
##
## Reliability analysis
## Call: psych::alpha(x = PreBelongingna)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.85 0.85 0.82 0.59 5.7 0.012 3.2 0.44 0.57
##
## 95% confidence boundaries
## lower alpha upper
## Feldt 0.82 0.85 0.87
## Duhachek 0.82 0.85 0.87
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## BELclass01 0.80 0.81 0.75 0.58 4.1 0.017 0.0130 0.52
## BELclass02rev 0.83 0.83 0.77 0.62 4.8 0.015 0.0066 0.57
## BELclass03 0.80 0.80 0.73 0.57 4.0 0.017 0.0043 0.57
## BELclass04 0.80 0.80 0.74 0.58 4.1 0.017 0.0037 0.57
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## BELclass01 424 0.84 0.84 0.76 0.70 3.2 0.52
## BELclass02rev 424 0.82 0.80 0.70 0.64 3.1 0.57
## BELclass03 424 0.84 0.84 0.78 0.70 3.1 0.53
## BELclass04 424 0.83 0.84 0.77 0.70 3.2 0.48
##
## Non missing response frequency for each item
## 1 2 3 4 miss
## BELclass01 0.00 0.04 0.71 0.24 0
## BELclass02rev 0.01 0.09 0.68 0.22 0
## BELclass03 0.01 0.06 0.72 0.21 0
## BELclass04 0.00 0.03 0.74 0.23 0
psych::alpha(PostBelongingna) #0.85
##
## Reliability analysis
## Call: psych::alpha(x = PostBelongingna)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.85 0.86 0.83 0.6 5.9 0.013 3.1 0.49 0.58
##
## 95% confidence boundaries
## lower alpha upper
## Feldt 0.83 0.85 0.88
## Duhachek 0.83 0.85 0.88
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## BELclass01_2 0.81 0.82 0.76 0.60 4.4 0.019 0.0128 0.56
## BELclass02rev_2 0.83 0.83 0.78 0.63 5.0 0.016 0.0074 0.59
## BELclass03_2 0.80 0.80 0.73 0.57 4.0 0.019 0.0046 0.56
## BELclass04_2 0.82 0.82 0.75 0.60 4.5 0.017 0.0018 0.59
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## BELclass01_2 329 0.84 0.84 0.76 0.70 3.1 0.59
## BELclass02rev_2 329 0.82 0.81 0.71 0.66 3.1 0.63
## BELclass03_2 329 0.85 0.86 0.81 0.74 3.1 0.55
## BELclass04_2 329 0.83 0.84 0.77 0.69 3.1 0.56
##
## Non missing response frequency for each item
## 1 2 3 4 miss
## BELclass01_2 0.02 0.08 0.68 0.22 0
## BELclass02rev_2 0.02 0.10 0.66 0.22 0
## BELclass03_2 0.01 0.10 0.72 0.18 0
## BELclass04_2 0.02 0.05 0.74 0.19 0
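Because the composite created in the next section drops BELclass02rev, it may also be worth reporting alpha for the three retained items; a quick sketch, not run here:
psych::alpha(subset(pre,  select = c(BELclass01, BELclass03, BELclass04)))$total$raw_alpha
psych::alpha(subset(post, select = c(BELclass01_2, BELclass03_2, BELclass04_2)))$total$raw_alpha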
# create composite variables ----------------------------------------------
# no notes on record explaining why BELclass02rev was dropped from the composite
pre$FBelong <- (pre$BELclass01 + pre$BELclass03 + pre$BELclass04)/3
post$FBelong <- (post$BELclass01_2 + post$BELclass03_2 + post$BELclass04_2)/3
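Note that the sum-then-divide composite above is NA whenever any single item is missing. A sketch of an alternative (the FBelong_avail name is illustrative, not used elsewhere) that averages whichever items are present:
pre$FBelong_avail  <- rowMeans(subset(pre,  select = c(BELclass01, BELclass03, BELclass04)), na.rm = TRUE)
post$FBelong_avail <- rowMeans(subset(post, select = c(BELclass01_2, BELclass03_2, BELclass04_2)), na.rm = TRUE)
Whether partial responders should receive a score is a substantive decision; the analyses below keep the original complete-item definition.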
# subset needed variables into new long dataset
d <- rbind.data.frame(
cbind(subset(pre, select=c(UID, Treatment, DEMOgen, DEMOmin, DEMOmin2, rg, FBelong, MATLABGradePercent, ProjectGradePercent)), Time = "0pre"),
cbind(subset(post, select=c(UID, Treatment, DEMOgen, DEMOmin, DEMOmin2, rg, FBelong, MATLABGradePercent, ProjectGradePercent)), Time = "1post"))
# Make sure data are factors
d$Treatment <- as.factor(d$Treatment)
d$DEMOgen <- as.factor(d$DEMOgen)
d$DEMOmin <- as.factor(d$DEMOmin)
d$DEMOmin2 <- as.factor(d$DEMOmin2)
d$rg <- as.factor(d$rg)
d$Time <- as.factor(d$Time)
d$UID <- as.factor(d$UID)
d <- subset(d, !is.na(Treatment))
# Examination of residuals demonstrated some normality challenges, so trim extreme values
qqPlot(d$FBelong)
## [1] 1011 1127
# trimming
upper_quantile <- quantile(d$FBelong, 0.98, na.rm = TRUE)
lower_quantile <- quantile(d$FBelong, 0.02, na.rm = TRUE)
# values above the 98th percentile or below the 2nd percentile become NA
d$FBelong[d$FBelong > upper_quantile] <- NA
d$FBelong[d$FBelong < lower_quantile] <- NA
qqPlot(d$FBelong)
## [1] 44 60
res.aovB <- aov(FBelong ~ Treatment*Time, data = d)
plot(res.aovB, 1)
plot(res.aovB, 2) # some trimming may be needed
res.aovB2 <- aov(FBelong ~ Treatment*Time*DEMOgen, data = d)
plot(res.aovB2, 1)
plot(res.aovB2, 2) # some trimming may be needed
res.aovB3 <- aov(FBelong ~ Treatment*Time*DEMOmin, data = d)
plot(res.aovB3, 1)
plot(res.aovB3, 2) # some trimming may be needed
# assumption checking -----------------------------------------------------
# Normality
moments::skewness(d$FBelong, na.rm = TRUE)
## [1] 0.4862195
moments::skewness(subset(d, select=c(FBelong), Time == "0pre"), na.rm=T)
## FBelong
## 0.5280985
moments::skewness(subset(d, select=c(FBelong), Time == "1post"), na.rm=T)
## FBelong
## 0.4462114
moments::kurtosis(d$FBelong, na.rm = TRUE)
## [1] 3.3805
moments::kurtosis(subset(d, select=c(FBelong), Time == "0pre"), na.rm=T)
## FBelong
## 3.321822
moments::kurtosis(subset(d, select=c(FBelong), Time == "1post"), na.rm=T)
## FBelong
## 3.441133
moments::skewness(d$MATLABGradePercent, na.rm = TRUE)
## [1] -3.18743
moments::kurtosis(d$MATLABGradePercent, na.rm = TRUE)
## [1] 14.70525
hist(d$FBelong, breaks=5)
hist(d$MATLABGradePercent, breaks=100)
A sphericity correction is not needed since the within-subjects variable (Time) has only two levels.
# Wide Dataframe
temp1 <- subset(d, Time == "0pre")
temp2 <- subset(d, Time == "1post", select=c(UID, FBelong))
d2 <- merge(temp1, temp2, by = "UID", suffixes = c(".pre",".post"))
d2$FBelong.pre <- scale(d2$FBelong.pre, center = T)
Used the afex package in R for the ANOVAs. It automatically applies appropriate orthogonal (sum-to-zero) contrasts for factors and uses Type III sums of squares. (https://www.r-bloggers.com/2017/06/anova-in-r-afex-may-be-the-solution-you-are-looking-for/)
Time x Treatment x DEMOmin (BLI vs. Majority)
out <- aov_ez(id = "UID", dv = "FBelong", data=d, between=c("Treatment","DEMOmin"), within="Time", observed=c("DEMOmin"), anova_table = list(es="pes"))
## Contrasts set to contr.sum for the following variables: Treatment, DEMOmin
nice(out)
## Anova Table (Type 3 tests)
##
## Response: FBelong
## Effect df MSE F pes p.value
## 1 Treatment 1, 261 0.32 0.22 <.001 .643
## 2 DEMOmin 1, 261 0.32 0.45 .002 .504
## 3 Treatment:DEMOmin 1, 261 0.32 0.97 .004 .326
## 4 Time 1, 261 0.07 1.04 .004 .310
## 5 Treatment:Time 1, 261 0.07 0.54 .002 .465
## 6 DEMOmin:Time 1, 261 0.07 0.63 .002 .427
## 7 Treatment:DEMOmin:Time 1, 261 0.07 2.99 + .011 .085
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1
afex_plot(out, x = "Time", trace = "Treatment", panel = "DEMOmin")
## Warning: Panel(s) show a mixed within-between-design.
## Error bars do not allow comparisons across all means.
## Suppress error bars with: error = "none"
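If the marginal three-way interaction (p = .085) is worth probing further, the afex object can be passed straight to emmeans (loaded above); a sketch of possible follow-ups, not run here (the emm name is illustrative):
emm <- emmeans::emmeans(out, ~ Treatment * Time | DEMOmin)  # estimated marginal means per cell
pairs(emm, by = c("Treatment", "DEMOmin"))                  # pre vs. post change within each Treatment x DEMOmin cell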
tgc <- summarySE(d, measurevar="FBelong", groupvars=c("Treatment","DEMOmin","Time"), na.rm = T)
tgc
## Treatment DEMOmin Time N FBelong sd se ci
## 1 0 bli 0pre 13 3.333333 0.5270463 0.14617634 0.31849088
## 2 0 bli 1post 6 3.166667 0.6236096 0.25458754 0.65443810
## 3 0 majority 0pre 149 3.118568 0.4028312 0.03300122 0.06521446
## 4 0 majority 1post 129 3.129199 0.4512907 0.03973393 0.07862038
## 5 1 bli 0pre 34 3.156863 0.4122313 0.07069708 0.14383430
## 6 1 bli 1post 23 3.159420 0.4910262 0.10238604 0.21233565
## 7 1 majority 0pre 225 3.204444 0.4439630 0.02959754 0.05832523
## 8 1 majority 1post 163 3.112474 0.4177490 0.03272063 0.06461394
levels(tgc$Time) <- c("0Pre","1Post")
levels(tgc$Treatment) <- c("Control","Intervention")
levels(tgc$DEMOmin) <- c("BLI","White & Asian")
pd <- position_dodge(.5) # dodge points and error bars 0.5 units to the left and right
ggplot(tgc, aes(x=Time, y=FBelong, group=Treatment, linetype=Treatment)) +
geom_errorbar(aes(ymin=FBelong-ci, ymax=FBelong+ci), width=.2, position=pd) +
geom_line(position=pd) +
geom_point(position=pd) +
facet_grid(~DEMOmin) +
ylim(1,4) + theme_bw() +
ggtitle("Pre and Post Effects of Intervention on Majority and BLI Students") +
xlab("Timepoint") + ylab("Belonging Score")
Logistic regression: dichotomized MATLAB grade predicted by ProjectGradePercent, Treatment, DEMOmin (BLI vs. Majority), and the Treatment x DEMOmin interaction
d2$Treatment <- relevel(d2$Treatment, ref="0")
d2$DEMOmin <- relevel(d2$DEMOmin, ref="majority")
table(round(d2$MATLABGradePercent, digits = 2))
##
## 0 0.06 0.07 0.23 0.29 0.33 0.34 0.36 0.39 0.4 0.47 0.49 0.5 0.51 0.54 0.55
## 1 1 1 1 1 1 3 2 1 1 2 1 1 1 1 1
## 0.56 0.57 0.6 0.61 0.62 0.63 0.64 0.65 0.66 0.67 0.68 0.69 0.7 0.71 0.72 0.73
## 1 2 3 1 3 1 1 2 1 1 3 4 3 5 3 2
## 0.74 0.75 0.76 0.77 0.79 0.8 0.81 0.82 0.83 0.84 0.85 0.86 0.87 0.88 0.89 0.9
## 5 2 1 2 6 2 1 1 3 5 3 5 3 3 5 10
## 0.91 0.92 0.93 0.94 0.95 0.96 0.97 0.98 0.99 1
## 4 6 9 10 16 11 14 18 24 379
# dichotomize the MATLAB grade: 1 = 71% or above, 0 = below 71%
d2$MATLABGradePercent_cat <- 1
d2$MATLABGradePercent_cat[d2$MATLABGradePercent < .71] <- 0
d2$MATLABGradePercent_cat[is.na(d2$MATLABGradePercent)] <- NA
table(d2$MATLABGradePercent_cat, useNA = "always")
##
## 0 1 <NA>
## 49 555 37
model <- glm(MATLABGradePercent_cat~ProjectGradePercent + Treatment + DEMOmin + Treatment*DEMOmin, family="binomial", data=d2)
summary(model)
##
## Call:
## glm(formula = MATLABGradePercent_cat ~ ProjectGradePercent +
## Treatment + DEMOmin + Treatment * DEMOmin, family = "binomial",
## data = d2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.4719 0.3275 0.3499 0.3914 1.2602
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.87927 1.21292 -1.549 0.121290
## ProjectGradePercent 4.88640 1.32424 3.690 0.000224 ***
## Treatment1 0.02698 0.34172 0.079 0.937068
## DEMOminbli -1.95219 0.60766 -3.213 0.001315 **
## Treatment1:DEMOminbli 1.61145 0.84622 1.904 0.056873 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 340.06 on 603 degrees of freedom
## Residual deviance: 311.82 on 599 degrees of freedom
## (37 observations deleted due to missingness)
## AIC: 321.82
##
## Number of Fisher Scoring iterations: 5
tab_model(model, p.style = "numeric_stars", transform = NULL)
tab_model output (dependent variable: MATLABGradePercent_cat):

| Predictors                    | Log-Odds | CI            | p      |
|-------------------------------|----------|---------------|--------|
| (Intercept)                   | -1.88    | -4.73 – 0.17  | 0.121  |
| ProjectGradePercent           | 4.89 *** | 2.63 – 7.99   | <0.001 |
| Treatment [1]                 | 0.03     | -0.65 – 0.70  | 0.937  |
| DEMOmin [bli]                 | -1.95 ** | -3.12 – -0.69 | 0.001  |
| Treatment [1] * DEMOmin [bli] | 1.61     | -0.04 – 3.33  | 0.057  |
| Observations                  | 604      |               |        |
| R2 Tjur                       | 0.092    |               |        |
with(model, null.deviance - deviance) # likelihood-ratio chi-square for the full model vs. the null model
## [1] 28.24937
with(model, df.null - df.residual) # degrees of freedom for that test
## [1] 4
with(model, pchisq(null.deviance - deviance, df.null - df.residual, lower.tail = FALSE)) # p-value for the overall likelihood-ratio test
## [1] 1.11023e-05
logLik(model)
## 'log Lik.' -155.9076 (df=5)
So the exponentiated intercept gives the baseline odds, the exponentiated coefficients of the main effects give the odds ratios when the other variable equals 0, and the exponentiated coefficient of the interaction term gives the ratio by which that odds ratio changes.
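A quick sketch making that concrete with the fitted coefficients (names as printed by summary(model); b is just a temporary vector):
b <- coef(model)
exp(b["(Intercept)"])                              # baseline odds: control, majority, ProjectGradePercent = 0
exp(b["Treatment1"])                               # treatment odds ratio among majority students
exp(b["DEMOminbli"])                               # BLI vs. majority odds ratio within the control group
exp(b["Treatment1:DEMOminbli"])                    # ratio by which the treatment odds ratio changes for BLI students
exp(b["Treatment1"] + b["Treatment1:DEMOminbli"])  # implied treatment odds ratio among BLI students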
summary(model)
##
## Call:
## glm(formula = MATLABGradePercent_cat ~ ProjectGradePercent +
## Treatment + DEMOmin + Treatment * DEMOmin, family = "binomial",
## data = d2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.4719 0.3275 0.3499 0.3914 1.2602
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.87927 1.21292 -1.549 0.121290
## ProjectGradePercent 4.88640 1.32424 3.690 0.000224 ***
## Treatment1 0.02698 0.34172 0.079 0.937068
## DEMOminbli -1.95219 0.60766 -3.213 0.001315 **
## Treatment1:DEMOminbli 1.61145 0.84622 1.904 0.056873 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 340.06 on 603 degrees of freedom
## Residual deviance: 311.82 on 599 degrees of freedom
## (37 observations deleted due to missingness)
## AIC: 321.82
##
## Number of Fisher Scoring iterations: 5
exp(coef(model))
## (Intercept) ProjectGradePercent Treatment1
## 0.1527015 132.4753884 1.0273483
## DEMOminbli Treatment1:DEMOminbli
## 0.1419631 5.0100846
confint(model)
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) -4.73302471 0.1724902
## ProjectGradePercent 2.63253885 7.9905110
## Treatment1 -0.64928357 0.7002987
## DEMOminbli -3.11644515 -0.6893222
## Treatment1:DEMOminbli -0.04220871 3.3268940
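For reporting on the odds-ratio scale, the profile intervals above can simply be exponentiated; a one-line sketch:
exp(cbind(OR = coef(model), confint(model)))  # odds ratios with profile-likelihood 95% CIs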
library(epiDisplay)
## Warning: package 'epiDisplay' was built under R version 4.1.3
## Loading required package: foreign
## Loading required package: nnet
##
## Attaching package: 'epiDisplay'
## The following objects are masked from 'package:psych':
##
## alpha, cs, lookup
## The following object is masked from 'package:ggplot2':
##
## alpha
## The following object is masked from 'package:lattice':
##
## dotplot
logistic.display(model)
##
## Logistic regression predicting MATLABGradePercent_cat
##
## crude OR(95%CI) adj. OR(95%CI)
## ProjectGradePercent (cont. var.) 125.98 (9.19,1727.87) 132.48 (9.88,1775.53)
##
## Treatment: 1 vs 0 1.26 (0.7,2.27) 1.03 (0.53,2.01)
##
## DEMOmin: bli vs majority 0.37 (0.17,0.82) 0.14 (0.04,0.47)
##
## Treatment1:DEMOminbli - 5.01 (0.95,26.31)
##
## P(Wald's test) P(LR-test)
## ProjectGradePercent (cont. var.) < 0.001 < 0.001
##
## Treatment: 1 vs 0 0.937 1
##
## DEMOmin: bli vs majority 0.001 1
##
## Treatment1:DEMOminbli 0.057 0.056
##
## Log-likelihood = -155.9076
## No. of observations = 604
## AIC value = 321.8153
exp(1.41)/(1+exp(1.41)) # inverse logit: converts a log-odds value (here 1.41) to a probability
## [1] 0.8037659
# https://stackoverflow.com/questions/56810063/how-can-i-convert-logs-to-odds-ratio-and-logs-to-probabilities-for-logistic-regr
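The same conversion is available through R's built-in inverse logit; a sketch:
plogis(1.41)                  # identical to exp(1.41) / (1 + exp(1.41))
head(plogis(predict(model)))  # per-student probabilities, equivalent to predict(model, type = "response")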
library(fmsb)
##
## Attaching package: 'fmsb'
## The following objects are masked from 'package:DescTools':
##
## CronbachAlpha, VIF
NagelkerkeR2(model)
## $N
## [1] 604
##
## $R2
## [1] 0.1061373
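Another commonly reported option (an addition here, not part of the original analysis) is McFadden's pseudo-R2, which for a binary outcome can be computed from the deviances already stored in the model object:
with(model, 1 - deviance / null.deviance)  # McFadden's pseudo-R2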
glm_probs <- data.frame(probs = predict(model, type="response"))
glm_pred <- glm_probs %>%
mutate(pred = ifelse(probs>.5, 1, 0))
glm_pred <- cbind(orig=model$model$MATLABGradePercent_cat, glm_pred)
glm_pred %>%
count(pred, orig) %>%
spread(orig, n, fill = 0)
## pred 0 1
## 1 0 4 1
## 2 1 45 554
glm_pred %>%
summarize(score = mean(pred == orig))
## score
## 1 0.9238411
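One caveat on that 0.92 (an observation added here, not in the original): 555 of the 604 students are in the passing class, so always predicting 1 would already score roughly 0.92. A sketch of checks that account for the imbalance:
mean(glm_pred$orig == 1)                    # majority-class base rate
with(glm_pred, mean(pred[orig == 0] == 0))  # specificity: proportion of low MATLAB scorers the model actually flags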
car::vif(model)
## ProjectGradePercent Treatment DEMOmin Treatment:DEMOmin
## 1.009898 1.198177 2.085300 2.302348
# https://stats.stackexchange.com/questions/70679/which-variance-inflation-factor-should-i-be-using-textgvif-or-textgvif
# https://stats.stackexchange.com/questions/430412/vif-for-categorical-variable-with-more-than-2-categories
plot(allEffects(model))
plot_model(model, type="eff")
## $ProjectGradePercent
##
## $Treatment
##
## $DEMOmin
plot_model(model, type="int")
tgc <- summarySE(d2, measurevar="MATLABGradePercent_cat", groupvars=c("Treatment","DEMOmin"), na.rm = T)
tgc
## Treatment DEMOmin N MATLABGradePercent_cat sd se ci
## 1 0 majority 273 0.9230769 0.2669587 0.01615708 0.03180882
## 2 0 bli 15 0.6666667 0.4879500 0.12598816 0.27021772
## 3 1 majority 279 0.9318996 0.2523707 0.01510904 0.02974266
## 4 1 bli 37 0.8918919 0.3148001 0.05175282 0.10495958
.89-.66 # difference in MATLAB pass proportion for BLI students: intervention (0.892) minus control (0.667), rounded values from the tgc table above
## [1] 0.23
table(spring2022$Treatment, useNA = "always")
##
## 0 1 <NA>
## 307 334 13
table(d2$Treatment, useNA = "always")
##
## 0 1 <NA>
## 307 334 0
table(d2$DEMOmin, useNA = "always")
##
## majority bli <NA>
## 589 52 0
# bli = black, american indian, mexican, central american, puerto rican, other latinx, native hawaiian/pacific islander
table(spring2022$DEMOblack, useNA = "always")
##
## 0 1 <NA>
## 595 11 48
table(spring2022$DEMOai, useNA = "always")
##
## 0 1 <NA>
## 466 5 183
table(spring2022$DEMOme, useNA = "always")
##
## 0 1 <NA>
## 457 14 183
table(spring2022$DEMOea, useNA = "always")
##
## 0 1 <NA>
## 430 41 183
table(spring2022$DEMOsea, useNA = "always")
##
## 0 1 <NA>
## 453 18 183
table(spring2022$DEMOip, useNA = "always")
##
## 0 1 <NA>
## 417 54 183
table(spring2022$DEMOoa, useNA = "always")
##
## 0 1 <NA>
## 467 4 183
table(spring2022$DEMOmex, useNA = "always")
##
## 0 1 <NA>
## 460 11 183
table(spring2022$DEMOca, useNA = "always")
##
## 0 1 <NA>
## 463 8 183
table(spring2022$DEMOpr, useNA = "always")
##
## 0 1 <NA>
## 470 1 183
table(spring2022$DEMOol, useNA = "always")
##
## 0 1 <NA>
## 455 16 183
table(spring2022$DEMOpi, useNA = "always")
##
## 0 1 <NA>
## 468 3 183
table(spring2022$DEMOw, useNA = "always")
##
## 0 1 <NA>
## 173 433 48
table(spring2022$DEMOnl, useNA = "always")
##
## 0 1 <NA>
## 470 1 183
table(spring2022$DEMOpnr, useNA = "always")
##
## 0 1 <NA>
## 464 7 183
#Gender is coded with 5 values: (1) Man, (2) Woman, (3) Non-binary / gender-queer, (4) Not listed above, (5) Prefer not to respond
#"Not listed above" responses were mostly non-serious joke answers and were originally set to NA
table(spring2022$DEMOgen_orig, useNA = "always")
##
## 1 2 3 4 5 <NA>
## 328 131 3 1 6 185