library(naniar) # for the gg_miss_upset() command
library(psych) # for the describe() command
library(kableExtra) # for tables
library(corrplot) # for correlation plots
library(afex) # for ANOVA
library(emmeans)
library(ggplot2) # for plots
library(dplyr)
# Load the raw survey export, then convert empty ("") and single-space (" ")
# race/ethnicity responses to NA.
df <- read.csv(file = "data/data_9-16.csv")
df <- mutate(df, raceeth = na_if(na_if(raceeth, ""), " "))
Variables of interest: * Race * Gender (self-reported demographic) * Racial identity dissonance * TMFS * SOS warmth * SOS competence * Science interest * Science confidence
We came up with two hypotheses to test in our next meeting:
Racial identity dissonance:
TMFS:
SOS warmth:
SOS competence:
Interest:
Confidence:
# Build the analysis data frame: start from the participant ID and add one
# composite (mean of items) per scale. rowMeans() divides by the number of
# items actually listed, so the divisor cannot drift out of sync with the item
# list. As with plain sum / k arithmetic, a respondent who is missing any item
# of a scale gets NA for that composite.
df2 <- subset(df, select = c(UID))

# Racial identity items
df2$ri_diss <- rowMeans(df[c("rid16", "rid17", "rid18", "rid19")])
df2$ri_si <- rowMeans(df[c("rid9", "rid11", "rid13", "rid14", "rid15")])

# TMFS items
df2$tmfs_ideal <- rowMeans(df[c("tmfs2", "tmfs4", "tmfs5")])
df2$tmfs_consider <- rowMeans(df[c("tmfs1", "tmfs3", "tmfs6")])
df2$tmfs <- rowMeans(df[c("tmfs2", "tmfs3", "tmfs4", "tmfs5")])

# SOS competence: 16 items are listed. The previous version summed these 16
# items but divided by 15, which inflated every score by a factor of 16/15.
# NOTE(review): confirm all 16 items belong to this subscale; if one was
# included by mistake, remove it from the list rather than changing a divisor.
sos_comp_items <- c(
  "sos4", "sos8", "sos11", "sos16", "sos19", "sos20", "sos21", "sos28",
  "sos30", "sos39", "sos7", "sos10", "sos12", "sos13", "sos15", "sos33"
)
df2$sos_comp <- rowMeans(df[sos_comp_items])

# SOS warmth: 11 items
sos_warm_items <- c(
  "sos9", "sos24", "sos26", "sos29", "sos31", "sos32", "sos37", "sos38",
  "sos2", "sos27", "sos25"
)
df2$sos_warm <- rowMeans(df[sos_warm_items])

# Science interest (salg1-4) and science confidence (salg5-7)
df2$int <- rowMeans(df[c("salg1", "salg2", "salg3", "salg4")])
df2$con <- rowMeans(df[c("salg5", "salg6", "salg7")])
# Collapse the free-text gender responses into three codes: "F", "M", and "N"
# for every response that matches neither list (missing values included).
# Matching is exact, so the trailing-space variants are listed explicitly.
female_labels <- c("f", "F", "female", "Female", "female ", "Female ", "Woman")
male_labels <- c("m", "M", "male", "Male", "male ", "Male ")

df2$gender2 <- as.factor(
  ifelse(
    df$gender %in% female_labels, "F",
    ifelse(df$gender %in% male_labels, "M", "N")
  )
)
# Attach the coded race/ethnicity categories from racesort2.csv. The file is
# matched to df2 purely by row position, so stop if the row counts differ:
# a shorter file whose length divides nrow(df2) would otherwise be silently
# recycled by the data-frame assignment.
# NOTE(review): confirm racesort2.csv is in the same row order as the survey
# export; nothing here checks that beyond the row count.
race <- read.csv(file = "racesort2.csv", header = TRUE, na.strings = c("", " "))
stopifnot(nrow(race) == nrow(df2))
df2$race_fin <- race$newrace

# Collapse the coded categories: asian, latino and white are kept; black,
# mena, multi and pi are pooled as "other". Any value not listed here
# (including NA) becomes NA.
race_collapse <- c(
  asian = "asian",
  black = "other",
  latino = "latino",
  mena = "other",
  multi = "other",
  pi = "other",
  white = "white"
)
df2$race_fin2 <- unname(race_collapse[as.character(df2$race_fin)])
# Drop fully duplicated rows, plot the missing-data pattern, and compute
# descriptives for every column except the first (UID).
df4 <- distinct(df2)
gg_miss_upset(df4, nsets = 10)
desc <- describe(df4[-1])

# Render the descriptives table: rows with |kurtosis| > 2 are set in bold,
# rows with |skew| > 2 in italics.
heavy_tails <- which(abs(desc$kurtosis) > 2)
lopsided <- which(abs(desc$skew) > 2)
kable(round(desc, digits = 2)) %>%
  kable_styling() %>%
  row_spec(heavy_tails, bold = TRUE) %>%
  row_spec(lopsided, italic = TRUE)
| | vars | n | mean | sd | median | trimmed | mad | min | max | range | skew | kurtosis | se |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| ri_diss | 1 | 252 | 2.18 | 0.90 | 2.25 | 2.11 | 1.11 | 1.00 | 5.00 | 4.00 | 0.57 | 0.18 | 0.06 |
| ri_si | 2 | 252 | 3.25 | 0.81 | 3.20 | 3.26 | 0.89 | 1.00 | 5.00 | 4.00 | -0.18 | -0.02 | 0.05 |
| tmfs_ideal | 3 | 233 | 5.00 | 1.74 | 5.00 | 5.11 | 1.98 | 1.00 | 8.33 | 7.33 | -0.44 | -0.45 | 0.11 |
| tmfs_consider | 4 | 233 | 4.21 | 1.58 | 4.00 | 4.22 | 1.48 | 1.00 | 7.33 | 6.33 | -0.04 | -0.72 | 0.10 |
| tmfs | 5 | 233 | 4.81 | 1.58 | 5.00 | 4.91 | 1.85 | 1.00 | 7.50 | 6.50 | -0.50 | -0.31 | 0.10 |
| sos_comp | 6 | 122 | 5.58 | 0.59 | 5.67 | 5.63 | 0.64 | 3.40 | 6.40 | 3.00 | -0.87 | 1.14 | 0.05 |
| sos_warm | 7 | 122 | 4.44 | 0.76 | 4.41 | 4.45 | 0.74 | 2.55 | 6.00 | 3.45 | -0.07 | -0.54 | 0.07 |
| int | 8 | 264 | 3.59 | 0.96 | 3.75 | 3.64 | 1.11 | 1.25 | 5.00 | 3.75 | -0.40 | -0.64 | 0.06 |
| con | 9 | 264 | 3.40 | 0.92 | 3.33 | 3.40 | 0.99 | 1.00 | 5.00 | 4.00 | -0.02 | -0.46 | 0.06 |
| gender2* | 10 | 291 | 1.55 | 0.75 | 1.00 | 1.44 | 0.00 | 1.00 | 3.00 | 2.00 | 0.95 | -0.61 | 0.04 |
| race_fin* | 11 | 247 | 3.40 | 2.32 | 3.00 | 3.26 | 2.97 | 1.00 | 7.00 | 6.00 | 0.33 | -1.45 | 0.15 |
| race_fin2* | 12 | 247 | 2.17 | 1.11 | 2.00 | 2.09 | 1.48 | 1.00 | 4.00 | 3.00 | 0.34 | -1.30 | 0.07 |
# Frequency of each recoded gender category, with an explicit NA count.
table(df4[["gender2"]], useNA = "always")
F M N
# Frequency of each collapsed race/ethnicity category, with an explicit NA count.
table(df4[["race_fin2"]], useNA = "always")
asian latino other white
Multivariate outliers were identified using Mahalanobis’ distance. Two outliers were dropped.
# Keep complete cases on UID plus the numeric composites: columns 11-13 of df4
# (gender2, race_fin, race_fin2) are removed before na.omit().
d <- na.omit(df4[, -(11:13)])

# Distance of each row from the centroid of the composites (UID, the first
# column, is excluded), then store a 1-decimal copy on d and inspect it.
composites <- d[-1]
m_dist <- mahalanobis(
  composites,
  center = colMeans(composites),
  cov = cov(composites)
)
d$MD <- round(m_dist, 1)
plot(d$MD)
describe(m_dist)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 96 8.91 3.85 8.09 8.59 3.28 2.62 22.83 20.21 0.9 0.68 0.39
# Chi-square cutoff (99th percentile) for the Mahalanobis distances. The
# degrees of freedom must equal the number of variables that went into the
# distance. By this point d also carries the MD column, so ncol(d) - 1 counts
# one column too many; count only the measured variables instead. "outlier" is
# excluded as well so re-running this block gives the same cutoff.
md_vars <- setdiff(names(d), c("UID", "MD", "outlier"))
cut <- qchisq(.99, df = length(md_vars))

# Horizontal reference line at the cutoff on the distance plot drawn earlier.
abline(h = cut, col = "red")

# Flag rows whose distance exceeds the cutoff and count them.
d$outlier <- d$MD > cut
table(d$outlier)
##
## FALSE
## 96
# Collect the flagged rows (UID + flag) and build df3 as df2 without those UIDs.
# NOTE(review): df3 is derived from df2 rather than the de-duplicated df4, and
# the analyses visible later in this script use df4/df5 -- confirm whether the
# outlier exclusion is meant to carry through to them.
outs <- d[d$outlier, c("UID", "outlier")]
df3 <- df2[!(df2$UID %in% outs$UID), ]
Hypothesis 1: we will see a gender difference in science interest when using participants’ self-reported gender.
No significant difference in science interest when using participants’ gender. Significant difference in science confidence (p < .001). Difference is between men (M = 3.77, SE = .11) and women (M = 3.27, SE = .07).
No significant difference in perceptions of scientist competence (p = .296). Significant difference in perceptions of scientist warmth (p = .051). Difference is between men (M = 4.23, SE = .13) and women (M = 4.56, SE = .08).
# aov_out <- aov_ez(id = "UID", dv = "int", data = df4, between = c("gender2"))
# nice(aov_out)
# afex_plot(aov_out, x = "gender2")
#
# aov_out <- aov_ez(id = "UID", dv = "con", data = df4, between = c("gender2"))
# nice(aov_out)
# emmeans(aov_out, specs = "gender2")
# pairs(emmeans(aov_out, specs = "gender2"))
# afex_plot(aov_out, x = "gender2")
Hypothesis 2: we will not see a significant relationship between science interest and gender identity using the TMFS.
No significant relationship between science interest and TMFS. No significant relationship between science confidence and TMFS.
# out <- corr.test(subset(df4, select=c(int, con, tmfs, sos_warm, sos_comp)))
#
# corrplot(out$r, type="upper", method = "color", tl.col = "black", tl.cex = .75,
# p.mat = out$p,
# sig.level = c(.001, .01, .05), pch.cex = .9,
# insig = "label_sig", pch.col = "white",
# order = "hclust")
#
# plot(df4$tmfs, df4$int, xlab = "TMFS", ylab = "Interest")
# plot(df4$tmfs, df4$con, xlab = "TMFS", ylab = "Confidence")
#
# out$r
# out$p
Interest and confidence by race/ethnicity.
# Keep only rows coded "F" or "M" (drop the catch-all "N" code), then
# cross-tabulate gender by collapsed race/ethnicity, showing NA counts.
df5 <- df4[which(df4$gender2 != "N"), ]
table(df5[["gender2"]], df5[["race_fin2"]], useNA = "always")
##
## asian latino other white <NA>
## F 66 39 41 30 1
## M 28 11 20 8 1
## N 0 0 0 0 0
## <NA> 0 0 0 0 0
# Remove the now-empty "N" level so it does not enter the design.
df5$gender2 <- droplevels(df5$gender2)

# Between-subjects ANOVA: science interest by gender x race/ethnicity.
aov_out <- aov_ez(
  id = "UID",
  dv = "int",
  data = df5,
  between = c("gender2", "race_fin2")
)
nice(aov_out)
## Anova Table (Type 3 tests)
##
## Response: int
## Effect df MSE F ges p.value
## 1 gender2 1, 235 0.90 0.62 .003 .431
## 2 race_fin2 3, 235 0.90 0.67 .009 .570
## 3 gender2:race_fin2 3, 235 0.90 3.31 * .041 .021
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1
# Interest plots, with the y axis fixed to 1-5: by gender, by race/ethnicity,
# and the gender x race/ethnicity interaction.
afex_plot(object = aov_out, x = "gender2") +
  ylim(1, 5)
afex_plot(object = aov_out, x = "race_fin2") +
  ylim(1, 5)
afex_plot(object = aov_out, x = "race_fin2", trace = "gender2") +
  ylim(1, 5)

# Estimated marginal means for gender within each race/ethnicity group.
emmeans(object = aov_out, specs = "gender2", by = "race_fin2")
## race_fin2 = asian:
## gender2 emmean SE df lower.CL upper.CL
## F 3.51 0.117 235 3.28 3.74
## M 3.60 0.179 235 3.24 3.95
##
## race_fin2 = latino:
## gender2 emmean SE df lower.CL upper.CL
## F 3.37 0.152 235 3.07 3.66
## M 4.07 0.286 235 3.50 4.63
##
## race_fin2 = other:
## gender2 emmean SE df lower.CL upper.CL
## F 3.52 0.148 235 3.23 3.82
## M 3.96 0.212 235 3.54 4.38
##
## race_fin2 = white:
## gender2 emmean SE df lower.CL upper.CL
## F 3.88 0.173 235 3.54 4.22
## M 3.12 0.336 235 2.46 3.79
##
## Confidence level used: 0.95
# Pairwise gender contrasts (interest) within each race/ethnicity group.
emmeans(aov_out, specs = "gender2", by = "race_fin2") %>%
  pairs()
## race_fin2 = asian:
## contrast estimate SE df t.ratio p.value
## F - M -0.0906 0.214 235 -0.423 0.6724
##
## race_fin2 = latino:
## contrast estimate SE df t.ratio p.value
## F - M -0.7028 0.324 235 -2.169 0.0311
##
## race_fin2 = other:
## contrast estimate SE df t.ratio p.value
## F - M -0.4381 0.259 235 -1.692 0.0919
##
## race_fin2 = white:
## contrast estimate SE df t.ratio p.value
## F - M 0.7583 0.378 235 2.008 0.0458
# Pairwise race/ethnicity contrasts (interest) within each gender.
emmeans(aov_out, specs = "race_fin2", by = "gender2") %>%
  pairs()
## gender2 = F:
## contrast estimate SE df t.ratio p.value
## asian - latino 0.1422 0.192 235 0.742 0.8801
## asian - other -0.0168 0.189 235 -0.089 0.9997
## asian - white -0.3758 0.209 235 -1.798 0.2770
## latino - other -0.1590 0.212 235 -0.749 0.8771
## latino - white -0.5179 0.231 235 -2.247 0.1138
## other - white -0.3589 0.228 235 -1.574 0.3956
##
## gender2 = M:
## contrast estimate SE df t.ratio p.value
## asian - latino -0.4700 0.338 235 -1.391 0.5060
## asian - other -0.3643 0.278 235 -1.311 0.5570
## asian - white 0.4732 0.381 235 1.243 0.5998
## latino - other 0.1057 0.356 235 0.297 0.9909
## latino - white 0.9432 0.441 235 2.138 0.1441
## other - white 0.8375 0.397 235 2.109 0.1532
##
## P value adjustment: tukey method for comparing a family of 4 estimates
# Between-subjects ANOVA: science confidence by gender x race/ethnicity.
# This overwrites aov_out from the interest model.
aov_out <- aov_ez(
  id = "UID",
  dv = "con",
  data = df5,
  between = c("gender2", "race_fin2")
)
nice(aov_out)
## Anova Table (Type 3 tests)
##
## Response: con
## Effect df MSE F ges p.value
## 1 gender2 1, 235 0.81 15.94 *** .064 <.001
## 2 race_fin2 3, 235 0.81 0.44 .006 .723
## 3 gender2:race_fin2 3, 235 0.81 1.00 .013 .392
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1
# Confidence by gender, with the y axis fixed to 1-5.
afex_plot(object = aov_out, x = "gender2") +
  ylim(1, 5)

# Estimated marginal means for gender within each race/ethnicity group.
emmeans(object = aov_out, specs = "gender2", by = "race_fin2")
## race_fin2 = asian:
## gender2 emmean SE df lower.CL upper.CL
## F 3.34 0.111 235 3.13 3.56
## M 3.61 0.170 235 3.27 3.94
##
## race_fin2 = latino:
## gender2 emmean SE df lower.CL upper.CL
## F 3.13 0.144 235 2.84 3.41
## M 3.79 0.271 235 3.25 4.32
##
## race_fin2 = other:
## gender2 emmean SE df lower.CL upper.CL
## F 3.18 0.140 235 2.90 3.46
## M 3.97 0.201 235 3.57 4.36
##
## race_fin2 = white:
## gender2 emmean SE df lower.CL upper.CL
## F 3.40 0.164 235 3.08 3.72
## M 3.96 0.318 235 3.33 4.58
##
## Confidence level used: 0.95
# Overall gender contrast for confidence (no `by` grouping).
emmeans(aov_out, specs = "gender2") %>%
  pairs()
## contrast estimate SE df t.ratio p.value
## F - M -0.567 0.142 235 -3.993 0.0001
##
## Results are averaged over the levels of: race_fin2
Perceptions of scientists by gender and race/ethnicity.
We will find that women participants will perceive scientists as more warm, while male participants will perceive scientists as more competent.
We will see less of a gender difference in perceptions of warmth and competence among students of Color.
# Gender and perceptions of scientists.
# Between-subjects ANOVA: perceived scientist competence by gender x
# race/ethnicity. This overwrites aov_out from the previous model.
aov_out <- aov_ez(
  id = "UID",
  dv = "sos_comp",
  data = df5,
  between = c("gender2", "race_fin2")
)
nice(aov_out)
## Anova Table (Type 3 tests)
##
## Response: sos_comp
## Effect df MSE F ges p.value
## 1 gender2 1, 106 0.32 0.00 <.001 .965
## 2 race_fin2 3, 106 0.32 0.32 .009 .809
## 3 gender2:race_fin2 3, 106 0.32 1.07 .030 .363
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1
# Competence plots, with the y axis fixed to 1-7: by gender, by
# race/ethnicity, and the gender x race/ethnicity interaction.
afex_plot(object = aov_out, x = "gender2") +
  ylim(1, 7)
afex_plot(object = aov_out, x = "race_fin2") +
  ylim(1, 7)
afex_plot(object = aov_out, x = "race_fin2", trace = "gender2") +
  ylim(1, 7)

# Between-subjects ANOVA: perceived scientist warmth by gender x
# race/ethnicity. This overwrites aov_out from the competence model.
aov_out <- aov_ez(
  id = "UID",
  dv = "sos_warm",
  data = df5,
  between = c("gender2", "race_fin2")
)
nice(aov_out)
## Anova Table (Type 3 tests)
##
## Response: sos_warm
## Effect df MSE F ges p.value
## 1 gender2 1, 106 0.53 5.36 * .048 .022
## 2 race_fin2 3, 106 0.53 2.25 + .060 .087
## 3 gender2:race_fin2 3, 106 0.53 3.65 * .094 .015
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1
# Warmth plots, with the y axis fixed to 1-7: by gender, by race/ethnicity,
# and the gender x race/ethnicity interaction.
afex_plot(object = aov_out, x = "gender2") +
  ylim(1, 7)
afex_plot(object = aov_out, x = "race_fin2") +
  ylim(1, 7)
afex_plot(object = aov_out, x = "race_fin2", trace = "gender2") +
  ylim(1, 7)

# Estimated marginal means for gender within each race/ethnicity group.
emmeans(object = aov_out, specs = "gender2", by = "race_fin2")
## race_fin2 = asian:
## gender2 emmean SE df lower.CL upper.CL
## F 4.66 0.133 106 4.39 4.92
## M 4.04 0.195 106 3.65 4.42
##
## race_fin2 = latino:
## gender2 emmean SE df lower.CL upper.CL
## F 4.51 0.146 106 4.22 4.80
## M 4.56 0.297 106 3.97 5.15
##
## race_fin2 = other:
## gender2 emmean SE df lower.CL upper.CL
## F 4.48 0.163 106 4.15 4.80
## M 4.74 0.243 106 4.26 5.22
##
## race_fin2 = white:
## gender2 emmean SE df lower.CL upper.CL
## F 4.58 0.297 106 3.99 5.17
## M 3.30 0.364 106 2.57 4.02
##
## Confidence level used: 0.95
# Pairwise gender contrasts (warmth) within each race/ethnicity group.
emmeans(aov_out, specs = "gender2", by = "race_fin2") %>%
  pairs()
## race_fin2 = asian:
## contrast estimate SE df t.ratio p.value
## F - M 0.6186 0.236 106 2.624 0.0100
##
## race_fin2 = latino:
## contrast estimate SE df t.ratio p.value
## F - M -0.0479 0.331 106 -0.145 0.8853
##
## race_fin2 = other:
## contrast estimate SE df t.ratio p.value
## F - M -0.2601 0.292 106 -0.890 0.3757
##
## race_fin2 = white:
## contrast estimate SE df t.ratio p.value
## F - M 1.2803 0.470 106 2.723 0.0076
# Pairwise race/ethnicity contrasts (warmth) within each gender.
emmeans(aov_out, specs = "race_fin2", by = "gender2") %>%
  pairs()
## gender2 = F:
## contrast estimate SE df t.ratio p.value
## asian - latino 0.1448 0.197 106 0.734 0.8831
## asian - other 0.1803 0.210 106 0.857 0.8267
## asian - white 0.0818 0.326 106 0.251 0.9944
## latino - other 0.0355 0.219 106 0.162 0.9985
## latino - white -0.0630 0.331 106 -0.190 0.9975
## other - white -0.0985 0.339 106 -0.290 0.9914
##
## gender2 = M:
## contrast estimate SE df t.ratio p.value
## asian - latino -0.5216 0.355 106 -1.468 0.4607
## asian - other -0.6984 0.311 106 -2.244 0.1182
## asian - white 0.7435 0.413 106 1.800 0.2790
## latino - other -0.1768 0.384 106 -0.460 0.9674
## latino - white 1.2652 0.470 106 2.691 0.0407
## other - white 1.4419 0.438 106 3.294 0.0072
##
## P value adjustment: tukey method for comparing a family of 4 estimates