1 Load Libraries

library(naniar) # for the gg_miss_upset() command
library(psych) # for the describe() command
library(kableExtra) # for tables
library(corrplot) # for correlation plots
library(afex) # for ANOVA
library(emmeans)
library(ggplot2) # for plots
library(dplyr)

2 Load Data

# Read the data file and recode blank ("") or single-space (" ") raceeth
# responses as NA in one pipeline.
df <- read.csv(file = "data/data_9-16.csv") %>%
  mutate(raceeth = na_if(na_if(raceeth, ""), " "))

3 Notes

Variables of interest:

  • Race
  • Gender (self-reported demographic)
  • Racial identity dissonance
  • TMFS
  • SOS warmth
  • SOS competence
  • Science interest
  • Science confidence

We came up with two hypotheses to test in our next meeting:

  • Hypothesis 1: we will see a gender difference in science interest when using participants’ self-reported gender.
  • Hypothesis 2: we will not see a significant relationship between science interest and gender identity using the TMFS.

Racial identity dissonance:

  • I keep my distance from most people with my background
  • I have to turn my back on poor people from my race/background in order to get ahead
  • I can barely deal with my own issues, let alone those in communities like the one I grew up in
  • I wish I were more comfortable around others from my background

TMFS:

  • I consider myself: (consider)
  • Ideally, I would like to be: (ideal)(tmfs)
  • Traditionally, my interests would be considered as: (consider)(tmfs)
  • Traditionally, my attitudes and beliefs would be considered: (ideal)(tmfs)
  • Traditionally, my behavior would be considered: (ideal)(tmfs)
  • Traditionally, my outer appearance would be considered: (consider)

SOS warmth:

  • They have fun with colleagues at work
  • They have happy marriages
  • They were committed to their hobbies (like sports or art) as children.
  • They struggled to do well in school.
  • They defy expectations about what they’re ‘supposed’ to be like.
  • They had parents who struggled to make ends meet.
  • They enjoy literature and writing.
  • They are warm and caring people.
  • They are devoted to their families.
  • They are determined to have fun with their work.
  • They work with a lot of people everyday.

SOS competence:

  • They know a lot about the latest discoveries
  • They are the ones who know how equipment works
  • They are careful with expensive instruments
  • They are competitive
  • They are cooperative
  • They are independent
  • They are work oriented
  • They are technically competent
  • They are competent
  • They are collaborative
  • They are highly focused
  • They have imagination.
  • They think outside the box.
  • They succeed in spite of failure.
  • They are forward thinkers who want to improve the world.

Interest:

  • Enthusiastic about this subject
  • Interested in discussing this subject area with friends or family
  • Interested in taking or planning to take additional classes in this subject
  • Interested in pursuing a science career

Confidence:

  • Confident that I understand this subject
  • Confident that I can do this subject
  • Comfortable working with complex ideas

4 Create Variables

# Build the analysis data frame: start from the participant ID (UID) and add
# one composite score per scale. Every composite is the row mean of its items.
# rowMeans() with its default na.rm = FALSE returns NA whenever any item is
# missing, which is the same missing-data behaviour as summing the items and
# dividing by the item count, but the divisor can no longer drift out of sync
# with the item list.
item_mean <- function(items) unname(rowMeans(df[, items, drop = FALSE]))

df2 <- subset(df, select = c(UID))
df2$ri_diss <- item_mean(c("rid16", "rid17", "rid18", "rid19"))
df2$ri_si <- item_mean(c("rid9", "rid11", "rid13", "rid14", "rid15"))
df2$tmfs_ideal <- item_mean(c("tmfs2", "tmfs4", "tmfs5"))
df2$tmfs_consider <- item_mean(c("tmfs1", "tmfs3", "tmfs6"))
df2$tmfs <- item_mean(c("tmfs2", "tmfs3", "tmfs4", "tmfs5"))
# This composite previously summed these 16 items but divided by 15, which
# inflated every score (and allowed values above the top of the response scale).
# NOTE(review): the notes list 15 competence items while 16 are averaged here;
# confirm whether one of these items should be removed from the composite.
df2$sos_comp <- item_mean(c(
  "sos4", "sos8", "sos11", "sos16", "sos19", "sos20", "sos21", "sos28",
  "sos30", "sos39", "sos7", "sos10", "sos12", "sos13", "sos15", "sos33"
))
df2$sos_warm <- item_mean(c(
  "sos9", "sos24", "sos26", "sos29", "sos31", "sos32", "sos37", "sos38",
  "sos2", "sos27", "sos25"
))
df2$int <- item_mean(c("salg1", "salg2", "salg3", "salg4"))
df2$con <- item_mean(c("salg5", "salg6", "salg7"))

# Collapse the free-text gender responses into three codes: F, M, and N.
# Responses are lower-cased and trimmed first so that capitalisation and stray
# whitespace ("Female ", "MALE", ...) no longer need to be listed one by one.
# "man" is mapped to M to mirror the existing "woman" -> F mapping.
# Anything unrecognised, including missing responses, keeps the default "N".
gender_clean <- tolower(trimws(as.character(df$gender)))

df2$gender2 <- "N"
df2$gender2[gender_clean %in% c("f", "female", "woman")] <- "F"
df2$gender2[gender_clean %in% c("m", "male", "man")] <- "M"
df2$gender2 <- as.factor(df2$gender2)

# Attach the hand-sorted race/ethnicity codes. The codes are matched to df2 by
# row position, so the two tables must have the same number of rows; without
# this check a shorter vector that divides evenly would be recycled silently.
# NOTE(review): positional matching also assumes racesort2.csv is in the same
# row order as the data file - confirm, or join on an ID column if one exists.
race <- read.csv(file = "racesort2.csv", header = TRUE, na.strings = c("", " "))
stopifnot(nrow(race) == nrow(df2))
df2$race_fin <- race$newrace

# Collapse the detailed codes into four analysis groups. Codes that are
# missing or not listed here become NA.
race_groups <- c(
  asian = "asian",
  black = "other",
  latino = "latino",
  mena = "other",
  multi = "other",
  pi = "other",
  white = "white"
)
df2$race_fin2 <- unname(race_groups[as.character(df2$race_fin)])

# Drop rows that are exact duplicates across every column (including UID).
df4 <- distinct(df2)

4.1 View Missing Data

# Upset plot of missing-value patterns, showing up to 10 variable sets.
df4 %>%
  gg_miss_upset(nsets = 10)

4.2 View Item Normality

# Descriptive statistics for every column except the first (UID), rendered as
# a styled table. Rows whose kurtosis is beyond +/-2 are bolded and rows whose
# skew is beyond +/-2 are italicised. which() drops NA comparisons, so
# variables with undefined skew/kurtosis are simply left unstyled.
desc <- describe(df4[-1])
kable(round(desc, digits = 2)) %>%
  kable_styling() %>%
  row_spec(which(abs(desc$kurtosis) > 2), bold = TRUE) %>%
  row_spec(which(abs(desc$skew) > 2), italic = TRUE)
vars n mean sd median trimmed mad min max range skew kurtosis se
ri_diss 1 252 2.18 0.90 2.25 2.11 1.11 1.00 5.00 4.00 0.57 0.18 0.06
ri_si 2 252 3.25 0.81 3.20 3.26 0.89 1.00 5.00 4.00 -0.18 -0.02 0.05
tmfs_ideal 3 233 5.00 1.74 5.00 5.11 1.98 1.00 8.33 7.33 -0.44 -0.45 0.11
tmfs_consider 4 233 4.21 1.58 4.00 4.22 1.48 1.00 7.33 6.33 -0.04 -0.72 0.10
tmfs 5 233 4.81 1.58 5.00 4.91 1.85 1.00 7.50 6.50 -0.50 -0.31 0.10
sos_comp 6 122 5.58 0.59 5.67 5.63 0.64 3.40 6.40 3.00 -0.87 1.14 0.05
sos_warm 7 122 4.44 0.76 4.41 4.45 0.74 2.55 6.00 3.45 -0.07 -0.54 0.07
int 8 264 3.59 0.96 3.75 3.64 1.11 1.25 5.00 3.75 -0.40 -0.64 0.06
con 9 264 3.40 0.92 3.33 3.40 0.99 1.00 5.00 4.00 -0.02 -0.46 0.06
gender2* 10 291 1.55 0.75 1.00 1.44 0.00 1.00 3.00 2.00 0.95 -0.61 0.04
race_fin* 11 247 3.40 2.32 3.00 3.26 2.97 1.00 7.00 6.00 0.33 -1.45 0.15
race_fin2* 12 247 2.17 1.11 2.00 2.09 1.48 1.00 4.00 3.00 0.34 -1.30 0.07
# Frequency of each recoded gender category; useNA = "always" adds an NA
# count even when it is zero.
table(df4$gender2, useNA = "always")

F M N 177 68 46 0

# Frequency of each collapsed race/ethnicity group; useNA = "always" adds the
# count of participants with no race code.
table(df4$race_fin2, useNA = "always")

asian latino other white 96 51 62 38 44

4.3 Check Outliers

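Outliers are screened using Mahalanobis’ distance, with a χ² cutoff at p = .99. Two outliers dropped. (Note: the output below flags no cases as outliers, so this count should be re-checked.)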

# Multivariate outlier screen on the composite scores. The categorical columns
# are dropped by name (rather than by column position, which silently selects
# the wrong columns if a variable is ever added or reordered), and only
# complete cases are kept.
d <- na.omit(subset(df4, select = -c(gender2, race_fin, race_fin2)))

# Every remaining column except the participant ID enters the distance.
score_cols <- setdiff(names(d), "UID")
m_dist <- mahalanobis(d[score_cols], colMeans(d[score_cols]), cov(d[score_cols]))
d$MD <- round(m_dist, 1)
plot(d$MD)
describe(m_dist)
##    vars  n mean   sd median trimmed  mad  min   max range skew kurtosis   se
## X1    1 96 8.91 3.85   8.09    8.59 3.28 2.62 22.83 20.21  0.9     0.68 0.39
# Chi-square cutoff (p = .99) for the Mahalanobis distances. The degrees of
# freedom must equal the number of variables that entered the distance. By this
# point d also holds UID and the appended MD column, so ncol(d) - 1 overcounts
# by one; count the score columns explicitly instead ("outlier" is excluded too
# so the chunk can be re-run safely).
n_vars <- length(setdiff(names(d), c("UID", "MD", "outlier")))
cut <- qchisq(.99, df = n_vars)
abline(h = cut, col = "red")

# Flag every case whose distance exceeds the cutoff.
d$outlier <- d$MD > cut
table(d$outlier)
## 
## FALSE 
##    96
# Collect the IDs of flagged cases and build df3 as df2 without them.
# NOTE(review): df3 is derived from df2, but the later analyses visible in
# this file run on df4/df5, so this exclusion does not reach them - confirm
# which data frame is intended downstream.
outs <- subset(d, subset = outlier, select = c(UID, outlier))
df3 <- subset(df2, !(UID %in% outs$UID))

5 H1

Hypothesis 1: we will see a gender difference in science interest when using participants’ self-reported gender.

  • Int/Con: lower score = less interested/less confident
  • Gender: F = woman, M = man, N = non-binary, genderqueer, or NA

No significant difference in science interest when using participants’ gender. Significant difference in science confidence (p < .001). Difference is between men (M = 3.77, SE = .11) and women (M = 3.27, SE = .07).

No significant difference in perceptions of scientist competence (p = .296). Marginally significant difference in perceptions of scientist warmth (p = .051). Difference is between men (M = 4.23, SE = .13) and women (M = 4.56, SE = .08).

# aov_out <- aov_ez(id = "UID", dv = "int", data = df4, between = c("gender2"))
# nice(aov_out)
# afex_plot(aov_out, x = "gender2")
# 
# aov_out <- aov_ez(id = "UID", dv = "con", data = df4, between = c("gender2"))
# nice(aov_out)
# emmeans(aov_out, specs = "gender2")
# pairs(emmeans(aov_out, specs = "gender2"))
# afex_plot(aov_out, x = "gender2")

6 H2

Hypothesis 2: we will not see a significant relationship between science interest and gender identity using the TMFS.

  • TMFS: lower score = more masculine
  • Int/Con: lower score = less interested/less confident

No significant relationship between science interest and TMFS. No significant relationship between science confidence and TMFS.

# out <- corr.test(subset(df4, select=c(int, con, tmfs, sos_warm, sos_comp)))
# 
# corrplot(out$r, type="upper", method = "color", tl.col = "black", tl.cex = .75,
#          p.mat = out$p,
#          sig.level = c(.001, .01, .05), pch.cex = .9,
#          insig = "label_sig", pch.col = "white",
#          order = "hclust")
# 
# plot(df4$tmfs, df4$int, xlab = "TMFS", ylab = "Interest")
# plot(df4$tmfs, df4$con, xlab = "TMFS", ylab = "Confidence")
# 
# out$r
# out$p

7 H4

Interest and confidence by race/ethnicity.

# Keep only the rows whose gender code is not "N" (rows where the comparison
# is NA are dropped as well), then cross-tabulate gender by collapsed
# race/ethnicity group, including NA margins.
df5 <- df4[which(df4$gender2 != "N"), ]
table(df5[["gender2"]], df5[["race_fin2"]], useNA = "always")
##       
##        asian latino other white <NA>
##   F       66     39    41    30    1
##   M       28     11    20     8    1
##   N        0      0     0     0    0
##   <NA>     0      0     0     0    0
# Remove the now-empty "N" level so gender enters the model with two levels.
df5[["gender2"]] <- droplevels(df5[["gender2"]])

# Between-subjects ANOVA on science interest: gender x race/ethnicity group.
aov_out <- aov_ez(
  id = "UID",
  dv = "int",
  data = df5,
  between = c("gender2", "race_fin2")
)
nice(aov_out)
## Anova Table (Type 3 tests)
## 
## Response: int
##              Effect     df  MSE      F  ges p.value
## 1           gender2 1, 235 0.90   0.62 .003    .431
## 2         race_fin2 3, 235 0.90   0.67 .009    .570
## 3 gender2:race_fin2 3, 235 0.90 3.31 * .041    .021
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1
# Interest plots with the y-axis fixed to 1-5: by gender, by race/ethnicity
# group, and race/ethnicity group with a separate trace per gender.
afex_plot(aov_out, x = "gender2") + ylim(c(1, 5))

afex_plot(aov_out, x = "race_fin2") + ylim(c(1, 5))

afex_plot(aov_out, x = "race_fin2", trace = "gender2") + ylim(c(1, 5))

# Estimated marginal means for each gender within each race/ethnicity group.
aov_out %>%
  emmeans(specs = "gender2", by = "race_fin2")
## race_fin2 = asian:
##  gender2 emmean    SE  df lower.CL upper.CL
##  F         3.51 0.117 235     3.28     3.74
##  M         3.60 0.179 235     3.24     3.95
## 
## race_fin2 = latino:
##  gender2 emmean    SE  df lower.CL upper.CL
##  F         3.37 0.152 235     3.07     3.66
##  M         4.07 0.286 235     3.50     4.63
## 
## race_fin2 = other:
##  gender2 emmean    SE  df lower.CL upper.CL
##  F         3.52 0.148 235     3.23     3.82
##  M         3.96 0.212 235     3.54     4.38
## 
## race_fin2 = white:
##  gender2 emmean    SE  df lower.CL upper.CL
##  F         3.88 0.173 235     3.54     4.22
##  M         3.12 0.336 235     2.46     3.79
## 
## Confidence level used: 0.95
# Gender contrast (F - M) on interest within each race/ethnicity group.
aov_out %>%
  emmeans(specs = "gender2", by = "race_fin2") %>%
  pairs()
## race_fin2 = asian:
##  contrast estimate    SE  df t.ratio p.value
##  F - M     -0.0906 0.214 235  -0.423  0.6724
## 
## race_fin2 = latino:
##  contrast estimate    SE  df t.ratio p.value
##  F - M     -0.7028 0.324 235  -2.169  0.0311
## 
## race_fin2 = other:
##  contrast estimate    SE  df t.ratio p.value
##  F - M     -0.4381 0.259 235  -1.692  0.0919
## 
## race_fin2 = white:
##  contrast estimate    SE  df t.ratio p.value
##  F - M      0.7583 0.378 235   2.008  0.0458
# Pairwise race/ethnicity group contrasts on interest within each gender.
aov_out %>%
  emmeans(specs = "race_fin2", by = "gender2") %>%
  pairs()
## gender2 = F:
##  contrast       estimate    SE  df t.ratio p.value
##  asian - latino   0.1422 0.192 235   0.742  0.8801
##  asian - other   -0.0168 0.189 235  -0.089  0.9997
##  asian - white   -0.3758 0.209 235  -1.798  0.2770
##  latino - other  -0.1590 0.212 235  -0.749  0.8771
##  latino - white  -0.5179 0.231 235  -2.247  0.1138
##  other - white   -0.3589 0.228 235  -1.574  0.3956
## 
## gender2 = M:
##  contrast       estimate    SE  df t.ratio p.value
##  asian - latino  -0.4700 0.338 235  -1.391  0.5060
##  asian - other   -0.3643 0.278 235  -1.311  0.5570
##  asian - white    0.4732 0.381 235   1.243  0.5998
##  latino - other   0.1057 0.356 235   0.297  0.9909
##  latino - white   0.9432 0.441 235   2.138  0.1441
##  other - white    0.8375 0.397 235   2.109  0.1532
## 
## P value adjustment: tukey method for comparing a family of 4 estimates
# Between-subjects ANOVA on science confidence: gender x race/ethnicity group.
# This overwrites the previous aov_out.
aov_out <- aov_ez(
  id = "UID",
  dv = "con",
  data = df5,
  between = c("gender2", "race_fin2")
)
nice(aov_out)
## Anova Table (Type 3 tests)
## 
## Response: con
##              Effect     df  MSE         F  ges p.value
## 1           gender2 1, 235 0.81 15.94 *** .064   <.001
## 2         race_fin2 3, 235 0.81      0.44 .006    .723
## 3 gender2:race_fin2 3, 235 0.81      1.00 .013    .392
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1
# Confidence by gender, with the y-axis fixed to 1-5.
afex_plot(aov_out, x = "gender2") + ylim(c(1, 5))

# Estimated marginal means for each gender within each race/ethnicity group.
aov_out %>%
  emmeans(specs = "gender2", by = "race_fin2")
## race_fin2 = asian:
##  gender2 emmean    SE  df lower.CL upper.CL
##  F         3.34 0.111 235     3.13     3.56
##  M         3.61 0.170 235     3.27     3.94
## 
## race_fin2 = latino:
##  gender2 emmean    SE  df lower.CL upper.CL
##  F         3.13 0.144 235     2.84     3.41
##  M         3.79 0.271 235     3.25     4.32
## 
## race_fin2 = other:
##  gender2 emmean    SE  df lower.CL upper.CL
##  F         3.18 0.140 235     2.90     3.46
##  M         3.97 0.201 235     3.57     4.36
## 
## race_fin2 = white:
##  gender2 emmean    SE  df lower.CL upper.CL
##  F         3.40 0.164 235     3.08     3.72
##  M         3.96 0.318 235     3.33     4.58
## 
## Confidence level used: 0.95
# Overall gender contrast (F - M) on confidence, averaged over race/ethnicity
# groups.
aov_out %>%
  emmeans(specs = "gender2") %>%
  pairs()
##  contrast estimate    SE  df t.ratio p.value
##  F - M      -0.567 0.142 235  -3.993  0.0001
## 
## Results are averaged over the levels of: race_fin2

8 H3

Perceptions of scientists by gender and race/ethnicity.

We will find that women participants will perceive scientists as more warm, while male participants will perceive scientists as more competent.

We will see less of a gender difference in perceptions of warmth and competence among students of Color.

# Perceived scientist competence: between-subjects ANOVA with gender and
# race/ethnicity group as factors. This overwrites the previous aov_out.
aov_out <- aov_ez(
  id = "UID",
  dv = "sos_comp",
  data = df5,
  between = c("gender2", "race_fin2")
)
nice(aov_out)
## Anova Table (Type 3 tests)
## 
## Response: sos_comp
##              Effect     df  MSE    F   ges p.value
## 1           gender2 1, 106 0.32 0.00 <.001    .965
## 2         race_fin2 3, 106 0.32 0.32  .009    .809
## 3 gender2:race_fin2 3, 106 0.32 1.07  .030    .363
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1
# Competence plots with the y-axis fixed to 1-7: by gender, by race/ethnicity
# group, and race/ethnicity group with a separate trace per gender.
afex_plot(aov_out, x = "gender2") + ylim(c(1, 7))

afex_plot(aov_out, x = "race_fin2") + ylim(c(1, 7))

afex_plot(aov_out, x = "race_fin2", trace = "gender2") + ylim(c(1, 7))

# Perceived scientist warmth: same two-factor between-subjects ANOVA.
# This overwrites the previous aov_out.
aov_out <- aov_ez(
  id = "UID",
  dv = "sos_warm",
  data = df5,
  between = c("gender2", "race_fin2")
)
nice(aov_out)
## Anova Table (Type 3 tests)
## 
## Response: sos_warm
##              Effect     df  MSE      F  ges p.value
## 1           gender2 1, 106 0.53 5.36 * .048    .022
## 2         race_fin2 3, 106 0.53 2.25 + .060    .087
## 3 gender2:race_fin2 3, 106 0.53 3.65 * .094    .015
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1
# Warmth plots with the y-axis fixed to 1-7: by gender, by race/ethnicity
# group, and race/ethnicity group with a separate trace per gender.
afex_plot(aov_out, x = "gender2") + ylim(c(1, 7))

afex_plot(aov_out, x = "race_fin2") + ylim(c(1, 7))

afex_plot(aov_out, x = "race_fin2", trace = "gender2") + ylim(c(1, 7))

# Estimated marginal means for each gender within each race/ethnicity group.
aov_out %>%
  emmeans(specs = "gender2", by = "race_fin2")
## race_fin2 = asian:
##  gender2 emmean    SE  df lower.CL upper.CL
##  F         4.66 0.133 106     4.39     4.92
##  M         4.04 0.195 106     3.65     4.42
## 
## race_fin2 = latino:
##  gender2 emmean    SE  df lower.CL upper.CL
##  F         4.51 0.146 106     4.22     4.80
##  M         4.56 0.297 106     3.97     5.15
## 
## race_fin2 = other:
##  gender2 emmean    SE  df lower.CL upper.CL
##  F         4.48 0.163 106     4.15     4.80
##  M         4.74 0.243 106     4.26     5.22
## 
## race_fin2 = white:
##  gender2 emmean    SE  df lower.CL upper.CL
##  F         4.58 0.297 106     3.99     5.17
##  M         3.30 0.364 106     2.57     4.02
## 
## Confidence level used: 0.95
# Gender contrast (F - M) on perceived warmth within each race/ethnicity group.
aov_out %>%
  emmeans(specs = "gender2", by = "race_fin2") %>%
  pairs()
## race_fin2 = asian:
##  contrast estimate    SE  df t.ratio p.value
##  F - M      0.6186 0.236 106   2.624  0.0100
## 
## race_fin2 = latino:
##  contrast estimate    SE  df t.ratio p.value
##  F - M     -0.0479 0.331 106  -0.145  0.8853
## 
## race_fin2 = other:
##  contrast estimate    SE  df t.ratio p.value
##  F - M     -0.2601 0.292 106  -0.890  0.3757
## 
## race_fin2 = white:
##  contrast estimate    SE  df t.ratio p.value
##  F - M      1.2803 0.470 106   2.723  0.0076
# Pairwise race/ethnicity group contrasts on perceived warmth within each
# gender.
aov_out %>%
  emmeans(specs = "race_fin2", by = "gender2") %>%
  pairs()
## gender2 = F:
##  contrast       estimate    SE  df t.ratio p.value
##  asian - latino   0.1448 0.197 106   0.734  0.8831
##  asian - other    0.1803 0.210 106   0.857  0.8267
##  asian - white    0.0818 0.326 106   0.251  0.9944
##  latino - other   0.0355 0.219 106   0.162  0.9985
##  latino - white  -0.0630 0.331 106  -0.190  0.9975
##  other - white   -0.0985 0.339 106  -0.290  0.9914
## 
## gender2 = M:
##  contrast       estimate    SE  df t.ratio p.value
##  asian - latino  -0.5216 0.355 106  -1.468  0.4607
##  asian - other   -0.6984 0.311 106  -2.244  0.1182
##  asian - white    0.7435 0.413 106   1.800  0.2790
##  latino - other  -0.1768 0.384 106  -0.460  0.9674
##  latino - white   1.2652 0.470 106   2.691  0.0407
##  other - white    1.4419 0.438 106   3.294  0.0072
## 
## P value adjustment: tukey method for comparing a family of 4 estimates