1 Load Libraries

library(naniar) # for the gg_miss_upset() command
library(psych) # for the describe() command
library(kableExtra) # for tables
library(corrplot) # for correlation plots
library(afex) # for ANOVA
library(emmeans)
library(ggplot2) # for plots
library(dplyr)

2 Load Data

# Read the data file, then convert empty ("") and single-space (" ")
# race/ethnicity responses to NA so they count as missing.
df <- read.csv(file = "data/data_9-16.csv") %>%
  mutate(raceeth = na_if(na_if(raceeth, ""), " "))

3 Notes

Variables of interest:

* Race
* Gender (self-reported demographic)
* Racial identity dissonance
* TMFS
* SOS warmth
* SOS competence
* Science interest
* Science confidence

We came up with two hypotheses to test in our next meeting:

Hypothesis 1: we will see a gender difference in science interest when using participants’ self-reported gender.
Hypothesis 2: we will not see a significant relationship between science interest and gender identity using the TMFS.

Racial identity dissonance:

I keep my distance from most people with my background
I have to turn my back on poor people from my race/background in order to get ahead
I can barely deal with my own issues, let alone those in communities like the one I grew up in
I wish I were more comfortable around others from my background

TMFS:

I consider myself: (consider)
Ideally, I would like to be: (ideal)(tmfs)
Traditionally, my interests would be considered as: (consider)(tmfs)
Traditionally, my attitudes and beliefs would be considered: (ideal)(tmfs)
Traditionally, my behavior would be considered: (ideal)(tmfs)
Traditionally, my outer appearance would be considered: (consider)

SOS warmth:

They have fun with colleagues at work
They have happy marriages
They were committed to their hobbies (like sports or art) as children.
They struggled to do well in school.
They defy expectations about what they’re ‘supposed’ to be like.
They had parents who struggled to make ends meet.
They enjoy literature and writing.
They are warm and caring people.
They are devoted to their families.
They are determined to have fun with their work.
They work with a lot of people everyday.

SOS competence:

They know a lot about the latest discoveries
They are the ones who know how equipment works
They are careful with expensive instruments
They are competitive
They are cooperative
They are independent
They are work oriented
They are technically competent
They are competent
They are collaborative
They are highly focused
They have imagination.
They think outside the box.
They succeed in spite of failure.
They are forward thinkers who want to improve the world.

Interest:

Enthusiastic about this subject
Interested in discussing this subject area with friends or family
Interested in taking or planning to take additional classes in this subject
Interested in pursuing a science career

Confidence:

Confident that I understand this subject
Confident that I can do this subject
Comfortable working with complex ideas

4 Create Variables

# Composite scale scores, one per participant, keyed by UID.
# Each score is the mean of its items. rowMeans() ties the divisor to the
# number of items actually listed, and with the default na.rm = FALSE a score
# is NA whenever any of its items is missing (same as summing the items).
df2 <- subset(df, select = c(UID))
df2$ri_diss <- rowMeans(df[, c("rid16", "rid17", "rid18", "rid19")])
df2$ri_si <- rowMeans(df[, c("rid9", "rid11", "rid13", "rid14", "rid15")])
df2$tmfs_ideal <- rowMeans(df[, c("tmfs2", "tmfs4", "tmfs5")])
df2$tmfs_consider <- rowMeans(df[, c("tmfs1", "tmfs3", "tmfs6")])
df2$tmfs <- rowMeans(df[, c("tmfs2", "tmfs3", "tmfs4", "tmfs5")])
# NOTE(review): 16 items are averaged here. Previously their sum was divided
# by 15, which inflated the score by 16/15. The project notes list only 15
# competence items -- confirm whether one of these items should be dropped.
df2$sos_comp <- rowMeans(df[, c(
  "sos4", "sos8", "sos11", "sos16", "sos19", "sos20", "sos21", "sos28",
  "sos30", "sos39", "sos7", "sos10", "sos12", "sos13", "sos15", "sos33"
)])
df2$sos_warm <- rowMeans(df[, c(
  "sos9", "sos24", "sos26", "sos29", "sos31", "sos32", "sos37", "sos38",
  "sos2", "sos27", "sos25"
)])
df2$int <- rowMeans(df[, c("salg1", "salg2", "salg3", "salg4")])
df2$con <- rowMeans(df[, c("salg5", "salg6", "salg7")])

# Recode free-text gender into F / M / N.
# Responses are lower-cased and trimmed first, so variants such as "Female ",
# "FEMALE" or " f" all match. "man" is accepted alongside "woman" so the two
# labels are handled symmetrically.
# Anything else (including missing responses) stays in the "N" category.
gender_clean <- tolower(trimws(df$gender))

df2$gender2 <- "N"
df2$gender2[gender_clean %in% c("f", "female", "woman")] <- "F"
df2$gender2[gender_clean %in% c("m", "male", "man")] <- "M"
df2$gender2 <- as.factor(df2$gender2)

# Hand-sorted race categories; blank and single-space cells are read as NA.
race <- read.csv(file = "racesort2.csv", header = TRUE, na.strings = c("", " "))

# The recoded column is attached by row position, not joined on an ID, so the
# file must contain `newrace` and have exactly one row per row of df2.
# Without this check a shorter file whose length divides nrow(df2) would be
# silently recycled. NOTE(review): row ORDER cannot be checked here -- confirm
# racesort2.csv is in the same order as the data file.
stopifnot(
  "newrace" %in% names(race),
  nrow(race) == nrow(df2)
)
df2$race_fin <- race$newrace

# Collapse the detailed race codes into four analysis groups. Codes that are
# not listed here (including NA) come out as NA.
race_groups <- c(
  asian  = "asian",
  black  = "other",
  latino = "latino",
  mena   = "other",
  multi  = "other",
  pi     = "other",
  white  = "white"
)
df2$race_fin2 <- unname(race_groups[as.character(df2$race_fin)])

# Remove rows that are exact duplicates across every column.
df4 <- df2 %>% distinct()

4.1 View Missing Data

# UpSet plot of which variables are missing together (nsets = 10).
df4 %>% gg_miss_upset(nsets = 10)

4.2 View Item Normality

# Descriptive statistics for every column except UID (the first column).
desc <- describe(df4[-1])

# Rows to highlight: bold when |kurtosis| > 2, italic when |skew| > 2.
flag_kurtosis <- which(abs(desc$kurtosis) > 2)
flag_skew <- which(abs(desc$skew) > 2)

desc %>%
  round(digits = 2) %>%
  kable() %>%
  kable_styling() %>%
  row_spec(flag_kurtosis, bold = TRUE) %>%
  row_spec(flag_skew, italic = TRUE)

	vars	n	mean	sd	median	trimmed	mad	min	max	range	skew	kurtosis	se
ri_diss	1	252	2.18	0.90	2.25	2.11	1.11	1.00	5.00	4.00	0.57	0.18	0.06
ri_si	2	252	3.25	0.81	3.20	3.26	0.89	1.00	5.00	4.00	-0.18	-0.02	0.05
tmfs_ideal	3	233	5.00	1.74	5.00	5.11	1.98	1.00	8.33	7.33	-0.44	-0.45	0.11
tmfs_consider	4	233	4.21	1.58	4.00	4.22	1.48	1.00	7.33	6.33	-0.04	-0.72	0.10
tmfs	5	233	4.81	1.58	5.00	4.91	1.85	1.00	7.50	6.50	-0.50	-0.31	0.10
sos_comp	6	122	5.58	0.59	5.67	5.63	0.64	3.40	6.40	3.00	-0.87	1.14	0.05
sos_warm	7	122	4.44	0.76	4.41	4.45	0.74	2.55	6.00	3.45	-0.07	-0.54	0.07
int	8	264	3.59	0.96	3.75	3.64	1.11	1.25	5.00	3.75	-0.40	-0.64	0.06
con	9	264	3.40	0.92	3.33	3.40	0.99	1.00	5.00	4.00	-0.02	-0.46	0.06
gender2*	10	291	1.55	0.75	1.00	1.44	0.00	1.00	3.00	2.00	0.95	-0.61	0.04
race_fin*	11	247	3.40	2.32	3.00	3.26	2.97	1.00	7.00	6.00	0.33	-1.45	0.15
race_fin2*	12	247	2.17	1.11	2.00	2.09	1.48	1.00	4.00	3.00	0.34	-1.30	0.07

# Counts per recoded gender category; useNA = "always" adds an explicit NA column.
table(df4$gender2, useNA = "always")

F M N 177 68 46 0

# Counts per collapsed race/ethnicity group; useNA = "always" adds an explicit NA column.
table(df4$race_fin2, useNA = "always")

asian latino other white 96 51 62 38 44

4.3 Check Outliers

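Using Mahalanobis’ distance. Two outliers dropped. (Note: the `table(d$outlier)` output below shows no cases flagged, and the later analyses use `df4`/`df5` rather than the outlier-filtered `df3`, so this statement should be re-checked.)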

# Keep UID plus the nine numeric composites (columns 11-13, the categorical
# gender/race columns, are dropped by name) and retain complete cases only.
d <- na.omit(subset(df4, select = -c(gender2, race_fin, race_fin2)))

# Mahalanobis distance of each case from the centroid of the composites;
# UID (first column) is excluded from the calculation.
scores <- d[-1]
m_dist <- mahalanobis(scores, center = colMeans(scores), cov = cov(scores))
d$MD <- round(m_dist, 1)
plot(d$MD)
describe(m_dist)

##    vars  n mean   sd median trimmed  mad  min   max range skew kurtosis   se
## X1    1 96 8.91 3.85   8.09    8.59 3.28 2.62 22.83 20.21  0.9     0.68 0.39

# Chi-square cutoff (p = .99) for flagging multivariate outliers.
# The degrees of freedom must equal the number of variables that entered
# mahalanobis(). By this point `d` also holds the UID and MD columns (and
# `outlier` on a re-run), so those are excluded by name; ncol(d) - 1 would
# count MD as a variable and make the cutoff too lenient.
md_vars <- setdiff(names(d), c("UID", "MD", "outlier"))
cut <- qchisq(.99, df = length(md_vars))
abline(h = cut, col = "red")

# Flag cases using the unrounded distances so that rounding to one decimal
# place cannot move a borderline case across the cutoff.
d$outlier <- as.vector(m_dist > cut)
table(d$outlier)

## 
## FALSE 
##    96

# UIDs of the cases flagged as multivariate outliers.
outs <- d[which(d$outlier), c("UID", "outlier")]

# Copy of df2 without the flagged cases.
# NOTE(review): the later sections visible in this file analyse df4/df5, not
# df3 -- confirm whether the outlier exclusion is meant to carry forward.
df3 <- df2[!(df2$UID %in% outs$UID), ]

5 H1

Hypothesis 1: we will see a gender difference in science interest when using participants’ self-reported gender.

Int/Con: lower score = less interested/less confident
Gender: F = woman, M = man, N = non-binary, genderqueer, or NA

No significant difference in science interest when using participants’ gender. Significant difference in science confidence (p < .001). Difference is between men (M = 3.77, SE = .11) and women (M = 3.27, SE = .07).

No significant difference in perceptions of scientist competence (p = .296). Marginally significant difference in perceptions of scientist warmth (p = .051). Difference is between men (M = 4.23, SE = .13) and women (M = 4.56, SE = .08).

# aov_out <- aov_ez(id = "UID", dv = "int", data = df4, between = c("gender2"))
# nice(aov_out)
# afex_plot(aov_out, x = "gender2")
# 
# aov_out <- aov_ez(id = "UID", dv = "con", data = df4, between = c("gender2"))
# nice(aov_out)
# emmeans(aov_out, specs = "gender2")
# pairs(emmeans(aov_out, specs = "gender2"))
# afex_plot(aov_out, x = "gender2")

6 H2

Hypothesis 2: we will not see a significant relationship between science interest and gender identity using the TMFS.

TMFS: lower score = more masculine
Int/Con: lower score = less interested/less confident

No significant relationship between science interest and TMFS. No significant relationship between science confidence and TMFS.

# out <- corr.test(subset(df4, select=c(int, con, tmfs, sos_warm, sos_comp)))
# 
# corrplot(out$r, type="upper", method = "color", tl.col = "black", tl.cex = .75,
#          p.mat = out$p,
#          sig.level = c(.001, .01, .05), pch.cex = .9,
#          insig = "label_sig", pch.col = "white",
#          order = "hclust")
# 
# plot(df4$tmfs, df4$int, xlab = "TMFS", ylab = "Interest")
# plot(df4$tmfs, df4$con, xlab = "TMFS", ylab = "Confidence")
# 
# out$r
# out$p

7 H4

Interest and confidence by race/ethnicity.

# Keep only participants coded F or M (drops the "N" group), then
# cross-tabulate gender by collapsed race/ethnicity, showing NA counts.
df5 <- df4[which(df4$gender2 != "N"), ]
table(df5$gender2, df5$race_fin2, useNA = "always")

##       
##        asian latino other white <NA>
##   F       66     39    41    30    1
##   M       28     11    20     8    1
##   N        0      0     0     0    0
##   <NA>     0      0     0     0    0

# Remove the now-empty "N" level from the gender factor.
df5$gender2 <- df5$gender2 %>% droplevels()

# Between-subjects ANOVA: science interest by gender x race/ethnicity.
aov_out <- aov_ez(
  id = "UID",
  dv = "int",
  data = df5,
  between = c("gender2", "race_fin2")
)
aov_out %>% nice()

## Anova Table (Type 3 tests)
## 
## Response: int
##              Effect     df  MSE      F  ges p.value
## 1           gender2 1, 235 0.90   0.62 .003    .431
## 2         race_fin2 3, 235 0.90   0.67 .009    .570
## 3 gender2:race_fin2 3, 235 0.90 3.31 * .041    .021
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1

# Interest: plots by gender, by race/ethnicity, and race/ethnicity with one
# trace per gender. The y-axis is fixed to 1-5 in each plot.
afex_plot(aov_out, x = "gender2") + ylim(c(1, 5))

afex_plot(aov_out, x = "race_fin2") + ylim(c(1, 5))

afex_plot(aov_out, x = "race_fin2", trace = "gender2") + ylim(c(1, 5))

# Estimated marginal means for each gender within each race/ethnicity group.
aov_out %>% emmeans(specs = "gender2", by = "race_fin2")

## race_fin2 = asian:
##  gender2 emmean    SE  df lower.CL upper.CL
##  F         3.51 0.117 235     3.28     3.74
##  M         3.60 0.179 235     3.24     3.95
## 
## race_fin2 = latino:
##  gender2 emmean    SE  df lower.CL upper.CL
##  F         3.37 0.152 235     3.07     3.66
##  M         4.07 0.286 235     3.50     4.63
## 
## race_fin2 = other:
##  gender2 emmean    SE  df lower.CL upper.CL
##  F         3.52 0.148 235     3.23     3.82
##  M         3.96 0.212 235     3.54     4.38
## 
## race_fin2 = white:
##  gender2 emmean    SE  df lower.CL upper.CL
##  F         3.88 0.173 235     3.54     4.22
##  M         3.12 0.336 235     2.46     3.79
## 
## Confidence level used: 0.95

# Interest: F vs M contrast within each race/ethnicity group.
aov_out %>%
  emmeans(specs = "gender2", by = "race_fin2") %>%
  pairs()

## race_fin2 = asian:
##  contrast estimate    SE  df t.ratio p.value
##  F - M     -0.0906 0.214 235  -0.423  0.6724
## 
## race_fin2 = latino:
##  contrast estimate    SE  df t.ratio p.value
##  F - M     -0.7028 0.324 235  -2.169  0.0311
## 
## race_fin2 = other:
##  contrast estimate    SE  df t.ratio p.value
##  F - M     -0.4381 0.259 235  -1.692  0.0919
## 
## race_fin2 = white:
##  contrast estimate    SE  df t.ratio p.value
##  F - M      0.7583 0.378 235   2.008  0.0458

# Interest: pairwise race/ethnicity contrasts within each gender.
aov_out %>%
  emmeans(specs = "race_fin2", by = "gender2") %>%
  pairs()

## gender2 = F:
##  contrast       estimate    SE  df t.ratio p.value
##  asian - latino   0.1422 0.192 235   0.742  0.8801
##  asian - other   -0.0168 0.189 235  -0.089  0.9997
##  asian - white   -0.3758 0.209 235  -1.798  0.2770
##  latino - other  -0.1590 0.212 235  -0.749  0.8771
##  latino - white  -0.5179 0.231 235  -2.247  0.1138
##  other - white   -0.3589 0.228 235  -1.574  0.3956
## 
## gender2 = M:
##  contrast       estimate    SE  df t.ratio p.value
##  asian - latino  -0.4700 0.338 235  -1.391  0.5060
##  asian - other   -0.3643 0.278 235  -1.311  0.5570
##  asian - white    0.4732 0.381 235   1.243  0.5998
##  latino - other   0.1057 0.356 235   0.297  0.9909
##  latino - white   0.9432 0.441 235   2.138  0.1441
##  other - white    0.8375 0.397 235   2.109  0.1532
## 
## P value adjustment: tukey method for comparing a family of 4 estimates

# Between-subjects ANOVA: science confidence by gender x race/ethnicity.
aov_out <- aov_ez(
  id = "UID",
  dv = "con",
  data = df5,
  between = c("gender2", "race_fin2")
)
aov_out %>% nice()

## Anova Table (Type 3 tests)
## 
## Response: con
##              Effect     df  MSE         F  ges p.value
## 1           gender2 1, 235 0.81 15.94 *** .064   <.001
## 2         race_fin2 3, 235 0.81      0.44 .006    .723
## 3 gender2:race_fin2 3, 235 0.81      1.00 .013    .392
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1

# Confidence by gender, y-axis fixed to 1-5.
afex_plot(aov_out, x = "gender2") + ylim(c(1, 5))

# Estimated marginal means for each gender within each race/ethnicity group.
aov_out %>% emmeans(specs = "gender2", by = "race_fin2")

## race_fin2 = asian:
##  gender2 emmean    SE  df lower.CL upper.CL
##  F         3.34 0.111 235     3.13     3.56
##  M         3.61 0.170 235     3.27     3.94
## 
## race_fin2 = latino:
##  gender2 emmean    SE  df lower.CL upper.CL
##  F         3.13 0.144 235     2.84     3.41
##  M         3.79 0.271 235     3.25     4.32
## 
## race_fin2 = other:
##  gender2 emmean    SE  df lower.CL upper.CL
##  F         3.18 0.140 235     2.90     3.46
##  M         3.97 0.201 235     3.57     4.36
## 
## race_fin2 = white:
##  gender2 emmean    SE  df lower.CL upper.CL
##  F         3.40 0.164 235     3.08     3.72
##  M         3.96 0.318 235     3.33     4.58
## 
## Confidence level used: 0.95

# Confidence: overall F vs M contrast (no `by` grouping).
aov_out %>%
  emmeans(specs = "gender2") %>%
  pairs()

##  contrast estimate    SE  df t.ratio p.value
##  F - M      -0.567 0.142 235  -3.993  0.0001
## 
## Results are averaged over the levels of: race_fin2

8 H3

Perceptions of scientists by gender and race/ethnicity.

We will find that women participants will perceive scientists as more warm, while male participants will perceive scientists as more competent.

We will see less of a gender difference in perceptions of warmth and competence among students of Color.

# Gender and perceptions of scientists.
# Between-subjects ANOVA: perceived scientist competence by gender x
# race/ethnicity.
aov_out <- aov_ez(
  id = "UID",
  dv = "sos_comp",
  data = df5,
  between = c("gender2", "race_fin2")
)
aov_out %>% nice()

## Anova Table (Type 3 tests)
## 
## Response: sos_comp
##              Effect     df  MSE    F   ges p.value
## 1           gender2 1, 106 0.32 0.00 <.001    .965
## 2         race_fin2 3, 106 0.32 0.32  .009    .809
## 3 gender2:race_fin2 3, 106 0.32 1.07  .030    .363
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1

# Competence: plots by gender, by race/ethnicity, and race/ethnicity with one
# trace per gender. The y-axis is fixed to 1-7 in each plot.
afex_plot(aov_out, x = "gender2") + ylim(c(1, 7))

afex_plot(aov_out, x = "race_fin2") + ylim(c(1, 7))

afex_plot(aov_out, x = "race_fin2", trace = "gender2") + ylim(c(1, 7))

# Between-subjects ANOVA: perceived scientist warmth by gender x
# race/ethnicity. This replaces the competence model stored in aov_out.
aov_out <- aov_ez(
  id = "UID",
  dv = "sos_warm",
  data = df5,
  between = c("gender2", "race_fin2")
)
aov_out %>% nice()

## Anova Table (Type 3 tests)
## 
## Response: sos_warm
##              Effect     df  MSE      F  ges p.value
## 1           gender2 1, 106 0.53 5.36 * .048    .022
## 2         race_fin2 3, 106 0.53 2.25 + .060    .087
## 3 gender2:race_fin2 3, 106 0.53 3.65 * .094    .015
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1

# Warmth: plots by gender, by race/ethnicity, and race/ethnicity with one
# trace per gender. The y-axis is fixed to 1-7 in each plot.
afex_plot(aov_out, x = "gender2") + ylim(c(1, 7))

afex_plot(aov_out, x = "race_fin2") + ylim(c(1, 7))

afex_plot(aov_out, x = "race_fin2", trace = "gender2") + ylim(c(1, 7))

# Estimated marginal means for each gender within each race/ethnicity group.
aov_out %>% emmeans(specs = "gender2", by = "race_fin2")

## race_fin2 = asian:
##  gender2 emmean    SE  df lower.CL upper.CL
##  F         4.66 0.133 106     4.39     4.92
##  M         4.04 0.195 106     3.65     4.42
## 
## race_fin2 = latino:
##  gender2 emmean    SE  df lower.CL upper.CL
##  F         4.51 0.146 106     4.22     4.80
##  M         4.56 0.297 106     3.97     5.15
## 
## race_fin2 = other:
##  gender2 emmean    SE  df lower.CL upper.CL
##  F         4.48 0.163 106     4.15     4.80
##  M         4.74 0.243 106     4.26     5.22
## 
## race_fin2 = white:
##  gender2 emmean    SE  df lower.CL upper.CL
##  F         4.58 0.297 106     3.99     5.17
##  M         3.30 0.364 106     2.57     4.02
## 
## Confidence level used: 0.95

# Warmth: F vs M contrast within each race/ethnicity group.
aov_out %>%
  emmeans(specs = "gender2", by = "race_fin2") %>%
  pairs()

## race_fin2 = asian:
##  contrast estimate    SE  df t.ratio p.value
##  F - M      0.6186 0.236 106   2.624  0.0100
## 
## race_fin2 = latino:
##  contrast estimate    SE  df t.ratio p.value
##  F - M     -0.0479 0.331 106  -0.145  0.8853
## 
## race_fin2 = other:
##  contrast estimate    SE  df t.ratio p.value
##  F - M     -0.2601 0.292 106  -0.890  0.3757
## 
## race_fin2 = white:
##  contrast estimate    SE  df t.ratio p.value
##  F - M      1.2803 0.470 106   2.723  0.0076

# Warmth: pairwise race/ethnicity contrasts within each gender.
aov_out %>%
  emmeans(specs = "race_fin2", by = "gender2") %>%
  pairs()

## gender2 = F:
##  contrast       estimate    SE  df t.ratio p.value
##  asian - latino   0.1448 0.197 106   0.734  0.8831
##  asian - other    0.1803 0.210 106   0.857  0.8267
##  asian - white    0.0818 0.326 106   0.251  0.9944
##  latino - other   0.0355 0.219 106   0.162  0.9985
##  latino - white  -0.0630 0.331 106  -0.190  0.9975
##  other - white   -0.0985 0.339 106  -0.290  0.9914
## 
## gender2 = M:
##  contrast       estimate    SE  df t.ratio p.value
##  asian - latino  -0.5216 0.355 106  -1.468  0.4607
##  asian - other   -0.6984 0.311 106  -2.244  0.1182
##  asian - white    0.7435 0.413 106   1.800  0.2790
##  latino - other  -0.1768 0.384 106  -0.460  0.9674
##  latino - white   1.2652 0.470 106   2.691  0.0407
##  other - white    1.4419 0.438 106   3.294  0.0072
## 
## P value adjustment: tukey method for comparing a family of 4 estimates

Sophia Analysis

Heather Perkins

2024-02-12