library(readr)
library(tidyverse)
library(readxl)
library(leaps)
# Import the survey workbook as a tibble, then keep a plain data.frame copy
# that the rest of the analysis works on.
STEM_data_tb <- read_excel(path = "Data_set_5.16.2022.xlsx")
STEM_data <- STEM_data_tb %>% as.data.frame()
The above code snippet loads the libraries necessary for this project and imports the data, reading the Excel workbook into a tibble and then converting the tibble to a data frame.
# Derive Intended_Field from the 0/1 field-indicator columns.
# Names are the indicator columns, values are the labels written to
# Intended_Field. The order matters: indicators are applied top to bottom, so
# when a respondent has several indicators set to 1 the LAST one listed wins.
field_labels <- c(
  Business = "Business",
  CompSci = "Computer Science",
  Education = "Education",
  Engineering = "Engineering",
  Humanities = "Humanities",
  JournalismComm = "Journalism or Communications",
  LifeSciences = "Life Sciences",
  Math = "Math",
  MedicalSciences = "Medical Sciences",
  PhysicalSciences = "Physical Sciences",
  SocialBehavioralSciences = "Social or Behavioral Sciences",
  Art = "Art"
)

# mutate() rather than add_column(): add_column() aborts if Intended_Field
# already exists, which made this chunk fail whenever it was re-run.
# Respondents with no indicator set keep the default "Other".
STEM_data <- STEM_data %>%
  mutate(Intended_Field = "Other")

for (indicator in names(field_labels)) {
  # A missing (NA) indicator selects nothing, so that row keeps its value.
  is_selected <- STEM_data[[indicator]] == 1
  STEM_data$Intended_Field[is_selected] <- field_labels[[indicator]]
}

# Belonging_STEM is the mean of the math and science belonging scores.
STEM_data <- STEM_data %>%
  mutate(Belonging_STEM = (Belonging_Math + Belonging_Science) / 2)
The above code creates two new columns: Intended_Field and Belonging_STEM, based on data in other columns - effectively aggregating the data.
# Replace the numeric codes in the three identity columns with readable
# labels. For each column, the label at position k replaces code k; codes are
# substituted one at a time, so values that match no listed code are left
# untouched.
# NOTE(review): the label text is taken as-is from this script; confirm the
# code-to-label mapping against the survey codebook.
code_labels <- list(
  Gender_Identity = c(
    "Non-Binary, GNC, or Other",
    "Transgender Man",
    "Cisgender or Transgender Woman",
    "Cisgender Man"
  ),
  Sexual_Identity = c(
    "Unsure, Questioning, or Other",
    "Queer",
    "Pansexual",
    "Bisexual",
    "Gay/Lesbian"
  ),
  Race_Ethnicity = c(
    "Latinx or Hispanic",
    "Indigenous or Native American",
    "Asian",
    "Black or African American",
    "White",
    "Multiracial"
  )
)

for (column in names(code_labels)) {
  value_labels <- code_labels[[column]]
  for (code in seq_along(value_labels)) {
    STEM_data[[column]][STEM_data[[column]] == code] <- value_labels[code]
  }
}
The above code changes the encoding of the Race_Ethnicity, Gender_Identity, and Sexual_Identity columns to be encoded in words rather than numbers, for the ease of reading the plots that will be made.
# Horizontal box plots of science belonging for each gender identity,
# with one dodged box per race/ethnicity.
plot1 <- STEM_data %>%
  ggplot(
    mapping = aes(
      x = Gender_Identity,
      y = Belonging_Science,
      fill = Race_Ethnicity
    )
  ) +
  geom_boxplot(position = "dodge") +
  coord_flip()
plot1
The above exploratory plot shows how strongly people of different genders and ethnicities feel they belong in science fields.
# Bar chart of the number of respondents per gender identity.
plot2 <- STEM_data %>%
  ggplot(mapping = aes(x = Gender_Identity)) +
  geom_bar()
plot2

# Bar chart of the number of respondents per race/ethnicity.
plot3 <- STEM_data %>%
  ggplot(mapping = aes(x = Race_Ethnicity)) +
  geom_bar()
plot3
The two charts above show the number of respondents in each gender and racial category.
# Number of respondents in each gender-identity group.
summarise(group_by(STEM_data, Gender_Identity), n = n())
## # A tibble: 4 x 2
## Gender_Identity n
## <chr> <int>
## 1 Cisgender Man 292
## 2 Cisgender or Transgender Woman 22
## 3 Non-Binary, GNC, or Other 34
## 4 Transgender Man 191
# Number of respondents in each sexual-identity group.
summarise(group_by(STEM_data, Sexual_Identity), n = n())
## # A tibble: 5 x 2
## Sexual_Identity n
## <chr> <int>
## 1 Bisexual 135
## 2 Gay/Lesbian 276
## 3 Pansexual 55
## 4 Queer 36
## 5 Unsure, Questioning, or Other 37
# Number of respondents in each race/ethnicity group.
summarise(group_by(STEM_data, Race_Ethnicity), n = n())
## # A tibble: 6 x 2
## Race_Ethnicity n
## <chr> <int>
## 1 Asian 35
## 2 Black or African American 25
## 3 Indigenous or Native American 7
## 4 Latinx or Hispanic 119
## 5 Multiracial 46
## 6 White 307
The above tables show the number of responses in each gender, sexuality, and race category in the data.
# Horizontal box plots of math belonging for each gender identity,
# split by intended field of study.
plot5 <- STEM_data %>%
  ggplot(
    mapping = aes(
      x = Gender_Identity,
      y = Belonging_Math,
      fill = Intended_Field
    )
  ) +
  geom_boxplot() +
  coord_flip()
plot5
The above box plots show how people of different gender identities and intended fields of study felt they belonged in math fields.
# Horizontal box plots of math belonging for each gender identity,
# split by the STEMIntent indicator (converted to a factor so it is
# treated as discrete fill groups).
plot6 <- STEM_data %>%
  ggplot(
    mapping = aes(
      x = Gender_Identity,
      y = Belonging_Math,
      fill = as.factor(STEMIntent)
    )
  ) +
  geom_boxplot() +
  coord_flip()
plot6
The above box plots show how people of different gender identities felt they belonged in math fields, separated by whether they intended to go into STEM fields or not.
# Box plots of the Bullying score for each gender identity; each box is
# filled by the same gender identity it represents.
plot7 <- STEM_data %>%
  ggplot(
    mapping = aes(
      x = Gender_Identity,
      y = Bullying,
      fill = Gender_Identity
    )
  ) +
  geom_boxplot()
plot7
The above box plot shows the amount of bullying experienced by people of different gender identities. The median level of bullying is distinctly lower for cisgender men than it is for any other gender identity.
# Additive linear model: STEM belonging regressed on bullying and
# gender identity (common slope, separate intercept per gender identity).
mod1a <- lm(
  formula = Belonging_STEM ~ Bullying + Gender_Identity,
  data = STEM_data
)
summary(mod1a)
##
## Call:
## lm(formula = Belonging_STEM ~ Bullying + Gender_Identity, data = STEM_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.9222 -1.3354 0.1401 1.3335 4.9645
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 8.6718 0.2762 31.396
## Bullying -0.6871 0.1129 -6.084
## Gender_IdentityCisgender or Transgender Woman -1.0564 0.3901 -2.708
## Gender_IdentityNon-Binary, GNC, or Other -1.1619 0.3254 -3.571
## Gender_IdentityTransgender Man -1.2629 0.1679 -7.523
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## Bullying 2.24e-09 ***
## Gender_IdentityCisgender or Transgender Woman 0.006982 **
## Gender_IdentityNon-Binary, GNC, or Other 0.000388 ***
## Gender_IdentityTransgender Man 2.28e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.76 on 534 degrees of freedom
## Multiple R-squared: 0.1972, Adjusted R-squared: 0.1911
## F-statistic: 32.78 on 4 and 534 DF, p-value: < 2.2e-16
# Scatter plot of bullying vs. STEM belonging with the fitted line of the
# additive model mod1a for each gender identity.
#
# The lines are built from the model by coefficient NAME and mapped to
# Gender_Identity, so each line uses the same colour scale and legend as the
# points. Hard-coded colours and positional coefficient indices would not
# match the point colours and would mislabel groups if the level order changed.
mod1a_lines <- local({
  coefs <- coef(mod1a)
  groups <- mod1a$xlevels$Gender_Identity
  # The first level is the reference category, so its intercept offset is 0.
  offsets <- c(0, unname(coefs[paste0("Gender_Identity", groups[-1])]))
  data.frame(
    Gender_Identity = groups,
    intercept = coefs[["(Intercept)"]] + offsets,
    # Additive model: every group shares the same Bullying slope.
    slope = coefs[["Bullying"]]
  )
})

plot11 <- ggplot(
  STEM_data,
  aes(Bullying, Belonging_STEM, color = Gender_Identity)
) +
  geom_point(position = "jitter") +
  # geom_abline() does not inherit the plot aesthetics, so colour is mapped
  # explicitly here.
  geom_abline(
    data = mod1a_lines,
    mapping = aes(
      intercept = intercept,
      slope = slope,
      color = Gender_Identity
    )
  )
plot11
The above plot shows the relationship between bullying, gender, and belonging in STEM, with regression lines based on gender identity. Below are the diagnostic plots for this model. The residuals are, for the most part, spread fairly evenly, which suggests that the linear-model assumptions are reasonably met; note, however, that the model explains only about 20% of the variance in STEM belonging (R-squared = 0.1972).
# Draw the standard lm diagnostic plots for mod1a in a 2 x 2 grid.
# par() returns the previous settings; they are restored afterwards so that
# later base-graphics plots are not forced into the 2 x 2 layout.
old_par <- par(mfrow = c(2, 2))
plot(mod1a)
par(old_par)
# Interaction model: STEM belonging regressed on bullying, gender identity,
# and their interaction (separate intercept and slope per gender identity).
mod1b <- lm(
  formula = Belonging_STEM ~ Bullying + Gender_Identity +
    Bullying * Gender_Identity,
  data = STEM_data
)
summary(mod1b)
##
## Call:
## lm(formula = Belonging_STEM ~ Bullying + Gender_Identity + Bullying *
## Gender_Identity, data = STEM_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.8934 -1.2807 0.1306 1.3484 4.7301
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 8.6163 0.3891
## Bullying -0.6626 0.1654
## Gender_IdentityCisgender or Transgender Woman 1.3488 1.5342
## Gender_IdentityNon-Binary, GNC, or Other 0.7835 1.2173
## Gender_IdentityTransgender Man -1.7043 0.6103
## Bullying:Gender_IdentityCisgender or Transgender Woman -0.9612 0.5960
## Bullying:Gender_IdentityNon-Binary, GNC, or Other -0.6900 0.4256
## Bullying:Gender_IdentityTransgender Man 0.1671 0.2404
## t value Pr(>|t|)
## (Intercept) 22.146 < 2e-16 ***
## Bullying -4.007 7.04e-05 ***
## Gender_IdentityCisgender or Transgender Woman 0.879 0.37972
## Gender_IdentityNon-Binary, GNC, or Other 0.644 0.52012
## Gender_IdentityTransgender Man -2.793 0.00542 **
## Bullying:Gender_IdentityCisgender or Transgender Woman -1.613 0.10737
## Bullying:Gender_IdentityNon-Binary, GNC, or Other -1.621 0.10556
## Bullying:Gender_IdentityTransgender Man 0.695 0.48738
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.754 on 531 degrees of freedom
## Multiple R-squared: 0.2073, Adjusted R-squared: 0.1968
## F-statistic: 19.84 on 7 and 531 DF, p-value: < 2.2e-16
# Scatter plot of bullying vs. STEM belonging with the fitted line of the
# interaction model mod1b for each gender identity.
# NOTE: this assigns to plot11 again, replacing the plot stored under that
# name earlier in the script.
#
# The lines are built from the model by coefficient NAME and mapped to
# Gender_Identity, so each line uses the same colour scale and legend as the
# points. Hard-coded colours and positional coefficient indices would not
# match the point colours and would mislabel groups if the level order changed.
mod1b_lines <- local({
  coefs <- coef(mod1b)
  groups <- mod1b$xlevels$Gender_Identity
  # The first level is the reference category: zero offset for both the
  # intercept and the slope.
  intercept_shift <- c(
    0, unname(coefs[paste0("Gender_Identity", groups[-1])])
  )
  slope_shift <- c(
    0, unname(coefs[paste0("Bullying:Gender_Identity", groups[-1])])
  )
  data.frame(
    Gender_Identity = groups,
    intercept = coefs[["(Intercept)"]] + intercept_shift,
    slope = coefs[["Bullying"]] + slope_shift
  )
})

plot11 <- ggplot(
  STEM_data,
  aes(Bullying, Belonging_STEM, color = Gender_Identity)
) +
  geom_point(position = "jitter") +
  # geom_abline() does not inherit the plot aesthetics, so colour is mapped
  # explicitly here.
  geom_abline(
    data = mod1b_lines,
    mapping = aes(
      intercept = intercept,
      slope = slope,
      color = Gender_Identity
    )
  )
plot11
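The above plot and model summary describe how the effect of bullying on feelings of belonging in STEM varies with gender identity. None of the interaction terms is statistically significant (all p > 0.10), and the adjusted R-squared barely changes relative to the additive model (0.1968 vs. 0.1911), so there is little evidence that the effect of bullying differs by gender identity.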
# Additive linear model: STEM belonging regressed on bullying and
# race/ethnicity (common slope, separate intercept per race/ethnicity).
mod2a <- lm(
  formula = Belonging_STEM ~ Bullying + Race_Ethnicity,
  data = STEM_data
)
summary(mod2a)
##
## Call:
## lm(formula = Belonging_STEM ~ Bullying + Race_Ethnicity, data = STEM_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.4550 -1.2533 0.1547 1.4165 4.6394
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 9.1814 0.3950 23.245
## Bullying -0.8719 0.1149 -7.589
## Race_EthnicityBlack or African American -0.7569 0.4829 -1.567
## Race_EthnicityIndigenous or Native American -1.0783 0.7628 -1.414
## Race_EthnicityLatinx or Hispanic -0.2852 0.3556 -0.802
## Race_EthnicityMultiracial -1.0528 0.4145 -2.540
## Race_EthnicityWhite -0.7417 0.3312 -2.240
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## Bullying 1.45e-13 ***
## Race_EthnicityBlack or African American 0.1176
## Race_EthnicityIndigenous or Native American 0.1581
## Race_EthnicityLatinx or Hispanic 0.4228
## Race_EthnicityMultiracial 0.0114 *
## Race_EthnicityWhite 0.0255 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.841 on 532 degrees of freedom
## Multiple R-squared: 0.1246, Adjusted R-squared: 0.1148
## F-statistic: 12.62 on 6 and 532 DF, p-value: 2.454e-13
# Scatter plot of bullying vs. STEM belonging with the fitted line of the
# additive model mod2a for each race/ethnicity group.
#
# The lines are built from the model by coefficient NAME and mapped to
# Race_Ethnicity, so each line uses the same colour scale and legend as the
# points. Hand-picked colours and positional coefficient indices only
# approximate the point colours and would mislabel groups if the level order
# changed.
mod2a_lines <- local({
  coefs <- coef(mod2a)
  groups <- mod2a$xlevels$Race_Ethnicity
  # The first level is the reference category, so its intercept offset is 0.
  offsets <- c(0, unname(coefs[paste0("Race_Ethnicity", groups[-1])]))
  data.frame(
    Race_Ethnicity = groups,
    intercept = coefs[["(Intercept)"]] + offsets,
    # Additive model: every group shares the same Bullying slope.
    slope = coefs[["Bullying"]]
  )
})

plot13 <- ggplot(
  STEM_data,
  aes(Bullying, Belonging_STEM, color = Race_Ethnicity)
) +
  geom_point(position = "jitter") +
  # geom_abline() does not inherit the plot aesthetics, so colour is mapped
  # explicitly here.
  geom_abline(
    data = mod2a_lines,
    mapping = aes(
      intercept = intercept,
      slope = slope,
      color = Race_Ethnicity
    )
  )
plot13
The above plot shows the relationship between bullying, race, and belonging in STEM, with regression lines based on race. Only some of these relationships are statistically significant, which could be due, at least in part, to discrepancies in the numbers of data points corresponding to each race.
# Interaction model: STEM belonging regressed on bullying, race/ethnicity,
# and their interaction (separate intercept and slope per race/ethnicity).
mod2b <- lm(
  formula = Belonging_STEM ~ Bullying + Race_Ethnicity +
    Bullying * Race_Ethnicity,
  data = STEM_data
)
summary(mod2b)
##
## Call:
## lm(formula = Belonging_STEM ~ Bullying + Race_Ethnicity + Bullying *
## Race_Ethnicity, data = STEM_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.4109 -1.2252 0.2015 1.4222 4.5908
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 8.96691 1.08771
## Bullying -0.77057 0.49233
## Race_EthnicityBlack or African American 1.45658 1.79922
## Race_EthnicityIndigenous or Native American 2.15630 2.39279
## Race_EthnicityLatinx or Hispanic -0.14134 1.24626
## Race_EthnicityMultiracial -1.25853 1.34572
## Race_EthnicityWhite -0.61768 1.16160
## Bullying:Race_EthnicityBlack or African American -0.94967 0.76675
## Bullying:Race_EthnicityIndigenous or Native American -1.38613 0.98831
## Bullying:Race_EthnicityLatinx or Hispanic -0.07197 0.54904
## Bullying:Race_EthnicityMultiracial 0.07167 0.57993
## Bullying:Race_EthnicityWhite -0.06491 0.51724
## t value Pr(>|t|)
## (Intercept) 8.244 1.33e-15 ***
## Bullying -1.565 0.118
## Race_EthnicityBlack or African American 0.810 0.419
## Race_EthnicityIndigenous or Native American 0.901 0.368
## Race_EthnicityLatinx or Hispanic -0.113 0.910
## Race_EthnicityMultiracial -0.935 0.350
## Race_EthnicityWhite -0.532 0.595
## Bullying:Race_EthnicityBlack or African American -1.239 0.216
## Bullying:Race_EthnicityIndigenous or Native American -1.403 0.161
## Bullying:Race_EthnicityLatinx or Hispanic -0.131 0.896
## Bullying:Race_EthnicityMultiracial 0.124 0.902
## Bullying:Race_EthnicityWhite -0.125 0.900
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.842 on 527 degrees of freedom
## Multiple R-squared: 0.1325, Adjusted R-squared: 0.1144
## F-statistic: 7.316 on 11 and 527 DF, p-value: 1.077e-11
# Scatter plot of bullying vs. STEM belonging with the fitted line of the
# interaction model mod2b for each race/ethnicity group.
#
# The lines are built from the model by coefficient NAME and mapped to
# Race_Ethnicity, so each line uses the same colour scale and legend as the
# points. Hand-picked colours and positional coefficient indices only
# approximate the point colours and would mislabel groups if the level order
# changed.
mod2b_lines <- local({
  coefs <- coef(mod2b)
  groups <- mod2b$xlevels$Race_Ethnicity
  # The first level is the reference category: zero offset for both the
  # intercept and the slope.
  intercept_shift <- c(
    0, unname(coefs[paste0("Race_Ethnicity", groups[-1])])
  )
  slope_shift <- c(
    0, unname(coefs[paste0("Bullying:Race_Ethnicity", groups[-1])])
  )
  data.frame(
    Race_Ethnicity = groups,
    intercept = coefs[["(Intercept)"]] + intercept_shift,
    slope = coefs[["Bullying"]] + slope_shift
  )
})

plot14 <- ggplot(
  STEM_data,
  aes(Bullying, Belonging_STEM, color = Race_Ethnicity)
) +
  geom_point(position = "jitter") +
  # geom_abline() does not inherit the plot aesthetics, so colour is mapped
  # explicitly here.
  geom_abline(
    data = mod2b_lines,
    mapping = aes(
      intercept = intercept,
      slope = slope,
      color = Race_Ethnicity
    )
  )
plot14
The above plot and model summary describe how combinations of race and bullying relate to how well people feel they belong in STEM. None of the interaction terms is statistically significant in this case, likely due at least in part to the same reason as above: the very unequal, and in some cases very small, numbers of respondents in each racial category.
# Additive linear model with Bullying as the response and gender identity
# and race/ethnicity as predictors.
mod3a <- lm(
  formula = Bullying ~ Gender_Identity + Race_Ethnicity,
  data = STEM_data
)
summary(mod3a)
##
## Call:
## lm(formula = Bullying ~ Gender_Identity + Race_Ethnicity, data = STEM_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.60576 -0.46792 -0.04409 0.42667 2.21304
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 1.99494 0.11510 17.333
## Gender_IdentityCisgender or Transgender Woman 0.23669 0.14908 1.588
## Gender_IdentityNon-Binary, GNC, or Other 0.61081 0.12453 4.905
## Gender_IdentityTransgender Man 0.31411 0.06356 4.942
## Race_EthnicityBlack or African American 0.22063 0.17643 1.251
## Race_EthnicityIndigenous or Native American 0.04657 0.27961 0.167
## Race_EthnicityLatinx or Hispanic 0.29978 0.12917 2.321
## Race_EthnicityMultiracial 0.18120 0.15195 1.193
## Race_EthnicityWhite 0.32187 0.12052 2.671
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## Gender_IdentityCisgender or Transgender Woman 0.1130
## Gender_IdentityNon-Binary, GNC, or Other 1.24e-06 ***
## Gender_IdentityTransgender Man 1.04e-06 ***
## Race_EthnicityBlack or African American 0.2117
## Race_EthnicityIndigenous or Native American 0.8678
## Race_EthnicityLatinx or Hispanic 0.0207 *
## Race_EthnicityMultiracial 0.2336
## Race_EthnicityWhite 0.0078 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6711 on 530 degrees of freedom
## Multiple R-squared: 0.08685, Adjusted R-squared: 0.07307
## F-statistic: 6.301 on 8 and 530 DF, p-value: 8.199e-08
# Horizontal box plots of the Bullying score for each gender identity,
# with one dodged box per race/ethnicity.
plot12 <- STEM_data %>%
  ggplot(
    mapping = aes(
      x = Gender_Identity,
      y = Bullying,
      fill = Race_Ethnicity
    )
  ) +
  geom_boxplot(position = "dodge") +
  coord_flip()
plot12
The above model is one that looks at levels of bullying experienced by people of different gender identities and races.
# Additive linear model: STEM belonging regressed on bullying and
# sexual identity (common slope, separate intercept per sexual identity).
mod5b <- lm(
  formula = Belonging_STEM ~ Bullying + Sexual_Identity,
  data = STEM_data
)
summary(mod5b)
##
## Call:
## lm(formula = Belonging_STEM ~ Bullying + Sexual_Identity, data = STEM_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.5160 -1.2705 0.1322 1.3935 4.1001
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 8.4130 0.3140 26.795
## Bullying -0.8222 0.1123 -7.318
## Sexual_IdentityGay/Lesbian 0.3762 0.1893 1.987
## Sexual_IdentityPansexual -0.6498 0.2887 -2.251
## Sexual_IdentityQueer -1.2010 0.3388 -3.545
## Sexual_IdentityUnsure, Questioning, or Other -0.3333 0.3342 -0.997
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## Bullying 9.31e-13 ***
## Sexual_IdentityGay/Lesbian 0.047396 *
## Sexual_IdentityPansexual 0.024806 *
## Sexual_IdentityQueer 0.000427 ***
## Sexual_IdentityUnsure, Questioning, or Other 0.319040
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.801 on 533 degrees of freedom
## Multiple R-squared: 0.1608, Adjusted R-squared: 0.153
## F-statistic: 20.43 on 5 and 533 DF, p-value: < 2.2e-16
# Scatter plot of bullying vs. STEM belonging with the fitted line of the
# additive model mod5b for each sexual identity.
#
# The lines are built from the model by coefficient NAME and mapped to
# Sexual_Identity, so each line uses the same colour scale and legend as the
# points. Hard-coded colours and positional coefficient indices would not
# match the point colours and would mislabel groups if the level order changed.
mod5b_lines <- local({
  coefs <- coef(mod5b)
  groups <- mod5b$xlevels$Sexual_Identity
  # The first level is the reference category, so its intercept offset is 0.
  offsets <- c(0, unname(coefs[paste0("Sexual_Identity", groups[-1])]))
  data.frame(
    Sexual_Identity = groups,
    intercept = coefs[["(Intercept)"]] + offsets,
    # Additive model: every group shares the same Bullying slope.
    slope = coefs[["Bullying"]]
  )
})

plot15 <- ggplot(
  STEM_data,
  aes(Bullying, Belonging_STEM, color = Sexual_Identity)
) +
  geom_point(position = "jitter") +
  # geom_abline() does not inherit the plot aesthetics, so colour is mapped
  # explicitly here.
  geom_abline(
    data = mod5b_lines,
    mapping = aes(
      intercept = intercept,
      slope = slope,
      color = Sexual_Identity
    )
  )
plot15
The above plot shows the relationship between bullying, sexuality, and belonging in STEM, with regression lines based on sexual identity.
# Two-way model: STEM belonging regressed on gender identity, race/ethnicity,
# and their interaction (one term per gender-by-race combination).
mod6a <- lm(
  formula = Belonging_STEM ~ Gender_Identity * Race_Ethnicity,
  data = STEM_data
)
summary(mod6a)
##
## Call:
## lm(formula = Belonging_STEM ~ Gender_Identity * Race_Ethnicity,
## data = STEM_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.9619 -1.3024 0.0381 1.3422 4.3219
##
## Coefficients: (1 not defined because of singularities)
## Estimate
## (Intercept) 7.7292
## Gender_IdentityCisgender or Transgender Woman -3.2292
## Gender_IdentityNon-Binary, GNC, or Other -2.0625
## Gender_IdentityTransgender Man -0.6220
## Race_EthnicityBlack or African American -0.5685
## Race_EthnicityIndigenous or Native American 0.7708
## Race_EthnicityLatinx or Hispanic -0.4653
## Race_EthnicityMultiracial -1.0453
## Race_EthnicityWhite -0.7673
## Gender_IdentityCisgender or Transgender Woman:Race_EthnicityBlack or African American 2.7351
## Gender_IdentityNon-Binary, GNC, or Other:Race_EthnicityBlack or African American -1.9732
## Gender_IdentityTransgender Man:Race_EthnicityBlack or African American -1.4137
## Gender_IdentityCisgender or Transgender Woman:Race_EthnicityIndigenous or Native American NA
## Gender_IdentityNon-Binary, GNC, or Other:Race_EthnicityIndigenous or Native American -0.5625
## Gender_IdentityTransgender Man:Race_EthnicityIndigenous or Native American -3.3363
## Gender_IdentityCisgender or Transgender Woman:Race_EthnicityLatinx or Hispanic 3.3090
## Gender_IdentityNon-Binary, GNC, or Other:Race_EthnicityLatinx or Hispanic 0.1486
## Gender_IdentityTransgender Man:Race_EthnicityLatinx or Hispanic -0.9651
## Gender_IdentityCisgender or Transgender Woman:Race_EthnicityMultiracial 1.7328
## Gender_IdentityNon-Binary, GNC, or Other:Race_EthnicityMultiracial 0.6009
## Gender_IdentityTransgender Man:Race_EthnicityMultiracial -0.2007
## Gender_IdentityCisgender or Transgender Woman:Race_EthnicityWhite 1.7152
## Gender_IdentityNon-Binary, GNC, or Other:Race_EthnicityWhite 1.0560
## Gender_IdentityTransgender Man:Race_EthnicityWhite -0.7867
## Std. Error
## (Intercept) 0.3705
## Gender_IdentityCisgender or Transgender Woman 1.8525
## Gender_IdentityNon-Binary, GNC, or Other 1.1115
## Gender_IdentityTransgender Man 0.7797
## Race_EthnicityBlack or African American 0.6104
## Race_EthnicityIndigenous or Native American 1.3358
## Race_EthnicityLatinx or Hispanic 0.4218
## Race_EthnicityMultiracial 0.5754
## Race_EthnicityWhite 0.3983
## Gender_IdentityCisgender or Transgender Woman:Race_EthnicityBlack or African American 2.1829
## Gender_IdentityNon-Binary, GNC, or Other:Race_EthnicityBlack or African American 2.1829
## Gender_IdentityTransgender Man:Race_EthnicityBlack or African American 1.1462
## Gender_IdentityCisgender or Transgender Woman:Race_EthnicityIndigenous or Native American NA
## Gender_IdentityNon-Binary, GNC, or Other:Race_EthnicityIndigenous or Native American 2.1283
## Gender_IdentityTransgender Man:Race_EthnicityIndigenous or Native American 1.8312
## Gender_IdentityCisgender or Transgender Woman:Race_EthnicityLatinx or Hispanic 2.0726
## Gender_IdentityNon-Binary, GNC, or Other:Race_EthnicityLatinx or Hispanic 1.3910
## Gender_IdentityTransgender Man:Race_EthnicityLatinx or Hispanic 0.8730
## Gender_IdentityCisgender or Transgender Woman:Race_EthnicityMultiracial 2.2962
## Gender_IdentityNon-Binary, GNC, or Other:Race_EthnicityMultiracial 1.3398
## Gender_IdentityTransgender Man:Race_EthnicityMultiracial 0.9923
## Gender_IdentityCisgender or Transgender Woman:Race_EthnicityWhite 1.9307
## Gender_IdentityNon-Binary, GNC, or Other:Race_EthnicityWhite 1.2215
## Gender_IdentityTransgender Man:Race_EthnicityWhite 0.8095
## t value
## (Intercept) 20.862
## Gender_IdentityCisgender or Transgender Woman -1.743
## Gender_IdentityNon-Binary, GNC, or Other -1.856
## Gender_IdentityTransgender Man -0.798
## Race_EthnicityBlack or African American -0.931
## Race_EthnicityIndigenous or Native American 0.577
## Race_EthnicityLatinx or Hispanic -1.103
## Race_EthnicityMultiracial -1.817
## Race_EthnicityWhite -1.926
## Gender_IdentityCisgender or Transgender Woman:Race_EthnicityBlack or African American 1.253
## Gender_IdentityNon-Binary, GNC, or Other:Race_EthnicityBlack or African American -0.904
## Gender_IdentityTransgender Man:Race_EthnicityBlack or African American -1.233
## Gender_IdentityCisgender or Transgender Woman:Race_EthnicityIndigenous or Native American NA
## Gender_IdentityNon-Binary, GNC, or Other:Race_EthnicityIndigenous or Native American -0.264
## Gender_IdentityTransgender Man:Race_EthnicityIndigenous or Native American -1.822
## Gender_IdentityCisgender or Transgender Woman:Race_EthnicityLatinx or Hispanic 1.597
## Gender_IdentityNon-Binary, GNC, or Other:Race_EthnicityLatinx or Hispanic 0.107
## Gender_IdentityTransgender Man:Race_EthnicityLatinx or Hispanic -1.106
## Gender_IdentityCisgender or Transgender Woman:Race_EthnicityMultiracial 0.755
## Gender_IdentityNon-Binary, GNC, or Other:Race_EthnicityMultiracial 0.448
## Gender_IdentityTransgender Man:Race_EthnicityMultiracial -0.202
## Gender_IdentityCisgender or Transgender Woman:Race_EthnicityWhite 0.888
## Gender_IdentityNon-Binary, GNC, or Other:Race_EthnicityWhite 0.865
## Gender_IdentityTransgender Man:Race_EthnicityWhite -0.972
## Pr(>|t|)
## (Intercept) <2e-16
## Gender_IdentityCisgender or Transgender Woman 0.0819
## Gender_IdentityNon-Binary, GNC, or Other 0.0641
## Gender_IdentityTransgender Man 0.4253
## Race_EthnicityBlack or African American 0.3521
## Race_EthnicityIndigenous or Native American 0.5642
## Race_EthnicityLatinx or Hispanic 0.2705
## Race_EthnicityMultiracial 0.0698
## Race_EthnicityWhite 0.0546
## Gender_IdentityCisgender or Transgender Woman:Race_EthnicityBlack or African American 0.2108
## Gender_IdentityNon-Binary, GNC, or Other:Race_EthnicityBlack or African American 0.3664
## Gender_IdentityTransgender Man:Race_EthnicityBlack or African American 0.2180
## Gender_IdentityCisgender or Transgender Woman:Race_EthnicityIndigenous or Native American NA
## Gender_IdentityNon-Binary, GNC, or Other:Race_EthnicityIndigenous or Native American 0.7917
## Gender_IdentityTransgender Man:Race_EthnicityIndigenous or Native American 0.0690
## Gender_IdentityCisgender or Transgender Woman:Race_EthnicityLatinx or Hispanic 0.1110
## Gender_IdentityNon-Binary, GNC, or Other:Race_EthnicityLatinx or Hispanic 0.9150
## Gender_IdentityTransgender Man:Race_EthnicityLatinx or Hispanic 0.2694
## Gender_IdentityCisgender or Transgender Woman:Race_EthnicityMultiracial 0.4508
## Gender_IdentityNon-Binary, GNC, or Other:Race_EthnicityMultiracial 0.6540
## Gender_IdentityTransgender Man:Race_EthnicityMultiracial 0.8398
## Gender_IdentityCisgender or Transgender Woman:Race_EthnicityWhite 0.3747
## Gender_IdentityNon-Binary, GNC, or Other:Race_EthnicityWhite 0.3877
## Gender_IdentityTransgender Man:Race_EthnicityWhite 0.3316
##
## (Intercept) ***
## Gender_IdentityCisgender or Transgender Woman .
## Gender_IdentityNon-Binary, GNC, or Other .
## Gender_IdentityTransgender Man
## Race_EthnicityBlack or African American
## Race_EthnicityIndigenous or Native American
## Race_EthnicityLatinx or Hispanic
## Race_EthnicityMultiracial .
## Race_EthnicityWhite .
## Gender_IdentityCisgender or Transgender Woman:Race_EthnicityBlack or African American
## Gender_IdentityNon-Binary, GNC, or Other:Race_EthnicityBlack or African American
## Gender_IdentityTransgender Man:Race_EthnicityBlack or African American
## Gender_IdentityCisgender or Transgender Woman:Race_EthnicityIndigenous or Native American
## Gender_IdentityNon-Binary, GNC, or Other:Race_EthnicityIndigenous or Native American
## Gender_IdentityTransgender Man:Race_EthnicityIndigenous or Native American .
## Gender_IdentityCisgender or Transgender Woman:Race_EthnicityLatinx or Hispanic
## Gender_IdentityNon-Binary, GNC, or Other:Race_EthnicityLatinx or Hispanic
## Gender_IdentityTransgender Man:Race_EthnicityLatinx or Hispanic
## Gender_IdentityCisgender or Transgender Woman:Race_EthnicityMultiracial
## Gender_IdentityNon-Binary, GNC, or Other:Race_EthnicityMultiracial
## Gender_IdentityTransgender Man:Race_EthnicityMultiracial
## Gender_IdentityCisgender or Transgender Woman:Race_EthnicityWhite
## Gender_IdentityNon-Binary, GNC, or Other:Race_EthnicityWhite
## Gender_IdentityTransgender Man:Race_EthnicityWhite
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.815 on 516 degrees of freedom
## Multiple R-squared: 0.175, Adjusted R-squared: 0.1398
## F-statistic: 4.975 on 22 and 516 DF, p-value: 4.071e-12
# Horizontal box plots of STEM belonging for each gender identity,
# with one dodged box per race/ethnicity.
plot16 <- STEM_data %>%
  ggplot(
    mapping = aes(
      x = Gender_Identity,
      y = Belonging_STEM,
      fill = Race_Ethnicity
    )
  ) +
  geom_boxplot(position = "dodge") +
  coord_flip()
plot16
# Draw the standard lm diagnostic plots for mod6a in a 2 x 2 grid.
# par() returns the previous settings; they are restored afterwards so that
# later base-graphics plots are not forced into the 2 x 2 layout.
old_par <- par(mfrow = c(2, 2))
plot(mod6a)
par(old_par)
## Warning: not plotting observations with leverage one:
## 284
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced
The final model and plot show how the combination of gender and race relates to how strongly an individual feels they belong in STEM. None of the individual gender-by-race terms is statistically significant at the 0.05 level, and one interaction coefficient could not be estimated at all (presumably because no respondents fall into that combination); I do believe that more of these terms would be significant if there were more data points corresponding to each combination of gender identity and race. The residuals for this model are also fairly well distributed, which is a good sign for the validity of my conclusion.