1 Load Libraries

library(naniar) # for the gg_miss_upset() command
library(psych) # for the describe() command
library(kableExtra) # for tables
library(corrplot) # for correlation plots
library(car) # for VIF
library(sjPlot) # for regression plots
library(ggplot2) # for plots

2 Load Data

# Read the CSV data file into `df`; the path is relative to the working directory.
df <- utils::read.csv("data/data_9-16.csv")

3 Notes

Selected hypotheses:

  • There is a significant relationship between TMFS (masculine-feminine) and interest/confidence
  • Do significant differences in interest/confidence emerge when using TMFS scale versus self-reported gender?

TMFS:

  • I consider myself: (consider)
  • Ideally, I would like to be: (ideal)(tmfs)
  • Traditionally, my interests would be considered as: (consider)(tmfs)
  • Traditionally, my attitudes and beliefs would be considered: (ideal)(tmfs)
  • Traditionally, my behavior would be considered: (ideal)(tmfs)
  • Traditionally, my outer appearance would be considered: (consider)

4 Create Variables

# Build the analysis data frame: one row per UID plus composite scores.
# Each composite is the plain mean of its items, summed left to right, so a
# missing value on any item makes the composite NA.
item_mean <- function(items) Reduce(`+`, df[items]) / length(items)

df2 <- df["UID"]
df2$tmfs_ideal <- item_mean(c("tmfs2", "tmfs4", "tmfs5"))
df2$tmfs_consider <- item_mean(c("tmfs1", "tmfs3", "tmfs6"))
df2$tmfs <- item_mean(c("tmfs2", "tmfs3", "tmfs4", "tmfs5"))
df2$int <- item_mean(c("salg1", "salg2", "salg3", "salg4"))
df2$con <- item_mean(c("salg5", "salg6", "salg7"))

# Collapse the free-text gender responses into three codes:
#   "F" = f / female / woman, "M" = m / male / man, "N" = anything else
#   (including missing responses).
# Responses are lower-cased and stripped of surrounding whitespace before
# matching, so variants such as "FEMALE", " f" or "Man" are no longer left in
# the "N" category just because their exact spelling was not listed.
gender_clean <- tolower(trimws(as.character(df$gender)))

df2$gender2 <- "N"
df2$gender2[gender_clean %in% c("f", "female", "woman")] <- "F"
df2$gender2[gender_clean %in% c("m", "male", "man")] <- "M"
df2$gender2 <- as.factor(df2$gender2)

4.1 View Missing Data

# Plot the missing-data patterns across the columns of df2.
naniar::gg_miss_upset(df2)

4.2 View Item Normality

# Descriptive statistics for every column of df2 except the first (UID).
desc <- describe(df2[-1])

# Rows whose kurtosis or skew falls outside +/-2 are highlighted in the table:
# bold for kurtosis, italic for skew. which() drops NA comparisons.
extreme_kurtosis <- which(abs(desc$kurtosis) > 2)
extreme_skew <- which(abs(desc$skew) > 2)

kable(round(desc, digits = 2)) %>%
  kable_styling() %>%
  row_spec(extreme_kurtosis, bold = TRUE) %>%
  row_spec(extreme_skew, italic = TRUE)
vars n mean sd median trimmed mad min max range skew kurtosis se
tmfs_ideal 1 394 5.08 1.75 5.00 5.19 1.98 1.00 8.33 7.33 -0.48 -0.40 0.09
tmfs_consider 2 394 4.12 1.55 4.00 4.11 1.48 1.00 7.33 6.33 0.05 -0.67 0.08
tmfs 3 394 4.86 1.57 5.00 4.97 1.85 1.00 7.50 6.50 -0.55 -0.22 0.08
int 4 451 3.53 0.97 3.75 3.56 1.11 1.25 5.00 3.75 -0.33 -0.71 0.05
con 5 451 3.39 0.94 3.33 3.39 0.99 1.00 5.00 4.00 -0.04 -0.47 0.04
gender2* 6 498 1.56 0.75 1.00 1.45 0.00 1.00 3.00 2.00 0.92 -0.64 0.03

4.3 Check Outliers

Outliers were identified using Mahalanobis distance, with a chi-square cutoff at the .99 quantile. Two outliers were dropped.

# Screen for multivariate outliers on the composite scores using Mahalanobis
# distance, then build df3 = df2 without the flagged UIDs.

# Complete cases only; gender2 is categorical, so it is excluded (by name
# rather than by column position).
d <- na.omit(subset(df2, select = -gender2))

# Column 1 is UID; the remaining columns are the scores being screened.
scores <- d[-1]
m_dist <- mahalanobis(scores, colMeans(scores), cov(scores))
d$MD <- round(m_dist, 1) # rounded copy, for plotting/inspection only
plot(d$MD)
describe(m_dist)
##    vars   n mean   sd median trimmed  mad  min   max range skew kurtosis   se
## X1    1 394 4.99 2.97   4.59     4.7 2.82 0.38 19.42 19.04 1.19     2.42 0.15
# The chi-square degrees of freedom must equal the number of variables that
# entered the distance. Count them from `scores`: `d` now also carries the MD
# column, so ncol(d) - 1 would be one too many.
cut <- qchisq(.99, df = ncol(scores))
abline(h = cut, col = "red")

# Flag on the unrounded distance so that rounding cannot push a borderline
# case across the cutoff.
d$outlier <- unname(m_dist > cut)
table(d$outlier)
# NOTE(review): the counts below were recorded with the previous cutoff
# (df = 6); re-run to refresh them.
## 
## FALSE  TRUE 
##   392     2
outs <- d[d$outlier, c("UID", "outlier")]
df3 <- df2[!(df2$UID %in% outs$UID), ]

5 H1

There is a significant relationship between TMFS (masculine-feminine) and interest/confidence

  • TMFS: lower score = more masculine
  • Int/Con: lower score = less interested/less confident

The correlation matrix suggests that there is no significant relationship between TMFS and interest (p > .05) or between TMFS and confidence (p > .05).

# Pairwise correlations among the interest, confidence and TMFS composites.
out <- corr.test(subset(df3, select=c(int, con, tmfs, tmfs_ideal, tmfs_consider)))

# corr.test() returns an asymmetric p matrix: raw p-values below the diagonal,
# multiplicity-adjusted p-values above it. corrplot() permutes p.mat together
# with the correlations when order = "hclust", so passing out$p directly can
# put raw and adjusted values side by side in the plotted upper triangle.
# Mirror the adjusted values so the significance labels are consistent
# whatever the ordering.
p_adj <- out$p
p_adj[lower.tri(p_adj)] <- t(p_adj)[lower.tri(p_adj)]

corrplot(out$r, type="upper", method = "color", tl.col = "black", tl.cex = .75,
         p.mat = p_adj,
         sig.level = c(.001, .01, .05), pch.cex = .9,
         insig = "label_sig", pch.col = "white",
         order = "hclust")

out$r
##                        int         con         tmfs  tmfs_ideal tmfs_consider
## int            1.000000000  0.61173923  0.001803163 -0.05728685    0.09009992
## con            0.611739227  1.00000000 -0.015857991 -0.04630012   -0.02386500
## tmfs           0.001803163 -0.01585799  1.000000000  0.95607678    0.50978219
## tmfs_ideal    -0.057286849 -0.04630012  0.956076777  1.00000000    0.35750063
## tmfs_consider  0.090099915 -0.02386500  0.509782192  0.35750063    1.00000000
out$p
##                        int          con          tmfs    tmfs_ideal
## int           0.000000e+00 1.746244e-46  1.000000e+00  1.000000e+00
## con           1.940271e-47 0.000000e+00  1.000000e+00  1.000000e+00
## tmfs          9.716118e-01 7.542888e-01  0.000000e+00 5.904070e-209
## tmfs_ideal    2.578325e-01 3.605811e-01 5.904070e-210  0.000000e+00
## tmfs_consider 7.477923e-02 6.375971e-01  2.569236e-27  2.917963e-13
##               tmfs_consider
## int            4.486754e-01
## con            1.000000e+00
## tmfs           2.055389e-26
## tmfs_ideal     2.042574e-12
## tmfs_consider  0.000000e+00

6 H2

Do significant differences in interest/confidence emerge when using TMFS scale versus self-reported gender?

  • TMFS: lower score = more masculine
  • Int/Con: lower score = less interested/less confident
  • Gender: F = woman, M = man, N = non-binary, genderqueer, or NA
# Keep only respondents coded F or M, and make M the reference level so the
# gender coefficient is reported for F relative to M.
df3 <- df3[which(df3$gender2 != "N"), , drop = FALSE]
df3$gender2 <- relevel(df3$gender2, ref = "M")

# Confidence regressed on TMFS, self-reported gender and interest.
reg_model <- lm(con ~ tmfs + gender2 + int, data = df3)

# Variance inflation factors for the three predictors.
vif(reg_model)
##     tmfs  gender2      int 
## 1.095629 1.106730 1.010879
# Draw lm diagnostic plots 1 to 5, one after another.
for (panel in 1:5) {
  plot(reg_model, which = panel)
}

summary(reg_model)
## 
## Call:
## lm(formula = con ~ tmfs + gender2 + int, data = df3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.95718 -0.53168  0.00806  0.53513  1.79978 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.56619    0.20181   7.761 8.37e-14 ***
## tmfs         0.02537    0.02501   1.014    0.311    
## gender2F    -0.40533    0.08696  -4.661 4.40e-06 ***
## int          0.56734    0.04184  13.558  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7241 on 369 degrees of freedom
##   (44 observations deleted due to missingness)
## Multiple R-squared:  0.3746, Adjusted R-squared:  0.3695 
## F-statistic: 73.67 on 3 and 369 DF,  p-value: < 2.2e-16