Descriptive statistics

Skewness, kurtosis for AMASA, PI, DS, and PTS.

library(moments) # for calculating skewness & kurtosis
library(Hmisc) # for correlation matrices
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
dat <- read.csv("~/Desktop/WES projects/Paper 1/April21.csv")

AMASA

# kurtosis(dat$AMMSA) # 2.412395 is -3 < k < 3, so we're in a normal range for kurtosis
# skewness(dat$AMMSA) # 0.324258 is -0.5 < skew < 0.5, so we're in a normal range for skew
hist(dat$AMMSA,main="AMMSA")

PI

# kurtosis(dat$PI) # 2.731565 is -3 < k < 3, so we're in a normal range for kurtosis
# skewness(dat$PI) # 0.2949723 is -0.5 < skew < 0.5, so we're in a normal range for skew
hist(dat$PI,main="PI")

### DS

# kurtosis(dat$DS) # 2.67864 is -3 < k < 3, so we're in a normal range for kurtosis
# skewness(dat$DS) # 0.4451866 is -0.5 < skew < 0.5, so we're in a normal range for skew
hist(dat$DS,main="DS")

PTS

# kurtosis(dat$PTS) # 2.530473 is -3 < k < 3, so we're in a normal range for kurtosis
# skewness(dat$PTS) # 0.5905595  > 0.5, so we have a skew problem
hist(dat$PTS,main="PTS")

This obviously isn’t normally distributed. Let’s try a square root transform:

# kurtosis(sqrt(dat$PTS+.01)) # 2.494193 is -3 < k < 3, so we're in a normal range for kurtosis
# skewness(sqrt(dat$PTS+.01)) # 0.30088 is -0.5 < skew < 0.5, so we're in a normal range for skew
hist(sqrt(dat$PTS+.01),main="Transformed PTS")

Scale them afterwards if there’s nothing sketchy.

dat$AMMSA=scale(dat$AMMSA)
dat$PI=scale(dat$PI)
dat$DS=scale(dat$DS)
dat$PTS=scale(sqrt(dat$PTS+.01))

Correlations matrix

rcorr(cbind(
  dat$AMMSA,dat$PI,dat$DS,dat$PTS
))
##       [,1]  [,2] [,3]  [,4]
## [1,]  1.00 -0.07 0.02 -0.05
## [2,] -0.07  1.00 0.06  0.75
## [3,]  0.02  0.06 1.00  0.07
## [4,] -0.05  0.75 0.07  1.00
## 
## n= 697 
## 
## 
## P
##      [,1]   [,2]   [,3]   [,4]  
## [1,]        0.0558 0.5924 0.1729
## [2,] 0.0558        0.0975 0.0000
## [3,] 0.5924 0.0975        0.0490
## [4,] 0.1729 0.0000 0.0490

The top are the r value, the bottom are the p-values.

Cronbach’s alpha, where applicable

full.dat <- read.csv("~/Desktop/WES projects/Paper 1/full_dat.csv")
library(psych)
## 
## Attaching package: 'psych'
## The following object is masked from 'package:Hmisc':
## 
##     describe
# alpha(full.dat[,c(83:98)])[1] # AMMSA alpha = 0.9212115
# alpha(full.dat[,c(110:129)])[1] # CESDS alpha = 0.78134
# alpha(full.dat[,
#                c(175:179,
#                  182:186,
#                  189:193,
#                  196:200,
#                  203:207,
#                  210:214)]
#       )[1]
# PI alpha = 0.96171

AMMSA \(\alpha\) = 0.92
CESDS \(\alpha\) = 0.78
PI \(\alpha\) = 0.96

Examining sub-samples

Sample size for each group:

online.sample=subset(dat,dat$Sample==1 | dat$Sample==3)
#nrow(online.sample) # n = 534 for online sample

college.sample=subset(dat,dat$Sample==2)
#nrow(college.sample) # n = 163 for college sample

# 534 + 163 = 697

Compare them on PI, PTS, and DS

PI

t.test(online.sample$PI,college.sample$PI)
## 
##  Welch Two Sample t-test
## 
## data:  online.sample$PI and college.sample$PI
## t = -3.4018, df = 280.2, p-value = 0.000767
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.4644618 -0.1239638
## sample estimates:
##   mean of x   mean of y 
## -0.06880443  0.22540836

There is a significant difference in PI between the two samples.

PTS

t.test(online.sample$PTS,college.sample$PTS)
## 
##  Welch Two Sample t-test
## 
## data:  online.sample$PTS and college.sample$PTS
## t = -4.0745, df = 308.35, p-value = 5.871e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.4936818 -0.1721384
## sample estimates:
##   mean of x   mean of y 
## -0.07785416  0.25505594

There is a a statistically significant difference in PTS between the two samples.

DS

t.test(online.sample$DS,college.sample$DS)
## 
##  Welch Two Sample t-test
## 
## data:  online.sample$DS and college.sample$DS
## t = -2.8002, df = 273.02, p-value = 0.005472
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.42010549 -0.07324942
## sample estimates:
##   mean of x   mean of y 
## -0.05768784  0.18898961

Also significant.

Conclusion:

We will have to take recruitment method into account for analyses.

dat$recruit=as.factor(ifelse(dat$Sample==2,"college","online"))

Demographics for each sample

Online sample

# mean(online.sample$Age)
# sd(online.sample$Age)

Age: M = 35.90, SD = 11.32

table(online.sample$Sex)
## 
##   0   1   2 
##   2 116 416

The table above shows the number of people who have their gender coded as 0, 1, and 2.

table(online.sample$Sex)/nrow(online.sample)
## 
##           0           1           2 
## 0.003745318 0.217228464 0.779026217

Above are the percentages.

If y’all wanted “sex” instead or want additional demographic data, let me know.

College sample

# mean(college.sample$Age)
# sd(college.sample$Age)

Age: M = 19.03, SD = 2.11

table(college.sample$Sex)
## 
##   0   1   2 
##   2  29 132

The table above shows the number of people who have their gender coded as 0, 1, and 2.

table(college.sample$Sex)/nrow(college.sample)
## 
##          0          1          2 
## 0.01226994 0.17791411 0.80981595

Above are the percentages.

If y’all wanted “sex” instead or want additional demographic data, let me know.

H1

Each hypothesis will be tested with two linear models, one with PTSS as the dependent variable, and another with DS as the dependent variable. H1 will be tested with two simple linear regression models, H1a and H1b. H1a will have posttraumatic stress symptoms (PTSS) as the dependent variable and the frequency of nonconsensual sexual experiences (NSEs) endorsed by participants as the independent variable. H1b will have depressive symptomatology (DS) as the dependent variable and NSE Frequency as the independent variable. We will also have each independent variable interact with recruitment method to determine if participant’s population confounded any of our results.

DS

h1.ds=lm(DS~NSEV_Freq*recruit,dat)
summary(h1.ds)
## 
## Call:
## lm(formula = DS ~ NSEV_Freq * recruit, data = dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.62676 -0.79133 -0.09627  0.69400  2.91050 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)  
## (Intercept)              0.128624   0.108299   1.188    0.235  
## NSEV_Freq                0.004594   0.005719   0.803    0.422  
## recruitonline           -0.237582   0.127573  -1.862    0.063 .
## NSEV_Freq:recruitonline -0.002015   0.006286  -0.320    0.749  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9955 on 693 degrees of freedom
## Multiple R-squared:  0.01323,    Adjusted R-squared:  0.008956 
## F-statistic: 3.097 on 3 and 693 DF,  p-value: 0.02633
# Creating a data frame to apply the FDR later on
p.vals=data.frame(summary(h1.ds)[4])
p.vals$names=names(h1.ds$coefficients)
p.vals$model=rep("h1.ds",length(names(h1.ds$coefficients)))
colnames(p.vals)=c("Estimate","SE","t","p","coef","model")

PTSS

h1.pts=lm(PTS~NSEV_Freq*recruit,dat)
summary(h1.pts)
## 
## Call:
## lm(formula = PTS ~ NSEV_Freq * recruit, data = dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.48690 -0.59230  0.08739  0.64590  2.36398 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             -0.139899   0.097686  -1.432 0.152558    
## NSEV_Freq                0.030055   0.005159   5.826  8.7e-09 ***
## recruitonline           -0.448479   0.115071  -3.897 0.000107 ***
## NSEV_Freq:recruitonline -0.004375   0.005670  -0.772 0.440646    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.898 on 693 degrees of freedom
## Multiple R-squared:  0.1972, Adjusted R-squared:  0.1937 
## F-statistic: 56.73 on 3 and 693 DF,  p-value: < 2.2e-16
# Creating a data frame to apply the FDR later on
current.mod=h1.pts
current.str="h1.pts"

to.add=data.frame(summary(current.mod)[4])
to.add$names=names(current.mod$coefficients)
to.add$model=rep(current.str,length(names(current.mod$coefficients)))
colnames(to.add)=c("Estimate","SE","t","p","coef","model")

p.vals=rbind(p.vals,to.add)

H2

H2 will be tested with two multiple regression models, H2a and H2b. H2a will have DS scores as the dependent variable. The independent variables will be NSE Frequency and the acceptance of myths about sexual aggression (AMASA). Again, recruitment method will interact with each independent variable. H2b will be identical to H2a except the dependent variable will be PTSS rather than DS.

DS

h2.ds=lm(DS~NSEV_Freq*dat$AMMSA*recruit,dat)
summary(h2.ds)
## 
## Call:
## lm(formula = DS ~ NSEV_Freq * dat$AMMSA * recruit, data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.6187 -0.7928 -0.0954  0.7023  2.9099 
## 
## Coefficients:
##                                     Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                        0.1378695  0.1268218   1.087   0.2774  
## NSEV_Freq                          0.0043261  0.0061841   0.700   0.4844  
## dat$AMMSA                         -0.0192276  0.1364155  -0.141   0.8880  
## recruitonline                     -0.2448522  0.1439895  -1.700   0.0895 .
## NSEV_Freq:dat$AMMSA                0.0005891  0.0060601   0.097   0.9226  
## NSEV_Freq:recruitonline           -0.0018418  0.0067179  -0.274   0.7840  
## dat$AMMSA:recruitonline            0.0417297  0.1534174   0.272   0.7857  
## NSEV_Freq:dat$AMMSA:recruitonline -0.0018209  0.0065502  -0.278   0.7811  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9982 on 689 degrees of freedom
## Multiple R-squared:  0.01363,    Adjusted R-squared:  0.003606 
## F-statistic:  1.36 on 7 and 689 DF,  p-value: 0.2195
# Creating a data frame to apply the FDR later on
current.mod=h2.ds
current.str="h2.ds"

to.add=data.frame(summary(current.mod)[4])
to.add$names=names(current.mod$coefficients)
to.add$model=rep(current.str,length(names(current.mod$coefficients)))
colnames(to.add)=c("Estimate","SE","t","p","coef","model")

p.vals=rbind(p.vals,to.add)

PTS

h2.pts=lm(PTS~NSEV_Freq*dat$AMMSA*recruit,dat)
summary(h2.pts)
## 
## Call:
## lm(formula = PTS ~ NSEV_Freq * dat$AMMSA * recruit, data = dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.57612 -0.57341  0.07453  0.63360  2.27066 
## 
## Coefficients:
##                                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       -0.031224   0.113746  -0.275   0.7838    
## NSEV_Freq                          0.027790   0.005547   5.010 6.91e-07 ***
## dat$AMMSA                         -0.219492   0.122350  -1.794   0.0733 .  
## recruitonline                     -0.570798   0.129143  -4.420 1.15e-05 ***
## NSEV_Freq:dat$AMMSA                0.004039   0.005435   0.743   0.4577    
## NSEV_Freq:recruitonline           -0.001988   0.006025  -0.330   0.7415    
## dat$AMMSA:recruitonline            0.130514   0.137599   0.949   0.3432    
## NSEV_Freq:dat$AMMSA:recruitonline -0.003607   0.005875  -0.614   0.5394    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8953 on 689 degrees of freedom
## Multiple R-squared:  0.2065, Adjusted R-squared:  0.1985 
## F-statistic: 25.62 on 7 and 689 DF,  p-value: < 2.2e-16
# Creating a data frame to apply the FDR later on
current.mod=h2.pts
current.str="h2.pts"

to.add=data.frame(summary(current.mod)[4])
to.add$names=names(current.mod$coefficients)
to.add$model=rep(current.str,length(names(current.mod$coefficients)))
colnames(to.add)=c("Estimate","SE","t","p","coef","model")

p.vals=rbind(p.vals,to.add)

H3

H3 will be tested with two multiple regression models, H3a and H3b. H3a will have DS as the dependent variable. The independent variables will include NSE frequency and the five tactics used against the participant to perpetrate NSEs: the use of threat or physical force (force), intoxication, abuse of authority (authority), verbal coercion (coercion), and enticement. H3b will be identical to H3a except the dependent variable will be PTSS instead of DS. Recruitment method will interact with all independent variables in both models to determine if this factor influenced the results.

DS

h3.ds=lm(DS~NSEV_Freq+V_Force+V_Intox+V_Auth+V_Coerce+V_Entice+recruit,dat)
summary(h3.ds)
## 
## Call:
## lm(formula = DS ~ NSEV_Freq + V_Force + V_Intox + V_Auth + V_Coerce + 
##     V_Entice + recruit, data = dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.48065 -0.78865 -0.07211  0.69094  2.89002 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)   
## (Intercept)    0.335506   0.190090   1.765  0.07801 . 
## NSEV_Freq      0.001034   0.003344   0.309  0.75714   
## V_Force        0.180619   0.099162   1.821  0.06897 . 
## V_Intox        0.009066   0.089051   0.102  0.91894   
## V_Auth        -0.071101   0.088458  -0.804  0.42180   
## V_Coerce      -0.293419   0.179157  -1.638  0.10192   
## V_Entice       0.052731   0.111472   0.473  0.63633   
## recruitonline -0.263348   0.091166  -2.889  0.00399 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9935 on 689 degrees of freedom
## Multiple R-squared:  0.02292,    Adjusted R-squared:  0.013 
## F-statistic: 2.309 on 7 and 689 DF,  p-value: 0.02479
# Creating a data frame to apply the FDR later on
current.mod=h3.ds
current.str="h3.ds"

to.add=data.frame(summary(current.mod)[4])
to.add$names=names(current.mod$coefficients)
to.add$model=rep(current.str,length(names(current.mod$coefficients)))
colnames(to.add)=c("Estimate","SE","t","p","coef","model")

p.vals=rbind(p.vals,to.add)

PTS

h3.pts=lm(PTS~NSEV_Freq+V_Force+V_Intox+V_Auth+V_Coerce+V_Entice+recruit,dat)
summary(h3.pts)
## 
## Call:
## lm(formula = PTS ~ NSEV_Freq + V_Force + V_Intox + V_Auth + V_Coerce + 
##     V_Entice + recruit, data = dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.51273 -0.60482  0.06819  0.65691  2.47027 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -0.131859   0.171510  -0.769   0.4423    
## NSEV_Freq      0.023605   0.003017   7.824 1.93e-14 ***
## V_Force        0.227592   0.089470   2.544   0.0112 *  
## V_Intox        0.012063   0.080347   0.150   0.8807    
## V_Auth        -0.036327   0.079812  -0.455   0.6491    
## V_Coerce       0.047699   0.161645   0.295   0.7680    
## V_Entice      -0.103814   0.100577  -1.032   0.3023    
## recruitonline -0.506303   0.082255  -6.155 1.27e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8964 on 689 degrees of freedom
## Multiple R-squared:  0.2046, Adjusted R-squared:  0.1965 
## F-statistic: 25.32 on 7 and 689 DF,  p-value: < 2.2e-16
# Creating a data frame to apply the FDR later on
current.mod=h3.pts
current.str="h3.pts"

to.add=data.frame(summary(current.mod)[4])
to.add$names=names(current.mod$coefficients)
to.add$model=rep(current.str,length(names(current.mod$coefficients)))
colnames(to.add)=c("Estimate","SE","t","p","coef","model")

p.vals=rbind(p.vals,to.add)

H4

H4 will be tested with two multiple regression models, H4a and H4b. H4a will have DS as the dependent variable, NSE Frequency and psychological inflexibility as independent variables along with the interaction between NSE Frequency and psychological inflexibility. Recruitment method will also interact with both variables. H4b will be identical to H4a except the dependent variable will be PTSS instead of DS.

DS

h4.ds=lm(DS~NSEV_Freq*dat$PI*recruit,dat)
summary(h4.ds)
## 
## Call:
## lm(formula = DS ~ NSEV_Freq * dat$PI * recruit, data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.5924 -0.7871 -0.1074  0.6804  2.8631 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)
## (Intercept)                     0.092809   0.117562   0.789    0.430
## NSEV_Freq                       0.009165   0.008243   1.112    0.267
## dat$PI                          0.036792   0.115087   0.320    0.749
## recruitonline                  -0.178333   0.137308  -1.299    0.194
## NSEV_Freq:dat$PI               -0.003810   0.004956  -0.769    0.442
## NSEV_Freq:recruitonline        -0.008012   0.008717  -0.919    0.358
## dat$PI:recruitonline           -0.017114   0.134591  -0.127    0.899
## NSEV_Freq:dat$PI:recruitonline  0.005297   0.005512   0.961    0.337
## 
## Residual standard error: 0.9967 on 689 degrees of freedom
## Multiple R-squared:  0.01654,    Adjusted R-squared:  0.006545 
## F-statistic: 1.655 on 7 and 689 DF,  p-value: 0.1171
# Creating a data frame to apply the FDR later on
current.mod=h4.ds
current.str="h4.ds"

to.add=data.frame(summary(current.mod)[4])
to.add$names=names(current.mod$coefficients)
to.add$model=rep(current.str,length(names(current.mod$coefficients)))
colnames(to.add)=c("Estimate","SE","t","p","coef","model")

p.vals=rbind(p.vals,to.add)

PTS

h4.pts=lm(PTS~NSEV_Freq*dat$PI*recruit,dat)
summary(h4.pts)
## 
## Call:
## lm(formula = PTS ~ NSEV_Freq * dat$PI * recruit, data = dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.13586 -0.35873  0.02456  0.40435  2.93916 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    -0.105369   0.075226  -1.401  0.16175    
## NSEV_Freq                       0.020513   0.005275   3.889  0.00011 ***
## dat$PI                          0.679945   0.073642   9.233  < 2e-16 ***
## recruitonline                  -0.146604   0.087861  -1.669  0.09565 .  
## NSEV_Freq:dat$PI               -0.007303   0.003171  -2.303  0.02159 *  
## NSEV_Freq:recruitonline        -0.008346   0.005578  -1.496  0.13501    
## dat$PI:recruitonline            0.096133   0.086122   1.116  0.26471    
## NSEV_Freq:dat$PI:recruitonline  0.003887   0.003527   1.102  0.27075    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6378 on 689 degrees of freedom
## Multiple R-squared:  0.5973, Adjusted R-squared:  0.5932 
## F-statistic:   146 on 7 and 689 DF,  p-value: < 2.2e-16
# Creating a data frame to apply the FDR later on
current.mod=h4.pts
current.str="h4.pts"

to.add=data.frame(summary(current.mod)[4])
to.add$names=names(current.mod$coefficients)
to.add$model=rep(current.str,length(names(current.mod$coefficients)))
colnames(to.add)=c("Estimate","SE","t","p","coef","model")

p.vals=rbind(p.vals,to.add)

Applying the false discovery rate

p.vals$orig.sig=ifelse(p.vals$p<.05,1,0) # Mark p-values that were originally significant

library(dplyr) 
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:Hmisc':
## 
##     src, summarize
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
p.vals = arrange(p.vals, p) # Arrange p-values in ascending order
p.vals$i=seq(1,nrow(p.vals)) # Assign each p-value a rank/index

p.vals$BH.threshold=(p.vals$i/nrow(p.vals))*.05 # Benjamini-Hochberg thresholds

p.vals$now.sig=ifelse(p.vals$p<p.vals$BH.threshold,1,0) # Mark p-values below the BH threshold

p.vals$orig.sig==p.vals$now.sig
##  [1]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE
## [13]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [25]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [37]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [49]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
# tenth and eleventh significant p-value don't pass the threshold
p.vals[10,]
##           Estimate         SE        t          p    coef  model orig.sig  i
## V_Force1 0.2275925 0.08946974 2.543793 0.01118292 V_Force h3.pts        1 10
##          BH.threshold now.sig
## V_Force1  0.008928571       0
p.vals[11,]
##                       Estimate          SE         t          p
## NSEV_Freq:dat$PI1 -0.007302852 0.003171449 -2.302686 0.02159363
##                               coef  model orig.sig  i BH.threshold now.sig
## NSEV_Freq:dat$PI1 NSEV_Freq:dat$PI h4.pts        1 11  0.009821429       0