library(moments) # for calculating skewness & kurtosis
library(Hmisc) # for correlation matrices
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, units
dat <- read.csv("~/Desktop/WES projects/Paper 1/April21.csv")
# kurtosis(dat$AMMSA) # 2.412395 is -3 < k < 3, so we're in a normal range for kurtosis
# skewness(dat$AMMSA) # 0.324258 is -0.5 < skew < 0.5, so we're in a normal range for skew
hist(dat$AMMSA,main="AMMSA")
# kurtosis(dat$PI) # 2.731565 is -3 < k < 3, so we're in a normal range for kurtosis
# skewness(dat$PI) # 0.2949723 is -0.5 < skew < 0.5, so we're in a normal range for skew
hist(dat$PI,main="PI")
### DS
# kurtosis(dat$DS) # 2.67864 is -3 < k < 3, so we're in a normal range for kurtosis
# skewness(dat$DS) # 0.4451866 is -0.5 < skew < 0.5, so we're in a normal range for skew
hist(dat$DS,main="DS")
# kurtosis(dat$PTS) # 2.530473 is -3 < k < 3, so we're in a normal range for kurtosis
# skewness(dat$PTS) # 0.5905595 > 0.5, so we have a skew problem
hist(dat$PTS,main="PTS")
This obviously isn’t normally distributed. Let’s try a square root transform:
# kurtosis(sqrt(dat$PTS+.01)) # 2.494193 is -3 < k < 3, so we're in a normal range for kurtosis
# skewness(sqrt(dat$PTS+.01)) # 0.30088 is -0.5 < skew < 0.5, so we're in a normal range for skew
hist(sqrt(dat$PTS+.01),main="Transformed PTS")
dat$AMMSA=scale(dat$AMMSA)
dat$PI=scale(dat$PI)
dat$DS=scale(dat$DS)
dat$PTS=scale(sqrt(dat$PTS+.01))
rcorr(cbind(
dat$AMMSA,dat$PI,dat$DS,dat$PTS
))
## [,1] [,2] [,3] [,4]
## [1,] 1.00 -0.07 0.02 -0.05
## [2,] -0.07 1.00 0.06 0.75
## [3,] 0.02 0.06 1.00 0.07
## [4,] -0.05 0.75 0.07 1.00
##
## n= 697
##
##
## P
## [,1] [,2] [,3] [,4]
## [1,] 0.0558 0.5924 0.1729
## [2,] 0.0558 0.0975 0.0000
## [3,] 0.5924 0.0975 0.0490
## [4,] 0.1729 0.0000 0.0490
The top are the r value, the bottom are the p-values.
full.dat <- read.csv("~/Desktop/WES projects/Paper 1/full_dat.csv")
library(psych)
##
## Attaching package: 'psych'
## The following object is masked from 'package:Hmisc':
##
## describe
# alpha(full.dat[,c(83:98)])[1] # AMMSA alpha = 0.9212115
# alpha(full.dat[,c(110:129)])[1] # CESDS alpha = 0.78134
# alpha(full.dat[,
# c(175:179,
# 182:186,
# 189:193,
# 196:200,
# 203:207,
# 210:214)]
# )[1]
# PI alpha = 0.96171
AMMSA \(\alpha\) = 0.92
CESDS
\(\alpha\) = 0.78
PI \(\alpha\) = 0.96
online.sample=subset(dat,dat$Sample==1 | dat$Sample==3)
#nrow(online.sample) # n = 534 for online sample
college.sample=subset(dat,dat$Sample==2)
#nrow(college.sample) # n = 163 for college sample
# 534 + 163 = 697
t.test(online.sample$PI,college.sample$PI)
##
## Welch Two Sample t-test
##
## data: online.sample$PI and college.sample$PI
## t = -3.4018, df = 280.2, p-value = 0.000767
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.4644618 -0.1239638
## sample estimates:
## mean of x mean of y
## -0.06880443 0.22540836
There is a significant difference in PI between the two samples.
t.test(online.sample$PTS,college.sample$PTS)
##
## Welch Two Sample t-test
##
## data: online.sample$PTS and college.sample$PTS
## t = -4.0745, df = 308.35, p-value = 5.871e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.4936818 -0.1721384
## sample estimates:
## mean of x mean of y
## -0.07785416 0.25505594
There is a a statistically significant difference in PTS between the two samples.
t.test(online.sample$DS,college.sample$DS)
##
## Welch Two Sample t-test
##
## data: online.sample$DS and college.sample$DS
## t = -2.8002, df = 273.02, p-value = 0.005472
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.42010549 -0.07324942
## sample estimates:
## mean of x mean of y
## -0.05768784 0.18898961
Also significant.
We will have to take recruitment method into account for analyses.
dat$recruit=as.factor(ifelse(dat$Sample==2,"college","online"))
# mean(online.sample$Age)
# sd(online.sample$Age)
Age: M = 35.90, SD = 11.32
table(online.sample$Sex)
##
## 0 1 2
## 2 116 416
The table above shows the number of people who have their gender coded as 0, 1, and 2.
table(online.sample$Sex)/nrow(online.sample)
##
## 0 1 2
## 0.003745318 0.217228464 0.779026217
Above are the percentages.
If y’all wanted “sex” instead or want additional demographic data, let me know.
# mean(college.sample$Age)
# sd(college.sample$Age)
Age: M = 19.03, SD = 2.11
table(college.sample$Sex)
##
## 0 1 2
## 2 29 132
The table above shows the number of people who have their gender coded as 0, 1, and 2.
table(college.sample$Sex)/nrow(college.sample)
##
## 0 1 2
## 0.01226994 0.17791411 0.80981595
Above are the percentages.
If y’all wanted “sex” instead or want additional demographic data, let me know.
Each hypothesis will be tested with two linear models, one with PTSS as the dependent variable, and another with DS as the dependent variable. H1 will be tested with two simple linear regression models, H1a and H1b. H1a will have posttraumatic stress symptoms (PTSS) as the dependent variable and the frequency of nonconsensual sexual experiences (NSEs) endorsed by participants as the independent variable. H1b will have depressive symptomatology (DS) as the dependent variable and NSE Frequency as the independent variable. We will also have each independent variable interact with recruitment method to determine if participant’s population confounded any of our results.
h1.ds=lm(DS~NSEV_Freq*recruit,dat)
summary(h1.ds)
##
## Call:
## lm(formula = DS ~ NSEV_Freq * recruit, data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.62676 -0.79133 -0.09627 0.69400 2.91050
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.128624 0.108299 1.188 0.235
## NSEV_Freq 0.004594 0.005719 0.803 0.422
## recruitonline -0.237582 0.127573 -1.862 0.063 .
## NSEV_Freq:recruitonline -0.002015 0.006286 -0.320 0.749
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9955 on 693 degrees of freedom
## Multiple R-squared: 0.01323, Adjusted R-squared: 0.008956
## F-statistic: 3.097 on 3 and 693 DF, p-value: 0.02633
# Creating a data frame to apply the FDR later on
p.vals=data.frame(summary(h1.ds)[4])
p.vals$names=names(h1.ds$coefficients)
p.vals$model=rep("h1.ds",length(names(h1.ds$coefficients)))
colnames(p.vals)=c("Estimate","SE","t","p","coef","model")
h1.pts=lm(PTS~NSEV_Freq*recruit,dat)
summary(h1.pts)
##
## Call:
## lm(formula = PTS ~ NSEV_Freq * recruit, data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.48690 -0.59230 0.08739 0.64590 2.36398
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.139899 0.097686 -1.432 0.152558
## NSEV_Freq 0.030055 0.005159 5.826 8.7e-09 ***
## recruitonline -0.448479 0.115071 -3.897 0.000107 ***
## NSEV_Freq:recruitonline -0.004375 0.005670 -0.772 0.440646
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.898 on 693 degrees of freedom
## Multiple R-squared: 0.1972, Adjusted R-squared: 0.1937
## F-statistic: 56.73 on 3 and 693 DF, p-value: < 2.2e-16
# Creating a data frame to apply the FDR later on
current.mod=h1.pts
current.str="h1.pts"
to.add=data.frame(summary(current.mod)[4])
to.add$names=names(current.mod$coefficients)
to.add$model=rep(current.str,length(names(current.mod$coefficients)))
colnames(to.add)=c("Estimate","SE","t","p","coef","model")
p.vals=rbind(p.vals,to.add)
H2 will be tested with two multiple regression models, H2a and H2b. H2a will have DS scores as the dependent variable. The independent variables will be NSE Frequency and the acceptance of myths about sexual aggression (AMASA). Again, recruitment method will interact with each independent variable. H2b will be identical to H2a except the dependent variable will be PTSS rather than DS.
h2.ds=lm(DS~NSEV_Freq*dat$AMMSA*recruit,dat)
summary(h2.ds)
##
## Call:
## lm(formula = DS ~ NSEV_Freq * dat$AMMSA * recruit, data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.6187 -0.7928 -0.0954 0.7023 2.9099
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.1378695 0.1268218 1.087 0.2774
## NSEV_Freq 0.0043261 0.0061841 0.700 0.4844
## dat$AMMSA -0.0192276 0.1364155 -0.141 0.8880
## recruitonline -0.2448522 0.1439895 -1.700 0.0895 .
## NSEV_Freq:dat$AMMSA 0.0005891 0.0060601 0.097 0.9226
## NSEV_Freq:recruitonline -0.0018418 0.0067179 -0.274 0.7840
## dat$AMMSA:recruitonline 0.0417297 0.1534174 0.272 0.7857
## NSEV_Freq:dat$AMMSA:recruitonline -0.0018209 0.0065502 -0.278 0.7811
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9982 on 689 degrees of freedom
## Multiple R-squared: 0.01363, Adjusted R-squared: 0.003606
## F-statistic: 1.36 on 7 and 689 DF, p-value: 0.2195
# Creating a data frame to apply the FDR later on
current.mod=h2.ds
current.str="h2.ds"
to.add=data.frame(summary(current.mod)[4])
to.add$names=names(current.mod$coefficients)
to.add$model=rep(current.str,length(names(current.mod$coefficients)))
colnames(to.add)=c("Estimate","SE","t","p","coef","model")
p.vals=rbind(p.vals,to.add)
h2.pts=lm(PTS~NSEV_Freq*dat$AMMSA*recruit,dat)
summary(h2.pts)
##
## Call:
## lm(formula = PTS ~ NSEV_Freq * dat$AMMSA * recruit, data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.57612 -0.57341 0.07453 0.63360 2.27066
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.031224 0.113746 -0.275 0.7838
## NSEV_Freq 0.027790 0.005547 5.010 6.91e-07 ***
## dat$AMMSA -0.219492 0.122350 -1.794 0.0733 .
## recruitonline -0.570798 0.129143 -4.420 1.15e-05 ***
## NSEV_Freq:dat$AMMSA 0.004039 0.005435 0.743 0.4577
## NSEV_Freq:recruitonline -0.001988 0.006025 -0.330 0.7415
## dat$AMMSA:recruitonline 0.130514 0.137599 0.949 0.3432
## NSEV_Freq:dat$AMMSA:recruitonline -0.003607 0.005875 -0.614 0.5394
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8953 on 689 degrees of freedom
## Multiple R-squared: 0.2065, Adjusted R-squared: 0.1985
## F-statistic: 25.62 on 7 and 689 DF, p-value: < 2.2e-16
# Creating a data frame to apply the FDR later on
current.mod=h2.pts
current.str="h2.pts"
to.add=data.frame(summary(current.mod)[4])
to.add$names=names(current.mod$coefficients)
to.add$model=rep(current.str,length(names(current.mod$coefficients)))
colnames(to.add)=c("Estimate","SE","t","p","coef","model")
p.vals=rbind(p.vals,to.add)
H3 will be tested with two multiple regression models, H3a and H3b. H3a will have DS as the dependent variable. The independent variables will include NSE frequency and the five tactics used against the participant to perpetrate NSEs: the use of threat or physical force (force), intoxication, abuse of authority (authority), verbal coercion (coercion), and enticement. H3b will be identical to H3a except the dependent variable will be PTSS instead of DS. Recruitment method will interact with all independent variables in both models to determine if this factor influenced the results.
h3.ds=lm(DS~NSEV_Freq+V_Force+V_Intox+V_Auth+V_Coerce+V_Entice+recruit,dat)
summary(h3.ds)
##
## Call:
## lm(formula = DS ~ NSEV_Freq + V_Force + V_Intox + V_Auth + V_Coerce +
## V_Entice + recruit, data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.48065 -0.78865 -0.07211 0.69094 2.89002
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.335506 0.190090 1.765 0.07801 .
## NSEV_Freq 0.001034 0.003344 0.309 0.75714
## V_Force 0.180619 0.099162 1.821 0.06897 .
## V_Intox 0.009066 0.089051 0.102 0.91894
## V_Auth -0.071101 0.088458 -0.804 0.42180
## V_Coerce -0.293419 0.179157 -1.638 0.10192
## V_Entice 0.052731 0.111472 0.473 0.63633
## recruitonline -0.263348 0.091166 -2.889 0.00399 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9935 on 689 degrees of freedom
## Multiple R-squared: 0.02292, Adjusted R-squared: 0.013
## F-statistic: 2.309 on 7 and 689 DF, p-value: 0.02479
# Creating a data frame to apply the FDR later on
current.mod=h3.ds
current.str="h3.ds"
to.add=data.frame(summary(current.mod)[4])
to.add$names=names(current.mod$coefficients)
to.add$model=rep(current.str,length(names(current.mod$coefficients)))
colnames(to.add)=c("Estimate","SE","t","p","coef","model")
p.vals=rbind(p.vals,to.add)
h3.pts=lm(PTS~NSEV_Freq+V_Force+V_Intox+V_Auth+V_Coerce+V_Entice+recruit,dat)
summary(h3.pts)
##
## Call:
## lm(formula = PTS ~ NSEV_Freq + V_Force + V_Intox + V_Auth + V_Coerce +
## V_Entice + recruit, data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.51273 -0.60482 0.06819 0.65691 2.47027
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.131859 0.171510 -0.769 0.4423
## NSEV_Freq 0.023605 0.003017 7.824 1.93e-14 ***
## V_Force 0.227592 0.089470 2.544 0.0112 *
## V_Intox 0.012063 0.080347 0.150 0.8807
## V_Auth -0.036327 0.079812 -0.455 0.6491
## V_Coerce 0.047699 0.161645 0.295 0.7680
## V_Entice -0.103814 0.100577 -1.032 0.3023
## recruitonline -0.506303 0.082255 -6.155 1.27e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8964 on 689 degrees of freedom
## Multiple R-squared: 0.2046, Adjusted R-squared: 0.1965
## F-statistic: 25.32 on 7 and 689 DF, p-value: < 2.2e-16
# Creating a data frame to apply the FDR later on
current.mod=h3.pts
current.str="h3.pts"
to.add=data.frame(summary(current.mod)[4])
to.add$names=names(current.mod$coefficients)
to.add$model=rep(current.str,length(names(current.mod$coefficients)))
colnames(to.add)=c("Estimate","SE","t","p","coef","model")
p.vals=rbind(p.vals,to.add)
H4 will be tested with two multiple regression models, H4a and H4b. H4a will have DS as the dependent variable, NSE Frequency and psychological inflexibility as independent variables along with the interaction between NSE Frequency and psychological inflexibility. Recruitment method will also interact with both variables. H4b will be identical to H4a except the dependent variable will be PTSS instead of DS.
h4.ds=lm(DS~NSEV_Freq*dat$PI*recruit,dat)
summary(h4.ds)
##
## Call:
## lm(formula = DS ~ NSEV_Freq * dat$PI * recruit, data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.5924 -0.7871 -0.1074 0.6804 2.8631
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.092809 0.117562 0.789 0.430
## NSEV_Freq 0.009165 0.008243 1.112 0.267
## dat$PI 0.036792 0.115087 0.320 0.749
## recruitonline -0.178333 0.137308 -1.299 0.194
## NSEV_Freq:dat$PI -0.003810 0.004956 -0.769 0.442
## NSEV_Freq:recruitonline -0.008012 0.008717 -0.919 0.358
## dat$PI:recruitonline -0.017114 0.134591 -0.127 0.899
## NSEV_Freq:dat$PI:recruitonline 0.005297 0.005512 0.961 0.337
##
## Residual standard error: 0.9967 on 689 degrees of freedom
## Multiple R-squared: 0.01654, Adjusted R-squared: 0.006545
## F-statistic: 1.655 on 7 and 689 DF, p-value: 0.1171
# Creating a data frame to apply the FDR later on
current.mod=h4.ds
current.str="h4.ds"
to.add=data.frame(summary(current.mod)[4])
to.add$names=names(current.mod$coefficients)
to.add$model=rep(current.str,length(names(current.mod$coefficients)))
colnames(to.add)=c("Estimate","SE","t","p","coef","model")
p.vals=rbind(p.vals,to.add)
h4.pts=lm(PTS~NSEV_Freq*dat$PI*recruit,dat)
summary(h4.pts)
##
## Call:
## lm(formula = PTS ~ NSEV_Freq * dat$PI * recruit, data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.13586 -0.35873 0.02456 0.40435 2.93916
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.105369 0.075226 -1.401 0.16175
## NSEV_Freq 0.020513 0.005275 3.889 0.00011 ***
## dat$PI 0.679945 0.073642 9.233 < 2e-16 ***
## recruitonline -0.146604 0.087861 -1.669 0.09565 .
## NSEV_Freq:dat$PI -0.007303 0.003171 -2.303 0.02159 *
## NSEV_Freq:recruitonline -0.008346 0.005578 -1.496 0.13501
## dat$PI:recruitonline 0.096133 0.086122 1.116 0.26471
## NSEV_Freq:dat$PI:recruitonline 0.003887 0.003527 1.102 0.27075
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6378 on 689 degrees of freedom
## Multiple R-squared: 0.5973, Adjusted R-squared: 0.5932
## F-statistic: 146 on 7 and 689 DF, p-value: < 2.2e-16
# Creating a data frame to apply the FDR later on
current.mod=h4.pts
current.str="h4.pts"
to.add=data.frame(summary(current.mod)[4])
to.add$names=names(current.mod$coefficients)
to.add$model=rep(current.str,length(names(current.mod$coefficients)))
colnames(to.add)=c("Estimate","SE","t","p","coef","model")
p.vals=rbind(p.vals,to.add)
p.vals$orig.sig=ifelse(p.vals$p<.05,1,0) # Mark p-values that were originally significant
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:Hmisc':
##
## src, summarize
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
p.vals = arrange(p.vals, p) # Arrange p-values in ascending order
p.vals$i=seq(1,nrow(p.vals)) # Assign each p-value a rank/index
p.vals$BH.threshold=(p.vals$i/nrow(p.vals))*.05 # Benjamini-Hochberg thresholds
p.vals$now.sig=ifelse(p.vals$p<p.vals$BH.threshold,1,0) # Mark p-values below the BH threshold
p.vals$orig.sig==p.vals$now.sig
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE TRUE
## [13] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [25] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [37] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [49] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
# tenth and eleventh significant p-value don't pass the threshold
p.vals[10,]
## Estimate SE t p coef model orig.sig i
## V_Force1 0.2275925 0.08946974 2.543793 0.01118292 V_Force h3.pts 1 10
## BH.threshold now.sig
## V_Force1 0.008928571 0
p.vals[11,]
## Estimate SE t p
## NSEV_Freq:dat$PI1 -0.007302852 0.003171449 -2.302686 0.02159363
## coef model orig.sig i BH.threshold now.sig
## NSEV_Freq:dat$PI1 NSEV_Freq:dat$PI h4.pts 1 11 0.009821429 0