*after multiple imputation
nums <- data %>% select(where(is.numeric)) %>% select(!starts_with("use") & !ends_with("use") & !starts_with("rmet") & !starts_with("masc"))
library(summarytools)
Attaching package: ‘summarytools’
The following objects are masked from ‘package:Hmisc’:
label, label<-
view(dfSummary(nums,
varnumbers = FALSE,
na.col = TRUE,
valid.col = FALSE,
style = "multiline",
plain.ascii = FALSE,
headings = FALSE,
graph.magnif = 1.25),
method = "render")
| Variable | Stats / Values | Freqs (% of Valid) | Graph | Missing | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| age_visit1 [integer] |
|
24 distinct values | 0 (0.0%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| gender_1isF [integer] |
|
|
0 (0.0%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| iq_est [numeric] |
|
26 distinct values | 0 (0.0%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| CAPS5_PM_total [integer] |
|
21 distinct values | 0 (0.0%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| CAPS5_PM_reExp [integer] |
|
12 distinct values | 0 (0.0%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| CAPS5_PM_avoid [integer] |
|
|
0 (0.0%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| CAPS5_PM_negAff [integer] |
|
|
0 (0.0%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| CAPS5_PM_anhed [integer] |
|
|
0 (0.0%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| CAPS5_PM_extBeh [integer] |
|
|
0 (0.0%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| CAPS5_PM_anxArou [integer] |
|
|
0 (0.0%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| CAPS5_PM_dysArou [integer] |
|
|
0 (0.0%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| BDI_Total.imp [integer] |
|
21 distinct values | 0 (0.0%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| CTQ_Total.imp [integer] |
|
26 distinct values | 0 (0.0%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| STAI_Total.imp [integer] |
|
31 distinct values | 0 (0.0%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| tot_mascAccuracy [integer] |
|
14 distinct values | 2 (2.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| tot_mascControl [integer] |
|
|
2 (2.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| tot_mascHyperM [integer] |
|
|
2 (2.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| tot_mascHypoM [integer] |
|
11 distinct values | 2 (2.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| tot_mascNoM [integer] |
|
|
2 (2.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| tot_mascTotQC1 [integer] |
|
|
2 (2.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| tot_rmetAcc [numeric] |
|
18 distinct values | 0 (0.0%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| tot_rmetAccuracy [integer] |
|
16 distinct values | 0 (0.0%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| tot_mascAcc [numeric] |
|
17 distinct values | 2 (2.9%) |
Generated by summarytools 1.0.0 (R version 4.1.1)
2023-01-17
Thresholded to display only p < .2
nums <- data %>% select(age_visit1, gender_1isF, iq_est, CTQ_Total.imp, BDI_Total.imp, STAI_Total.imp, (starts_with("CAPS5_PM") & !ends_with("total")), starts_with("tot_masc"), tot_rmetAcc) %>% mutate(gender_1isF = as.numeric(gender_1isF))
corrs <- rcorr(as.matrix(nums))
M <- corrs$r
p_mat <- cor_pmat(nums)
ggcorrplot(M,
p.mat = p_mat, hc.order = TRUE,
type = "full", insig = "blank", lab = TRUE,ggtheme=theme_pubr(base_size=10), sig.level = .2)
Based on the correlations above, we WILL include sex [male/female], estimated full-scale IQ, age, BDI total, STAI total.
We will NOT include CTQ total because it does not seem to be correlated with any of the RMET/MASC variables even at p > .2.
Using stepAIC from MASS.
Note: I know in SPSS you can enter variables in “blocks” and specify stepwise for the same regression model (so a mix of hierarchical and stepwise regression?) but I can’t figure out a way to do both simultaneously in R.
# make an RMET subset (since we don't want to use MASC and a few other vars as predictors in our regression)
rmetDat <- data %>% select(-c(starts_with("tot_masc"),group_3low,record_id, gender_2isF,tot_rmetAccuracy,trad_responder, CTQ_Total.imp,CAPS5_PM_total, starts_with("use"), ends_with("use"), starts_with("masc"), starts_with("rmet")))
stepAIC chooses a model by AIC, in a stepwise regression algorithm using both backward and forward selection.
# Fit the full model
full.model <- lm(tot_rmetAcc ~., rmetDat)
summary(full.model)
Call:
lm(formula = tot_rmetAcc ~ ., data = rmetDat)
Residuals:
Min 1Q Median 3Q Max
-0.26921 -0.06202 0.02161 0.06424 0.17851
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.3516289 0.2111746 1.665 0.10148
age_visit1 0.0004342 0.0025505 0.170 0.86545
gender_1isF -0.0369128 0.0323888 -1.140 0.25927
iq_est 0.0037381 0.0014013 2.668 0.00997 **
CAPS5_PM_reExp 0.0178489 0.0123451 1.446 0.15380
CAPS5_PM_avoid -0.0243281 0.0184424 -1.319 0.19249
CAPS5_PM_negAff 0.0096897 0.0120900 0.801 0.42625
CAPS5_PM_anhed -0.0112361 0.0115963 -0.969 0.33674
CAPS5_PM_extBeh -0.0468449 0.0359102 -1.305 0.19740
CAPS5_PM_anxArou 0.0251020 0.0139095 1.805 0.07651 .
CAPS5_PM_dysArou 0.0026556 0.0166343 0.160 0.87374
BDI_Total.imp -0.0027197 0.0038498 -0.706 0.48284
STAI_Total.imp -0.0009918 0.0024778 -0.400 0.69048
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.1047 on 56 degrees of freedom
Multiple R-squared: 0.2998, Adjusted R-squared: 0.1497
F-statistic: 1.998 on 12 and 56 DF, p-value: 0.04162
# Stepwise regression model
step.model <- stepAIC(full.model, direction = "both", trace = FALSE)
summary(step.model)
Call:
lm(formula = tot_rmetAcc ~ gender_1isF + iq_est + CAPS5_PM_avoid +
CAPS5_PM_anxArou, data = rmetDat)
Residuals:
Min 1Q Median 3Q Max
-0.26509 -0.07406 0.01250 0.07424 0.19573
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.341330 0.140027 2.438 0.0176 *
gender_1isF -0.041219 0.029954 -1.376 0.1736
iq_est 0.003996 0.001240 3.222 0.0020 **
CAPS5_PM_avoid -0.020517 0.009273 -2.213 0.0305 *
CAPS5_PM_anxArou 0.017789 0.009261 1.921 0.0592 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.1021 on 64 degrees of freedom
Multiple R-squared: 0.2389, Adjusted R-squared: 0.1913
F-statistic: 5.021 on 4 and 64 DF, p-value: 0.001392
The stepwise algorithm narrowed down the full model to a model that includes only gender, estimated FSIQ, CAPS5-PM avoidance, and CAPS-PM anxious arousal.
Here are the VIF scores for the full model:
car::vif(full.model)
age_visit1 gender_1isF iq_est CAPS5_PM_reExp CAPS5_PM_avoid CAPS5_PM_negAff
1.224846 1.124342 1.350497 10.209847 6.716504 7.115917
CAPS5_PM_anhed CAPS5_PM_extBeh CAPS5_PM_anxArou CAPS5_PM_dysArou BDI_Total.imp STAI_Total.imp
5.755872 3.648085 3.743280 7.836537 11.755202 6.301953
Here are the partial regression plots for RMET total and CAPS avoidance/anxious arousal dimensions:
library(ggeffects)
pr <- ggpredict(step.model, "CAPS5_PM_avoid [all]")
plot(pr, add.data = TRUE, residuals=TRUE)
pr <- ggpredict(step.model, "CAPS5_PM_anxArou [all]")
plot(pr, add.data = TRUE, residuals=TRUE)
Given a linear model of y = x1 + x2, partial regression plots show the conditional effect of x1 on y (i.e., effect of x1 factoring out x2).
Note: The datapoints are jittered for ease of visualization; CAPS5_PM actual values are integers.
Here are the diagnostic plots for the model:
plot(step.model)
shapiro.test(rmetDat$tot_rmetAcc)
Shapiro-Wilk normality test
data: rmetDat$tot_rmetAcc
W = 0.96106, p-value = 0.03047
Based on the Shapiro-Wilk test, the dependent variable is non-normally distributed. But overall, these diagnostic plots look okay to me (see https://www.statology.org/diagnostic-plots-in-r/); the QQ plot isn’t terrible but we do see a few values that fall off of the “normal” line (i.e., participants with exceptionally low or high RMET accuracy relative to the rest of the sample). However, none of these data points are overly influential, based on Cook’s distance
K-fold cross-validation of the stepwise regression produces the same model:
step.model$results
step.model$finalModel
Call:
lm(formula = .outcome ~ gender_1isF + iq_est + CAPS5_PM_avoid +
CAPS5_PM_anxArou, data = dat)
Coefficients:
(Intercept) gender_1isF iq_est CAPS5_PM_avoid CAPS5_PM_anxArou
0.341330 -0.041219 0.003996 -0.020517 0.017789
summary(step.model$finalModel)
Call:
lm(formula = .outcome ~ gender_1isF + iq_est + CAPS5_PM_avoid +
CAPS5_PM_anxArou, data = dat)
Residuals:
Min 1Q Median 3Q Max
-0.26509 -0.07406 0.01250 0.07424 0.19573
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.341330 0.140027 2.438 0.0176 *
gender_1isF -0.041219 0.029954 -1.376 0.1736
iq_est 0.003996 0.001240 3.222 0.0020 **
CAPS5_PM_avoid -0.020517 0.009273 -2.213 0.0305 *
CAPS5_PM_anxArou 0.017789 0.009261 1.921 0.0592 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.1021 on 64 degrees of freedom
Multiple R-squared: 0.2389, Adjusted R-squared: 0.1913
F-statistic: 5.021 on 4 and 64 DF, p-value: 0.001392
Feature selection using elastic net regression with cross-validation (code adapted from Agnes’ ACNP 2021 analyses):
#select predictors and target variable
rmetDat <- data %>% select(-c(starts_with("tot_masc"),group_3low,record_id, gender_2isF,tot_rmetAccuracy,trad_responder, CTQ_Total.imp,CAPS5_PM_total, starts_with("use"), ends_with("use"), starts_with("masc"), starts_with("rmet")))
model1 <- rmetDat
#inspect correlation matrix:
corrMat<-round(cor(model1[1:ncol(model1)]),1) #including target variable
plot1<-ggcorrplot(corrMat, hc.order = TRUE)
plot1
#define predictors and target variable:
x=as.matrix(model1[,1:12]) #excluding target variable
x=scale(x)
y=model1$tot_rmetAcc
#run cross-validated elastic net with grid search across alpha and lambda values:
alphas <- seq(0, 1, by=.002) #[alpha=1 is LASSO, alpha=0 is ridge]
mses <- numeric(501)
mins <- numeric(501)
maxes <- numeric(501)
for(i in 1:501){
cvfits1 <- cv.glmnet(x, y, alpha=alphas[i], nfolds=10, family="gaussian")
loc <- which(cvfits1$lambda==cvfits1$lambda.min)
maxes[i] <- cvfits1$lambda %>% max
mins[i] <- cvfits1$lambda %>% min
mses[i] <- cvfits1$cvm[loc]
}
plot2 <- plot(cvfits1)
plot2
NULL
plot(cvfits1[["glmnet.fit"]], xvar = "lambda")
#get coefficients and r2 for winning (MSE minimising) model:
coef(cvfits1, s="lambda.min") #gives coefficient for model (varying alpha, lambda) with minimum MSE
13 x 1 sparse Matrix of class "dgCMatrix"
s1
(Intercept) 0.768173453
age_visit1 .
gender_1isF .
iq_est 0.026704609
CAPS5_PM_reExp .
CAPS5_PM_avoid -0.001556115
CAPS5_PM_negAff .
CAPS5_PM_anhed .
CAPS5_PM_extBeh .
CAPS5_PM_anxArou .
CAPS5_PM_dysArou .
BDI_Total.imp .
STAI_Total.imp .
r2_1<-cvfits1$glmnet.fit$dev.ratio[which(cvfits1$glmnet.fit$lambda == cvfits1$lambda.min)]
r2_1
[1] 0.1350943
#visualise coefficients as bar plot:
tmp<-coef(cvfits1, s="lambda.min") #hack to extract names from glmnet output matrix to variable
cv.out=data.frame(beta=coef(cvfits1, s="lambda.min")[2:nrow(coef(cvfits1, s="lambda.min")),1],
name = tmp@Dimnames[[1]][2:nrow(coef(cvfits1, s="lambda.min"))])
cv.out$name<-factor(cv.out$name, levels=cv.out$name) #this stops ggplot reordering the x axis
plot3<-ggplot(data=cv.out, aes(x=name,y=beta)) +
geom_bar(stat="identity") +
theme_pubclean() + geom_hline(yintercept=0, color="black") +
ggpubr::rotate_x_text(30) +
labs(x="", y="beta")
plot3
Higher estimated FSIQ and lower CAPS5-PM Avoidance is associated with more accurate RMET performance.
Using stepAIC from MASS.
Note: I know in SPSS you can enter variables in “blocks” and specify stepwise for the same regression model (so a mix of hierarchical and stepwise regression?) but I can’t figure out a way to do both simultaneously in R.
# make a MASC dataset
mascDat <- data %>% filter(!record_id %in% c("NWTC-118","NWTC-125")) %>% select(-c(starts_with("tot_rmet"),group_3low,record_id, gender_2isF,tot_mascAccuracy,tot_mascControl,tot_mascHyperM,tot_mascHypoM,tot_mascNoM,tot_mascTotQC1, trad_responder, CTQ_Total.imp,CAPS5_PM_total, starts_with("use"), ends_with("use"), starts_with("masc"), starts_with("rmet")))
stepAIC chooses a model by AIC, in a stepwise regression algorithm using both backward and forward selection.
# Fit the full model
full.model <- lm(tot_mascAcc ~., mascDat)
summary(full.model)
Call:
lm(formula = tot_mascAcc ~ ., data = mascDat)
Residuals:
Min 1Q Median 3Q Max
-0.123956 -0.044679 -0.006164 0.035976 0.146492
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.6375491 0.1356614 4.700 0.0000184 ***
age_visit1 -0.0019519 0.0016663 -1.171 0.2466
gender_1isF -0.0094171 0.0221264 -0.426 0.6721
iq_est 0.0023835 0.0009034 2.638 0.0109 *
CAPS5_PM_reExp 0.0118242 0.0079264 1.492 0.1416
CAPS5_PM_avoid -0.0254211 0.0119023 -2.136 0.0372 *
CAPS5_PM_negAff -0.0003414 0.0077587 -0.044 0.9651
CAPS5_PM_anhed 0.0121915 0.0074639 1.633 0.1082
CAPS5_PM_extBeh -0.0182327 0.0230652 -0.790 0.4327
CAPS5_PM_anxArou 0.0096186 0.0089552 1.074 0.2876
CAPS5_PM_dysArou -0.0032743 0.0107327 -0.305 0.7615
BDI_Total.imp -0.0009403 0.0024799 -0.379 0.7060
STAI_Total.imp -0.0026930 0.0016120 -1.671 0.1006
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.06716 on 54 degrees of freedom
Multiple R-squared: 0.397, Adjusted R-squared: 0.263
F-statistic: 2.963 on 12 and 54 DF, p-value: 0.003112
# Stepwise regression model
step.model <- stepAIC(full.model, direction = "both", trace = FALSE)
summary(step.model)
Call:
lm(formula = tot_mascAcc ~ iq_est + CAPS5_PM_reExp + CAPS5_PM_avoid +
CAPS5_PM_anhed + STAI_Total.imp, data = mascDat)
Residuals:
Min 1Q Median 3Q Max
-0.129392 -0.048643 -0.006693 0.033209 0.139022
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.5725374 0.0910355 6.289 0.000000038 ***
iq_est 0.0020741 0.0008111 2.557 0.01305 *
CAPS5_PM_reExp 0.0110133 0.0050620 2.176 0.03346 *
CAPS5_PM_avoid -0.0295836 0.0092363 -3.203 0.00216 **
CAPS5_PM_anhed 0.0127710 0.0063751 2.003 0.04960 *
STAI_Total.imp -0.0031713 0.0010591 -2.994 0.00397 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.06495 on 61 degrees of freedom
Multiple R-squared: 0.3629, Adjusted R-squared: 0.3107
F-statistic: 6.95 on 5 and 61 DF, p-value: 0.00003405
The stepwise algorithm narrowed down the full model to a model that includes only estimated FSIQ, STAI-T, CAPS5-PM avoidance, CAPS-PM re-experiencing, and CAPS5-PM anhedonia.
Here are the partial regression plots for MASC accuracy and CAPS5 symptom dimensions. We see that avoidance is associated with lower accuracy, but anhedonia and re-experiencing are associated with higher accuracy.
library(ggeffects)
pr <- ggpredict(step.model, "CAPS5_PM_avoid [all]")
plot(pr, add.data = TRUE, residuals=TRUE)
pr <- ggpredict(step.model, "CAPS5_PM_anhed [all]")
plot(pr, add.data = TRUE, residuals=TRUE)
pr <- ggpredict(step.model, "CAPS5_PM_reExp [all]")
plot(pr, add.data = TRUE, residuals=TRUE)
Given a linear model of y = x1 + x2, partial regression plots show the conditional effect of x1 on y (i.e., effect of x1 factoring out x2).
Note: The datapoints are jittered for ease of visualization; CAPS5_PM actual values are integers.
Here are the diagnostic plots for the model:
shapiro.test(mascDat$tot_mascAcc)
Shapiro-Wilk normality test
data: mascDat$tot_mascAcc
W = 0.97377, p-value = 0.1681
plot(step.model)
Could be better, but could also be a lot worse (see https://www.statology.org/diagnostic-plots-in-r/). Shapiro-Wilk test is okay.
K-folds cross-validation of the stepwise regression produces the same model:
step.model$results
step.model$finalModel
Call:
lm(formula = .outcome ~ iq_est + CAPS5_PM_reExp + CAPS5_PM_avoid +
CAPS5_PM_anhed + STAI_Total.imp, data = dat)
Coefficients:
(Intercept) iq_est CAPS5_PM_reExp CAPS5_PM_avoid CAPS5_PM_anhed STAI_Total.imp
0.572537 0.002074 0.011013 -0.029584 0.012771 -0.003171
summary(step.model$finalModel)
Call:
lm(formula = .outcome ~ iq_est + CAPS5_PM_reExp + CAPS5_PM_avoid +
CAPS5_PM_anhed + STAI_Total.imp, data = dat)
Residuals:
Min 1Q Median 3Q Max
-0.129392 -0.048643 -0.006693 0.033209 0.139022
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.5725374 0.0910355 6.289 0.000000038 ***
iq_est 0.0020741 0.0008111 2.557 0.01305 *
CAPS5_PM_reExp 0.0110133 0.0050620 2.176 0.03346 *
CAPS5_PM_avoid -0.0295836 0.0092363 -3.203 0.00216 **
CAPS5_PM_anhed 0.0127710 0.0063751 2.003 0.04960 *
STAI_Total.imp -0.0031713 0.0010591 -2.994 0.00397 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.06495 on 61 degrees of freedom
Multiple R-squared: 0.3629, Adjusted R-squared: 0.3107
F-statistic: 6.95 on 5 and 61 DF, p-value: 0.00003405
Feature selection using elastic net regression with cross-validation (code adapted from Agnes’ ACNP 2021 analyses):
#select predictors and target variable
model1 <- mascDat
#inspect correlation matrix:
corrMat<-round(cor(model1[1:ncol(model1)]),1) #including target variable
plot1<-ggcorrplot(corrMat, hc.order = TRUE)
plot1
#define predictors and target variable:
x=as.matrix(model1[,1:12]) #excluding target variable
x=scale(x)
y=model1$tot_mascAcc
#run cross-validated elastic net with grid search across alpha and lambda values:
alphas <- seq(0, 1, by=.002) #[alpha=1 is LASSO, alpha=0 is ridge]
mses <- numeric(501)
mins <- numeric(501)
maxes <- numeric(501)
for(i in 1:501){
cvfits1 <- cv.glmnet(x, y, alpha=alphas[i], nfolds=10, family="gaussian")
loc <- which(cvfits1$lambda==cvfits1$lambda.min)
maxes[i] <- cvfits1$lambda %>% max
mins[i] <- cvfits1$lambda %>% min
mses[i] <- cvfits1$cvm[loc]
}
plot2 <- plot(cvfits1)
plot2
NULL
plot(cvfits1[["glmnet.fit"]], xvar = "lambda")
#get coefficients and r2 for winning (MSE minimising) model:
coef(cvfits1, s="lambda.min") #gives coefficient for model (varying alpha, lambda) with minimum MSE
13 x 1 sparse Matrix of class "dgCMatrix"
s1
(Intercept) 0.761989460
age_visit1 -0.010572454
gender_1isF -0.004481230
iq_est 0.020122836
CAPS5_PM_reExp 0.016250225
CAPS5_PM_avoid -0.029924880
CAPS5_PM_negAff .
CAPS5_PM_anhed 0.013776886
CAPS5_PM_extBeh .
CAPS5_PM_anxArou 0.006739283
CAPS5_PM_dysArou .
BDI_Total.imp .
STAI_Total.imp -0.029804286
r2_1<-cvfits1$glmnet.fit$dev.ratio[which(cvfits1$glmnet.fit$lambda == cvfits1$lambda.min)]
r2_1
[1] 0.3650483
#visualise coefficients as bar plot:
tmp<-coef(cvfits1, s="lambda.min") #hack to extract names from glmnet output matrix to variable
cv.out=data.frame(beta=coef(cvfits1, s="lambda.min")[2:nrow(coef(cvfits1, s="lambda.min")),1],
name = tmp@Dimnames[[1]][2:nrow(coef(cvfits1, s="lambda.min"))])
cv.out$name<-factor(cv.out$name, levels=cv.out$name) #this stops ggplot reordering the x axis
plot3<-ggplot(data=cv.out, aes(x=name,y=beta)) +
geom_bar(stat="identity") +
theme_pubclean() + geom_hline(yintercept=0, color="black") +
ggpubr::rotate_x_text(30) +
labs(x="", y="beta")
plot3
Similar results to the stepwise regressions, except this time we have age and gender in the model too (but now associated with worse MASC accuracy, unlike RMET accuracy) and anxious arousal (now associated with better accuracy).
Using stepAIC from MASS.
Note: I know in SPSS you can enter variables in “blocks” and specify stepwise for the same regression model (so a mix of hierarchical and stepwise regression?) but I can’t figure out a way to do both simultaneously in R.
# make a MASC dataset
mascDat <- data %>% filter(!record_id %in% c("NWTC-118","NWTC-125")) %>% select(-c(starts_with("tot_rmet"),group_3low,record_id, gender_2isF,tot_mascAccuracy,tot_mascControl,tot_mascHyperM,tot_mascAcc,tot_mascNoM,tot_mascTotQC1, trad_responder, CTQ_Total.imp,CAPS5_PM_total, starts_with("use"), ends_with("use"), starts_with("masc"), starts_with("rmet")))
stepAIC chooses a model by AIC, in a stepwise regression algorithm using both backward and forward selection.
# Fit the full model
full.model <- lm(tot_mascHypoM ~., mascDat)
summary(full.model)
Call:
lm(formula = tot_mascHypoM ~ ., data = mascDat)
Residuals:
Min 1Q Median 3Q Max
-4.544 -1.361 -0.159 1.692 5.822
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 12.31153 5.03998 2.443 0.0179 *
age_visit1 0.00895 0.06190 0.145 0.8856
gender_1isF 0.97546 0.82202 1.187 0.2406
iq_est -0.08200 0.03356 -2.443 0.0179 *
CAPS5_PM_reExp -0.08292 0.29447 -0.282 0.7793
CAPS5_PM_avoid 0.41450 0.44218 0.937 0.3527
CAPS5_PM_negAff 0.15381 0.28824 0.534 0.5958
CAPS5_PM_anhed -0.08444 0.27729 -0.305 0.7619
CAPS5_PM_extBeh 0.38065 0.85690 0.444 0.6587
CAPS5_PM_anxArou -0.67740 0.33270 -2.036 0.0467 *
CAPS5_PM_dysArou 0.11348 0.39873 0.285 0.7770
BDI_Total.imp -0.03943 0.09213 -0.428 0.6703
STAI_Total.imp 0.07017 0.05989 1.172 0.2465
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 2.495 on 54 degrees of freedom
Multiple R-squared: 0.3056, Adjusted R-squared: 0.1513
F-statistic: 1.981 on 12 and 54 DF, p-value: 0.04443
# Stepwise regression model
step.model <- stepAIC(full.model, direction = "both", trace = FALSE)
summary(step.model)
Call:
lm(formula = tot_mascHypoM ~ gender_1isF + iq_est + CAPS5_PM_avoid +
CAPS5_PM_anxArou + STAI_Total.imp, data = mascDat)
Residuals:
Min 1Q Median 3Q Max
-4.8444 -1.3518 -0.1727 1.6058 5.7621
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 12.84167 3.27279 3.924 0.000224 ***
gender_1isF 1.01558 0.74322 1.366 0.176812
iq_est -0.08176 0.02894 -2.825 0.006386 **
CAPS5_PM_avoid 0.41723 0.25935 1.609 0.112826
CAPS5_PM_anxArou -0.62712 0.23084 -2.717 0.008570 **
STAI_Total.imp 0.05650 0.03617 1.562 0.123419
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 2.377 on 61 degrees of freedom
Multiple R-squared: 0.2881, Adjusted R-squared: 0.2297
F-statistic: 4.937 on 5 and 61 DF, p-value: 0.0007371
The stepwise algorithm narrowed down the full model to a model that includes only gender, estimated FSIQ, STAI-T, CAPS5-PM avoidance, and CAPS-PM anxious arousal.
Here are the partial regression plots for MASC accuracy and CAPS5 symptom dimensions. We see that avoidance is associated with more hypomentalizing errors, whereas anxious arousal is associated with fewer hypomentalizing errors.
library(ggeffects)
pr <- ggpredict(step.model, "CAPS5_PM_avoid [all]")
plot(pr, add.data = TRUE, residuals=TRUE)
pr <- ggpredict(step.model, "CAPS5_PM_anxArou [all]")
plot(pr, add.data = TRUE, residuals=TRUE)
Given a linear model of y = x1 + x2, partial regression plots show the conditional effect of x1 on y (i.e., effect of x1 factoring out x2).
Note: The datapoints are jittered for ease of visualization; CAPS5_PM actual values are integers.
Here are the diagnostic plots for the model:
shapiro.test(mascDat$tot_mascHypoM)
Shapiro-Wilk normality test
data: mascDat$tot_mascHypoM
W = 0.94057, p-value = 0.003106
plot(step.model)
Diagnostic plots look generally okay.
K-folds cross-validation of the stepwise regression produces the same model:
step.model$results
step.model$finalModel
Call:
lm(formula = .outcome ~ gender_1isF + iq_est + CAPS5_PM_avoid +
CAPS5_PM_anxArou + STAI_Total.imp, data = dat)
Coefficients:
(Intercept) gender_1isF iq_est CAPS5_PM_avoid CAPS5_PM_anxArou STAI_Total.imp
12.84167 1.01558 -0.08176 0.41723 -0.62712 0.05650
summary(step.model$finalModel)
Call:
lm(formula = .outcome ~ gender_1isF + iq_est + CAPS5_PM_avoid +
CAPS5_PM_anxArou + STAI_Total.imp, data = dat)
Residuals:
Min 1Q Median 3Q Max
-4.8444 -1.3518 -0.1727 1.6058 5.7621
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 12.84167 3.27279 3.924 0.000224 ***
gender_1isF 1.01558 0.74322 1.366 0.176812
iq_est -0.08176 0.02894 -2.825 0.006386 **
CAPS5_PM_avoid 0.41723 0.25935 1.609 0.112826
CAPS5_PM_anxArou -0.62712 0.23084 -2.717 0.008570 **
STAI_Total.imp 0.05650 0.03617 1.562 0.123419
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 2.377 on 61 degrees of freedom
Multiple R-squared: 0.2881, Adjusted R-squared: 0.2297
F-statistic: 4.937 on 5 and 61 DF, p-value: 0.0007371
Feature selection using elastic net regression with cross-validation (code adapted from Agnes’ ACNP 2021 analyses):
#select predictors and target variable
model1 <- mascDat
#inspect correlation matrix:
corrMat<-round(cor(model1[1:ncol(model1)]),1) #including target variable
plot1<-ggcorrplot(corrMat, hc.order = TRUE)
plot1
#define predictors and target variable:
x=as.matrix(model1[,1:12]) #excluding target variable
x=scale(x)
y=model1$tot_mascHypoM
#run cross-validated elastic net with grid search across alpha and lambda values:
alphas <- seq(0, 1, by=.002) #[alpha=1 is LASSO, alpha=0 is ridge]
mses <- numeric(501)
mins <- numeric(501)
maxes <- numeric(501)
for(i in 1:501){
cvfits1 <- cv.glmnet(x, y, alpha=alphas[i], nfolds=10, family="gaussian")
loc <- which(cvfits1$lambda==cvfits1$lambda.min)
maxes[i] <- cvfits1$lambda %>% max
mins[i] <- cvfits1$lambda %>% min
mses[i] <- cvfits1$cvm[loc]
}
plot2 <- plot(cvfits1)
plot2
NULL
plot(cvfits1[["glmnet.fit"]], xvar = "lambda")
#get coefficients and r2 for winning (MSE minimising) model:
coef(cvfits1, s="lambda.min") #gives coefficient for model (varying alpha, lambda) with minimum MSE
13 x 1 sparse Matrix of class "dgCMatrix"
s1
(Intercept) 4.5970149
age_visit1 .
gender_1isF 0.2510350
iq_est -0.7111450
CAPS5_PM_reExp .
CAPS5_PM_avoid 0.3924475
CAPS5_PM_negAff 0.1271783
CAPS5_PM_anhed .
CAPS5_PM_extBeh .
CAPS5_PM_anxArou -0.5559220
CAPS5_PM_dysArou .
BDI_Total.imp .
STAI_Total.imp 0.4145198
r2_1<-cvfits1$glmnet.fit$dev.ratio[which(cvfits1$glmnet.fit$lambda == cvfits1$lambda.min)]
r2_1
[1] 0.2578073
#visualise coefficients as bar plot:
tmp<-coef(cvfits1, s="lambda.min") #hack to extract names from glmnet output matrix to variable
cv.out=data.frame(beta=coef(cvfits1, s="lambda.min")[2:nrow(coef(cvfits1, s="lambda.min")),1],
name = tmp@Dimnames[[1]][2:nrow(coef(cvfits1, s="lambda.min"))])
cv.out$name<-factor(cv.out$name, levels=cv.out$name) #this stops ggplot reordering the x axis
plot3<-ggplot(data=cv.out, aes(x=name,y=beta)) +
geom_bar(stat="identity") +
theme_pubclean() + geom_hline(yintercept=0, color="black") +
ggpubr::rotate_x_text(30) +
labs(x="", y="beta")
plot3
Similar results to the stepwise regressions, except this time we have CAPS5 negative affect in the model too (positively associated with number of hypomentalizing errors).
Using stepAIC from MASS.
Note: I know in SPSS you can enter variables in “blocks” and specify stepwise for the same regression model (so a mix of hierarchical and stepwise regression?) but I can’t figure out a way to do both simultaneously in R.
# make a MASC dataset
mascDat <- data %>% filter(!record_id %in% c("NWTC-118","NWTC-125")) %>% select(-c(starts_with("tot_rmet"),group_3low,record_id, gender_2isF,tot_mascAccuracy,tot_mascControl,tot_mascHypoM,tot_mascAcc,tot_mascNoM,tot_mascTotQC1, trad_responder, CTQ_Total.imp,CAPS5_PM_total, starts_with("use"), ends_with("use"), starts_with("masc"), starts_with("rmet")))
stepAIC chooses a model by AIC, in a stepwise regression algorithm using both backward and forward selection.
# Fit the full model
full.model <- lm(tot_mascHyperM ~., mascDat)
summary(full.model)
Call:
lm(formula = tot_mascHyperM ~ ., data = mascDat)
Residuals:
Min 1Q Median 3Q Max
-3.0092 -1.3690 -0.3126 1.3620 6.4005
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.025165 4.260878 0.006 0.995
age_visit1 0.048224 0.052334 0.921 0.361
gender_1isF -0.234086 0.694950 -0.337 0.738
iq_est 0.008907 0.028374 0.314 0.755
CAPS5_PM_reExp -0.268767 0.248954 -1.080 0.285
CAPS5_PM_avoid 0.321065 0.373831 0.859 0.394
CAPS5_PM_negAff -0.149942 0.243686 -0.615 0.541
CAPS5_PM_anhed -0.208025 0.234428 -0.887 0.379
CAPS5_PM_extBeh 0.497478 0.724435 0.687 0.495
CAPS5_PM_anxArou 0.285119 0.281267 1.014 0.315
CAPS5_PM_dysArou 0.037383 0.337093 0.111 0.912
BDI_Total.imp -0.020720 0.077888 -0.266 0.791
STAI_Total.imp 0.072756 0.050629 1.437 0.156
Residual standard error: 2.109 on 54 degrees of freedom
Multiple R-squared: 0.1189, Adjusted R-squared: -0.07687
F-statistic: 0.6074 on 12 and 54 DF, p-value: 0.8265
# Stepwise regression model
step.model <- stepAIC(full.model, direction = "both", trace = FALSE)
summary(step.model)
Call:
lm(formula = tot_mascHyperM ~ 1, data = mascDat)
Residuals:
Min 1Q Median 3Q Max
-3.2537 -1.2537 -0.2537 1.7463 5.7463
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4.2537 0.2483 17.13 <0.0000000000000002 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 2.033 on 66 degrees of freedom
The stepwise algorithm threw everything out of the model except for the intercept, as does the k-folds cross-validation:
step.model$results
step.model$finalModel
Call:
lm(formula = .outcome ~ 1, data = dat)
Coefficients:
(Intercept)
4.254
summary(step.model$finalModel)
Call:
lm(formula = .outcome ~ 1, data = dat)
Residuals:
Min 1Q Median 3Q Max
-3.2537 -1.2537 -0.2537 1.7463 5.7463
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4.2537 0.2483 17.13 <0.0000000000000002 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 2.033 on 66 degrees of freedom
Feature selection using elastic net regression with cross-validation also shrinks all coefficients to zero:
#select predictors and target variable
model1 <- mascDat
#inspect correlation matrix:
corrMat<-round(cor(model1[1:ncol(model1)]),1) #including target variable
plot1<-ggcorrplot(corrMat, hc.order = TRUE)
plot1
#define predictors and target variable:
x=as.matrix(model1[,1:12]) #excluding target variable
x=scale(x)
y=model1$tot_mascHyperM
#run cross-validated elastic net with grid search across alpha and lambda values:
alphas <- seq(0, 1, by=.002) #[alpha=1 is LASSO, alpha=0 is ridge]
mses <- numeric(501)
mins <- numeric(501)
maxes <- numeric(501)
for(i in 1:501){
cvfits1 <- cv.glmnet(x, y, alpha=alphas[i], nfolds=10, family="gaussian")
loc <- which(cvfits1$lambda==cvfits1$lambda.min)
maxes[i] <- cvfits1$lambda %>% max
mins[i] <- cvfits1$lambda %>% min
mses[i] <- cvfits1$cvm[loc]
}
plot2 <- plot(cvfits1)
plot2
NULL
plot(cvfits1[["glmnet.fit"]], xvar = "lambda")
#get coefficients and r2 for winning (MSE minimising) model:
coef(cvfits1, s="lambda.min") #gives coefficient for model (varying alpha, lambda) with minimum MSE
13 x 1 sparse Matrix of class "dgCMatrix"
s1
(Intercept) 4.253731
age_visit1 .
gender_1isF .
iq_est .
CAPS5_PM_reExp .
CAPS5_PM_avoid .
CAPS5_PM_negAff .
CAPS5_PM_anhed .
CAPS5_PM_extBeh .
CAPS5_PM_anxArou .
CAPS5_PM_dysArou .
BDI_Total.imp .
STAI_Total.imp .
r2_1<-cvfits1$glmnet.fit$dev.ratio[which(cvfits1$glmnet.fit$lambda == cvfits1$lambda.min)]
r2_1
[1] 0
#visualise coefficients as bar plot:
tmp<-coef(cvfits1, s="lambda.min") #hack to extract names from glmnet output matrix to variable
cv.out=data.frame(beta=coef(cvfits1, s="lambda.min")[2:nrow(coef(cvfits1, s="lambda.min")),1],
name = tmp@Dimnames[[1]][2:nrow(coef(cvfits1, s="lambda.min"))])
cv.out$name<-factor(cv.out$name, levels=cv.out$name) #this stops ggplot reordering the x axis
plot3<-ggplot(data=cv.out, aes(x=name,y=beta)) +
geom_bar(stat="identity") +
theme_pubclean() + geom_hline(yintercept=0, color="black") +
ggpubr::rotate_x_text(30) +
labs(x="", y="beta")
plot3
So we can conclude that none of the hypothesized variables are strongly associated with MASC hypermentalizing errors.
Using stepAIC from MASS.
Note: I know in SPSS you can enter variables in “blocks” and specify stepwise for the same regression model (so a mix of hierarchical and stepwise regression?) but I can’t figure out a way to do both simultaneously in R.
# make a MASC dataset
mascDat <- data %>% filter(!record_id %in% c("NWTC-118","NWTC-125")) %>% select(-c(starts_with("tot_rmet"),group_3low,record_id, gender_2isF,tot_mascAccuracy,tot_mascControl,tot_mascHyperM,tot_mascAcc,tot_mascHypoM,tot_mascTotQC1, trad_responder, CTQ_Total.imp,CAPS5_PM_total, starts_with("use"), ends_with("use"), starts_with("masc"), starts_with("rmet")))
stepAIC chooses a model by AIC, in a stepwise regression algorithm using both backward and forward selection.
# Fit the full model
full.model <- lm(tot_mascNoM ~., mascDat)
summary(full.model)
Call:
lm(formula = tot_mascNoM ~ ., data = mascDat)
Residuals:
Min 1Q Median 3Q Max
-2.0021 -0.7215 -0.2256 0.6830 2.5188
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4.024389 2.475374 1.626 0.1098
age_visit1 0.029883 0.030404 0.983 0.3301
gender_1isF -0.294850 0.403734 -0.730 0.4684
iq_est -0.034046 0.016484 -2.065 0.0437 *
CAPS5_PM_reExp -0.181758 0.144631 -1.257 0.2143
CAPS5_PM_avoid 0.405215 0.217178 1.866 0.0675 .
CAPS5_PM_negAff 0.007828 0.141570 0.055 0.9561
CAPS5_PM_anhed -0.250694 0.136192 -1.841 0.0712 .
CAPS5_PM_extBeh -0.060817 0.420864 -0.145 0.8856
CAPS5_PM_anxArou -0.038176 0.163403 -0.234 0.8162
CAPS5_PM_dysArou 0.003643 0.195836 0.019 0.9852
BDI_Total.imp 0.104620 0.045250 2.312 0.0246 *
STAI_Total.imp -0.024828 0.029413 -0.844 0.4023
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.225 on 54 degrees of freedom
Multiple R-squared: 0.345, Adjusted R-squared: 0.1994
F-statistic: 2.37 on 12 and 54 DF, p-value: 0.01558
# Stepwise regression model
step.model <- stepAIC(full.model, direction = "both", trace = FALSE)
summary(step.model)
Call:
lm(formula = tot_mascNoM ~ age_visit1 + iq_est + CAPS5_PM_reExp +
CAPS5_PM_avoid + CAPS5_PM_anhed + BDI_Total.imp, data = mascDat)
Residuals:
Min 1Q Median 3Q Max
-1.8804 -0.7338 -0.2520 0.6856 2.4957
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.81584 2.12475 1.325 0.1901
age_visit1 0.04057 0.02779 1.460 0.1495
iq_est -0.03054 0.01523 -2.006 0.0494 *
CAPS5_PM_reExp -0.18724 0.09914 -1.889 0.0638 .
CAPS5_PM_avoid 0.40780 0.17475 2.334 0.0230 *
CAPS5_PM_anhed -0.24554 0.11722 -2.095 0.0404 *
BDI_Total.imp 0.07553 0.02753 2.744 0.0080 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.18 on 60 degrees of freedom
Multiple R-squared: 0.3246, Adjusted R-squared: 0.257
F-statistic: 4.805 on 6 and 60 DF, p-value: 0.0004608
The stepwise algorithm narrowed down the full model to a model that includes age, estimated FSIQ, CAPS5-PM re-experiencing, CAPS5-PM avoidance, CAPS-PM anhedonia, and BDI-II.
Here are the partial regression plots for MASC no-mentalizing and CAPS5 symptom dimensions. We see that avoidance is associated with more no-mentalizing errors, whereas re-experiencing and anhedonia are associated with fewer no-mentalizing errors.
library(ggeffects)
pr <- ggpredict(step.model, "CAPS5_PM_avoid [all]")
plot(pr, add.data = TRUE, residuals=TRUE)
pr <- ggpredict(step.model, "CAPS5_PM_reExp [all]")
plot(pr, add.data = TRUE, residuals=TRUE)
pr <- ggpredict(step.model, "CAPS5_PM_anhed [all]")
plot(pr, add.data = TRUE, residuals=TRUE)
Given a linear model of y = x1 + x2, partial regression plots show the conditional effect of x1 on y (i.e., effect of x1 factoring out x2).
Note: The datapoints are jittered for ease of visualization; CAPS5_PM actual values are integers.
(The plots of CAPS5 anhedonia and re-experiencing are not super compelling, though…)
Here are the diagnostic plots for the model:
shapiro.test(mascDat$tot_mascNoM)
Shapiro-Wilk normality test
data: mascDat$tot_mascNoM
W = 0.92005, p-value = 0.0003627
plot(step.model)
K-folds cross-validation of the stepwise regression produces the same model:
step.model$results
step.model$finalModel
Call:
lm(formula = .outcome ~ age_visit1 + iq_est + CAPS5_PM_reExp +
CAPS5_PM_avoid + CAPS5_PM_anhed + BDI_Total.imp, data = dat)
Coefficients:
(Intercept) age_visit1 iq_est CAPS5_PM_reExp CAPS5_PM_avoid CAPS5_PM_anhed BDI_Total.imp
2.81584 0.04057 -0.03054 -0.18724 0.40780 -0.24554 0.07553
summary(step.model$finalModel)
Call:
lm(formula = .outcome ~ age_visit1 + iq_est + CAPS5_PM_reExp +
CAPS5_PM_avoid + CAPS5_PM_anhed + BDI_Total.imp, data = dat)
Residuals:
Min 1Q Median 3Q Max
-1.8804 -0.7338 -0.2520 0.6856 2.4957
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.81584 2.12475 1.325 0.1901
age_visit1 0.04057 0.02779 1.460 0.1495
iq_est -0.03054 0.01523 -2.006 0.0494 *
CAPS5_PM_reExp -0.18724 0.09914 -1.889 0.0638 .
CAPS5_PM_avoid 0.40780 0.17475 2.334 0.0230 *
CAPS5_PM_anhed -0.24554 0.11722 -2.095 0.0404 *
BDI_Total.imp 0.07553 0.02753 2.744 0.0080 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.18 on 60 degrees of freedom
Multiple R-squared: 0.3246, Adjusted R-squared: 0.257
F-statistic: 4.805 on 6 and 60 DF, p-value: 0.0004608
Feature selection using elastic net regression with cross-validation (code adapted from Agnes’ ACNP 2021 analyses):
#select predictors and target variable
model1 <- mascDat
#inspect correlation matrix:
corrMat<-round(cor(model1[1:ncol(model1)]),1) #including target variable
plot1<-ggcorrplot(corrMat, hc.order = TRUE)
plot1
#define predictors and target variable:
x=as.matrix(model1[,1:12]) #excluding target variable
x=scale(x)
y=model1$tot_mascNoM
#run cross-validated elastic net with grid search across alpha and lambda values:
alphas <- seq(0, 1, by=.002) #[alpha=1 is LASSO, alpha=0 is ridge]
mses <- numeric(501)
mins <- numeric(501)
maxes <- numeric(501)
for(i in 1:501){
cvfits1 <- cv.glmnet(x, y, alpha=alphas[i], nfolds=10, family="gaussian")
loc <- which(cvfits1$lambda==cvfits1$lambda.min)
maxes[i] <- cvfits1$lambda %>% max
mins[i] <- cvfits1$lambda %>% min
mses[i] <- cvfits1$cvm[loc]
}
plot2 <- plot(cvfits1)
plot2
NULL
plot(cvfits1[["glmnet.fit"]], xvar = "lambda")
#get coefficients and r2 for winning (MSE minimising) model:
coef(cvfits1, s="lambda.min") #gives coefficient for model (varying alpha, lambda) with minimum MSE
13 x 1 sparse Matrix of class "dgCMatrix"
s1
(Intercept) 1.86567164
age_visit1 0.20850806
gender_1isF -0.05868503
iq_est -0.26763731
CAPS5_PM_reExp -0.33597022
CAPS5_PM_avoid 0.56536803
CAPS5_PM_negAff .
CAPS5_PM_anhed -0.44167622
CAPS5_PM_extBeh -0.12563743
CAPS5_PM_anxArou .
CAPS5_PM_dysArou .
BDI_Total.imp 0.69241707
STAI_Total.imp -0.05401173
r2_1<-cvfits1$glmnet.fit$dev.ratio[which(cvfits1$glmnet.fit$lambda == cvfits1$lambda.min)]
r2_1
[1] 0.3233127
#visualise coefficients as bar plot:
tmp<-coef(cvfits1, s="lambda.min") #hack to extract names from glmnet output matrix to variable
cv.out=data.frame(beta=coef(cvfits1, s="lambda.min")[2:nrow(coef(cvfits1, s="lambda.min")),1],
name = tmp@Dimnames[[1]][2:nrow(coef(cvfits1, s="lambda.min"))])
cv.out$name<-factor(cv.out$name, levels=cv.out$name) #this stops ggplot reordering the x axis
plot3<-ggplot(data=cv.out, aes(x=name,y=beta)) +
geom_bar(stat="identity") +
theme_pubclean() + geom_hline(yintercept=0, color="black") +
ggpubr::rotate_x_text(30) +
labs(x="", y="beta")
plot3
Similar results to the stepwise regressions, except this time we have gender, CAPS5-PM externalizing behavior, and STAI in the model too (small beta values, both negatively associated with number of no-mentalizing errors).
Interesting that MASC no-mentalizing is the only behavioral variable where we see a strong effect of depression (BDI-II score).
Using package relaimpo.
Note: using variables selected via stepwise regression because they’re usually the same or a smaller number than the elastic net feature selection. But let me know if I should do otherwise - I’m not attached to doing one thing or the other.
library(relaimpo)
Loading required package: boot
Attaching package: ‘boot’
The following object is masked from ‘package:survival’:
aml
The following object is masked from ‘package:lattice’:
melanoma
Loading required package: survey
Loading required package: grid
Attaching package: ‘survey’
The following object is masked from ‘package:Hmisc’:
deff
The following object is masked from ‘package:graphics’:
dotchart
Loading required package: mitools
This is the global version of package relaimpo.
If you are a non-US user, a version with the interesting additional metric pmvd is available
from Ulrike Groempings web site at prof.beuth-hochschule.de/groemping.
# rmet accuracy
m1<-lm(tot_rmetAcc ~ gender_1isF + iq_est + CAPS5_PM_avoid +
CAPS5_PM_anxArou, data = data)
# masc accuracy
m2<-lm(tot_mascAcc ~ iq_est + CAPS5_PM_reExp + CAPS5_PM_avoid +
CAPS5_PM_anhed + STAI_Total.imp, data = data)
# masc hypo-mentalizing
m3<-lm(tot_mascHypoM ~ gender_1isF + iq_est + CAPS5_PM_avoid +
CAPS5_PM_anxArou + STAI_Total.imp, data = data)
# skipping hyper-mentalizing since best model was intercept only
# masc no-mentalizing
m4<-lm(tot_mascNoM ~ age_visit1 + iq_est + CAPS5_PM_reExp + CAPS5_PM_avoid +
CAPS5_PM_anhed + BDI_Total.imp, data = data)
m1.1<-boot.relimp(m1, b=3200, rela=TRUE)
booteval.relimp(m1.1, sort=TRUE)
Response variable: tot_rmetAcc
Total response variance: 0.01288166
Analysis based on 69 observations
4 Regressors:
gender_1isF iq_est CAPS5_PM_avoid CAPS5_PM_anxArou
Proportion of variance explained by model: 23.89%
Metrics are normalized to sum to 100% (rela=TRUE).
Relative importance metrics:
lmg
gender_1isF 0.09335778
iq_est 0.58199734
CAPS5_PM_avoid 0.23710611
CAPS5_PM_anxArou 0.08753876
Average coefficients for different model sizes:
1X 2Xs 3Xs 4Xs
gender_1isF -0.039785420 -0.041130785 -0.041108310 -0.041219249
iq_est 0.004262001 0.004193140 0.004096725 0.003995876
CAPS5_PM_avoid -0.015232682 -0.016418967 -0.018200555 -0.020517214
CAPS5_PM_anxArou -0.001568413 0.006046883 0.012514133 0.017789292
Confidence interval information ( 3200 bootstrap replicates, bty= perc ):
Sorted Relative Contributions with confidence intervals:
Lower Upper
percentage 0.95 0.95 0.95
iq_est.lmg 0.5820 ABCD 0.1154 0.8441
CAPS5_PM_avoid.lmg 0.2371 ABCD 0.0330 0.5847
gender_1isF.lmg 0.0934 ABCD 0.0023 0.4353
CAPS5_PM_anxArou.lmg 0.0875 _BCD 0.0240 0.3244
Letters indicate the ranks covered by bootstrap CIs.
(Rank bootstrap confidence intervals always obtained by percentile method)
CAUTION: Bootstrap confidence intervals can be somewhat liberal.
Differences between Relative Contributions (sorted by absolute value):
Lower Upper
difference 0.95 0.95 0.95
iq_est-CAPS5_PM_anxArou.lmg 0.4945 -0.0929 0.7962
gender_1isF-iq_est.lmg -0.4886 -0.8223 0.2327
iq_est-CAPS5_PM_avoid.lmg 0.3449 -0.3906 0.7893
CAPS5_PM_avoid-CAPS5_PM_anxArou.lmg 0.1496 -0.1714 0.4776
gender_1isF-CAPS5_PM_avoid.lmg -0.1437 -0.5280 0.3087
gender_1isF-CAPS5_PM_anxArou.lmg 0.0058 -0.2575 0.3493
* indicates that CI for difference does not include 0.
CAUTION: Bootstrap confidence intervals can be somewhat liberal.
m2.1<-boot.relimp(m2, b=3200, rela=TRUE)
booteval.relimp(m2.1, sort=TRUE)
Response variable: tot_mascAcc
Total response variance: 0.006119625
Analysis based on 67 observations
5 Regressors:
iq_est CAPS5_PM_reExp CAPS5_PM_avoid CAPS5_PM_anhed STAI_Total.imp
Proportion of variance explained by model: 36.29%
Metrics are normalized to sum to 100% (rela=TRUE).
Relative importance metrics:
lmg
iq_est 0.20632149
CAPS5_PM_reExp 0.10805044
CAPS5_PM_avoid 0.30709930
CAPS5_PM_anhed 0.09638167
STAI_Total.imp 0.28214710
Average coefficients for different model sizes:
1X 2Xs 3Xs 4Xs 5Xs
iq_est 0.002451463 0.00194203722 0.001962106 0.002007351 0.002074095
CAPS5_PM_reExp -0.005369451 0.00217683051 0.006460687 0.009066894 0.011013320
CAPS5_PM_avoid -0.017566888 -0.02054565325 -0.023673834 -0.026648454 -0.029583587
CAPS5_PM_anhed -0.007599171 0.00005047912 0.005512410 0.009435760 0.012770962
STAI_Total.imp -0.002428041 -0.00246596494 -0.002689143 -0.002916222 -0.003171291
Confidence interval information ( 3200 bootstrap replicates, bty= perc ):
Sorted Relative Contributions with confidence intervals:
Lower Upper
percentage 0.95 0.95 0.95
CAPS5_PM_avoid.lmg 0.3071 ABCDE 0.0477 0.5345
STAI_Total.imp.lmg 0.2821 ABCDE 0.0846 0.5034
iq_est.lmg 0.2063 ABCDE 0.0177 0.5877
CAPS5_PM_reExp.lmg 0.1081 _BCDE 0.0528 0.2578
CAPS5_PM_anhed.lmg 0.0964 _BCDE 0.0448 0.2002
Letters indicate the ranks covered by bootstrap CIs.
(Rank bootstrap confidence intervals always obtained by percentile method)
CAUTION: Bootstrap confidence intervals can be somewhat liberal.
Differences between Relative Contributions (sorted by absolute value):
Lower Upper
difference 0.95 0.95 0.95
CAPS5_PM_avoid-CAPS5_PM_anhed.lmg 0.2107 -0.0359 0.4153
CAPS5_PM_reExp-CAPS5_PM_avoid.lmg -0.1990 -0.4188 0.1160
CAPS5_PM_anhed-STAI_Total.imp.lmg -0.1858 -0.4137 0.0530
CAPS5_PM_reExp-STAI_Total.imp.lmg -0.1741 -0.3947 0.0934
iq_est-CAPS5_PM_anhed.lmg 0.1099 -0.1266 0.5154
iq_est-CAPS5_PM_avoid.lmg -0.1008 -0.4882 0.4978
iq_est-CAPS5_PM_reExp.lmg 0.0983 -0.1586 0.5092
iq_est-STAI_Total.imp.lmg -0.0758 -0.4104 0.4343
CAPS5_PM_avoid-STAI_Total.imp.lmg 0.0250 -0.3819 0.3946
CAPS5_PM_reExp-CAPS5_PM_anhed.lmg 0.0117 -0.1180 0.1895
* indicates that CI for difference does not include 0.
CAUTION: Bootstrap confidence intervals can be somewhat liberal.
m3.1<-boot.relimp(m3, b=3200, rela=TRUE)
booteval.relimp(m3.1, sort=TRUE)
Response variable: tot_mascHypoM
Total response variance: 7.335142
Analysis based on 67 observations
5 Regressors:
gender_1isF iq_est CAPS5_PM_avoid CAPS5_PM_anxArou STAI_Total.imp
Proportion of variance explained by model: 28.81%
Metrics are normalized to sum to 100% (rela=TRUE).
Relative importance metrics:
lmg
gender_1isF 0.08320567
iq_est 0.36101628
CAPS5_PM_avoid 0.20679330
CAPS5_PM_anxArou 0.16032658
STAI_Total.imp 0.18865816
Average coefficients for different model sizes:
1X 2Xs 3Xs 4Xs 5Xs
gender_1isF 1.07264957 1.07140465 1.06363555 1.04605817 1.01557555
iq_est -0.09104704 -0.08490595 -0.08210966 -0.08145965 -0.08176067
CAPS5_PM_avoid 0.45196872 0.44884158 0.44770138 0.43988397 0.41723217
CAPS5_PM_anxArou 0.04611650 -0.22819638 -0.42394891 -0.55301529 -0.62712119
STAI_Total.imp 0.06037586 0.05837106 0.05747721 0.05708040 0.05649724
Confidence interval information ( 3200 bootstrap replicates, bty= perc ):
Sorted Relative Contributions with confidence intervals:
Lower Upper
percentage 0.95 0.95 0.95
iq_est.lmg 0.3610 ABCDE 0.0263 0.7789
CAPS5_PM_avoid.lmg 0.2068 ABCDE 0.0235 0.5056
STAI_Total.imp.lmg 0.1887 ABCDE 0.0219 0.4349
CAPS5_PM_anxArou.lmg 0.1603 ABCDE 0.0350 0.3593
gender_1isF.lmg 0.0832 ABCDE 0.0028 0.4119
Letters indicate the ranks covered by bootstrap CIs.
(Rank bootstrap confidence intervals always obtained by percentile method)
CAUTION: Bootstrap confidence intervals can be somewhat liberal.
Differences between Relative Contributions (sorted by absolute value):
Lower Upper
difference 0.95 0.95 0.95
gender_1isF-iq_est.lmg -0.2778 -0.7441 0.2616
iq_est-CAPS5_PM_anxArou.lmg 0.2007 -0.2140 0.7128
iq_est-STAI_Total.imp.lmg 0.1724 -0.3609 0.7359
iq_est-CAPS5_PM_avoid.lmg 0.1542 -0.4000 0.7245
gender_1isF-CAPS5_PM_avoid.lmg -0.1236 -0.4684 0.3340
gender_1isF-STAI_Total.imp.lmg -0.1055 -0.3675 0.2778
gender_1isF-CAPS5_PM_anxArou.lmg -0.0771 -0.3077 0.3150
CAPS5_PM_avoid-CAPS5_PM_anxArou.lmg 0.0465 -0.2593 0.3935
CAPS5_PM_anxArou-STAI_Total.imp.lmg -0.0283 -0.3212 0.2357
CAPS5_PM_avoid-STAI_Total.imp.lmg 0.0181 -0.2986 0.3333
* indicates that CI for difference does not include 0.
CAUTION: Bootstrap confidence intervals can be somewhat liberal.
m4.1<-boot.relimp(m4, b=3200, rela=TRUE)
booteval.relimp(m4.1, sort=TRUE)
Response variable: tot_mascNoM
Total response variance: 1.875622
Analysis based on 67 observations
6 Regressors:
age_visit1 iq_est CAPS5_PM_reExp CAPS5_PM_avoid CAPS5_PM_anhed BDI_Total.imp
Proportion of variance explained by model: 32.46%
Metrics are normalized to sum to 100% (rela=TRUE).
Relative importance metrics:
lmg
age_visit1 0.1525502
iq_est 0.1025223
CAPS5_PM_reExp 0.1056241
CAPS5_PM_avoid 0.2554684
CAPS5_PM_anhed 0.1057406
BDI_Total.imp 0.2780944
Average coefficients for different model sizes:
1X 2Xs 3Xs 4Xs 5Xs 6Xs
age_visit1 0.06454487 0.06381176 0.05957571 0.05418210 0.04796321 0.04057374
iq_est -0.02472300 -0.01939273 -0.02071068 -0.02400723 -0.02730138 -0.03053984
CAPS5_PM_reExp 0.07988981 -0.02098996 -0.08805573 -0.13114769 -0.16131853 -0.18724305
CAPS5_PM_avoid 0.27227032 0.32958983 0.36390133 0.38191488 0.39336090 0.40779676
CAPS5_PM_anhed 0.09935115 -0.01718685 -0.10114026 -0.16103156 -0.20581369 -0.24554465
BDI_Total.imp 0.04238557 0.05005701 0.05674249 0.06302515 0.06907353 0.07552636
Confidence interval information ( 3200 bootstrap replicates, bty= perc ):
Sorted Relative Contributions with confidence intervals:
Lower Upper
percentage 0.95 0.95 0.95
BDI_Total.imp.lmg 0.2781 ABCDEF 0.0446 0.4332
CAPS5_PM_avoid.lmg 0.2555 ABCDE_ 0.0978 0.4926
age_visit1.lmg 0.1526 ABCDEF 0.0077 0.4729
CAPS5_PM_anhed.lmg 0.1057 _BCDEF 0.0444 0.2038
CAPS5_PM_reExp.lmg 0.1056 _BCDEF 0.0545 0.2747
iq_est.lmg 0.1025 ABCDEF 0.0084 0.3128
Letters indicate the ranks covered by bootstrap CIs.
(Rank bootstrap confidence intervals always obtained by percentile method)
CAUTION: Bootstrap confidence intervals can be somewhat liberal.
Differences between Relative Contributions (sorted by absolute value):
Lower Upper
difference 0.95 0.95 0.95
iq_est-BDI_Total.imp.lmg -0.1756 -0.3869 0.1939
CAPS5_PM_reExp-BDI_Total.imp.lmg -0.1725 -0.3158 0.1480
CAPS5_PM_anhed-BDI_Total.imp.lmg -0.1724 -0.3334 0.0856
iq_est-CAPS5_PM_avoid.lmg -0.1529 -0.4390 0.1385
CAPS5_PM_reExp-CAPS5_PM_avoid.lmg -0.1498 -0.3668 0.0642
CAPS5_PM_avoid-CAPS5_PM_anhed.lmg 0.1497 -0.0212 0.3813
age_visit1-BDI_Total.imp.lmg -0.1255 -0.3798 0.3467
age_visit1-CAPS5_PM_avoid.lmg -0.1029 -0.4374 0.3300
age_visit1-iq_est.lmg 0.0500 -0.2454 0.4223
age_visit1-CAPS5_PM_reExp.lmg 0.0469 -0.2114 0.3875
age_visit1-CAPS5_PM_anhed.lmg 0.0468 -0.1539 0.4163
CAPS5_PM_avoid-BDI_Total.imp.lmg -0.0226 -0.2754 0.3920
iq_est-CAPS5_PM_anhed.lmg -0.0032 -0.1416 0.2135
iq_est-CAPS5_PM_reExp.lmg -0.0031 -0.2197 0.2259
CAPS5_PM_reExp-CAPS5_PM_anhed.lmg -0.0001 -0.1175 0.1895
* indicates that CI for difference does not include 0.
CAUTION: Bootstrap confidence intervals can be somewhat liberal.
Based on group (low-exp., higher-exp., PTSD) and/or non-symptomatic vs. symptomatic.
The brackets annotated with * on the 3-way comparison boxplots are just the results of posthoc t-tests between the groups (no correction for multiple comparison).
library(car)
Loading required package: carData
Attaching package: ‘car’
The following object is masked from ‘package:boot’:
logit
The following object is masked from ‘package:dplyr’:
recode
library(afex)
Loading required package: lme4
************
Welcome to afex. For support visit: http://afex.singmann.science/
- Functions for ANOVAs: aov_car(), aov_ez(), and aov_4()
- Methods for calculating p-values with mixed(): 'S', 'KR', 'LRT', and 'PB'
- 'afex_aov' and 'mixed' objects can be passed to emmeans() for follow-up tests
- NEWS: emmeans() for ANOVA models now uses model = 'multivariate' as default.
- Get and set global package options with: afex_options()
- Set orthogonal sum-to-zero contrasts globally: set_sum_contrasts()
- For example analyses see: browseVignettes("afex")
************
Attaching package: ‘afex’
The following object is masked from ‘package:lme4’:
lmer
# ANOVA
aov_rmet <-
aov_car(tot_rmetAcc ~ group_3low + 1 + Error(record_id),
data = data, type="III", include_aov = FALSE)
Contrasts set to contr.sum for the following variables: group_3low
summary(aov_rmet$lm)
Call:
lm(formula = dv ~ group_3low, data = structure(list(record_id = structure(1:69, .Label = c("NWTC-001",
"NWTC-002", "NWTC-004", "NWTC-005", "NWTC-006", "NWTC-007", "NWTC-008",
"NWTC-009", "NWTC-010", "NWTC-011", "NWTC-012", "NWTC-013", "NWTC-014",
"NWTC-015", "NWTC-017", "NWTC-018", "NWTC-019", "NWTC-022", "NWTC-023",
"NWTC-025", "NWTC-027", "NWTC-028", "NWTC-030", "NWTC-031", "NWTC-032",
"NWTC-033", "NWTC-034", "NWTC-035", "NWTC-037", "NWTC-039", "NWTC-040",
"NWTC-041", "NWTC-042", "NWTC-043", "NWTC-045", "NWTC-046", "NWTC-047",
"NWTC-053", "NWTC-056", "NWTC-058", "NWTC-059", "NWTC-060", "NWTC-062",
"NWTC-066", "NWTC-069", "NWTC-071", "NWTC-072", "NWTC-073", "NWTC-074",
"NWTC-076", "NWTC-078", "NWTC-079", "NWTC-080", "NWTC-081", "NWTC-082",
"NWTC-083", "NWTC-084", "NWTC-091", "NWTC-092", "NWTC-093", "NWTC-094",
"NWTC-095", "NWTC-100", "NWTC-109", "NWTC-113", "NWTC-117", "NWTC-118",
"NWTC-123", "NWTC-125"), class = "factor"), group_3low = structure(c(3L,
3L, 3L, 3L, 2L, 3L, 3L, 2L, 2L, 3L, 3L, 3L, 2L, 2L, 3L, 3L, 2L,
3L, 3L, 2L, 2L, 3L, 3L, 2L, 3L, 1L, 1L, 3L, 2L, 1L, 2L, 3L, 2L,
2L, 3L, 3L, 2L, 2L, 2L, 1L, 3L, 1L, 3L, 3L, 1L, 3L, 1L, 3L, 3L,
1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 3L, 2L, 2L, 1L, 1L, 2L,
1L, 1L, 1L, 1L), .Label = c("Low-exposed", "PTSD", "Resilient"
), class = "factor", contrasts = "contr.sum"), dv = c(0.777777777777778,
0.833333333333333, 0.75, 0.694444444444444, 0.888888888888889,
0.888888888888889, 0.722222222222222, 0.694444444444444, 0.722222222222222,
0.861111111111111, 0.75, 0.527777777777778, 0.861111111111111,
0.722222222222222, 0.833333333333333, 0.833333333333333, 0.888888888888889,
0.722222222222222, 0.777777777777778, 0.888888888888889, 0.542857142857143,
1, 0.833333333333333, 0.944444444444444, 0.777777777777778, 0.722222222222222,
0.722222222222222, 0.916666666666667, 0.694444444444444, 0.583333333333333,
0.916666666666667, 0.888888888888889, 0.722222222222222, 0.527777777777778,
0.527777777777778, 0.833333333333333, 0.6, 0.805555555555556,
0.638888888888889, 0.916666666666667, 0.75, 0.805555555555556,
0.722222222222222, 0.888888888888889, 0.861111111111111, 0.777777777777778,
0.833333333333333, 0.777777777777778, 0.611111111111111, 0.833333333333333,
0.666666666666667, 0.833333333333333, 0.944444444444444, 0.861111111111111,
0.611111111111111, 0.861111111111111, 0.805555555555556, 0.722222222222222,
0.527777777777778, 0.638888888888889, 0.694444444444444, 0.777777777777778,
0.638888888888889, 0.861111111111111, 0.694444444444444, 0.805555555555556,
0.75, 0.833333333333333, 0.861111111111111)), row.names = c(NA,
-69L), class = "data.frame"))
Residuals:
Min 1Q Median 3Q Max
-0.243056 -0.048611 0.006944 0.063889 0.229167
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.76834 0.01372 56.007 <0.0000000000000002 ***
group_3low1 0.02888 0.02000 1.444 0.153
group_3low2 -0.03138 0.01974 -1.589 0.117
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.1127 on 66 degrees of freedom
Multiple R-squared: 0.04285, Adjusted R-squared: 0.01384
F-statistic: 1.477 on 2 and 66 DF, p-value: 0.2357
aov_rmet
Anova Table (Type III tests)
Response: tot_rmetAcc
Effect df MSE F ges p.value
1 group_3low 2, 66 0.01 1.48 .043 .236
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘+’ 0.1 ‘ ’ 1
# T-test
data <- data %>% mutate(group_2grp = factor(if_else(group_3low == "PTSD", "PTSD","noPTSD")))
t.test(tot_rmetAcc ~ group_2grp, data)
Welch Two Sample t-test
data: tot_rmetAcc by group_2grp
t = 1.4112, df = 32.312, p-value = 0.1678
alternative hypothesis: true difference in means between group noPTSD and group PTSD is not equal to 0
95 percent confidence interval:
-0.01987135 0.10960586
sample estimates:
mean in group noPTSD mean in group PTSD
0.7818287 0.7369615
grp_comparisons <- list( c("Low-exposed", "PTSD"), c("Low-exposed", "Resilient"), c("PTSD", "Resilient"))
grp_Tcomparisons <- list(c("noPTSD", "PTSD"))
p1 <- ggplot(data, aes(group_3low,tot_rmetAcc, fill=group_3low)) +
geom_boxplot(aes(color = group_3low, alpha=.9)) + geom_point(aes(color = group_3low)) +
theme_pubclean() + theme_pubclean(base_size = 16) + theme(axis.text.x = element_text(
angle = 0,
vjust = 1,
hjust = 1),
legend.position = "none") + scale_fill_manual(values=c("#2eb4e7", "#d4148d","#291d6d" )) + scale_color_manual(values=c("#2eb4e7","#d4148d", "#291d6d")) + stat_compare_means(
comparisons = grp_comparisons,
label = "p.signif",
method = "t.test",
hide.ns = FALSE )
p1
p1 <- ggplot(data, aes(group_2grp,tot_rmetAcc,fill=group_2grp)) +
geom_boxplot(aes(color = group_2grp, alpha=.9)) + geom_point(aes(color = group_2grp)) +
theme_pubclean() + theme_pubclean(base_size = 16) + theme(axis.text.x = element_text(
angle = 0,
vjust = 1,
hjust = 1),
legend.position = "none") + stat_compare_means(
comparisons = grp_Tcomparisons,
label = "p.signif",
method = "t.test",
hide.ns = FALSE )
p1
# ANOVA: groups
aov_masc <-
aov_car(tot_mascAcc ~ group_3low + 1 + Error(record_id),
data = data, type="III", include_aov = FALSE)
Warning: Missing values for following ID(s):
NWTC-118, NWTC-125
Removing those cases from the analysis.
Contrasts set to contr.sum for the following variables: group_3low
summary(aov_masc$lm)
Call:
lm(formula = dv ~ group_3low, data = structure(list(record_id = structure(c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L,
29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L,
42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L,
55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 68L
), .Label = c("NWTC-001", "NWTC-002", "NWTC-004", "NWTC-005",
"NWTC-006", "NWTC-007", "NWTC-008", "NWTC-009", "NWTC-010", "NWTC-011",
"NWTC-012", "NWTC-013", "NWTC-014", "NWTC-015", "NWTC-017", "NWTC-018",
"NWTC-019", "NWTC-022", "NWTC-023", "NWTC-025", "NWTC-027", "NWTC-028",
"NWTC-030", "NWTC-031", "NWTC-032", "NWTC-033", "NWTC-034", "NWTC-035",
"NWTC-037", "NWTC-039", "NWTC-040", "NWTC-041", "NWTC-042", "NWTC-043",
"NWTC-045", "NWTC-046", "NWTC-047", "NWTC-053", "NWTC-056", "NWTC-058",
"NWTC-059", "NWTC-060", "NWTC-062", "NWTC-066", "NWTC-069", "NWTC-071",
"NWTC-072", "NWTC-073", "NWTC-074", "NWTC-076", "NWTC-078", "NWTC-079",
"NWTC-080", "NWTC-081", "NWTC-082", "NWTC-083", "NWTC-084", "NWTC-091",
"NWTC-092", "NWTC-093", "NWTC-094", "NWTC-095", "NWTC-100", "NWTC-109",
"NWTC-113", "NWTC-117", "NWTC-118", "NWTC-123", "NWTC-125"), class = "factor"),
group_3low = structure(c(3L, 3L, 3L, 3L, 2L, 3L, 3L, 2L,
2L, 3L, 3L, 3L, 2L, 2L, 3L, 3L, 2L, 3L, 3L, 2L, 2L, 3L, 3L,
2L, 3L, 1L, 1L, 3L, 2L, 1L, 2L, 3L, 2L, 2L, 3L, 3L, 2L, 2L,
2L, 1L, 3L, 1L, 3L, 3L, 1L, 3L, 1L, 3L, 3L, 1L, 3L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 3L, 2L, 2L, 1L, 1L, 2L, 1L, 1L), .Label = c("Low-exposed",
"PTSD", "Resilient"), class = "factor", contrasts = "contr.sum"),
dv = c(0.822222222222222, 0.888888888888889, 0.733333333333333,
0.777777777777778, 0.866666666666667, 0.844444444444444,
0.755555555555556, 0.688888888888889, 0.644444444444444,
0.755555555555556, 0.777777777777778, 0.666666666666667,
0.755555555555556, 0.666666666666667, 0.888888888888889,
0.866666666666667, 0.760869565217391, 0.666666666666667,
0.866666666666667, 0.666666666666667, 0.755555555555556,
0.760869565217391, 0.822222222222222, 0.755555555555556,
0.733333333333333, 0.844444444444444, 0.666666666666667,
0.8, 0.844444444444444, 0.739130434782609, 0.911111111111111,
0.777777777777778, 0.777777777777778, 0.755555555555556,
0.8, 0.777777777777778, 0.733333333333333, 0.577777777777778,
0.911111111111111, 0.777777777777778, 0.8, 0.8, 0.666666666666667,
0.888888888888889, 0.8, 0.688888888888889, 0.8, 0.659090909090909,
0.822222222222222, 0.733333333333333, 0.733333333333333,
0.733333333333333, 0.777777777777778, 0.733333333333333,
0.777777777777778, 0.8, 0.755555555555556, 0.666666666666667,
0.688888888888889, 0.733333333333333, 0.644444444444444,
0.577777777777778, 0.822222222222222, 0.844444444444444,
0.6, 0.8, 0.822222222222222)), row.names = c(1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L,
18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L,
31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L,
44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L,
57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 68L), class = "data.frame"))
Residuals:
Min 1Q Median 3Q Max
-0.148401 -0.045182 0.000081 0.042888 0.184932
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.761070 0.009377 81.165 <0.0000000000000002 ***
group_3low1 0.018264 0.013908 1.313 0.1938
group_3low2 -0.034891 0.013355 -2.613 0.0112 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.07548 on 64 degrees of freedom
Multiple R-squared: 0.09719, Adjusted R-squared: 0.06898
F-statistic: 3.445 on 2 and 64 DF, p-value: 0.03794
aov_masc
Anova Table (Type III tests)
Response: tot_mascAcc
Effect df MSE F ges p.value
1 group_3low 2, 64 0.01 3.44 * .097 .038
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘+’ 0.1 ‘ ’ 1
# T-test
data <- data %>% mutate(group_2grp = factor(if_else(group_3low == "PTSD", "PTSD","noPTSD")))
t.test(tot_mascAcc ~ group_2grp, data)
Welch Two Sample t-test
data: tot_mascAcc by group_2grp
t = 2.2269, df = 27.207, p-value = 0.03442
alternative hypothesis: true difference in means between group noPTSD and group PTSD is not equal to 0
95 percent confidence interval:
0.004118295 0.100199207
sample estimates:
mean in group noPTSD mean in group PTSD
0.7783377 0.7261790
grp_comparisons <- list( c("Low-exposed", "PTSD"), c("Low-exposed", "Resilient"), c("PTSD", "Resilient"))
p1 <- ggplot(data, aes(group_3low,tot_mascAcc, fill=group_3low)) +
geom_boxplot(aes(color = group_3low, alpha=.9)) + geom_point(aes(color = group_3low)) +
theme_pubclean() + theme_pubclean(base_size = 16) + theme(axis.text.x = element_text(
angle = 0,
vjust = 1,
hjust = 1),
legend.position = "none") + scale_fill_manual(values=c("#2eb4e7", "#d4148d","#291d6d" )) + scale_color_manual(values=c("#2eb4e7","#d4148d", "#291d6d")) + stat_compare_means(
comparisons = grp_comparisons,
label = "p.signif",
method = "t.test",
hide.ns = FALSE )
p1
Warning: Removed 2 rows containing non-finite values (`stat_boxplot()`).
Warning: Removed 2 rows containing non-finite values (`stat_signif()`).
Warning: Removed 2 rows containing missing values (`geom_point()`).
p1 <- ggplot(data, aes(group_2grp,tot_mascAcc,fill=group_2grp)) +
geom_boxplot(aes(color = group_2grp, alpha=.9)) + geom_point(aes(color = group_2grp)) +
theme_pubclean() + theme_pubclean(base_size = 16) + theme(axis.text.x = element_text(
angle = 0,
vjust = 1,
hjust = 1),
legend.position = "none") + stat_compare_means(
comparisons = grp_Tcomparisons,
label = "p.signif",
method = "t.test",
hide.ns = FALSE )
p1
Warning: Removed 2 rows containing non-finite values (`stat_boxplot()`).
Warning: Removed 2 rows containing non-finite values (`stat_signif()`).
Warning: Removed 2 rows containing missing values (`geom_point()`).
# ANOVA: groups
aov_masc <-
aov_car(tot_mascHypoM ~ group_3low + 1 + Error(record_id),
data = data, type="III", include_aov = FALSE)
Warning: Missing values for following ID(s):
NWTC-118, NWTC-125
Removing those cases from the analysis.
Contrasts set to contr.sum for the following variables: group_3low
summary(aov_masc$lm)
Call:
lm(formula = dv ~ group_3low, data = structure(list(record_id = structure(c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L,
29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L,
42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L,
55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 68L
), .Label = c("NWTC-001", "NWTC-002", "NWTC-004", "NWTC-005",
"NWTC-006", "NWTC-007", "NWTC-008", "NWTC-009", "NWTC-010", "NWTC-011",
"NWTC-012", "NWTC-013", "NWTC-014", "NWTC-015", "NWTC-017", "NWTC-018",
"NWTC-019", "NWTC-022", "NWTC-023", "NWTC-025", "NWTC-027", "NWTC-028",
"NWTC-030", "NWTC-031", "NWTC-032", "NWTC-033", "NWTC-034", "NWTC-035",
"NWTC-037", "NWTC-039", "NWTC-040", "NWTC-041", "NWTC-042", "NWTC-043",
"NWTC-045", "NWTC-046", "NWTC-047", "NWTC-053", "NWTC-056", "NWTC-058",
"NWTC-059", "NWTC-060", "NWTC-062", "NWTC-066", "NWTC-069", "NWTC-071",
"NWTC-072", "NWTC-073", "NWTC-074", "NWTC-076", "NWTC-078", "NWTC-079",
"NWTC-080", "NWTC-081", "NWTC-082", "NWTC-083", "NWTC-084", "NWTC-091",
"NWTC-092", "NWTC-093", "NWTC-094", "NWTC-095", "NWTC-100", "NWTC-109",
"NWTC-113", "NWTC-117", "NWTC-118", "NWTC-123", "NWTC-125"), class = "factor"),
group_3low = structure(c(3L, 3L, 3L, 3L, 2L, 3L, 3L, 2L,
2L, 3L, 3L, 3L, 2L, 2L, 3L, 3L, 2L, 3L, 3L, 2L, 2L, 3L, 3L,
2L, 3L, 1L, 1L, 3L, 2L, 1L, 2L, 3L, 2L, 2L, 3L, 3L, 2L, 2L,
2L, 1L, 3L, 1L, 3L, 3L, 1L, 3L, 1L, 3L, 3L, 1L, 3L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 3L, 2L, 2L, 1L, 1L, 2L, 1L, 1L), .Label = c("Low-exposed",
"PTSD", "Resilient"), class = "factor", contrasts = "contr.sum"),
dv = c(4L, 2L, 3L, 4L, 0L, 2L, 5L, 7L, 11L, 6L, 3L, 11L,
4L, 6L, 2L, 3L, 7L, 11L, 3L, 5L, 5L, 3L, 6L, 4L, 7L, 2L,
5L, 1L, 3L, 6L, 2L, 4L, 7L, 6L, 5L, 5L, 5L, 9L, 0L, 1L, 2L,
4L, 6L, 1L, 2L, 5L, 3L, 3L, 1L, 1L, 7L, 5L, 3L, 7L, 2L, 4L,
2L, 7L, 5L, 10L, 6L, 9L, 4L, 5L, 11L, 5L, 3L)), row.names = c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L,
29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L,
42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L,
55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 68L
), class = "data.frame"))
Residuals:
Min 1Q Median 3Q Max
-5.6667 -1.5556 -0.4643 1.4444 6.5357
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4.5622 0.3258 14.005 <0.0000000000000002 ***
group_3low1 -1.0066 0.4832 -2.083 0.0412 *
group_3low2 1.1045 0.4640 2.381 0.0203 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 2.622 on 64 degrees of freedom
Multiple R-squared: 0.09098, Adjusted R-squared: 0.06257
F-statistic: 3.203 on 2 and 64 DF, p-value: 0.04725
aov_masc
Anova Table (Type III tests)
Response: tot_mascHypoM
Effect df MSE F ges p.value
1 group_3low 2, 64 6.88 3.20 * .091 .047
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘+’ 0.1 ‘ ’ 1
# T-test
data <- data %>% mutate(group_2grp = factor(if_else(group_3low == "PTSD", "PTSD","noPTSD")))
t.test(tot_mascHypoM ~ group_2grp, data)
Welch Two Sample t-test
data: tot_mascHypoM by group_2grp
t = -2.0893, df = 32.797, p-value = 0.04452
alternative hypothesis: true difference in means between group noPTSD and group PTSD is not equal to 0
95 percent confidence interval:
-3.07544338 -0.04049865
sample estimates:
mean in group noPTSD mean in group PTSD
4.108696 5.666667
grp_comparisons <- list( c("Low-exposed", "PTSD"), c("Low-exposed", "Resilient"), c("PTSD", "Resilient"))
p1 <- ggplot(data, aes(group_3low,tot_mascHypoM, fill=group_3low)) +
geom_boxplot(aes(color = group_3low, alpha=.9)) + geom_point(aes(color = group_3low)) +
theme_pubclean(base_size = 16) + theme(axis.text.x = element_text(
angle = 0,
vjust = 1,
hjust = 1),
legend.position = "none") + scale_fill_manual(values=c("#2eb4e7", "#d4148d","#291d6d" )) + scale_color_manual(values=c("#2eb4e7","#d4148d", "#291d6d")) + stat_compare_means(
comparisons = grp_comparisons,
label = "p.signif",
method = "t.test",
hide.ns = FALSE )
p1
Warning: Removed 2 rows containing non-finite values (`stat_boxplot()`).
Warning: Removed 2 rows containing non-finite values (`stat_signif()`).
Warning: Removed 2 rows containing missing values (`geom_point()`).
p1 <- ggplot(data, aes(group_2grp,tot_mascHypoM,fill=group_2grp)) +
geom_boxplot(aes(color = group_2grp, alpha=.9)) + geom_point(aes(color = group_2grp)) +
theme_pubclean(base_size = 16) + theme(axis.text.x = element_text(
angle = 0,
vjust = 1,
hjust = 1),
legend.position = "none") + stat_compare_means(
comparisons = grp_Tcomparisons,
label = "p.signif",
method = "t.test",
hide.ns = FALSE )
p1
Warning: Removed 2 rows containing non-finite values (`stat_boxplot()`).
Warning: Removed 2 rows containing non-finite values (`stat_signif()`).
Warning: Removed 2 rows containing missing values (`geom_point()`).
# ANOVA: groups
aov_masc <-
aov_car(tot_mascHyperM ~ group_3low + 1 + Error(record_id),
data = data, type="III", include_aov = FALSE)
Warning: Missing values for following ID(s):
NWTC-118, NWTC-125
Removing those cases from the analysis.
Contrasts set to contr.sum for the following variables: group_3low
summary(aov_masc$lm)
Call:
lm(formula = dv ~ group_3low, data = structure(list(record_id = structure(c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L,
29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L,
42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L,
55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 68L
), .Label = c("NWTC-001", "NWTC-002", "NWTC-004", "NWTC-005",
"NWTC-006", "NWTC-007", "NWTC-008", "NWTC-009", "NWTC-010", "NWTC-011",
"NWTC-012", "NWTC-013", "NWTC-014", "NWTC-015", "NWTC-017", "NWTC-018",
"NWTC-019", "NWTC-022", "NWTC-023", "NWTC-025", "NWTC-027", "NWTC-028",
"NWTC-030", "NWTC-031", "NWTC-032", "NWTC-033", "NWTC-034", "NWTC-035",
"NWTC-037", "NWTC-039", "NWTC-040", "NWTC-041", "NWTC-042", "NWTC-043",
"NWTC-045", "NWTC-046", "NWTC-047", "NWTC-053", "NWTC-056", "NWTC-058",
"NWTC-059", "NWTC-060", "NWTC-062", "NWTC-066", "NWTC-069", "NWTC-071",
"NWTC-072", "NWTC-073", "NWTC-074", "NWTC-076", "NWTC-078", "NWTC-079",
"NWTC-080", "NWTC-081", "NWTC-082", "NWTC-083", "NWTC-084", "NWTC-091",
"NWTC-092", "NWTC-093", "NWTC-094", "NWTC-095", "NWTC-100", "NWTC-109",
"NWTC-113", "NWTC-117", "NWTC-118", "NWTC-123", "NWTC-125"), class = "factor"),
group_3low = structure(c(3L, 3L, 3L, 3L, 2L, 3L, 3L, 2L,
2L, 3L, 3L, 3L, 2L, 2L, 3L, 3L, 2L, 3L, 3L, 2L, 2L, 3L, 3L,
2L, 3L, 1L, 1L, 3L, 2L, 1L, 2L, 3L, 2L, 2L, 3L, 3L, 2L, 2L,
2L, 1L, 3L, 1L, 3L, 3L, 1L, 3L, 1L, 3L, 3L, 1L, 3L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 3L, 2L, 2L, 1L, 1L, 2L, 1L, 1L), .Label = c("Low-exposed",
"PTSD", "Resilient"), class = "factor", contrasts = "contr.sum"),
dv = c(1L, 2L, 8L, 6L, 4L, 2L, 2L, 4L, 3L, 4L, 4L, 1L, 7L,
4L, 2L, 3L, 2L, 2L, 3L, 5L, 4L, 4L, 2L, 6L, 4L, 4L, 6L, 6L,
2L, 3L, 2L, 5L, 2L, 4L, 4L, 4L, 4L, 7L, 3L, 7L, 6L, 3L, 5L,
3L, 5L, 6L, 6L, 10L, 6L, 10L, 3L, 4L, 4L, 3L, 4L, 5L, 7L,
3L, 7L, 2L, 8L, 7L, 4L, 1L, 4L, 4L, 3L)), row.names = c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L,
29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L,
42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L,
55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 68L
), class = "data.frame"))
Residuals:
Min 1Q Median 3Q Max
-3.611 -1.611 -0.381 1.389 6.071
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4.30688 0.25378 16.971 <0.0000000000000002 ***
group_3low1 0.30423 0.37641 0.808 0.422
group_3low2 0.07407 0.36145 0.205 0.838
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 2.043 on 64 degrees of freedom
Multiple R-squared: 0.02053, Adjusted R-squared: -0.01007
F-statistic: 0.6709 on 2 and 64 DF, p-value: 0.5148
aov_masc
Anova Table (Type III tests)
Response: tot_mascHyperM
Effect df MSE F ges p.value
1 group_3low 2, 64 4.17 0.67 .021 .515
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘+’ 0.1 ‘ ’ 1
# T-test
data <- data %>% mutate(group_2grp = factor(if_else(group_3low == "PTSD", "PTSD","noPTSD")))
t.test(tot_mascHyperM ~ group_2grp, data)
Welch Two Sample t-test
data: tot_mascHyperM by group_2grp
t = -0.35665, df = 42.506, p-value = 0.7231
alternative hypothesis: true difference in means between group noPTSD and group PTSD is not equal to 0
95 percent confidence interval:
-1.2334537 0.8628533
sample estimates:
mean in group noPTSD mean in group PTSD
4.195652 4.380952
grp_comparisons <- list( c("Low-exposed", "PTSD"), c("Low-exposed", "Resilient"), c("PTSD", "Resilient"))
p1 <- ggplot(data, aes(group_3low,tot_mascHyperM, fill=group_3low)) +
geom_boxplot(aes(color = group_3low, alpha=.9)) + geom_point(aes(color = group_3low)) +
theme_pubclean(base_size = 16) + theme(axis.text.x = element_text(
angle = 0,
vjust = 1,
hjust = 1),
legend.position = "none") + scale_fill_manual(values=c("#2eb4e7", "#d4148d","#291d6d" )) + scale_color_manual(values=c("#2eb4e7","#d4148d", "#291d6d")) + stat_compare_means(
comparisons = grp_comparisons,
label = "p.signif",
method = "t.test",
hide.ns = FALSE )
p1
Warning: Removed 2 rows containing non-finite values (`stat_boxplot()`).
Warning: Removed 2 rows containing non-finite values (`stat_signif()`).
Warning: Removed 2 rows containing missing values (`geom_point()`).
p1 <- ggplot(data, aes(group_2grp,tot_mascHyperM,fill=group_2grp)) +
geom_boxplot(aes(color = group_2grp, alpha=.9)) + geom_point(aes(color = group_2grp)) +
theme_pubclean(base_size = 16) + theme(axis.text.x = element_text(
angle = 0,
vjust = 1,
hjust = 1),
legend.position = "none") + stat_compare_means(
comparisons = grp_Tcomparisons,
label = "p.signif",
method = "t.test",
hide.ns = FALSE )
p1
Warning: Removed 2 rows containing non-finite values (`stat_boxplot()`).
Warning: Removed 2 rows containing non-finite values (`stat_signif()`).
Warning: Removed 2 rows containing missing values (`geom_point()`).
# ANOVA: groups
aov_masc <-
aov_car(tot_mascNoM ~ group_3low + 1 + Error(record_id),
data = data, type="III", include_aov = FALSE)
Warning: Missing values for following ID(s):
NWTC-118, NWTC-125
Removing those cases from the analysis.
Contrasts set to contr.sum for the following variables: group_3low
summary(aov_masc$lm)
Call:
lm(formula = dv ~ group_3low, data = structure(list(record_id = structure(c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L,
29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L,
42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L,
55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 68L
), .Label = c("NWTC-001", "NWTC-002", "NWTC-004", "NWTC-005",
"NWTC-006", "NWTC-007", "NWTC-008", "NWTC-009", "NWTC-010", "NWTC-011",
"NWTC-012", "NWTC-013", "NWTC-014", "NWTC-015", "NWTC-017", "NWTC-018",
"NWTC-019", "NWTC-022", "NWTC-023", "NWTC-025", "NWTC-027", "NWTC-028",
"NWTC-030", "NWTC-031", "NWTC-032", "NWTC-033", "NWTC-034", "NWTC-035",
"NWTC-037", "NWTC-039", "NWTC-040", "NWTC-041", "NWTC-042", "NWTC-043",
"NWTC-045", "NWTC-046", "NWTC-047", "NWTC-053", "NWTC-056", "NWTC-058",
"NWTC-059", "NWTC-060", "NWTC-062", "NWTC-066", "NWTC-069", "NWTC-071",
"NWTC-072", "NWTC-073", "NWTC-074", "NWTC-076", "NWTC-078", "NWTC-079",
"NWTC-080", "NWTC-081", "NWTC-082", "NWTC-083", "NWTC-084", "NWTC-091",
"NWTC-092", "NWTC-093", "NWTC-094", "NWTC-095", "NWTC-100", "NWTC-109",
"NWTC-113", "NWTC-117", "NWTC-118", "NWTC-123", "NWTC-125"), class = "factor"),
group_3low = structure(c(3L, 3L, 3L, 3L, 2L, 3L, 3L, 2L,
2L, 3L, 3L, 3L, 2L, 2L, 3L, 3L, 2L, 3L, 3L, 2L, 2L, 3L, 3L,
2L, 3L, 1L, 1L, 3L, 2L, 1L, 2L, 3L, 2L, 2L, 3L, 3L, 2L, 2L,
2L, 1L, 3L, 1L, 3L, 3L, 1L, 3L, 1L, 3L, 3L, 1L, 3L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 3L, 2L, 2L, 1L, 1L, 2L, 1L, 1L), .Label = c("Low-exposed",
"PTSD", "Resilient"), class = "factor", contrasts = "contr.sum"),
dv = c(3L, 1L, 1L, 0L, 2L, 3L, 4L, 3L, 2L, 1L, 3L, 3L, 0L,
5L, 1L, 0L, 2L, 2L, 0L, 5L, 2L, 4L, 0L, 1L, 1L, 1L, 4L, 2L,
2L, 3L, 0L, 1L, 1L, 1L, 0L, 1L, 3L, 3L, 1L, 2L, 1L, 2L, 4L,
1L, 2L, 3L, 0L, 2L, 1L, 1L, 2L, 3L, 3L, 2L, 4L, 0L, 2L, 5L,
2L, 0L, 2L, 3L, 0L, 1L, 3L, 0L, 2L)), row.names = c(1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L,
17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L,
30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L,
43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L,
56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 68L), class = "data.frame"))
Residuals:
Min 1Q Median 3Q Max
-2.2857 -0.7778 -0.2857 0.7143 2.7143
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.8902 0.1687 11.202 <0.0000000000000002 ***
group_3low1 -0.1124 0.2503 -0.449 0.655
group_3low2 0.3955 0.2403 1.646 0.105
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.358 on 64 degrees of freedom
Multiple R-squared: 0.04617, Adjusted R-squared: 0.01636
F-statistic: 1.549 on 2 and 64 DF, p-value: 0.2203
aov_masc
Anova Table (Type III tests)
Response: tot_mascNoM
Effect df MSE F ges p.value
1 group_3low 2, 64 1.84 1.55 .046 .220
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘+’ 0.1 ‘ ’ 1
# T-test
data <- data %>% mutate(group_2grp = factor(if_else(group_3low == "PTSD", "PTSD","noPTSD")))
t.test(tot_mascNoM ~ group_2grp, data)
Welch Two Sample t-test
data: tot_mascNoM by group_2grp
t = -1.6502, df = 35.189, p-value = 0.1078
alternative hypothesis: true difference in means between group noPTSD and group PTSD is not equal to 0
95 percent confidence interval:
-1.3642978 0.1406953
sample estimates:
mean in group noPTSD mean in group PTSD
1.673913 2.285714
grp_comparisons <- list( c("Low-exposed", "PTSD"), c("Low-exposed", "Resilient"), c("PTSD", "Resilient"))
p1 <- ggplot(data, aes(group_3low,tot_mascNoM, fill=group_3low)) +
geom_boxplot(aes(color = group_3low, alpha=.9)) + geom_point(aes(color = group_3low)) +
theme_pubclean(base_size = 16) + theme(axis.text.x = element_text(
angle = 0,
vjust = 1,
hjust = 1),
legend.position = "none") + scale_fill_manual(values=c("#2eb4e7", "#d4148d","#291d6d" )) + scale_color_manual(values=c("#2eb4e7","#d4148d", "#291d6d")) + stat_compare_means(
comparisons = grp_comparisons,
label = "p.signif",
method = "t.test",
hide.ns = FALSE )
p1
Warning: Removed 2 rows containing non-finite values (`stat_boxplot()`).
Warning: Removed 2 rows containing non-finite values (`stat_signif()`).
Warning: Removed 2 rows containing missing values (`geom_point()`).
p1 <- ggplot(data, aes(group_2grp,tot_mascNoM,fill=group_2grp)) +
geom_boxplot(aes(color = group_2grp, alpha=.9)) + geom_point(aes(color = group_2grp)) +
theme_pubclean(base_size = 16) + theme(axis.text.x = element_text(
angle = 0,
vjust = 1,
hjust = 1),
legend.position = "none") + stat_compare_means(
comparisons = grp_Tcomparisons,
label = "p.signif",
method = "t.test",
hide.ns = FALSE )
p1
Warning: Removed 2 rows containing non-finite values (`stat_boxplot()`).
Warning: Removed 2 rows containing non-finite values (`stat_signif()`).
Warning: Removed 2 rows containing missing values (`geom_point()`).
aov_masc <-
aov_car(tot_mascControl ~ group_3low + 1 + Error(record_id),
data = data, type="III", include_aov = FALSE)
Warning: Missing values for following ID(s):
NWTC-118, NWTC-125
Removing those cases from the analysis.
Contrasts set to contr.sum for the following variables: group_3low
summary(aov_masc$lm)
Call:
lm(formula = dv ~ group_3low, data = structure(list(record_id = structure(c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L,
29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L,
42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L,
55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 68L
), .Label = c("NWTC-001", "NWTC-002", "NWTC-004", "NWTC-005",
"NWTC-006", "NWTC-007", "NWTC-008", "NWTC-009", "NWTC-010", "NWTC-011",
"NWTC-012", "NWTC-013", "NWTC-014", "NWTC-015", "NWTC-017", "NWTC-018",
"NWTC-019", "NWTC-022", "NWTC-023", "NWTC-025", "NWTC-027", "NWTC-028",
"NWTC-030", "NWTC-031", "NWTC-032", "NWTC-033", "NWTC-034", "NWTC-035",
"NWTC-037", "NWTC-039", "NWTC-040", "NWTC-041", "NWTC-042", "NWTC-043",
"NWTC-045", "NWTC-046", "NWTC-047", "NWTC-053", "NWTC-056", "NWTC-058",
"NWTC-059", "NWTC-060", "NWTC-062", "NWTC-066", "NWTC-069", "NWTC-071",
"NWTC-072", "NWTC-073", "NWTC-074", "NWTC-076", "NWTC-078", "NWTC-079",
"NWTC-080", "NWTC-081", "NWTC-082", "NWTC-083", "NWTC-084", "NWTC-091",
"NWTC-092", "NWTC-093", "NWTC-094", "NWTC-095", "NWTC-100", "NWTC-109",
"NWTC-113", "NWTC-117", "NWTC-118", "NWTC-123", "NWTC-125"), class = "factor"),
group_3low = structure(c(3L, 3L, 3L, 3L, 2L, 3L, 3L, 2L,
2L, 3L, 3L, 3L, 2L, 2L, 3L, 3L, 2L, 3L, 3L, 2L, 2L, 3L, 3L,
2L, 3L, 1L, 1L, 3L, 2L, 1L, 2L, 3L, 2L, 2L, 3L, 3L, 2L, 2L,
2L, 1L, 3L, 1L, 3L, 3L, 1L, 3L, 1L, 3L, 3L, 1L, 3L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 3L, 2L, 2L, 1L, 1L, 2L, 1L, 1L), .Label = c("Low-exposed",
"PTSD", "Resilient"), class = "factor", contrasts = "contr.sum"),
dv = c(5L, 6L, 5L, 6L, 5L, 6L, 4L, 6L, 5L, 6L, 6L, 4L, 6L,
6L, 6L, 6L, 5L, 5L, 5L, 5L, 5L, 6L, 5L, 5L, 6L, 6L, 6L, 6L,
6L, 6L, 5L, 6L, 5L, 6L, 5L, 5L, 6L, 6L, 6L, 5L, 6L, 6L, 5L,
5L, 5L, 3L, 6L, 5L, 6L, 5L, 5L, 4L, 6L, 6L, 6L, 5L, 4L, 6L,
3L, 6L, 5L, 3L, 6L, 4L, 5L, 6L, 5L)), row.names = c(1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L,
17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L,
30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L,
43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L,
56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 68L), class = "data.frame"))
Residuals:
Min 1Q Median 3Q Max
-2.3571 -0.3571 -0.2381 0.6429 0.7619
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 5.32804 0.10125 52.622 <0.0000000000000002 ***
group_3low1 0.06085 0.15018 0.405 0.687
group_3low2 -0.08995 0.14421 -0.624 0.535
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.8151 on 64 degrees of freedom
Multiple R-squared: 0.006084, Adjusted R-squared: -0.02498
F-statistic: 0.1959 on 2 and 64 DF, p-value: 0.8226
aov_masc
Anova Table (Type III tests)
Response: tot_mascControl
Effect df MSE F ges p.value
1 group_3low 2, 64 0.66 0.20 .006 .823
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘+’ 0.1 ‘ ’ 1
# T-test
data <- data %>% mutate(group_2grp = factor(if_else(group_3low == "PTSD", "PTSD","noPTSD")))
t.test(tot_mascControl ~ group_2grp, data)
Welch Two Sample t-test
data: tot_mascControl by group_2grp
t = 0.58475, df = 34.276, p-value = 0.5625
alternative hypothesis: true difference in means between group noPTSD and group PTSD is not equal to 0
95 percent confidence interval:
-0.3253048 0.5882447
sample estimates:
mean in group noPTSD mean in group PTSD
5.369565 5.238095
grp_comparisons <- list( c("Low-exposed", "PTSD"), c("Low-exposed", "Resilient"), c("PTSD", "Resilient"))
p1 <- ggplot(data, aes(group_3low,tot_mascControl, fill=group_3low)) +
geom_boxplot(aes(color = group_3low, alpha=.9)) + geom_point(aes(color = group_3low)) +
theme_pubclean(base_size = 16) + theme(axis.text.x = element_text(
angle = 0,
vjust = 1,
hjust = 1),
legend.position = "none") + scale_fill_manual(values=c("#2eb4e7", "#d4148d","#291d6d" )) + scale_color_manual(values=c("#2eb4e7","#d4148d", "#291d6d")) + stat_compare_means(
comparisons = grp_comparisons,
label = "p.signif",
method = "t.test",
hide.ns = FALSE )
p1
Warning: Removed 2 rows containing non-finite values (`stat_boxplot()`).
Warning: Removed 2 rows containing non-finite values (`stat_signif()`).
Warning: Removed 2 rows containing missing values (`geom_point()`).
p1 <- ggplot(data, aes(group_2grp,tot_mascControl,fill=group_2grp)) +
geom_boxplot(aes(color = group_2grp, alpha=.9)) + geom_point(aes(color = group_2grp)) +
theme_pubclean(base_size = 16) + theme(axis.text.x = element_text(
angle = 0,
vjust = 1,
hjust = 1),
legend.position = "none") + stat_compare_means(
comparisons = grp_Tcomparisons,
label = "p.signif",
method = "t.test",
hide.ns = FALSE )
p1
Warning: Removed 2 rows containing non-finite values (`stat_boxplot()`).
Warning: Removed 2 rows containing non-finite values (`stat_signif()`).
Warning: Removed 2 rows containing missing values (`geom_point()`).