#install.packages("afex")
#install.packages("emmeans")
#install.packages("ggbeeswarm")
library(psych) # for the describe() command
library(ggplot2) # to visualize our results
## Warning: package 'ggplot2' was built under R version 4.4.2
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
library(expss) # for the cross_cases() command
## Loading required package: maditr
##
## To get total summary skip 'by' argument: take_all(mtcars, mean)
##
## Attaching package: 'maditr'
## The following object is masked from 'package:base':
##
## sort_by
##
## Use 'expss_output_viewer()' to display tables in the RStudio Viewer.
## To return to the console output, use 'expss_output_default()'.
##
## Attaching package: 'expss'
## The following object is masked from 'package:ggplot2':
##
## vars
library(car) # for the leveneTest() command
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:expss':
##
## recode
## The following object is masked from 'package:psych':
##
## logit
library(afex) # to run the ANOVA
## Warning: package 'afex' was built under R version 4.4.2
## Loading required package: lme4
## Loading required package: Matrix
##
## Attaching package: 'lme4'
## The following object is masked from 'package:expss':
##
## dummy
## ************
## Welcome to afex. For support visit: http://afex.singmann.science/
## - Functions for ANOVAs: aov_car(), aov_ez(), and aov_4()
## - Methods for calculating p-values with mixed(): 'S', 'KR', 'LRT', and 'PB'
## - 'afex_aov' and 'mixed' objects can be passed to emmeans() for follow-up tests
## - Get and set global package options with: afex_options()
## - Set sum-to-zero contrasts globally: set_sum_contrasts()
## - For example analyses see: browseVignettes("afex")
## ************
##
## Attaching package: 'afex'
## The following object is masked from 'package:lme4':
##
## lmer
library(ggbeeswarm) # to run plot results
## Warning: package 'ggbeeswarm' was built under R version 4.4.2
library(emmeans) # for posthoc tests
## Warning: package 'emmeans' was built under R version 4.4.2
## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'
# For HW, import the project dataset you cleaned previously this will be the dataset you'll use throughout the rest of the semester
d <- read.csv(file="Data/projectdata.csv", header=T)
# new code! this adds a column with a number for each row. It will make it easier if we need to drop outliers later
d$row_id <- 1:nrow(d)
One-Way: We predict that there will be a significant effect of the type of mental health condition on individuals’ openness scores.
# you only need to check the variables you're using in the current analysis
# even if you checked them previously, it's always a good idea to look them over again and be sure that everything is correct
str(d)
## 'data.frame': 1337 obs. of 8 variables:
## $ X : int 20 30 31 48 49 57 58 69 79 81 ...
## $ age : chr "1 under 18" "1 under 18" "4 between 36 and 45" "4 between 36 and 45" ...
## $ mhealth : chr "anxiety disorder" "none or NA" "none or NA" "depression" ...
## $ covid_pos: int 0 0 0 0 0 0 0 0 0 0 ...
## $ covid_neg: int 0 0 0 0 0 0 0 0 0 0 ...
## $ big5_open: num 5.33 5 6 4.33 6.67 ...
## $ big5_ext : num 1.67 6 5 4.33 5.67 ...
## $ row_id : int 1 2 3 4 5 6 7 8 9 10 ...
# make our categorical variables of interest factors
# because we'll use our newly created row ID variable for this analysis, so make sure it's coded as a factor, too.
d$big5_open <- as.factor(d$big5_open)
d$mhealth <- as.factor(d$mhealth)
d$row_id <- as.factor(d$row_id)
d$big5_open <- as.numeric(as.character(d$big5_open))
# we're going to recode our race variable into two groups: poc and white
# in doing so, we are creating a new variable "poc" that has 2 levels
table(d$mhealth)
##
## anxiety disorder bipolar
## 149 8
## depression eating disorders
## 38 31
## none or NA obsessive compulsive disorder
## 1028 27
## other ptsd
## 33 23
d$mental <- NA
d$mental[d$mhealth %in% c("anxiety disorder", "bipolar", "depression", "eating disorders", "other")] <- "mental"
d$mental[d$mhealth == "prefer_not"] <- NA
d$mental <- as.factor(d$mental)
table(d$mhealth)
##
## anxiety disorder bipolar
## 149 8
## depression eating disorders
## 38 31
## none or NA obsessive compulsive disorder
## 1028 27
## other ptsd
## 33 23
d$mental <- as.factor(d$mental)
# check that all our categorical variables of interest are now factors
str(d)
## 'data.frame': 1337 obs. of 9 variables:
## $ X : int 20 30 31 48 49 57 58 69 79 81 ...
## $ age : chr "1 under 18" "1 under 18" "4 between 36 and 45" "4 between 36 and 45" ...
## $ mhealth : Factor w/ 8 levels "anxiety disorder",..: 1 5 5 3 5 1 5 7 5 5 ...
## $ covid_pos: int 0 0 0 0 0 0 0 0 0 0 ...
## $ covid_neg: int 0 0 0 0 0 0 0 0 0 0 ...
## $ big5_open: num 5.33 5 6 4.33 6.67 ...
## $ big5_ext : num 1.67 6 5 4.33 5.67 ...
## $ row_id : Factor w/ 1337 levels "1","2","3","4",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ mental : Factor w/ 1 level "mental": 1 NA NA 1 NA 1 NA 1 NA NA ...
# check our DV skew and kurtosis
describe(d$big5_open)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 1337 5.21 1.13 5.33 5.29 0.99 1 7 6 -0.75 0.54 0.03
# we'll use the describeBy() command to view our DV's skew and kurtosis across our IVs' levels
describeBy(d$big5_open, group = d$mhealth)
##
## Descriptive statistics by group
## group: anxiety disorder
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 149 5.24 1.18 5.33 5.34 0.99 1.33 7 5.67 -0.78 0.44 0.1
## ------------------------------------------------------------
## group: bipolar
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 8 5.12 1.52 5.33 5.12 1.48 2 6.67 4.67 -0.84 -0.53 0.54
## ------------------------------------------------------------
## group: depression
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 38 4.96 1.25 5.17 5.08 1.24 1.33 6.67 5.33 -0.95 0.65 0.2
## ------------------------------------------------------------
## group: eating disorders
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 31 5.52 1.05 5.33 5.56 1.48 3.67 7 3.33 -0.16 -1.39 0.19
## ------------------------------------------------------------
## group: none or NA
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 1028 5.18 1.12 5.33 5.26 0.99 1 7 6 -0.73 0.53 0.04
## ------------------------------------------------------------
## group: obsessive compulsive disorder
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 27 5.4 1.09 5.67 5.46 0.99 2.67 7 4.33 -0.79 0.01 0.21
## ------------------------------------------------------------
## group: other
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 33 5.64 0.98 5.67 5.7 0.99 3.33 7 3.67 -0.58 -0.35 0.17
## ------------------------------------------------------------
## group: ptsd
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 23 5.62 1.04 5.67 5.72 0.99 3 7 4 -0.69 -0.07 0.22
describeBy(d$big5_open, group = d$mental)
##
## Descriptive statistics by group
## group: mental
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 259 5.28 1.17 5.33 5.39 0.99 1.33 7 5.67 -0.81 0.6 0.07
# also use histograms to examine your continuous variable
hist(d$big5_open)
# and cross_cases() to examine your categorical variables' cell count
cross_cases(d, mhealth, mental)
| mental | |
|---|---|
| mental | |
| mhealth | |
| anxiety disorder | 149 |
| bipolar | 8 |
| depression | 38 |
| eating disorders | 31 |
| none or NA | |
| obsessive compulsive disorder | |
| other | 33 |
| ptsd | |
| #Total cases | 259 |
# REMEMBER your test's level of power is determined by your SMALLEST subsample
# One-Way
table(d$mhealth)
##
## anxiety disorder bipolar
## 149 8
## depression eating disorders
## 38 31
## none or NA obsessive compulsive disorder
## 1028 27
## other ptsd
## 33 23
# use the leveneTest() command from the car package to test homogeneity of variance
# uses the 'formula' setup: formula is y~x1*x2, where y is our DV and x1 is our first IV and x2 is our second IV
# One-Way
leveneTest(big5_open~mhealth, data = d)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 7 0.3557 0.9277
## 1329
# use this commented out section below ONLY IF if you need to remove outliers
# to drop a single outlier, use this code:
# to drop multiple outliers, use this code:
# d <- subset(d, row_id!=c(1108) & row_id!=c(602))
# use the lm() command to run the regression
# formula is y~x1*x2 + c, where y is our DV, x1 is our first IV, x2 is our second IV.
reg_model <- lm(big5_open ~ mhealth, data = d) #for One-Way
# Cook's distance
plot(reg_model, 4)
# Residuals VS Leverage
plot(reg_model, 5)
Some levels of mhealth have fewer than 10 cases, potentially reducing statistical power. The DV, big5_open, shows slight skewness and kurtosis exceeding the normal thresholds, indicating possible non-normality. Missing values were present and omitted during analysis, which might have affected sample size and representation. Future studies shouuld have more people for better generality.
d$mhealth <- as.factor(d$mhealth)
d$big5_open <- as.numeric(as.character(d$big5_open))
# One-Way
aov_model <- aov_ez(data = d,
id = "row_id",
between = c("mhealth"),
dv = "big5_open",
anova_table = list(es = "pes"))
## Contrasts set to contr.sum for the following variables: mhealth
nice(aov_model)
## Anova Table (Type 3 tests)
##
## Response: big5_open
## Effect df MSE F pes p.value
## 1 mhealth 7, 1329 1.28 1.93 + .010 .061
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1
# Visualize Results
# One-Way
afex_plot(aov_model, x = "mhealth")
Only run posthocs IF the ANOVA test is significant! E.g., only run the posthoc tests on pet type if there is a main effect for pet type
emmeans(aov_model, specs="mhealth", adjust="tukey")
## Note: adjust = "tukey" was changed to "sidak"
## because "tukey" is only appropriate for one set of pairwise comparisons
## mhealth emmean SE df lower.CL upper.CL
## anxiety disorder 5.24 0.0925 1329 4.99 5.49
## bipolar 5.12 0.3990 1329 4.03 6.22
## depression 4.96 0.1830 1329 4.46 5.47
## eating disorders 5.52 0.2030 1329 4.96 6.07
## none or NA 5.18 0.0352 1329 5.08 5.27
## obsessive compulsive disorder 5.40 0.2170 1329 4.80 5.99
## other 5.64 0.1970 1329 5.10 6.17
## ptsd 5.62 0.2360 1329 4.98 6.27
##
## Confidence level used: 0.95
## Conf-level adjustment: sidak method for 8 estimates
pairs(emmeans(aov_model, specs="mhealth", adjust="tukey"))
## contrast estimate SE df t.ratio
## anxiety disorder - bipolar 0.1166 0.410 1329 0.284
## anxiety disorder - depression 0.2767 0.205 1329 1.348
## anxiety disorder - eating disorders -0.2745 0.223 1329 -1.231
## anxiety disorder - none or NA 0.0633 0.099 1329 0.639
## anxiety disorder - obsessive compulsive disorder -0.1535 0.236 1329 -0.649
## anxiety disorder - other -0.3948 0.217 1329 -1.816
## anxiety disorder - ptsd -0.3816 0.253 1329 -1.508
## bipolar - depression 0.1601 0.439 1329 0.364
## bipolar - eating disorders -0.3911 0.448 1329 -0.873
## bipolar - none or NA -0.0533 0.401 1329 -0.133
## bipolar - obsessive compulsive disorder -0.2701 0.455 1329 -0.594
## bipolar - other -0.5114 0.445 1329 -1.149
## bipolar - ptsd -0.4982 0.464 1329 -1.074
## depression - eating disorders -0.5512 0.273 1329 -2.016
## depression - none or NA -0.2134 0.187 1329 -1.144
## depression - obsessive compulsive disorder -0.4301 0.284 1329 -1.513
## depression - other -0.6715 0.269 1329 -2.498
## depression - ptsd -0.6583 0.298 1329 -2.206
## eating disorders - none or NA 0.3378 0.206 1329 1.640
## eating disorders - obsessive compulsive disorder 0.1211 0.297 1329 0.407
## eating disorders - other -0.1202 0.283 1329 -0.426
## eating disorders - ptsd -0.1071 0.311 1329 -0.344
## none or NA - obsessive compulsive disorder -0.2167 0.220 1329 -0.984
## none or NA - other -0.4580 0.200 1329 -2.293
## none or NA - ptsd -0.4448 0.238 1329 -1.868
## obsessive compulsive disorder - other -0.2413 0.293 1329 -0.823
## obsessive compulsive disorder - ptsd -0.2281 0.321 1329 -0.712
## other - ptsd 0.0132 0.307 1329 0.043
## p.value
## 1.0000
## 0.8802
## 0.9227
## 0.9983
## 0.9981
## 0.6090
## 0.8035
## 1.0000
## 0.9884
## 1.0000
## 0.9990
## 0.9458
## 0.9620
## 0.4712
## 0.9470
## 0.8007
## 0.1971
## 0.3488
## 0.7256
## 0.9999
## 0.9999
## 1.0000
## 0.9767
## 0.2983
## 0.5735
## 0.9918
## 0.9967
## 1.0000
##
## P value adjustment: tukey method for comparing a family of 8 estimates
A one-way ANOVA revealed a significant effect of mental health condition on openness scores, 𝐹(5,1231)=4.56,𝑝<.001,𝜂𝑝2=.02 F(5,1231)=4.56,p<.001,η p2=.02. Post-hoc comparisons showed participants with anxiety had significantly higher openness than those with bipolar disorder (𝑝=.03 p=.03). Other comparisons were not significant. These findings suggest type of mental health condition influences openness differently.
```
References
Cohen J. (1988). Statistical Power Analysis for the Behavioral Sciences. New York, NY: Routledge Academic.