library(afex) # to run the ANOVA and plot results
library(psych) # for the describe() command
library(ggplot2) # to visualize our results
library(expss) # for the cross_cases() command
library(car) # for the leveneTest() command
library(emmeans) # for posthoc testsANOVA HW
Loading Libraries
Importing Data
# read the homework data set; header = TRUE treats the first row of the file as column names
d <- read.csv(file = "Data/mydata.csv", header = TRUE)
#
# new code! this adds a column with a number for each row. it acts as an identifier for our participants
# seq_len() is used instead of 1:nrow(d) so that an empty data frame gets an empty id vector rather than c(1, 0)
d$row_id <- seq_len(nrow(d))
State Your Hypothesis - PART OF YOUR WRITEUP
One-way ANOVA: Participants who get less than 5 hours of sleep and participants who get 5-6 hours of sleep will be significantly lower in resilience (measured by the BRS) than participants who get 7-8 hours of sleep.
Check Your Assumptions
ANOVA Assumptions
- Independence of observations (confirmed by data report)
- All levels of the IVs should have equal number of cases (ideally; in the real world, this varies) and there should be no empty cells. Cells with low numbers increase chance of Type II error. (we will check this below)
- Homogeneity of variance should be assured (we will check this below)
- Outliers should be identified and removed (we will check this below)
- DV should be normally distributed for each level of the IV (we will check this below)
Check levels of IVs
# for a one-way ANOVA
# COMMENT THIS SECTION OUT FROM THE HW IF YOU DO NOT USE IT
# count the cases in each level of sleep_hours; useNA = "always" adds a column for missing values even when there are none
table(d$sleep_hours, useNA = "always")
1 < 5 hours 2 5-6 hours 3 7-8 hours 4 8-10 hours 5 > 10 hours <NA>
44 130 143 64 21 0
# copy of the data taken BEFORE any levels are dropped; it is only referenced by the commented-out two-way ANOVA code
d2 <- d
# for a two-way ANOVA
# COMMENT THIS SECTION OUT FROM THE HW IF YOU DO NOT USE IT
#cross_cases(d2, pet, mhealth)
#
# if you need to recode
# to drop levels from your variable:
# this subsets the data so that only participants who are in NEITHER of the two listed levels are kept
# if you don't need this for the homework, comment it out (add a # at the beginning of each line of the call)
d <- subset(
  d,
  sleep_hours != "4 8-10 hours" & sleep_hours != "5 > 10 hours"
)
# to combine levels
# this says that where any participant is coded as 'LEVEL BAD' it should be replaced by 'LEVEL GOOD'
# you can repeat this as needed, changing 'LEVEL BAD' if you have multiple levels that you want to combine into a single level
# if you don't need this for the homework, comment it out (add a # at the beginning of the line)
#d2$pet_rc[d2$pet == "bird"] <- "pet owner"
#d2$pet_rc[d2$pet == "cat"] <- "pet owner"
#d2$pet_rc[d2$pet == "cat and dog"] <- "pet owner"
#d2$pet_rc[d2$pet == "dog"] <- "pet owner"
#d2$pet_rc[d2$pet == "fish"] <- "pet owner"
#d2$pet_rc[d2$pet == "multiple types of pet"] <- "pet owner"
#d2$pet_rc[d2$pet == "other"] <- "pet owner"
#d2$pet_rc[d2$pet == "no pets"] <- "no pets"
#d2$mhealth_rc <- "diagnosis"
#d2$mhealth_rc[d2$mhealth == "none or NA"] <- "no diagnosis"
# preview your changes and make sure only the levels you want to analyze remain
table(d$sleep_hours, useNA = "always")
1 < 5 hours 2 5-6 hours 3 7-8 hours <NA>
44 130 143 0
# # or
# cross_cases(d2, pet_rc, mhealth_rc)
#
# check your variable types
str(d)'data.frame': 317 obs. of 7 variables:
$ education : chr "4 equivalent to AP/IB completion" "prefer not to say" "4 equivalent to AP/IB completion" "4 equivalent to AP/IB completion" ...
$ sleep_hours: chr "3 7-8 hours" "3 7-8 hours" "2 5-6 hours" "3 7-8 hours" ...
$ brs : num 4 2.5 2.33 2 2.33 ...
$ iou : num 2.67 1.85 3.85 3.07 3.04 ...
$ mfq_state : num 4 4 3.12 3.12 4 ...
$ phq : num 2.22 2.56 2.78 3.11 2 ...
$ row_id : int 1 2 4 5 6 7 8 11 12 13 ...
#str(d2)
# make sure that your IV is recognized as a factor by R
# (the str() output shows sleep_hours stored as chr, so it is converted here before the tests that follow)
d$sleep_hours <- as.factor(d$sleep_hours)
#d2$mhealth_rc <- as.factor(d2$mhealth_rc)Check homogeneity of variance
# use the leveneTest() command from the car package to test homogeneity of variance
# uses the 'formula' setup: formula is y~x1*x2, where y is our DV and x1 is our first IV and x2 is our second IV
# for a one-way ANOVA
# COMMENT THIS SECTION OUR FROM THE HW IF YOU DO NOT USE IT
leveneTest(brs~sleep_hours, data = d)Levene's Test for Homogeneity of Variance (center = median)
Df F value Pr(>F)
group 2 1.7696 0.1721
314
# for a two-way ANOVA
# COMMENT THIS SECTION OUR FROM THE HW IF YOU DO NOT USE IT
#leveneTest(phq~pet_rc*mhealth_rc, data = d2)Check for outliers using Cook’s distance and Residuals vs Leverage plot
Run a Regression
# # use this commented out section only if you need to remove outliers
# # to drop a single outlier, remove the leading "# # " from the line below and replace 1108 with the row_id you want to drop:
# # d <- subset(d, row_id!=c(1108))
#
# # to drop multiple outliers, remove the leading "# # " from the line below and list each row_id you want to drop:
# # d <- subset(d, row_id!=c(1108) & row_id!=c(602))
#
# use the lm() command to run the regression
# formula is y~x1*x2 + c, where y is our DV, x1 is our first IV, x2 is our second IV, and c is our covariate
# for a one-way ANOVA
# COMMENT THIS SECTION OUT FROM THE HW IF YOU DO NOT USE IT
# the fitted model is stored in reg_model and passed to plot() in the outlier checks
reg_model <- lm(brs~ sleep_hours, data = d) #for one-way
# for a two-way ANOVA
# COMMENT THIS SECTION OUT FROM THE HW IF YOU DO NOT USE IT
#reg_model2 <- lm(phq ~ pet_rc*mhealth_rc, data = d2) #for two-wayCheck for outliers
# for a one-way ANOVA
# COMMENT THIS SECTION OUT FROM THE HW IF YOU DO NOT USE IT
# Cook's distance
plot(reg_model, 4)
# Residuals vs Leverage
# look at how close the red line is to the dashed line
plot(reg_model, 5)
# for a two-way ANOVA
# COMMENT THIS SECTION OUT FROM THE HW IF YOU DO NOT USE IT
# Cook's distance
#plot(reg_model2, 4)
# Residuals vs Leverage
#plot(reg_model2, 5)Check Your Variables
# we'll use the describeBy() command to view skew and kurtosis across our IVs and make sure the DV is normally distributed across all of the levels
# the DV (brs) is summarized separately for each level of the IV (sleep_hours)
describeBy(d$brs, group = d$sleep_hours)
Descriptive statistics by group
group: 1 < 5 hours
vars n mean sd median trimmed mad min max range skew kurtosis se
X1 1 44 2.28 0.75 2.17 2.26 0.99 1 3.83 2.83 0.24 -0.91 0.11
------------------------------------------------------------
group: 2 5-6 hours
vars n mean sd median trimmed mad min max range skew kurtosis se
X1 1 130 2.55 0.85 2.5 2.55 0.99 1 4.5 3.5 0.05 -0.81 0.07
------------------------------------------------------------
group: 3 7-8 hours
vars n mean sd median trimmed mad min max range skew kurtosis se
X1 1 143 2.88 0.91 2.83 2.88 0.99 1 4.67 3.67 0 -0.87 0.08
#describeBy(d2$phq, group = d2$pet_rc)
#describeBy(d2$phq, group = d2$mhealth_rc)Issues with My Data - PART OF YOUR WRITEUP
One-way ANOVA: I confirmed independence of observations by checking the data report. I checked cell sizes and dropped two levels (participants who got 8-10 hours of sleep and participants who got more than 10 hours of sleep). I checked homogeneity of variance using Levene's test, and the result was non-significant (p = .172). Cook’s distance scores and visual analysis of the residuals vs. leverage plot showed no outliers (all were less than .5). Finally, I checked skew and kurtosis, and all levels were within the normal cutoffs (between -2 and +2).
Run an ANOVA
# for a one-way ANOVA
# COMMENT THIS SECTION OUR FROM THE HW IF YOU DO NOT USE IT
aov_model <- aov_ez(data = d,
id = "row_id",
between = c("sleep_hours"),
dv = "brs",
anova_table = list(es = "pes"))Contrasts set to contr.sum for the following variables: sleep_hours
# for a two-way ANOVA
# COMMENT THIS SECTION OUR FROM THE HW IF YOU DO NOT USE IT
#aov_model2 <- aov_ez(data = d2,
# id = "row_id",
# between = c("pet_rc","mhealth_rc"),
#dv = "phq",
# anova_table = list(es = "pes"))View Output
Effect size cutoffs from Cohen (1988):
- ηp2 = 0.01 indicates a small effect
- ηp2 = 0.06 indicates a medium effect
- ηp2 = 0.14 indicates a large effect
# for a one-way ANOVA
# COMMENT THIS SECTION OUR FROM THE HW IF YOU DO NOT USE IT
nice(aov_model)Anova Table (Type 3 tests)
Response: brs
Effect df MSE F pes p.value
1 sleep_hours 2, 314 0.75 9.73 *** .058 <.001
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1
# for a two-way ANOVA
# COMMENT THIS SECTION OUR FROM THE HW IF YOU DO NOT USE IT
#nice(aov_model2)Visualize Results
# for a one-way ANOVA
# COMMENT THIS SECTION OUT FROM THE HW IF YOU DO NOT USE IT
# plot the fitted model with sleep_hours on the x-axis
afex_plot(aov_model, x = "sleep_hours")
# for a two-way ANOVA
# COMMENT THIS SECTION OUT FROM THE HW IF YOU DO NOT USE IT
#afex_plot(aov_model2, x = "pet_rc", trace = "mhealth_rc")
#afex_plot(aov_model2, x = "mhealth_rc", trace = "pet_rc")Run Posthoc Tests
Only run posthocs if the test is significant! E.g., only run the posthoc tests on gender if there is a main effect for gender.
# for a one-way ANOVA
# COMMENT THIS SECTION OUR FROM THE HW IF YOU DO NOT USE IT
emmeans(aov_model, specs="sleep_hours", adjust="tukey")Note: adjust = "tukey" was changed to "sidak"
because "tukey" is only appropriate for one set of pairwise comparisons
sleep_hours emmean SE df lower.CL upper.CL
1 < 5 hours 2.28 0.1310 314 1.97 2.59
2 5-6 hours 2.55 0.0761 314 2.37 2.73
3 7-8 hours 2.88 0.0726 314 2.70 3.05
Confidence level used: 0.95
Conf-level adjustment: sidak method for 3 estimates
pairs(emmeans(aov_model, specs="sleep_hours", adjust="tukey")) contrast estimate SE df t.ratio p.value
1 < 5 hours - (2 5-6 hours) -0.268 0.151 314 -1.773 0.1803
1 < 5 hours - (3 7-8 hours) -0.596 0.150 314 -3.984 0.0002
(2 5-6 hours) - (3 7-8 hours) -0.328 0.105 314 -3.116 0.0057
P value adjustment: tukey method for comparing a family of 3 estimates
# for a two-way ANOVA
# COMMENT THIS SECTION OUR FROM THE HW IF YOU DO NOT USE IT
#emmeans(aov_model2, specs="pet_rc", adjust="tukey")
#pairs(emmeans(aov_model2, specs="pet_rc", adjust="tukey"))
#emmeans(aov_model2, specs="mhealth_rc", adjust="tukey")
#pairs(emmeans(aov_model2, specs="mhealth_rc", adjust="tukey"))
#emmeans(aov_model2, specs="pet_rc", by="mhealth_rc", adjust="sidak")
#pairs(emmeans(aov_model2, specs="pet_rc", by="mhealth_rc", adjust="sidak"))
#emmeans(aov_model2, specs="mhealth_rc", by="pet_rc", adjust="sidak")
#pairs(emmeans(aov_model2, specs="mhealth_rc", by="pet_rc", adjust="sidak"))Write Up Results
One-Way ANOVA
I hypothesized that participants who get less than 5 hours of sleep and participants who get 5-6 hours of sleep would be significantly lower in resilience (measured by the BRS) than participants who get 7-8 hours of sleep. I confirmed independence of observations by checking the data report. I checked cell sizes and dropped two levels (participants who got 8-10 hours of sleep and participants who got more than 10 hours of sleep). I checked homogeneity of variance using Levene's test, and the result was non-significant. Cook’s distance scores and visual analysis of the residuals vs. leverage plot showed no outliers (all were less than .5). Finally, I checked skew and kurtosis, and all levels were within the normal cutoffs (between -2 and +2). The effect of sleep on resilience was significant, F(2, 314) = 9.73, p < .001, ηp2 = .058. Tukey-adjusted posthoc tests showed that participants who slept less than 5 hours were significantly lower in resilience than those who slept 7-8 hours (p = .0002), and that participants who slept 5-6 hours were also significantly lower in resilience than those who slept 7-8 hours (p = .0057) (refer to Figure 1). However, there was no significant difference in resilience between participants who slept less than 5 hours and those who slept 5-6 hours (p = .1803). The effect size fell just below the cutoff for a medium effect (ηp2 = .058, against a cutoff of .06; Cohen, 1988).
References
Cohen, J. (1988). Statistical Power Analysis for the Behavioral Sciences. New York, NY: Routledge Academic.