library(psych) # for the describe() command
library(ggplot2) # to visualize our results
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
library(expss) # for the cross_cases() command
## Loading required package: maditr
##
## To aggregate data: take(mtcars, mean_mpg = mean(mpg), by = am)
##
## Use 'expss_output_viewer()' to display tables in the RStudio Viewer.
## To return to the console output, use 'expss_output_default()'.
##
## Attaching package: 'expss'
## The following object is masked from 'package:ggplot2':
##
## vars
library(car) # for the leveneTest() command
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:expss':
##
## recode
## The following object is masked from 'package:psych':
##
## logit
library(afex) # to run the ANOVA and plot results
## Loading required package: lme4
## Loading required package: Matrix
##
## Attaching package: 'lme4'
## The following object is masked from 'package:expss':
##
## dummy
## ************
## Welcome to afex. For support visit: http://afex.singmann.science/
## - Functions for ANOVAs: aov_car(), aov_ez(), and aov_4()
## - Methods for calculating p-values with mixed(): 'S', 'KR', 'LRT', and 'PB'
## - 'afex_aov' and 'mixed' objects can be passed to emmeans() for follow-up tests
## - Get and set global package options with: afex_options()
## - Set sum-to-zero contrasts globally: set_sum_contrasts()
## - For example analyses see: browseVignettes("afex")
## ************
##
## Attaching package: 'afex'
## The following object is masked from 'package:lme4':
##
## lmer
library(emmeans) # for posthoc tests
# import the dataset you cleaned previously
# this will be the dataset you'll use throughout the rest of the semester
# use ARC data
d <- read.csv(file="Data/tt_clean_eammi2.csv", header=T)
# new code! this adds a column with a number for each row. it makes it easier when we drop outliers later
d$row_id <- 1:nrow(d)
Note: You can chose to run either a one-way ANOVA (a single IV with more than 3 levels) or a two-way/factorial ANOVA (at least two IVs) for the homework. You will need to specify your hypothesis and customize your code based on the choice you make. I will run both versions of the test here for illustrative purposes.
One-Way: We predict that there will be a significant effect of income on subjective well-being, as measured by the subjective well-being scale (SWB-6).
# you only need to check the variables you're using in the current analysis
# although you checked them previously, it's always a good idea to look them over again and be sure that everything is correct
str(d)
## 'data.frame': 3143 obs. of 8 variables:
## $ ResponseId: chr "R_BJN3bQqi1zUMid3" "R_2TGbiBXmAtxywsD" "R_12G7bIqN2wB2N65" "R_39pldNoon8CePfP" ...
## $ edu_rc : chr "Currently in college" "Completed Bachelors Degree" "Currently in college" "Currently in college" ...
## $ income_rc : chr "20,000 - 39,999" "20,000 - 39,999" "Rather not say" "Rather not say" ...
## $ stress : num 3.3 3.3 4 3.2 3.1 3.5 3.3 2.4 2.9 2.7 ...
## $ swb : num 4.33 4.17 1.83 5.17 3.67 ...
## $ support : num 6 6.75 5.17 5.58 6 ...
## $ belong : num 2.8 4.2 3.6 4 3.4 4.2 3.9 3.6 2.9 2.5 ...
## $ row_id : int 1 2 3 4 5 6 7 8 9 10 ...
# make our categorical variables factors
d$ResponseId <- as.factor(d$ResponseId) #we'll actually use our ID variable for this analysis, so make sure it's coded as a factor
d$edu_rc <- as.factor(d$edu_rc)
d$income_rc <- as.factor(d$income_rc)
d$row_id <- as.factor(d$row_id)
# we're going to recode our income variable into two groups: under 60,000 and over 60,000
table(d$income_rc)
##
## 100,000 - 199,999 20,000 - 39,999 200,000 - 1 million 40,000 - 59,999
## 388 360 139 344
## 60,000 - 79,999 80,000 - 99,999 Over 1 million Rather not say
## 298 236 7 854
## Under 20,000
## 517
d$under[d$income_rc == "Under 20,000"] <-"under"
d$under[d$income_rc == "20,000 - 39,999"] <- "under"
d$under[d$income_rc == "40,000 - 59,999"] <- "under"
d$under[d$income_rc == "60,000 - 79,999"] <- "over"
d$under[d$income_rc == "80,000 - 99,999"] <- "over"
d$under[d$income_rc == "100,000 - 199,999"] <- "over"
d$under[d$income_rc == "Over 1 million"] <- "over"
d$under[d$income_rc == "Rather not say"] <- NA
table(d$under)
##
## over under
## 929 1221
d$under <- as.factor(d$under)
# you can use the describe() command on an entire dataframe (d) or just on a single variable
describe(d$swb)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 3143 4.47 1.32 4.67 4.53 1.48 1 7 6 -0.36 -0.45 0.02
# we'll use the describeBy() command to view skew and kurtosis across our IVs
describeBy(d$swb, group = d$income_rc)
##
## Descriptive statistics by group
## group: 100,000 - 199,999
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 388 4.81 1.28 5 4.9 1.24 1 7 6 -0.63 0.07 0.06
## ------------------------------------------------------------
## group: 20,000 - 39,999
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 360 4.3 1.27 4.33 4.33 1.48 1 7 6 -0.21 -0.5 0.07
## ------------------------------------------------------------
## group: 200,000 - 1 million
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 139 4.72 1.39 5 4.8 1.24 1 7 6 -0.57 -0.37 0.12
## ------------------------------------------------------------
## group: 40,000 - 59,999
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 344 4.42 1.35 4.5 4.5 1.48 1 7 6 -0.42 -0.42 0.07
## ------------------------------------------------------------
## group: 60,000 - 79,999
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 298 4.62 1.31 4.83 4.69 1.24 1 7 6 -0.45 -0.47 0.08
## ------------------------------------------------------------
## group: 80,000 - 99,999
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 236 4.82 1.25 5 4.9 1.24 1.17 7 5.83 -0.55 -0.18 0.08
## ------------------------------------------------------------
## group: Over 1 million
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 7 5.1 1.9 5.83 5.1 1.48 2.33 6.83 4.5 -0.4 -1.83 0.72
## ------------------------------------------------------------
## group: Rather not say
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 854 4.39 1.28 4.5 4.43 1.48 1 7 6 -0.28 -0.53 0.04
## ------------------------------------------------------------
## group: Under 20,000
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 517 4.2 1.35 4.33 4.23 1.48 1 7 6 -0.22 -0.5 0.06
describeBy(d$swb, group = d$under
)
##
## Descriptive statistics by group
## group: over
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 929 4.75 1.29 4.83 4.83 1.24 1 7 6 -0.55 -0.19 0.04
## ------------------------------------------------------------
## group: under
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 1221 4.29 1.33 4.33 4.34 1.48 1 7 6 -0.28 -0.48 0.04
# also use histograms to examine your continuous variable
hist(d$swb)
# and cross_cases() to examine your categorical variables
cross_cases(d, income_rc, swb
)
| swb | |||||||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 1.16666666666667 | 1.33333333333333 | 1.5 | 1.66666666666667 | 1.83333333333333 | 2 | 2.16666666666667 | 2.33333333333333 | 2.5 | 2.66666666666667 | 2.83333333333333 | 3 | 3.16666666666667 | 3.33333333333333 | 3.5 | 3.66666666666667 | 3.83333333333333 | 4 | 4.16666666666667 | 4.33333333333333 | 4.5 | 4.66666666666667 | 4.83333333333333 | 5 | 5.16666666666667 | 5.33333333333333 | 5.5 | 5.66666666666667 | 5.83333333333333 | 6 | 6.16666666666667 | 6.33333333333333 | 6.5 | 6.66666666666667 | 6.83333333333333 | 7 | |
| income_rc | |||||||||||||||||||||||||||||||||||||
| 100,000 - 199,999 | 3 | 2 | 2 | 4 | 1 | 5 | 3 | 5 | 5 | 3 | 7 | 9 | 14 | 4 | 10 | 7 | 15 | 7 | 17 | 26 | 17 | 26 | 24 | 14 | 22 | 17 | 16 | 20 | 28 | 14 | 11 | 6 | 8 | 7 | 9 | ||
| 20,000 - 39,999 | 1 | 2 | 2 | 3 | 5 | 2 | 7 | 9 | 9 | 10 | 9 | 9 | 9 | 13 | 7 | 14 | 16 | 24 | 20 | 17 | 16 | 21 | 18 | 8 | 18 | 18 | 15 | 10 | 9 | 13 | 6 | 6 | 6 | 2 | 4 | 2 | |
| 200,000 - 1 million | 1 | 1 | 1 | 1 | 1 | 1 | 2 | 1 | 3 | 2 | 5 | 2 | 4 | 4 | 4 | 2 | 3 | 2 | 4 | 4 | 5 | 5 | 6 | 9 | 11 | 9 | 6 | 3 | 10 | 5 | 7 | 4 | 1 | 4 | 1 | 5 | |
| 40,000 - 59,999 | 3 | 2 | 2 | 2 | 5 | 3 | 8 | 6 | 1 | 5 | 4 | 11 | 4 | 6 | 14 | 15 | 10 | 16 | 11 | 17 | 13 | 20 | 11 | 13 | 13 | 25 | 18 | 12 | 15 | 14 | 15 | 7 | 7 | 5 | 3 | 5 | 3 |
| 60,000 - 79,999 | 1 | 2 | 1 | 1 | 3 | 2 | 6 | 9 | 1 | 3 | 4 | 12 | 9 | 8 | 2 | 11 | 9 | 9 | 11 | 16 | 8 | 18 | 13 | 12 | 19 | 15 | 18 | 13 | 16 | 11 | 8 | 9 | 6 | 5 | 1 | 6 | |
| 80,000 - 99,999 | 3 | 1 | 3 | 2 | 3 | 5 | 4 | 3 | 6 | 5 | 6 | 6 | 6 | 12 | 10 | 6 | 8 | 13 | 11 | 9 | 16 | 10 | 13 | 15 | 10 | 15 | 10 | 7 | 6 | 4 | 3 | 5 | |||||
| Over 1 million | 1 | 1 | 1 | 1 | 1 | 2 | |||||||||||||||||||||||||||||||
| Rather not say | 2 | 3 | 3 | 5 | 10 | 7 | 14 | 8 | 13 | 10 | 19 | 29 | 31 | 26 | 16 | 30 | 36 | 32 | 46 | 34 | 41 | 34 | 40 | 42 | 54 | 30 | 37 | 36 | 35 | 30 | 36 | 19 | 16 | 7 | 8 | 6 | 9 |
| Under 20,000 | 6 | 4 | 6 | 4 | 5 | 5 | 6 | 9 | 12 | 9 | 16 | 13 | 10 | 29 | 16 | 21 | 16 | 19 | 21 | 22 | 34 | 18 | 24 | 23 | 27 | 24 | 17 | 16 | 20 | 17 | 10 | 8 | 10 | 8 | 1 | 3 | 8 |
| #Total cases | 17 | 15 | 18 | 17 | 27 | 24 | 34 | 46 | 51 | 45 | 64 | 79 | 78 | 98 | 90 | 89 | 105 | 108 | 140 | 125 | 148 | 136 | 149 | 152 | 156 | 157 | 146 | 133 | 127 | 127 | 133 | 79 | 70 | 46 | 35 | 32 | 47 |
table(d$income_rc)
##
## 100,000 - 199,999 20,000 - 39,999 200,000 - 1 million 40,000 - 59,999
## 388 360 139 344
## 60,000 - 79,999 80,000 - 99,999 Over 1 million Rather not say
## 298 236 7 854
## Under 20,000
## 517
# our number of small nb participants is going to hurt us for the two-way anova, but it should be okay for the one-way anova
# so we'll create a new dataframe for the two-way analysis and call it d2
d2 <- subset(d, income_rc != "Over 1 million")
d2$income_rc <- droplevels(d2$income_rc)
# to double-check any changes we made
cross_cases(d2, income_rc, swb)
| swb | |||||||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 1.16666666666667 | 1.33333333333333 | 1.5 | 1.66666666666667 | 1.83333333333333 | 2 | 2.16666666666667 | 2.33333333333333 | 2.5 | 2.66666666666667 | 2.83333333333333 | 3 | 3.16666666666667 | 3.33333333333333 | 3.5 | 3.66666666666667 | 3.83333333333333 | 4 | 4.16666666666667 | 4.33333333333333 | 4.5 | 4.66666666666667 | 4.83333333333333 | 5 | 5.16666666666667 | 5.33333333333333 | 5.5 | 5.66666666666667 | 5.83333333333333 | 6 | 6.16666666666667 | 6.33333333333333 | 6.5 | 6.66666666666667 | 6.83333333333333 | 7 | |
| income_rc | |||||||||||||||||||||||||||||||||||||
| 100,000 - 199,999 | 3 | 2 | 2 | 4 | 1 | 5 | 3 | 5 | 5 | 3 | 7 | 9 | 14 | 4 | 10 | 7 | 15 | 7 | 17 | 26 | 17 | 26 | 24 | 14 | 22 | 17 | 16 | 20 | 28 | 14 | 11 | 6 | 8 | 7 | 9 | ||
| 20,000 - 39,999 | 1 | 2 | 2 | 3 | 5 | 2 | 7 | 9 | 9 | 10 | 9 | 9 | 9 | 13 | 7 | 14 | 16 | 24 | 20 | 17 | 16 | 21 | 18 | 8 | 18 | 18 | 15 | 10 | 9 | 13 | 6 | 6 | 6 | 2 | 4 | 2 | |
| 200,000 - 1 million | 1 | 1 | 1 | 1 | 1 | 1 | 2 | 1 | 3 | 2 | 5 | 2 | 4 | 4 | 4 | 2 | 3 | 2 | 4 | 4 | 5 | 5 | 6 | 9 | 11 | 9 | 6 | 3 | 10 | 5 | 7 | 4 | 1 | 4 | 1 | 5 | |
| 40,000 - 59,999 | 3 | 2 | 2 | 2 | 5 | 3 | 8 | 6 | 1 | 5 | 4 | 11 | 4 | 6 | 14 | 15 | 10 | 16 | 11 | 17 | 13 | 20 | 11 | 13 | 13 | 25 | 18 | 12 | 15 | 14 | 15 | 7 | 7 | 5 | 3 | 5 | 3 |
| 60,000 - 79,999 | 1 | 2 | 1 | 1 | 3 | 2 | 6 | 9 | 1 | 3 | 4 | 12 | 9 | 8 | 2 | 11 | 9 | 9 | 11 | 16 | 8 | 18 | 13 | 12 | 19 | 15 | 18 | 13 | 16 | 11 | 8 | 9 | 6 | 5 | 1 | 6 | |
| 80,000 - 99,999 | 3 | 1 | 3 | 2 | 3 | 5 | 4 | 3 | 6 | 5 | 6 | 6 | 6 | 12 | 10 | 6 | 8 | 13 | 11 | 9 | 16 | 10 | 13 | 15 | 10 | 15 | 10 | 7 | 6 | 4 | 3 | 5 | |||||
| Rather not say | 2 | 3 | 3 | 5 | 10 | 7 | 14 | 8 | 13 | 10 | 19 | 29 | 31 | 26 | 16 | 30 | 36 | 32 | 46 | 34 | 41 | 34 | 40 | 42 | 54 | 30 | 37 | 36 | 35 | 30 | 36 | 19 | 16 | 7 | 8 | 6 | 9 |
| Under 20,000 | 6 | 4 | 6 | 4 | 5 | 5 | 6 | 9 | 12 | 9 | 16 | 13 | 10 | 29 | 16 | 21 | 16 | 19 | 21 | 22 | 34 | 18 | 24 | 23 | 27 | 24 | 17 | 16 | 20 | 17 | 10 | 8 | 10 | 8 | 1 | 3 | 8 |
| #Total cases | 17 | 15 | 18 | 17 | 27 | 24 | 34 | 46 | 50 | 45 | 64 | 78 | 78 | 98 | 90 | 89 | 105 | 108 | 140 | 125 | 148 | 135 | 149 | 152 | 156 | 157 | 146 | 133 | 127 | 126 | 133 | 79 | 70 | 45 | 35 | 30 | 47 |
# use the leveneTest() command from the car package to test homogeneity of variance
# uses the 'formula' setup: formula is y~x1*x2, where y is our DV and x1 is our first IV and x2 is our second IV
leveneTest(swb~income_rc, data = d)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 8 1.0061 0.429
## 3134
# use this commented out section only if you need to remove outliers
# to drop a single outlier, remove the # at the beginning of the line and use this code:
# d <- subset(d, row_id!=c())
# to drop multiple outliers, remove the # at the beginning of the line and use this code:
# d <- subset(d, row_id!=c(1108) & row_id!=c(602)& row_id!=c(220))
# use the lm() command to run the regression
# formula is y~x1*x2 + c, where y is our DV, x1 is our first IV, x2 is our second IV, and c is our covariate
reg_model <- lm(swb ~ income_rc, data = d) #for one-way
# Cook's distance
plot(reg_model, 4)
# Residuals vs Leverage
plot(reg_model, 5)
# Cook's distance
# plot(reg_model2, 4)
# Residuals vs Leverage
# plot(reg_model2, 5)
Our cell sizes are very unbalanced. A small sample size for one of the levels of our variable limits our power and increases our Type II error rate.
Levene’s test is significant for our two-level income variable. We are ignoring this and continuing with the analysis anyway.
We did not remove any outliers.
aov_model <- aov_ez(data = d,
id = "ResponseId",
between = c("income_rc"),
dv = "swb",
anova_table = list(es = "pes"))
## Contrasts set to contr.sum for the following variables: income_rc
Effect size cutoffs from Cohen (1988):
nice(aov_model)
## Anova Table (Type 3 tests)
##
## Response: swb
## Effect df MSE F pes p.value
## 1 income_rc 8, 3134 1.70 10.86 *** .027 <.001
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1
#nice(aov_model2)
afex_plot(aov_model, x = "income_rc")
Only run posthocs if the test is significant! E.g., only run the posthoc tests on gender if there is a main effect for gender.
emmeans(aov_model, specs="income_rc", adjust="tukey")
## Note: adjust = "tukey" was changed to "sidak"
## because "tukey" is only appropriate for one set of pairwise comparisons
## income_rc emmean SE df lower.CL upper.CL
## 100,000 - 199,999 4.81 0.0662 3134 4.63 4.99
## 20,000 - 39,999 4.30 0.0687 3134 4.11 4.49
## 200,000 - 1 million 4.72 0.1106 3134 4.41 5.03
## 40,000 - 59,999 4.42 0.0703 3134 4.23 4.62
## 60,000 - 79,999 4.62 0.0756 3134 4.41 4.83
## 80,000 - 99,999 4.82 0.0849 3134 4.59 5.06
## Over 1 million 5.10 0.4929 3134 3.73 6.46
## Rather not say 4.39 0.0446 3134 4.26 4.51
## Under 20,000 4.20 0.0574 3134 4.04 4.35
##
## Confidence level used: 0.95
## Conf-level adjustment: sidak method for 9 estimates
pairs(emmeans(aov_model, specs="income_rc", adjust="tukey"))
## contrast estimate SE df t.ratio
## (100,000 - 199,999) - (20,000 - 39,999) 0.5089 0.0954 3134 5.332
## (100,000 - 199,999) - (200,000 - 1 million) 0.0890 0.1289 3134 0.690
## (100,000 - 199,999) - (40,000 - 59,999) 0.3840 0.0966 3134 3.976
## (100,000 - 199,999) - (60,000 - 79,999) 0.1882 0.1005 3134 1.873
## (100,000 - 199,999) - (80,000 - 99,999) -0.0122 0.1077 3134 -0.113
## (100,000 - 199,999) - Over 1 million -0.2868 0.4974 3134 -0.577
## (100,000 - 199,999) - Rather not say 0.4228 0.0798 3134 5.295
## (100,000 - 199,999) - Under 20,000 0.6131 0.0876 3134 6.998
## (20,000 - 39,999) - (200,000 - 1 million) -0.4199 0.1302 3134 -3.224
## (20,000 - 39,999) - (40,000 - 59,999) -0.1249 0.0983 3134 -1.270
## (20,000 - 39,999) - (60,000 - 79,999) -0.3207 0.1021 3134 -3.140
## (20,000 - 39,999) - (80,000 - 99,999) -0.5211 0.1092 3134 -4.770
## (20,000 - 39,999) - Over 1 million -0.7957 0.4977 3134 -1.599
## (20,000 - 39,999) - Rather not say -0.0861 0.0820 3134 -1.051
## (20,000 - 39,999) - Under 20,000 0.1042 0.0895 3134 1.164
## (200,000 - 1 million) - (40,000 - 59,999) 0.2950 0.1311 3134 2.251
## (200,000 - 1 million) - (60,000 - 79,999) 0.0992 0.1340 3134 0.740
## (200,000 - 1 million) - (80,000 - 99,999) -0.1012 0.1394 3134 -0.726
## (200,000 - 1 million) - Over 1 million -0.3758 0.5052 3134 -0.744
## (200,000 - 1 million) - Rather not say 0.3338 0.1193 3134 2.798
## (200,000 - 1 million) - Under 20,000 0.5241 0.1246 3134 4.206
## (40,000 - 59,999) - (60,000 - 79,999) -0.1958 0.1032 3134 -1.897
## (40,000 - 59,999) - (80,000 - 99,999) -0.3962 0.1102 3134 -3.594
## (40,000 - 59,999) - Over 1 million -0.6708 0.4979 3134 -1.347
## (40,000 - 59,999) - Rather not say 0.0388 0.0833 3134 0.466
## (40,000 - 59,999) - Under 20,000 0.2291 0.0907 3134 2.524
## (60,000 - 79,999) - (80,000 - 99,999) -0.2004 0.1136 3134 -1.763
## (60,000 - 79,999) - Over 1 million -0.4750 0.4987 3134 -0.952
## (60,000 - 79,999) - Rather not say 0.2346 0.0877 3134 2.674
## (60,000 - 79,999) - Under 20,000 0.4249 0.0949 3134 4.479
## (80,000 - 99,999) - Over 1 million -0.2746 0.5002 3134 -0.549
## (80,000 - 99,999) - Rather not say 0.4350 0.0959 3134 4.535
## (80,000 - 99,999) - Under 20,000 0.6253 0.1025 3134 6.103
## Over 1 million - Rather not say 0.7096 0.4950 3134 1.434
## Over 1 million - Under 20,000 0.8999 0.4963 3134 1.813
## Rather not say - Under 20,000 0.1903 0.0727 3134 2.618
## p.value
## <.0001
## 0.9989
## 0.0023
## 0.6325
## 1.0000
## 0.9997
## <.0001
## <.0001
## 0.0347
## 0.9400
## 0.0449
## 0.0001
## 0.8058
## 0.9808
## 0.9639
## 0.3730
## 0.9982
## 0.9984
## 0.9981
## 0.1161
## 0.0009
## 0.6157
## 0.0100
## 0.9169
## 0.9999
## 0.2206
## 0.7066
## 0.9898
## 0.1576
## 0.0003
## 0.9998
## 0.0002
## <.0001
## 0.8849
## 0.6734
## 0.1793
##
## P value adjustment: tukey method for comparing a family of 9 estimates
Only run posthocs if the test is significant! E.g., only run the posthoc tests on gender if there is a main effect for gender.
# emmeans(aov_model, specs="gender_rc", adjust="tukey")
# pairs(emmeans(aov_model, specs="gender_rc", adjust="tukey"))
# emmeans(aov_model2, specs="poc", adjust="tukey")
# pairs(emmeans(aov_model2, specs="poc", adjust="tukey"))
# emmeans(aov_model2, specs="gender_rc", by="poc", adjust="sidak")
# pairs(emmeans(aov_model2, specs="gender_rc", by="poc", adjust="sidak"))
# emmeans(aov_model2, specs="poc", by="gender_rc", adjust="sidak")
# pairs(emmeans(aov_model2, specs="poc", by="gender_rc", adjust="sidak"))
To test our hypothesis that there would be a significant effect of income on subjective well-being (swb), we used a one-way ANOVA. Our data was slightly unbalanced, with participants who make 60,000 dollars or less participating in our survey (n = 1,221) than participants who make 60,000 dollars of more (n = 1,068). This reduces the power of our test and increases the chances of a Type II error. We also identified single outlier, but did not remove it following visual analysis of a Residuals vs Leverage plot. A significant Levene’s test (p = <.001) also indicates that our data violates the assumption of homogeneity of variance. This suggests that there is an increased chance of Type I error. We continued with our analysis for the purpose of this class.
We found a significant effect of income, F(8,3,133) = 11.19, p < .001, ηp2 = .028 (large effect size; Cohen, 1988). Posthoc tests using Tukey’s HSD revealed that women reported more stress than men but less stress than non-binary and other gender participants, while non-binary and other gender participants reported the highest amount of stress overall (see Figure 1 for a comparison).
To test our hypothesis that gender and race would impact stress and would interact significantly, we used a two-way/factorial ANOVA. Our data met most of the assumptions of the test, although our data was unbalanced, with many more women participating in our survey (n = 1004) than men (n = 195). We identified and removed a single outlier following visual analysis of a Residuals vs Leverage plot.
As predicted, we found a significant main effect for gender, F(1,1195) = 28.31, p < .001, ηp2 .023 (small effect size; Cohen, 1988). As predicted, women reported significantly more stress than men. Contrary to our expectations, we did not find a significant main effect for race (p = .453).
Lastly, we found a significant interaction between gender and race (see Figure 2), F(1,1195) = 3.43, p = .064, ηp2 = .003 (trivial effect size; Cohen, 1988). When comparing by race, women of color (M = 3.16, SE = .07) reported significantly more stress than men of color (M = 2.52, SE = .14; p < .001), as did white women (M = 2.93, SE = .03) compared to white men (M = 2.62, SE = .08; p < .001). When comparing by gender, women of color reported significantly more stress than white women (p = .002), while men of color and white men reported similar levels of stress (p = .546).
References
Cohen J. (1988). Statistical Power Analysis for the Behavioral Sciences. New York, NY: Routledge Academic.