library(tidyverse)
library(haven)
write_ancova <- read_dta("Writing_Tech.dta")
glimpse(write_ancova)
Rows: 30
Columns: 3
$ Treatment <dbl+lbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
$ Cog_Engage <dbl> 3, 2, 5, 2, 2, 2, 7, 2, 4, 7, 5, 3, 4, 4, 7, 5, 4, 9, 2, 6, 3, 4, 4, 4, 6, 4, 6, 2, 8, 5
$ Pre_Engage <dbl> 4, 1, 5, 1, 2, 2, 7, 4, 5, 5, 3, 1, 2, 2, 6, 4, 2, 1, 3, 5, 4, 3, 3, 2, 0, 1, 3, 0, 1, 0
write_ancova.clean <- write_ancova %>%
mutate(.,
treat.fac = as_factor(Treatment))
summary(write_ancova.clean)
Treatment Cog_Engage Pre_Engage treat.fac
Min. :1.000 Min. :2.000 Min. :0.000 Paper/ Pencil: 9
1st Qu.:1.000 1st Qu.:3.000 1st Qu.:1.000 Laptop : 8
Median :2.000 Median :4.000 Median :2.500 Tablet :13
Mean :2.133 Mean :4.367 Mean :2.733
3rd Qu.:3.000 3rd Qu.:5.750 3rd Qu.:4.000
Max. :3.000 Max. :9.000 Max. :7.000
Hopefully, somewhat similar! If not, there may not be systematic change over time. The cor.test function in base R works well for simple correlations.
cor.test(write_ancova.clean$Cog_Engage, write_ancova.clean$Pre_Engage)
Pearson's product-moment correlation
data: write_ancova.clean$Cog_Engage and write_ancova.clean$Pre_Engage
t = 1.345, df = 28, p-value = 0.1894
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.1250150 0.5571688
sample estimates:
cor
0.2463496
Here, we have hap_post as the outcome, treat as the factor, and hap_pre as the covariate. Since we cleaned our data at the beginning, R recognizes that treat is a factor and hap_pre is continuous so it will run as an ANCOVA and not a two-way ANOVA.
ancova1<-aov(Cog_Engage ~ treat.fac + Pre_Engage, data = write_ancova.clean)
summary(ancova1)
Df Sum Sq Mean Sq F value Pr(>F)
treat.fac 2 16.84 8.422 2.770 0.0812 .
Pre_Engage 1 15.08 15.076 4.959 0.0348 *
Residuals 26 79.05 3.040
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
TukeyHSD(ancova1, which = 'treat.fac')
non-factors ignored: Pre_Engage
Tukey multiple comparisons of means
95% family-wise confidence level
Fit: aov(formula = Cog_Engage ~ treat.fac + Pre_Engage, data = write_ancova.clean)
$treat.fac
diff lwr upr p adj
Laptop-Paper/ Pencil 1.65277778 -0.4525629 3.758118 0.1448377
Tablet-Paper/ Pencil 1.62393162 -0.2548772 3.502740 0.0997411
Tablet-Laptop -0.02884615 -1.9758067 1.918114 0.9992530
library(effectsize)
package 㤼㸱effectsize㤼㸲 was built under R version 4.0.3
eta_squared(ancova1)
Parameter | Eta2 (partial) | 90% CI
------------------------------------------
treat.fac | 0.18 | [0.00, 0.37]
Pre_Engage | 0.16 | [0.01, 0.37]
omega_squared(ancova1)
Parameter | Omega2 (partial) | 90% CI
--------------------------------------------
treat.fac | 0.11 | [0.00, 0.28]
Pre_Engage | 0.12 | [0.00, 0.32]
Now, see the difference without the covariate- just an ANOVA for treat:
anova1<-aov(Cog_Engage ~ treat.fac, write_ancova.clean)
summary(anova1)
Df Sum Sq Mean Sq F value Pr(>F)
treat.fac 2 16.84 8.422 2.416 0.108
Residuals 27 94.12 3.486
Hint for this week’s content review!
fluency <- read_dta("fluency.dta")
glimpse(fluency)
Rows: 220
Columns: 3
$ id <dbl> 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4,...
$ time <dbl+lbl> 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4...
$ test <dbl> 75.97259, 51.32138, 85.78768, 76.72881...
summary(fluency)
id time test
Min. : 1 Min. :1.00 Min. : 29.46
1st Qu.:14 1st Qu.:1.75 1st Qu.: 61.29
Median :28 Median :2.50 Median : 72.62
Mean :28 Mean :2.50 Mean : 71.92
3rd Qu.:42 3rd Qu.:3.25 3rd Qu.: 82.68
Max. :55 Max. :4.00 Max. :114.68
pivot_widerfluency.wide <- fluency %>%
pivot_wider(.,
id_cols = c("id"),
names_from = time,
values_from = test,
names_prefix = "test",
)
summary(fluency.wide)
id test1 test2
Min. : 1.0 Min. :29.46 Min. : 48.92
1st Qu.:14.5 1st Qu.:51.75 1st Qu.: 67.24
Median :28.0 Median :60.95 Median : 74.23
Mean :28.0 Mean :59.50 Mean : 74.56
3rd Qu.:41.5 3rd Qu.:70.59 3rd Qu.: 83.59
Max. :55.0 Max. :85.23 Max. :110.18
test3 test4
Min. : 58.07 Min. :36.22
1st Qu.: 76.93 1st Qu.:61.43
Median : 83.51 Median :70.18
Mean : 82.90 Mean :70.74
3rd Qu.: 89.23 3rd Qu.:78.06
Max. :114.68 Max. :94.05
kdensity1 <- ggplot(fluency.wide, aes(x=test1)) +
geom_density() +
stat_function(fun = dnorm, n = 100, args = list(mean = 59, sd = 10), color = "red", linetype = "dashed") +
labs(title="Distribution of Fluency on Test 1",x="Tests Scores", y = "Density", caption = "N = 55 participants. The red line indicates a normal distribution. The black lines represents the observed distribution.")
kdensity1
kdensity2 <- ggplot(fluency.wide, aes(x=test2)) +
geom_density() +
stat_function(fun = dnorm, n = 100, args = list(mean = 74, sd = 10), color = "red", linetype = "dashed") +
labs(title="Distribution of Fluency on Test2",x="Sleep (minutes per night)", y = "Density", caption = "N = 55 participants. The red line indicates a normal distribution. The black lines represents the observed distribution.")
kdensity2
kdensity3 <- ggplot(fluency.wide, aes(x=test3)) +
geom_density() +
stat_function(fun = dnorm, n = 100, args = list(mean = 83, sd = 10), color = "red", linetype = "dashed") +
labs(title="Distribution of Fluency on Test3",x="Sleep (minutes per night)", y = "Density", caption = "N = 55 participants. The red line indicates a normal distribution. The black lines represents the observed distribution.")
kdensity3
kdensity4 <- ggplot(fluency.wide, aes(x=test4)) +
geom_density() +
stat_function(fun = dnorm, n = 100, args = list(mean = 70, sd = 10), color = "red", linetype = "dashed") +
labs(title="Distribution of Fluency on Test4",x="Sleep (minutes per night)", y = "Density", caption = "N = 55 participants. The red line indicates a normal distribution. The black lines represents the observed distribution.")
kdensity4
summary(fluency)
id time test
Min. : 1 Min. :1.00 Min. : 29.46
1st Qu.:14 1st Qu.:1.75 1st Qu.: 61.29
Median :28 Median :2.50 Median : 72.62
Mean :28 Mean :2.50 Mean : 71.92
3rd Qu.:42 3rd Qu.:3.25 3rd Qu.: 82.68
Max. :55 Max. :4.00 Max. :114.68
library(rstatix)
library(ggpubr)
fluency %>%
group_by(time) %>%
get_summary_stats(2, type = "mean_sd")
Note:id is participant id in this case.
fluency.aov <- anova_test(data = fluency, dv = test, wid = id, within = time)
summary(fluency.aov)
Length Class Mode
ANOVA 7 data.frame list
Mauchly's Test for Sphericity 4 data.frame list
Sphericity Corrections 9 data.frame list
Here is the basic ANOVA table:
get_anova_table(fluency.aov)
ANOVA Table (type III tests)
Effect DFn DFd F p p<.05 ges
1 time 3 162 32.12 2.4e-16 * 0.296
Here are the corrected p-values with the sphericity adjustments:
fluency.aov$`Sphericity Corrections`
pairwise_t_test(
formula = test ~ time, paired = TRUE,
p.adjust.method = "bonferroni",
data = fluency
)
The last column indicates whether dataset follows a multivariate normality or not (i.e, YES or NO) at significance level 0.05.
library(kableExtra)
Error in library(kableExtra) : there is no package called ‘kableExtra’
This function performs multivariate skewness and kurtosis tests at the same time and combines test results for multivariate normality. If both tests indicate multivariate normality, then data follows a multivariate normal distribution at the 0.05 significance level.
sleep_mvn2 <- mvn(data = sleep.wide, mvnTest = "mardia")
sleep_mvn2$multivariateNormality
sleep.aov$`Mauchly's Test for Sphericity`