set.seed(1234)
<- data.frame(
my_data name = paste0(rep("M_", 10), 1:10),
weight = round(rnorm(10, 20, 2), 1)
)
Week 7 Formative Exercise
Comparing Means in R
2.1 Comparing one-sample mean to a standard known mean
What is one-sample t-test?
A statistical hypothesis test that determines if a population mean is different from a specific value
Visualize your data and compute one-sample t-test in R
# Print the first 10 rows of the data
head(my_data, 10)
name weight
1 M_1 17.6
2 M_2 20.6
3 M_3 22.2
4 M_4 15.3
5 M_5 20.9
6 M_6 21.0
7 M_7 18.9
8 M_8 18.9
9 M_9 18.9
10 M_10 18.2
# Statistical summaries of weight
summary(my_data$weight)
Min. 1st Qu. Median Mean 3rd Qu. Max.
15.30 18.38 18.90 19.25 20.82 22.20
library(ggpubr)
Warning: package 'ggpubr' was built under R version 4.4.2
Loading required package: ggplot2
ggboxplot(my_data$weight,
ylab = "Weight (g)", xlab = FALSE,
ggtheme = theme_minimal())
Preleminary test to check one-sample t-test assumptions
need to check whether the data follow a normal distribution because n<30
shapiro.test(my_data$weight) # => p-value = 0.6993
Shapiro-Wilk normality test
data: my_data$weight
W = 0.9526, p-value = 0.6993
From the output, the p-value is greater than the significance level 0.05 implying that the distribution of the data are not significantly different from normal distribtion. In other words, we can assume the normality.
library("ggpubr")
ggqqplot(my_data$weight, ylab = "Men's weight",
ggtheme = theme_minimal())
Compute one-sample t-test
# One-sample t-test
<- t.test(my_data$weight, mu = 25)
res # Printing the results
res
One Sample t-test
data: my_data$weight
t = -9.0783, df = 9, p-value = 7.953e-06
alternative hypothesis: true mean is not equal to 25
95 percent confidence interval:
17.8172 20.6828
sample estimates:
mean of x
19.25
2.2 One-sample Wilcoxon test (non-parametric)
#Import data
set.seed(1234)
<- data.frame(
my_data name = paste0(rep("M_", 10), 1:10),
weight = round(rnorm(10, 20, 2), 1)
)
#Veiw Data
head(my_data, 10)
name weight
1 M_1 17.6
2 M_2 20.6
3 M_3 22.2
4 M_4 15.3
5 M_5 20.9
6 M_6 21.0
7 M_7 18.9
8 M_8 18.9
9 M_9 18.9
10 M_10 18.2
# Statistical summaries of weight
summary(my_data$weight)
Min. 1st Qu. Median Mean 3rd Qu. Max.
15.30 18.38 18.90 19.25 20.82 22.20
#Visualize data using box plots
library(ggpubr)
ggboxplot(my_data$weight,
ylab = "Weight (g)", xlab = FALSE,
ggtheme = theme_minimal())
Compute one-sample Wilcoxon test
# One-sample wilcoxon test
<- wilcox.test(my_data$weight, mu = 25) res
Warning in wilcox.test.default(my_data$weight, mu = 25): cannot compute exact
p-value with ties
# Printing the results
res
Wilcoxon signed rank test with continuity correction
data: my_data$weight
V = 0, p-value = 0.005793
alternative hypothesis: true location is not equal to 25
# print only the p-value
$p.value res
[1] 0.005793045
3.1 Comparing the means of two independent groups
#Import Data
# Data in two numeric vectors
<- c(38.9, 61.2, 73.3, 21.8, 63.4, 64.6, 48.4, 48.8, 48.5)
women_weight <- c(67.8, 60, 63.4, 76, 89.4, 73.3, 67.3, 61.3, 62.4)
men_weight # Create a data frame
<- data.frame(
my_data group = rep(c("Woman", "Man"), each = 9),
weight = c(women_weight, men_weight)
)
# Print all data
print(my_data)
group weight
1 Woman 38.9
2 Woman 61.2
3 Woman 73.3
4 Woman 21.8
5 Woman 63.4
6 Woman 64.6
7 Woman 48.4
8 Woman 48.8
9 Woman 48.5
10 Man 67.8
11 Man 60.0
12 Man 63.4
13 Man 76.0
14 Man 89.4
15 Man 73.3
16 Man 67.3
17 Man 61.3
18 Man 62.4
library(dplyr)
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
group_by(my_data, group) %>%
summarise(count = n(),
mean = mean(weight, na.rm = TRUE),
sd = sd(weight, na.rm = TRUE)
)
# A tibble: 2 × 4
group count mean sd
<chr> <int> <dbl> <dbl>
1 Man 9 69.0 9.38
2 Woman 9 52.1 15.6
#Visualize your data using box plots
library("ggpubr")
ggboxplot(my_data, x = "group", y = "weight",
color = "group", palette = c("#00AFBB", "#E7B800"),
ylab = "Weight", xlab = "Groups")
#Preleminary test to check independent t-test assumptions
# Shapiro-Wilk normality test for Men's weights
with(my_data, shapiro.test(weight[group == "Man"]))# p = 0.1
Shapiro-Wilk normality test
data: weight[group == "Man"]
W = 0.86425, p-value = 0.1066
# Shapiro-Wilk normality test for Women's weights
with(my_data, shapiro.test(weight[group == "Woman"])) # p = 0.6
Shapiro-Wilk normality test
data: weight[group == "Woman"]
W = 0.94266, p-value = 0.6101
# F-test to test for homogeneity in variances
<- var.test(weight ~ group, data = my_data)
res.ftest res.ftest
F test to compare two variances
data: weight by group
F = 0.36134, num df = 8, denom df = 8, p-value = 0.1714
alternative hypothesis: true ratio of variances is not equal to 1
95 percent confidence interval:
0.08150656 1.60191315
sample estimates:
ratio of variances
0.3613398
#Compute unpaired two-samples t-test
# Compute t-test
<- t.test(women_weight, men_weight, var.equal = TRUE)
res res
Two Sample t-test
data: women_weight and men_weight
t = -2.7842, df = 16, p-value = 0.01327
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-29.748019 -4.029759
sample estimates:
mean of x mean of y
52.10000 68.98889
#Compute independent t-test - Method 2
<- t.test(weight ~ group, data = my_data, var.equal = TRUE)
res res
Two Sample t-test
data: weight by group
t = 2.7842, df = 16, p-value = 0.01327
alternative hypothesis: true difference in means between group Man and group Woman is not equal to 0
95 percent confidence interval:
4.029759 29.748019
sample estimates:
mean in group Man mean in group Woman
68.98889 52.10000
3.2 Unpaired two-samples Wilcoxon test (non-parametric)
#Import data
# Data in two numeric vectors
<- c(38.9, 61.2, 73.3, 21.8, 63.4, 64.6, 48.4, 48.8, 48.5)
women_weight <- c(67.8, 60, 63.4, 76, 89.4, 73.3, 67.3, 61.3, 62.4)
men_weight
# Create a data frame
<- data.frame(
my_data group = rep(c("Woman", "Man"), each = 9),
weight = c(women_weight, men_weight)
) my_data
group weight
1 Woman 38.9
2 Woman 61.2
3 Woman 73.3
4 Woman 21.8
5 Woman 63.4
6 Woman 64.6
7 Woman 48.4
8 Woman 48.8
9 Woman 48.5
10 Man 67.8
11 Man 60.0
12 Man 63.4
13 Man 76.0
14 Man 89.4
15 Man 73.3
16 Man 67.3
17 Man 61.3
18 Man 62.4
library(dplyr)
group_by(my_data, group) %>%
summarise(
count = n(),
median = median(weight, na.rm = TRUE),
IQR = IQR(weight, na.rm = TRUE)
)
# A tibble: 2 × 4
group count median IQR
<chr> <int> <dbl> <dbl>
1 Man 9 67.3 10.9
2 Woman 9 48.8 15
#Visualize your data using box plots
# Plot weight by group and color by group
library("ggpubr")
ggboxplot(my_data, x = "group", y = "weight",
color = "group", palette = c("#00AFBB", "#E7B800"),
ylab = "Weight", xlab = "Groups")
#Compute two-samples Wilcoxon test - Method 1:
<- wilcox.test(women_weight, men_weight) res
Warning in wilcox.test.default(women_weight, men_weight): cannot compute exact
p-value with ties
res
Wilcoxon rank sum test with continuity correction
data: women_weight and men_weight
W = 15, p-value = 0.02712
alternative hypothesis: true location shift is not equal to 0
<- wilcox.test(weight ~ group, data = my_data,
res exact = FALSE)
res
Wilcoxon rank sum test with continuity correction
data: weight by group
W = 66, p-value = 0.02712
alternative hypothesis: true location shift is not equal to 0
4.1 Paired Samples T-test in R
# Data in two numeric vectors
# ++++++++++++++++++++++++++
# Weight of the mice before treatment
<-c(200.1, 190.9, 192.7, 213, 241.4, 196.9, 172.2, 185.5, 205.2, 193.7)
before
# Weight of the mice after treatment
<-c(392.9, 393.2, 345.1, 393, 434, 427.9, 422, 383.9, 392.3, 352.2)
after
# Create a data frame
<- data.frame(
my_data group = rep(c("before", "after"), each = 10),
weight = c(before, after)
) my_data
group weight
1 before 200.1
2 before 190.9
3 before 192.7
4 before 213.0
5 before 241.4
6 before 196.9
7 before 172.2
8 before 185.5
9 before 205.2
10 before 193.7
11 after 392.9
12 after 393.2
13 after 345.1
14 after 393.0
15 after 434.0
16 after 427.9
17 after 422.0
18 after 383.9
19 after 392.3
20 after 352.2
# Plot weight by group and color by group
library("ggpubr")
ggboxplot(my_data, x = "group", y = "weight",
color = "group", palette = c("#00AFBB", "#E7B800"),
order = c("before", "after"),
ylab = "Weight", xlab = "Groups")
#Plot Paired Data
# Subset weight data before treatment
<- subset(my_data, group == "before", weight,
before drop = TRUE)
# subset weight data after treatment
<- subset(my_data, group == "after", weight,
after drop = TRUE)
# Plot paired data
library(PairedData)
Warning: package 'PairedData' was built under R version 4.4.2
Loading required package: MASS
Attaching package: 'MASS'
The following object is masked from 'package:dplyr':
select
Loading required package: gld
Warning: package 'gld' was built under R version 4.4.2
Loading required package: mvtnorm
Loading required package: lattice
Attaching package: 'PairedData'
The following object is masked from 'package:base':
summary
<- paired(before, after)
pd plot(pd, type = "profile") + theme_bw()
# compute the difference
<- with(my_data,
d == "before"] - weight[group == "after"])
weight[group
# Shapiro-Wilk normality test for the differences
shapiro.test(d) # => p-value = 0.6141
Shapiro-Wilk normality test
data: d
W = 0.94536, p-value = 0.6141
#Compute paired samples t-test
<- t.test(before, after, paired = TRUE)
res res
Paired t-test
data: before and after
t = -20.883, df = 9, p-value = 6.2e-09
alternative hypothesis: true mean difference is not equal to 0
95 percent confidence interval:
-215.5581 -173.4219
sample estimates:
mean difference
-194.49
#### 4.2 Paired samples Wilcoxon test (non-parametric)
::: {.cell}
```{.r .cell-code}
# Data in two numeric vectors
# ++++++++++++++++++++++++++
# Weight of the mice before treatment
before <-c(200.1, 190.9, 192.7, 213, 241.4, 196.9, 172.2, 185.5, 205.2, 193.7)
# Weight of the mice after treatment
after <-c(392.9, 393.2, 345.1, 393, 434, 427.9, 422, 383.9, 392.3, 352.2)
# Create a data frame
my_data <- data.frame(
group = rep(c("before", "after"), each = 10),
weight = c(before, after)
)
:::
# Print all data
print(my_data)
group weight
1 before 200.1
2 before 190.9
3 before 192.7
4 before 213.0
5 before 241.4
6 before 196.9
7 before 172.2
8 before 185.5
9 before 205.2
10 before 193.7
11 after 392.9
12 after 393.2
13 after 345.1
14 after 393.0
15 after 434.0
16 after 427.9
17 after 422.0
18 after 383.9
19 after 392.3
20 after 352.2
library("dplyr")
group_by(my_data, group) %>%
summarise(
count = n(),
median = median(weight, na.rm = TRUE),
IQR = IQR(weight, na.rm = TRUE)
)
# A tibble: 2 × 4
group count median IQR
<chr> <int> <dbl> <dbl>
1 after 10 393. 28.8
2 before 10 195. 12.6
# Plot weight by group and color by group
library("ggpubr")
ggboxplot(my_data, x = "group", y = "weight",
color = "group", palette = c("#00AFBB", "#E7B800"),
order = c("before", "after"),
ylab = "Weight", xlab = "Groups")
# Subset weight data before treatment
<- subset(my_data, group == "before", weight,
before drop = TRUE)
# subset weight data after treatment
<- subset(my_data, group == "after", weight,
after drop = TRUE)
# Plot paired data
library(PairedData)
<- paired(before, after)
pd plot(pd, type = "profile") + theme_bw()
#Compute paired-sample Wilcoxon test
<- wilcox.test(before, after, paired = TRUE)
res res
Wilcoxon signed rank exact test
data: before and after
V = 0, p-value = 0.001953
alternative hypothesis: true location shift is not equal to 0
5.1 One-Way ANOVA Test
<- PlantGrowth
my_data my_data
weight group
1 4.17 ctrl
2 5.58 ctrl
3 5.18 ctrl
4 6.11 ctrl
5 4.50 ctrl
6 4.61 ctrl
7 5.17 ctrl
8 4.53 ctrl
9 5.33 ctrl
10 5.14 ctrl
11 4.81 trt1
12 4.17 trt1
13 4.41 trt1
14 3.59 trt1
15 5.87 trt1
16 3.83 trt1
17 6.03 trt1
18 4.89 trt1
19 4.32 trt1
20 4.69 trt1
21 6.31 trt2
22 5.12 trt2
23 5.54 trt2
24 5.50 trt2
25 5.37 trt2
26 5.29 trt2
27 4.92 trt2
28 6.15 trt2
29 5.80 trt2
30 5.26 trt2
library(dplyr)
group_by(my_data, group) %>%
summarise(
count = n(),
mean = mean(weight, na.rm = TRUE),
sd = sd(weight, na.rm = TRUE)
)
# A tibble: 3 × 4
group count mean sd
<fct> <int> <dbl> <dbl>
1 ctrl 10 5.03 0.583
2 trt1 10 4.66 0.794
3 trt2 10 5.53 0.443
# Box plots
# ++++++++++++++++++++
# Plot weight by group and color by group
library("ggpubr")
ggboxplot(my_data, x = "group", y = "weight",
color = "group", palette = c("#00AFBB", "#E7B800", "#FC4E07"),
order = c("ctrl", "trt1", "trt2"),
ylab = "Weight", xlab = "Treatment")
# Mean plots
# ++++++++++++++++++++
# Plot weight by group
# Add error bars: mean_se
# (other values include: mean_sd, mean_ci, median_iqr, ....)
library("ggpubr")
ggline(my_data, x = "group", y = "weight",
add = c("mean_se", "jitter"),
order = c("ctrl", "trt1", "trt2"),
ylab = "Weight", xlab = "Treatment")
# Box plot
boxplot(weight ~ group, data = my_data,
xlab = "Treatment", ylab = "Weight",
frame = FALSE, col = c("#00AFBB", "#E7B800", "#FC4E07"))
# plotmeans
library("gplots")
Attaching package: 'gplots'
The following object is masked from 'package:stats':
lowess
plotmeans(weight ~ group, data = my_data, frame = FALSE,
xlab = "Treatment", ylab = "Weight",
main="Mean Plot with 95% CI")
Warning in plot.xy(xy.coords(x, y), type = type, ...): "frame" is not a
graphical parameter
Warning in axis(1, at = 1:length(means), labels = legends, ...): "frame" is not
a graphical parameter
Warning in plot.xy(xy.coords(x, y), type = type, ...): "frame" is not a
graphical parameter
# Compute the analysis of variance
<- aov(weight ~ group, data = my_data)
res.aov
# Summary of the analysis
summary(res.aov)
Df Sum Sq Mean Sq F value Pr(>F)
group 2 3.766 1.8832 4.846 0.0159 *
Residuals 27 10.492 0.3886
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(res.aov)
Tukey multiple comparisons of means
95% family-wise confidence level
Fit: aov(formula = weight ~ group, data = my_data)
$group
diff lwr upr p adj
trt1-ctrl -0.371 -1.0622161 0.3202161 0.3908711
trt2-ctrl 0.494 -0.1972161 1.1852161 0.1979960
trt2-trt1 0.865 0.1737839 1.5562161 0.0120064
library(multcomp)
Warning: package 'multcomp' was built under R version 4.4.2
Loading required package: survival
Loading required package: TH.data
Warning: package 'TH.data' was built under R version 4.4.2
Attaching package: 'TH.data'
The following object is masked from 'package:MASS':
geyser
summary(glht(res.aov, linfct = mcp(group = "Tukey")))
Simultaneous Tests for General Linear Hypotheses
Multiple Comparisons of Means: Tukey Contrasts
Fit: aov(formula = weight ~ group, data = my_data)
Linear Hypotheses:
Estimate Std. Error t value Pr(>|t|)
trt1 - ctrl == 0 -0.3710 0.2788 -1.331 0.3908
trt2 - ctrl == 0 0.4940 0.2788 1.772 0.1979
trt2 - trt1 == 0 0.8650 0.2788 3.103 0.0119 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Adjusted p values reported -- single-step method)
#Pairewise t-test
pairwise.t.test(my_data$weight, my_data$group,
p.adjust.method = "BH")
Pairwise comparisons using t tests with pooled SD
data: my_data$weight and my_data$group
ctrl trt1
trt1 0.194 -
trt2 0.132 0.013
P value adjustment method: BH
#Check ANOVA assumptions: test validity?
# 1. Homogeneity of variances
plot(res.aov, 1)
library(car)
Loading required package: carData
Attaching package: 'car'
The following object is masked from 'package:dplyr':
recode
leveneTest(weight ~ group, data = my_data)
Levene's Test for Homogeneity of Variance (center = median)
Df F value Pr(>F)
group 2 1.1192 0.3412
27
#ANOVA test with no assumption of equal variances
oneway.test(weight ~ group, data = my_data)
One-way analysis of means (not assuming equal variances)
data: weight and group
F = 5.181, num df = 2.000, denom df = 17.128, p-value = 0.01739
#Pairwise t-tests with no assumption of equal variances
pairwise.t.test(my_data$weight, my_data$group,
p.adjust.method = "BH", pool.sd = FALSE)
Pairwise comparisons using t tests with non-pooled SD
data: my_data$weight and my_data$group
ctrl trt1
trt1 0.250 -
trt2 0.072 0.028
P value adjustment method: BH
#Check the normality assumption
# 2. Normality
plot(res.aov, 2)
# Extract the residuals
<- residuals(object = res.aov )
aov_residuals
# Run Shapiro-Wilk test
shapiro.test(x = aov_residuals )
Shapiro-Wilk normality test
data: aov_residuals
W = 0.96607, p-value = 0.4379
#Non-parametric alternative to one-way ANOVA test
kruskal.test(weight ~ group, data = my_data)
Kruskal-Wallis rank sum test
data: weight by group
Kruskal-Wallis chi-squared = 7.9882, df = 2, p-value = 0.01842
5.2 Two-Way ANOVA test
# Store the data in the variable my_data
<- ToothGrowth my_data
# Convert dose as a factor and recode the levels
# as "D0.5", "D1", "D2"
$dose <- factor(my_data$dose,
my_datalevels = c(0.5, 1, 2),
labels = c("D0.5", "D1", "D2"))
head(my_data)
len supp dose
1 4.2 VC D0.5
2 11.5 VC D0.5
3 7.3 VC D0.5
4 5.8 VC D0.5
5 6.4 VC D0.5
6 10.0 VC D0.5
table(my_data$supp, my_data$dose)
D0.5 D1 D2
OJ 10 10 10
VC 10 10 10
# Box plot with multiple groups
# +++++++++++++++++++++
# Plot tooth length ("len") by groups ("dose")
# Color box plot by a second group: "supp"
library("ggpubr")
ggboxplot(my_data, x = "dose", y = "len", color = "supp",
palette = c("#00AFBB", "#E7B800"))
# Line plots with multiple groups
# +++++++++++++++++++++++
# Plot tooth length ("len") by groups ("dose")
# Color box plot by a second group: "supp"
# Add error bars: mean_se
# (other values include: mean_sd, mean_ci, median_iqr, ....)
library("ggpubr")
ggline(my_data, x = "dose", y = "len", color = "supp",
add = c("mean_se", "dotplot"),
palette = c("#00AFBB", "#E7B800"))
Bin width defaults to 1/30 of the range of the data. Pick better value with
`binwidth`.
# Box plot with two factor variables
boxplot(len ~ supp * dose, data=my_data, frame = FALSE,
col = c("#00AFBB", "#E7B800"), ylab="Tooth Length")
# Two-way interaction plot
interaction.plot(x.factor = my_data$dose, trace.factor = my_data$supp,
response = my_data$len, fun = mean,
type = "b", legend = TRUE,
xlab = "Dose", ylab="Tooth Length",
pch=c(1,19), col = c("#00AFBB", "#E7B800"))
#Compute two-way ANOVA test
<- aov(len ~ supp + dose, data = my_data)
res.aov2 summary(res.aov2)
Df Sum Sq Mean Sq F value Pr(>F)
supp 1 205.4 205.4 14.02 0.000429 ***
dose 2 2426.4 1213.2 82.81 < 2e-16 ***
Residuals 56 820.4 14.7
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Two-way ANOVA with interaction effect
# These two calls are equivalent
<- aov(len ~ supp * dose, data = my_data)
res.aov3 <- aov(len ~ supp + dose + supp:dose, data = my_data)
res.aov3 summary(res.aov3)
Df Sum Sq Mean Sq F value Pr(>F)
supp 1 205.4 205.4 15.572 0.000231 ***
dose 2 2426.4 1213.2 92.000 < 2e-16 ***
supp:dose 2 108.3 54.2 4.107 0.021860 *
Residuals 54 712.1 13.2
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Compute some summary statistics
require("dplyr")
group_by(my_data, supp, dose) %>%
summarise(
count = n(),
mean = mean(len, na.rm = TRUE),
sd = sd(len, na.rm = TRUE)
)
`summarise()` has grouped output by 'supp'. You can override using the
`.groups` argument.
# A tibble: 6 × 5
# Groups: supp [2]
supp dose count mean sd
<fct> <fct> <int> <dbl> <dbl>
1 OJ D0.5 10 13.2 4.46
2 OJ D1 10 22.7 3.91
3 OJ D2 10 26.1 2.66
4 VC D0.5 10 7.98 2.75
5 VC D1 10 16.8 2.52
6 VC D2 10 26.1 4.80
TukeyHSD(res.aov3, which = "dose")
Tukey multiple comparisons of means
95% family-wise confidence level
Fit: aov(formula = len ~ supp + dose + supp:dose, data = my_data)
$dose
diff lwr upr p adj
D1-D0.5 9.130 6.362488 11.897512 0.0e+00
D2-D0.5 15.495 12.727488 18.262512 0.0e+00
D2-D1 6.365 3.597488 9.132512 2.7e-06
#Multiple comparisons using multcomp package
library(multcomp)
summary(glht(res.aov2, linfct = mcp(dose = "Tukey")))
Simultaneous Tests for General Linear Hypotheses
Multiple Comparisons of Means: Tukey Contrasts
Fit: aov(formula = len ~ supp + dose, data = my_data)
Linear Hypotheses:
Estimate Std. Error t value Pr(>|t|)
D1 - D0.5 == 0 9.130 1.210 7.543 <1e-05 ***
D2 - D0.5 == 0 15.495 1.210 12.802 <1e-05 ***
D2 - D1 == 0 6.365 1.210 5.259 <1e-05 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Adjusted p values reported -- single-step method)
#Pairwise t-test
pairwise.t.test(my_data$len, my_data$dose,
p.adjust.method = "BH")
Pairwise comparisons using t tests with pooled SD
data: my_data$len and my_data$dose
D0.5 D1
D1 1.0e-08 -
D2 4.4e-16 1.4e-05
P value adjustment method: BH
#Check ANOVA assumptions: test validity?
# 1. Homogeneity of variances
plot(res.aov3, 1)
library(car)
leveneTest(len ~ supp*dose, data = my_data)
Levene's Test for Homogeneity of Variance (center = median)
Df F value Pr(>F)
group 5 1.7086 0.1484
54
# 2. Normality
plot(res.aov3, 2)
# Extract the residuals
<- residuals(object = res.aov3)
aov_residuals
# Run Shapiro-Wilk test
shapiro.test(x = aov_residuals )
Shapiro-Wilk normality test
data: aov_residuals
W = 0.98499, p-value = 0.6694
6. MANOVA
# Store the data in the variable my_data
<- iris my_data
<- iris$Sepal.Length
sepl <- iris$Petal.Length
petl # MANOVA test
<- manova(cbind(Sepal.Length, Petal.Length) ~ Species, data = iris)
res.man summary(res.man)
Df Pillai approx F num Df den Df Pr(>F)
Species 2 0.9885 71.829 4 294 < 2.2e-16 ***
Residuals 147
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Look to see which differ
summary.aov(res.man)
Response Sepal.Length :
Df Sum Sq Mean Sq F value Pr(>F)
Species 2 63.212 31.606 119.26 < 2.2e-16 ***
Residuals 147 38.956 0.265
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Response Petal.Length :
Df Sum Sq Mean Sq F value Pr(>F)
Species 2 437.10 218.551 1180.2 < 2.2e-16 ***
Residuals 147 27.22 0.185
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1