Week 7 Formative Exercise

Author

Yasiru Dilshan

Comparing Means in R

2.1 Comparing one-sample mean to a standard known mean

What is one-sample t-test?

A statistical hypothesis test that determines if a population mean is different from a specific value

Visualize your data and compute one-sample t-test in R

set.seed(1234)
my_data <- data.frame(
  name = paste0(rep("M_", 10), 1:10),
  weight = round(rnorm(10, 20, 2), 1)
)
# Print the first 10 rows of the data
head(my_data, 10)
   name weight
1   M_1   17.6
2   M_2   20.6
3   M_3   22.2
4   M_4   15.3
5   M_5   20.9
6   M_6   21.0
7   M_7   18.9
8   M_8   18.9
9   M_9   18.9
10 M_10   18.2
# Statistical summaries of weight
summary(my_data$weight)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  15.30   18.38   18.90   19.25   20.82   22.20 
library(ggpubr)
Warning: package 'ggpubr' was built under R version 4.4.2
Loading required package: ggplot2
ggboxplot(my_data$weight, 
          ylab = "Weight (g)", xlab = FALSE,
          ggtheme = theme_minimal())

Preleminary test to check one-sample t-test assumptions

need to check whether the data follow a normal distribution because n<30

shapiro.test(my_data$weight) # => p-value = 0.6993

    Shapiro-Wilk normality test

data:  my_data$weight
W = 0.9526, p-value = 0.6993

From the output, the p-value is greater than the significance level 0.05 implying that the distribution of the data are not significantly different from normal distribtion. In other words, we can assume the normality.

library("ggpubr")
ggqqplot(my_data$weight, ylab = "Men's weight",
         ggtheme = theme_minimal())

Compute one-sample t-test

# One-sample t-test
res <- t.test(my_data$weight, mu = 25)
# Printing the results
res

    One Sample t-test

data:  my_data$weight
t = -9.0783, df = 9, p-value = 7.953e-06
alternative hypothesis: true mean is not equal to 25
95 percent confidence interval:
 17.8172 20.6828
sample estimates:
mean of x 
    19.25 

2.2 One-sample Wilcoxon test (non-parametric)

#Import data
set.seed(1234)
my_data <- data.frame(
  name = paste0(rep("M_", 10), 1:10),
  weight = round(rnorm(10, 20, 2), 1)
)
#Veiw Data
head(my_data, 10)
   name weight
1   M_1   17.6
2   M_2   20.6
3   M_3   22.2
4   M_4   15.3
5   M_5   20.9
6   M_6   21.0
7   M_7   18.9
8   M_8   18.9
9   M_9   18.9
10 M_10   18.2
# Statistical summaries of weight
summary(my_data$weight)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  15.30   18.38   18.90   19.25   20.82   22.20 
#Visualize data using box plots
library(ggpubr)
ggboxplot(my_data$weight, 
          ylab = "Weight (g)", xlab = FALSE,
          ggtheme = theme_minimal())

Compute one-sample Wilcoxon test

# One-sample wilcoxon test
res <- wilcox.test(my_data$weight, mu = 25)
Warning in wilcox.test.default(my_data$weight, mu = 25): cannot compute exact
p-value with ties
# Printing the results
res 

    Wilcoxon signed rank test with continuity correction

data:  my_data$weight
V = 0, p-value = 0.005793
alternative hypothesis: true location is not equal to 25
# print only the p-value
res$p.value
[1] 0.005793045

3.1 Comparing the means of two independent groups

#Import Data

# Data in two numeric vectors
women_weight <- c(38.9, 61.2, 73.3, 21.8, 63.4, 64.6, 48.4, 48.8, 48.5)
men_weight <- c(67.8, 60, 63.4, 76, 89.4, 73.3, 67.3, 61.3, 62.4) 
# Create a data frame
my_data <- data.frame( 
                group = rep(c("Woman", "Man"), each = 9),
                weight = c(women_weight,  men_weight)
                )
# Print all data
print(my_data)
   group weight
1  Woman   38.9
2  Woman   61.2
3  Woman   73.3
4  Woman   21.8
5  Woman   63.4
6  Woman   64.6
7  Woman   48.4
8  Woman   48.8
9  Woman   48.5
10   Man   67.8
11   Man   60.0
12   Man   63.4
13   Man   76.0
14   Man   89.4
15   Man   73.3
16   Man   67.3
17   Man   61.3
18   Man   62.4
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
group_by(my_data, group) %>%
  summarise(count = n(),
    mean = mean(weight, na.rm = TRUE),
    sd = sd(weight, na.rm = TRUE)
  )
# A tibble: 2 × 4
  group count  mean    sd
  <chr> <int> <dbl> <dbl>
1 Man       9  69.0  9.38
2 Woman     9  52.1 15.6 
#Visualize your data using box plots

library("ggpubr")
ggboxplot(my_data, x = "group", y = "weight", 
          color = "group", palette = c("#00AFBB", "#E7B800"),
        ylab = "Weight", xlab = "Groups")

#Preleminary test to check independent t-test assumptions

# Shapiro-Wilk normality test for Men's weights
with(my_data, shapiro.test(weight[group == "Man"]))# p = 0.1

    Shapiro-Wilk normality test

data:  weight[group == "Man"]
W = 0.86425, p-value = 0.1066
# Shapiro-Wilk normality test for Women's weights
with(my_data, shapiro.test(weight[group == "Woman"])) # p = 0.6

    Shapiro-Wilk normality test

data:  weight[group == "Woman"]
W = 0.94266, p-value = 0.6101
# F-test to test for homogeneity in variances

res.ftest <- var.test(weight ~ group, data = my_data)
res.ftest

    F test to compare two variances

data:  weight by group
F = 0.36134, num df = 8, denom df = 8, p-value = 0.1714
alternative hypothesis: true ratio of variances is not equal to 1
95 percent confidence interval:
 0.08150656 1.60191315
sample estimates:
ratio of variances 
         0.3613398 
#Compute unpaired two-samples t-test

# Compute t-test
res <- t.test(women_weight, men_weight, var.equal = TRUE)
res

    Two Sample t-test

data:  women_weight and men_weight
t = -2.7842, df = 16, p-value = 0.01327
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -29.748019  -4.029759
sample estimates:
mean of x mean of y 
 52.10000  68.98889 
#Compute independent t-test - Method 2
res <- t.test(weight ~ group, data = my_data, var.equal = TRUE)
res

    Two Sample t-test

data:  weight by group
t = 2.7842, df = 16, p-value = 0.01327
alternative hypothesis: true difference in means between group Man and group Woman is not equal to 0
95 percent confidence interval:
  4.029759 29.748019
sample estimates:
  mean in group Man mean in group Woman 
           68.98889            52.10000 

3.2 Unpaired two-samples Wilcoxon test (non-parametric)

#Import data

# Data in two numeric vectors
women_weight <- c(38.9, 61.2, 73.3, 21.8, 63.4, 64.6, 48.4, 48.8, 48.5)
men_weight <- c(67.8, 60, 63.4, 76, 89.4, 73.3, 67.3, 61.3, 62.4) 

# Create a data frame
my_data <- data.frame( 
                group = rep(c("Woman", "Man"), each = 9),
                weight = c(women_weight,  men_weight)
                )
my_data
   group weight
1  Woman   38.9
2  Woman   61.2
3  Woman   73.3
4  Woman   21.8
5  Woman   63.4
6  Woman   64.6
7  Woman   48.4
8  Woman   48.8
9  Woman   48.5
10   Man   67.8
11   Man   60.0
12   Man   63.4
13   Man   76.0
14   Man   89.4
15   Man   73.3
16   Man   67.3
17   Man   61.3
18   Man   62.4
library(dplyr)
group_by(my_data, group) %>%
  summarise(
    count = n(),
    median = median(weight, na.rm = TRUE),
    IQR = IQR(weight, na.rm = TRUE)
  )
# A tibble: 2 × 4
  group count median   IQR
  <chr> <int>  <dbl> <dbl>
1 Man       9   67.3  10.9
2 Woman     9   48.8  15  
#Visualize your data using box plots

# Plot weight by group and color by group
library("ggpubr")
ggboxplot(my_data, x = "group", y = "weight", 
          color = "group", palette = c("#00AFBB", "#E7B800"),
          ylab = "Weight", xlab = "Groups")

#Compute two-samples Wilcoxon test - Method 1:
res <- wilcox.test(women_weight, men_weight)
Warning in wilcox.test.default(women_weight, men_weight): cannot compute exact
p-value with ties
res

    Wilcoxon rank sum test with continuity correction

data:  women_weight and men_weight
W = 15, p-value = 0.02712
alternative hypothesis: true location shift is not equal to 0
res <- wilcox.test(weight ~ group, data = my_data,
                   exact = FALSE)
res

    Wilcoxon rank sum test with continuity correction

data:  weight by group
W = 66, p-value = 0.02712
alternative hypothesis: true location shift is not equal to 0

4.1 Paired Samples T-test in R

# Data in two numeric vectors
# ++++++++++++++++++++++++++
# Weight of the mice before treatment
before <-c(200.1, 190.9, 192.7, 213, 241.4, 196.9, 172.2, 185.5, 205.2, 193.7)

# Weight of the mice after treatment
after <-c(392.9, 393.2, 345.1, 393, 434, 427.9, 422, 383.9, 392.3, 352.2)

# Create a data frame
my_data <- data.frame( 
                group = rep(c("before", "after"), each = 10),
                weight = c(before,  after)
                )
my_data
    group weight
1  before  200.1
2  before  190.9
3  before  192.7
4  before  213.0
5  before  241.4
6  before  196.9
7  before  172.2
8  before  185.5
9  before  205.2
10 before  193.7
11  after  392.9
12  after  393.2
13  after  345.1
14  after  393.0
15  after  434.0
16  after  427.9
17  after  422.0
18  after  383.9
19  after  392.3
20  after  352.2
# Plot weight by group and color by group
library("ggpubr")
ggboxplot(my_data, x = "group", y = "weight", 
          color = "group", palette = c("#00AFBB", "#E7B800"),
          order = c("before", "after"),
          ylab = "Weight", xlab = "Groups")

#Plot Paired Data

# Subset weight data before treatment
before <- subset(my_data,  group == "before", weight,
                 drop = TRUE)

# subset weight data after treatment
after <- subset(my_data,  group == "after", weight,
                 drop = TRUE)

# Plot paired data
library(PairedData)
Warning: package 'PairedData' was built under R version 4.4.2
Loading required package: MASS

Attaching package: 'MASS'
The following object is masked from 'package:dplyr':

    select
Loading required package: gld
Warning: package 'gld' was built under R version 4.4.2
Loading required package: mvtnorm
Loading required package: lattice

Attaching package: 'PairedData'
The following object is masked from 'package:base':

    summary
pd <- paired(before, after)
plot(pd, type = "profile") + theme_bw()

# compute the difference

d <- with(my_data, 
        weight[group == "before"] - weight[group == "after"])

# Shapiro-Wilk normality test for the differences
shapiro.test(d) # => p-value = 0.6141

    Shapiro-Wilk normality test

data:  d
W = 0.94536, p-value = 0.6141
#Compute paired samples t-test

res <- t.test(before, after, paired = TRUE)
res

    Paired t-test

data:  before and after
t = -20.883, df = 9, p-value = 6.2e-09
alternative hypothesis: true mean difference is not equal to 0
95 percent confidence interval:
 -215.5581 -173.4219
sample estimates:
mean difference 
        -194.49 

#### 4.2 Paired samples Wilcoxon test (non-parametric)

::: {.cell}

```{.r .cell-code}
# Data in two numeric vectors
# ++++++++++++++++++++++++++
# Weight of the mice before treatment
before <-c(200.1, 190.9, 192.7, 213, 241.4, 196.9, 172.2, 185.5, 205.2, 193.7)
# Weight of the mice after treatment
after <-c(392.9, 393.2, 345.1, 393, 434, 427.9, 422, 383.9, 392.3, 352.2)
# Create a data frame
my_data <- data.frame( 
                group = rep(c("before", "after"), each = 10),
                weight = c(before,  after)
                )

:::

# Print all data
print(my_data)
    group weight
1  before  200.1
2  before  190.9
3  before  192.7
4  before  213.0
5  before  241.4
6  before  196.9
7  before  172.2
8  before  185.5
9  before  205.2
10 before  193.7
11  after  392.9
12  after  393.2
13  after  345.1
14  after  393.0
15  after  434.0
16  after  427.9
17  after  422.0
18  after  383.9
19  after  392.3
20  after  352.2
library("dplyr")
group_by(my_data, group) %>%
  summarise(
    count = n(),
    median = median(weight, na.rm = TRUE),
    IQR = IQR(weight, na.rm = TRUE)
  )
# A tibble: 2 × 4
  group  count median   IQR
  <chr>  <int>  <dbl> <dbl>
1 after     10   393.  28.8
2 before    10   195.  12.6
# Plot weight by group and color by group
library("ggpubr")
ggboxplot(my_data, x = "group", y = "weight", 
          color = "group", palette = c("#00AFBB", "#E7B800"),
          order = c("before", "after"),
          ylab = "Weight", xlab = "Groups")

# Subset weight data before treatment
before <- subset(my_data,  group == "before", weight,
                 drop = TRUE)
# subset weight data after treatment
after <- subset(my_data,  group == "after", weight,
                 drop = TRUE)
# Plot paired data
library(PairedData)
pd <- paired(before, after)
plot(pd, type = "profile") + theme_bw()

#Compute paired-sample Wilcoxon test

res <- wilcox.test(before, after, paired = TRUE)
res

    Wilcoxon signed rank exact test

data:  before and after
V = 0, p-value = 0.001953
alternative hypothesis: true location shift is not equal to 0

5.1 One-Way ANOVA Test

my_data <- PlantGrowth
my_data
   weight group
1    4.17  ctrl
2    5.58  ctrl
3    5.18  ctrl
4    6.11  ctrl
5    4.50  ctrl
6    4.61  ctrl
7    5.17  ctrl
8    4.53  ctrl
9    5.33  ctrl
10   5.14  ctrl
11   4.81  trt1
12   4.17  trt1
13   4.41  trt1
14   3.59  trt1
15   5.87  trt1
16   3.83  trt1
17   6.03  trt1
18   4.89  trt1
19   4.32  trt1
20   4.69  trt1
21   6.31  trt2
22   5.12  trt2
23   5.54  trt2
24   5.50  trt2
25   5.37  trt2
26   5.29  trt2
27   4.92  trt2
28   6.15  trt2
29   5.80  trt2
30   5.26  trt2
library(dplyr)
group_by(my_data, group) %>%
  summarise(
    count = n(),
    mean = mean(weight, na.rm = TRUE),
    sd = sd(weight, na.rm = TRUE)
  )
# A tibble: 3 × 4
  group count  mean    sd
  <fct> <int> <dbl> <dbl>
1 ctrl     10  5.03 0.583
2 trt1     10  4.66 0.794
3 trt2     10  5.53 0.443
# Box plots
# ++++++++++++++++++++
# Plot weight by group and color by group
library("ggpubr")
ggboxplot(my_data, x = "group", y = "weight", 
          color = "group", palette = c("#00AFBB", "#E7B800", "#FC4E07"),
          order = c("ctrl", "trt1", "trt2"),
          ylab = "Weight", xlab = "Treatment")

# Mean plots
# ++++++++++++++++++++
# Plot weight by group
# Add error bars: mean_se
# (other values include: mean_sd, mean_ci, median_iqr, ....)
library("ggpubr")
ggline(my_data, x = "group", y = "weight", 
       add = c("mean_se", "jitter"), 
       order = c("ctrl", "trt1", "trt2"),
       ylab = "Weight", xlab = "Treatment")

# Box plot
boxplot(weight ~ group, data = my_data,
        xlab = "Treatment", ylab = "Weight",
        frame = FALSE, col = c("#00AFBB", "#E7B800", "#FC4E07"))

# plotmeans
library("gplots")

Attaching package: 'gplots'
The following object is masked from 'package:stats':

    lowess
plotmeans(weight ~ group, data = my_data, frame = FALSE,
          xlab = "Treatment", ylab = "Weight",
          main="Mean Plot with 95% CI") 
Warning in plot.xy(xy.coords(x, y), type = type, ...): "frame" is not a
graphical parameter
Warning in axis(1, at = 1:length(means), labels = legends, ...): "frame" is not
a graphical parameter
Warning in plot.xy(xy.coords(x, y), type = type, ...): "frame" is not a
graphical parameter

# Compute the analysis of variance
res.aov <- aov(weight ~ group, data = my_data)

# Summary of the analysis

summary(res.aov)
            Df Sum Sq Mean Sq F value Pr(>F)  
group        2  3.766  1.8832   4.846 0.0159 *
Residuals   27 10.492  0.3886                 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(res.aov)
  Tukey multiple comparisons of means
    95% family-wise confidence level

Fit: aov(formula = weight ~ group, data = my_data)

$group
            diff        lwr       upr     p adj
trt1-ctrl -0.371 -1.0622161 0.3202161 0.3908711
trt2-ctrl  0.494 -0.1972161 1.1852161 0.1979960
trt2-trt1  0.865  0.1737839 1.5562161 0.0120064
library(multcomp)
Warning: package 'multcomp' was built under R version 4.4.2
Loading required package: survival
Loading required package: TH.data
Warning: package 'TH.data' was built under R version 4.4.2

Attaching package: 'TH.data'
The following object is masked from 'package:MASS':

    geyser
summary(glht(res.aov, linfct = mcp(group = "Tukey")))

     Simultaneous Tests for General Linear Hypotheses

Multiple Comparisons of Means: Tukey Contrasts


Fit: aov(formula = weight ~ group, data = my_data)

Linear Hypotheses:
                 Estimate Std. Error t value Pr(>|t|)  
trt1 - ctrl == 0  -0.3710     0.2788  -1.331   0.3908  
trt2 - ctrl == 0   0.4940     0.2788   1.772   0.1979  
trt2 - trt1 == 0   0.8650     0.2788   3.103   0.0119 *
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Adjusted p values reported -- single-step method)
#Pairewise t-test

pairwise.t.test(my_data$weight, my_data$group,
                 p.adjust.method = "BH")

    Pairwise comparisons using t tests with pooled SD 

data:  my_data$weight and my_data$group 

     ctrl  trt1 
trt1 0.194 -    
trt2 0.132 0.013

P value adjustment method: BH 
#Check ANOVA assumptions: test validity?
# 1. Homogeneity of variances
plot(res.aov, 1)

library(car)
Loading required package: carData

Attaching package: 'car'
The following object is masked from 'package:dplyr':

    recode
leveneTest(weight ~ group, data = my_data)
Levene's Test for Homogeneity of Variance (center = median)
      Df F value Pr(>F)
group  2  1.1192 0.3412
      27               
#ANOVA test with no assumption of equal variances

oneway.test(weight ~ group, data = my_data)

    One-way analysis of means (not assuming equal variances)

data:  weight and group
F = 5.181, num df = 2.000, denom df = 17.128, p-value = 0.01739
#Pairwise t-tests with no assumption of equal variances

pairwise.t.test(my_data$weight, my_data$group,
                 p.adjust.method = "BH", pool.sd = FALSE)

    Pairwise comparisons using t tests with non-pooled SD 

data:  my_data$weight and my_data$group 

     ctrl  trt1 
trt1 0.250 -    
trt2 0.072 0.028

P value adjustment method: BH 
#Check the normality assumption

# 2. Normality
plot(res.aov, 2)

# Extract the residuals

aov_residuals <- residuals(object = res.aov )

# Run Shapiro-Wilk test
shapiro.test(x = aov_residuals )

    Shapiro-Wilk normality test

data:  aov_residuals
W = 0.96607, p-value = 0.4379
#Non-parametric alternative to one-way ANOVA test

kruskal.test(weight ~ group, data = my_data)

    Kruskal-Wallis rank sum test

data:  weight by group
Kruskal-Wallis chi-squared = 7.9882, df = 2, p-value = 0.01842

5.2 Two-Way ANOVA test

# Store the data in the variable my_data
my_data <- ToothGrowth
# Convert dose as a factor and recode the levels
# as "D0.5", "D1", "D2"
my_data$dose <- factor(my_data$dose, 
                  levels = c(0.5, 1, 2),
                  labels = c("D0.5", "D1", "D2"))
head(my_data)
   len supp dose
1  4.2   VC D0.5
2 11.5   VC D0.5
3  7.3   VC D0.5
4  5.8   VC D0.5
5  6.4   VC D0.5
6 10.0   VC D0.5
table(my_data$supp, my_data$dose)
    
     D0.5 D1 D2
  OJ   10 10 10
  VC   10 10 10
# Box plot with multiple groups
# +++++++++++++++++++++
# Plot tooth length ("len") by groups ("dose")
# Color box plot by a second group: "supp"
library("ggpubr")
ggboxplot(my_data, x = "dose", y = "len", color = "supp",
          palette = c("#00AFBB", "#E7B800"))

# Line plots with multiple groups
# +++++++++++++++++++++++
# Plot tooth length ("len") by groups ("dose")
# Color box plot by a second group: "supp"
# Add error bars: mean_se
# (other values include: mean_sd, mean_ci, median_iqr, ....)
library("ggpubr")
ggline(my_data, x = "dose", y = "len", color = "supp",
       add = c("mean_se", "dotplot"),
       palette = c("#00AFBB", "#E7B800"))
Bin width defaults to 1/30 of the range of the data. Pick better value with
`binwidth`.

# Box plot with two factor variables
boxplot(len ~ supp * dose, data=my_data, frame = FALSE, 
        col = c("#00AFBB", "#E7B800"), ylab="Tooth Length")

# Two-way interaction plot
interaction.plot(x.factor = my_data$dose, trace.factor = my_data$supp, 
                 response = my_data$len, fun = mean, 
                 type = "b", legend = TRUE, 
                 xlab = "Dose", ylab="Tooth Length",
                 pch=c(1,19), col = c("#00AFBB", "#E7B800"))

#Compute two-way ANOVA test

res.aov2 <- aov(len ~ supp + dose, data = my_data)
summary(res.aov2)
            Df Sum Sq Mean Sq F value   Pr(>F)    
supp         1  205.4   205.4   14.02 0.000429 ***
dose         2 2426.4  1213.2   82.81  < 2e-16 ***
Residuals   56  820.4    14.7                     
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Two-way ANOVA with interaction effect
# These two calls are equivalent
res.aov3 <- aov(len ~ supp * dose, data = my_data)
res.aov3 <- aov(len ~ supp + dose + supp:dose, data = my_data)
summary(res.aov3)
            Df Sum Sq Mean Sq F value   Pr(>F)    
supp         1  205.4   205.4  15.572 0.000231 ***
dose         2 2426.4  1213.2  92.000  < 2e-16 ***
supp:dose    2  108.3    54.2   4.107 0.021860 *  
Residuals   54  712.1    13.2                     
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Compute some summary statistics

require("dplyr")
group_by(my_data, supp, dose) %>%
  summarise(
    count = n(),
    mean = mean(len, na.rm = TRUE),
    sd = sd(len, na.rm = TRUE)
  )
`summarise()` has grouped output by 'supp'. You can override using the
`.groups` argument.
# A tibble: 6 × 5
# Groups:   supp [2]
  supp  dose  count  mean    sd
  <fct> <fct> <int> <dbl> <dbl>
1 OJ    D0.5     10 13.2   4.46
2 OJ    D1       10 22.7   3.91
3 OJ    D2       10 26.1   2.66
4 VC    D0.5     10  7.98  2.75
5 VC    D1       10 16.8   2.52
6 VC    D2       10 26.1   4.80
TukeyHSD(res.aov3, which = "dose")
  Tukey multiple comparisons of means
    95% family-wise confidence level

Fit: aov(formula = len ~ supp + dose + supp:dose, data = my_data)

$dose
          diff       lwr       upr   p adj
D1-D0.5  9.130  6.362488 11.897512 0.0e+00
D2-D0.5 15.495 12.727488 18.262512 0.0e+00
D2-D1    6.365  3.597488  9.132512 2.7e-06
#Multiple comparisons using multcomp package
library(multcomp)
summary(glht(res.aov2, linfct = mcp(dose = "Tukey")))

     Simultaneous Tests for General Linear Hypotheses

Multiple Comparisons of Means: Tukey Contrasts


Fit: aov(formula = len ~ supp + dose, data = my_data)

Linear Hypotheses:
               Estimate Std. Error t value Pr(>|t|)    
D1 - D0.5 == 0    9.130      1.210   7.543   <1e-05 ***
D2 - D0.5 == 0   15.495      1.210  12.802   <1e-05 ***
D2 - D1 == 0      6.365      1.210   5.259   <1e-05 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Adjusted p values reported -- single-step method)
#Pairwise t-test
pairwise.t.test(my_data$len, my_data$dose,
                p.adjust.method = "BH")

    Pairwise comparisons using t tests with pooled SD 

data:  my_data$len and my_data$dose 

   D0.5    D1     
D1 1.0e-08 -      
D2 4.4e-16 1.4e-05

P value adjustment method: BH 
#Check ANOVA assumptions: test validity?

# 1. Homogeneity of variances
plot(res.aov3, 1)

library(car)
leveneTest(len ~ supp*dose, data = my_data)
Levene's Test for Homogeneity of Variance (center = median)
      Df F value Pr(>F)
group  5  1.7086 0.1484
      54               
# 2. Normality
plot(res.aov3, 2)

# Extract the residuals
aov_residuals <- residuals(object = res.aov3)

# Run Shapiro-Wilk test
shapiro.test(x = aov_residuals )

    Shapiro-Wilk normality test

data:  aov_residuals
W = 0.98499, p-value = 0.6694

6. MANOVA

# Store the data in the variable my_data
my_data <- iris
sepl <- iris$Sepal.Length
petl <- iris$Petal.Length
# MANOVA test
res.man <- manova(cbind(Sepal.Length, Petal.Length) ~ Species, data = iris)
summary(res.man)
           Df Pillai approx F num Df den Df    Pr(>F)    
Species     2 0.9885   71.829      4    294 < 2.2e-16 ***
Residuals 147                                            
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Look to see which differ
summary.aov(res.man)
 Response Sepal.Length :
             Df Sum Sq Mean Sq F value    Pr(>F)    
Species       2 63.212  31.606  119.26 < 2.2e-16 ***
Residuals   147 38.956   0.265                      
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

 Response Petal.Length :
             Df Sum Sq Mean Sq F value    Pr(>F)    
Species       2 437.10 218.551  1180.2 < 2.2e-16 ***
Residuals   147  27.22   0.185                      
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

7 Kruskal-Wallis test