WEEK 7

Author

Kaenat Gul (N1325553)

Loading Packages

library(tibble)
library(tidyverse)
library(vtable)
library(ggplot2)
library(gplots)
library(graphics)
library(vcd)
library(dplyr)
library(tidyr)
library(knitr)
library(gmodels)
library(ggpubr)
library(PairedData)
library(multcomp)
library(car)

Comparing Means in R:

Comparing one-sample mean to a standard known mean:

1:One-sample T-test (parametric):

                              one-sample t-test is used to compare the mean of one sample to a known standard (or theoretical/hypothetical) mean (μ). 

For Example:

compare whether the mean weight of mice differs from 200 mg.

Visualize your data and compute one-sample t-test in R:

set.seed(1234)
my_data <- data.frame(
  name = paste0(rep("M_", 10), 1:10),
  weight = round(rnorm(10, 20, 2), 1)
)
# Print the first 10 rows of the data
head(my_data, 10)
   name weight
1   M_1   17.6
2   M_2   20.6
3   M_3   22.2
4   M_4   15.3
5   M_5   20.9
6   M_6   21.0
7   M_7   18.9
8   M_8   18.9
9   M_9   18.9
10 M_10   18.2
# Statistical summaries of weight
summary(my_data$weight)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  15.30   18.38   18.90   19.25   20.82   22.20 

Visualize your data using box plots:

library(ggpubr)
ggboxplot(my_data$weight, 
          ylab = "Weight (g)", xlab = FALSE,
          ggtheme = theme_minimal())

Preleminary test to check one-sample t-test assumptions:

                  It’s possible to use the Shapiro-Wilk normality test and to look at the normality plot.

Shapiro-Wilk test:

Null hypothesis: the data are normally distributed Alternative hypothesis: the data are not normally distributed

shapiro.test(my_data$weight) # => p-value = 0.6993

    Shapiro-Wilk normality test

data:  my_data$weight
W = 0.9526, p-value = 0.6993

Visual inspection:

                Visual inspection of the data normality using Q-Q plots (quantile-quantile plots). Q-Q plot draws the correlation between a given sample and the normal distribution.
  ggqqplot(my_data$weight, ylab = "Men's weight",
         ggtheme = theme_minimal()) 

Compute one-sample t-test:

# One-sample t-test
res <- t.test(my_data$weight, mu = 25)
# Printing the results
res 

    One Sample t-test

data:  my_data$weight
t = -9.0783, df = 9, p-value = 7.953e-06
alternative hypothesis: true mean is not equal to 25
95 percent confidence interval:
 17.8172 20.6828
sample estimates:
mean of x 
    19.25 

Access to the values returned by t.test() function:

# printing the p-value
res$p.value
[1] 7.953383e-06
# printing the mean
res$estimate
mean of x 
    19.25 
# printing the confidence interval
res$conf.int
[1] 17.8172 20.6828
attr(,"conf.level")
[1] 0.95

2: 2.2 One-sample Wilcoxon test (non-parametric):

One-Sample Wilcoxon Signed Rank Test in R:

           The one-sample Wilcoxon signed rank test is a non-parametric alternative to one-sample t-test when the data cannot be assumed to be normally distributed. It’s used to determine whether the median of the sample is equal to a known standard value (i.e. theoretical value).

###Visualize your data and compute one-sample Wilcoxon test in R: ###Import Data:

set.seed(1234)
my_data <- data.frame(
  name = paste0(rep("M_", 10), 1:10),
  weight = round(rnorm(10, 20, 2), 1)
)
# Print the first 10 rows of the data
head(my_data, 10)
   name weight
1   M_1   17.6
2   M_2   20.6
3   M_3   22.2
4   M_4   15.3
5   M_5   20.9
6   M_6   21.0
7   M_7   18.9
8   M_8   18.9
9   M_9   18.9
10 M_10   18.2
# Statistical summaries of weight
summary(my_data$weight)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  15.30   18.38   18.90   19.25   20.82   22.20 

Visualize your data using box plots:

library(ggpubr)
ggboxplot(my_data$weight, 
          ylab = "Weight (g)", xlab = FALSE,
          ggtheme = theme_minimal())

Compute one-sample Wilcoxon test:

# One-sample wilcoxon test
res <- wilcox.test(my_data$weight, mu = 25)
Warning in wilcox.test.default(my_data$weight, mu = 25): cannot compute exact
p-value with ties
# Printing the results
res 

    Wilcoxon signed rank test with continuity correction

data:  my_data$weight
V = 0, p-value = 0.005793
alternative hypothesis: true location is not equal to 25
# print only the p-value
res$p.value
[1] 0.005793045

3: Comparing the means of two independent groups:

Unpaired two samples t-test (parametric):

           The unpaired two-samples t-test is used to compare the mean of two independent groups.

###For Example: 50 women (group A) and 50 men (group B). We want to know if the mean weight of women (mA ) is significantly different from that of men (mB).

###Visualize your data and compute unpaired two-samples t-test in R: ###Import your data into R:

# Data in two numeric vectors
women_weight <- c(38.9, 61.2, 73.3, 21.8, 63.4, 64.6, 48.4, 48.8, 48.5)
men_weight <- c(67.8, 60, 63.4, 76, 89.4, 73.3, 67.3, 61.3, 62.4) 
# Create a data frame
my_data <- data.frame( 
                group = rep(c("Woman", "Man"), each = 9),
                weight = c(women_weight,  men_weight)
                )
# Print all data
print(my_data)
   group weight
1  Woman   38.9
2  Woman   61.2
3  Woman   73.3
4  Woman   21.8
5  Woman   63.4
6  Woman   64.6
7  Woman   48.4
8  Woman   48.8
9  Woman   48.5
10   Man   67.8
11   Man   60.0
12   Man   63.4
13   Man   76.0
14   Man   89.4
15   Man   73.3
16   Man   67.3
17   Man   61.3
18   Man   62.4

Compute summary statistics by groups:

library(dplyr)
group_by(my_data, group) %>%
  summarise(
    count = n(),
    mean = mean(weight, na.rm = TRUE),
    sd = sd(weight, na.rm = TRUE)
  )
# A tibble: 2 × 4
  group count  mean    sd
  <chr> <int> <dbl> <dbl>
1 Man       9  69.0  9.38
2 Woman     9  52.1 15.6 

Visualize your data using box plots:

# Plot weight by group and color by group
library("ggpubr")
ggboxplot(my_data, x = "group", y = "weight", 
          color = "group", palette = c("#00AFBB", "#E7B800"),
        ylab = "Weight", xlab = "Groups")

Preleminary test to check independent t-test assumptions:

          Use Shapiro-Wilk normality test as described at Normality Test in R. 
          

Null hypothesis: the data are normally distributed. Alternative hypothesis: the data are not normally distributed

# Shapiro-Wilk normality test for Men's weights
with(my_data, shapiro.test(weight[group == "Man"]))# p = 0.1

    Shapiro-Wilk normality test

data:  weight[group == "Man"]
W = 0.86425, p-value = 0.1066
# Shapiro-Wilk normality test for Women's weights
with(my_data, shapiro.test(weight[group == "Woman"])) # p = 0.6

    Shapiro-Wilk normality test

data:  weight[group == "Woman"]
W = 0.94266, p-value = 0.6101

F-test to test for homogeneity in variances:

res.ftest <- var.test(weight ~ group, data = my_data)
res.ftest

    F test to compare two variances

data:  weight by group
F = 0.36134, num df = 8, denom df = 8, p-value = 0.1714
alternative hypothesis: true ratio of variances is not equal to 1
95 percent confidence interval:
 0.08150656 1.60191315
sample estimates:
ratio of variances 
         0.3613398 

Compute unpaired two-samples t-test: ###Method 1:

# Compute t-test
res <- t.test(women_weight, men_weight, var.equal = TRUE)
res

    Two Sample t-test

data:  women_weight and men_weight
t = -2.7842, df = 16, p-value = 0.01327
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -29.748019  -4.029759
sample estimates:
mean of x mean of y 
 52.10000  68.98889 

Method 2:

# Compute t-test
res <- t.test(weight ~ group, data = my_data, var.equal = TRUE)
res

    Two Sample t-test

data:  weight by group
t = 2.7842, df = 16, p-value = 0.01327
alternative hypothesis: true difference in means between group Man and group Woman is not equal to 0
95 percent confidence interval:
  4.029759 29.748019
sample estimates:
  mean in group Man mean in group Woman 
           68.98889            52.10000 

Access to the values returned by t.test() function:

# printing the p-value
res$p.value
[1] 0.0132656
# printing the mean
res$estimate
  mean in group Man mean in group Woman 
           68.98889            52.10000 
# printing the confidence interval
res$conf.int
[1]  4.029759 29.748019
attr(,"conf.level")
[1] 0.95

4: Comparing the means of paired samples:

4.1 Paired samples t-test (parametric):

       The paired samples t-test is used to compare the means between two related groups of samples. In this case, you have two values (i.e., pair of values) for the same samples. This article describes how to compute paired samples t-test using R software. 

For Example:

      20 mice received a treatment X during 3 months. We want to know whether the treatment X has an impact on the weight of the mice. The weight of the 20 mice has been measured before and after the treatment. This gives us 20 sets of values before treatment and 20 sets of values after treatment from measuring twice the weight of the same mice.

In such situations, paired t-test can be used to compare the mean weights before and after treatment.

Visualize your data and compute paired t-test in R:

###Import your data into R:

# Weight of the mice before treatment
before <-c(200.1, 190.9, 192.7, 213, 241.4, 196.9, 172.2, 185.5, 205.2, 193.7)
# Weight of the mice after treatment
after <-c(392.9, 393.2, 345.1, 393, 434, 427.9, 422, 383.9, 392.3, 352.2)
# Create a data frame
my_data <- data.frame( 
                group = rep(c("before", "after"), each = 10),
                weight = c(before,  after)
                )
# Print all data
print(my_data)
    group weight
1  before  200.1
2  before  190.9
3  before  192.7
4  before  213.0
5  before  241.4
6  before  196.9
7  before  172.2
8  before  185.5
9  before  205.2
10 before  193.7
11  after  392.9
12  after  393.2
13  after  345.1
14  after  393.0
15  after  434.0
16  after  427.9
17  after  422.0
18  after  383.9
19  after  392.3
20  after  352.2

Compute summary statistics by groups:

library("dplyr")
group_by(my_data, group) %>%
  summarise(
    count = n(),
    mean = mean(weight, na.rm = TRUE),
    sd = sd(weight, na.rm = TRUE)
  )
# A tibble: 2 × 4
  group  count  mean    sd
  <chr>  <int> <dbl> <dbl>
1 after     10  394.  29.4
2 before    10  199.  18.5

Visualize your data using box plots:

# Plot weight by group and color by group
library("ggpubr")
ggboxplot(my_data, x = "group", y = "weight", 
          color = "group", palette = c("#00AFBB", "#E7B800"),
          order = c("before", "after"),
          ylab = "Weight", xlab = "Groups")

Plot paired data:

#Subset weight data before treatment
before <- subset(my_data,  group == "before", weight,
                 drop = TRUE)
# subset weight data after treatment
after <- subset(my_data,  group == "after", weight,
                 drop = TRUE)
# Plot paired data
library(PairedData)
pd <- paired(before, after)
plot(pd, type = "profile") + theme_bw()

Preleminary test to check paired t-test assumptions:

Use Shapiro-Wilk normality test as described at Normality Test in R.

Null hypothesis: the data are normally distributed Alternative hypothesis: the data are not normally distributed

# compute the difference
d <- with(my_data, 
        weight[group == "before"] - weight[group == "after"])
# Shapiro-Wilk normality test for the differences
shapiro.test(d) # => p-value = 0.6141

    Shapiro-Wilk normality test

data:  d
W = 0.94536, p-value = 0.6141

Compute paired samples t-test:

# Compute t-test
res <- t.test(before, after, paired = TRUE)
res

    Paired t-test

data:  before and after
t = -20.883, df = 9, p-value = 6.2e-09
alternative hypothesis: true mean difference is not equal to 0
95 percent confidence interval:
 -215.5581 -173.4219
sample estimates:
mean difference 
        -194.49 

Access to the values returned by t.test() function:

# printing the p-value
res$p.value
[1] 6.200298e-09
# printing the mean
res$estimate
mean difference 
        -194.49 
# printing the confidence interval
res$conf.int
[1] -215.5581 -173.4219
attr(,"conf.level")
[1] 0.95

4.2 Paired samples Wilcoxon test (non-parametric):

                   The paired samples Wilcoxon test (also known as Wilcoxon signed-rank test) is a non-parametric alternative to paired t-test used to compare paired data. It’s used when your data are not normally distributed. This tutorial describes how to compute paired samples Wilcoxon test in R.

Import your data into R:

# Weight of the mice before treatment
before <-c(200.1, 190.9, 192.7, 213, 241.4, 196.9, 172.2, 185.5, 205.2, 193.7)
# Weight of the mice after treatment
after <-c(392.9, 393.2, 345.1, 393, 434, 427.9, 422, 383.9, 392.3, 352.2)
# Create a data frame
my_data <- data.frame( 
                group = rep(c("before", "after"), each = 10),
                weight = c(before,  after)
                )
# Print all data
print(my_data)
    group weight
1  before  200.1
2  before  190.9
3  before  192.7
4  before  213.0
5  before  241.4
6  before  196.9
7  before  172.2
8  before  185.5
9  before  205.2
10 before  193.7
11  after  392.9
12  after  393.2
13  after  345.1
14  after  393.0
15  after  434.0
16  after  427.9
17  after  422.0
18  after  383.9
19  after  392.3
20  after  352.2

Compute summary statistics (median and inter-quartile range (IQR)) by groups using the dplyr package can be used:

library("dplyr")
group_by(my_data, group) %>%
  summarise(
    count = n(),
    median = median(weight, na.rm = TRUE),
    IQR = IQR(weight, na.rm = TRUE)
  )
# A tibble: 2 × 4
  group  count median   IQR
  <chr>  <int>  <dbl> <dbl>
1 after     10   393.  28.8
2 before    10   195.  12.6

Visualize your data using box plots:

# Plot weight by group and color by group
library("ggpubr")
ggboxplot(my_data, x = "group", y = "weight", 
          color = "group", palette = c("#00AFBB", "#E7B800"),
          order = c("before", "after"),
          ylab = "Weight", xlab = "Groups")

Plot paired data:

# Subset weight data before treatment
before <- subset(my_data,  group == "before", weight,
                 drop = TRUE)
# subset weight data after treatment
after <- subset(my_data,  group == "after", weight,
                 drop = TRUE)
# Plot paired data
library(PairedData)
pd <- paired(before, after)
plot(pd, type = "profile") + theme_bw()

Compute paired-sample Wilcoxon test:

res <- wilcox.test(before, after, paired = TRUE)
res

    Wilcoxon signed rank exact test

data:  before and after
V = 0, p-value = 0.001953
alternative hypothesis: true location shift is not equal to 0
# print only the p-value
res$p.value
[1] 0.001953125

5 Comparing the means of more than two groups:

5.1 One-way ANOVA test:

                   The one-way analysis of variance (ANOVA), also known as one-factor ANOVA, is an extension of independent two-samples t-test for comparing means in a situation where there are more than two groups. In one-way ANOVA, the data is organized into several groups base on one single grouping variable (also called factor variable).

Visualize your data and compute one-way ANOVA in R:

Import your data into R:

my_data <- PlantGrowth
# Show a random sample
set.seed(1234)
dplyr::sample_n(my_data, 10)
   weight group
1    6.15  trt2
2    3.83  trt1
3    5.29  trt2
4    5.12  trt2
5    4.50  ctrl
6    4.17  trt1
7    5.87  trt1
8    5.33  ctrl
9    5.26  trt2
10   4.61  ctrl
# Show the levels
levels(my_data$group)
[1] "ctrl" "trt1" "trt2"

Compute summary statistics by groups - count, mean, sd:

library(dplyr)
group_by(my_data, group) %>%
  summarise(
    count = n(),
    mean = mean(weight, na.rm = TRUE),
    sd = sd(weight, na.rm = TRUE)
  )
# A tibble: 3 × 4
  group count  mean    sd
  <fct> <int> <dbl> <dbl>
1 ctrl     10  5.03 0.583
2 trt1     10  4.66 0.794
3 trt2     10  5.53 0.443

Visualize your data:

# Plot weight by group and color by group
library("ggpubr")
ggboxplot(my_data, x = "group", y = "weight", 
          color = "group", palette = c("#00AFBB", "#E7B800", "#FC4E07"),
          order = c("ctrl", "trt1", "trt2"),
          ylab = "Weight", xlab = "Treatment")

Mean plot:

# Plot weight by group
# Add error bars: mean_se
# (other values include: mean_sd, mean_ci, median_iqr, ....)
library("ggpubr")
ggline(my_data, x = "group", y = "weight", 
       add = c("mean_se", "jitter"), 
       order = c("ctrl", "trt1", "trt2"),
       ylab = "Weight", xlab = "Treatment")

BOXplot:

# Box plot
boxplot(weight ~ group, data = my_data,
        xlab = "Treatment", ylab = "Weight",
        frame = FALSE, col = c("#00AFBB", "#E7B800", "#FC4E07"))

PlotMean:

# plotmeans
library("gplots")
plotmeans(weight ~ group, data = my_data, frame = FALSE,
          xlab = "Treatment", ylab = "Weight",
          main="Mean Plot with 95% CI") 
Warning in plot.xy(xy.coords(x, y), type = type, ...): "frame" is not a
graphical parameter
Warning in axis(1, at = 1:length(means), labels = legends, ...): "frame" is not
a graphical parameter
Warning in plot.xy(xy.coords(x, y), type = type, ...): "frame" is not a
graphical parameter

Compute one-way ANOVA test:

# Compute the analysis of variance
res.aov <- aov(weight ~ group, data = my_data)
# Summary of the analysis
summary(res.aov)
            Df Sum Sq Mean Sq F value Pr(>F)  
group        2  3.766  1.8832   4.846 0.0159 *
Residuals   27 10.492  0.3886                 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Multiple pairwise-comparison between the means of groups:

Tukey multiple pairwise-comparisons:

TukeyHSD(res.aov)
  Tukey multiple comparisons of means
    95% family-wise confidence level

Fit: aov(formula = weight ~ group, data = my_data)

$group
            diff        lwr       upr     p adj
trt1-ctrl -0.371 -1.0622161 0.3202161 0.3908711
trt2-ctrl  0.494 -0.1972161 1.1852161 0.1979960
trt2-trt1  0.865  0.1737839 1.5562161 0.0120064

Multiple comparisons using multcomp package:

library(multcomp)
summary(glht(res.aov, linfct = mcp(group = "Tukey")))

     Simultaneous Tests for General Linear Hypotheses

Multiple Comparisons of Means: Tukey Contrasts


Fit: aov(formula = weight ~ group, data = my_data)

Linear Hypotheses:
                 Estimate Std. Error t value Pr(>|t|)  
trt1 - ctrl == 0  -0.3710     0.2788  -1.331   0.3908  
trt2 - ctrl == 0   0.4940     0.2788   1.772   0.1979  
trt2 - trt1 == 0   0.8650     0.2788   3.103   0.0121 *
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Adjusted p values reported -- single-step method)

Pairewise t-test:

pairwise.t.test(my_data$weight, my_data$group,
                 p.adjust.method = "BH")

    Pairwise comparisons using t tests with pooled SD 

data:  my_data$weight and my_data$group 

     ctrl  trt1 
trt1 0.194 -    
trt2 0.132 0.013

P value adjustment method: BH 

Check the homogeneity of variance assumption:

# 1. Homogeneity of variances
plot(res.aov, 1)

We recommend Levene’s test, which is less sensitive to departures from normal distribution. The function leveneTest() [in car package] will be used:

library(car)
leveneTest(weight ~ group, data = my_data)
Levene's Test for Homogeneity of Variance (center = median)
      Df F value Pr(>F)
group  2  1.1192 0.3412
      27               

Relaxing the homogeneity of variance assumption:

ANOVA test with no assumption of equal variances:

oneway.test(weight ~ group, data = my_data)

    One-way analysis of means (not assuming equal variances)

data:  weight and group
F = 5.181, num df = 2.000, denom df = 17.128, p-value = 0.01739

Pairwise t-tests with no assumption of equal variances:

pairwise.t.test(my_data$weight, my_data$group,
                 p.adjust.method = "BH", pool.sd = FALSE)

    Pairwise comparisons using t tests with non-pooled SD 

data:  my_data$weight and my_data$group 

     ctrl  trt1 
trt1 0.250 -    
trt2 0.072 0.028

P value adjustment method: BH 

Check the normality assumption:

# 2. Normality
plot(res.aov, 2)

# Extract the residuals
aov_residuals <- residuals(object = res.aov )
# Run Shapiro-Wilk test
shapiro.test(x = aov_residuals )

    Shapiro-Wilk normality test

data:  aov_residuals
W = 0.96607, p-value = 0.4379

Non-parametric alternative to one-way ANOVA test:

kruskal.test(weight ~ group, data = my_data)

    Kruskal-Wallis rank sum test

data:  weight by group
Kruskal-Wallis chi-squared = 7.9882, df = 2, p-value = 0.01842

Two-Way ANOVA Test in R:

                  Two-way ANOVA test is used to evaluate simultaneously the effect of two grouping variables (A and B) on a response variable.

Import your data:

# Store the data in the variable my_data
my_data <- ToothGrowth

Convert dose as a factor and recode the levels:

# as "D0.5", "D1", "D2"
my_data$dose <- factor(my_data$dose, 
                  levels = c(0.5, 1, 2),
                  labels = c("D0.5", "D1", "D2"))
head(my_data)
   len supp dose
1  4.2   VC D0.5
2 11.5   VC D0.5
3  7.3   VC D0.5
4  5.8   VC D0.5
5  6.4   VC D0.5
6 10.0   VC D0.5
table(my_data$supp, my_data$dose)
    
     D0.5 D1 D2
  OJ   10 10 10
  VC   10 10 10

Box plot with multiple groups:

# Plot tooth length ("len") by groups ("dose")
# Color box plot by a second group: "supp"
library("ggpubr")
ggboxplot(my_data, x = "dose", y = "len", color = "supp",
          palette = c("#00AFBB", "#E7B800"))

Line plots with multiple groups:

# Plot tooth length ("len") by groups ("dose")
# Color box plot by a second group: "supp"
# Add error bars: mean_se
# (other values include: mean_sd, mean_ci, median_iqr, ....)
library("ggpubr")
ggline(my_data, x = "dose", y = "len", color = "supp",
       add = c("mean_se", "dotplot"),
       palette = c("#00AFBB", "#E7B800"))
Bin width defaults to 1/30 of the range of the data. Pick better value with
`binwidth`.

Box plot with two factor variables:

boxplot(len ~ supp * dose, data=my_data, frame = FALSE, 
        col = c("#00AFBB", "#E7B800"), ylab="Tooth Length")

Two-way interaction plot:

interaction.plot(x.factor = my_data$dose, trace.factor = my_data$supp, 
                 response = my_data$len, fun = mean, 
                 type = "b", legend = TRUE, 
                 xlab = "Dose", ylab="Tooth Length",
                 pch=c(1,19), col = c("#00AFBB", "#E7B800"))

Compute two-way ANOVA test:

res.aov2 <- aov(len ~ supp + dose, data = my_data)
summary(res.aov2)
            Df Sum Sq Mean Sq F value   Pr(>F)    
supp         1  205.4   205.4   14.02 0.000429 ***
dose         2 2426.4  1213.2   82.81  < 2e-16 ***
Residuals   56  820.4    14.7                     
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Two-way ANOVA with interaction effect:

# These two calls are equivalent
res.aov3 <- aov(len ~ supp * dose, data = my_data)
res.aov3 <- aov(len ~ supp + dose + supp:dose, data = my_data)
summary(res.aov3)
            Df Sum Sq Mean Sq F value   Pr(>F)    
supp         1  205.4   205.4  15.572 0.000231 ***
dose         2 2426.4  1213.2  92.000  < 2e-16 ***
supp:dose    2  108.3    54.2   4.107 0.021860 *  
Residuals   54  712.1    13.2                     
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Compute some summary statistics:

require("dplyr")
group_by(my_data, supp, dose) %>%
  summarise(
    count = n(),
    mean = mean(len, na.rm = TRUE),
    sd = sd(len, na.rm = TRUE)
  )
`summarise()` has grouped output by 'supp'. You can override using the
`.groups` argument.
# A tibble: 6 × 5
# Groups:   supp [2]
  supp  dose  count  mean    sd
  <fct> <fct> <int> <dbl> <dbl>
1 OJ    D0.5     10 13.2   4.46
2 OJ    D1       10 22.7   3.91
3 OJ    D2       10 26.1   2.66
4 VC    D0.5     10  7.98  2.75
5 VC    D1       10 16.8   2.52
6 VC    D2       10 26.1   4.80
TukeyHSD(res.aov3, which = "dose")
  Tukey multiple comparisons of means
    95% family-wise confidence level

Fit: aov(formula = len ~ supp + dose + supp:dose, data = my_data)

$dose
          diff       lwr       upr   p adj
D1-D0.5  9.130  6.362488 11.897512 0.0e+00
D2-D0.5 15.495 12.727488 18.262512 0.0e+00
D2-D1    6.365  3.597488  9.132512 2.7e-06
### PAIRWISE T-TEST:

::: {.cell}

```{.r .cell-code}
pairwise.t.test(my_data$len, my_data$dose,
                p.adjust.method = "BH")

    Pairwise comparisons using t tests with pooled SD 

data:  my_data$len and my_data$dose 

   D0.5    D1     
D1 1.0e-08 -      
D2 4.4e-16 1.4e-05

P value adjustment method: BH 

:::

Check ANOVA assumptions: test validity?

# 1. Homogeneity of variances
plot(res.aov3, 1)

Extract the residuals:

aov_residuals <- residuals(object = res.aov3)

# Run Shapiro-Wilk test
shapiro.test(x = aov_residuals )

    Shapiro-Wilk normality test

data:  aov_residuals
W = 0.98499, p-value = 0.6694

6.MANOVA:

# Store the data in the variable my_data
my_data <- iris
sepl <- iris$Sepal.Length
petl <- iris$Petal.Length
# MANOVA test
res.man <- manova(cbind(Sepal.Length, Petal.Length) ~ Species, data = iris)
summary(res.man)
           Df Pillai approx F num Df den Df    Pr(>F)    
Species     2 0.9885   71.829      4    294 < 2.2e-16 ***
Residuals 147                                            
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Look to see which differ
summary.aov(res.man)
 Response Sepal.Length :
             Df Sum Sq Mean Sq F value    Pr(>F)    
Species       2 63.212  31.606  119.26 < 2.2e-16 ***
Residuals   147 38.956   0.265                      
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

 Response Petal.Length :
             Df Sum Sq Mean Sq F value    Pr(>F)    
Species       2 437.10 218.551  1180.2 < 2.2e-16 ***
Residuals   147  27.22   0.185                      
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1