##Comparing one-sample mean to a standard known mean: #One-sample T-test (parametric):
set.seed(1234)
my_data <- data.frame(
name = paste0(rep("M_", 10), 1:10),
weight = round(rnorm(10, 20, 2), 1)
)
#Check your data:
# Print the first 10 rows of the data
head(my_data, 10)
## name weight
## 1 M_1 17.6
## 2 M_2 20.6
## 3 M_3 22.2
## 4 M_4 15.3
## 5 M_5 20.9
## 6 M_6 21.0
## 7 M_7 18.9
## 8 M_8 18.9
## 9 M_9 18.9
## 10 M_10 18.2
# Statistical summaries of weight
summary(my_data$weight)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 15.30 18.38 18.90 19.25 20.82 22.20
#Visualize your data using box plots:
library(ggpubr)
## Warning: package 'ggpubr' was built under R version 4.3.2
## Loading required package: ggplot2
ggboxplot(my_data$weight,
ylab = "Weight (g)", xlab = FALSE,
ggtheme = theme_minimal())
#Preleminary test to check one-sample t-test assumptions:
shapiro.test(my_data$weight) # => p-value = 0.6993
##
## Shapiro-Wilk normality test
##
## data: my_data$weight
## W = 0.9526, p-value = 0.6993
library("ggpubr")
ggqqplot(my_data$weight, ylab = "Men's weight",
ggtheme = theme_minimal())
#Compute one-sample t-test:
# One-sample t-test
res <- t.test(my_data$weight, mu = 25)
# Printing the results
res
##
## One Sample t-test
##
## data: my_data$weight
## t = -9.0783, df = 9, p-value = 7.953e-06
## alternative hypothesis: true mean is not equal to 25
## 95 percent confidence interval:
## 17.8172 20.6828
## sample estimates:
## mean of x
## 19.25
# printing the p-value
res$p.value
## [1] 7.953383e-06
# printing the mean
res$estimate
## mean of x
## 19.25
# printing the confidence interval
res$conf.int
## [1] 17.8172 20.6828
## attr(,"conf.level")
## [1] 0.95
#One-Sample Wilcoxon Signed Rank Test in R:
set.seed(1234)
my_data <- data.frame(
name = paste0(rep("M_", 10), 1:10),
weight = round(rnorm(10, 20, 2), 1)
)
#Check your data
# Print the first 10 rows of the data
head(my_data, 10)
## name weight
## 1 M_1 17.6
## 2 M_2 20.6
## 3 M_3 22.2
## 4 M_4 15.3
## 5 M_5 20.9
## 6 M_6 21.0
## 7 M_7 18.9
## 8 M_8 18.9
## 9 M_9 18.9
## 10 M_10 18.2
#Visualize your data using box plots
library(ggpubr)
ggboxplot(my_data$weight,
ylab = "Weight (g)", xlab = FALSE,
ggtheme = theme_minimal())
#Compute one-sample Wilcoxon test
# One-sample wilcoxon test
res <- wilcox.test(my_data$weight, mu = 25)
## Warning in wilcox.test.default(my_data$weight, mu = 25): cannot compute exact
## p-value with ties
# Printing the results
res
##
## Wilcoxon signed rank test with continuity correction
##
## data: my_data$weight
## V = 0, p-value = 0.005793
## alternative hypothesis: true location is not equal to 25
# print only the p-value
res$p.value
## [1] 0.005793045
##Comparing the means of two independent groups #Unpaired two samples t-test (parametric)
# Data in two numeric vectors
women_weight <- c(38.9, 61.2, 73.3, 21.8, 63.4, 64.6, 48.4, 48.8, 48.5)
men_weight <- c(67.8, 60, 63.4, 76, 89.4, 73.3, 67.3, 61.3, 62.4)
# Create a data frame
my_data <- data.frame(
group = rep(c("Woman", "Man"), each = 9),
weight = c(women_weight, men_weight)
)
#Check your data
# Print all data
print(my_data)
## group weight
## 1 Woman 38.9
## 2 Woman 61.2
## 3 Woman 73.3
## 4 Woman 21.8
## 5 Woman 63.4
## 6 Woman 64.6
## 7 Woman 48.4
## 8 Woman 48.8
## 9 Woman 48.5
## 10 Man 67.8
## 11 Man 60.0
## 12 Man 63.4
## 13 Man 76.0
## 14 Man 89.4
## 15 Man 73.3
## 16 Man 67.3
## 17 Man 61.3
## 18 Man 62.4
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.3.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
group_by(my_data, group) %>%
summarise(
count = n(),
mean = mean(weight, na.rm = TRUE),
sd = sd(weight, na.rm = TRUE)
)
## # A tibble: 2 × 4
## group count mean sd
## <chr> <int> <dbl> <dbl>
## 1 Man 9 69.0 9.38
## 2 Woman 9 52.1 15.6
#Visualize your data using box plots
# Plot weight by group and color by group
library("ggpubr")
ggboxplot(my_data, x = "group", y = "weight",
color = "group", palette = c("#00AFBB", "#E7B800"),
ylab = "Weight", xlab = "Groups")
# Shapiro-Wilk normality test for Men's weights
with(my_data, shapiro.test(weight[group == "Man"]))# p = 0.1
##
## Shapiro-Wilk normality test
##
## data: weight[group == "Man"]
## W = 0.86425, p-value = 0.1066
# Shapiro-Wilk normality test for Women's weights
with(my_data, shapiro.test(weight[group == "Woman"])) # p = 0.6
##
## Shapiro-Wilk normality test
##
## data: weight[group == "Woman"]
## W = 0.94266, p-value = 0.6101
res.ftest <- var.test(weight ~ group, data = my_data)
res.ftest
##
## F test to compare two variances
##
## data: weight by group
## F = 0.36134, num df = 8, denom df = 8, p-value = 0.1714
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.08150656 1.60191315
## sample estimates:
## ratio of variances
## 0.3613398
# Compute t-test
res <- t.test(women_weight, men_weight, var.equal = TRUE)
res
##
## Two Sample t-test
##
## data: women_weight and men_weight
## t = -2.7842, df = 16, p-value = 0.01327
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -29.748019 -4.029759
## sample estimates:
## mean of x mean of y
## 52.10000 68.98889
# Compute t-test
res <- t.test(weight ~ group, data = my_data, var.equal = TRUE)
res
##
## Two Sample t-test
##
## data: weight by group
## t = 2.7842, df = 16, p-value = 0.01327
## alternative hypothesis: true difference in means between group Man and group Woman is not equal to 0
## 95 percent confidence interval:
## 4.029759 29.748019
## sample estimates:
## mean in group Man mean in group Woman
## 68.98889 52.10000
# printing the p-value
res$p.value
## [1] 0.0132656
# printing the mean
res$estimate
## mean in group Man mean in group Woman
## 68.98889 52.10000
# printing the confidence interval
res$conf.int
## [1] 4.029759 29.748019
## attr(,"conf.level")
## [1] 0.95
##Unpaired two-samples Wilcoxon test (non-parametric): #Visualize your data and compute Wilcoxon test in R
# Data in two numeric vectors
women_weight <- c(38.9, 61.2, 73.3, 21.8, 63.4, 64.6, 48.4, 48.8, 48.5)
men_weight <- c(67.8, 60, 63.4, 76, 89.4, 73.3, 67.3, 61.3, 62.4)
# Create a data frame
my_data <- data.frame(
group = rep(c("Woman", "Man"), each = 9),
weight = c(women_weight, men_weight)
)
#Check your data
print(my_data)
## group weight
## 1 Woman 38.9
## 2 Woman 61.2
## 3 Woman 73.3
## 4 Woman 21.8
## 5 Woman 63.4
## 6 Woman 64.6
## 7 Woman 48.4
## 8 Woman 48.8
## 9 Woman 48.5
## 10 Man 67.8
## 11 Man 60.0
## 12 Man 63.4
## 13 Man 76.0
## 14 Man 89.4
## 15 Man 73.3
## 16 Man 67.3
## 17 Man 61.3
## 18 Man 62.4
library(dplyr)
group_by(my_data, group) %>%
summarise(
count = n(),
median = median(weight, na.rm = TRUE),
IQR = IQR(weight, na.rm = TRUE)
)
## # A tibble: 2 × 4
## group count median IQR
## <chr> <int> <dbl> <dbl>
## 1 Man 9 67.3 10.9
## 2 Woman 9 48.8 15
# Plot weight by group and color by group
library("ggpubr")
ggboxplot(my_data, x = "group", y = "weight",
color = "group", palette = c("#00AFBB", "#E7B800"),
ylab = "Weight", xlab = "Groups")
#Compute unpaired two-samples Wilcoxon test
res <- wilcox.test(women_weight, men_weight)
## Warning in wilcox.test.default(women_weight, men_weight): cannot compute exact
## p-value with ties
res
##
## Wilcoxon rank sum test with continuity correction
##
## data: women_weight and men_weight
## W = 15, p-value = 0.02712
## alternative hypothesis: true location shift is not equal to 0
res <- wilcox.test(weight ~ group, data = my_data,
exact = FALSE)
res
##
## Wilcoxon rank sum test with continuity correction
##
## data: weight by group
## W = 66, p-value = 0.02712
## alternative hypothesis: true location shift is not equal to 0
# Print the p-value only
res$p.value
## [1] 0.02711657
##Comparing the means of paired samples # Paired samples t-test (parametric) #Visualize your data and compute paired t-test in R
# Data in two numeric vectors
# ++++++++++++++++++++++++++
# Weight of the mice before treatment
before <-c(200.1, 190.9, 192.7, 213, 241.4, 196.9, 172.2, 185.5, 205.2, 193.7)
# Weight of the mice after treatment
after <-c(392.9, 393.2, 345.1, 393, 434, 427.9, 422, 383.9, 392.3, 352.2)
# Create a data frame
my_data <- data.frame(
group = rep(c("before", "after"), each = 10),
weight = c(before, after)
)
#Check your data
# Print all data
print(my_data)
## group weight
## 1 before 200.1
## 2 before 190.9
## 3 before 192.7
## 4 before 213.0
## 5 before 241.4
## 6 before 196.9
## 7 before 172.2
## 8 before 185.5
## 9 before 205.2
## 10 before 193.7
## 11 after 392.9
## 12 after 393.2
## 13 after 345.1
## 14 after 393.0
## 15 after 434.0
## 16 after 427.9
## 17 after 422.0
## 18 after 383.9
## 19 after 392.3
## 20 after 352.2
library("dplyr")
group_by(my_data, group) %>%
summarise(
count = n(),
mean = mean(weight, na.rm = TRUE),
sd = sd(weight, na.rm = TRUE)
)
## # A tibble: 2 × 4
## group count mean sd
## <chr> <int> <dbl> <dbl>
## 1 after 10 394. 29.4
## 2 before 10 199. 18.5
# Plot weight by group and color by group
library("ggpubr")
ggboxplot(my_data, x = "group", y = "weight",
color = "group", palette = c("#00AFBB", "#E7B800"),
order = c("before", "after"),
ylab = "Weight", xlab = "Groups")
# Subset weight data before treatment
before <- subset(my_data, group == "before", weight,
drop = TRUE)
# subset weight data after treatment
after <- subset(my_data, group == "after", weight,
drop = TRUE)
# Plot paired data
library(PairedData)
## Warning: package 'PairedData' was built under R version 4.3.2
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## Loading required package: gld
## Warning: package 'gld' was built under R version 4.3.2
## Loading required package: mvtnorm
## Warning: package 'mvtnorm' was built under R version 4.3.2
## Loading required package: lattice
##
## Attaching package: 'PairedData'
## The following object is masked from 'package:base':
##
## summary
pd <- paired(before, after)
plot(pd, type = "profile") + theme_bw()
# compute the difference
d <- with(my_data,
weight[group == "before"] - weight[group == "after"])
# Shapiro-Wilk normality test for the differences
shapiro.test(d) # => p-value = 0.6141
##
## Shapiro-Wilk normality test
##
## data: d
## W = 0.94536, p-value = 0.6141
#Compute paired samples t-test
# Compute t-test
res <- t.test(before, after, paired = TRUE)
res
##
## Paired t-test
##
## data: before and after
## t = -20.883, df = 9, p-value = 6.2e-09
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -215.5581 -173.4219
## sample estimates:
## mean difference
## -194.49
# Compute t-test
res <- t.test(weight ~ group, data = my_data, paired = TRUE)
res
##
## Paired t-test
##
## data: weight by group
## t = 20.883, df = 9, p-value = 6.2e-09
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## 173.4219 215.5581
## sample estimates:
## mean difference
## 194.49
##Paired samples Wilcoxon test (non-parametric) #Visualize your data and compute paired samples Wilcoxon test in R
# Data in two numeric vectors
# ++++++++++++++++++++++++++
# Weight of the mice before treatment
before <-c(200.1, 190.9, 192.7, 213, 241.4, 196.9, 172.2, 185.5, 205.2, 193.7)
# Weight of the mice after treatment
after <-c(392.9, 393.2, 345.1, 393, 434, 427.9, 422, 383.9, 392.3, 352.2)
# Create a data frame
my_data <- data.frame(
group = rep(c("before", "after"), each = 10),
weight = c(before, after)
)
#Check your data
# Print all data
print(my_data)
## group weight
## 1 before 200.1
## 2 before 190.9
## 3 before 192.7
## 4 before 213.0
## 5 before 241.4
## 6 before 196.9
## 7 before 172.2
## 8 before 185.5
## 9 before 205.2
## 10 before 193.7
## 11 after 392.9
## 12 after 393.2
## 13 after 345.1
## 14 after 393.0
## 15 after 434.0
## 16 after 427.9
## 17 after 422.0
## 18 after 383.9
## 19 after 392.3
## 20 after 352.2
library("dplyr")
group_by(my_data, group) %>%
summarise(
count = n(),
median = median(weight, na.rm = TRUE),
IQR = IQR(weight, na.rm = TRUE)
)
## # A tibble: 2 × 4
## group count median IQR
## <chr> <int> <dbl> <dbl>
## 1 after 10 393. 28.8
## 2 before 10 195. 12.6
# Plot weight by group and color by group
library("ggpubr")
ggboxplot(my_data, x = "group", y = "weight",
color = "group", palette = c("#00AFBB", "#E7B800"),
order = c("before", "after"),
ylab = "Weight", xlab = "Groups")
# Subset weight data before treatment
before <- subset(my_data, group == "before", weight,
drop = TRUE)
# subset weight data after treatment
after <- subset(my_data, group == "after", weight,
drop = TRUE)
# Plot paired data
library(PairedData)
pd <- paired(before, after)
plot(pd, type = "profile") + theme_bw()
#Compute paired-sample Wilcoxon test
res <- wilcox.test(before, after, paired = TRUE)
res
##
## Wilcoxon signed rank exact test
##
## data: before and after
## V = 0, p-value = 0.001953
## alternative hypothesis: true location shift is not equal to 0
# Compute t-test
res <- wilcox.test(weight ~ group, data = my_data, paired = TRUE)
res
##
## Wilcoxon signed rank exact test
##
## data: weight by group
## V = 55, p-value = 0.001953
## alternative hypothesis: true location shift is not equal to 0
# print only the p-value
res$p.value
## [1] 0.001953125
##Comparing the means of more than two groups # One-way ANOVA test
my_data <- PlantGrowth
# Show a random sample
set.seed(1234)
dplyr::sample_n(my_data, 10)
## weight group
## 1 6.15 trt2
## 2 3.83 trt1
## 3 5.29 trt2
## 4 5.12 trt2
## 5 4.50 ctrl
## 6 4.17 trt1
## 7 5.87 trt1
## 8 5.33 ctrl
## 9 5.26 trt2
## 10 4.61 ctrl
# Show the levels
levels(my_data$group)
## [1] "ctrl" "trt1" "trt2"
my_data$group <- ordered(my_data$group,
levels = c("ctrl", "trt1", "trt2"))
library(dplyr)
group_by(my_data, group) %>%
summarise(
count = n(),
mean = mean(weight, na.rm = TRUE),
sd = sd(weight, na.rm = TRUE)
)
## # A tibble: 3 × 4
## group count mean sd
## <ord> <int> <dbl> <dbl>
## 1 ctrl 10 5.03 0.583
## 2 trt1 10 4.66 0.794
## 3 trt2 10 5.53 0.443
# Box plots
# ++++++++++++++++++++
# Plot weight by group and color by group
library("ggpubr")
ggboxplot(my_data, x = "group", y = "weight",
color = "group", palette = c("#00AFBB", "#E7B800", "#FC4E07"),
order = c("ctrl", "trt1", "trt2"),
ylab = "Weight", xlab = "Treatment")
# Mean plots
# ++++++++++++++++++++
# Plot weight by group
# Add error bars: mean_se
# (other values include: mean_sd, mean_ci, median_iqr, ....)
library("ggpubr")
ggline(my_data, x = "group", y = "weight",
add = c("mean_se", "jitter"),
order = c("ctrl", "trt1", "trt2"),
ylab = "Weight", xlab = "Treatment")
# Box plot
boxplot(weight ~ group, data = my_data,
xlab = "Treatment", ylab = "Weight",
frame = FALSE, col = c("#00AFBB", "#E7B800", "#FC4E07"))
# plotmeans
library("gplots")
## Warning: package 'gplots' was built under R version 4.3.2
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
plotmeans(weight ~ group, data = my_data, frame = FALSE,
xlab = "Treatment", ylab = "Weight",
main="Mean Plot with 95% CI")
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "frame" is not a
## graphical parameter
## Warning in axis(1, at = 1:length(means), labels = legends, ...): "frame" is not
## a graphical parameter
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "frame" is not a
## graphical parameter
# Compute the analysis of variance
res.aov <- aov(weight ~ group, data = my_data)
# Summary of the analysis
summary(res.aov)
## Df Sum Sq Mean Sq F value Pr(>F)
## group 2 3.766 1.8832 4.846 0.0159 *
## Residuals 27 10.492 0.3886
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Tukey multiple pairwise-comparisons:
TukeyHSD(res.aov)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = weight ~ group, data = my_data)
##
## $group
## diff lwr upr p adj
## trt1-ctrl -0.371 -1.0622161 0.3202161 0.3908711
## trt2-ctrl 0.494 -0.1972161 1.1852161 0.1979960
## trt2-trt1 0.865 0.1737839 1.5562161 0.0120064
library(multcomp)
## Warning: package 'multcomp' was built under R version 4.3.2
## Loading required package: survival
## Loading required package: TH.data
## Warning: package 'TH.data' was built under R version 4.3.2
##
## Attaching package: 'TH.data'
## The following object is masked from 'package:MASS':
##
## geyser
summary(glht(res.aov, linfct = mcp(group = "Tukey")))
##
## Simultaneous Tests for General Linear Hypotheses
##
## Multiple Comparisons of Means: Tukey Contrasts
##
##
## Fit: aov(formula = weight ~ group, data = my_data)
##
## Linear Hypotheses:
## Estimate Std. Error t value Pr(>|t|)
## trt1 - ctrl == 0 -0.3710 0.2788 -1.331 0.3908
## trt2 - ctrl == 0 0.4940 0.2788 1.772 0.1979
## trt2 - trt1 == 0 0.8650 0.2788 3.103 0.0121 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
#Pairewise t-test
pairwise.t.test(my_data$weight, my_data$group,
p.adjust.method = "BH")
##
## Pairwise comparisons using t tests with pooled SD
##
## data: my_data$weight and my_data$group
##
## ctrl trt1
## trt1 0.194 -
## trt2 0.132 0.013
##
## P value adjustment method: BH
# 1. Homogeneity of variances
plot(res.aov, 1)
library(car)
## Warning: package 'car' was built under R version 4.3.2
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.3.2
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
leveneTest(weight ~ group, data = my_data)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 2 1.1192 0.3412
## 27
#ANOVA test with no assumption of equal variances
oneway.test(weight ~ group, data = my_data)
##
## One-way analysis of means (not assuming equal variances)
##
## data: weight and group
## F = 5.181, num df = 2.000, denom df = 17.128, p-value = 0.01739
#Pairwise t-tests with no assumption of equal variances
pairwise.t.test(my_data$weight, my_data$group,
p.adjust.method = "BH", pool.sd = FALSE)
##
## Pairwise comparisons using t tests with non-pooled SD
##
## data: my_data$weight and my_data$group
##
## ctrl trt1
## trt1 0.250 -
## trt2 0.072 0.028
##
## P value adjustment method: BH
# 2. Normality
plot(res.aov, 2)
# Extract the residuals
aov_residuals <- residuals(object = res.aov )
# Run Shapiro-Wilk test
shapiro.test(x = aov_residuals )
##
## Shapiro-Wilk normality test
##
## data: aov_residuals
## W = 0.96607, p-value = 0.4379
#Non-parametric alternative to one-way ANOVA test
kruskal.test(weight ~ group, data = my_data)
##
## Kruskal-Wallis rank sum test
##
## data: weight by group
## Kruskal-Wallis chi-squared = 7.9882, df = 2, p-value = 0.01842
##Two-Way ANOVA test:
# Store the data in the variable my_data
my_data <- ToothGrowth
# Show a random sample
set.seed(1234)
dplyr::sample_n(my_data, 10)
## len supp dose
## 1 21.5 VC 2.0
## 2 17.3 VC 1.0
## 3 27.3 OJ 2.0
## 4 18.5 VC 2.0
## 5 8.2 OJ 0.5
## 6 26.4 OJ 1.0
## 7 25.8 OJ 1.0
## 8 5.2 VC 0.5
## 9 6.4 VC 0.5
## 10 9.4 OJ 0.5
# Check the structure
str(my_data)
## 'data.frame': 60 obs. of 3 variables:
## $ len : num 4.2 11.5 7.3 5.8 6.4 10 11.2 11.2 5.2 7 ...
## $ supp: Factor w/ 2 levels "OJ","VC": 2 2 2 2 2 2 2 2 2 2 ...
## $ dose: num 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 ...
# Convert dose as a factor and recode the levels
# as "D0.5", "D1", "D2"
my_data$dose <- factor(my_data$dose,
levels = c(0.5, 1, 2),
labels = c("D0.5", "D1", "D2"))
head(my_data)
## len supp dose
## 1 4.2 VC D0.5
## 2 11.5 VC D0.5
## 3 7.3 VC D0.5
## 4 5.8 VC D0.5
## 5 6.4 VC D0.5
## 6 10.0 VC D0.5
table(my_data$supp, my_data$dose)
##
## D0.5 D1 D2
## OJ 10 10 10
## VC 10 10 10
# Box plot with multiple groups
# +++++++++++++++++++++
# Plot tooth length ("len") by groups ("dose")
# Color box plot by a second group: "supp"
library("ggpubr")
ggboxplot(my_data, x = "dose", y = "len", color = "supp",
palette = c("#00AFBB", "#E7B800"))
# Line plots with multiple groups
# +++++++++++++++++++++++
# Plot tooth length ("len") by groups ("dose")
# Color box plot by a second group: "supp"
# Add error bars: mean_se
# (other values include: mean_sd, mean_ci, median_iqr, ....)
library("ggpubr")
ggline(my_data, x = "dose", y = "len", color = "supp",
add = c("mean_se", "dotplot"),
palette = c("#00AFBB", "#E7B800"))
## Bin width defaults to 1/30 of the range of the data. Pick better value with
## `binwidth`.
# Box plot with two factor variables
boxplot(len ~ supp * dose, data=my_data, frame = FALSE,
col = c("#00AFBB", "#E7B800"), ylab="Tooth Length")
# Two-way interaction plot
interaction.plot(x.factor = my_data$dose, trace.factor = my_data$supp,
response = my_data$len, fun = mean,
type = "b", legend = TRUE,
xlab = "Dose", ylab="Tooth Length",
pch=c(1,19), col = c("#00AFBB", "#E7B800"))
res.aov2 <- aov(len ~ supp + dose, data = my_data)
summary(res.aov2)
## Df Sum Sq Mean Sq F value Pr(>F)
## supp 1 205.4 205.4 14.02 0.000429 ***
## dose 2 2426.4 1213.2 82.81 < 2e-16 ***
## Residuals 56 820.4 14.7
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Two-way ANOVA with interaction effect
# These two calls are equivalent
res.aov3 <- aov(len ~ supp * dose, data = my_data)
res.aov3 <- aov(len ~ supp + dose + supp:dose, data = my_data)
summary(res.aov3)
## Df Sum Sq Mean Sq F value Pr(>F)
## supp 1 205.4 205.4 15.572 0.000231 ***
## dose 2 2426.4 1213.2 92.000 < 2e-16 ***
## supp:dose 2 108.3 54.2 4.107 0.021860 *
## Residuals 54 712.1 13.2
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
require("dplyr")
group_by(my_data, supp, dose) %>%
summarise(
count = n(),
mean = mean(len, na.rm = TRUE),
sd = sd(len, na.rm = TRUE)
)
## `summarise()` has grouped output by 'supp'. You can override using the
## `.groups` argument.
## # A tibble: 6 × 5
## # Groups: supp [2]
## supp dose count mean sd
## <fct> <fct> <int> <dbl> <dbl>
## 1 OJ D0.5 10 13.2 4.46
## 2 OJ D1 10 22.7 3.91
## 3 OJ D2 10 26.1 2.66
## 4 VC D0.5 10 7.98 2.75
## 5 VC D1 10 16.8 2.52
## 6 VC D2 10 26.1 4.80
model.tables(res.aov3, type="means", se = TRUE)
## Tables of means
## Grand mean
##
## 18.81333
##
## supp
## supp
## OJ VC
## 20.663 16.963
##
## dose
## dose
## D0.5 D1 D2
## 10.605 19.735 26.100
##
## supp:dose
## dose
## supp D0.5 D1 D2
## OJ 13.23 22.70 26.06
## VC 7.98 16.77 26.14
##
## Standard errors for differences of means
## supp dose supp:dose
## 0.9376 1.1484 1.6240
## replic. 30 20 10
TukeyHSD(res.aov3, which = "dose")
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = len ~ supp + dose + supp:dose, data = my_data)
##
## $dose
## diff lwr upr p adj
## D1-D0.5 9.130 6.362488 11.897512 0.0e+00
## D2-D0.5 15.495 12.727488 18.262512 0.0e+00
## D2-D1 6.365 3.597488 9.132512 2.7e-06
library(multcomp)
summary(glht(res.aov2, linfct = mcp(dose = "Tukey")))
##
## Simultaneous Tests for General Linear Hypotheses
##
## Multiple Comparisons of Means: Tukey Contrasts
##
##
## Fit: aov(formula = len ~ supp + dose, data = my_data)
##
## Linear Hypotheses:
## Estimate Std. Error t value Pr(>|t|)
## D1 - D0.5 == 0 9.130 1.210 7.543 <1e-05 ***
## D2 - D0.5 == 0 15.495 1.210 12.802 <1e-05 ***
## D2 - D1 == 0 6.365 1.210 5.259 <1e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
#Pairwise t-test
pairwise.t.test(my_data$len, my_data$dose,
p.adjust.method = "BH")
##
## Pairwise comparisons using t tests with pooled SD
##
## data: my_data$len and my_data$dose
##
## D0.5 D1
## D1 1.0e-08 -
## D2 4.4e-16 1.4e-05
##
## P value adjustment method: BH
#Check the homogeneity of variance assumption
# 1. Homogeneity of variances
plot(res.aov3, 1)
library(car)
leveneTest(len ~ supp*dose, data = my_data)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 5 1.7086 0.1484
## 54
#Check the normality assumpttion
# 2. Normality
plot(res.aov3, 2)
# Extract the residuals
aov_residuals <- residuals(object = res.aov3)
# Run Shapiro-Wilk test
shapiro.test(x = aov_residuals )
##
## Shapiro-Wilk normality test
##
## data: aov_residuals
## W = 0.98499, p-value = 0.6694
#Compute two-way ANOVA test in R for unbalanced designs:
library(car)
my_anova <- aov(len ~ supp * dose, data = my_data)
Anova(my_anova, type = "III")
## Anova Table (Type III tests)
##
## Response: len
## Sum Sq Df F value Pr(>F)
## (Intercept) 1750.33 1 132.730 3.603e-16 ***
## supp 137.81 1 10.450 0.002092 **
## dose 885.26 2 33.565 3.363e-10 ***
## supp:dose 108.32 2 4.107 0.021860 *
## Residuals 712.11 54
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##MANOVA test: Multivariate analysis of variance:
# Store the data in the variable my_data
my_data <- iris
#Check your data:
# Show a random sample
set.seed(1234)
dplyr::sample_n(my_data, 10)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.2 3.5 1.5 0.2 setosa
## 2 5.7 2.6 3.5 1.0 versicolor
## 3 6.3 3.3 6.0 2.5 virginica
## 4 6.5 3.2 5.1 2.0 virginica
## 5 6.3 3.4 5.6 2.4 virginica
## 6 6.4 2.8 5.6 2.2 virginica
## 7 6.8 3.2 5.9 2.3 virginica
## 8 7.9 3.8 6.4 2.0 virginica
## 9 6.2 2.9 4.3 1.3 versicolor
## 10 7.1 3.0 5.9 2.1 virginica
#Compute MANOVA test
sepl <- iris$Sepal.Length
petl <- iris$Petal.Length
# MANOVA test
res.man <- manova(cbind(Sepal.Length, Petal.Length) ~ Species, data = iris)
summary(res.man)
## Df Pillai approx F num Df den Df Pr(>F)
## Species 2 0.9885 71.829 4 294 < 2.2e-16 ***
## Residuals 147
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Look to see which differ
summary.aov(res.man)
## Response Sepal.Length :
## Df Sum Sq Mean Sq F value Pr(>F)
## Species 2 63.212 31.606 119.26 < 2.2e-16 ***
## Residuals 147 38.956 0.265
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Petal.Length :
## Df Sum Sq Mean Sq F value Pr(>F)
## Species 2 437.10 218.551 1180.2 < 2.2e-16 ***
## Residuals 147 27.22 0.185
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##Kruskal-Wallis test:
my_data <- PlantGrowth
#Check your data
# print the head of the file
head(my_data)
## weight group
## 1 4.17 ctrl
## 2 5.58 ctrl
## 3 5.18 ctrl
## 4 6.11 ctrl
## 5 4.50 ctrl
## 6 4.61 ctrl
# Show the group levels
levels(my_data$group)
## [1] "ctrl" "trt1" "trt2"
my_data$group <- ordered(my_data$group,
levels = c("ctrl", "trt1", "trt2"))
library(dplyr)
group_by(my_data, group) %>%
summarise(
count = n(),
mean = mean(weight, na.rm = TRUE),
sd = sd(weight, na.rm = TRUE),
median = median(weight, na.rm = TRUE),
IQR = IQR(weight, na.rm = TRUE)
)
## # A tibble: 3 × 6
## group count mean sd median IQR
## <ord> <int> <dbl> <dbl> <dbl> <dbl>
## 1 ctrl 10 5.03 0.583 5.15 0.743
## 2 trt1 10 4.66 0.794 4.55 0.662
## 3 trt2 10 5.53 0.443 5.44 0.467
# Box plots
# ++++++++++++++++++++
# Plot weight by group and color by group
library("ggpubr")
ggboxplot(my_data, x = "group", y = "weight",
color = "group", palette = c("#00AFBB", "#E7B800", "#FC4E07"),
order = c("ctrl", "trt1", "trt2"),
ylab = "Weight", xlab = "Treatment")
# Mean plots
# ++++++++++++++++++++
# Plot weight by group
# Add error bars: mean_se
# (other values include: mean_sd, mean_ci, median_iqr, ....)
library("ggpubr")
ggline(my_data, x = "group", y = "weight",
add = c("mean_se", "jitter"),
order = c("ctrl", "trt1", "trt2"),
ylab = "Weight", xlab = "Treatment")
#Compute Kruskal-Wallis test
kruskal.test(weight ~ group, data = my_data)
##
## Kruskal-Wallis rank sum test
##
## data: weight by group
## Kruskal-Wallis chi-squared = 7.9882, df = 2, p-value = 0.01842
pairwise.wilcox.test(PlantGrowth$weight, PlantGrowth$group,
p.adjust.method = "BH")
## Warning in wilcox.test.default(xi, xj, paired = paired, ...): cannot compute
## exact p-value with ties
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: PlantGrowth$weight and PlantGrowth$group
##
## ctrl trt1
## trt1 0.199 -
## trt2 0.095 0.027
##
## P value adjustment method: BH