library(tidyverse) # graphing and wrangling
library(knitr) # for kable tables
data("ToothGrowth")
The ToothGrowth data set contains the results of an experiment studying the effect of vitamin C on tooth growth in 60 guinea pigs. Each animal received one of three dose levels of vitamin C (0.5, 1, or 2 mg/day) by one of two delivery methods: orange juice (coded as OJ) or ascorbic acid (a form of vitamin C, coded as VC).
\(~\)
# Dimensions of the data set: number of rows, then number of columns
dim(ToothGrowth)
## [1] 60 3
# Summary of the data: quartiles and mean for the numeric columns,
# counts per level for the factor column
summary(ToothGrowth)
## len supp dose
## Min. : 4.20 OJ:30 Min. :0.500
## 1st Qu.:13.07 VC:30 1st Qu.:0.500
## Median :19.25 Median :1.000
## Mean :18.81 Mean :1.167
## 3rd Qu.:25.27 3rd Qu.:2.000
## Max. :33.90 Max. :2.000
A data frame with 60 observations and 3 variables: `len`: tooth length; `supp`: supplement type (VC or OJ); `dose`: dose in milligrams per day.
\(~\)
# Box plots of tooth length at each dose level, one panel per delivery
# method. The fill colour only repeats the panel label, so the legend is
# switched off.
ggplot(data = ToothGrowth, mapping = aes(x = as.factor(dose), y = len)) +
  geom_boxplot(mapping = aes(fill = supp)) +
  facet_wrap(~supp) +
  scale_fill_brewer(palette = "Dark2") +
  labs(
    title = " Effect of Vitamin C on guinea pig tooth growth",
    subtitle = "Orange Juice (OJ) and Ascorbic Acid (VC) administered at 0.5, 1, 2 (mg)"
  ) +
  xlab("Dose (mg)") +
  ylab("Tooth length") +
  theme_bw() +
  theme(legend.position = "none")
\(~\)
# Average tooth length for every supplement/dose combination, drawn as one
# line per supplement with a coloured point at each dose level.
ToothGrowth %>%
  group_by(supp, dose) %>%
  # .groups = "drop" hands an ungrouped table to ggplot and silences the
  # "summarise() has grouped output by 'supp'" message.
  summarise(mean = mean(len), .groups = "drop") %>%
  ggplot(aes(as.factor(dose), mean)) +
  # group = supp is required: the x axis is a factor, so without it
  # geom_line() would not know which points belong to the same line.
  geom_line(aes(group = supp)) +
  geom_point(aes(color = supp), size = 4) +
  scale_color_brewer(palette = "Dark2") +
  labs(
    title = " Effect of Vitamin C on guinea pig tooth growth",
    subtitle = "Average tooth length per treatment",
    x = "Dose (mg)",
    y = "Average tooth length"
  ) +
  theme_bw() +
  theme(legend.position = "bottom")
## `summarise()` has grouped output by 'supp'. You can override using the `.groups` argument.
\(~\)
# Table of mean tooth length for every supplement/dose combination.
kable(
  ToothGrowth %>%
    group_by(supp, dose) %>%
    # Drop the leftover grouping by `supp`; this also silences the
    # "summarise() has grouped output" message in the rendered report.
    summarise(mean = mean(len), .groups = "drop")
)
## `summarise()` has grouped output by 'supp'. You can override using the `.groups` argument.
| supp | dose | mean |
|---|---|---|
| OJ | 0.5 | 13.23 |
| OJ | 1.0 | 22.70 |
| OJ | 2.0 | 26.06 |
| VC | 0.5 | 7.98 |
| VC | 1.0 | 16.77 |
| VC | 2.0 | 26.14 |
\(~\)
# By dose: mean tooth length at each dose level, both supplements pooled
ToothGrowth %>%
  group_by(dose) %>%
  summarise(mean = mean(len)) %>%
  kable()
| dose | mean |
|---|---|
| 0.5 | 10.605 |
| 1.0 | 19.735 |
| 2.0 | 26.100 |
\(~\)
# By supplement: mean tooth length per delivery method, all doses pooled
ToothGrowth %>%
  group_by(supp) %>%
  summarise(mean = mean(len)) %>%
  kable()
| supp | mean |
|---|---|
| OJ | 20.66333 |
| VC | 16.96333 |
\(~\)
# Add an observation index so that each measurement gets its own x position.
# row_number() follows the actual number of rows instead of a hard-coded
# 1:60, so this step keeps working if the data are filtered or extended.
ToothGrowth <- ToothGrowth %>% mutate(id = row_number())

# Tooth length of every observation, coloured by dose, one panel per
# supplement. The legend title is blanked; the colours are the dose levels.
ToothGrowth %>%
  ggplot(aes(x = id, y = len)) +
  geom_point(aes(color = as.factor(dose)), size = 2.5) +
  facet_wrap(~supp) +
  scale_color_brewer(palette = "Dark2") +
  theme_bw() +
  theme(
    legend.title = element_blank(),
    legend.position = "bottom"
  )
\(~\)
\(~\)
# Normality test
# ggpubr supplies ggqqplot(), used for the Q-Q plot below
library(ggpubr)
# Q-Q plot of all tooth lengths
ggqqplot(ToothGrowth$len)
# Shapiro-Wilk test on the same values.
# NOTE(review): both calls use every tooth length pooled across all
# supplement/dose groups, while the t-tests further down compare individual
# groups -- confirm that pooled normality is the intended check.
shapiro.test(ToothGrowth$len)
##
## Shapiro-Wilk normality test
##
## data: ToothGrowth$len
## W = 0.96743, p-value = 0.1091
The p-value is greater than 0.05, so the distribution of the data is not significantly different from a normal distribution.
\(~\)
# Orange Juice vs. Ascorbic Acid: 0.5mg
# Orange Juice vs. Ascorbic Acid: 1 mg
# Orange Juice vs. Ascorbic Acid: 2mg
# Orange Juice vs. Ascorbic Acid: All
#
# Welch two-sample t-tests of tooth length between the two delivery methods,
# once per dose level and once on the full data set. Group_C holds the dose
# the comparison was restricted to ("All" = no restriction).
OJ_v_VC <- data.frame(
  Group_A = rep("Orange Juice", 4),
  Group_B = rep("Ascorbic Acid", 4),
  # Written as strings on purpose: mixing numbers with "All" would coerce
  # the whole column to character anyway.
  Group_C = c("0.5", "1", "2", "All"),
  p.value = vapply(
    list(
      ToothGrowth %>% filter(dose == 0.5),
      ToothGrowth %>% filter(dose == 1),
      ToothGrowth %>% filter(dose == 2),
      ToothGrowth
    ),
    # `paired` is deliberately not passed: the formula method of t.test()
    # runs an unpaired test by default and rejects the argument on R >= 4.4.
    function(subset_df) t.test(len ~ supp, data = subset_df)$p.value,
    numeric(1)
  )
)

# Label each comparison at the 5% level. `>=` closes the gap the previous
# `< 0.05` / `> 0.05` pair left at exactly 0.05 (which produced NA).
OJ_v_VC <- OJ_v_VC %>%
  mutate(significance = case_when(
    p.value < 0.05 ~ "Significant",
    p.value >= 0.05 ~ "Not Significant"
  ))
kable(OJ_v_VC)
| Group_A | Group_B | Group_C | p.value | significance |
|---|---|---|---|---|
| Orange Juice | Ascorbic Acid | 0.5 | 0.0063586 | Significant |
| Orange Juice | Ascorbic Acid | 1 | 0.0010384 | Significant |
| Orange Juice | Ascorbic Acid | 2 | 0.9638516 | Not Significant |
| Orange Juice | Ascorbic Acid | All | 0.0606345 | Not Significant |
\(~\)
# Orange Juice: 0.5mg vs 1mg,
# Orange Juice: 0.5mg vs 2mg,
# Orange Juice: 1mg vs 2mg
#
# Pairwise Welch t-tests of tooth length between dose levels within the
# orange-juice group. Group_B and Group_C are the two doses compared;
# conf.int.1 / conf.int.2 are the lower / upper bounds of the confidence
# interval reported by t.test().
#
# NOTE: the rows are selected with `dose %in% pair`. The previous
# `dose == c(a, b)` recycled the two values along the column and compared
# them position by position, so only part of each dose group reached the
# test. Any table rendered with the old filter must be regenerated.
OJ <- local({
  dose_pairs <- list(c(0.5, 1), c(0.5, 2), c(1, 2))

  # Run each comparison once and reuse the result for all three columns.
  # `paired` is not passed: the formula method of t.test() is unpaired by
  # default and rejects the argument on R >= 4.4.
  tests <- lapply(dose_pairs, function(pair) {
    t.test(
      len ~ dose,
      data = ToothGrowth %>% filter(supp == "OJ", dose %in% pair)
    )
  })

  data.frame(
    Group_A = rep("Orange Juice", 3),
    Group_B = vapply(dose_pairs, function(pair) pair[1], numeric(1)),
    Group_C = vapply(dose_pairs, function(pair) pair[2], numeric(1)),
    conf.int.1 = vapply(tests, function(tt) tt$conf.int[1], numeric(1)),
    conf.int.2 = vapply(tests, function(tt) tt$conf.int[2], numeric(1)),
    p.value = vapply(tests, function(tt) tt$p.value, numeric(1))
  )
})

# Label each comparison at the 5% level. `>=` closes the gap the previous
# `< 0.05` / `> 0.05` pair left at exactly 0.05 (which produced NA).
OJ <- OJ %>%
  mutate(significance = case_when(
    p.value < 0.05 ~ "Significant",
    p.value >= 0.05 ~ "Not Significant"
  ))
kable(OJ)
| Group_A | Group_B | Group_C | conf.int.1 | conf.int.2 | p.value | significance |
|---|---|---|---|---|---|---|
| Orange Juice | 0.5 | 1 | -14.94689 | -5.6131139 | 0.0012455 | Significant |
| Orange Juice | 0.5 | 2 | -16.94335 | -7.0966531 | 0.0005397 | Significant |
| Orange Juice | 1.0 | 2 | -11.23644 | -0.1635575 | 0.0450953 | Significant |
\(~\)
# Ascorbic Acid: 0.5mg vs 1mg,
# Ascorbic Acid: 0.5mg vs 2mg,
# Ascorbic Acid: 1mg vs 2mg,
#
# Pairwise Welch t-tests of tooth length between dose levels within the
# ascorbic-acid group. Group_B and Group_C are the two doses compared;
# conf.int.1 / conf.int.2 are the lower / upper bounds of the confidence
# interval reported by t.test().
#
# NOTE: the rows are selected with `dose %in% pair`. The previous
# `dose == c(a, b)` recycled the two values along the column and compared
# them position by position, so only part of each dose group reached the
# test. Any table rendered with the old filter must be regenerated.
VC <- local({
  dose_pairs <- list(c(0.5, 1), c(0.5, 2), c(1, 2))

  # Run each comparison once and reuse the result for all three columns.
  # `paired` is not passed: the formula method of t.test() is unpaired by
  # default and rejects the argument on R >= 4.4.
  tests <- lapply(dose_pairs, function(pair) {
    t.test(
      len ~ dose,
      data = ToothGrowth %>% filter(supp == "VC", dose %in% pair)
    )
  })

  data.frame(
    Group_A = rep("Ascorbic Acid", 3),
    Group_B = vapply(dose_pairs, function(pair) pair[1], numeric(1)),
    Group_C = vapply(dose_pairs, function(pair) pair[2], numeric(1)),
    conf.int.1 = vapply(tests, function(tt) tt$conf.int[1], numeric(1)),
    conf.int.2 = vapply(tests, function(tt) tt$conf.int[2], numeric(1)),
    p.value = vapply(tests, function(tt) tt$p.value, numeric(1))
  )
})

# Label each comparison at the 5% level. `>=` closes the gap the previous
# `< 0.05` / `> 0.05` pair left at exactly 0.05 (which produced NA).
VC <- VC %>%
  mutate(significance = case_when(
    p.value < 0.05 ~ "Significant",
    p.value >= 0.05 ~ "Not Significant"
  ))
kable(VC)
| Group_A | Group_B | Group_C | conf.int.1 | conf.int.2 | p.value | significance |
|---|---|---|---|---|---|---|
| Ascorbic Acid | 0.5 | 1 | -12.65876 | -6.061241 | 0.0005573 | Significant |
| Ascorbic Acid | 0.5 | 2 | -25.62861 | -11.651394 | 0.0007094 | Significant |
| Ascorbic Acid | 1.0 | 2 | -15.32237 | -1.037628 | 0.0305694 | Significant |
\(~\)