# Load required packages ---------------------------
library(ggplot2)
library(knitr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
Part 2 of the project evaluates how supplement type affects tooth length at each dose level. This part of the project uses the ToothGrowth dataset that ships with R.
# Bring the ToothGrowth dataset into the session ----
data("ToothGrowth")
tooth_data <- ToothGrowth
# Column types, followed by the distinct dose levels
tooth_data |> str()
## 'data.frame': 60 obs. of 3 variables:
## $ len : num 4.2 11.5 7.3 5.8 6.4 10 11.2 11.2 5.2 7 ...
## $ supp: Factor w/ 2 levels "OJ","VC": 2 2 2 2 2 2 2 2 2 2 ...
## $ dose: num 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 ...
tooth_data[["dose"]] |> unique()
## [1] 0.5 1.0 2.0
# Treat dose as a categorical variable for grouping and plotting
tooth_data[["dose"]] <- factor(tooth_data[["dose"]])
# Column types after the conversion
tooth_data |> str()
## 'data.frame': 60 obs. of 3 variables:
## $ len : num 4.2 11.5 7.3 5.8 6.4 10 11.2 11.2 5.2 7 ...
## $ supp: Factor w/ 2 levels "OJ","VC": 2 2 2 2 2 2 2 2 2 2 ...
## $ dose: Factor w/ 3 levels "0.5","1","2": 1 1 1 1 1 1 1 1 1 1 ...
# Per-column summary, rendered as a table ----------
summary_stats <- tooth_data |> summary()
summary_stats |>
  kable(caption = "Summary Statistics for ToothGrowth Dataset")
len | supp | dose | |
---|---|---|---|
Min. : 4.20 | OJ:30 | 0.5:20 | |
1st Qu.:13.07 | VC:30 | 1 :20 | |
Median :19.25 | NA | 2 :20 | |
Mean :18.81 | NA | NA | |
3rd Qu.:25.27 | NA | NA | |
Max. :33.90 | NA | NA |
# Boxplot of tooth length by supplement type and dose ---------------
# One box per supplement within each dose level, filled by supplement.
# The x-axis unit is mg/day, which is how the ToothGrowth documentation
# describes the dose column and how the surrounding text refers to it.
ggplot(tooth_data, aes(x = dose, y = len, fill = supp)) +
  geom_boxplot() +
  scale_fill_manual(values = c("OJ" = "coral", "VC" = "cornsilk")) +
  labs(
    title = "Tooth Length by Supplement Type and Dose",
    x = "Dose (mg/day)",
    y = "Tooth Length"
  ) +
  theme_classic()
From the plot, we observe that the difference in tooth length between the two supplements is larger at the 0.5 mg/day and 1.0 mg/day doses than at the 2.0 mg/day dose. There is also an observable trend: as the dose increases, so does the tooth length.
Additionally, we can surmise that at the smaller doses (0.5 mg/day and 1.0 mg/day) supplement OJ is more effective than supplement VC, while at 2.0 mg/day the two supplements produce similar growth.
# Mean tooth length per supplement at each dose ----------------------
# One row per dose, one column per supplement.
mean_diff <- tooth_data |>
  group_by(supp, dose) |>
  summarise(mean_len = mean(len), .groups = "drop") |>
  pivot_wider(names_from = supp, values_from = mean_len)
# Size of the gap between the two supplement means, ignoring direction
mean_diff$abs_diff <- abs(mean_diff$VC - mean_diff$OJ)
mean_diff |>
  kable(
    caption = "Mean Tooth Length by Supplement and Dose with Absolute Differences"
  )
dose | OJ | VC | abs_diff |
---|---|---|---|
0.5 | 13.23 | 7.98 | 5.25 |
1 | 22.70 | 16.77 | 5.93 |
2 | 26.06 | 26.14 | 0.08 |
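The hypotheses, tested separately at each dose level, are:
- Null hypothesis (H0): the mean tooth length is the same for supplements OJ and VC.
- Alternative hypothesis (Ha): the mean tooth length differs between supplements OJ and VC.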
Although the difference between the two supplements at 2.0 mg/day is small, a t-test will be performed at every dose level.
# Split the data by dose level. dose was converted to a factor earlier in
# the script, so compare against the level labels rather than relying on
# implicit factor-to-number coercion.
dose_half <- filter(tooth_data, dose == "0.5")
dose_one <- filter(tooth_data, dose == "1")
dose_two <- filter(tooth_data, dose == "2")
# Independent t-tests -----------------------------------------------
# t.test() defaults apply: two-sided, unpaired, unequal variances (Welch).
# With supp levels OJ and VC, the interval is for mean(OJ) - mean(VC).
t_half <- t.test(len ~ supp, data = dose_half)
t_one <- t.test(len ~ supp, data = dose_one)
t_two <- t.test(len ~ supp, data = dose_two)
# Build the summary table from the test objects instead of typing the
# numbers in by hand, so the table cannot drift from the actual results.
# NOTE(review): 0.05 is assumed as the significance level; the decisions
# previously hard-coded here are consistent with it.
alpha <- 0.05
t_tests <- list(t_half, t_one, t_two)
p_values <- vapply(t_tests, \(tt) tt$p.value, numeric(1))
t_test_summary <- data.frame(
  dose = c(0.5, 1.0, 2.0),
  # 95% confidence interval bounds, formatted as "lower, upper"
  conf_int = vapply(
    t_tests,
    \(tt) sprintf("%.2f, %.2f", tt$conf.int[1], tt$conf.int[2]),
    character(1)
  ),
  p_value = round(p_values, 4),
  decision = ifelse(p_values < alpha, "Reject null", "Do not reject null")
)
kable(t_test_summary, caption = "T-Test Results by Dose Level")
dose | conf_int | p_value | decision |
---|---|---|---|
0.5 | 1.72, 8.78 | 0.0064 | Reject null |
1.0 | 2.80, 9.06 | 0.0010 | Reject null |
2.0 | -3.80, 3.64 | 0.9639 | Do not reject null |
As expected, the p-values for the 0.5 mg/day and 1.0 mg/day doses are very small, reflecting the larger differences in mean tooth length between the two supplements at those doses.
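Thus:
- At 0.5 mg/day and 1.0 mg/day, the null hypothesis is rejected: mean tooth length differs between OJ and VC.
- At 2.0 mg/day, the null hypothesis is not rejected: there is no evidence of a difference between OJ and VC.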
The central assumption behind these results is that the sample is representative of the population and that the observations are independent and identically distributed (IID).
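For the t.test, two assumptions are made:
- The two supplement groups are independent, so the test is unpaired (paired = FALSE).
- The two groups are not assumed to have equal variances (var.equal = FALSE, i.e. Welch's t-test).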
With that, in reviewing the t-test results, supplement type OJ is more effective than VC at doses of 0.5 mg/day and 1.0 mg/day. At 2.0 mg/day, however, there is no significant difference between the supplement types.