This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown, see <http://rmarkdown.rstudio.com>.
When you click the **Knit** button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

## Code
# Bring the built-in ToothGrowth dataset into the workspace.
data(list = "ToothGrowth")
# Preview the first six observations.
head(ToothGrowth, n = 6L)
## len supp dose
## 1 4.2 VC 0.5
## 2 11.5 VC 0.5
## 3 7.3 VC 0.5
## 4 5.8 VC 0.5
## 5 6.4 VC 0.5
## 6 10.0 VC 0.5
# Per-column summary: quantiles and mean for numeric columns, counts for the factor.
summary(object = ToothGrowth)
## len supp dose
## Min. : 4.20 OJ:30 Min. :0.500
## 1st Qu.:13.07 VC:30 1st Qu.:0.500
## Median :19.25 Median :1.000
## Mean :18.81 Mean :1.167
## 3rd Qu.:25.27 3rd Qu.:2.000
## Max. :33.90 Max. :2.000
# Cross-tabulate supplement type (rows) against dose (columns).
table(ToothGrowth[["supp"]], ToothGrowth[["dose"]])
##
## 0.5 1 2
## OJ 10 10 10
## VC 10 10 10
# Bar Chart of Supplement and Dose
library(ggplot2)
# Mean tooth length per dose, with one dodged bar per supplement type.
ggplot(ToothGrowth, aes(x = factor(dose), y = len, fill = supp)) +
  # stat = "summary" with fun = "mean" reduces each dose/supplement group to its mean.
  geom_bar(
    stat = "summary", fun = "mean",
    position = position_dodge(), color = "black"
  ) +
  # Custom fill colour for each supplement type.
  scale_fill_manual(values = c("VC" = "lightblue", "OJ" = "#FFD580")) +
  labs(
    title = "Effect of Vitamin C on Tooth Growth",
    subtitle = "Average tooth length by supplement type and dose",
    x = "Dose (mg/day)",
    y = "Tooth Length",
    fill = "Supplement Type"
  ) +
  # Larger base font size for readability.
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    # Legend goes above the plot.
    legend.position = "top",
    # Centre the legend title. `legend.title.align` is deprecated as of
    # ggplot2 3.5.0; element_text(hjust) is the documented replacement.
    legend.title = element_text(hjust = 0.5)
  )
# Histogram of Tooth Length with Mean and Shapiro Wilk
library(ggplot2)
# Shapiro-Wilk normality test on all tooth lengths; its p-value is reported in the subtitle.
shapiro_test <- shapiro.test(ToothGrowth$len)
# Compute the overall mean once so the reference line and the subtitle use the same value.
overall_mean_len <- mean(ToothGrowth$len)
# Histogram of tooth length with a dashed reference line at the overall mean.
ggplot(ToothGrowth, aes(x = len)) +
  geom_histogram(binwidth = 5, fill = "gray", color = "black", alpha = 0.7) +
  # A constant xintercept draws a single line; mapping mean(len) inside aes()
  # would repeat the same line for every row of the data.
  # `linewidth` replaces the `size` aesthetic, deprecated for lines in ggplot2 3.4.0.
  geom_vline(
    xintercept = overall_mean_len,
    color = "red", linetype = "dashed", linewidth = 1.5
  ) +
  labs(
    title = "Distribution of Overall Tooth Lengths",
    subtitle = sprintf(
      "Overall mean: %.2f, Shapiro-Wilk p-value: %.3f",
      overall_mean_len, shapiro_test$p.value
    ),
    x = "Tooth Length (micrometers)",
    y = "Frequency"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = 14, hjust = 0.5),
    legend.position = "none"
  )
# Create Confidence Intervals
# Load necessary libraries
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(broom)
# 95% t-based confidence interval for the mean tooth length of each supplement type.
# Each group's one-sample t-test is tidied into a one-row data frame, stored in a
# list-column, then unnested so the estimate and interval bounds become columns.
ci_by_supp <- ToothGrowth %>%
  group_by(supp) %>%
  summarise(
    t_test_results = list(tidy(t.test(x = len, conf.level = 0.95)))
  ) %>%
  unnest(cols = t_test_results) %>%
  select(supp, estimate, conf.low, conf.high)
print(ci_by_supp)
## # A tibble: 2 × 4
## supp estimate conf.low conf.high
## <fct> <dbl> <dbl> <dbl>
## 1 OJ 20.7 18.2 23.1
## 2 VC 17.0 13.9 20.0
# Calculate the confidence interval for mean tooth lengths by supplement type and dose
# Each supp/dose cell gets a one-sample t-test; tidy() turns it into a one-row
# data frame that is stored in a list-column and then unnested.
ci_by_supp_dose <- ToothGrowth %>%
  group_by(supp, dose) %>%
  summarise(
    t_test_results = list(tidy(t.test(len, conf.level = 0.95))),
    # Drop the grouping so the result is a plain tibble; without this,
    # summarise() leaves it grouped by `supp` and emits a message about it.
    .groups = "drop"
  ) %>%
  unnest(t_test_results) %>%
  select(supp, dose, estimate, conf.low, conf.high)
print(ci_by_supp_dose)
## # A tibble: 6 × 5
## supp dose estimate conf.low conf.high
## <fct> <dbl> <dbl> <dbl> <dbl>
## 1 OJ 0.5 13.2 10.0 16.4
## 2 OJ 1 22.7 19.9 25.5
## 3 OJ 2 26.1 24.2 28.0
## 4 VC 0.5 7.98 6.02 9.94
## 5 VC 1 16.8 15.0 18.6
## 6 VC 2 26.1 22.7 29.6
# Bar Chart with Significance
# Load necessary libraries
library(ggplot2)
library(dplyr)
# Data setup: reload the dataset and treat dose as a categorical variable.
# Note that this overwrites ToothGrowth$dose in the workspace with a factor.
data("ToothGrowth")
ToothGrowth$dose <- as.factor(ToothGrowth$dose)
# Mean tooth length and standard error of the mean for every dose/supplement cell.
summary_data <- ToothGrowth %>%
  group_by(dose, supp) %>%
  summarise(mean_len = mean(len), se = sd(len) / sqrt(n()), .groups = "drop")
# Two-sample t-test comparing the supplement types within each dose level.
t_tests <- by(ToothGrowth, ToothGrowth$dose, function(subdata) {
  t.test(len ~ supp, data = subdata)
})
# Extract one p-value per dose. vapply() enforces a numeric result of length one
# per test, whereas sapply() would silently change its return type on odd input.
p_values <- vapply(t_tests, function(x) x$p.value, numeric(1))
significance_levels <- ifelse(p_values < 0.05, "p < 0.05", "ns")
# One annotation per dose, placed one unit above the taller error bar of that dose.
# p_values, the tapply() result and levels() all follow the factor-level order.
annotations <- data.frame(
  dose = levels(ToothGrowth$dose),
  # as.vector() drops the 1-d array attributes that tapply() returns.
  y = as.vector(with(summary_data, tapply(mean_len + se, dose, max) + 1)),
  label = significance_levels
)
# Dodged bars with +/- 1 SE error bars and a significance label above each dose.
p <- ggplot(summary_data, aes(x = dose, y = mean_len, fill = supp)) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.9), width = 0.8) +
  geom_errorbar(
    aes(ymin = mean_len - se, ymax = mean_len + se),
    width = 0.25, position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = c("VC" = "lightblue", "OJ" = "#FFD580")) +
  labs(
    title = "Effect of Vitamin C on Tooth Growth",
    subtitle = "Average tooth length by supplement type and dose",
    x = "Dose (mg/day)",
    y = "Tooth Length",
    fill = "Supplement Type"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    legend.position = "top",
    # `legend.title.align` is deprecated as of ggplot2 3.5.0;
    # element_text(hjust) is the documented replacement.
    legend.title = element_text(hjust = 0.5)
  ) +
  # The annotation layer has its own data, so it must not inherit the fill/y mappings.
  geom_text(
    data = annotations, aes(x = dose, y = y, label = label),
    inherit.aes = FALSE, vjust = -0.5
  )
# Print the plot
print(p)
# Dose labels, as strings, in the same order as the per-dose p-values.
doses <- as.character(c(0.5, 1, 2))
# Tooth Growth at Different Doses
library(ggplot2)
# Two-sided t-test, without assuming equal variances, comparing tooth length
# at dose 1 against dose 2.
growth_1mg <- ToothGrowth$len[ToothGrowth$dose == 1]
growth_2mg <- ToothGrowth$len[ToothGrowth$dose == 2]
t_test_result <- t.test(
  growth_1mg, growth_2mg,
  alternative = "two.sided", var.equal = FALSE
)
# Keep only the two doses being compared so the plot shows just those bars.
subset_data <- ToothGrowth[ToothGrowth$dose %in% c(1, 2), ]
# Mean tooth length per dose with error bars from mean_se; the t-test p-value
# is reported in the subtitle.
p <- ggplot(subset_data, aes(x = factor(dose), y = len, fill = factor(dose))) +
  geom_bar(
    stat = "summary", fun = "mean",
    position = position_dodge(), color = "black", width = 0.5
  ) +
  geom_errorbar(
    stat = "summary", fun.data = mean_se,
    width = 0.2, position = position_dodge(0.5)
  ) +
  scale_fill_manual(values = c("1" = "lightgreen", "2" = "#CBC3E3")) +
  labs(
    title = "Comparison of Tooth Growth at Different Doses",
    subtitle = sprintf("T-test p-value: %.6f", t_test_result$p.value),
    x = "Dose (mg/day)",
    y = "Average Tooth Length",
    fill = "Dose"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    legend.position = "top",
    # `legend.title.align` is deprecated as of ggplot2 3.5.0;
    # element_text(hjust) is the documented replacement.
    legend.title = element_text(hjust = 0.5)
  )
# Print the plot
print(p)