## R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the **Knit** button, a document will be generated that includes both content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

## Code

# Make the built-in ToothGrowth data set available in the session
data(ToothGrowth)

# Peek at the first six observations
head(ToothGrowth, n = 6L)
##    len supp dose
## 1  4.2   VC  0.5
## 2 11.5   VC  0.5
## 3  7.3   VC  0.5
## 4  5.8   VC  0.5
## 5  6.4   VC  0.5
## 6 10.0   VC  0.5

# Column-wise summary: quartiles for the numeric columns, counts for the factor
summary(ToothGrowth)
##       len        supp         dose      
##  Min.   : 4.20   OJ:30   Min.   :0.500  
##  1st Qu.:13.07   VC:30   1st Qu.:0.500  
##  Median :19.25           Median :1.000  
##  Mean   :18.81           Mean   :1.167  
##  3rd Qu.:25.27           3rd Qu.:2.000  
##  Max.   :33.90           Max.   :2.000

# Cross-tabulate supplement type (rows) against dose (columns)
table(ToothGrowth[["supp"]], ToothGrowth[["dose"]])
##     
##      0.5  1  2
##   OJ  10 10 10
##   VC  10 10 10
# Bar Chart of Supplement and Dose
library(ggplot2)

# Dodged bar chart of mean tooth length per dose, one bar per supplement.
# stat = "summary" with fun = "mean" lets ggplot2 compute the group means.
ggplot(ToothGrowth, aes(x = factor(dose), y = len, fill = supp)) +
  geom_bar(
    stat = "summary", fun = "mean",
    position = position_dodge(), color = "black"
  ) +
  # Custom fill color for each supplement type
  scale_fill_manual(values = c("VC" = "lightblue", "OJ" = "#FFD580")) +
  labs(
    title = "Effect of Vitamin C on Tooth Growth",
    subtitle = "Average tooth length by supplement type and dose",
    x = "Dose (mg/day)",
    y = "Tooth Length",
    fill = "Supplement Type"
  ) +
  theme_minimal(base_size = 14) +  # Larger base font for readability
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    legend.position = "top",  # Move legend to top for better visibility
    # Center the legend title. `legend.title.align` is deprecated as of
    # ggplot2 3.5.0; element_text(hjust) is the documented replacement.
    legend.title = element_text(hjust = 0.5)
  )

# Histogram of Tooth Length with Mean and Shapiro Wilk
library(ggplot2)

# Shapiro-Wilk normality test on the pooled tooth lengths
shapiro_test <- shapiro.test(ToothGrowth$len)

# Overall mean, computed once and reused for the reference line and subtitle
overall_mean <- mean(ToothGrowth$len)

# Histogram annotated with the mean and the Shapiro-Wilk p-value
ggplot(ToothGrowth, aes(x = len)) +
  geom_histogram(binwidth = 5, fill = "gray", color = "black", alpha = 0.7) +
  # Passing xintercept as a constant draws a single line; mapping it inside
  # aes() would draw one overlapping line per data row. `linewidth` replaces
  # the `size` aesthetic for lines, deprecated in ggplot2 3.4.0.
  geom_vline(
    xintercept = overall_mean,
    color = "red", linetype = "dashed", linewidth = 1.5
  ) +
  labs(
    title = "Distribution of Overall Tooth Lengths",
    subtitle = sprintf(
      "Overall mean: %.2f, Shapiro-Wilk p-value: %.3f",
      overall_mean, shapiro_test$p.value
    ),
    x = "Tooth Length (micrometers)",
    y = "Frequency"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = 14, hjust = 0.5),
    legend.position = "none"
  )

# Create Confidence Intervals
# Load necessary libraries
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(broom)

# 95% confidence interval for the mean tooth length of each supplement type.
# A one-sample t.test() is run per group; broom::tidy() turns each result into
# a one-row data frame that is stored in a list-column and then unnested.
supp_t_tests <- summarise(
  group_by(ToothGrowth, supp),
  t_test_results = list(tidy(t.test(len, conf.level = 0.95)))
)
ci_by_supp <- select(
  unnest(supp_t_tests, t_test_results),
  supp, estimate, conf.low, conf.high
)

print(ci_by_supp)
## # A tibble: 2 × 4
##   supp  estimate conf.low conf.high
##   <fct>    <dbl>    <dbl>     <dbl>
## 1 OJ        20.7     18.2      23.1
## 2 VC        17.0     13.9      20.0
# 95% confidence interval for the mean tooth length of every supplement/dose
# combination. A one-sample t.test() is run per group and tidied into one row.
ci_by_supp_dose <- ToothGrowth %>%
  group_by(supp, dose) %>%
  summarise(
    t_test_results = list(tidy(t.test(len, conf.level = 0.95))),
    # Drop the grouping explicitly: without this the result stays grouped by
    # `supp` and summarise() emits a "grouped output" message.
    .groups = "drop"
  ) %>%
  unnest(t_test_results) %>%
  select(supp, dose, estimate, conf.low, conf.high)
print(ci_by_supp_dose)
## # A tibble: 6 × 5
##   supp   dose estimate conf.low conf.high
##   <fct> <dbl>    <dbl>    <dbl>     <dbl>
## 1 OJ      0.5    13.2     10.0      16.4 
## 2 OJ      1      22.7     19.9      25.5 
## 3 OJ      2      26.1     24.2      28.0 
## 4 VC      0.5     7.98     6.02      9.94
## 5 VC      1      16.8     15.0      18.6 
## 6 VC      2      26.1     22.7      29.6
# Bar Chart with Significance
# Load necessary libraries
library(ggplot2)
library(dplyr)

# Data setup: reload the data set and treat dose as a categorical variable.
# This overwrites ToothGrowth$dose, so all code that runs afterwards sees a
# factor instead of a numeric column.
data("ToothGrowth")
ToothGrowth$dose <- as.factor(ToothGrowth$dose)
dose_levels <- levels(ToothGrowth$dose)

# Mean tooth length and standard error of the mean per dose/supplement cell
summary_data <- ToothGrowth %>%
  group_by(dose, supp) %>%
  summarise(mean_len = mean(len), se = sd(len) / sqrt(n()), .groups = "drop")

# Two-sample t-test of len between the supp types, run separately per dose
t_tests <- by(ToothGrowth, ToothGrowth$dose, function(subdata) {
  t.test(len ~ supp, data = subdata)
})

# Extract one p-value per dose. vapply() enforces a numeric scalar per test,
# unlike sapply(), whose return type depends on its input.
p_values <- vapply(t_tests, function(x) x$p.value, numeric(1))
names(p_values) <- dose_levels  # by() returns results in factor-level order
significance_levels <- ifelse(p_values < 0.05, "p < 0.05", "ns")

# Highest point reached by an error bar within each dose
bar_tops <- tapply(
  summary_data$mean_len + summary_data$se,
  summary_data$dose,
  max
)

# One annotation row per dose. Both lookups are indexed by dose name so the
# label and its y position cannot drift apart.
annotations <- data.frame(
  dose = dose_levels,
  y = as.numeric(bar_tops[dose_levels]) + 1,  # 1 unit above the error bar
  label = unname(significance_levels[dose_levels])
)

# Dodged bars with +/- 1 SE error bars and a significance label per dose
p <- ggplot(summary_data, aes(x = dose, y = mean_len, fill = supp)) +
  geom_bar(
    stat = "identity",
    position = position_dodge(width = 0.9), width = 0.8
  ) +
  geom_errorbar(
    aes(ymin = mean_len - se, ymax = mean_len + se),
    width = 0.25, position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(values = c("VC" = "lightblue", "OJ" = "#FFD580")) +
  labs(
    title = "Effect of Vitamin C on Tooth Growth",
    subtitle = "Average tooth length by supplement type and dose",
    x = "Dose (mg/day)",
    y = "Tooth Length",
    fill = "Supplement Type"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    legend.position = "top",
    # `legend.title.align` is deprecated as of ggplot2 3.5.0;
    # element_text(hjust) is the documented replacement.
    legend.title = element_text(hjust = 0.5)
  ) +
  # The annotation layer has its own data and no `fill`/`mean_len` columns,
  # so it must not inherit the plot-level aesthetics.
  geom_text(
    data = annotations,
    aes(x = dose, y = y, label = label),
    inherit.aes = FALSE, vjust = -0.5
  )

# Print the plot
print(p)

# Dose labels associated with each p-value.
# NOTE(review): `doses` is not referenced by any code visible in this file;
# confirm whether it is still needed.
doses <- c("0.5", "1", "2")

# Tooth Growth at Different Doses
library(ggplot2)

# Keep only the 1 and 2 mg/day observations. The same subset feeds both the
# t-test and the plot, so the reported p-value always matches the bars shown.
subset_data <- ToothGrowth[ToothGrowth$dose %in% c(1, 2), ]

# Two-sided t-test without assuming equal variances (var.equal = FALSE)
growth_1mg <- subset_data$len[subset_data$dose == 1]
growth_2mg <- subset_data$len[subset_data$dose == 2]
t_test_result <- t.test(
  growth_1mg, growth_2mg,
  alternative = "two.sided", var.equal = FALSE
)

# Bar chart of the mean tooth length per dose, with error bars from mean_se
p <- ggplot(
  subset_data,
  aes(x = factor(dose), y = len, fill = factor(dose))
) +
  geom_bar(
    stat = "summary", fun = "mean",
    position = position_dodge(), color = "black", width = 0.5
  ) +
  geom_errorbar(
    stat = "summary", fun.data = mean_se,
    width = 0.2, position = position_dodge(0.5)
  ) +
  scale_fill_manual(values = c("1" = "lightgreen", "2" = "#CBC3E3")) +
  labs(
    title = "Comparison of Tooth Growth at Different Doses",
    subtitle = sprintf("T-test p-value: %.6f", t_test_result$p.value),
    x = "Dose (mg/day)",
    y = "Average Tooth Length",
    fill = "Dose"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    legend.position = "top",
    # `legend.title.align` is deprecated as of ggplot2 3.5.0;
    # element_text(hjust) is the documented replacement.
    legend.title = element_text(hjust = 0.5)
  )

# Print the plot
print(p)