Group 2 - Tooth Growth

# loading required libraries 
library(tidyverse)
library(ggfortify)
library(ggplot2)

# loading required data: makes the ToothGrowth data frame
# (60 observations of len, supp and dose) available in the workspace
data("ToothGrowth")

Data Exploration

Question 1 - Describing and summarising tooth dataset

This dataset consists of 3 variables: (1) tooth length (mm), (2) supplement type and (3) dosage (mg/day). The histogram of tooth length shows an approximately normal distribution.

# Checking dataset
# (lines starting with `##` are the captured console output of the call
# directly above them)

head(ToothGrowth) # displaying first 6 rows of dataset
##    len supp dose
## 1  4.2   VC  0.5
## 2 11.5   VC  0.5
## 3  7.3   VC  0.5
## 4  5.8   VC  0.5
## 5  6.4   VC  0.5
## 6 10.0   VC  0.5
str(ToothGrowth) # shows the structural details of the dataset (column types)
## 'data.frame':    60 obs. of  3 variables:
##  $ len : num  4.2 11.5 7.3 5.8 6.4 10 11.2 11.2 5.2 7 ...
##  $ supp: Factor w/ 2 levels "OJ","VC": 2 2 2 2 2 2 2 2 2 2 ...
##  $ dose: num  0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 ...
summary(ToothGrowth) # showing the summary stats of dataset (per column)
##       len        supp         dose      
##  Min.   : 4.20   OJ:30   Min.   :0.500  
##  1st Qu.:13.07   VC:30   1st Qu.:0.500  
##  Median :19.25           Median :1.000  
##  Mean   :18.81           Mean   :1.167  
##  3rd Qu.:25.27           3rd Qu.:2.000  
##  Max.   :33.90           Max.   :2.000
# Histogram of tooth length, used to inspect the shape of its distribution
with(
  ToothGrowth,
  hist(
    len,
    ylab = "Frequency",
    xlab = "Tooth Length",
    main = "Distribution of Tooth Length"
  )
)

# Box plots of tooth length, grouped first by supplement type and then by
# dosage, to compare the spread of tooth length within each group

# grouped by supplement type
boxplot(
  len ~ supp,
  data = ToothGrowth,
  ylab = "Tooth Length",
  xlab = "Supplement Type",
  main = "Box Plot for Distribution of Supplement Type"
)

# grouped by dosage
boxplot(
  len ~ dose,
  data = ToothGrowth,
  ylab = "Tooth Length",
  xlab = "Dosage",
  main = "Box Plot for Distribution of Dosage"
)

Plotting and analysing relationships of supplement type and dosage level on tooth length

Question 2 - Does tooth length increase with dosage?

Yes. Tooth length increases with dosage regardless of supplement type. The scatter plot shows a positive correlation between tooth length and dosage: the fitted lines for both treatments have positive slopes as dosage increases.

# Scatter plot of tooth length against dosage, coloured by supplement type,
# with one fitted straight line (no confidence band) per supplement
ggplot(ToothGrowth, aes(x = dose, y = len, color = supp)) +
  geom_point(size = 3, alpha = 0.7) +
  # the formula is stated explicitly rather than left for geom_smooth() to choose
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
  # legend keys default to the supplement level names, so no `labels` needed
  scale_color_manual(values = c("OJ" = "orange", "VC" = "blue")) +
  # only aesthetics that are actually mapped are labelled; the former
  # `border = "black"` entry named no aesthetic of this plot and drew nothing
  labs(
    title = "Tooth Length and Dosage Relationship Differentiated by Supplement Type",
    x = "Dosage (mg/day)",
    y = "Tooth Length (mm)",
    color = "Supplement Type"
  ) +
  theme_minimal()

Question 3 and 4 - Which supplement is more effective? What are the relationships between dosage and supplement type?

At the lower dosages of 0.5 mg/day and 1 mg/day, the OJ treatment is more effective. At the higher dosage of 2 mg/day, however, both treatments are about equally effective, as the mean tooth lengths for the OJ and VC treatments are 26.06 mm and 26.14 mm respectively.

However, at the lowest dosage (0.5 mg/day), the performance of the VC treatment is more consistent: its range of tooth lengths is smaller, with less variation, than that of the OJ treatment.

Overall, taking all 3 dosages together, OJ is the more effective supplement.

# Box plots of tooth length at each dosage level, with the two supplement
# types drawn side by side within every dosage
# dose is converted to a factor so that each dosage gets its own discrete
# position on the x axis
ggplot(ToothGrowth, aes(x = factor(dose), y = len, fill = supp)) +
  geom_boxplot(position = position_dodge(0.8), width = 0.7, alpha = 0.7) +
  scale_fill_manual(values = c("OJ" = "orange", "VC" = "blue"),
                    labels = c("OJ", "VC")) +
  # only aesthetics that are actually mapped are labelled; the former
  # `border = "black"` entry named no aesthetic of this plot and drew nothing
  labs(
    title = "Tooth Length Distribution by Dosage Differentiated by Supplement Type",
    x = "Dosage (mg/day)",
    y = "Tooth Length (mm)",
    fill = "Supplement Type"
  ) +
  theme_minimal()

# Mean tooth length for each supplement type at each dosage level
# tapply() builds a supplement-by-dosage table of means in one pass; rows are
# named after the supplement levels and columns after the dosage values
cell_means <- with(ToothGrowth, tapply(len, list(supp, dose), mean))

# At dosage = 0.5
mean_OJ_0.5 <- cell_means["OJ", "0.5"]
mean_VC_0.5 <- cell_means["VC", "0.5"]

# At dosage = 1
mean_OJ_1 <- cell_means["OJ", "1"]
mean_VC_1 <- cell_means["VC", "1"]

# At dosage = 2
mean_OJ_2 <- cell_means["OJ", "2"]
mean_VC_2 <- cell_means["VC", "2"]



# Reporting the means, rounded to 2 decimal places, as text in the rmd output
sprintf("Mean tooth length at dose 0.5 - OJ: %.2f, VC: %.2f", mean_OJ_0.5, mean_VC_0.5)
## [1] "Mean tooth length at dose 0.5 - OJ: 13.23, VC: 7.98"
sprintf("Mean tooth length at dose 1 - OJ: %.2f, VC: %.2f", mean_OJ_1, mean_VC_1)
## [1] "Mean tooth length at dose 1 - OJ: 22.70, VC: 16.77"
sprintf("Mean tooth length at dose 2 - OJ: %.2f, VC: %.2f", mean_OJ_2, mean_VC_2)
## [1] "Mean tooth length at dose 2 - OJ: 26.06, VC: 26.14"