The help page `?ToothGrowth` describes the data set; its contents are broken down below.
`str(ToothGrowth)` provides the technical structure of the data frame.
library(tidyverse)
library(ggpubr)
library(glue)
library(cowplot)
library(tinytex)
library(matrixStats)
# Stamp the report with the date/time at which it was run.
downloadDate <- base::date()
glue::glue("Simulation date is {downloadDate}")
## Simulation date is Thu May 21 00:08:26 2020
# Print the R version and the attached/loaded package versions.
utils::sessionInfo()
## R version 4.0.0 (2020-04-24)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.5
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] matrixStats_0.56.0 tinytex_0.22 cowplot_1.0.0 glue_1.4.0
## [5] ggpubr_0.3.0 forcats_0.5.0 stringr_1.4.0 dplyr_0.8.5
## [9] purrr_0.3.4 readr_1.3.1 tidyr_1.0.2 tibble_3.0.1
## [13] ggplot2_3.3.0 tidyverse_1.3.0
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.4.6 lubridate_1.7.8 lattice_0.20-41 assertthat_0.2.1
## [5] digest_0.6.25 R6_2.4.1 cellranger_1.1.0 backports_1.1.6
## [9] reprex_0.3.0 evaluate_0.14 httr_1.4.1 pillar_1.4.3
## [13] rlang_0.4.5 curl_4.3 readxl_1.3.1 rstudioapi_0.11
## [17] data.table_1.12.8 car_3.0-7 rmarkdown_2.1 foreign_0.8-76
## [21] munsell_0.5.0 broom_0.5.6 compiler_4.0.0 modelr_0.1.6
## [25] xfun_0.13 pkgconfig_2.0.3 htmltools_0.4.0 tidyselect_1.0.0
## [29] rio_0.5.16 fansi_0.4.1 crayon_1.3.4 dbplyr_1.4.3
## [33] withr_2.2.0 grid_4.0.0 nlme_3.1-147 jsonlite_1.6.1
## [37] gtable_0.3.0 lifecycle_0.2.0 DBI_1.1.0 magrittr_1.5
## [41] scales_1.1.0 zip_2.0.4 cli_2.0.2 stringi_1.4.6
## [45] carData_3.0-3 ggsignif_0.6.0 fs_1.4.1 xml2_1.3.1
## [49] ellipsis_0.3.0 generics_0.0.2 vctrs_0.2.4 openxlsx_4.1.5
## [53] tools_4.0.0 hms_0.5.3 abind_1.4-5 yaml_2.2.1
## [57] colorspace_1.4-1 rstatix_0.5.0 rvest_0.3.5 knitr_1.28
## [61] haven_2.2.0
# Load the built-in data set explicitly instead of attach()-ing it.
# The visible analysis always reaches the columns through ToothGrowth itself
# (ToothGrowth$len, or dplyr data masking), so nothing needs len/supp/dose on
# the search path, where they could shadow or be shadowed by other objects.
data("ToothGrowth", package = "datasets")
ToothGrowth contains 60 observations of 3 variables.
Using str(ToothGrowth) and summary(ToothGrowth) we can see the following:
# Column types and a preview of the first values of each variable.
utils::str(object = ToothGrowth)
## 'data.frame': 60 obs. of 3 variables:
## $ len : num 4.2 11.5 7.3 5.8 6.4 10 11.2 11.2 5.2 7 ...
## $ supp: Factor w/ 2 levels "OJ","VC": 2 2 2 2 2 2 2 2 2 2 ...
## $ dose: num 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 ...
# Quartiles and mean of the numeric columns, and level counts for supp.
summary(object = ToothGrowth)
## len supp dose
## Min. : 4.20 OJ:30 Min. :0.500
## 1st Qu.:13.07 VC:30 1st Qu.:0.500
## Median :19.25 Median :1.000
## Mean :18.81 Mean :1.167
## 3rd Qu.:25.27 3rd Qu.:2.000
## Max. :33.90 Max. :2.000
# Compare tooth length between the two supplements: overall (left panel)
# and within each dose level (right panel).
#
# ToothGrowth records 60 different animals, one measurement each, so the OJ
# and VC groups are independent samples. Plain box plots are used here
# because ggpaired() joins OJ and VC observations with lines, which implies
# a pairing that does not exist in this data set.
e <- ggboxplot(
  ToothGrowth,
  x = "supp", y = "len",
  color = "supp", palette = "jco",
  add = "jitter"
)
e1 <- ggboxplot(
  ToothGrowth,
  x = "supp", y = "len",
  color = "supp", palette = "jco",
  add = "jitter",
  facet.by = "dose", short.panel.labs = FALSE
)
plot_grid(e, e1)
The graph on the left tells us the following:
## $OJ
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 8.20 15.53 22.70 20.66 25.73 30.90
##
## $VC
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4.20 11.20 16.50 16.96 23.10 33.90
The graph on the right tells us the following:
## : OJ
## : 0.5
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 8.20 9.70 12.25 13.23 16.18 21.50
## ------------------------------------------------------------
## : VC
## : 0.5
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4.20 5.95 7.15 7.98 10.90 11.50
## ------------------------------------------------------------
## : OJ
## : 1
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 14.50 20.30 23.45 22.70 25.65 27.30
## ------------------------------------------------------------
## : VC
## : 1
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 13.60 15.28 16.50 16.77 17.30 22.50
## ------------------------------------------------------------
## : OJ
## : 2
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 22.40 24.57 25.95 26.06 27.07 30.90
## ------------------------------------------------------------
## : VC
## : 2
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 18.50 23.38 25.95 26.14 28.80 33.90
H0: the mean tooth length is the same for OJ and VC ($\mu_{OJ} = \mu_{VC}$). Ha: the mean tooth length differs between OJ and VC ($\mu_{OJ} \neq \mu_{VC}$).
# Split the data by delivery method; OJ and VC are reused further down.
OJ <- ToothGrowth %>% filter(supp == "OJ")
VC <- ToothGrowth %>% filter(supp == "VC")
# The 30 OJ animals and the 30 VC animals are different animals, so this is
# an independent two-sample comparison (Welch, unequal variances). A paired
# test would match row i of OJ with row i of VC, which has no experimental
# meaning. Note that this comparison pools all three dose levels.
t.test(OJ$len, VC$len, paired = FALSE, var.equal = FALSE)
##
## Welch Two Sample t-test
##
## data: OJ$len and VC$len
## t = 1.9153, df = 55.309, p-value = 0.06063
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.1710156 7.5710156
## sample estimates:
## mean of x mean of y
## 20.66333 16.96333
# Mean tooth length per supplement, used to mark the centre of each curve.
mu <- dplyr::summarise(group_by(ToothGrowth, supp), means = mean(len))
# Overlaid densities of tooth length by supplement, with a dashed vertical
# line at each group's mean.
ggplot(ToothGrowth, aes(x = len, color = supp)) +
  geom_density() +
  geom_vline(
    data = mu,
    mapping = aes(xintercept = means, color = supp),
    linetype = "dashed"
  )
# Average the i-th OJ length with the i-th VC length, then plot the density
# of those row-wise averages with their overall mean as a dashed line.
# NOTE(review): rows are matched by position only; confirm that this
# positional pairing of OJ and VC rows is intended.
x <- OJ$len
y <- VC$len
OJVC <- data.frame(x, y)
combOJVC <- mutate(OJVC, meanss = rowMeans(OJVC))
mu1 <- mean(combOJVC$meanss)
ggplot() +
  geom_density(data = combOJVC, mapping = aes(x = meanss), color = "blue") +
  geom_vline(xintercept = mu1, linetype = "dashed", color = "blue") +
  labs(title = "OJ/VC Averages")
# Linear model of tooth length on supplement type and dose.
# The formula names the columns and passes the data frame through `data =`
# rather than writing ToothGrowth$... inside the formula: that keeps the
# coefficient names short (suppVC, dose) and lets predict(lmfit, newdata = )
# work, which it cannot do when the terms are hard-wired to ToothGrowth$.
lmfit <- lm(len ~ supp + dose, data = ToothGrowth)
summary(lmfit)
##
## Call:
## lm(formula = len ~ supp + dose, data = ToothGrowth)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.600 -3.700 0.373 2.116 8.800
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.2725 1.2824 7.231 1.31e-09 ***
## suppVC -3.7000 1.0936 -3.383 0.0013 **
## dose 9.7636 0.8768 11.135 6.31e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.236 on 57 degrees of freedom
## Multiple R-squared: 0.7038, Adjusted R-squared: 0.6934
## F-statistic: 67.72 on 2 and 57 DF, p-value: 8.716e-16
The regression shows statistically significant effects of both supplement type and dose on mean tooth length.
# Draw the default diagnostic plots for the fitted linear model.
plot(x = lmfit)
After adjusting for dose, we reject the null hypothesis: the mean tooth length differs between the OJ and VC groups.
# Dose 0.5: compare the independent OJ and VC groups with Welch's t-test.
OJdose_.5 <- ToothGrowth %>% filter(dose == 0.5, supp == "OJ")
VCdose_.5 <- ToothGrowth %>% filter(dose == 0.5, supp == "VC")
# TRUE/FALSE are spelled out: T and F are ordinary variables that can be
# reassigned, which would silently change the test being run.
t.test(OJdose_.5$len, VCdose_.5$len, paired = FALSE, var.equal = FALSE)
##
## Welch Two Sample t-test
##
## data: OJdose_.5$len and VCdose_.5$len
## t = 3.1697, df = 14.969, p-value = 0.006359
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 1.719057 8.780943
## sample estimates:
## mean of x mean of y
## 13.23 7.98
# Dose 1.0: compare the independent OJ and VC groups with Welch's t-test,
# the same procedure used for the 0.5 dose. A one-sample test on
# OJ - VC would instead pair the groups row by row, although the rows
# belong to different animals.
OJdose_1 <- ToothGrowth %>% filter(dose == 1.0, supp == "OJ")
VCdose_1 <- ToothGrowth %>% filter(dose == 1.0, supp == "VC")
t.test(OJdose_1$len, VCdose_1$len, paired = FALSE, var.equal = FALSE)
##
## Welch Two Sample t-test
##
## data: OJdose_1$len and VCdose_1$len
## t = 4.0328, df = 15.358, p-value = 0.001038
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 2.802148 9.057852
## sample estimates:
## mean of x mean of y
## 22.70 16.77
# Dose 2.0: compare the independent OJ and VC groups with Welch's t-test,
# the same procedure used for the 0.5 dose. A one-sample test on
# OJ - VC would instead pair the groups row by row, although the rows
# belong to different animals.
OJdose_2 <- ToothGrowth %>% filter(dose == 2.0, supp == "OJ")
VCdose_2 <- ToothGrowth %>% filter(dose == 2.0, supp == "VC")
t.test(OJdose_2$len, VCdose_2$len, paired = FALSE, var.equal = FALSE)
##
## Welch Two Sample t-test
##
## data: OJdose_2$len and VCdose_2$len
## t = -0.046136, df = 14.04, p-value = 0.9639
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -3.79807 3.63807
## sample estimates:
## mean of x mean of y
## 26.06 26.14
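The p-value (about 0.97) is far above 0.05, so we fail to reject the null hypothesis at the 2 mg dose. The observed difference between the OJ mean and the VC mean is -0.08, which is very close to zero, and the 95% confidence interval comfortably contains zero.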
Tooth length differs significantly across dose levels for both delivery methods: a higher dosage led to longer teeth. Based on our evidence, the orange juice delivery method is more effective than the alternative method (VC) at the lower doses. At the 2 mg dosage, OJ and VC have a comparable impact, so the advantage OJ shows at the lower dosages is no longer present.