library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.3.3
library(varhandle)
# Load the ToothGrowth dataset and record its dimensions.
data("ToothGrowth")
TotalCol <- ncol(ToothGrowth)
TotalRow <- nrow(ToothGrowth)
# A former `for (i in 1:TotalCol)` loop was removed here: it tested
# `is.numeric(ToothGrowth[i])`, but single-bracket indexing returns a
# one-column data frame, for which is.numeric() is always FALSE, so the loop
# body could never run.
# NOTE(review): `numericvars` is not used anywhere in the visible part of the
# file; it is kept in case later code relies on it -- confirm and drop if not.
numericvars <- NULL
# Report how many observations (rows) the dataset holds.
print(paste0("There are ", TotalRow, " Obervations in this dataset"))
## [1] "There are 60 Obervations in this dataset"
# Preview the first six observations of the dataset.
head(ToothGrowth, n = 6L)
## len supp dose
## 1 4.2 VC 0.5
## 2 11.5 VC 0.5
## 3 7.3 VC 0.5
## 4 5.8 VC 0.5
## 5 6.4 VC 0.5
## 6 10.0 VC 0.5
# Print descriptive statistics for every column of ToothGrowth.
#
# Numeric columns get maximum, minimum, mean, median, variance and quartiles;
# every other column gets the list of its distinct values.
for (Var in names(ToothGrowth)) {
  # `[[` extracts the column itself, so the checks below see a plain vector.
  Values <- ToothGrowth[[Var]]
  print(paste0("*** For variable ", Var))
  # is.numeric() is TRUE for integer as well as double columns and always
  # yields a single TRUE/FALSE, whereas `class(x) == "numeric"` misses integer
  # columns and gives a length-2 condition for multi-class columns.
  if (is.numeric(Values)) {
    print(paste0("Maximum Value = ", max(Values)))
    print(paste0("Minimum Value = ", min(Values)))
    print(paste0("Mean = ", round(mean(Values), digits = 2)))
    print(paste0("Median = ", round(median(Values), digits = 2)))
    print(paste0("Variance = ", round(var(Values), digits = 2)))
    print("Quartiles:")
    print(quantile(Values))
  } else {
    # Non-numeric column: list the distinct values in order of appearance.
    AVT <- as.character(unique(Values))
    print("Possible Values: ")
    print(AVT)
  }
  print(" ")
}
## [1] "*** For variable len"
## [1] "Maximum Value = 33.9"
## [1] "Minimum Value = 4.2"
## [1] "Mean = 18.81"
## [1] "Median = 19.25"
## [1] "Variance = 58.51"
## [1] "Quartiles:"
## 0% 25% 50% 75% 100%
## 4.200 13.075 19.250 25.275 33.900
## [1] " "
## [1] "*** For variable supp"
## [1] "Possible Values: "
## [1] "VC" "OJ"
## [1] " "
## [1] "*** For variable dose"
## [1] "Maximum Value = 2"
## [1] "Minimum Value = 0.5"
## [1] "Mean = 1.17"
## [1] "Median = 1"
## [1] "Variance = 0.4"
## [1] "Quartiles:"
## 0% 25% 50% 75% 100%
## 0.5 0.5 1.0 2.0 2.0
## [1] " "
The following chart seems to indicate that tooth length increases as the dosage increases. The impact of orange juice increases with the dosage, while the results for ascorbic acid are more mitigated at high doses.
library(ggplot2)
# Replace the supplement codes with readable labels. The assignment is
# positional: each label is applied to the existing factor level at the same
# index.
# NOTE(review): assumes the current level order is OJ then VC -- confirm.
levels(ToothGrowth$supp) <- c("Orange Juice", "Ascorbic Acid")

# Box plots of tooth length for each dose, one panel per supplement.
ggplot(data = ToothGrowth, mapping = aes(x = factor(dose), y = len)) +
  geom_boxplot(mapping = aes(fill = supp), show.legend = FALSE) +
  facet_grid(. ~ supp) +
  labs(
    title = "Measuring Guinea Tooth Lengh by dosage per supplement",
    x = "Dose (mg/day)",
    y = "Tooth Length"
  )
# Dose levels (mg/day) used to split the per-dose supplement comparisons.
Dose1 <- 0.5
Dose2 <- 1
Dose3 <- 2

# Confidence level passed to every t-test below.
ConfidenceLvl <- 0.95
We want to estimate the true mean difference in tooth growth depending on the supplement administered: Vitamin C (ascorbic acid) or orange juice. We will use a two-sided test and apply a 95% t confidence interval.
# Two-sided, pooled-variance t-test of tooth length by supplement over the
# whole dataset (all doses together).
# `paired` is intentionally not passed: FALSE is already the default, and the
# formula method of t.test() rejects the argument in recent R versions (4.4.0+).
Hypopthesis1 <- t.test(
  len ~ supp,
  data = ToothGrowth,
  alternative = "two.sided",
  var.equal = TRUE,
  conf.level = ConfidenceLvl
)
# One-column data frame: row 1 is the lower bound, row 2 the upper bound.
IntervalDeConfiance <- as.data.frame(Hypopthesis1$conf.int)
print(paste0(
  "We are at ", ConfidenceLvl,
  " confident that the true mean difference in tooth growth for both supplement is contained in the the interval: ",
  round(IntervalDeConfiance[1, 1], digits = 4),
  " and ",
  round(IntervalDeConfiance[2, 1], digits = 4)
))
## [1] "We are at 0.95 confident that the true mean difference in tooth growth for both supplement is contained in the the interval: -0.167 and 7.567"
# `&&` is the scalar operator meant for `if` conditions.
if (IntervalDeConfiance[1, 1] < 0 && IntervalDeConfiance[2, 1] > 0) {
  print("Since zero is within the interval, it is possible that there is no difference in tooth growth due to the product administrated")
}
## [1] "Since zero is within the interval, it is possible that there is no difference in tooth growth due to the product administrated"
# Format without scientific notation so small p-values stay readable.
PValueText <- format(round(Hypopthesis1$p.value, digits = 4), scientific = FALSE)
# A single if/else guarantees exactly one conclusion is printed, including the
# boundary case where the p-value equals 0.05.
if (Hypopthesis1$p.value < 0.05) {
  print(paste0("Since ", PValueText, " is lower than 0.05, we can reject the NULL Hypothesis"))
} else {
  # The original message said "lower" in this branch, contradicting its test.
  print(paste0("Since ", PValueText, " is higher than 0.05, we cannot reject the NULL Hypothesis"))
}
## [1] "Since 0.0604 is higher than 0.05, we cannot reject the NULL Hypothesis"
# Two-sided, pooled-variance t-test of tooth length by supplement, restricted
# to the observations whose dose equals Dose1.
# `paired` is intentionally not passed: FALSE is already the default, and the
# formula method of t.test() rejects the argument in recent R versions (4.4.0+).
Hypopthesis2 <- t.test(
  len ~ supp,
  data = subset(ToothGrowth, dose == Dose1),
  alternative = "two.sided",
  var.equal = TRUE,
  conf.level = ConfidenceLvl
)
# One-column data frame: row 1 is the lower bound, row 2 the upper bound.
IntervalDeConfiance <- as.data.frame(Hypopthesis2$conf.int)
print(paste0(
  "We are at ", ConfidenceLvl,
  " confident that the true mean difference in tooth growth for ",
  Dose1,
  "mg/day is contained in the the interval: ",
  round(IntervalDeConfiance[1, 1], digits = 4),
  " and ",
  round(IntervalDeConfiance[2, 1], digits = 4)
))
## [1] "We are at 0.95 confident that the true mean difference in tooth growth for 0.5mg/day is contained in the the interval: 1.7703 and 8.7297"
# `&&` is the scalar operator meant for `if` conditions.
if (IntervalDeConfiance[1, 1] < 0 && IntervalDeConfiance[2, 1] > 0) {
  print("Since zero is within the interval, it is possible that there is no difference in tooth growth due to the quantity administrated")
}
# Format without scientific notation so small p-values stay readable.
PValueText <- format(round(Hypopthesis2$p.value, digits = 4), scientific = FALSE)
# A single if/else guarantees exactly one conclusion is printed, including the
# boundary case where the p-value equals 0.05.
if (Hypopthesis2$p.value < 0.05) {
  print(paste0("Since ", PValueText, " is lower than 0.05, we can reject the NULL Hypothesis"))
} else {
  print(paste0("Since ", PValueText, " is higher than 0.05, we cannot reject the NULL Hypothesis"))
}
## [1] "Since 0.0053 is lower than 0.05, we can reject the NULL Hypothesis"
# Two-sided, pooled-variance t-test of tooth length by supplement, restricted
# to the observations whose dose equals Dose2.
# `paired` is intentionally not passed: FALSE is already the default, and the
# formula method of t.test() rejects the argument in recent R versions (4.4.0+).
Hypopthesis3 <- t.test(
  len ~ supp,
  data = subset(ToothGrowth, dose == Dose2),
  alternative = "two.sided",
  var.equal = TRUE,
  conf.level = ConfidenceLvl
)
# One-column data frame: row 1 is the lower bound, row 2 the upper bound.
IntervalDeConfiance <- as.data.frame(Hypopthesis3$conf.int)
print(paste0(
  "We are at ", ConfidenceLvl,
  " confident that the true mean difference in tooth growth for ",
  Dose2,
  "mg/day is contained in the the interval: ",
  round(IntervalDeConfiance[1, 1], digits = 4),
  " and ",
  round(IntervalDeConfiance[2, 1], digits = 4)
))
## [1] "We are at 0.95 confident that the true mean difference in tooth growth for 1mg/day is contained in the the interval: 2.8407 and 9.0193"
# `&&` is the scalar operator meant for `if` conditions.
if (IntervalDeConfiance[1, 1] < 0 && IntervalDeConfiance[2, 1] > 0) {
  print("Since zero is within the interval, it is possible that there is no difference in tooth growth due to the quantity administrated")
}
# Format without scientific notation: the rounded p-value previously printed
# as "8e-04".
PValueText <- format(round(Hypopthesis3$p.value, digits = 4), scientific = FALSE)
# A single if/else guarantees exactly one conclusion is printed, including the
# boundary case where the p-value equals 0.05.
if (Hypopthesis3$p.value < 0.05) {
  print(paste0("Since ", PValueText, " is lower than 0.05, we can reject the NULL Hypothesis"))
} else {
  print(paste0("Since ", PValueText, " is higher than 0.05, we cannot reject the NULL Hypothesis"))
}
## [1] "Since 0.0008 is lower than 0.05, we can reject the NULL Hypothesis"
# Two-sided, pooled-variance t-test of tooth length by supplement, restricted
# to the observations whose dose equals Dose3.
# `paired` is intentionally not passed: FALSE is already the default, and the
# formula method of t.test() rejects the argument in recent R versions (4.4.0+).
Hypopthesis4 <- t.test(
  len ~ supp,
  data = subset(ToothGrowth, dose == Dose3),
  alternative = "two.sided",
  var.equal = TRUE,
  conf.level = ConfidenceLvl
)
# One-column data frame: row 1 is the lower bound, row 2 the upper bound.
IntervalDeConfiance <- as.data.frame(Hypopthesis4$conf.int)
print(paste0(
  "We are at ", ConfidenceLvl,
  " confident that the true mean difference in tooth growth for ",
  Dose3,
  "mg/day is contained in the the interval: ",
  round(IntervalDeConfiance[1, 1], digits = 4),
  " and ",
  round(IntervalDeConfiance[2, 1], digits = 4)
))
## [1] "We are at 0.95 confident that the true mean difference in tooth growth for 2mg/day is contained in the the interval: -3.723 and 3.563"
# `&&` is the scalar operator meant for `if` conditions.
if (IntervalDeConfiance[1, 1] < 0 && IntervalDeConfiance[2, 1] > 0) {
  print("Since zero is within the interval, it is possible that there is no difference in tooth growth due to the quantity administrated")
}
## [1] "Since zero is within the interval, it is possible that there is no difference in tooth growth due to the quantity administrated"
# Format without scientific notation so small p-values stay readable.
PValueText <- format(round(Hypopthesis4$p.value, digits = 4), scientific = FALSE)
# A single if/else guarantees exactly one conclusion is printed, including the
# boundary case where the p-value equals 0.05.
if (Hypopthesis4$p.value < 0.05) {
  print(paste0("Since ", PValueText, " is lower than 0.05, we can reject the NULL Hypothesis"))
} else {
  print(paste0("Since ", PValueText, " is higher than 0.05, we cannot reject the NULL Hypothesis"))
}
## [1] "Since 0.9637 is higher than 0.05, we cannot reject the NULL Hypothesis"
The above analysis shows that we obtain growth no matter which delivery method we use. Additionally, we can state that orange juice delivers more growth when administered at 0.5 to 1 mg per day, but at 2 mg per day its impact is very similar to that of ascorbic acid. We assume that: (1) the tooth lengths are normally distributed; (2) the supplements are the only factors affecting tooth length.