Load the built-in ToothGrowth dataset.
# Make the ToothGrowth dataset available in the workspace
data("ToothGrowth")
# Preview the first six rows
head(ToothGrowth, n = 6L)
## len supp dose
## 1 4.2 VC 0.5
## 2 11.5 VC 0.5
## 3 7.3 VC 0.5
## 4 5.8 VC 0.5
## 5 6.4 VC 0.5
## 6 10.0 VC 0.5
# Work on a copy under a shorter name so ToothGrowth itself stays untouched
tg <- ToothGrowth
# Recode the numeric dose column as a factor so each dose is a category
tg[["dose"]] <- factor(tg[["dose"]])
Load the tidyverse library to be able to call its functions.
library(tidyverse)
# Scatter plot of tooth length (y) against dose level (x), with the points
# coloured by supplement type
g <- ggplot(data = tg, mapping = aes(x = dose, y = len)) +
  geom_point(mapping = aes(color = supp))
print(g)
Another way of looking at the data
# Boxplots of tooth length per supplement, one facet column per dose level
ggplot(data = tg, mapping = aes(x = supp, y = len)) +
  geom_boxplot(mapping = aes(fill = supp)) +
  facet_grid(cols = vars(dose))
The null hypothesis is that there is no difference in tooth growth between the supplements OJ and VC.
The alternative hypothesis is that there is more tooth growth when using supplement OJ than when using VC.
# Tooth lengths of the rows that received supplement OJ
oj_len <- tg$len[tg$supp %in% "OJ"]
# Tooth lengths of the rows that received supplement VC
vc_len <- tg$len[tg$supp %in% "VC"]
# One-sided, unpaired t-test: is the mean length under OJ greater than
# the mean length under VC?
t.test(
  x = oj_len,
  y = vc_len,
  alternative = "greater",
  paired = FALSE,
  conf.level = 0.95
)
##
## Welch Two Sample t-test
##
## data: oj_len and vc_len
## t = 1.9153, df = 55.309, p-value = 0.03032
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## 0.4682687 Inf
## sample estimates:
## mean of x mean of y
## 20.66333 16.96333
The null hypothesis is that there is no difference in tooth growth between dosages.
The alternative hypothesis is that there is more tooth growth as the dosage increases.
# Collect the tooth lengths for each dose level. The dose column is matched
# against its level labels as strings, so the selection does not depend on the
# implicit factor-to-character coercion that a comparison such as `== 0.5`
# relies on. `%in%` never yields NA, so no `which()` wrapper is needed.
dose_0.5 <- tg$len[tg$dose %in% "0.5"]
dose_1 <- tg$len[tg$dose %in% "1"]
dose_2 <- tg$len[tg$dose %in% "2"]
# Between dose 0.5 and 1: one-sided, unpaired test that the mean length at
# dose 0.5 is lower than the mean length at dose 1
t.test(
  dose_0.5,
  dose_1,
  alternative = "less",
  paired = FALSE,
  conf.level = 0.95
)
##
## Welch Two Sample t-test
##
## data: dose_0.5 and dose_1
## t = -6.4766, df = 37.986, p-value = 6.342e-08
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf -6.753323
## sample estimates:
## mean of x mean of y
## 10.605 19.735
# Between dose 1 and 2: one-sided, unpaired test that the mean length at
# dose 1 is lower than the mean length at dose 2
t.test(
  x = dose_1,
  y = dose_2,
  alternative = "less",
  paired = FALSE,
  conf.level = 0.95
)
##
## Welch Two Sample t-test
##
## data: dose_1 and dose_2
## t = -4.9005, df = 37.101, p-value = 9.532e-06
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf -4.17387
## sample estimates:
## mean of x mean of y
## 19.735 26.100
Please, fill out the info in this table. Link:
Read the tsv table
# Import the tab-separated class sheet from the local Downloads folder
classdata <- read_tsv(
  file = "~/Downloads/Class_distribution - Sheet1.tsv"
)
## Rows: 86 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (5): First, Last, Height, BirthMonth, EyeColor
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.