library(knitr)
opts_chunk$set(echo = TRUE, results = "markup", warning = TRUE, cache = TRUE, tidy = FALSE)
library(datasets)
data("ToothGrowth")
attach(ToothGrowth)
str(ToothGrowth)
## 'data.frame': 60 obs. of 3 variables:
## $ len : num 4.2 11.5 7.3 5.8 6.4 10 11.2 11.2 5.2 7 ...
## $ supp: Factor w/ 2 levels "OJ","VC": 2 2 2 2 2 2 2 2 2 2 ...
## $ dose: num 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 ...
attach(ToothGrowth)
## The following objects are masked from ToothGrowth (pos = 3):
##
## dose, len, supp
plot(len ~ dose, xlab = "Dose", ylab = "Length",
pch = c(1, 5), col = c("red", "green"))
legend("bottomright", inset = c(0.1, 0.08), c("Orange Juice", "Vitamin C"),
pch = c(1, 5), col = c("red", "green"))
ToothAggr <- aggregate(len ~ dose, , mean)
plot(ToothAggr, type = "b", xlab = "Dose", ylab = "Length", main = "Average Growth (len) per Dose", col="blue")
summary(ToothGrowth)
## len supp dose
## Min. : 4.20 OJ:30 Min. :0.500
## 1st Qu.:13.07 VC:30 1st Qu.:0.500
## Median :19.25 Median :1.000
## Mean :18.81 Mean :1.167
## 3rd Qu.:25.27 3rd Qu.:2.000
## Max. :33.90 Max. :2.000
fit1 <- lm(len ~ dose)
summary(fit1)
##
## Call:
## lm(formula = len ~ dose)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.4496 -2.7406 -0.7452 2.8344 10.1139
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.4225 1.2601 5.89 2.06e-07 ***
## dose 9.7636 0.9525 10.25 1.23e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.601 on 58 degrees of freedom
## Multiple R-squared: 0.6443, Adjusted R-squared: 0.6382
## F-statistic: 105.1 on 1 and 58 DF, p-value: 1.233e-14
mean(ToothGrowth$len) + c(-1, 1) * 1.96 * sd(ToothGrowth$len)/sqrt(nrow(ToothGrowth))
## [1] 16.87779 20.74888
confint(fit1)
## 2.5 % 97.5 %
## (Intercept) 4.900171 9.944829
## dose 7.856870 11.670273
fit2 <- lm(ToothGrowth$len ~ ToothGrowth$dose + ToothGrowth$supp)
summary(fit2)
##
## Call:
## lm(formula = ToothGrowth$len ~ ToothGrowth$dose + ToothGrowth$supp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.600 -3.700 0.373 2.116 8.800
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.2725 1.2824 7.231 1.31e-09 ***
## ToothGrowth$dose 9.7636 0.8768 11.135 6.31e-16 ***
## ToothGrowth$suppVC -3.7000 1.0936 -3.383 0.0013 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.236 on 57 degrees of freedom
## Multiple R-squared: 0.7038, Adjusted R-squared: 0.6934
## F-statistic: 67.72 on 2 and 57 DF, p-value: 8.716e-16
confint(fit2)
## 2.5 % 97.5 %
## (Intercept) 6.704608 11.840392
## ToothGrowth$dose 8.007741 11.519402
## ToothGrowth$suppVC -5.889905 -1.510095
We observe a positive correlation between len and dose, but a negative correlation for Vitmin C as compared against Orange Juice .
ToothOJ <- ToothGrowth[ToothGrowth$supp == "OJ", ]
OJAggr <- aggregate(ToothOJ$len ~ ToothOJ$dose, , mean)
colnames(OJAggr) <- c("dose", "len")
ToothVC <- ToothGrowth[ToothGrowth$supp == "VC", ]
VCAggr <- aggregate(ToothVC$len ~ ToothVC$dose, , mean)
colnames(VCAggr) <- c("dose", "len")
plotxlim <- c( min( min(OJAggr$dose), min(VCAggr$dose) ),
max( max(OJAggr$dose), max(VCAggr$dose) ) )
plotylim <- c( min( min(OJAggr$len), min(VCAggr$len) ),
max( max(OJAggr$len), max(VCAggr$len) ) )
plot(OJAggr, xlim = plotxlim, ylim = plotylim,
type = "b", col = "red", pch = 1, lwd = 2, lty = 1,
xlab = "Dose", ylab = "Length", main = "len ~ dose by supp")
lines(VCAggr$dose, VCAggr$len, col = "green", lwd = 2, lty = 3)
points(VCAggr$dose, VCAggr$len, col = "green", pch = 5)
legend("bottomright", inset = c(0.1, 0.08), c("Orange Juice", "Vitamin C"),
pch = c(1, 5), lwd = 2, lty = c(1, 3), col = c("red", "green"))