htwt <- read.csv("htwt.txt", header = TRUE, sep = " ")
oldfaith <- read.csv("oldfaith.txt", header = TRUE, sep = " ")[-3]
Based on the information given by this scatterplot, it is possible to model the data using simple linear regression for the following reasons:
Sxx = sum((htwt$Ht - mean(htwt$Ht))^2)
Syy = sum((htwt$Wt - mean(htwt$Wt))^2)
Sxy = sum((htwt$Ht - mean(htwt$Ht)) * (htwt$Wt - mean(htwt$Wt)))
x <- c(mean(htwt$Ht), mean(htwt$Wt), Sxx, Syy, Sxy)
names(x) <- c("Ht_mean", "Wt_mean", "Sxx", "Syy", "Sxy")
x
## Ht_mean Wt_mean Sxx Syy Sxy
## 165.520 59.470 472.076 731.961 274.786
model <- lm(data = htwt, Wt ~ Ht)
model$coefficients
## (Intercept) Ht
## -36.87588 0.58208
ggplot(data = htwt, aes(x = Ht, y = Wt)) + geom_point(size = 4) + geom_smooth(method = lm, se = FALSE) + ggtitle("Scatterplot of htwt.txt")
MSE = sum(model$residuals^2)/(nrow(htwt)-2) %>% round(., 4)
b1_SE = summary(model)$coefficients[2, 2] %>% round(., 4)
b0_SE = summary(model)$coefficients[1, 2] %>% round(., 4)
x <- matrix(c(MSE, b0_SE, MSE, b1_SE), 2, 2, byrow = F, dimnames = list(c("MSE", "standard error"), c("intercept", "b1")))
x
## intercept b1
## MSE 71.5017 71.5017
## standard error 64.4728 0.3892
#cov(b0, b1)
unname(-mean(htwt$Ht) * diag(vcov(model))[2])
## [1] -25.07003
summary(model)$coefficients
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -36.87588 64.4728000 -0.5719603 0.5830589
## Ht 0.58208 0.3891815 1.4956517 0.1731089
anova(lm(data = htwt, Wt ~ Ht))
## Analysis of Variance Table
##
## Response: Wt
## Df Sum Sq Mean Sq F value Pr(>F)
## Ht 1 159.95 159.947 2.237 0.1731
## Residuals 8 572.01 71.502
oldfaith_model <- lm(data = oldfaith, Interval ~ Duration)
summary(oldfaith_model)$coefficients
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 33.9878076 1.18121714 28.77355 6.133342e-84
## Duration 0.1768629 0.00535212 33.04540 1.624248e-96
\[ \begin{align*} \mathrm{\hat{Interval}} = 33.9878076 &+ 0.1768629\; \mathrm{Duration} \\ \end{align*} \]
predict(oldfaith_model, newdata=list(Duration=250), interval="confidence", level=.95)
## fit lwr upr
## 1 78.20354 77.36915 79.03794
predict(oldfaith_model, newdata=list(Duration=250), interval="prediction", level=.95)
## fit lwr upr
## 1 78.20354 66.35401 90.05307