Course: Applied Regression Analysis, Coursera.
Building and plotting confidence and prediction intervals:
# Read the homework data and fit a simple linear regression of SBP on QUET.
data1 <- read.csv(file = "week2-HW-data.csv")
model1 <- lm(formula = SBP ~ QUET, data = data1)
# Print residual quantiles, the coefficient table and the fit statistics.
summary(object = model1)
##
## Call:
## lm(formula = SBP ~ QUET, data = data1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.231 -7.145 -1.604 7.798 22.531
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 70.576 12.322 5.728 2.99e-06 ***
## QUET 21.492 3.545 6.062 1.17e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.812 on 30 degrees of freedom
## Multiple R-squared: 0.5506, Adjusted R-squared: 0.5356
## F-statistic: 36.75 on 1 and 30 DF, p-value: 1.172e-06
Using base R:
# Scatter plot of the observations with the fitted regression line on top.
with(
  data1,
  plot(
    x = QUET, y = SBP,
    ylim = c(100, 200),
    xlab = "QUET", ylab = "SBP",
    main = "Regression"
  )
)
abline(reg = model1, col = "lightblue")

95% confidence interval:
a) for the mean response at a single point (QUET = 3):
# 95% confidence interval for the fitted mean of SBP at QUET = 3.
conf_interval_3 <- predict(
  model1,
  newdata = data.frame(QUET = 3),
  interval = "confidence",
  level = 0.95
)
conf_interval_3
## fit lwr upr
## 1 135.0514 130.2821 139.8207
b) for the regression line:
summary(data1$QUET)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.368 3.022 3.380 3.441 3.776 4.637
# Evaluation grid for the interval bands. The endpoints are taken from the
# data rather than typed in from the rounded summary above, and length.out is
# used instead of a fixed step so the grid ends exactly at the largest
# observed QUET (a step of 0.05 stopped at 4.618, short of the maximum).
newx <- seq(
  min(data1$QUET, na.rm = TRUE),
  max(data1$QUET, na.rm = TRUE),
  length.out = 100
)
# Scatter plot of the observations with the fitted regression line.
plot(
  data1$QUET, data1$SBP,
  ylim = c(100, 200),
  xlab = "QUET", ylab = "SBP",
  main = "Regression"
)
abline(model1, col = "lightblue")
# Pointwise 95% confidence limits for the mean response along the grid.
conf_interval <- predict(
  model1,
  newdata = data.frame(QUET = newx),
  interval = "confidence",
  level = 0.95
)
# predict() returns a matrix with columns fit, lwr and upr; select the limits
# by name rather than by position.
lines(newx, conf_interval[, "lwr"], col = "blue", lty = 2)
lines(newx, conf_interval[, "upr"], col = "blue", lty = 2)

95% prediction interval:
a) for a new observation at a single point (QUET = 3):
# 95% prediction interval for a single new SBP observation at QUET = 3.
pred_interval_3 <- predict(
  model1,
  newdata = data.frame(QUET = 3),
  interval = "prediction",
  level = 0.95
)
pred_interval_3
## fit lwr upr
## 1 135.0514 114.4537 155.6491
b) for the regression line:
# Scatter plot with the fitted line and the 95% prediction band.
with(
  data1,
  plot(
    x = QUET, y = SBP,
    ylim = c(100, 200),
    xlab = "QUET", ylab = "SBP",
    main = "Regression"
  )
)
abline(reg = model1, col = "lightblue")
# Prediction limits evaluated on the newx grid.
pred_interval <- predict(
  model1,
  newdata = data.frame(QUET = newx),
  interval = "prediction",
  level = 0.95
)
# Draw the lower and upper limits in a single call (one line per column).
matlines(newx, pred_interval[, c("lwr", "upr")], col = "orange", lty = 2)

95% confidence and prediction intervals:
# Compute both sets of 95% limits on the newx grid before drawing anything.
conf_interval <- predict(
  model1,
  newdata = data.frame(QUET = newx),
  interval = "confidence",
  level = 0.95
)
pred_interval <- predict(
  model1,
  newdata = data.frame(QUET = newx),
  interval = "prediction",
  level = 0.95
)
# Scatter plot with the fitted regression line.
with(
  data1,
  plot(
    x = QUET, y = SBP,
    ylim = c(100, 200),
    xlab = "QUET", ylab = "SBP",
    main = "Regression"
  )
)
abline(reg = model1, col = "lightblue")
# Confidence band in blue first, then the prediction band in orange.
matlines(newx, conf_interval[, c("lwr", "upr")], col = "blue", lty = 2)
matlines(newx, pred_interval[, c("lwr", "upr")], col = "orange", lty = 2)

Using ggplot2:
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.1.3
95% confidence interval:
# Scatter plot with the least-squares line; se = TRUE adds geom_smooth's
# shaded confidence band around the line.
ggplot(data = data1, mapping = aes(x = QUET, y = SBP)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE)

95% confidence and prediction intervals:
# Prediction limits for every row of data1. newdata is passed explicitly for
# two reasons:
#   * the result always has one row per row of data1 (a row with a missing
#     QUET yields NA instead of being dropped), so cbind() below cannot fail
#     or misalign when lm() discarded incomplete cases;
#   * predict.lm() no longer warns that "predictions on current data refer to
#     _future_ responses", which it only does when newdata is omitted.
temp_var <- predict(
  model1,
  newdata = data1,
  interval = "prediction",
  level = 0.95
)
# Attach the fit, lwr and upr columns to the observations for plotting.
new_df <- cbind(data1, temp_var)
# Points, dashed red prediction limits, and the fitted line with its shaded
# confidence band from geom_smooth.
ggplot(new_df, aes(x = QUET, y = SBP)) +
  geom_point() +
  geom_line(aes(y = lwr), color = "red", linetype = "dashed") +
  geom_line(aes(y = upr), color = "red", linetype = "dashed") +
  geom_smooth(method = "lm", se = TRUE)
