Course: Applied Regression Analysis, Coursera.

Building and plotting confidence and prediction intervals:

# Load the homework data; the model below uses its SBP and QUET columns.
data1 <- read.csv(file = "week2-HW-data.csv")

# Simple linear regression of SBP on QUET, followed by the coefficient table
# and the overall fit statistics.
model1 <- lm(formula = SBP ~ QUET, data = data1)
summary(model1)
## 
## Call:
## lm(formula = SBP ~ QUET, data = data1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -19.231  -7.145  -1.604   7.798  22.531 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   70.576     12.322   5.728 2.99e-06 ***
## QUET          21.492      3.545   6.062 1.17e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.812 on 30 degrees of freedom
## Multiple R-squared:  0.5506, Adjusted R-squared:  0.5356 
## F-statistic: 36.75 on 1 and 30 DF,  p-value: 1.172e-06

Using base R graphics:

# Scatter plot of the observations with the fitted regression line on top.
with(
  data1,
  plot(QUET, SBP, ylim = c(100, 200), xlab = "QUET", ylab = "SBP",
       main = "Regression")
)
abline(reg = model1, col = "lightblue")

95% confidence interval:

a) for the mean response at a single point (QUET = 3):

# 95% confidence interval for the mean SBP at QUET = 3. The result is a
# one-row matrix with columns fit (point estimate), lwr and upr (bounds).
conf_interval_3 <- predict(
  model1,
  newdata = data.frame(QUET = 3),
  interval = "confidence",
  level = 0.95
)
conf_interval_3
##        fit      lwr      upr
## 1 135.0514 130.2821 139.8207

b) for the regression line (confidence band):

summary(data1$QUET)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.368   3.022   3.380   3.441   3.776   4.637

# Evaluation grid for the interval bands. The endpoints are taken from the
# data instead of being typed in from the rounded summary above, so the grid
# always spans the full observed QUET range (a fixed step of 0.05 starting at
# 2.368 stops at 4.618, short of the maximum).
newx <- seq(
  min(data1$QUET, na.rm = TRUE),
  max(data1$QUET, na.rm = TRUE),
  length.out = 100
)

# Scatter plot of the observations with the fitted regression line.
plot(data1$QUET, data1$SBP, ylim = c(100, 200), xlab = "QUET", ylab = "SBP",
     main = "Regression")
abline(model1, col = "lightblue")

# Pointwise 95% confidence band for the mean response, evaluated on the grid.
# Columns are selected by name (fit / lwr / upr) rather than by position.
conf_interval <- predict(model1, newdata = data.frame(QUET = newx),
                         interval = "confidence", level = 0.95)
lines(newx, conf_interval[, "lwr"], col = "blue", lty = 2)
lines(newx, conf_interval[, "upr"], col = "blue", lty = 2)

95% prediction interval:

a) for a new observation at a single point (QUET = 3):

# 95% prediction interval for a new SBP observation at QUET = 3. The result
# is a one-row matrix with columns fit, lwr and upr.
pred_interval_3 <- predict(
  model1,
  newdata = data.frame(QUET = 3),
  interval = "prediction",
  level = 0.95
)
pred_interval_3
##        fit      lwr      upr
## 1 135.0514 114.4537 155.6491

b) for the regression line (prediction band):

# 95% prediction limits evaluated on the newx grid.
pred_interval <- predict(
  model1,
  newdata = data.frame(QUET = newx),
  interval = "prediction",
  level = 0.95
)

# Observations and fitted line first, then the lower and upper prediction
# limits drawn as dashed orange curves.
with(
  data1,
  plot(QUET, SBP, ylim = c(100, 200), xlab = "QUET", ylab = "SBP",
       main = "Regression")
)
abline(reg = model1, col = "lightblue")
lines(newx, pred_interval[, "lwr"], col = "orange", lty = 2)
lines(newx, pred_interval[, "upr"], col = "orange", lty = 2)

95% confidence and prediction intervals:

# Both sets of 95% limits on the newx grid: confidence limits for the mean
# response and prediction limits for a new observation.
grid_df <- data.frame(QUET = newx)
conf_interval <- predict(model1, newdata = grid_df, interval = "confidence",
                         level = 0.95)
pred_interval <- predict(model1, newdata = grid_df, interval = "prediction",
                         level = 0.95)

# Observations and fitted line.
with(
  data1,
  plot(QUET, SBP, ylim = c(100, 200), xlab = "QUET", ylab = "SBP",
       main = "Regression")
)
abline(reg = model1, col = "lightblue")

# Confidence band in blue, prediction band in orange, both dashed.
lines(newx, conf_interval[, "lwr"], col = "blue", lty = 2)
lines(newx, conf_interval[, "upr"], col = "blue", lty = 2)
lines(newx, pred_interval[, "lwr"], col = "orange", lty = 2)
lines(newx, pred_interval[, "upr"], col = "orange", lty = 2)

Using ggplot2:

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.1.3

95% confidence interval:

# Scatter plot with the least-squares line; se = TRUE asks geom_smooth() to
# shade the confidence band around the fit.
ggplot(data = data1, mapping = aes(x = QUET, y = SBP)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE)

95% confidence and prediction intervals:

# Prediction limits (fit / lwr / upr) for every row of data1.
# newdata is passed explicitly so that predict() returns exactly one row per
# row of data1: the cbind() below stays aligned even if lm() dropped
# incomplete rows, and predict.lm() no longer warns that predictions on the
# current data refer to future responses.
temp_var <- predict(model1, newdata = data1, interval = "prediction")
new_df <- cbind(data1, temp_var)

# Observations, the lower and upper prediction limits (dashed red, taken from
# the lwr / upr columns of new_df), and the fitted line with its shaded
# confidence band.
ggplot(data = new_df, mapping = aes(x = QUET, y = SBP)) +
  geom_point() +
  geom_line(mapping = aes(y = lwr), colour = "red", linetype = "dashed") +
  geom_line(mapping = aes(y = upr), colour = "red", linetype = "dashed") +
  geom_smooth(method = "lm", se = TRUE)