Fit a linear regression model on a sample of cars’ braking distances by speed.
require(ellipse)
data(cars)
fit <- lm(dist ~ speed, cars)
summary(fit)
Call:
lm(formula = dist ~ speed, data = cars)
Residuals:
Min 1Q Median 3Q Max
-29.069 -9.525 -2.272 9.215 43.201
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -17.5791 6.7584 -2.601 0.0123 *
speed 3.9324 0.4155 9.464 1.49e-12 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 15.38 on 48 degrees of freedom
Multiple R-squared: 0.6511, Adjusted R-squared: 0.6438
F-statistic: 89.57 on 1 and 48 DF, p-value: 1.49e-12
Plot regression line by two estimted parameters: Intercept and Slope \[ dist = Intercept + Slope \cdot speed \]
elPts <- ellipse(fit, n=50, level=0.95)
par(mfrow=c(1,2))
plot(cars, xlim=c(0,max(cars$speed)), xlab="speed, mph", ylab="dist, ft")
abline(coef=fit$coefficients)
plot(x=fit$coefficients[1], y=fit$coefficients[2], xlab="Intercept", ylab="Slope", xlim=range(elPts[,1]))
Plot 95% HDI area on regresion plot and on parameter space plot.
par(mfrow=c(1,2))
plot(cars, xlim=c(0,max(cars$speed)), xlab="speed, mph", ylab="dist, ft")
for (i in 1:dim(elPts)[1]) abline(coef=elPts[i,], col="lightgray")
abline(coef=fit$coefficients)
plot(x=fit$coefficients[1], y=fit$coefficients[2], xlab="Intercept", ylab="Slope", xlim=range(elPts[,1]))
points(elPts, pch=19, col="lightgray")
Note the negative correlation between Intercept and Slope
rho<-summary(fit,correlation=T)$correlation[1,2]
rho
[1] -0.9468008
Plot the parameter space in 3D
require(mvtnorm)
dmvrnorm2D <- function(x,y, mux, muy, sigmax, sigmay, rho){
return(dmvnorm(cbind(x,y), mean=c(mux, muy),
sigma=matrix(c(sigmax^2,sigmax*sigmay*rho,sigmax*sigmay*rho,sigmay^2),ncol = 2)))
}
coeffs<-summary(fit)$coefficients
sigmax<-coeffs[1,2]
sigmay<-coeffs[2,2]
mux<-coeffs[1,1]
muy<-coeffs[2,1]
x<-seq(mux-sigmax*3,mux+sigmax*3,length=50)
y<-seq(muy-sigmay*3,muy+sigmay*3,length=50)
z<-outer(x,y,FUN=dmvrnorm2D,mux, muy, sigmax, sigmay, rho)
persp(x,y,z,col="lightgray",xlab="Intercept",ylab="Slope",zlab="Density",ticktype = "detailed")
Improving the model: recognizing that dist is a quadratic function of speed. \[ dist = Intercept + Slope \cdot speed^2 \]
fit <- lm(dist ~ I(speed^2), cars)
summary(fit)
Call:
lm(formula = dist ~ I(speed^2), data = cars)
Residuals:
Min 1Q Median 3Q Max
-28.448 -9.211 -3.594 5.076 45.862
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 8.86005 4.08633 2.168 0.0351 *
I(speed^2) 0.12897 0.01319 9.781 5.2e-13 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 15.05 on 48 degrees of freedom
Multiple R-squared: 0.6659, Adjusted R-squared: 0.6589
F-statistic: 95.67 on 1 and 48 DF, p-value: 5.2e-13
Plot regression curve and parameter space
elPts <- ellipse(fit, n=50, level=0.95)
par(mfrow=c(1,2))
xlim<-c(0,max(cars$speed))
plot(cars, xlim=xlim, xlab="speed, mph", ylab="dist, ft")
for (i in 1:dim(elPts)[1]) curve(elPts[i,1]+elPts[i,2]*x^2, from=xlim[1], to=xlim[2], add=T, col="lightgray")
curve(fit$coefficients[1]+fit$coefficients[2]*x^2, from=xlim[1], to=xlim[2], add=T)
plot(x=fit$coefficients[1], y=fit$coefficients[2], xlab="Intercept", ylab="Slope", xlim=c(min(elPts[,1]),max(elPts[,1])))
points(elPts, pch=19, col="lightgray")