library(ISLR)
library(ggplot2)
# Fit a simple linear regression of weight on horsepower using the Auto data
# and print the model summary.
df <- as.data.frame(Auto)
model1 <- lm(formula = weight ~ horsepower, data = df)
summary(model1)
##
## Call:
## lm(formula = weight ~ horsepower, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2191.1 -297.7 -80.1 330.8 1150.8
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 984.5003 62.5143 15.75 <2e-16 ***
## horsepower 19.0782 0.5616 33.97 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 427.4 on 390 degrees of freedom
## Multiple R-squared: 0.7474, Adjusted R-squared: 0.7468
## F-statistic: 1154 on 1 and 390 DF, p-value: < 2.2e-16
There is a significant relationship between the predictor and the response, as the p-value is less than 0.05 (p < 0.001).
# Pearson correlation test between weight and horsepower; the result is
# stored in `corr` and then printed.
corr <- cor.test(x = df$weight, y = df$horsepower, method = "pearson")
corr
##
## Pearson's product-moment correlation
##
## data: df$weight and df$horsepower
## t = 33.972, df = 390, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.8371778 0.8875815
## sample estimates:
## cor
## 0.8645377
There is a strong positive correlation of 0.8645.
# Show the estimated intercept and slope of the fitted model.
coef(model1)
## (Intercept) horsepower
## 984.50033 19.07816
The relationship between the predictor and the response is a positive one. The coefficient for horsepower is 19.078, which suggests that, on average, weight increases by 19.078 for every additional unit of horsepower.
# Predict weight for a horsepower of 150, with a 97% prediction interval.
new_data <- data.frame(horsepower = 150)
predict(model1, newdata = new_data, interval = "prediction", level = 0.97)
## fit lwr upr
## 1 3846.225 2912.409 4780.041
The predicted weight for a car with 150 horsepower is 3846.225, with a 97% prediction interval of (2912.41, 4780.04).
Here is a plot of the 97% confidence interval and prediction interval.
# Scatter plot of weight against horsepower with the fitted regression line,
# its 97% confidence band, and the 97% prediction interval bounds.
df_copy <- df

# Compute prediction bounds at the same 97% level as the confidence band drawn
# by geom_smooth(); predict() would otherwise fall back to its 95% default.
# Supplying newdata explicitly makes it clear the bounds are evaluated at the
# observed horsepower values.
pred <- predict(
  model1,
  newdata = df_copy,
  interval = "prediction",
  level = 0.97
)
pred_df <- cbind(df_copy, pred)

sp2 <- ggplot(pred_df, aes(x = horsepower, y = weight)) +
  theme_light() +
  geom_point(size = 1, alpha = 0.5) +
  geom_smooth(
    method = "lm", formula = y ~ x, level = 0.97,
    aes(color = "Line of Best Fit", fill = "Confidence Interval")
  ) +
  # Colours for the bounds come from scale_color_manual() below, so the
  # mapped labels are the single source of truth for both line and legend.
  geom_line(aes(y = lwr, color = "lwr"), linetype = "dashed") +
  geom_line(aes(y = upr, color = "upr"), linetype = "dashed") +
  scale_fill_manual(
    NULL,
    values = c("Confidence Interval" = "grey")
  ) +
  scale_color_manual(
    NULL,
    values = c(
      "Line of Best Fit" = "#6495ED",
      "upr" = "green",
      "lwr" = "red"
    )
  ) +
  theme(
    legend.position = "bottom",
    axis.title.x = element_text(vjust = -2),
    axis.title.y = element_text(vjust = 3),
    plot.title = element_text(size = 12)
  ) +
  # The model is weight ~ horsepower, i.e. weight regressed on horsepower.
  ggtitle(
    "Linear Regression of Weight on Horsepower with Confidence & Prediction Intervals"
  ) +
  xlab("Horsepower") +
  ylab("Weight")
# No xlim()/ylim() here: scale limits restricted to the observed weight range
# discard values outside it, which would cut off the fitted line and the
# prediction bounds wherever they extend beyond the observed weights.
sp2