library(tidyverse)
## ── Attaching packages ─────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.2.1 ✔ purrr 0.3.2
## ✔ tibble 2.1.3 ✔ dplyr 0.8.3
## ✔ tidyr 0.8.3 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## ── Conflicts ────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
intercept_tval<- -17.5791/6.7587
intercept_tval
## [1] -2.600959
speed_tval<- 3.9324/.4155
speed_tval
## [1] 9.46426
pt(speed_tval, 48, lower.tail=FALSE)
## [1] 7.442476e-13
48 degrees of freedom
F-statistic: 89.56562 on 1 and 48 DF p-value: 1.490228e-12
speed_MS<- 21186/1
speed_MS
## [1] 21186
residual_MS<- 11354/48
residual_MS
## [1] 236.5417
fval<- speed_MS/residual_MS
fval
## [1] 89.56562
pf(fval, 1, 48, lower.tail=FALSE)
## [1] 1.490228e-12
auto<-read.csv("Auto (2).csv", header=TRUE, na.strings = "?")
head(auto)
## mpg cylinders displacement horsepower weight acceleration year origin
## 1 18 8 307 130 3504 12.0 70 1
## 2 15 8 350 165 3693 11.5 70 1
## 3 18 8 318 150 3436 11.0 70 1
## 4 16 8 304 150 3433 12.0 70 1
## 5 17 8 302 140 3449 10.5 70 1
## 6 15 8 429 198 4341 10.0 70 1
## name
## 1 chevrolet chevelle malibu
## 2 buick skylark 320
## 3 plymouth satellite
## 4 amc rebel sst
## 5 ford torino
## 6 ford galaxie 500
Auto<-na.omit(auto)
attach(Auto)
## The following object is masked from package:ggplot2:
##
## mpg
plot(horsepower, mpg)
mod<-lm(mpg~horsepower, data=Auto)
summary(mod)
##
## Call:
## lm(formula = mpg ~ horsepower, data = Auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.5710 -3.2592 -0.3435 2.7630 16.9240
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 39.935861 0.717499 55.66 <2e-16 ***
## horsepower -0.157845 0.006446 -24.49 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.906 on 390 degrees of freedom
## Multiple R-squared: 0.6059, Adjusted R-squared: 0.6049
## F-statistic: 599.7 on 1 and 390 DF, p-value: < 2.2e-16
There seems to be a linear, negative, moderately strong relationship between the response (mpg) and the predictor (horsepower).
ggplot(Auto, aes(x=horsepower, y=mpg))+
geom_point()+
geom_abline(slope=mod$coefficients[2], intercept=mod$coefficients[1],
color="blue", lty=2, lwd=1)+
theme_bw()
mpg.residual<-resid(mod)
plot(Auto$horsepower, mpg.residual)
abline(0,0)
While the residual plot seems pretty random, it is concerning to see a little bit of a curve in the residual data that could be a pattern that signals an inappropriate fit for the model. Data points above a horsepower of 150 consistently haev positive residual values, actual is consistently greater than the predicted.
auto<-read.csv("http://faculty.marshall.usc.edu/gareth-james/ISL/Auto.csv",
header=TRUE,
na.strings = "?")
auto<-auto[,-c(8:9)]
auto2<-na.omit(auto)
pairs(auto2)
cor(auto2)
## mpg cylinders displacement horsepower weight
## mpg 1.0000000 -0.7776175 -0.8051269 -0.7784268 -0.8322442
## cylinders -0.7776175 1.0000000 0.9508233 0.8429834 0.8975273
## displacement -0.8051269 0.9508233 1.0000000 0.8972570 0.9329944
## horsepower -0.7784268 0.8429834 0.8972570 1.0000000 0.8645377
## weight -0.8322442 0.8975273 0.9329944 0.8645377 1.0000000
## acceleration 0.4233285 -0.5046834 -0.5438005 -0.6891955 -0.4168392
## year 0.5805410 -0.3456474 -0.3698552 -0.4163615 -0.3091199
## acceleration year
## mpg 0.4233285 0.5805410
## cylinders -0.5046834 -0.3456474
## displacement -0.5438005 -0.3698552
## horsepower -0.6891955 -0.4163615
## weight -0.4168392 -0.3091199
## acceleration 1.0000000 0.2903161
## year 0.2903161 1.0000000
mlr_mod<-lm(mpg~ horsepower+year+cylinders+displacement+acceleration+weight, data=auto2)
summary(mlr_mod)
##
## Call:
## lm(formula = mpg ~ horsepower + year + cylinders + displacement +
## acceleration + weight, data = auto2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.6927 -2.3864 -0.0801 2.0291 14.3607
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.454e+01 4.764e+00 -3.051 0.00244 **
## horsepower -3.914e-04 1.384e-02 -0.028 0.97745
## year 7.534e-01 5.262e-02 14.318 < 2e-16 ***
## cylinders -3.299e-01 3.321e-01 -0.993 0.32122
## displacement 7.678e-03 7.358e-03 1.044 0.29733
## acceleration 8.527e-02 1.020e-01 0.836 0.40383
## weight -6.795e-03 6.700e-04 -10.141 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.435 on 385 degrees of freedom
## Multiple R-squared: 0.8093, Adjusted R-squared: 0.8063
## F-statistic: 272.2 on 6 and 385 DF, p-value: < 2.2e-16
p<-6
Ther does appear to be a relationship between the response (mpg) and the predictors (horsepower, year, cylinders, displacement, acceleration, weight).
The predictors of year and weight appear to have a statistically significant relationship to the response of mpg.
For every 1 increase in year, you would predict a 0.7534 increase in mpg. (slope interpretation)
Y<-as.matrix(auto2$mpg)
head(Y)
## [,1]
## [1,] 18
## [2,] 15
## [3,] 18
## [4,] 16
## [5,] 17
## [6,] 15
n<-dim(Y)[1]
X<-matrix(c(rep(1, n),
auto2$cylinders, auto2$displacement, auto2$horsepower, auto2$weight, auto2$acceleration, auto2$year),
ncol=7,
byrow=FALSE)
head(X)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] 1 8 307 130 3504 12.0 70
## [2,] 1 8 350 165 3693 11.5 70
## [3,] 1 8 318 150 3436 11.0 70
## [4,] 1 8 304 150 3433 12.0 70
## [5,] 1 8 302 140 3449 10.5 70
## [6,] 1 8 429 198 4341 10.0 70
dim(X)
## [1] 392 7
betaHat<-solve(t(X)%*%X)%*%t(X)%*%Y
betaHat
## [,1]
## [1,] -1.453525e+01
## [2,] -3.298591e-01
## [3,] 7.678430e-03
## [4,] -3.913556e-04
## [5,] -6.794618e-03
## [6,] 8.527325e-02
## [7,] 7.533672e-01
P<-X%*%solve(t(X)%*%X)%*%t(X)
dim(P)
## [1] 392 392
fit<-P%*%Y
head(fit)
## [,1]
## [1,] 15.08292
## [2,] 14.07257
## [3,] 15.53632
## [4,] 15.53447
## [5,] 15.28641
## [6,] 10.13543
head(mlr_mod$fitted)
## 1 2 3 4 5 6
## 15.08292 14.07257 15.53632 15.53447 15.28641 10.13543
I<-diag(n)
dim(I)
## [1] 392 392
res<-(I-P)%*%Y
head(res)
## [,1]
## [1,] 2.9170810
## [2,] 0.9274253
## [3,] 2.4636846
## [4,] 0.4655255
## [5,] 1.7135925
## [6,] 4.8645663
head(mlr_mod$residuals)
## 1 2 3 4 5 6
## 2.9170810 0.9274253 2.4636846 0.4655255 1.7135925 4.8645663
ss_res<-sum(t(res)%*%res)
ss_res
## [1] 4543.347
ms_res<-ss_res/(n-p-1)
ms_res
## [1] 11.8009
var_betaHat<-ms_res*solve(t(X)%*%X)
var_betaHat
## [,1] [,2] [,3] [,4] [,5]
## [1,] 22.6945706995 -3.009494e-01 3.969964e-03 -3.197896e-02 6.246273e-04
## [2,] -0.3009493595 1.102932e-01 -1.637297e-03 4.719516e-04 -2.646884e-05
## [3,] 0.0039699638 -1.637297e-03 5.413628e-05 -2.418195e-05 -2.173088e-06
## [4,] -0.0319789568 4.719516e-04 -2.418195e-05 1.914493e-04 -4.139916e-06
## [5,] 0.0006246273 -2.646884e-05 -2.173088e-06 -4.139916e-06 4.489304e-07
## [6,] -0.2143654463 1.688654e-03 8.326532e-05 8.941480e-04 -3.298516e-05
## [7,] -0.2234420177 2.501586e-04 2.852420e-05 1.648842e-04 -5.908396e-06
## [,6] [,7]
## [1,] -2.143654e-01 -2.234420e-01
## [2,] 1.688654e-03 2.501586e-04
## [3,] 8.326532e-05 2.852420e-05
## [4,] 8.941480e-04 1.648842e-04
## [5,] -3.298516e-05 -5.908396e-06
## [6,] 1.041126e-02 4.203459e-04
## [7,] 4.203459e-04 2.768669e-03
se_betaHat<-sqrt(diag(var_betaHat))
se_betaHat
## [1] 4.7638818940 0.3321041332 0.0073577361 0.0138365215 0.0006700227
## [6] 0.1020355670 0.0526181480
mlr.resid<-resid(mod)
plot(year, mlr.resid)
plot(cylinders, mlr.resid)
plot(weight, mlr.resid)
plot(displacement, mlr.resid)
plot(acceleration, mlr.resid)