library(ISLR)
## Warning: package 'ISLR' was built under R version 4.0.5
# Load the Auto data from CSV, reading "?" as NA, then drop every row that
# still contains a missing value.
Auto <- read.csv("C:/Auto.csv", header = TRUE, na.strings = "?",
                 stringsAsFactors = FALSE)
Auto <- na.omit(Auto)

# fix() opens an interactive data editor. Only call it when a user is at the
# console so that batch runs (Rscript, knitr) are not blocked by it.
if (interactive()) {
  fix(Auto)
}

# Scatterplot matrix of every column except positions 2, 8 and 9, which this
# script treats as qualitative.
qualitative_columns <- c(2, 8, 9)
pairs(Auto[, -qualitative_columns])

# Pairwise correlations of all columns except the ninth one.
auto.num <- Auto[-9] # Exclude name variable
auto.cor <- cor(auto.num)
auto.cor
## mpg cylinders displacement horsepower weight
## mpg 1.0000000 -0.7776175 -0.8051269 -0.7784268 -0.8322442
## cylinders -0.7776175 1.0000000 0.9508233 0.8429834 0.8975273
## displacement -0.8051269 0.9508233 1.0000000 0.8972570 0.9329944
## horsepower -0.7784268 0.8429834 0.8972570 1.0000000 0.8645377
## weight -0.8322442 0.8975273 0.9329944 0.8645377 1.0000000
## acceleration 0.4233285 -0.5046834 -0.5438005 -0.6891955 -0.4168392
## year 0.5805410 -0.3456474 -0.3698552 -0.4163615 -0.3091199
## origin 0.5652088 -0.5689316 -0.6145351 -0.4551715 -0.5850054
## acceleration year origin
## mpg 0.4233285 0.5805410 0.5652088
## cylinders -0.5046834 -0.3456474 -0.5689316
## displacement -0.5438005 -0.3698552 -0.6145351
## horsepower -0.6891955 -0.4163615 -0.4551715
## weight -0.4168392 -0.3091199 -0.5850054
## acceleration 1.0000000 0.2903161 0.2127458
## year 0.2903161 1.0000000 0.1815277
## origin 0.2127458 0.1815277 1.0000000
# Main-effects linear model: mpg explained by the seven other numeric columns
# of Auto. The coefficient table is printed by summary().
lm.auto <- lm(
  mpg ~ cylinders + displacement + horsepower + weight + acceleration +
    year + origin,
  data = Auto
)
summary(lm.auto)
##
## Call:
## lm(formula = mpg ~ cylinders + displacement + horsepower + weight +
## acceleration + year + origin, data = Auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.5903 -2.1565 -0.1169 1.8690 13.0604
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -17.218435 4.644294 -3.707 0.00024 ***
## cylinders -0.493376 0.323282 -1.526 0.12780
## displacement 0.019896 0.007515 2.647 0.00844 **
## horsepower -0.016951 0.013787 -1.230 0.21963
## weight -0.006474 0.000652 -9.929 < 2e-16 ***
## acceleration 0.080576 0.098845 0.815 0.41548
## year 0.750773 0.050973 14.729 < 2e-16 ***
## origin 1.426141 0.278136 5.127 4.67e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.328 on 384 degrees of freedom
## Multiple R-squared: 0.8215, Adjusted R-squared: 0.8182
## F-statistic: 252.4 on 7 and 384 DF, p-value: < 2.2e-16
# Draw the diagnostic plots of the main-effects fit on a 2 x 2 grid.
par(mfrow = c(2, 2))
plot(lm.auto)

# Model with interactions: `*` adds the main effects together with all of
# their products (cylinders x displacement, and the full three-way expansion
# of horsepower, weight and acceleration).
# NOTE(review): the leading `-name` removes a term that is never added, so it
# does not appear to change the fitted coefficients - confirm before dropping.
lm.inter1 <- lm(
  mpg ~ -name + cylinders * displacement + origin +
    horsepower * weight * acceleration + year,
  data = Auto
)
summary(lm.inter1)
##
## Call:
## lm(formula = mpg ~ -name + cylinders * displacement + origin +
## horsepower * weight * acceleration + year, data = Auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.3693 -1.5354 0.0358 1.3874 11.9225
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.480e+00 1.117e+01 -0.670 0.50338
## cylinders -4.100e-01 5.011e-01 -0.818 0.41367
## displacement -2.079e-02 1.645e-02 -1.264 0.20692
## origin 6.491e-01 2.560e-01 2.536 0.01161 *
## horsepower 3.592e-02 1.040e-01 0.345 0.73007
## weight -9.580e-03 3.445e-03 -2.781 0.00569 **
## acceleration 6.872e-01 6.227e-01 1.104 0.27042
## year 7.631e-01 4.449e-02 17.154 < 2e-16 ***
## cylinders:displacement 2.521e-03 2.239e-03 1.126 0.26094
## horsepower:weight 1.115e-05 2.721e-05 0.410 0.68220
## horsepower:acceleration -1.832e-02 7.397e-03 -2.476 0.01371 *
## weight:acceleration 1.205e-05 2.099e-04 0.057 0.95425
## horsepower:weight:acceleration 2.700e-06 1.922e-06 1.404 0.16109
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.877 on 379 degrees of freedom
## Multiple R-squared: 0.8683, Adjusted R-squared: 0.8642
## F-statistic: 208.3 on 12 and 379 DF, p-value: < 2.2e-16
# Alternative interaction model: `:` contributes only the product terms
# (horsepower x weight, acceleration x cylinders) without the corresponding
# main effects; displacement, origin and year enter as plain main effects.
# NOTE(review): as in the previous model, `-name` removes a term that is never
# added - confirm it is only there for emphasis.
lm.inter2 <- lm(
  mpg ~ -name + displacement + origin + horsepower:weight +
    acceleration:cylinders + year,
  data = Auto
)
summary(lm.inter2)
##
## Call:
## lm(formula = mpg ~ -name + displacement + origin + horsepower:weight +
## acceleration:cylinders + year, data = Auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.2393 -2.1992 -0.2751 2.0135 13.7888
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.149e+01 4.464e+00 -4.815 2.12e-06 ***
## displacement -9.433e-03 6.433e-03 -1.466 0.143
## origin 1.545e+00 3.092e-01 4.997 8.85e-07 ***
## year 7.088e-01 5.581e-02 12.700 < 2e-16 ***
## horsepower:weight -1.289e-05 2.557e-06 -5.040 7.16e-07 ***
## acceleration:cylinders -6.221e-02 1.234e-02 -5.041 7.14e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.718 on 386 degrees of freedom
## Multiple R-squared: 0.776, Adjusted R-squared: 0.7731
## F-statistic: 267.5 on 5 and 386 DF, p-value: < 2.2e-16
# Plot mpg against three transformations of horsepower (log, square root,
# square) on a 2 x 2 grid.
par(mfrow = c(2, 2))

# with() evaluates the plotting calls inside Auto, so horsepower and mpg
# resolve to its columns without attach(). attach() was never undone here and
# left every Auto column on the search path for the rest of the script.
with(Auto, {
  plot(log(horsepower), mpg)
  plot(sqrt(horsepower), mpg)
  plot(I(horsepower^2), mpg)
})

# Compact overview of the Carseats data (column types and first values).
str(Carseats)
## 'data.frame': 400 obs. of 11 variables:
## $ Sales : num 9.5 11.22 10.06 7.4 4.15 ...
## $ CompPrice : num 138 111 113 117 141 124 115 136 132 132 ...
## $ Income : num 73 48 35 100 64 113 105 81 110 113 ...
## $ Advertising: num 11 16 10 4 3 13 0 15 0 0 ...
## $ Population : num 276 260 269 466 340 501 45 425 108 131 ...
## $ Price : num 120 83 80 97 128 72 108 120 124 124 ...
## $ ShelveLoc : Factor w/ 3 levels "Bad","Good","Medium": 1 2 3 3 1 1 3 2 3 3 ...
## $ Age : num 42 65 59 55 38 78 71 67 76 76 ...
## $ Education : num 17 10 12 14 13 16 15 10 10 17 ...
## $ Urban : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 1 2 2 1 1 ...
## $ US : Factor w/ 2 levels "No","Yes": 2 2 2 2 1 2 1 2 1 2 ...
# Regress Sales on Price and the two Yes/No factors Urban and US.
lm.car <- lm(
  Sales ~ Price + Urban + US,
  data = Carseats
)
summary(lm.car)
##
## Call:
## lm(formula = Sales ~ Price + Urban + US, data = Carseats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.9206 -1.6220 -0.0564 1.5786 7.0581
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.043469 0.651012 20.036 < 2e-16 ***
## Price -0.054459 0.005242 -10.389 < 2e-16 ***
## UrbanYes -0.021916 0.271650 -0.081 0.936
## USYes 1.200573 0.259042 4.635 4.86e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.472 on 396 degrees of freedom
## Multiple R-squared: 0.2393, Adjusted R-squared: 0.2335
## F-statistic: 41.52 on 3 and 396 DF, p-value: < 2.2e-16
# Structure of just the three predictors used in lm.car.
str(
  data.frame(Carseats$Price, Carseats$Urban, Carseats$US)
)
## 'data.frame': 400 obs. of 3 variables:
## $ Carseats.Price: num 120 83 80 97 128 72 108 120 124 124 ...
## $ Carseats.Urban: Factor w/ 2 levels "No","Yes": 2 2 2 2 2 1 2 2 1 1 ...
## $ Carseats.US : Factor w/ 2 levels "No","Yes": 2 2 2 2 1 2 1 2 1 2 ...
# Dummy coding of the Urban factor: "No" is the baseline (0), "Yes" is 1.
contrasts(Carseats[["Urban"]])
## Yes
## No 0
## Yes 1
# Dummy coding of the US factor: "No" is the baseline (0), "Yes" is 1.
contrasts(Carseats[["US"]])
## Yes
## No 0
## Yes 1
# Reduced model: same response as lm.car but without the Urban predictor.
lm.sigvar <- lm(
  Sales ~ Price + US,
  data = Carseats
)
summary(lm.sigvar)
##
## Call:
## lm(formula = Sales ~ Price + US, data = Carseats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.9269 -1.6286 -0.0574 1.5766 7.0515
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.03079 0.63098 20.652 < 2e-16 ***
## Price -0.05448 0.00523 -10.416 < 2e-16 ***
## USYes 1.19964 0.25846 4.641 4.71e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.469 on 397 degrees of freedom
## Multiple R-squared: 0.2393, Adjusted R-squared: 0.2354
## F-statistic: 62.43 on 2 and 397 DF, p-value: < 2.2e-16
# Compare the model that includes Urban (lm.car) with the reduced one
# (lm.sigvar).
anova(lm.car, lm.sigvar)

# 95% confidence intervals for the coefficients of the reduced model.
confint(lm.sigvar, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 11.79032020 14.27126531
## Price -0.06475984 -0.04419543
## USYes 0.69151957 1.70776632
# Diagnostic plots of the reduced model on a 2 x 2 grid.
par(mfrow = c(2, 2))
plot(lm.sigvar)

# Index of the observation with the largest hat value (highest leverage).
which.max(
  hatvalues(lm.sigvar)
)
## 43
## 43
# Simulated example: x is 100 standard-normal draws and y is x shifted by the
# constants 2 and 3. No seed is set in this section, so the numbers change
# from run to run.
x <- rnorm(100)
y <- 2 + x + 3

# Regress y on x with the intercept suppressed (`+ 0`).
Exm_1 <- lm(y ~ x + 0)
summary(Exm_1)
##
## Call:
## lm(formula = y ~ x + 0)
##
## Residuals:
## Min 1Q Median 3Q Max
## 4.463 4.845 4.998 5.153 5.605
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## x 0.7637 0.5368 1.423 0.158
##
## Residual standard error: 5.02 on 99 degrees of freedom
## Multiple R-squared: 0.02003, Adjusted R-squared: 0.01013
## F-statistic: 2.024 on 1 and 99 DF, p-value: 0.158
# Reverse regression on the same data: x on y, again without an intercept.
Exm_2 <- lm(x ~ y + 0)
summary(Exm_2)
##
## Call:
## lm(formula = x ~ y + 0)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.3418 -0.7695 -0.1377 0.5003 2.3609
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## y 0.02623 0.01844 1.423 0.158
##
## Residual standard error: 0.9304 on 99 degrees of freedom
## Multiple R-squared: 0.02003, Adjusted R-squared: 0.01013
## F-statistic: 2.024 on 1 and 99 DF, p-value: 0.158
# Second simulated example: x and y are both the sequence 1..100 plus
# independent normal noise scaled down by 1000, so they are almost identical.
x <- 1:100 + rnorm(100) / 1000
y <- 1:100 + rnorm(100) / 1000

# Regress y on x with the intercept suppressed (`+ 0`).
Exm_3 <- lm(y ~ x + 0)
summary(Exm_3)
##
## Call:
## lm(formula = y ~ x + 0)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0032797 -0.0010422 0.0000452 0.0009913 0.0033028
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## x 1.000e+00 2.484e-06 402501 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.001445 on 99 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: 1
## F-statistic: 1.62e+11 on 1 and 99 DF, p-value: < 2.2e-16
# Reverse regression for the second example: x on y, without an intercept.
Exm_4 <- lm(x ~ y + 0)
summary(Exm_4)
##
## Call:
## lm(formula = x ~ y + 0)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0033027 -0.0009913 -0.0000452 0.0010422 0.0032797
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## y 1.000e+00 2.484e-06 402501 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.001445 on 99 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: 1
## F-statistic: 1.62e+11 on 1 and 99 DF, p-value: < 2.2e-16
# Comment on the output. For instance: