==================================================================================
The difference between KNN regression and KNN classification methods:
- KNN regression predicts a quantitative response as the average of the responses of the K nearest training observations.
- KNN classification predicts the class of a qualitative response by estimating the probability of each class as the fraction of the K nearest training observations that belong to it, and then assigning the most probable class.
library("ISLR2")
# Exploratory look at the Auto data: scatterplot matrix, column names, and
# pairwise correlations.

# Scatterplot matrix of every column against every other column.
# A separate plot(Auto) call is not needed: for a data frame with more than
# two columns, plot() draws this same scatterplot matrix, so calling both only
# rendered the identical figure twice.
pairs(Auto)

names(Auto)
## [1] "mpg" "cylinders" "displacement" "horsepower" "weight"
## [6] "acceleration" "year" "origin" "name"

# Correlation matrix of the first eight columns. The ninth column (name) is
# left out, presumably because it is not a numeric variable.
cor(Auto[1:8])
## mpg cylinders displacement horsepower weight
## mpg 1.0000000 -0.7776175 -0.8051269 -0.7784268 -0.8322442
## cylinders -0.7776175 1.0000000 0.9508233 0.8429834 0.8975273
## displacement -0.8051269 0.9508233 1.0000000 0.8972570 0.9329944
## horsepower -0.7784268 0.8429834 0.8972570 1.0000000 0.8645377
## weight -0.8322442 0.8975273 0.9329944 0.8645377 1.0000000
## acceleration 0.4233285 -0.5046834 -0.5438005 -0.6891955 -0.4168392
## year 0.5805410 -0.3456474 -0.3698552 -0.4163615 -0.3091199
## origin 0.5652088 -0.5689316 -0.6145351 -0.4551715 -0.5850054
## acceleration year origin
## mpg 0.4233285 0.5805410 0.5652088
## cylinders -0.5046834 -0.3456474 -0.5689316
## displacement -0.5438005 -0.3698552 -0.6145351
## horsepower -0.6891955 -0.4163615 -0.4551715
## weight -0.4168392 -0.3091199 -0.5850054
## acceleration 1.0000000 0.2903161 0.2127458
## year 0.2903161 1.0000000 0.1815277
## origin 0.2127458 0.1815277 1.0000000
# Multiple linear regression of mpg on all remaining columns among the first
# eight columns of Auto (subsetting to 1:8 drops the ninth column, name).
mlr <- lm(mpg ~ ., data = Auto[1:8])

# Coefficient table and overall fit statistics for the model.
summary(mlr)
##
## Call:
## lm(formula = mpg ~ ., data = Auto[1:8])
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.5903 -2.1565 -0.1169 1.8690 13.0604
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -17.218435 4.644294 -3.707 0.00024 ***
## cylinders -0.493376 0.323282 -1.526 0.12780
## displacement 0.019896 0.007515 2.647 0.00844 **
## horsepower -0.016951 0.013787 -1.230 0.21963
## weight -0.006474 0.000652 -9.929 < 2e-16 ***
## acceleration 0.080576 0.098845 0.815 0.41548
## year 0.750773 0.050973 14.729 < 2e-16 ***
## origin 1.426141 0.278136 5.127 4.67e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.328 on 384 degrees of freedom
## Multiple R-squared: 0.8215, Adjusted R-squared: 0.8182
## F-statistic: 252.4 on 7 and 384 DF, p-value: < 2.2e-16

# Diagnostic plots for the fitted linear model.
plot(mlr)
# Refit mpg on every column except name, adding log(horsepower) as an extra
# predictor alongside the untransformed horsepower term.
# In the output below the log(horsepower) term has p < 2e-16.
summary(
  lm(mpg ~ . - name + log(horsepower), data = Auto)
)
##
## Call:
## lm(formula = mpg ~ . - name + log(horsepower), data = Auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.5777 -1.6623 -0.1213 1.4913 12.0230
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.674e+01 1.106e+01 7.839 4.54e-14 ***
## cylinders -5.530e-02 2.907e-01 -0.190 0.849230
## displacement -4.607e-03 7.108e-03 -0.648 0.517291
## horsepower 1.764e-01 2.269e-02 7.775 7.05e-14 ***
## weight -3.366e-03 6.561e-04 -5.130 4.62e-07 ***
## acceleration -3.277e-01 9.670e-02 -3.388 0.000776 ***
## year 7.421e-01 4.534e-02 16.368 < 2e-16 ***
## origin 8.976e-01 2.528e-01 3.551 0.000432 ***
## log(horsepower) -2.685e+01 2.652e+00 -10.127 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.959 on 383 degrees of freedom
## Multiple R-squared: 0.8592, Adjusted R-squared: 0.8562
## F-statistic: 292.1 on 8 and 383 DF, p-value: < 2.2e-16
# Refit mpg on every column except name, adding a squared acceleration term.
# I() makes ^2 mean arithmetic squaring rather than a formula operator.
# In the output below the squared term has p = 2.42e-06.
summary(
  lm(mpg ~ . - name + I(acceleration^2), data = Auto)
)
##
## Call:
## lm(formula = mpg ~ . - name + I(acceleration^2), data = Auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.9680 -1.9266 -0.0124 1.9153 13.2722
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.1088174 6.4930423 0.787 0.4319
## cylinders -0.3181584 0.3165577 -1.005 0.3155
## displacement 0.0090446 0.0076528 1.182 0.2380
## horsepower -0.0346411 0.0139094 -2.490 0.0132 *
## weight -0.0054113 0.0006719 -8.053 1.03e-14 ***
## acceleration -2.6374431 0.5758788 -4.580 6.30e-06 ***
## year 0.7535781 0.0495815 15.199 < 2e-16 ***
## origin 1.3265929 0.2713219 4.889 1.49e-06 ***
## I(acceleration^2) 0.0790472 0.0165131 4.787 2.42e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.237 on 383 degrees of freedom
## Multiple R-squared: 0.8316, Adjusted R-squared: 0.828
## F-statistic: 236.3 on 8 and 383 DF, p-value: < 2.2e-16
# Regress Sales on Price, Urban, and US using the Carseats data.
# The output below reports Urban and US as the dummy terms UrbanYes and USYes.
mlrc.fit <- lm(Sales ~ Price + Urban + US, data = Carseats)

# UrbanYes has p = 0.936 in the table below, while Price and USYes are
# significant at the 0.001 level.
summary(mlrc.fit)
##
## Call:
## lm(formula = Sales ~ Price + Urban + US, data = Carseats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.9206 -1.6220 -0.0564 1.5786 7.0581
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.043469 0.651012 20.036 < 2e-16 ***
## Price -0.054459 0.005242 -10.389 < 2e-16 ***
## UrbanYes -0.021916 0.271650 -0.081 0.936
## USYes 1.200573 0.259042 4.635 4.86e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.472 on 396 degrees of freedom
## Multiple R-squared: 0.2393, Adjusted R-squared: 0.2335
## F-statistic: 41.52 on 3 and 396 DF, p-value: < 2.2e-16
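Fitted model: Sales = 13.04 - 0.05 × Price - 0.02 × UrbanYes + 1.20 × USYes, where UrbanYes and USYes are indicator variables equal to 1 when Urban = "Yes" and US = "Yes", respectively, and 0 otherwise.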
# Smaller model: Sales regressed on Price and US only (Urban dropped).
mlrc2.fit <- lm(Sales ~ Price + US, data = Carseats)

summary(mlrc2.fit)
##
## Call:
## lm(formula = Sales ~ Price + US, data = Carseats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.9269 -1.6286 -0.0574 1.5766 7.0515
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.03079 0.63098 20.652 < 2e-16 ***
## Price -0.05448 0.00523 -10.416 < 2e-16 ***
## USYes 1.19964 0.25846 4.641 4.71e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.469 on 397 degrees of freedom
## Multiple R-squared: 0.2393, Adjusted R-squared: 0.2354
## F-statistic: 62.43 on 2 and 397 DF, p-value: < 2.2e-16

# F-test comparing the model with Urban (Model 1) against the model without
# it (Model 2). The p-value of 0.9357 below gives no evidence that dropping
# Urban worsens the fit.
anova(mlrc.fit, mlrc2.fit)
## Analysis of Variance Table
##
## Model 1: Sales ~ Price + Urban + US
## Model 2: Sales ~ Price + US
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 396 2420.8
## 2 397 2420.9 -1 -0.03979 0.0065 0.9357

# Confidence intervals for the smaller model's coefficients; the bounds shown
# are the 2.5 % and 97.5 % limits.
confint(mlrc2.fit)
## 2.5 % 97.5 %
## (Intercept) 11.79032020 14.27126531
## Price -0.06475984 -0.04419543
## USYes 0.69151957 1.70776632

# Diagnostic plots for the smaller model.
plot(mlrc2.fit)
# Draw 100 standard-normal values and build a response that is exactly the
# square of the predictor, then regress each variable on the other.
# NOTE(review): no set.seed() call is visible before these draws, so the
# numbers recorded below will differ from run to run.
x <- rnorm(100)
y <- x^2

# Regression of x onto y.
coef(lm(x ~ y))
## (Intercept) y
## -0.1282396935 0.0005139354

# Regression of y onto x. The two recorded fits have different intercepts and
# slopes, and both slopes are close to zero.
coef(lm(y ~ x))
## (Intercept) x
## 0.9720771731 0.0008859643
# Draw 100 standard-normal values and set the response equal to the predictor,
# so that regressing either variable on the other gives the same fit.
x <- rnorm(100)
y <- x

# Regression of x onto y: recorded intercept is zero up to floating-point
# error and the slope is 1.
coef(lm(x ~ y))
## (Intercept) y
## 2.238058e-17 1.000000e+00

# Regression of y onto x: the same coefficients as above.
coef(lm(y ~ x))
## (Intercept) x
## 2.238058e-17 1.000000e+00