This question should be answered using the Weekly data set, which is part of the ISLR2 package. This data is similar in nature to the Smarket data from this chapter’s lab, except that it contains 1, 089 weekly returns for 21 years, from the beginning of 1990 to the end of 2010.
cor(Weekly[, -9])
## Year Lag1 Lag2 Lag3 Lag4
## Year 1.00000000 -0.032289274 -0.03339001 -0.03000649 -0.031127923
## Lag1 -0.03228927 1.000000000 -0.07485305 0.05863568 -0.071273876
## Lag2 -0.03339001 -0.074853051 1.00000000 -0.07572091 0.058381535
## Lag3 -0.03000649 0.058635682 -0.07572091 1.00000000 -0.075395865
## Lag4 -0.03112792 -0.071273876 0.05838153 -0.07539587 1.000000000
## Lag5 -0.03051910 -0.008183096 -0.07249948 0.06065717 -0.075675027
## Volume 0.84194162 -0.064951313 -0.08551314 -0.06928771 -0.061074617
## Today -0.03245989 -0.075031842 0.05916672 -0.07124364 -0.007825873
## Lag5 Volume Today
## Year -0.030519101 0.84194162 -0.032459894
## Lag1 -0.008183096 -0.06495131 -0.075031842
## Lag2 -0.072499482 -0.08551314 0.059166717
## Lag3 0.060657175 -0.06928771 -0.071243639
## Lag4 -0.075675027 -0.06107462 -0.007825873
## Lag5 1.000000000 -0.05851741 0.011012698
## Volume -0.058517414 1.00000000 -0.033077783
## Today 0.011012698 -0.03307778 1.000000000
pairs(Weekly)
Lag 2 is statistically significant.
direct <- glm(Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + Volume,
data = Weekly,
family = "binomial")
summary(direct)
##
## Call:
## glm(formula = Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 +
## Volume, family = "binomial", data = Weekly)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.6949 -1.2565 0.9913 1.0849 1.4579
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.26686 0.08593 3.106 0.0019 **
## Lag1 -0.04127 0.02641 -1.563 0.1181
## Lag2 0.05844 0.02686 2.175 0.0296 *
## Lag3 -0.01606 0.02666 -0.602 0.5469
## Lag4 -0.02779 0.02646 -1.050 0.2937
## Lag5 -0.01447 0.02638 -0.549 0.5833
## Volume -0.02274 0.03690 -0.616 0.5377
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1496.2 on 1088 degrees of freedom
## Residual deviance: 1486.4 on 1082 degrees of freedom
## AIC: 1500.4
##
## Number of Fisher Scoring iterations: 4
glm.probs = predict(direct, type = "response")
glm.probs[1:10]
## 1 2 3 4 5 6 7 8
## 0.6086249 0.6010314 0.5875699 0.4816416 0.6169013 0.5684190 0.5786097 0.5151972
## 9 10
## 0.5715200 0.5554287
contrasts(Weekly$Direction)
## Up
## Down 0
## Up 1
summary(Weekly$Direction)
## Down Up
## 484 605
dim(Weekly)
## [1] 1089 9
glm.pred = rep("Down", nrow(Weekly)) # uses the #of rows on weekly
glm.pred[glm.probs > .5] = "Up"
table(glm.pred, Weekly$Direction)
##
## glm.pred Down Up
## Down 54 48
## Up 430 557
train = (Weekly$Year<2009)
Weekly.2009 = Weekly[!train,]
Direction.2009 = Weekly$Direction[!train]
dim(Weekly.2009)
## [1] 104 9
direct = glm(Direction ~ Lag2, data = Weekly, family =binomial, subset = train)
glm.probs = predict(direct, Weekly.2009, type = "response")
glm.pred = rep("Down", nrow(Weekly.2009))
glm.pred[glm.probs > .5] = "Up"
table(glm.pred, Direction.2009)
## Direction.2009
## glm.pred Down Up
## Down 9 5
## Up 34 56
lda.fit = lda(Direction~Lag2, data = Weekly, subset = train)
lda.fit
## Call:
## lda(Direction ~ Lag2, data = Weekly, subset = train)
##
## Prior probabilities of groups:
## Down Up
## 0.4477157 0.5522843
##
## Group means:
## Lag2
## Down -0.03568254
## Up 0.26036581
##
## Coefficients of linear discriminants:
## LD1
## Lag2 0.4414162
lda.pred = predict(lda.fit, Weekly.2009)
names(lda.pred)
## [1] "class" "posterior" "x"
lda.class = lda.pred$class
table(lda.class, Direction.2009)
## Direction.2009
## lda.class Down Up
## Down 9 5
## Up 34 56
qda.fit = qda(Direction~Lag2, data = Weekly, subset = train)
qda.fit
## Call:
## qda(Direction ~ Lag2, data = Weekly, subset = train)
##
## Prior probabilities of groups:
## Down Up
## 0.4477157 0.5522843
##
## Group means:
## Lag2
## Down -0.03568254
## Up 0.26036581
qda.class = predict(qda.fit, Weekly.2009)$class
table(qda.class, Direction.2009)
## Direction.2009
## qda.class Down Up
## Down 0 0
## Up 43 61
attach(Weekly)
library(class)
train.X = cbind(Lag1, Lag2)[train,]
test.X = cbind(Lag1, Lag2)[!train,]
train.Direction = Weekly$Direction[train]
set.seed(1)
knn.pred = knn(train.X, test.X, train.Direction, k =1)
table(knn.pred, Direction.2009)
## Direction.2009
## knn.pred Down Up
## Down 18 29
## Up 25 32
detach(Weekly)
library(e1071)
nb.fit = naiveBayes(Direction~Lag2, data = Weekly, subset = train)
nb.fit
##
## Naive Bayes Classifier for Discrete Predictors
##
## Call:
## naiveBayes.default(x = X, y = Y, laplace = laplace)
##
## A-priori probabilities:
## Y
## Down Up
## 0.4477157 0.5522843
##
## Conditional probabilities:
## Lag2
## Y [,1] [,2]
## Down -0.03568254 2.199504
## Up 0.26036581 2.317485
nb.class = predict(nb.fit, Weekly.2009)
table(nb.class, Direction.2009)
## Direction.2009
## nb.class Down Up
## Down 0 0
## Up 43 61
set.seed(1)
knn.pred <- knn(train.X,test.X,train.Direction ,k=4)
table(knn.pred, Direction.2009)
## Direction.2009
## knn.pred Down Up
## Down 23 32
## Up 20 29
mean(knn.pred==Direction.2009)
## [1] 0.5
nb_model <- naiveBayes(Direction~Lag1+Lag2+Lag3,data=Weekly, subset = train)
pred <- predict(nb_model,Weekly)
table(Direction.2009)
## Direction.2009
## Down Up
## 43 61
mean(pred==Direction.2009)
## [1] 0.5629017
qda.fit=qda(Direction~Lag1+Lag2+Lag3 ,data=Weekly ,subset=train)
qda.fit
## Call:
## qda(Direction ~ Lag1 + Lag2 + Lag3, data = Weekly, subset = train)
##
## Prior probabilities of groups:
## Down Up
## 0.4477157 0.5522843
##
## Group means:
## Lag1 Lag2 Lag3
## Down 0.289444444 -0.03568254 0.17080045
## Up -0.009213235 0.26036581 0.08404044
qda.class <- predict(qda.fit, Weekly)$class
mean(qda.class==Direction.2009)
## [1] 0.5546373
lda.fit <- lda(Direction~Lag1+Lag2+Lag3, data=Weekly, subset=train)
lda.fit
## Call:
## lda(Direction ~ Lag1 + Lag2 + Lag3, data = Weekly, subset = train)
##
## Prior probabilities of groups:
## Down Up
## 0.4477157 0.5522843
##
## Group means:
## Lag1 Lag2 Lag3
## Down 0.289444444 -0.03568254 0.17080045
## Up -0.009213235 0.26036581 0.08404044
##
## Coefficients of linear discriminants:
## LD1
## Lag1 -0.29658609
## Lag2 0.29258490
## Lag3 -0.04766747
lda.pred <- predict(lda.fit, Weekly)
names(lda.pred)
## [1] "class" "posterior" "x"
lda.class <- lda.pred$class
mean(lda.class==Direction.2009)
## [1] 0.5775941
mpg01 <- ifelse(Auto$mpg > median(Auto$mpg), 1, 0)
Auto_V <- data.frame(Auto, mpg01)
Auto_NQ <- Auto_V %>% select_if(is.numeric)
names(Auto_V)
## [1] "mpg" "cylinders" "displacement" "horsepower" "weight"
## [6] "acceleration" "year" "origin" "name" "mpg01"
names(Auto_NQ)
## [1] "mpg" "cylinders" "displacement" "horsepower" "weight"
## [6] "acceleration" "year" "origin" "mpg01"
cor(Auto_NQ)
## mpg cylinders displacement horsepower weight
## mpg 1.0000000 -0.7776175 -0.8051269 -0.7784268 -0.8322442
## cylinders -0.7776175 1.0000000 0.9508233 0.8429834 0.8975273
## displacement -0.8051269 0.9508233 1.0000000 0.8972570 0.9329944
## horsepower -0.7784268 0.8429834 0.8972570 1.0000000 0.8645377
## weight -0.8322442 0.8975273 0.9329944 0.8645377 1.0000000
## acceleration 0.4233285 -0.5046834 -0.5438005 -0.6891955 -0.4168392
## year 0.5805410 -0.3456474 -0.3698552 -0.4163615 -0.3091199
## origin 0.5652088 -0.5689316 -0.6145351 -0.4551715 -0.5850054
## mpg01 0.8369392 -0.7591939 -0.7534766 -0.6670526 -0.7577566
## acceleration year origin mpg01
## mpg 0.4233285 0.5805410 0.5652088 0.8369392
## cylinders -0.5046834 -0.3456474 -0.5689316 -0.7591939
## displacement -0.5438005 -0.3698552 -0.6145351 -0.7534766
## horsepower -0.6891955 -0.4163615 -0.4551715 -0.6670526
## weight -0.4168392 -0.3091199 -0.5850054 -0.7577566
## acceleration 1.0000000 0.2903161 0.2127458 0.3468215
## year 0.2903161 1.0000000 0.1815277 0.4299042
## origin 0.2127458 0.1815277 1.0000000 0.5136984
## mpg01 0.3468215 0.4299042 0.5136984 1.0000000
plot(Auto_NQ)
glm.fits <- glm(mpg01~displacement+horsepower+weight+acceleration+year, data=Auto_NQ,family = binomial)
summary(glm.fits)
##
## Call:
## glm(formula = mpg01 ~ displacement + horsepower + weight + acceleration +
## year, family = binomial, data = Auto_NQ)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.1982 -0.1129 0.0114 0.2243 3.3045
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -15.844178 5.636740 -2.811 0.004941 **
## displacement -0.007024 0.006519 -1.078 0.281237
## horsepower -0.035762 0.023176 -1.543 0.122826
## weight -0.003984 0.001084 -3.676 0.000237 ***
## acceleration 0.008144 0.141248 0.058 0.954019
## year 0.414101 0.072623 5.702 1.18e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 543.43 on 391 degrees of freedom
## Residual deviance: 159.34 on 386 degrees of freedom
## AIC: 171.34
##
## Number of Fisher Scoring iterations: 8
After using logistic regression weight and year have significant impacts on fuel effieciency.
set.seed(1)
sample1 <- sample(c(TRUE, FALSE), nrow(Auto_NQ), replace=TRUE, prob = c(.6,.4))
train <- Auto_NQ[sample1, ]
test <- Auto_NQ[!sample1, ]
dim(train)
## [1] 246 9
dim(test)
## [1] 146 9
lda.fit.auto <- lda(mpg01 ~ weight+year, data=train)
lda.fit.auto
## Call:
## lda(mpg01 ~ weight + year, data = train)
##
## Prior probabilities of groups:
## 0 1
## 0.504065 0.495935
##
## Group means:
## weight year
## 0 3620.040 74.70968
## 1 2315.344 77.63115
##
## Coefficients of linear discriminants:
## LD1
## weight -0.00176319
## year 0.11400180
lda.class.auto <- predict(lda.fit.auto,test)$class
table(lda.class.auto,test$mpg01)
##
## lda.class.auto 0 1
## 0 62 5
## 1 10 69
mean(lda.class.auto==test$mpg01)
## [1] 0.8972603
1-mean(lda.class.auto==test$mpg01)
## [1] 0.1027397
The test error is 10.27%.
qda.fit.auto <- qda(mpg01 ~ weight+year, data=train)
qda.fit.auto
## Call:
## qda(mpg01 ~ weight + year, data = train)
##
## Prior probabilities of groups:
## 0 1
## 0.504065 0.495935
##
## Group means:
## weight year
## 0 3620.040 74.70968
## 1 2315.344 77.63115
qda.class.auto <- predict(qda.fit.auto,test)$class
table(qda.class.auto,test$mpg01)
##
## qda.class.auto 0 1
## 0 61 6
## 1 11 68
mean(qda.class.auto==test$mpg01)
## [1] 0.8835616
1-mean(qda.class.auto==test$mpg01)
## [1] 0.1164384
The test error is 11.64%.
glm.auto <- glm(mpg01~weight+year, family=binomial, data=train)
summary(glm.auto)
##
## Call:
## glm(formula = mpg01 ~ weight + year, family = binomial, data = train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.9209 -0.1360 -0.0007 0.2159 3.1868
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.082e+01 6.708e+00 -3.104 0.00191 **
## weight -5.946e-03 8.835e-04 -6.730 1.69e-11 ***
## year 4.929e-01 1.063e-01 4.636 3.55e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 341.01 on 245 degrees of freedom
## Residual deviance: 93.95 on 243 degrees of freedom
## AIC: 99.95
##
## Number of Fisher Scoring iterations: 7
glm.probs.auto <- predict(glm.auto,test,type="response")
glm.pred.auto <- rep(0,nrow(test))
glm.pred.auto[glm.probs.auto > 0.50]=1
table(glm.pred.auto,test$mpg01)
##
## glm.pred.auto 0 1
## 0 64 9
## 1 8 65
mean(glm.pred.auto==test$mpg01)
## [1] 0.8835616
1-mean(glm.pred.auto==test$mpg01)
## [1] 0.1164384
The test error is 11.64%.
nb_model.auto <- naiveBayes(mpg01~weight+year,data=train)
summary(nb_model.auto)
## Length Class Mode
## apriori 2 table numeric
## tables 2 -none- list
## levels 2 -none- character
## isnumeric 2 -none- logical
## call 4 -none- call
prob.auto <- predict(nb_model.auto,test)
pred.auto <- rep(0,nrow(test))
pred.auto[prob.auto > 0.50]=1
table(pred.auto,test$mpg01)
##
## pred.auto 0 1
## 0 72 74
mean(pred.auto==test$mpg01)
## [1] 0.4931507
1-mean(pred.auto==test$mpg01)
## [1] 0.5068493
The test error is 50.68%.
Using the Boston data set, fit classification models in order to predict whether a given census tract has a crime rate above or below the median. Explore logistic regression, LDA, naive Bayes, and KNN models using various subsets of the predictors. Describe your findings. Hint: You will have to create the response variable yourself, using the variables that are contained in the Boston data set.
crim01 <- ifelse(Boston$crim > median(Boston$crim), 1, 0)
Boston_V <- data.frame(Boston, crim01)
cor(Boston_V)
## crim zn indus chas nox
## crim 1.00000000 -0.20046922 0.40658341 -0.055891582 0.42097171
## zn -0.20046922 1.00000000 -0.53382819 -0.042696719 -0.51660371
## indus 0.40658341 -0.53382819 1.00000000 0.062938027 0.76365145
## chas -0.05589158 -0.04269672 0.06293803 1.000000000 0.09120281
## nox 0.42097171 -0.51660371 0.76365145 0.091202807 1.00000000
## rm -0.21924670 0.31199059 -0.39167585 0.091251225 -0.30218819
## age 0.35273425 -0.56953734 0.64477851 0.086517774 0.73147010
## dis -0.37967009 0.66440822 -0.70802699 -0.099175780 -0.76923011
## rad 0.62550515 -0.31194783 0.59512927 -0.007368241 0.61144056
## tax 0.58276431 -0.31456332 0.72076018 -0.035586518 0.66802320
## ptratio 0.28994558 -0.39167855 0.38324756 -0.121515174 0.18893268
## black -0.38506394 0.17552032 -0.35697654 0.048788485 -0.38005064
## lstat 0.45562148 -0.41299457 0.60379972 -0.053929298 0.59087892
## medv -0.38830461 0.36044534 -0.48372516 0.175260177 -0.42732077
## crim01 0.40939545 -0.43615103 0.60326017 0.070096774 0.72323480
## rm age dis rad tax ptratio
## crim -0.21924670 0.35273425 -0.37967009 0.625505145 0.58276431 0.2899456
## zn 0.31199059 -0.56953734 0.66440822 -0.311947826 -0.31456332 -0.3916785
## indus -0.39167585 0.64477851 -0.70802699 0.595129275 0.72076018 0.3832476
## chas 0.09125123 0.08651777 -0.09917578 -0.007368241 -0.03558652 -0.1215152
## nox -0.30218819 0.73147010 -0.76923011 0.611440563 0.66802320 0.1889327
## rm 1.00000000 -0.24026493 0.20524621 -0.209846668 -0.29204783 -0.3555015
## age -0.24026493 1.00000000 -0.74788054 0.456022452 0.50645559 0.2615150
## dis 0.20524621 -0.74788054 1.00000000 -0.494587930 -0.53443158 -0.2324705
## rad -0.20984667 0.45602245 -0.49458793 1.000000000 0.91022819 0.4647412
## tax -0.29204783 0.50645559 -0.53443158 0.910228189 1.00000000 0.4608530
## ptratio -0.35550149 0.26151501 -0.23247054 0.464741179 0.46085304 1.0000000
## black 0.12806864 -0.27353398 0.29151167 -0.444412816 -0.44180801 -0.1773833
## lstat -0.61380827 0.60233853 -0.49699583 0.488676335 0.54399341 0.3740443
## medv 0.69535995 -0.37695457 0.24992873 -0.381626231 -0.46853593 -0.5077867
## crim01 -0.15637178 0.61393992 -0.61634164 0.619786249 0.60874128 0.2535684
## black lstat medv crim01
## crim -0.38506394 0.4556215 -0.3883046 0.40939545
## zn 0.17552032 -0.4129946 0.3604453 -0.43615103
## indus -0.35697654 0.6037997 -0.4837252 0.60326017
## chas 0.04878848 -0.0539293 0.1752602 0.07009677
## nox -0.38005064 0.5908789 -0.4273208 0.72323480
## rm 0.12806864 -0.6138083 0.6953599 -0.15637178
## age -0.27353398 0.6023385 -0.3769546 0.61393992
## dis 0.29151167 -0.4969958 0.2499287 -0.61634164
## rad -0.44441282 0.4886763 -0.3816262 0.61978625
## tax -0.44180801 0.5439934 -0.4685359 0.60874128
## ptratio -0.17738330 0.3740443 -0.5077867 0.25356836
## black 1.00000000 -0.3660869 0.3334608 -0.35121093
## lstat -0.36608690 1.0000000 -0.7376627 0.45326273
## medv 0.33346082 -0.7376627 1.0000000 -0.26301673
## crim01 -0.35121093 0.4532627 -0.2630167 1.00000000
set.seed(1)
sample_b <- sample(c(TRUE, FALSE), nrow(Boston_V), replace=TRUE, prob = c(.6,.4))
trainb <- Boston_V[sample_b, ]
testb <- Boston_V[!sample_b, ]
dim(trainb)
## [1] 314 15
dim(testb)
## [1] 192 15
glm.bos <- glm(crim01~indus+nox+age+rad+tax+lstat, family=binomial, data=trainb)
summary(glm.bos)
##
## Call:
## glm(formula = crim01 ~ indus + nox + age + rad + tax + lstat,
## family = binomial, data = trainb)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.08917 -0.19479 0.00026 0.01065 2.74919
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -22.535448 3.975270 -5.669 1.44e-08 ***
## indus -0.072520 0.056568 -1.282 0.199841
## nox 40.095414 8.392236 4.778 1.77e-06 ***
## age 0.022459 0.012367 1.816 0.069362 .
## rad 0.576305 0.151603 3.801 0.000144 ***
## tax -0.008295 0.003199 -2.593 0.009521 **
## lstat -0.011124 0.043571 -0.255 0.798480
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 435.25 on 313 degrees of freedom
## Residual deviance: 139.35 on 307 degrees of freedom
## AIC: 153.35
##
## Number of Fisher Scoring iterations: 8
glm.probs.b <- predict(glm.bos,testb,type="response")
glm.pred.b <- rep(0,nrow(testb))
glm.pred.b[glm.probs.b > 0.50]=1
table(glm.pred.b,testb$crim01)
##
## glm.pred.b 0 1
## 0 88 13
## 1 10 81
mean(glm.pred.b==testb$crim01)
## [1] 0.8802083
1-mean(glm.pred.b==testb$crim01)
## [1] 0.1197917
uccessfully predict 88.02% with a test error rate of 11.98%
lda.fit.b <- lda(crim01~indus+nox+age+rad+tax+lstat, data=trainb)
lda.fit.b
## Call:
## lda(crim01 ~ indus + nox + age + rad + tax + lstat, data = trainb)
##
## Prior probabilities of groups:
## 0 1
## 0.4936306 0.5063694
##
## Group means:
## indus nox age rad tax lstat
## 0 6.663226 0.4648245 50.03871 4.096774 303.8065 9.240387
## 1 15.310189 0.6426730 86.79308 14.610063 506.4969 15.832264
##
## Coefficients of linear discriminants:
## LD1
## indus 0.019009728
## nox 7.126273863
## age 0.018582977
## rad 0.089442271
## tax -0.001811084
## lstat -0.022915042
lda.class.b <- predict(lda.fit.b,testb)$class
table(lda.class.b,testb$crim01)
##
## lda.class.b 0 1
## 0 90 23
## 1 8 71
mean(lda.class.b==testb$crim01)
## [1] 0.8385417
1-mean(lda.class.b==testb$crim01)
## [1] 0.1614583
LDA produces a lower success rate, with 83.85%. The test error rate is 16.15%.
nb_model.b <- naiveBayes(crim01~indus+nox+age+rad+tax+lstat, data=trainb)
summary(nb_model.b)
## Length Class Mode
## apriori 2 table numeric
## tables 6 -none- list
## levels 2 -none- character
## isnumeric 6 -none- logical
## call 4 -none- call
prob.b <- predict(nb_model.b,testb)
pred.b <- rep(0,nrow(testb))
pred.b[prob.b > 0.50]=1
table(pred.b,testb$crim01)
##
## pred.b 0 1
## 0 98 94
mean(pred.b==testb$crim01)
## [1] 0.5104167
1-mean(pred.b==testb$crim01)
## [1] 0.4895833
Naive Bayes produces 51.04% success. 48.96% error rate.
set.seed(1)
train.Boston = trainb[,c("indus","nox","age","rad","tax","lstat")]
test.Boston = testb[,c("indus","nox","age","rad","tax","lstat")]
knn.pred=knn(train.Boston,test.Boston,trainb$crim01,k=1)
table(knn.pred,testb$crim01)
##
## knn.pred 0 1
## 0 91 7
## 1 7 87
mean(knn.pred==testb$crim01)
## [1] 0.9270833
1-mean(knn.pred==testb$crim01)
## [1] 0.07291667
knn.pred=knn(train.Boston,test.Boston,trainb$crim01,k=2)
table(knn.pred,testb$crim01)
##
## knn.pred 0 1
## 0 86 12
## 1 12 82
mean(knn.pred==testb$crim01)
## [1] 0.875
knn.pred=knn(train.Boston,test.Boston,trainb$crim01,k=3)
table(knn.pred,testb$crim01)
##
## knn.pred 0 1
## 0 91 10
## 1 7 84
mean(knn.pred==testb$crim01)
## [1] 0.9114583
knn.pred=knn(train.Boston,test.Boston,trainb$crim01,k=4)
table(knn.pred,testb$crim01)
##
## knn.pred 0 1
## 0 90 11
## 1 8 83
mean(knn.pred==testb$crim01)
## [1] 0.9010417
KNN with K = 1 prodcies success rate of 92.71% with a test error rate of 7.29%.