This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks. You can embed an R code chunk like this:
summary(cars)
##      speed           dist
##  Min.   : 4.0   Min.   :  2.00
##  1st Qu.:12.0   1st Qu.: 26.00
##  Median :15.0   Median : 36.00
##  Mean   :15.4   Mean   : 42.98
##  3rd Qu.:19.0   3rd Qu.: 56.00
##  Max.   :25.0   Max.   :120.00
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.
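For reference, the source of such a chunk looks like this in the .Rmd file (this is the RStudio template's default pressure-plot chunk):

```{r pressure, echo=FALSE}
plot(pressure)
```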
Question 13.) This question should be answered using the Weekly data set, which is part of the ISLR2 package. This data is similar in nature to the Smarket data from this chapter’s lab, except that it contains 1,089 weekly returns for 21 years, from the beginning of 1990 to the end of 2010.
library(ISLR2)
head(Weekly)
## Year Lag1 Lag2 Lag3 Lag4 Lag5 Volume Today Direction
## 1 1990 0.816 1.572 -3.936 -0.229 -3.484 0.1549760 -0.270 Down
## 2 1990 -0.270 0.816 1.572 -3.936 -0.229 0.1485740 -2.576 Down
## 3 1990 -2.576 -0.270 0.816 1.572 -3.936 0.1598375 3.514 Up
## 4 1990 3.514 -2.576 -0.270 0.816 1.572 0.1616300 0.712 Up
## 5 1990 0.712 3.514 -2.576 -0.270 0.816 0.1537280 1.178 Up
## 6 1990 1.178 0.712 3.514 -2.576 -0.270 0.1544440 -1.372 Down
pairs(Weekly)
cor(subset(Weekly, select = -Direction))
## Year Lag1 Lag2 Lag3 Lag4
## Year 1.00000000 -0.032289274 -0.03339001 -0.03000649 -0.031127923
## Lag1 -0.03228927 1.000000000 -0.07485305 0.05863568 -0.071273876
## Lag2 -0.03339001 -0.074853051 1.00000000 -0.07572091 0.058381535
## Lag3 -0.03000649 0.058635682 -0.07572091 1.00000000 -0.075395865
## Lag4 -0.03112792 -0.071273876 0.05838153 -0.07539587 1.000000000
## Lag5 -0.03051910 -0.008183096 -0.07249948 0.06065717 -0.075675027
## Volume 0.84194162 -0.064951313 -0.08551314 -0.06928771 -0.061074617
## Today -0.03245989 -0.075031842 0.05916672 -0.07124364 -0.007825873
## Lag5 Volume Today
## Year -0.030519101 0.84194162 -0.032459894
## Lag1 -0.008183096 -0.06495131 -0.075031842
## Lag2 -0.072499482 -0.08551314 0.059166717
## Lag3 0.060657175 -0.06928771 -0.071243639
## Lag4 -0.075675027 -0.06107462 -0.007825873
## Lag5 1.000000000 -0.05851741 0.011012698
## Volume -0.058517414 1.00000000 -0.033077783
## Today 0.011012698 -0.03307778 1.000000000
logit_model <- glm(Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + Volume,
                   data = Weekly,
                   family = binomial)
summary(logit_model)
##
## Call:
## glm(formula = Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 +
## Volume, family = binomial, data = Weekly)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.6949 -1.2565 0.9913 1.0849 1.4579
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.26686 0.08593 3.106 0.0019 **
## Lag1 -0.04127 0.02641 -1.563 0.1181
## Lag2 0.05844 0.02686 2.175 0.0296 *
## Lag3 -0.01606 0.02666 -0.602 0.5469
## Lag4 -0.02779 0.02646 -1.050 0.2937
## Lag5 -0.01447 0.02638 -0.549 0.5833
## Volume -0.02274 0.03690 -0.616 0.5377
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1496.2 on 1088 degrees of freedom
## Residual deviance: 1486.4 on 1082 degrees of freedom
## AIC: 1500.4
##
## Number of Fisher Scoring iterations: 4
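Only Lag2 is statistically significant at the 5% level (p ≈ 0.03). Lag1 comes closest among the rest (p ≈ 0.12); the remaining lags and Volume are nowhere near significance.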
show_model_performance <- function(predicted_status, observed_status) {
  # Confusion matrix: rows are predicted classes, columns are observed classes
  confusion_matrix <- table(predicted_status,
                            observed_status,
                            dnn = c("predicted Status", "observed Status"))
  print(confusion_matrix)

  error_rate <- mean(predicted_status != observed_status)
  cat("\n")
  cat("         Error Rate:", 100 * error_rate, "%\n")
  cat("Correctly Predicted:", 100 * (1 - error_rate), "%\n")
  # False positives: observed "Down" (column 1) predicted as "Up" (row 2)
  cat("False Positive Rate:", 100 * confusion_matrix[2, 1] / sum(confusion_matrix[, 1]), "%\n")
  # False negatives: observed "Up" (column 2) predicted as "Down" (row 1)
  cat("False Negative Rate:", 100 * confusion_matrix[1, 2] / sum(confusion_matrix[, 2]), "%\n")
}
predict_glm_direction <- function(model, newdata = NULL) {
  # Convert the fitted probabilities of "Up" into class labels
  predictions <- predict(model, newdata, type = "response")
  as.factor(ifelse(predictions < 0.5, "Down", "Up"))
}
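A note on predict_glm_direction(): predict(model, type = "response") returns the fitted probability of the second factor level, which for Direction is "Up" (R codes Down as 0 and Up as 1). This can be confirmed with:

contrasts(Weekly$Direction)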
predicted_direction <- predict_glm_direction(logit_model)
show_model_performance(predicted_direction, Weekly$Direction)
## observed Status
## predicted Status Down Up
## Down 54 48
## Up 430 557
##
## Error Rate: 43.89348 %
## Correctly Predicted: 56.10652 %
## False Positive Rate: 88.84298 %
## False Negative Rate: 7.933884 %
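These rates are computed on the training data itself, so they overstate out-of-sample performance. Even so, 56.1% correctly predicted is barely better than the naive rule of always predicting Up, which would be right 605/1089 ≈ 55.6% of the time.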
train <- (Weekly$Year < 2009)
train_set <- Weekly[train, ]
test_set <- Weekly[!train, ]
logit_model <- glm(Direction ~ Lag2, data = Weekly, family = binomial, subset = train)
predicted_direction <- predict_glm_direction(logit_model, test_set)
show_model_performance(predicted_direction, test_set$Direction)
## observed Status
## predicted Status Down Up
## Down 9 5
## Up 34 56
##
## Error Rate: 37.5 %
## Correctly Predicted: 62.5 %
## False Positive Rate: 79.06977 %
## False Negative Rate: 8.196721 %
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:ISLR2':
##
## Boston
lda_model <- lda(Direction ~ Lag2, data = Weekly, subset = train)
predictions <- predict(lda_model, test_set)  # predict.lda has no type argument
show_model_performance(predictions$class, test_set$Direction)
## observed Status
## predicted Status Down Up
## Down 9 5
## Up 34 56
##
## Error Rate: 37.5 %
## Correctly Predicted: 62.5 %
## False Positive Rate: 79.06977 %
## False Negative Rate: 8.196721 %
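LDA on Lag2 yields exactly the same confusion matrix as the logistic regression above; the two methods often agree closely when the class boundary is roughly linear.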
qda_model <- qda(Direction ~ Lag2, data = Weekly, subset = train)
predictions <- predict(qda_model, test_set)
show_model_performance(predictions$class, test_set$Direction)
## observed Status
## predicted Status Down Up
## Down 0 0
## Up 43 61
##
## Error Rate: 41.34615 %
## Correctly Predicted: 58.65385 %
## False Positive Rate: 100 %
## False Negative Rate: 0 %
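QDA never predicts Down here: it labels every test week Up, so its 58.7% accuracy is simply the frequency of Up weeks in the test set (61/104).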
library(class)
run_knn <- function(train, test, train_class, test_class, k) {
  # Fix the seed because knn() breaks distance ties at random
  set.seed(12345)
  predictions <- knn(train, test, train_class, k)
  cat("KNN: k =", k, "\n")
  show_model_performance(predictions, test_class)
}
train_matrix <- as.matrix(train_set$Lag2)
test_matrix <- as.matrix(test_set$Lag2)
run_knn(train_matrix, test_matrix, train_set$Direction, test_set$Direction, k = 1)
## KNN: k = 1
## observed Status
## predicted Status Down Up
## Down 21 29
## Up 22 32
##
## Error Rate: 49.03846 %
## Correctly Predicted: 50.96154 %
## False Positive Rate: 51.16279 %
## False Negative Rate: 47.54098 %
train <- (Weekly$Year < 2009)
weekly09 <- Weekly[!train, ]
direction09 <- Weekly$Direction[!train]
dim(weekly09)
## [1] 104   9
glm_fit <- glm(Direction ~ Lag2, data = Weekly, family = binomial, subset = train)
glm_probability <- predict(glm_fit, weekly09, type = "response")
glm_prediction <- rep("Down", 104)
glm_prediction[glm_probability > .5] <- "Up"
table(glm_prediction, direction09)
##               direction09
## glm_prediction Down Up
##           Down    9  5
##           Up     34 56
library(e1071)
library(ISLR2)
nbayes <- naiveBayes(Direction ~ Lag2, data = Weekly, subset = train)
nbayes
##
## Naive Bayes Classifier for Discrete Predictors
##
## Call:
## naiveBayes.default(x = X, y = Y, laplace = laplace)
##
## A-priori probabilities:
## Y
## Down Up
## 0.4477157 0.5522843
##
## Conditional probabilities:
## Lag2
## Y [,1] [,2]
## Down -0.03568254 2.199504
## Up 0.26036581 2.317485
nbayes.class <- predict(nbayes, weekly09)
table(nbayes.class, direction09)
## direction09
## nbayes.class Down Up
## Down 0 0
## Up 43 61
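Like QDA, naive Bayes predicts Up for every held-out week, so it also achieves 61/104 ≈ 58.7% accuracy.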
logit_model <- glm(Direction ~ Lag1 * Lag2, data = Weekly, family = binomial, subset = train)
predicted_direction <- predict_glm_direction(logit_model, test_set)
show_model_performance(predicted_direction, test_set$Direction)
## observed Status
## predicted Status Down Up
## Down 7 8
## Up 36 53
##
## Error Rate: 42.30769 %
## Correctly Predicted: 57.69231 %
## False Positive Rate: 83.72093 %
## False Negative Rate: 13.11475 %
lda_model <- lda(Direction ~ Lag1 * Lag2, data = Weekly, subset = train)
predictions <- predict(lda_model, test_set)
show_model_performance(predictions$class, test_set$Direction)
## observed Status
## predicted Status Down Up
## Down 7 8
## Up 36 53
##
## Error Rate: 42.30769 %
## Correctly Predicted: 57.69231 %
## False Positive Rate: 83.72093 %
## False Negative Rate: 13.11475 %
qda_model <- qda(Direction ~ Lag2 + sqrt(abs(Lag2)), data = Weekly, subset = train)
predictions <- predict(qda_model, test_set)
show_model_performance(predictions$class, test_set$Direction)
## observed Status
## predicted Status Down Up
## Down 12 13
## Up 31 48
##
## Error Rate: 42.30769 %
## Correctly Predicted: 57.69231 %
## False Positive Rate: 72.09302 %
## False Negative Rate: 21.31148 %
run_knn(train_matrix, test_matrix, train_set$Direction, test_set$Direction, k = 10)
## KNN: k = 10
## observed Status
## predicted Status Down Up
## Down 18 21
## Up 25 40
##
## Error Rate: 44.23077 %
## Correctly Predicted: 55.76923 %
## False Positive Rate: 58.13953 %
## False Negative Rate: 34.42623 %
run_knn(train_matrix, test_matrix, train_set$Direction, test_set$Direction, k = 100)
## KNN: k = 100
## observed Status
## predicted Status Down Up
## Down 10 13
## Up 33 48
##
## Error Rate: 44.23077 %
## Correctly Predicted: 55.76923 %
## False Positive Rate: 76.74419 %
## False Negative Rate: 21.31148 %
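Of all the methods tried, logistic regression and LDA using Lag2 alone perform best on the 2009-2010 test data, each classifying 62.5% of weeks correctly; the interaction models, the transformed-Lag2 QDA, and KNN at k = 1, 10, and 100 all do worse.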
Question 14.) In this problem, you will develop a model to predict whether a given car gets high or low gas mileage based on the Auto data set.
Auto$mpg01 <- Auto$mpg > median(Auto$mpg)
head(Auto$mpg01, n = 20)
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13] FALSE FALSE TRUE FALSE FALSE FALSE TRUE TRUE
cor(subset(Auto, select = -name))
## mpg cylinders displacement horsepower weight
## mpg 1.0000000 -0.7776175 -0.8051269 -0.7784268 -0.8322442
## cylinders -0.7776175 1.0000000 0.9508233 0.8429834 0.8975273
## displacement -0.8051269 0.9508233 1.0000000 0.8972570 0.9329944
## horsepower -0.7784268 0.8429834 0.8972570 1.0000000 0.8645377
## weight -0.8322442 0.8975273 0.9329944 0.8645377 1.0000000
## acceleration 0.4233285 -0.5046834 -0.5438005 -0.6891955 -0.4168392
## year 0.5805410 -0.3456474 -0.3698552 -0.4163615 -0.3091199
## origin 0.5652088 -0.5689316 -0.6145351 -0.4551715 -0.5850054
## mpg01 0.8369392 -0.7591939 -0.7534766 -0.6670526 -0.7577566
## acceleration year origin mpg01
## mpg 0.4233285 0.5805410 0.5652088 0.8369392
## cylinders -0.5046834 -0.3456474 -0.5689316 -0.7591939
## displacement -0.5438005 -0.3698552 -0.6145351 -0.7534766
## horsepower -0.6891955 -0.4163615 -0.4551715 -0.6670526
## weight -0.4168392 -0.3091199 -0.5850054 -0.7577566
## acceleration 1.0000000 0.2903161 0.2127458 0.3468215
## year 0.2903161 1.0000000 0.1815277 0.4299042
## origin 0.2127458 0.1815277 1.0000000 0.5136984
## mpg01 0.3468215 0.4299042 0.5136984 1.0000000
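mpg01 correlates most strongly (and negatively) with cylinders (-0.76), weight (-0.76), displacement (-0.75), and horsepower (-0.67), so those four variables are used as predictors in the models that follow.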
pairs(Auto)
set.seed(1)  # fix the RNG so the random split is reproducible
train <- sample(nrow(Auto), round(0.7 * nrow(Auto)))  # indices of a random 70% training split
train_set <- Auto[train, ]
test_set <- Auto[-train, ]
lda_model <- lda(mpg01 ~ cylinders + weight + displacement + horsepower,
                 data = Auto,
                 subset = train)
predictions <- predict(lda_model, test_set)
show_model_performance(predictions$class, test_set$mpg01)
## observed Status
## predicted Status FALSE TRUE
## FALSE 18 16
## TRUE 2 82
##
## Error Rate: 15.25424 %
## Correctly Predicted: 84.74576 %
## False Positive Rate: 10 %
## False Negative Rate: 16.32653 %
qda_model <- qda(mpg01 ~ cylinders + weight + displacement + horsepower,
                 data = Auto,
                 subset = train)
predictions <- predict(qda_model, test_set)
show_model_performance(predictions$class, test_set$mpg01)
## observed Status
## predicted Status FALSE TRUE
## FALSE 18 19
## TRUE 2 79
##
## Error Rate: 17.79661 %
## Correctly Predicted: 82.20339 %
## False Positive Rate: 10 %
## False Negative Rate: 19.38776 %
logit_model <- glm(mpg01 ~ cylinders + weight + displacement + horsepower,
                   data = Auto,
                   family = binomial,
                   subset = train)
predictions <- predict(logit_model, test_set, type = "response")
show_model_performance(predictions > 0.5, test_set$mpg01)
## observed Status
## predicted Status FALSE TRUE
## FALSE 20 21
## TRUE 0 77
##
## Error Rate: 17.79661 %
## Correctly Predicted: 82.20339 %
## False Positive Rate: 0 %
## False Negative Rate: 21.42857 %
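So far LDA has the lowest test error rate (15.3%), with QDA and logistic regression tied at 17.8%; logistic regression is the only model with no false positives.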
library(e1071)
# Naive Bayes with mpg01 as the response and the same four predictors
# used for the LDA/QDA fits above
nbayes <- naiveBayes(as.factor(mpg01) ~ cylinders + weight + displacement + horsepower,
                     data = Auto, subset = train)
nbayes.class <- predict(nbayes, test_set)
show_model_performance(nbayes.class, test_set$mpg01)
vars <- c("cylinders", "weight", "displacement", "horsepower")
train_matrix <- as.matrix(train_set[, vars])
test_matrix <- as.matrix(test_set[, vars])
run_knn(train_matrix, test_matrix, train_set$mpg01, test_set$mpg01, k = 1)
## KNN: k = 1
## observed Status
## predicted Status FALSE TRUE
## FALSE 18 24
## TRUE 2 74
##
## Error Rate: 22.0339 %
## Correctly Predicted: 77.9661 %
## False Positive Rate: 10 %
## False Negative Rate: 24.4898 %
run_knn(train_matrix, test_matrix, train_set$mpg01, test_set$mpg01, k = 10)
## KNN: k = 10
## observed Status
## predicted Status FALSE TRUE
## FALSE 20 22
## TRUE 0 76
##
## Error Rate: 18.64407 %
## Correctly Predicted: 81.35593 %
## False Positive Rate: 0 %
## False Negative Rate: 22.44898 %
run_knn(train_matrix, test_matrix, train_set$mpg01, test_set$mpg01, k = 100)
## KNN: k = 100
## observed Status
## predicted Status FALSE TRUE
## FALSE 20 25
## TRUE 0 73
##
## Error Rate: 21.18644 %
## Correctly Predicted: 78.81356 %
## False Positive Rate: 0 %
## False Negative Rate: 25.5102 %
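One caveat on the KNN fits above: knn() uses Euclidean distance on the raw predictor scales, so a variable measured in large units (weight, in pounds) dominates one measured in small units (cylinders). A minimal sketch of standardizing with the training-set statistics first (not run above, so the results would differ from those shown):

# Standardize the predictors; scale the test set with the training
# means and standard deviations to avoid leaking test information.
train_scaled <- scale(train_matrix)
test_scaled <- scale(test_matrix,
                     center = attr(train_scaled, "scaled:center"),
                     scale = attr(train_scaled, "scaled:scale"))
run_knn(train_scaled, test_scaled, train_set$mpg01, test_set$mpg01, k = 10)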
Question 16.) Using the Boston data set, fit classification models in order to predict whether a given census tract has a crime rate above or below the median. Explore logistic regression, LDA, naive Bayes, and KNN models using various subsets of the predictors. Describe your findings. Hint: You will have to create the response variable yourself, using the variables that are contained in the Boston data set.
Answer: The pairs plots suggest that medv, age, nox, and dis are associated with the crime rate, so they are reasonable candidate predictors for the binary crime variable. We first fit a logistic regression so we can identify the statistically significant predictors, then evaluate how well the model classifies tracts as above or below the median crime rate. Because age and dis are not statistically significant at the 5% level, we drop them and keep medv and nox. Both coefficients are positive: higher median home values and higher nitrogen oxide concentrations are associated with a higher probability of an above-median crime rate. The test error rate for this model is about 19.2%. We then fit an LDA model using the original set of predictors. The discriminant coefficients for medv, age, and nox are positive, while dis is the only negative one. The LDA model's test error rate of 18.5% is slightly lower than the logistic model's.

For our final model we use K-nearest neighbors with the predictors ptratio, tax, and nox. The test error rates are about 6.2% with k = 3 and 7.5% with k = 5, much better than either of the previous models. The drawback of this approach is that KNN does not tell us which predictors matter or how they affect the outcome probability. To better understand crime rates, we should examine what communities with lower crime rates are doing in terms of tax rates, air pollution, and school funding.
attach(Boston)
median_crime <- median(crim)
crim_lvl <- rep(0, 506)
crim_lvl[crim > median_crime] <- 1
crim_lvl <- as.factor(crim_lvl)
Boston_2 <- data.frame(Boston, crim_lvl)
detach(Boston)
pairs(Boston_2)
set.seed(1)
train_13 <- rbinom(506, 1, 0.7)
Boston_2 <- cbind(Boston_2, train_13)
Boston.train <- Boston_2[train_13 == 1,]
Boston.test <- Boston_2[train_13 == 0,]
attach(Boston.train)
## The following objects are masked _by_ .GlobalEnv:
##
## crim_lvl, train_13
log_13_fits <- glm(crim_lvl ~ nox + age + dis + medv, data = Boston.train, family = binomial)
summary(log_13_fits)
##
## Call:
## glm(formula = crim_lvl ~ nox + age + dis + medv, family = binomial,
## data = Boston.train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.17404 -0.35742 0.00278 0.26418 2.53635
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -24.900404 4.019591 -6.195 5.84e-10 ***
## nox 38.426015 5.859800 6.558 5.47e-11 ***
## age 0.016253 0.009966 1.631 0.1029
## dis 0.309361 0.164026 1.886 0.0593 .
## medv 0.087237 0.028230 3.090 0.0020 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 499.02 on 359 degrees of freedom
## Residual deviance: 210.55 on 355 degrees of freedom
## AIC: 220.55
##
## Number of Fisher Scoring iterations: 7
log_13_fits <- glm(crim_lvl ~ nox + dis + medv, data = Boston.train, family = binomial)
summary(log_13_fits)
##
## Call:
## glm(formula = crim_lvl ~ nox + dis + medv, family = binomial,
## data = Boston.train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.19958 -0.39388 0.00252 0.26563 2.48475
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -23.94414 3.89175 -6.153 7.63e-10 ***
## nox 39.78032 5.77733 6.886 5.75e-12 ***
## dis 0.23393 0.15697 1.490 0.13615
## medv 0.07713 0.02678 2.880 0.00398 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 499.02 on 359 degrees of freedom
## Residual deviance: 213.25 on 356 degrees of freedom
## AIC: 221.25
##
## Number of Fisher Scoring iterations: 7
log_13_fits <- glm(crim_lvl ~ nox + medv, data = Boston.train, family = binomial)
summary(log_13_fits)
##
## Call:
## glm(formula = crim_lvl ~ nox + medv, family = binomial, data = Boston.train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.17657 -0.38729 0.00523 0.30375 2.65695
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -19.74864 2.42833 -8.133 4.2e-16 ***
## nox 33.97633 3.88025 8.756 < 2e-16 ***
## medv 0.06605 0.02524 2.617 0.00887 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 499.02 on 359 degrees of freedom
## Residual deviance: 215.41 on 357 degrees of freedom
## AIC: 221.41
##
## Number of Fisher Scoring iterations: 6
detach(Boston.train)
log_13_prob <- predict(log_13_fits, Boston.test, type = "response")
log_13_preds <- rep(0, 146)
log_13_preds[log_13_prob > 0.5] <- 1
dat <- matrix(data = table(log_13_preds, Boston.test$crim_lvl), nrow = 2, ncol = 2,
              dimnames = list(c("Below median", "Above median"), c("Below", "Above")))
names(dimnames(dat)) <- c("predicted", "observed")
print(dat)
## observed
## predicted Below Above
## Below median 63 16
## Above median 12 55
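From this table, the logistic model's test error rate is (16 + 12)/146 ≈ 19.2%.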
lda_13_fits <- lda(crim_lvl ~ nox + age + dis + medv, data = Boston_2, subset = (train_13 == 1))
lda_13_fits
## Call:
## lda(crim_lvl ~ nox + age + dis + medv, data = Boston_2, subset = (train_13 ==
## 1))
##
## Prior probabilities of groups:
## 0 1
## 0.4944444 0.5055556
##
## Group means:
## nox age dis medv
## 0 0.4690051 51.33371 5.241056 24.95618
## 1 0.6401758 86.26044 2.498684 20.19670
##
## Coefficients of linear discriminants:
## LD1
## nox 9.32383649
## age 0.01342745
## dis -0.08315746
## medv 0.01499271
lda_13_preds <- predict(lda_13_fits, Boston.test)
lda_13_class <- lda_13_preds$class
dat <- matrix(data = table(lda_13_class, Boston.test$crim_lvl), nrow = 2, ncol = 2,
              dimnames = list(c("Below median", "Above median"), c("Below", "Above")))
names(dimnames(dat)) <- c("predicted", "observed")
print(dat)
## observed
## predicted Below Above
## Below median 62 14
## Above median 13 57
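The LDA test error rate is (14 + 13)/146 ≈ 18.5%, slightly better than the logistic model.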
# Note: the Boston column is "ptratio" (pupil-teacher ratio)
train.x_13 <- cbind(Boston.train$nox, Boston.train$tax, Boston.train$ptratio)
test.x_13 <- cbind(Boston.test$nox, Boston.test$tax, Boston.test$ptratio)
set.seed(1)
knn_pred_13 <- knn(train.x_13, test.x_13, Boston.train$crim_lvl, k=3)
table(knn_pred_13, Boston.test$crim_lvl)
##
## knn_pred_13 0 1
## 0 71 5
## 1 4 66
knn_pred_13_2 <- knn(train.x_13, test.x_13, Boston.train$crim_lvl, k=5)
table(knn_pred_13_2, Boston.test$crim_lvl)
##
## knn_pred_13_2 0 1
## 0 69 5
## 1 6 66
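These tables give the error rates quoted above: (5 + 4)/146 ≈ 6.2% for k = 3 and (5 + 6)/146 ≈ 7.5% for k = 5.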