This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the plot.
Chapter 04 (page 168): 13, 14, 16
13 This question should be answered using the Weekly data set, whichis part of the ISLR2 package. This data is similar in nature to the Smarket data from this chapter’s lab, except that it contains 1, 089weekly returns for 21 years, from the beginning of 1990 to the end of2010.
library(ISLR2)
head(Weekly)
## Year Lag1 Lag2 Lag3 Lag4 Lag5 Volume Today Direction
## 1 1990 0.816 1.572 -3.936 -0.229 -3.484 0.1549760 -0.270 Down
## 2 1990 -0.270 0.816 1.572 -3.936 -0.229 0.1485740 -2.576 Down
## 3 1990 -2.576 -0.270 0.816 1.572 -3.936 0.1598375 3.514 Up
## 4 1990 3.514 -2.576 -0.270 0.816 1.572 0.1616300 0.712 Up
## 5 1990 0.712 3.514 -2.576 -0.270 0.816 0.1537280 1.178 Up
## 6 1990 1.178 0.712 3.514 -2.576 -0.270 0.1544440 -1.372 Down
pairs(Weekly)
Yes there appears to be relationship between Year and Volume.
cor(subset(Weekly, select = -Direction))
## Year Lag1 Lag2 Lag3 Lag4
## Year 1.00000000 -0.032289274 -0.03339001 -0.03000649 -0.031127923
## Lag1 -0.03228927 1.000000000 -0.07485305 0.05863568 -0.071273876
## Lag2 -0.03339001 -0.074853051 1.00000000 -0.07572091 0.058381535
## Lag3 -0.03000649 0.058635682 -0.07572091 1.00000000 -0.075395865
## Lag4 -0.03112792 -0.071273876 0.05838153 -0.07539587 1.000000000
## Lag5 -0.03051910 -0.008183096 -0.07249948 0.06065717 -0.075675027
## Volume 0.84194162 -0.064951313 -0.08551314 -0.06928771 -0.061074617
## Today -0.03245989 -0.075031842 0.05916672 -0.07124364 -0.007825873
## Lag5 Volume Today
## Year -0.030519101 0.84194162 -0.032459894
## Lag1 -0.008183096 -0.06495131 -0.075031842
## Lag2 -0.072499482 -0.08551314 0.059166717
## Lag3 0.060657175 -0.06928771 -0.071243639
## Lag4 -0.075675027 -0.06107462 -0.007825873
## Lag5 1.000000000 -0.05851741 0.011012698
## Volume -0.058517414 1.00000000 -0.033077783
## Today 0.011012698 -0.03307778 1.000000000
logit_model <- glm(Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + Volume,
data = Weekly,
family = binomial)
summary(logit_model)
##
## Call:
## glm(formula = Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 +
## Volume, family = binomial, data = Weekly)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.6949 -1.2565 0.9913 1.0849 1.4579
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.26686 0.08593 3.106 0.0019 **
## Lag1 -0.04127 0.02641 -1.563 0.1181
## Lag2 0.05844 0.02686 2.175 0.0296 *
## Lag3 -0.01606 0.02666 -0.602 0.5469
## Lag4 -0.02779 0.02646 -1.050 0.2937
## Lag5 -0.01447 0.02638 -0.549 0.5833
## Volume -0.02274 0.03690 -0.616 0.5377
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1496.2 on 1088 degrees of freedom
## Residual deviance: 1486.4 on 1082 degrees of freedom
## AIC: 1500.4
##
## Number of Fisher Scoring iterations: 4
Lag2 has statistical Significance with less than .05 p-value
show_model_performance <- function(predicted_status, observed_status) {
confusion_matrix <- table(predicted_status,
observed_status,
dnn = c("predicted Status", "observed Status"))
print(confusion_matrix)
error_rate <- mean(predicted_status != observed_status)
cat("\n")
cat(" Error Rate:", 100 * error_rate, "%\n")
cat("Correctly Predicted:", 100 * (1-error_rate), "%\n")
cat("False Positive Rate:", 100 * confusion_matrix[2,1] / sum(confusion_matrix[,1]), "%\n")
cat("False Negative Rate:", 100 * confusion_matrix[1,2] / sum(confusion_matrix[,2]), "%\n")
}
predict_glm_direction <- function(model, newdata = NULL) {
predictions <- predict(model, newdata, type="response")
return(as.factor(ifelse(predictions < 0.5, "Down", "Up")))
}
predicted_direction <- predict_glm_direction(logit_model)
show_model_performance(predicted_direction, Weekly$Direction)
## observed Status
## predicted Status Down Up
## Down 54 48
## Up 430 557
##
## Error Rate: 43.89348 %
## Correctly Predicted: 56.10652 %
## False Positive Rate: 88.84298 %
## False Negative Rate: 7.933884 %
model prediction reveals market trend is correctly stated at 56.11%. The down trend shows 11.15% correct. The Up trend shows 92.07% was correct.
train <- (Weekly$Year < 2009)
train_set <- Weekly[train, ]
test_set <- Weekly[!train, ]
logit_model <- glm(Direction ~ Lag2, data = Weekly, family = binomial, subset = train)
predicted_direction <- predict_glm_direction(logit_model, test_set)
show_model_performance(predicted_direction, test_set$Direction)
## observed Status
## predicted Status Down Up
## Down 9 5
## Up 34 56
##
## Error Rate: 37.5 %
## Correctly Predicted: 62.5 %
## False Positive Rate: 79.06977 %
## False Negative Rate: 8.196721 %
the data shows 62.5% correctly predicted.
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:ISLR2':
##
## Boston
lda_model <- lda(Direction ~ Lag2, data = Weekly, subset = train)
predictions <- predict(lda_model, test_set, type="response")
show_model_performance(predictions$class, test_set$Direction)
## observed Status
## predicted Status Down Up
## Down 9 5
## Up 34 56
##
## Error Rate: 37.5 %
## Correctly Predicted: 62.5 %
## False Positive Rate: 79.06977 %
## False Negative Rate: 8.196721 %
the data shows 62.5% correctly predicted. (f) Repeat (d) using QDA.
qda_model <- qda(Direction ~ Lag2, data = Weekly, subset = train)
predictions <- predict(qda_model, test_set, type="response")
show_model_performance(predictions$class, test_set$Direction)
## observed Status
## predicted Status Down Up
## Down 0 0
## Up 43 61
##
## Error Rate: 41.34615 %
## Correctly Predicted: 58.65385 %
## False Positive Rate: 100 %
## False Negative Rate: 0 %
the data shows 58.65% correctly predicted. (g) Repeat (d) using KNN with K = 1.
library(class)
run_knn <- function(train, test, train_class, test_class, k) {
set.seed(12345)
predictions <- knn(train, test, train_class, k)
cat("KNN: k =", k, "\n")
show_model_performance(predictions, test_class)
}
train_matrix <- as.matrix(train_set$Lag2)
test_matrix <- as.matrix(test_set$Lag2)
run_knn(train_matrix, test_matrix, train_set$Direction, test_set$Direction, k = 1)
## KNN: k = 1
## observed Status
## predicted Status Down Up
## Down 21 29
## Up 22 32
##
## Error Rate: 49.03846 %
## Correctly Predicted: 50.96154 %
## False Positive Rate: 51.16279 %
## False Negative Rate: 47.54098 %
the data shows 50.96% correctly predicted.
train=(Weekly$Year<2009)
weekly09=Weekly[!train ,]
direction09=Weekly$Direction[!train]
dim(weekly09)
## [1] 104 9
glm_fit=glm(Direction~Lag2, data = Weekly,family=binomial ,subset=train)
glm_probability=predict (glm_fit,weekly09, type="response")
glm_prediction=rep("Down",104)
glm_prediction[glm_probability >.5]=" Up"
table(glm_prediction ,direction09)
## direction09
## glm_prediction Down Up
## Up 34 56
## Down 9 5
library(e1071)
library(ISLR2)
nbayes=naiveBayes(Direction~Lag2 ,data=Weekly ,subset=train)
nbayes
##
## Naive Bayes Classifier for Discrete Predictors
##
## Call:
## naiveBayes.default(x = X, y = Y, laplace = laplace)
##
## A-priori probabilities:
## Y
## Down Up
## 0.4477157 0.5522843
##
## Conditional probabilities:
## Lag2
## Y [,1] [,2]
## Down -0.03568254 2.199504
## Up 0.26036581 2.317485
nbayes.class=predict(nbayes ,weekly09)
table(nbayes.class ,direction09)
## direction09
## nbayes.class Down Up
## Down 0 0
## Up 43 61
Logistic regression seems to correctly predicted 62.5%. Comparing it to other method linear regression seemed to have highest correct prediction.
The logistic regression and the LDA perform better in terms of test error rate performance.
logit_model <- glm(Direction ~ Lag1 * Lag2, data = Weekly, family = binomial, subset = train)
predicted_direction <- predict_glm_direction(logit_model, test_set)
show_model_performance(predicted_direction, test_set$Direction)
## observed Status
## predicted Status Down Up
## Down 7 8
## Up 36 53
##
## Error Rate: 42.30769 %
## Correctly Predicted: 57.69231 %
## False Positive Rate: 83.72093 %
## False Negative Rate: 13.11475 %
lda_model <- lda(Direction ~ Lag1 * Lag2, data = Weekly, subset = train)
predictions <- predict(lda_model, test_set, type="response")
show_model_performance(predictions$class, test_set$Direction)
## observed Status
## predicted Status Down Up
## Down 7 8
## Up 36 53
##
## Error Rate: 42.30769 %
## Correctly Predicted: 57.69231 %
## False Positive Rate: 83.72093 %
## False Negative Rate: 13.11475 %
qda_model <- qda(Direction ~ Lag2 + sqrt(abs(Lag2)), data = Weekly, subset = train)
predictions <- predict(qda_model, test_set, type="response")
show_model_performance(predictions$class, test_set$Direction)
## observed Status
## predicted Status Down Up
## Down 12 13
## Up 31 48
##
## Error Rate: 42.30769 %
## Correctly Predicted: 57.69231 %
## False Positive Rate: 72.09302 %
## False Negative Rate: 21.31148 %
run_knn(train_matrix, test_matrix, train_set$Direction, test_set$Direction, k = 10)
## KNN: k = 10
## observed Status
## predicted Status Down Up
## Down 18 21
## Up 25 40
##
## Error Rate: 44.23077 %
## Correctly Predicted: 55.76923 %
## False Positive Rate: 58.13953 %
## False Negative Rate: 34.42623 %
run_knn(train_matrix, test_matrix, train_set$Direction, test_set$Direction, k = 100)
## KNN: k = 100
## observed Status
## predicted Status Down Up
## Down 10 13
## Up 33 48
##
## Error Rate: 44.23077 %
## Correctly Predicted: 55.76923 %
## False Positive Rate: 76.74419 %
## False Negative Rate: 21.31148 %
Auto$mpg01 <- Auto$mpg > median(Auto$mpg)
head(Auto$mpg01, n = 20)
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13] FALSE FALSE TRUE FALSE FALSE FALSE TRUE TRUE
Horsepower, mpg, displacement, weight, and cylinders appear to be the most likely to be beneficial in forecasting mpg01.
cor(subset(Auto, select = -name))
## mpg cylinders displacement horsepower weight
## mpg 1.0000000 -0.7776175 -0.8051269 -0.7784268 -0.8322442
## cylinders -0.7776175 1.0000000 0.9508233 0.8429834 0.8975273
## displacement -0.8051269 0.9508233 1.0000000 0.8972570 0.9329944
## horsepower -0.7784268 0.8429834 0.8972570 1.0000000 0.8645377
## weight -0.8322442 0.8975273 0.9329944 0.8645377 1.0000000
## acceleration 0.4233285 -0.5046834 -0.5438005 -0.6891955 -0.4168392
## year 0.5805410 -0.3456474 -0.3698552 -0.4163615 -0.3091199
## origin 0.5652088 -0.5689316 -0.6145351 -0.4551715 -0.5850054
## mpg01 0.8369392 -0.7591939 -0.7534766 -0.6670526 -0.7577566
## acceleration year origin mpg01
## mpg 0.4233285 0.5805410 0.5652088 0.8369392
## cylinders -0.5046834 -0.3456474 -0.5689316 -0.7591939
## displacement -0.5438005 -0.3698552 -0.6145351 -0.7534766
## horsepower -0.6891955 -0.4163615 -0.4551715 -0.6670526
## weight -0.4168392 -0.3091199 -0.5850054 -0.7577566
## acceleration 1.0000000 0.2903161 0.2127458 0.3468215
## year 0.2903161 1.0000000 0.1815277 0.4299042
## origin 0.2127458 0.1815277 1.0000000 0.5136984
## mpg01 0.3468215 0.4299042 0.5136984 1.0000000
pairs(Auto)
train <- sample(nrow(Auto) * 0.7)
train_set <- Auto[train, ]
test_set <- Auto[-train, ]
test rate is 15.25
lda_model <- lda(mpg01 ~ cylinders + weight + displacement + horsepower,
data = Auto,
subset = train)
predictions <- predict(lda_model, test_set)
show_model_performance(predictions$class, test_set$mpg01)
## observed Status
## predicted Status FALSE TRUE
## FALSE 18 16
## TRUE 2 82
##
## Error Rate: 15.25424 %
## Correctly Predicted: 84.74576 %
## False Positive Rate: 10 %
## False Negative Rate: 16.32653 %
test rate is 17.80
qda_model <- qda(mpg01 ~ cylinders + weight + displacement + horsepower,
data = Auto,
subset = train)
predictions <- predict(qda_model, test_set)
show_model_performance(predictions$class, test_set$mpg01)
## observed Status
## predicted Status FALSE TRUE
## FALSE 18 19
## TRUE 2 79
##
## Error Rate: 17.79661 %
## Correctly Predicted: 82.20339 %
## False Positive Rate: 10 %
## False Negative Rate: 19.38776 %
test rate is 17.80
logit_model <- glm(mpg01 ~ cylinders + weight + displacement + horsepower,
data = Auto,
family = binomial,
subset = train)
predictions <- predict(logit_model, test_set, type = "response")
show_model_performance(predictions > 0.5, test_set$mpg01)
## observed Status
## predicted Status FALSE TRUE
## FALSE 20 21
## TRUE 0 77
##
## Error Rate: 17.79661 %
## Correctly Predicted: 82.20339 %
## False Positive Rate: 0 %
## False Negative Rate: 21.42857 %
library(e1071)
library(ISLR2)
nbayes=naiveBayes(cylinders ~ mpg01 ,data=Auto ,subset=train)
nbayes
##
## Naive Bayes Classifier for Discrete Predictors
##
## Call:
## naiveBayes.default(x = X, y = Y, laplace = laplace)
##
## A-priori probabilities:
## Y
## 3 4 5 6 8
## 0.010948905 0.416058394 0.003649635 0.233576642 0.335766423
##
## Conditional probabilities:
## mpg01
## Y FALSE TRUE
## 3 1.0000000 0.0000000
## 4 0.1578947 0.8421053
## 5 1.0000000 0.0000000
## 6 0.9687500 0.0312500
## 8 1.0000000 0.0000000
We have a test error rate of 18.64% for K=10, 21.19% for K=100, and 24.49% for K=1, implying that k=100 performs the best on this data set.
vars <- c("cylinders", "weight", "displacement", "horsepower")
train_matrix <- as.matrix(train_set[, vars])
test_matrix <- as.matrix(test_set[, vars])
predictions <- knn(train_matrix, test_matrix, train_set$mpg01, 1)
run_knn(train_matrix, test_matrix, train_set$mpg01, test_set$mpg01, k = 1)
## KNN: k = 1
## observed Status
## predicted Status FALSE TRUE
## FALSE 18 24
## TRUE 2 74
##
## Error Rate: 22.0339 %
## Correctly Predicted: 77.9661 %
## False Positive Rate: 10 %
## False Negative Rate: 24.4898 %
run_knn(train_matrix, test_matrix, train_set$mpg01, test_set$mpg01, k = 10)
## KNN: k = 10
## observed Status
## predicted Status FALSE TRUE
## FALSE 20 22
## TRUE 0 76
##
## Error Rate: 18.64407 %
## Correctly Predicted: 81.35593 %
## False Positive Rate: 0 %
## False Negative Rate: 22.44898 %
run_knn(train_matrix, test_matrix, train_set$mpg01, test_set$mpg01, k = 100)
## KNN: k = 100
## observed Status
## predicted Status FALSE TRUE
## FALSE 20 25
## TRUE 0 73
##
## Error Rate: 21.18644 %
## Correctly Predicted: 78.81356 %
## False Positive Rate: 0 %
## False Negative Rate: 25.5102 %
We may deduce from analyzing the correlation plots for the variables that the following variables will be good predictors for our crime variable since they appear to have a correlation with our crime rate: medv, age, and nox. We begin by running a regression logistic model to determine the key predictors. We then assess the model’s ability to forecast whether the crime rate will be lower or higher than the national average. We were able to exclude the variables age and dis since they were not statistically significant predictors. As a result, we shall employ the remaining predictors medv and nox. The model depicts how the median value of owner-occupied homes and nox variables rise, causing the crime rate to rise. The overall error rate is between 19.2% and 50%. Following that, we do an LDA analysis with all of the predictors from the beginning. We can observe that medv, age, and nox all have a positive link with crime rates above the median. The only variable with a negative connection is dis. The ultimate error rate while using the LDA regression model is 18.5%, which is lower than the rate when using the prior model. We will utilize the K closest neighbors for our final model. The predictors pratio, tax, and nox will be used. Our results reveal error rates of 6.2% and 7.5% with 1 k of 3 and 1 k of 5, which are significantly better than the previous two models. The primary drawback with this model method is that we can’t tell which predictors are useful and how they impact outcome probability. To gain a better understanding of crime rates, we should look at what areas with lower crime rates do in terms of taxation, air pollution, and school finance.
attach(Boston)
median_crime = median(crim)
crim_lvl <- rep(0, 506)
crim_lvl[crim > median_crime] = 1
crim_lvl <- as.factor(crim_lvl)
Boston_2 <- data.frame(Boston, crim_lvl)
detach(Boston)
pairs(Boston_2)
set.seed(1)
train_13 <- rbinom(506, 1, 0.7)
Boston_2 <- cbind(Boston_2, train_13)
Boston.train <- Boston_2[train_13 == 1,]
Boston.test <- Boston_2[train_13 == 0,]
attach(Boston.train)
## The following objects are masked _by_ .GlobalEnv:
##
## crim_lvl, train_13
log_13_fits <- glm(crim_lvl~nox + age + dis + medv, data = Boston.train, family = binomial)
summary(log_13_fits)
##
## Call:
## glm(formula = crim_lvl ~ nox + age + dis + medv, family = binomial,
## data = Boston.train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.17404 -0.35742 0.00278 0.26418 2.53635
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -24.900404 4.019591 -6.195 5.84e-10 ***
## nox 38.426015 5.859800 6.558 5.47e-11 ***
## age 0.016253 0.009966 1.631 0.1029
## dis 0.309361 0.164026 1.886 0.0593 .
## medv 0.087237 0.028230 3.090 0.0020 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 499.02 on 359 degrees of freedom
## Residual deviance: 210.55 on 355 degrees of freedom
## AIC: 220.55
##
## Number of Fisher Scoring iterations: 7
log_13_fits <- glm(crim_lvl~nox + dis + medv, data = Boston.train, family = binomial)
summary(log_13_fits)
##
## Call:
## glm(formula = crim_lvl ~ nox + dis + medv, family = binomial,
## data = Boston.train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.19958 -0.39388 0.00252 0.26563 2.48475
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -23.94414 3.89175 -6.153 7.63e-10 ***
## nox 39.78032 5.77733 6.886 5.75e-12 ***
## dis 0.23393 0.15697 1.490 0.13615
## medv 0.07713 0.02678 2.880 0.00398 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 499.02 on 359 degrees of freedom
## Residual deviance: 213.25 on 356 degrees of freedom
## AIC: 221.25
##
## Number of Fisher Scoring iterations: 7
log_13_fits <- glm(crim_lvl~nox + medv, data = Boston.train, family = binomial)
summary(log_13_fits)
##
## Call:
## glm(formula = crim_lvl ~ nox + medv, family = binomial, data = Boston.train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.17657 -0.38729 0.00523 0.30375 2.65695
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -19.74864 2.42833 -8.133 4.2e-16 ***
## nox 33.97633 3.88025 8.756 < 2e-16 ***
## medv 0.06605 0.02524 2.617 0.00887 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 499.02 on 359 degrees of freedom
## Residual deviance: 215.41 on 357 degrees of freedom
## AIC: 221.41
##
## Number of Fisher Scoring iterations: 6
detach(Boston.train)
log_13_prob <- predict(log_13_fits, Boston.test, type = 'response')
log_13_preds <- rep(0, 146)
log_13_preds[log_13_prob > 0.5] = 1
dat <- matrix(data=table(log_13_preds, Boston.test$crim_lvl), nrow=2, ncol=2,
dimnames=list(c("Below median", "Above median"), c("Below", "Above")))
names(dimnames(dat)) <- c("predicted", "observed")
print(dat)
## observed
## predicted Below Above
## Below median 63 16
## Above median 12 55
lda_13_fits <- lda(crim_lvl~nox + age+ dis+medv, data = Boston_2, subset= (train_13==1))
lda_13_fits
## Call:
## lda(crim_lvl ~ nox + age + dis + medv, data = Boston_2, subset = (train_13 ==
## 1))
##
## Prior probabilities of groups:
## 0 1
## 0.4944444 0.5055556
##
## Group means:
## nox age dis medv
## 0 0.4690051 51.33371 5.241056 24.95618
## 1 0.6401758 86.26044 2.498684 20.19670
##
## Coefficients of linear discriminants:
## LD1
## nox 9.32383649
## age 0.01342745
## dis -0.08315746
## medv 0.01499271
lda_13_preds <- predict(lda_13_fits, Boston.test)
lda_13_class <- lda_13_preds$class
dat <- matrix(data=table(lda_13_class, Boston.test$crim_lvl), nrow=2, ncol=2,
dimnames=list(c("Below median", "Above median"), c("Below", "Above")))
names(dimnames(dat)) <- c("predicted", "observed")
print(dat)
## observed
## predicted Below Above
## Below median 62 14
## Above median 13 57
train.x_13 <- cbind(Boston.train$nox, Boston.train$tax, Boston.train$pratio)
test.x_13 <- cbind(Boston.test$nox, Boston.test$tax, Boston.test$pratio)
set.seed(1)
knn_pred_13 <- knn(train.x_13, test.x_13, Boston.train$crim_lvl, k=3)
table(knn_pred_13, Boston.test$crim_lvl)
##
## knn_pred_13 0 1
## 0 71 5
## 1 4 66
knn_pred_13_2 <- knn(train.x_13, test.x_13, Boston.train$crim_lvl, k=5)
table(knn_pred_13_2, Boston.test$crim_lvl)
##
## knn_pred_13_2 0 1
## 0 69 5
## 1 6 66