the first step is to substitute \(Z\) for \(e^{\beta_0+\beta_1X}\)
plugging this into equation (4.2) gives \(p(X) = \frac{Z}{1+Z}\)
which we can transform to \(\frac{1}{p(X)} = \frac{1+Z}{Z} = 1+\frac{1}{Z}\)
solve for \(Z = \frac{1}{\frac{1}{p(X)}-1} = \frac{1}{\frac{1-p(X)}{p(X)}} = \frac{p(X)}{1-p(X)}\)
\(p_k(x) = \frac {\pi_k \frac {1} {\sqrt{2 \pi} \sigma} \exp(- \frac {1} {2 \sigma^2} (x - \mu_k)^2) } {\sum { \pi_l \frac {1} {\sqrt{2 \pi} \sigma} \exp(- \frac {1} {2 \sigma^2} (x - \mu_l)^2) }}\) (4.12)
\(\delta_k(x) = x \frac{\mu_k} {\sigma^2} - \frac{\mu_k^2} {2\sigma^2} + log \pi_k\) (4.13)
substitute the term independent of k \(Z = \frac { \frac {1} {\sqrt{2 \pi} \sigma} \exp(- \frac {1} {2 \sigma^2} (x^2)) } {\sum_{l=1}^{K} { \pi_l \frac {1} {\sqrt{2 \pi} \sigma} \exp(- \frac {1} {2 \sigma^2} (x - \mu_l)^2) }}\)
now we can write (4.12) as follows: \(p_k(x) = Z \pi_k \exp(- \frac {1} {2 \sigma^2} (\mu_k^2 - 2x \mu_k))\)
Take log \(log(p_k(x)) = log(Z) + log(\pi_k) + (- \frac {1} {2 \sigma^2} (\mu_k^2 - 2x \mu_k))\)
rearrange \(log(p_k(x)) = (\frac {2x \mu_k} {2 \sigma^2} -\frac {\mu_k^2} {2 \sigma^2}) + log(\pi_k) + log(Z)\)
since \(log(Z)\) is just a constant which gets added to every K we can just leave it out
Equation (4.12) becomes: \(p_k(x) = \frac {\pi_k \frac {1} {\sqrt{2 \pi} \sigma_k} \exp(- \frac {1} {2 \sigma_k^2} (x - \mu_k)^2) } {\sum { \pi_l \frac {1} {\sqrt{2 \pi} \sigma_l} \exp(- \frac {1} {2 \sigma_l^2} (x - \mu_l)^2) }}\)
substitute the term independent of k \(Z = \frac { \frac {1} {\sqrt{2 \pi}}} {\sum { \pi_l \frac {1} {\sqrt{2 \pi} \sigma_l} \exp(- \frac {1} {2 \sigma_l^2} (x - \mu_l)^2) }}\)
transform to \(p_k(x) = Z \frac{\pi_k}{\sigma_k} \exp(- \frac {1} {2 \sigma_k^2} (x - \mu_k)^2)\)
take log \(log(p_k(x)) = log(Z) + log(\pi_k) - log(\sigma_k) + (- \frac {1} {2 \sigma_k^2} (x - \mu_k)^2)\)
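rearrange \(log(p_k(x)) = (- \frac {1} {2 \sigma_k^2} (x^2 + \mu_k^2 - 2x\mu_k)) + log(\pi_k) - log(\sigma_k) + log(Z)\)
since the coefficient of \(x^2\), \(- \frac {1} {2 \sigma_k^2}\), now depends on k, the \(x^2\) term cannot be dropped and the discriminant is quadratic in x rather than linear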
library(ISLR)
summary(Weekly)
## Year Lag1 Lag2 Lag3
## Min. :1990 Min. :-18.1950 Min. :-18.1950 Min. :-18.1950
## 1st Qu.:1995 1st Qu.: -1.1540 1st Qu.: -1.1540 1st Qu.: -1.1580
## Median :2000 Median : 0.2410 Median : 0.2410 Median : 0.2410
## Mean :2000 Mean : 0.1506 Mean : 0.1511 Mean : 0.1472
## 3rd Qu.:2005 3rd Qu.: 1.4050 3rd Qu.: 1.4090 3rd Qu.: 1.4090
## Max. :2010 Max. : 12.0260 Max. : 12.0260 Max. : 12.0260
## Lag4 Lag5 Volume Today
## Min. :-18.1950 Min. :-18.1950 Min. :0.08747 Min. :-18.1950
## 1st Qu.: -1.1580 1st Qu.: -1.1660 1st Qu.:0.33202 1st Qu.: -1.1540
## Median : 0.2380 Median : 0.2340 Median :1.00268 Median : 0.2410
## Mean : 0.1458 Mean : 0.1399 Mean :1.57462 Mean : 0.1499
## 3rd Qu.: 1.4090 3rd Qu.: 1.4050 3rd Qu.:2.05373 3rd Qu.: 1.4050
## Max. : 12.0260 Max. : 12.0260 Max. :9.32821 Max. : 12.0260
## Direction
## Down:484
## Up :605
##
##
##
##
library(ISLR)
pairs(Weekly, col=Weekly$Direction)
* Volume and Year are correlated
# Logistic regression of weekly market direction on the five lagged returns
# and trading volume, fitted on the complete Weekly data set.
glm.fit1 <- glm(
  Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + Volume,
  family = binomial,
  data = Weekly
)
summary(glm.fit1)
##
## Call:
## glm(formula = Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 +
## Volume, family = binomial, data = Weekly)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.6949 -1.2565 0.9913 1.0849 1.4579
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.26686 0.08593 3.106 0.0019 **
## Lag1 -0.04127 0.02641 -1.563 0.1181
## Lag2 0.05844 0.02686 2.175 0.0296 *
## Lag3 -0.01606 0.02666 -0.602 0.5469
## Lag4 -0.02779 0.02646 -1.050 0.2937
## Lag5 -0.01447 0.02638 -0.549 0.5833
## Volume -0.02274 0.03690 -0.616 0.5377
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1496.2 on 1088 degrees of freedom
## Residual deviance: 1486.4 on 1082 degrees of freedom
## AIC: 1500.4
##
## Number of Fisher Scoring iterations: 4
# Fitted probabilities on the data the model was trained on; anything above
# 0.5 is labelled "Up" below, everything else stays "Down".
glm.probs <- predict(glm.fit1, type = "response")
glm.pred1 <- replace(rep("Down", length(glm.probs)), glm.probs > 0.5, "Up")
# Confusion matrix: rows are the predictions, columns the observed direction.
table(glm.pred1, Weekly$Direction)
##
## glm.pred1 Down Up
## Down 54 48
## Up 430 557
# Overall training accuracy, computed from the predictions themselves rather
# than from counts retyped out of the printed confusion matrix.
mean(glm.pred1 == Weekly$Direction)
## [1] 0.5610652
# Share of the weeks predicted "Down" that really were "Down".
mean(Weekly$Direction[glm.pred1 == "Down"] == "Down")
## [1] 0.5294118
# Share of the weeks predicted "Up" that really were "Up".
mean(Weekly$Direction[glm.pred1 == "Up"] == "Up")
## [1] 0.5643364
# Weeks before 2009 form the training set; the remaining weeks are held out.
train.years <- Weekly$Year < 2009
train <- Weekly[train.years, ]
test <- Weekly[!train.years, ]
# Logistic regression on Lag2 alone, scored on the held-out weeks.
glm.fit2 <- glm(Direction ~ Lag2, family = binomial, data = train)
glm.probs2 <- predict(glm.fit2, newdata = test, type = "response")
glm.pred2 <- ifelse(glm.probs2 > 0.5, "Up", "Down")
table(glm.pred2, test$Direction)
##
## glm.pred2 Down Up
## Down 9 5
## Up 34 56
mean(glm.pred2 == test$Direction)
## [1] 0.625
# lda() and qda() are provided by MASS; attach it before the first use so the
# script runs from a fresh session.
library(MASS)
# Linear discriminant analysis on Lag2, fitted on the training weeks and
# evaluated on the held-out weeks.
lda.fit <- lda(Direction ~ Lag2, data = train)
lda.fit.pred <- predict(lda.fit, newdata = test)$class
table(lda.fit.pred, test$Direction)
##
## lda.fit.pred Down Up
## Down 9 5
## Up 34 56
mean(lda.fit.pred == test$Direction)
## [1] 0.625
# Quadratic discriminant analysis with the same single predictor (Lag2) and
# the same train/test split.
qda.fit <- qda(Direction ~ Lag2, data = train)
qda.fit.pred <- predict(qda.fit, newdata = test)$class
table(qda.fit.pred, test$Direction)
##
## qda.fit.pred Down Up
## Down 0 0
## Up 43 61
mean(qda.fit.pred == test$Direction)
## [1] 0.5865385
library(class)
# Seed the RNG before calling knn() so the run is repeatable.
set.seed(1)
# knn() expects matrix-like predictors, so wrap the single Lag2 column.
train.X <- as.matrix(train$Lag2)
test.X <- as.matrix(test$Lag2)
# 1-nearest-neighbour classification of the held-out weeks.
knn.pred <- knn(train = train.X, test = test.X, cl = train$Direction, k = 1)
table(knn.pred, test$Direction)
##
## knn.pred Down Up
## Down 21 30
## Up 22 31
mean(knn.pred == test$Direction)
## [1] 0.5
* Logistic Regression and LDA provide the best results (both reach a test accuracy of 0.625)
knn.pred = knn(train.X, test.X, train$Direction, k=5)
table(knn.pred, test$Direction)
##
## knn.pred Down Up
## Down 15 20
## Up 28 41
mean(knn.pred == test$Direction)
## [1] 0.5384615
knn.pred = knn(train.X, test.X, train$Direction, k=10)
table(knn.pred, test$Direction)
##
## knn.pred Down Up
## Down 17 19
## Up 26 42
mean(knn.pred == test$Direction)
## [1] 0.5673077
knn.pred = knn(train.X, test.X, train$Direction, k=20)
table(knn.pred, test$Direction)
##
## knn.pred Down Up
## Down 21 20
## Up 22 41
mean(knn.pred == test$Direction)
## [1] 0.5961538
knn.pred = knn(train.X, test.X, train$Direction, k=18)
table(knn.pred, test$Direction)
##
## knn.pred Down Up
## Down 19 19
## Up 24 42
mean(knn.pred == test$Direction)
## [1] 0.5865385
qda.fit = qda(Direction~Lag2+Lag1, data=train)
qda.fit.pred = predict(qda.fit, test)$class
table(qda.fit.pred, test$Direction)
##
## qda.fit.pred Down Up
## Down 7 10
## Up 36 51
mean(qda.fit.pred == test$Direction)
## [1] 0.5576923
lda.fit = lda(Direction~Lag2*Lag3+Lag4+Lag5, data=train)
lda.fit.pred = predict(lda.fit, test)$class
table(lda.fit.pred, test$Direction)
##
## lda.fit.pred Down Up
## Down 9 5
## Up 34 56
mean(lda.fit.pred == test$Direction)
## [1] 0.625
library(ISLR)
data(Auto)
# Binary response: 1 when a car's mpg is above the sample median, 0 otherwise.
mpg01 <- as.numeric(Auto$mpg > median(Auto$mpg))
# Auto with the new indicator appended as an extra column.
df <- data.frame(Auto, mpg01)
pairs(df)
par(mfrow=c(2,3))
boxplot(cylinders ~ mpg01, data = Auto, main = "Cylinders vs mpg01", col="salmon")
boxplot(displacement ~ mpg01, data = Auto, main = "Displacement vs mpg01", col="salmon2")
boxplot(horsepower ~ mpg01, data = Auto, main = "Horsepower vs mpg01", col="salmon3")
boxplot(weight ~ mpg01, data = Auto, main = "Weight vs mpg01", col="plum")
boxplot(acceleration ~ mpg01, data = Auto, main = "Acceleration vs mpg01", col="plum2")
boxplot(year ~ mpg01, data = Auto, main = "Year vs mpg01", col="plum4")
# Random split: about 70% of the rows for training, the rest for testing.
trainid <- sample(
  seq_len(nrow(df)),
  size = nrow(df) * 0.7,
  replace = FALSE
)
train <- df[trainid, ]
test <- df[-trainid, ]
# LDA on four of the Auto predictors.
lda.fit <- lda(
  mpg01 ~ displacement + horsepower + weight + cylinders,
  data = train
)
lda.fit.pred <- predict(lda.fit, newdata = test)$class
table(lda.fit.pred, test$mpg01)
##
## lda.fit.pred 0 1
## 0 54 4
## 1 7 53
mean(lda.fit.pred != test$mpg01)
## [1] 0.09322034
qda.fit = qda(mpg01~displacement+horsepower+weight+cylinders, data=train)
qda.fit.pred = predict(qda.fit, test)$class
table(qda.fit.pred, test$mpg01)
##
## qda.fit.pred 0 1
## 0 56 4
## 1 5 53
mean(qda.fit.pred != test$mpg01)
## [1] 0.07627119
# Logistic regression with the same four predictors as the LDA/QDA fits.
logit.fit <- glm(
  mpg01 ~ displacement + horsepower + weight + cylinders,
  family = binomial,
  data = train
)
# Predicted probabilities on the test rows, cut at 0.5 into 0/1 labels.
logit.fit.prob <- predict(logit.fit, newdata = test, type = "response")
logit.fit.pred <- ifelse(logit.fit.prob > 0.5, 1, 0)
table(logit.fit.pred, test$mpg01)
##
## logit.fit.pred 0 1
## 0 58 4
## 1 3 53
mean(logit.fit.pred != test$mpg01) # error rate
## [1] 0.05932203
# Predictor matrices for knn(): displacement, horsepower, weight, cylinders.
# NOTE(review): the columns are used unscaled, so the largest-valued
# predictors probably dominate the distance -- consider scale(); confirm.
train.X <- cbind(
  train$displacement, train$horsepower, train$weight, train$cylinders
)
test.X <- cbind(
  test$displacement, test$horsepower, test$weight, test$cylinders
)
knn.pred <- knn(train = train.X, test = test.X, cl = train$mpg01, k = 1)
table(knn.pred, test$mpg01)
##
## knn.pred 0 1
## 0 56 9
## 1 5 48
mean(knn.pred != test$mpg01)
## [1] 0.1186441
knn.pred = knn(train.X, test.X, train$mpg01, k=10)
table(knn.pred, test$mpg01)
##
## knn.pred 0 1
## 0 56 6
## 1 5 51
mean(knn.pred != test$mpg01)
## [1] 0.09322034
knn.pred = knn(train.X, test.X, train$mpg01, k=20)
table(knn.pred, test$mpg01)
##
## knn.pred 0 1
## 0 56 6
## 1 5 51
mean(knn.pred != test$mpg01)
## [1] 0.09322034
knn.pred = knn(train.X, test.X, train$mpg01, k=30)
table(knn.pred, test$mpg01)
##
## knn.pred 0 1
## 0 56 6
## 1 5 51
mean(knn.pred != test$mpg01)
## [1] 0.09322034
knn.pred = knn(train.X, test.X, train$mpg01, k=50)
table(knn.pred, test$mpg01)
##
## knn.pred 0 1
## 0 54 4
## 1 7 53
mean(knn.pred != test$mpg01)
## [1] 0.09322034
knn.pred = knn(train.X, test.X, train$mpg01, k=100)
table(knn.pred, test$mpg01)
##
## knn.pred 0 1
## 0 55 4
## 1 6 53
mean(knn.pred != test$mpg01)
## [1] 0.08474576
knn.pred = knn(train.X, test.X, train$mpg01, k=200)
table(knn.pred, test$mpg01)
##
## knn.pred 0 1
## 0 51 2
## 1 10 55
mean(knn.pred != test$mpg01)
## [1] 0.1016949
# Print two raised to the third power.
# print() hands its argument back invisibly, so a call also evaluates to 8.
Power <- function() {
  print(2^3)
}
# Raise x to the power a and print the result.
#
# x: numeric base.
# a: numeric exponent.
# The printed value is also handed back (invisibly) by print().
Power2 <- function(x, a) {
  print(x^a)
}
Power2(10,3)
## [1] 1000
Power2(8, 17)
## [1] 2.2518e+15
Power2(131, 3)
## [1] 2248091
# Compute x raised to the power a and return it without printing.
#
# x: numeric base.
# a: numeric exponent.
Power3 <- function(x, a) {
  x^a
}
# Plot x^2 for x = 1..10 with a logarithmic y axis.
x <- 1:10
plot(
  x, Power3(x, 2),
  log = "y",
  main = "log(x^2) vs. x",
  xlab = "x",
  ylab = "log(x^2)"
)
Create a function, PlotPower(), that allows you to create a plot of x against \(x^a\) for a fixed a and for a range of values of x.
# Plot x^a against x.
#
# x: numeric vector of base values, drawn on the horizontal axis.
# a: exponent applied to every element of x.
# Called for its side effect of drawing; the value is whatever plot() returns.
PlotPower <- function(x, a) {
  # Use the caller's exponent so the curve matches the "x^<a>" axis label.
  plot(x, Power3(x, a), main = "x^a versus x",
       xlab = "x", ylab = paste0("x^", a))
}
PlotPower(1:10,3)
# The Boston housing data comes from MASS, so attach MASS and load Boston here
# (this section does not use Auto).
library(MASS)
data(Boston)
# Binary response: 1 when a suburb's crime rate is above the median, else 0.
crim01 <- ifelse(Boston$crim > median(Boston$crim), 1, 0)
boston_df <- data.frame(Boston, crim01)
summary(boston_df)
## crim zn indus chas
## Min. : 0.00632 Min. : 0.00 Min. : 0.46 Min. :0.00000
## 1st Qu.: 0.08204 1st Qu.: 0.00 1st Qu.: 5.19 1st Qu.:0.00000
## Median : 0.25651 Median : 0.00 Median : 9.69 Median :0.00000
## Mean : 3.61352 Mean : 11.36 Mean :11.14 Mean :0.06917
## 3rd Qu.: 3.67708 3rd Qu.: 12.50 3rd Qu.:18.10 3rd Qu.:0.00000
## Max. :88.97620 Max. :100.00 Max. :27.74 Max. :1.00000
## nox rm age dis
## Min. :0.3850 Min. :3.561 Min. : 2.90 Min. : 1.130
## 1st Qu.:0.4490 1st Qu.:5.886 1st Qu.: 45.02 1st Qu.: 2.100
## Median :0.5380 Median :6.208 Median : 77.50 Median : 3.207
## Mean :0.5547 Mean :6.285 Mean : 68.57 Mean : 3.795
## 3rd Qu.:0.6240 3rd Qu.:6.623 3rd Qu.: 94.08 3rd Qu.: 5.188
## Max. :0.8710 Max. :8.780 Max. :100.00 Max. :12.127
## rad tax ptratio black
## Min. : 1.000 Min. :187.0 Min. :12.60 Min. : 0.32
## 1st Qu.: 4.000 1st Qu.:279.0 1st Qu.:17.40 1st Qu.:375.38
## Median : 5.000 Median :330.0 Median :19.05 Median :391.44
## Mean : 9.549 Mean :408.2 Mean :18.46 Mean :356.67
## 3rd Qu.:24.000 3rd Qu.:666.0 3rd Qu.:20.20 3rd Qu.:396.23
## Max. :24.000 Max. :711.0 Max. :22.00 Max. :396.90
## lstat medv crim01
## Min. : 1.73 Min. : 5.00 Min. :0.0
## 1st Qu.: 6.95 1st Qu.:17.02 1st Qu.:0.0
## Median :11.36 Median :21.20 Median :0.5
## Mean :12.65 Mean :22.53 Mean :0.5
## 3rd Qu.:16.95 3rd Qu.:25.00 3rd Qu.:1.0
## Max. :37.97 Max. :50.00 Max. :1.0
pairs(boston_df)
cor(boston_df)
## crim zn indus chas nox
## crim 1.00000000 -0.20046922 0.40658341 -0.055891582 0.42097171
## zn -0.20046922 1.00000000 -0.53382819 -0.042696719 -0.51660371
## indus 0.40658341 -0.53382819 1.00000000 0.062938027 0.76365145
## chas -0.05589158 -0.04269672 0.06293803 1.000000000 0.09120281
## nox 0.42097171 -0.51660371 0.76365145 0.091202807 1.00000000
## rm -0.21924670 0.31199059 -0.39167585 0.091251225 -0.30218819
## age 0.35273425 -0.56953734 0.64477851 0.086517774 0.73147010
## dis -0.37967009 0.66440822 -0.70802699 -0.099175780 -0.76923011
## rad 0.62550515 -0.31194783 0.59512927 -0.007368241 0.61144056
## tax 0.58276431 -0.31456332 0.72076018 -0.035586518 0.66802320
## ptratio 0.28994558 -0.39167855 0.38324756 -0.121515174 0.18893268
## black -0.38506394 0.17552032 -0.35697654 0.048788485 -0.38005064
## lstat 0.45562148 -0.41299457 0.60379972 -0.053929298 0.59087892
## medv -0.38830461 0.36044534 -0.48372516 0.175260177 -0.42732077
## crim01 0.40939545 -0.43615103 0.60326017 0.070096774 0.72323480
## rm age dis rad tax ptratio
## crim -0.21924670 0.35273425 -0.37967009 0.625505145 0.58276431 0.2899456
## zn 0.31199059 -0.56953734 0.66440822 -0.311947826 -0.31456332 -0.3916785
## indus -0.39167585 0.64477851 -0.70802699 0.595129275 0.72076018 0.3832476
## chas 0.09125123 0.08651777 -0.09917578 -0.007368241 -0.03558652 -0.1215152
## nox -0.30218819 0.73147010 -0.76923011 0.611440563 0.66802320 0.1889327
## rm 1.00000000 -0.24026493 0.20524621 -0.209846668 -0.29204783 -0.3555015
## age -0.24026493 1.00000000 -0.74788054 0.456022452 0.50645559 0.2615150
## dis 0.20524621 -0.74788054 1.00000000 -0.494587930 -0.53443158 -0.2324705
## rad -0.20984667 0.45602245 -0.49458793 1.000000000 0.91022819 0.4647412
## tax -0.29204783 0.50645559 -0.53443158 0.910228189 1.00000000 0.4608530
## ptratio -0.35550149 0.26151501 -0.23247054 0.464741179 0.46085304 1.0000000
## black 0.12806864 -0.27353398 0.29151167 -0.444412816 -0.44180801 -0.1773833
## lstat -0.61380827 0.60233853 -0.49699583 0.488676335 0.54399341 0.3740443
## medv 0.69535995 -0.37695457 0.24992873 -0.381626231 -0.46853593 -0.5077867
## crim01 -0.15637178 0.61393992 -0.61634164 0.619786249 0.60874128 0.2535684
## black lstat medv crim01
## crim -0.38506394 0.4556215 -0.3883046 0.40939545
## zn 0.17552032 -0.4129946 0.3604453 -0.43615103
## indus -0.35697654 0.6037997 -0.4837252 0.60326017
## chas 0.04878848 -0.0539293 0.1752602 0.07009677
## nox -0.38005064 0.5908789 -0.4273208 0.72323480
## rm 0.12806864 -0.6138083 0.6953599 -0.15637178
## age -0.27353398 0.6023385 -0.3769546 0.61393992
## dis 0.29151167 -0.4969958 0.2499287 -0.61634164
## rad -0.44441282 0.4886763 -0.3816262 0.61978625
## tax -0.44180801 0.5439934 -0.4685359 0.60874128
## ptratio -0.17738330 0.3740443 -0.5077867 0.25356836
## black 1.00000000 -0.3660869 0.3334608 -0.35121093
## lstat -0.36608690 1.0000000 -0.7376627 0.45326273
## medv 0.33346082 -0.7376627 1.0000000 -0.26301673
## crim01 -0.35121093 0.4532627 -0.2630167 1.00000000
par(mfrow=c(2, 4))
boxplot(indus ~ crim01, data = boston_df, main = "indus vs crim01", col="salmon")
boxplot(zn ~ crim01, data = boston_df, main = "zn vs crim01", col="salmon")
boxplot(nox ~ crim01, data = boston_df, main = "nox vs crim01", col="salmon")
boxplot(age ~ crim01, data = boston_df, main = "age vs crim01", col="salmon")
boxplot(dis ~ crim01, data = boston_df, main = "dis vs crim01", col="salmon")
boxplot(rad ~ crim01, data = boston_df, main = "rad vs crim01", col="salmon")
boxplot(tax ~ crim01, data = boston_df, main = "tax vs crim01", col="salmon")
# Random split: about 70% of the rows for training, the rest for testing.
trainid <- sample(
  seq_len(nrow(boston_df)),
  size = nrow(boston_df) * 0.7,
  replace = FALSE
)
train <- boston_df[trainid, ]
test <- boston_df[-trainid, ]
# Predictor matrices for knn().
# X1: every column except crim and crim01.
train.X1 <- cbind(
  train$zn, train$indus, train$chas, train$nox, train$rm, train$age,
  train$dis, train$rad, train$tax, train$ptratio, train$black, train$lstat,
  train$medv
)
test.X1 <- cbind(
  test$zn, test$indus, test$chas, test$nox, test$rm, test$age,
  test$dis, test$rad, test$tax, test$ptratio, test$black, test$lstat,
  test$medv
)
# X2: the three-predictor subset age, tax, rad.
train.X2 <- cbind(train$age, train$tax, train$rad)
test.X2 <- cbind(test$age, test$tax, test$rad)
LDA Models
# LDA on the three-predictor subset (age, tax, rad).
lda.fit <- lda(crim01 ~ age + tax + rad, data = train)
lda.fit.pred <- predict(lda.fit, newdata = test)$class
table(lda.fit.pred, test$crim01)
##
## lda.fit.pred 0 1
## 0 64 18
## 1 6 64
mean(lda.fit.pred != test$crim01)
## [1] 0.1578947
lda.fit = lda(crim01~ . - crim -crim01, data=train)
lda.fit.pred = predict(lda.fit, test)$class
table(lda.fit.pred, test$crim01)
##
## lda.fit.pred 0 1
## 0 64 21
## 1 6 61
mean(lda.fit.pred != test$crim01)
## [1] 0.1776316
QDA Models
qda.fit = qda(crim01~age+tax+rad, data=train)
qda.fit.pred = predict(qda.fit, test)$class
table(qda.fit.pred, test$crim01)
##
## qda.fit.pred 0 1
## 0 66 37
## 1 4 45
mean(qda.fit.pred != test$crim01)
## [1] 0.2697368
qda.fit = qda(crim01~ . - crim -crim01, data=train)
qda.fit.pred = predict(qda.fit, test)$class
table(qda.fit.pred, test$crim01)
##
## qda.fit.pred 0 1
## 0 68 13
## 1 2 69
mean(qda.fit.pred != test$crim01)
## [1] 0.09868421
Logistic Regression Models
# Logistic Regression models
# Three-predictor logistic regression; test-set probabilities are cut at 0.5
# to produce 0/1 class labels.
logit.fit <- glm(crim01 ~ age + tax + rad, family = binomial, data = train)
logit.fit.prob <- predict(logit.fit, newdata = test, type = "response")
logit.fit.pred <- ifelse(logit.fit.prob > 0.5, 1, 0)
mean(logit.fit.pred != test$crim01) # error rate
## [1] 0.1710526
logit.fit = glm(crim01~. - crim -crim01, data=train, family=binomial)
logit.fit.prob = predict(logit.fit, test, type="response")
logit.fit.pred = ifelse(logit.fit.prob > 0.5, 1, 0)
mean(logit.fit.pred != test$crim01) # error rate
## [1] 0.09210526
# Seed the RNG so the KNN runs below are repeatable.
set.seed(1)
# library() stops with an error if the package is missing; require() would
# only warn and return FALSE, deferring the failure to the first knn() call.
library(class)
# KNN on the full predictor matrix (X1), starting with k = 1.
knn1.pred <- knn(train = train.X1, test = test.X1, cl = train$crim01, k = 1)
mean(knn1.pred != test$crim01)
## [1] 0.08552632
knn1.pred = knn(train.X1, test.X1, train$crim01, k=5)
mean(knn1.pred != test$crim01)
## [1] 0.09868421
knn1.pred = knn(train.X1, test.X1, train$crim01, k=10)
mean(knn1.pred != test$crim01)
## [1] 0.1118421
knn1.pred = knn(train.X1, test.X1, train$crim01, k=20)
mean(knn1.pred != test$crim01)
## [1] 0.1513158
knn1.pred = knn(train.X1, test.X1, train$crim01, k=50)
mean(knn1.pred != test$crim01)
## [1] 0.1381579
knn1.pred = knn(train.X1, test.X1, train$crim01, k=100)
mean(knn1.pred != test$crim01)
## [1] 0.2171053
knn1.pred = knn(train.X1, test.X1, train$crim01, k=200)
mean(knn1.pred != test$crim01)
## [1] 0.2763158
knn2.pred = knn(train.X2, test.X2, train$crim01, k=1)
mean(knn2.pred != test$crim01)
## [1] 0.09210526
knn2.pred = knn(train.X2, test.X2, train$crim01, k=5)
mean(knn2.pred != test$crim01)
## [1] 0.06578947
knn2.pred = knn(train.X2, test.X2, train$crim01, k=10)
mean(knn2.pred != test$crim01)
## [1] 0.1118421
knn2.pred = knn(train.X2, test.X2, train$crim01, k=20)
mean(knn2.pred != test$crim01)
## [1] 0.1578947
knn2.pred = knn(train.X2, test.X2, train$crim01, k=50)
mean(knn2.pred != test$crim01)
## [1] 0.1513158
knn2.pred = knn(train.X2, test.X2, train$crim01, k=100)
mean(knn2.pred != test$crim01)
## [1] 0.2894737
knn2.pred = knn(train.X2, test.X2, train$crim01, k=200)
mean(knn2.pred != test$crim01)
## [1] 0.2763158