# Toy K-nearest-neighbours classification: six labelled training points
# (X1, X2 -> Y) and a single query point.
# stringsAsFactors = FALSE keeps Y as character on every R version. With a
# factor Y (the data.frame() default before R 4.0) the cat() calls below would
# print the integer level code instead of the colour name.
df <- data.frame(
  X1 = c(0, 2, 0, 0, 1, 2),
  X2 = c(3, 0, 1, 2, 1, 3),
  Y = c("Red", "Red", "Red", "Green", "Green", "Red"),
  stringsAsFactors = FALSE
)
test_point <- c(1, 2)
# Euclidean distance from every training point to the query point.
df$Distance <- sqrt((df$X1 - test_point[1])^2 + (df$X2 - test_point[2])^2)
# Sort nearest-first; tied distances keep their original row order.
df <- df[order(df$Distance), ]
print(df)
## X1 X2 Y Distance
## 4 0 2 Green 1.000000
## 5 1 1 Green 1.000000
## 1 0 3 Red 1.414214
## 3 0 1 Red 1.414214
## 6 2 3 Red 1.414214
## 2 2 0 Red 2.236068
# K = 1: the label of the single nearest neighbour.
cat("K=1 Prediction:", df$Y[1], "\n")
## K=1 Prediction: Green
# K = 3: majority vote among the three nearest neighbours.
cat("K=3 Prediction:", names(sort(table(df$Y[1:3]), decreasing = TRUE))[1], "\n")
## K=3 Prediction: Green
library(ISLR)
# Load the Auto data set, drop rows with missing values and inspect its
# structure.
data(Auto)
Auto <- na.omit(Auto)
str(Auto)
## 'data.frame': 392 obs. of 9 variables:
## $ mpg : num 18 15 18 16 17 15 14 14 14 15 ...
## $ cylinders : num 8 8 8 8 8 8 8 8 8 8 ...
## $ displacement: num 307 350 318 304 302 429 454 440 455 390 ...
## $ horsepower : num 130 165 150 150 140 198 220 215 225 190 ...
## $ weight : num 3504 3693 3436 3433 3449 ...
## $ acceleration: num 12 11.5 11 12 10.5 10 9 8.5 10 8.5 ...
## $ year : num 70 70 70 70 70 70 70 70 70 70 ...
## $ origin : num 1 1 1 1 1 1 1 1 1 1 ...
## $ name : Factor w/ 304 levels "amc ambassador brougham",..: 49 36 231 14 161 141 54 223 241 2 ...
# Range of each numeric column (row 1 = minimum, row 2 = maximum).
# Filter() keeps only the columns for which is.numeric() is TRUE, which
# excludes the factor column `name`.
vapply(Filter(is.numeric, Auto), range, numeric(2))
## mpg cylinders displacement horsepower weight acceleration year origin
## [1,] 9.0 3 68 46 1613 8.0 70 1
## [2,] 46.6 8 455 230 5140 24.8 82 3
# Mean of each numeric column.
vapply(Filter(is.numeric, Auto), mean, numeric(1))
## mpg cylinders displacement horsepower weight acceleration
## 23.445918 5.471939 194.411990 104.469388 2977.584184 15.541327
## year origin
## 75.979592 1.576531
# Standard deviation of each numeric column.
vapply(Filter(is.numeric, Auto), sd, numeric(1))
## mpg cylinders displacement horsepower weight acceleration
## 7.8050075 1.7057832 104.6440039 38.4911599 849.4025600 2.7588641
## year origin
## 3.6837365 0.8055182
# Remove observations 10 through 85, then recompute the range, mean and
# standard deviation of every numeric column on the remaining rows.
Auto_new <- Auto[-seq(10, 85), ]
vapply(Filter(is.numeric, Auto_new), range, numeric(2))
## mpg cylinders displacement horsepower weight acceleration year origin
## [1,] 11.0 3 68 46 1649 8.5 70 1
## [2,] 46.6 8 455 230 4997 24.8 82 3
vapply(Filter(is.numeric, Auto_new), mean, numeric(1))
## mpg cylinders displacement horsepower weight acceleration
## 24.404430 5.373418 187.240506 100.721519 2935.971519 15.726899
## year origin
## 77.145570 1.601266
vapply(Filter(is.numeric, Auto_new), sd, numeric(1))
## mpg cylinders displacement horsepower weight acceleration
## 7.867283 1.654179 99.678367 35.708853 811.300208 2.693721
## year origin
## 3.106217 0.819910
# Scatterplot matrix of every column of Auto. The factor column `name` is
# included and is plotted through its numeric level codes.
graphics::pairs(Auto)
# Figure: Scatterplot matrix for the Auto dataset
# Pairwise correlation matrix of the numeric columns of Auto (the factor
# column `name` is filtered out before calling cor()).
cor(Filter(is.numeric, Auto))
## mpg cylinders displacement horsepower weight
## mpg 1.0000000 -0.7776175 -0.8051269 -0.7784268 -0.8322442
## cylinders -0.7776175 1.0000000 0.9508233 0.8429834 0.8975273
## displacement -0.8051269 0.9508233 1.0000000 0.8972570 0.9329944
## horsepower -0.7784268 0.8429834 0.8972570 1.0000000 0.8645377
## weight -0.8322442 0.8975273 0.9329944 0.8645377 1.0000000
## acceleration 0.4233285 -0.5046834 -0.5438005 -0.6891955 -0.4168392
## year 0.5805410 -0.3456474 -0.3698552 -0.4163615 -0.3091199
## origin 0.5652088 -0.5689316 -0.6145351 -0.4551715 -0.5850054
## acceleration year origin
## mpg 0.4233285 0.5805410 0.5652088
## cylinders -0.5046834 -0.3456474 -0.5689316
## displacement -0.5438005 -0.3698552 -0.6145351
## horsepower -0.6891955 -0.4163615 -0.4551715
## weight -0.4168392 -0.3091199 -0.5850054
## acceleration 1.0000000 0.2903161 0.2127458
## year 0.2903161 1.0000000 0.1815277
## origin 0.2127458 0.1815277 1.0000000
library(MASS)
# Load the Boston data set, report its size (rows, columns) and draw a
# scatterplot matrix of its first six columns.
data(Boston)
c(nrow(Boston), ncol(Boston))
## [1] 506 14
pairs(Boston[, seq_len(6)])
# Figure: Scatterplot matrix for the Boston dataset (first six columns)
# Correlation of the per-capita crime rate (`crim`) with every other column
# of Boston.
# The predictor columns are selected with a logical mask rather than
# `-which(...)`: if the name were ever absent, `-which()` would yield
# `-integer(0)` and silently select zero columns instead of all of them.
cor(Boston$crim, Boston[, names(Boston) != "crim"])
## zn indus chas nox rm age dis
## [1,] -0.2004692 0.4065834 -0.05589158 0.4209717 -0.2192467 0.3527343 -0.3796701
## rad tax ptratio black lstat medv
## [1,] 0.6255051 0.5827643 0.2899456 -0.3850639 0.4556215 -0.3883046
# Five-number summaries (plus mean) of crime rate, tax rate and
# pupil-teacher ratio.
with(Boston, summary(crim))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00632 0.08205 0.25651 3.61352 3.67708 88.97620
with(Boston, summary(tax))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 187.0 279.0 330.0 408.2 666.0 711.0
with(Boston, summary(ptratio))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 12.60 17.40 19.05 18.46 20.20 22.00
# Number of rows whose `chas` indicator equals 1.
with(Boston, sum(chas == 1))
## [1] 35
# Median pupil-teacher ratio across all rows.
with(Boston, median(ptratio))
## [1] 19.05
# Rows of Boston with the lowest `medv`.
# which.min() returns only the FIRST index of the minimum, so a tie for the
# lowest value would silently drop rows; compare against min() to keep
# every row that attains it.
lowest_medv <- Boston[which(Boston$medv == min(Boston$medv, na.rm = TRUE)), ]
lowest_medv
# Count rows by whether `rm` exceeds 7 (TRUE) or not (FALSE).
with(Boston, table(rm > 7))
##
## FALSE TRUE
## 442 64
# Same count for the stricter threshold of 8.
with(Boston, table(rm > 8))
##
## FALSE TRUE
## 493 13