library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(ISLR)
library(moderndive)
library(skimr)
library(ggthemes)
library(class)
library(modelr)
# Seed the random number generator once up front. Each exercise below
# re-seeds with set.seed(1) before it samples its train/test split.
set.seed(123)
Classification Skills Practice II
Exercise 1: Elite Colleges
# Re-seed so the random split (and any random tie-breaking in knn) is
# reproducible.
set.seed(1)

# Label a college as elite when Top10perc exceeds 50, and attach a row id
# that the anti-join below uses to recover the held-out rows.
College <- College |>
  mutate(
    elite = Top10perc > 50,
    id = row_number()
  )

# 70% of the rows go to training; every row not sampled becomes test data.
train <- sample_frac(College, 0.7)
test <- anti_join(College, train, by = "id")

# Pull out the true labels before the frames are reduced to predictors.
train_true <- pull(train, elite)
test_true <- pull(test, elite)

# Predictors handed to knn().
college_predictors <- c("Apps", "Accept", "Enroll")
train <- select(train, all_of(college_predictors))
test <- select(test, all_of(college_predictors))

# Classify the test rows with k = 10 and report the share misclassified.
knn_pred <- knn(train = train, test = test, cl = train_true, k = 10)
error <- 1 - mean(knn_pred == test_true)
error
## [1] 0.09012876
Exercise 2: Running knn again
# Re-seed so the random split (and any random tie-breaking in knn) is
# reproducible.
set.seed(1)

# Encode marital status as 1 ("Yes") / 0 (anything else) and attach a row id
# that the anti-join below uses to recover the held-out rows.
Credit <- Credit |>
  mutate(
    Married01 = as.numeric(Married == "Yes"),
    id = row_number()
  )

# 70% of the rows go to training; every row not sampled becomes test data.
train <- sample_frac(Credit, 0.7)
test <- anti_join(Credit, train, by = "id")

# Pull out the true labels before the frames are reduced to predictors.
train_true <- pull(train, Married01)
test_true <- pull(test, Married01)

# Predictors handed to knn(); they are used on their original scales here.
credit_predictors <- c("Balance", "Education", "Limit", "Income")
train <- select(train, all_of(credit_predictors))
test <- select(test, all_of(credit_predictors))

# Classify the test rows with k = 50 and report the share misclassified.
knn_pred <- knn(train = train, test = test, cl = train_true, k = 50)
error <- 1 - mean(knn_pred == test_true)
error
## [1] 0.4
Exercise 3: Improving your model
# Re-seed so the random split (and any random tie-breaking in knn) is
# reproducible.
set.seed(1)

# Predictors handed to knn(); each one is standardised below.
credit_predictors <- c("Balance", "Education", "Limit", "Income")

# Encode marital status as 1 ("Yes") / 0 (anything else), attach a row id for
# the anti-join, and replace every predictor with its z-score
# (centre on the column mean, divide by the column standard deviation).
# NOTE(review): the mean and sd are computed on the full data set, i.e.
# before the train/test split.
Credit <- Credit |>
  mutate(
    Married01 = as.numeric(Married == "Yes"),
    id = row_number(),
    across(all_of(credit_predictors), \(x) (x - mean(x)) / sd(x))
  )

# 70% of the rows go to training; every row not sampled becomes test data.
train <- sample_frac(Credit, 0.7)
test <- anti_join(Credit, train, by = "id")

# Pull out the true labels before the frames are reduced to predictors.
train_true <- pull(train, Married01)
test_true <- pull(test, Married01)

train <- select(train, all_of(credit_predictors))
test <- select(test, all_of(credit_predictors))

# Classify the test rows with k = 30 and report the share misclassified.
knn_pred <- knn(train = train, test = test, cl = train_true, k = 30)
error <- 1 - mean(knn_pred == test_true)
error
## [1] 0.4083333