library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ purrr   0.3.4 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.2      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(ISLR)
library(moderndive)
library(skimr)
library(ggthemes)
library(class)
library(modelr)
set.seed(123)

Classification Skills Practice II

Exercise 1: Elite Colleges

# Exercise 1: k-nearest-neighbour classification of "elite" colleges
# (Top10perc > 50) from Apps, Accept and Enroll, on a seeded 70/30 split.
set.seed(1)

# Flag elite colleges and add a row id so held-out rows can be recovered.
College <- mutate(College, elite = Top10perc > 50, id = row_number())

# 70% of the rows form the training set; the rows not sampled form the test set.
train <- sample_frac(College, 0.7)
test <- anti_join(College, train, by = "id")

# Save the true labels before both sets are reduced to the predictor columns.
train_true <- pull(train, elite)
test_true <- pull(test, elite)
train <- select(train, Apps, Accept, Enroll)
test <- select(test, Apps, Accept, Enroll)

# Classify each test row from its 10 nearest training rows, then report the
# proportion of test rows whose prediction differs from the true label.
knn_pred <- knn(train = train, test = test, cl = train_true, k = 10)
error <- 1 - mean(knn_pred == test_true)
error
## [1] 0.09012876

Exercise 2: Running knn again

# Exercise 2: k-nearest-neighbour prediction of marital status in the Credit
# data from Balance, Education, Limit and Income, left on their raw scales.
set.seed(1)

# Encode Married as 1 ("Yes") / 0 (otherwise) and add a row id for the split.
Credit <- mutate(
  Credit,
  Married01 = as.numeric(Married == "Yes"),
  id = row_number()
)

# 70% of the rows form the training set; the rows not sampled form the test set.
train <- sample_frac(Credit, 0.7)
test <- anti_join(Credit, train, by = "id")

# Save the true labels before both sets are reduced to the predictor columns.
train_true <- train[["Married01"]]
test_true <- test[["Married01"]]
train <- select(train, Balance, Education, Limit, Income)
test <- select(test, Balance, Education, Limit, Income)

# Classify each test row from its 50 nearest training rows, then report the
# proportion of test rows whose prediction differs from the true label.
knn_pred <- knn(train = train, test = test, cl = train_true, k = 50)
error <- 1 - mean(knn_pred == test_true)
error
## [1] 0.4

Exercise 3: Improving your model

# Exercise 3: same model as Exercise 2, but every predictor is standardised
# first so that no single variable dominates the distance calculation.
set.seed(1)

# Recreate the 0/1 label and row id, then replace each predictor with its
# z-score: (value - column mean) / column standard deviation, computed over
# all rows of Credit before the split.
Credit <- mutate(
  Credit,
  Married01 = ifelse(Married == "Yes", 1, 0),
  id = row_number(),
  across(
    c(Balance, Education, Limit, Income),
    \(x) (x - mean(x)) / sd(x)
  )
)

# 70% of the rows form the training set; the rows not sampled form the test set.
train <- sample_frac(Credit, 0.7)
test <- anti_join(Credit, train, by = "id")

# Save the true labels before both sets are reduced to the predictor columns.
train_true <- pull(train, Married01)
test_true <- pull(test, Married01)
train <- select(train, Balance, Education, Limit, Income)
test <- select(test, Balance, Education, Limit, Income)

# Classify each test row from its 30 nearest training rows, then report the
# proportion of test rows whose prediction differs from the true label.
knn_pred <- knn(train = train, test = test, cl = train_true, k = 30)
error <- 1 - mean(knn_pred == test_true)
error
## [1] 0.4083333