library(MASS)
library(ISLR)
library(moderndive)
library(skimr)
library(ggthemes)
library(class)
library(modelr)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ purrr   0.3.4 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.2      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ✖ dplyr::select() masks MASS::select()
library(dplyr)
# Fix the RNG seed so the random steps later in the script are reproducible.
set.seed(123)

# Working copy of the Boston data set shipped with MASS; echoed for inspection.
df <- MASS::Boston
df

1.

# Add two columns to the working data:
#   highcrime - logical flag, TRUE where `crim` exceeds 25
#   id        - sequential row number, used as the key for the anti-joins that
#               build the test sets further down
df <- mutate(
  df,
  highcrime = crim > 25,
  id = row_number()
)
df

2.

# Predictor columns handed to the k-nearest-neighbours classifier.
knn_features <- c("indus", "age", "dis")

# Random 70% of the rows for training; every row whose id is not in the
# training sample becomes a test row.
knn_train_rows <- sample_frac(df, 0.7)
knn_test_rows <- anti_join(df, knn_train_rows, by = "id")

# True class labels, taken before the frames are cut down to the predictors.
knn_train_true <- knn_train_rows[["highcrime"]]
knn_test_true <- knn_test_rows[["highcrime"]]

# knn() expects predictor-only inputs, so keep just the feature columns.
knn_train <- dplyr::select(knn_train_rows, all_of(knn_features))
knn_test <- dplyr::select(knn_test_rows, all_of(knn_features))

# Classify each test row by majority vote among its 50 nearest training rows.
knn_predictions <- knn(
  train = knn_train,
  test = knn_test,
  cl = knn_train_true,
  k = 50
)
knn_predictions
##   [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [73] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [85] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [97] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [109] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [121] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [133] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [145] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## Levels: FALSE TRUE

3.

# Response for the logistic regression: `oldHouse` is TRUE where age > 90, and
# `oldHouse01` is the same flag coded as numeric 1 / 0.
df <- mutate(
  df,
  oldHouse = age > 90,
  oldHouse01 = as.numeric(oldHouse)
)

# Fresh random 70 / 30 split; test rows are those whose id is not in training.
glm_train <- sample_frac(df, 0.7)
glm_test <- anti_join(df, glm_train, by = "id")

# Logistic regression of the old-house indicator on four predictors.
model <- glm(
  oldHouse01 ~ crim + rm + tax + ptratio,
  family = binomial,
  data = glm_train
)

# Score the test rows: `pred` holds the response-scale prediction, which is
# thresholded at 0.5 and compared with the observed 0 / 1 outcome.
glm_scored <- add_predictions(glm_test, model, type = "response")
glm_scored <- mutate(
  glm_scored,
  prediction = as.numeric(pred > 0.5),
  right = as.numeric(prediction == oldHouse01)
)

# Test misclassification rate: one minus the share of correct predictions.
summarise(glm_scored, error = 1 - sum(right) / nrow(glm_test))

4.

# KNN test misclassification rate: one minus the share of test rows whose
# predicted label equals the true label.
knn_hits <- knn_predictions == knn_test_true
error <- 1 - mean(knn_hits)
error
## [1] 0.01973684