library(MASS)
library(ISLR)
library(moderndive)
library(skimr)
library(ggthemes)
library(class)
library(modelr)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::select() masks MASS::select()
library(dplyr)
# Fix the RNG seed so the random splits drawn further down are repeatable,
# then take a working copy of the Boston data and display it.
set.seed(seed = 123)
df <- Boston
print(df)
# 1. High-crime indicator ----
# Add two columns:
#   highcrime - TRUE where `crim` is above 25
#   id        - the row's position, used later to separate train and test rows
df <- mutate(
  df,
  highcrime = crim > 25,
  id = row_number()
)
print(df)
# 2. KNN classification of highcrime ----
# Draw 70% of the rows at random for training; every row whose `id` is not in
# the training sample becomes a test row.
knn_train <- sample_frac(df, 0.7)
knn_test <- anti_join(df, knn_train, by = "id")

# Set the true labels aside before the frames are cut down to the predictors.
knn_train_true <- knn_train[["highcrime"]]
knn_test_true <- knn_test[["highcrime"]]

# Only indus, age and dis are passed to the classifier.
knn_train <- dplyr::select(knn_train, indus, age, dis)
knn_test <- dplyr::select(knn_test, indus, age, dis)

# Classify each test row by majority vote among its 50 nearest training rows.
knn_predictions <- knn(
  train = knn_train,
  test = knn_test,
  cl = knn_train_true,
  k = 50
)
print(knn_predictions)
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [73] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [85] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [97] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [109] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [121] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [133] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [145] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## Levels: FALSE TRUE
# 3. Logistic regression for oldHouse ----
# Response variable: oldHouse is TRUE where `age` is above 90, and oldHouse01
# is the same flag coded as 1/0 for the logistic fit.
df <- mutate(
  df,
  oldHouse = age > 90,
  oldHouse01 = as.numeric(oldHouse)
)

# Fresh 70/30 split, again separated on `id`.
glm_train <- sample_frac(df, 0.7)
glm_test <- anti_join(df, glm_train, by = "id")

# Logistic regression of the 1/0 response on crim, rm, tax and ptratio.
model <- glm(
  oldHouse01 ~ crim + rm + tax + ptratio,
  family = binomial,
  data = glm_train
)

# Score the test rows: predicted probabilities above 0.5 are labelled 1, and
# the error is one minus the share of test rows labelled correctly.
glm_test |>
  add_predictions(model, type = "response") |>
  mutate(
    prediction = as.numeric(pred > 0.5),
    right = as.numeric(prediction == oldHouse01)
  ) |>
  summarise(error = 1 - sum(right) / nrow(glm_test))
# 4. KNN test error ----
# Test error of the KNN classifier: one minus the share of test rows whose
# predicted label equals the true one. The predictions are a factor and the
# truth is logical, so both sides are compared as their "TRUE"/"FALSE" text.
error <- 1 - mean(
  as.character(knn_predictions) == as.character(knn_test_true)
)
print(error)
## [1] 0.01973684