library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(ISLR)
library(moderndive)
library(skimr)
library(ggthemes)
library(class)
library(modelr)
# Fix the random-number seed for the session so that any random step that
# follows gives the same result on every run.
set.seed(123)
# Logistic Regression 1 ----
# Scatter plot of balance (x) against income (y), coloured by default status.
# The transparency is a fixed constant rather than a data-driven mapping, so it
# is set as a parameter of the geom. Placing `alpha = 0.1` inside aes() would
# treat 0.1 as data: ggplot2 would add a meaningless "alpha" legend and the
# points would not actually be drawn with opacity 0.1.
ggplot(Default, aes(balance, income, color = default)) +
  geom_point(alpha = 0.1)

# Hold-out evaluation of a logistic regression predicting `default`.
set.seed(1) # reseed so the 70/30 split below is reproducible

# Tag every row with an id (used to derive the test set) and a 0/1 response.
Default <- mutate(
  Default,
  id = row_number(),
  default01 = ifelse(default == "Yes", 1, 0)
)

# 70% of the rows are sampled for fitting; the rows not sampled form the test set.
train <- sample_frac(Default, 0.7)
test <- anti_join(Default, train, by = "id")

# Binomial GLM of the 0/1 response on income and balance.
model <- glm(default01 ~ income + balance, family = binomial, data = train)

# Classify test rows at a 0.5 probability cut-off and report the share of
# misclassified rows.
test |>
  add_predictions(model, type = "response") |>
  mutate(
    prediction = ifelse(pred > 0.5, 1, 0),
    right = ifelse(prediction == default01, 1, 0)
  ) |>
  summarise(error = 1 - sum(right) / n())
# Logistic Regression 2 ----
# Histogram of wage, filled by health_ins and drawn in one panel per
# health_ins level. The bin count is left at the ggplot2 default.
ggplot(data = Wage, mapping = aes(x = wage, fill = health_ins)) +
  geom_histogram() +
  facet_wrap(vars(health_ins))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Histogram of age, filled by health_ins and drawn in one panel per
# health_ins level. The bin count is left at the ggplot2 default.
ggplot(data = Wage, mapping = aes(x = age, fill = health_ins)) +
  geom_histogram() +
  facet_wrap(vars(health_ins))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Histogram of year, filled by health_ins and drawn in one panel per
# health_ins level. The bin count is left at the ggplot2 default.
ggplot(data = Wage, mapping = aes(x = year, fill = health_ins)) +
  geom_histogram() +
  facet_wrap(vars(health_ins))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Hold-out evaluation of a logistic regression predicting `health_ins`.
set.seed(1) # reseed so the 70/30 split below is reproducible

# Tag every row with an id (used to derive the test set) and a 0/1 response
# that is 1 when health_ins equals "1. Yes".
Wage <- mutate(
  Wage,
  id = row_number(),
  health_ins_01 = ifelse(health_ins == "1. Yes", 1, 0)
)

# 70% of the rows are sampled for fitting; the rows not sampled form the test set.
train <- sample_frac(Wage, 0.7)
test <- anti_join(Wage, train, by = "id")

# Binomial GLM of the 0/1 response on wage, age and year.
model <- glm(health_ins_01 ~ wage + age + year, family = binomial, data = train)

# Classify test rows at a 0.5 probability cut-off and report the share of
# misclassified rows.
test |>
  add_predictions(model, type = "response") |>
  mutate(
    prediction = ifelse(pred > 0.5, 1, 0),
    right = ifelse(prediction == health_ins_01, 1, 0)
  ) |>
  summarise(error = 1 - sum(right) / n())