library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ purrr   0.3.4 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.2      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(ISLR)
library(moderndive)
library(skimr)
library(ggthemes)
library(class)
library(modelr)
# Seed the random number generator up front so the session is reproducible.
# Each sampling step further down re-seeds with set.seed(1) right before it
# draws the training rows.
set.seed(123)

# Logistic Regression 1 ----

# Scatter plot of balance (x) against income (y), coloured by default status.
# The opacity is a fixed setting, so it is passed to the geom rather than
# placed inside aes(): inside aes() the constant is treated as a mapped
# variable, which adds a meaningless alpha legend and does not actually draw
# the points at 10% opacity.
ggplot(Default, aes(x = balance, y = income, color = default)) +
  geom_point(alpha = 0.1)

# Reproducible train/test split of Default.
# A row id is added so the test set can be defined as "every row that was not
# sampled into train"; default01 is a numeric 1/0 copy of the default factor
# (1 when default is "Yes") for use as the regression response.
set.seed(1)
Default <- Default |>
  mutate(
    id = row_number(),
    default01 = as.numeric(default == "Yes")
  )
train <- sample_frac(Default, size = 0.7)
test <- anti_join(Default, train, by = "id")

# Logistic regression of the 1/0 default indicator on income and balance,
# fitted on the training rows only.
model <- glm(
  formula = default01 ~ income + balance,
  family = binomial,
  data = train
)

# Misclassification rate on the held-out rows: predict a default when the
# fitted probability exceeds 0.5, flag each row whose prediction matches the
# observed indicator, and report one minus the share of correct predictions.
test |>
  add_predictions(model, type = "response") |>
  mutate(
    prediction = as.numeric(pred > 0.5),
    right = as.numeric(prediction == default01)
  ) |>
  summarise(error = 1 - sum(right) / n())

# Logistic Regression 2 ----

# Histogram of wage, filled by and split into one panel per health_ins level.
# Binning is left at the ggplot2 default.
Wage |>
  ggplot(mapping = aes(x = wage, fill = health_ins)) +
  geom_histogram() +
  facet_wrap(vars(health_ins))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Histogram of age, filled by and split into one panel per health_ins level.
# Binning is left at the ggplot2 default.
Wage |>
  ggplot(mapping = aes(x = age, fill = health_ins)) +
  geom_histogram() +
  facet_wrap(vars(health_ins))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Histogram of year, filled by and split into one panel per health_ins level.
# year holds whole calendar years (per the ISLR Wage documentation), so the
# bin width is set to one year explicitly. The default of 30 bins splits each
# year into several mostly empty slivers and makes ggplot2 emit its
# "Pick better value with `binwidth`" message.
ggplot(Wage, aes(year, fill = health_ins)) +
  geom_histogram(binwidth = 1) +
  facet_wrap(~health_ins)

# Reproducible train/test split of Wage.
# A row id is added so the test set can be defined as "every row that was not
# sampled into train"; health_ins_01 is a numeric 1/0 copy of health_ins
# (1 when the level is "1. Yes") for use as the regression response.
set.seed(1)
Wage <- Wage |>
  mutate(
    id = row_number(),
    health_ins_01 = as.numeric(health_ins == "1. Yes")
  )
train <- sample_frac(Wage, size = 0.7)
test <- anti_join(Wage, train, by = "id")

# Logistic regression of the 1/0 health-insurance indicator on wage, age and
# year, fitted on the training rows only.
model <- glm(
  formula = health_ins_01 ~ wage + age + year,
  family = binomial,
  data = train
)

# Misclassification rate on the held-out rows: predict insurance coverage when
# the fitted probability exceeds 0.5, flag each row whose prediction matches
# the observed indicator, and report one minus the share of correct
# predictions.
test |>
  add_predictions(model, type = "response") |>
  mutate(
    prediction = as.numeric(pred > 0.5),
    right = as.numeric(prediction == health_ins_01)
  ) |>
  summarise(error = 1 - sum(right) / n())