library(ISLR)
library(tidymodels)
## ── Attaching packages ────────────────────────────────────── tidymodels 1.3.0 ──
## ✔ broom        1.0.8     ✔ recipes      1.3.0
## ✔ dials        1.4.0     ✔ rsample      1.3.0
## ✔ dplyr        1.1.4     ✔ tibble       3.2.1
## ✔ ggplot2      3.5.2     ✔ tidyr        1.3.1
## ✔ infer        1.0.8     ✔ tune         1.3.0
## ✔ modeldata    1.4.0     ✔ workflows    1.2.0
## ✔ parsnip      1.3.1     ✔ workflowsets 1.1.0
## ✔ purrr        1.0.4     ✔ yardstick    1.3.2
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ purrr::discard() masks scales::discard()
## ✖ dplyr::filter()  masks stats::filter()
## ✖ dplyr::lag()     masks stats::lag()
## ✖ recipes::step()  masks stats::step()
library(dplyr)

set.seed(1)
train_index <- sample(1:nrow(Auto), nrow(Auto) / 2)

# Fit a linear model using horsepower to predict mpg
lm_fit1 <- lm(mpg ~ horsepower, data = Auto, subset = train_index)
mean((Auto$mpg[-train_index] - predict(lm_fit1, Auto[-train_index, ]))^2)
## [1] 23.26601
# Fit a quadratic model
lm_fit2 <- lm(mpg ~ poly(horsepower, 2), data = Auto, subset = train_index)
mean((Auto$mpg[-train_index] - predict(lm_fit2, Auto[-train_index, ]))^2)
## [1] 18.71646
# Fit a cubic model
lm_fit3 <- lm(mpg ~ poly(horsepower, 3), data = Auto, subset = train_index)
mean((Auto$mpg[-train_index] - predict(lm_fit3, Auto[-train_index, ]))^2)
## [1] 18.79401
# Use tidymodels and boot::cv.glm
library(boot)

glm_fit <- glm(mpg ~ horsepower, data = Auto)
cv.glm(Auto, glm_fit)$delta[1]
## [1] 24.23151
# Try higher degree polynomials
cv_errors <- rep(0, 5)

for (d in 1:5) {
  glm_fit <- glm(mpg ~ poly(horsepower, d), data = Auto)
  cv_errors[d] <- cv.glm(Auto, glm_fit)$delta[1]
}

cv_errors
## [1] 24.23151 19.24821 19.33498 19.42443 19.03321
set.seed(17)
cv_errors_kfold <- rep(0, 10)

for (d in 1:10) {
  glm_fit <- glm(mpg ~ poly(horsepower, d), data = Auto)
  cv_errors_kfold[d] <- cv.glm(Auto, glm_fit, K = 10)$delta[1]
}

cv_errors_kfold
##  [1] 24.27207 19.26909 19.34805 19.29496 19.03198 18.89781 19.12061 19.14666
##  [9] 18.87013 20.95520
# Function to return alpha statistic
alpha_fn <- function(data, index) {
  X <- data$X[index]
  Y <- data$Y[index]
  return((var(Y) - cov(X, Y)) / (var(X) + var(Y) - 2 * cov(X, Y)))
}

# Test function
alpha_fn(Portfolio, 1:100)
## [1] 0.5758321
# Bootstrap the alpha function
set.seed(1)
boot(Portfolio, alpha_fn, R = 1000)
## 
## ORDINARY NONPARAMETRIC BOOTSTRAP
## 
## 
## Call:
## boot(data = Portfolio, statistic = alpha_fn, R = 1000)
## 
## 
## Bootstrap Statistics :
##      original       bias    std. error
## t1* 0.5758321 -0.001596422  0.09376093