library(ISLR2)
library(GGally)
## Loading required package: ggplot2
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(ggplot2)
data(Boston)
head(Boston)
## crim zn indus chas nox rm age dis rad tax ptratio lstat medv
## 1 0.00632 18 2.31 0 0.538 6.575 65.2 4.0900 1 296 15.3 4.98 24.0
## 2 0.02731 0 7.07 0 0.469 6.421 78.9 4.9671 2 242 17.8 9.14 21.6
## 3 0.02729 0 7.07 0 0.469 7.185 61.1 4.9671 2 242 17.8 4.03 34.7
## 4 0.03237 0 2.18 0 0.458 6.998 45.8 6.0622 3 222 18.7 2.94 33.4
## 5 0.06905 0 2.18 0 0.458 7.147 54.2 6.0622 3 222 18.7 5.33 36.2
## 6 0.02985 0 2.18 0 0.458 6.430 58.7 6.0622 3 222 18.7 5.21 28.7
ggpairs(Boston[, 1:6])
cor(Boston$crim, Boston)
## crim zn indus chas nox rm age
## [1,] 1 -0.2004692 0.4065834 -0.05589158 0.4209717 -0.2192467 0.3527343
## dis rad tax ptratio lstat medv
## [1,] -0.3796701 0.6255051 0.5827643 0.2899456 0.4556215 -0.3883046
summary(Boston$crim)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00632 0.08204 0.25651 3.61352 3.67708 88.97620
summary(Boston$tax)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 187.0 279.0 330.0 408.2 666.0 711.0
summary(Boston$ptratio)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 12.60 17.40 19.05 18.46 20.20 22.00
sum(Boston$chas == 1)
## [1] 35
median(Boston$ptratio)
## [1] 19.05
Boston[which.min(Boston$medv), ]
## crim zn indus chas nox rm age dis rad tax ptratio lstat medv
## 399 38.3518 0 18.1 0 0.693 5.453 100 1.4896 24 666 20.2 30.59 5
sum(Boston$rm > 7)
## [1] 64
sum(Boston$rm > 8)
## [1] 13
Boston[Boston$rm > 8, ]
## crim zn indus chas nox rm age dis rad tax ptratio lstat medv
## 98 0.12083 0 2.89 0 0.4450 8.069 76.0 3.4952 2 276 18.0 4.21 38.7
## 164 1.51902 0 19.58 1 0.6050 8.375 93.9 2.1620 5 403 14.7 3.32 50.0
## 205 0.02009 95 2.68 0 0.4161 8.034 31.9 5.1180 4 224 14.7 2.88 50.0
## 225 0.31533 0 6.20 0 0.5040 8.266 78.3 2.8944 8 307 17.4 4.14 44.8
## 226 0.52693 0 6.20 0 0.5040 8.725 83.0 2.8944 8 307 17.4 4.63 50.0
## 227 0.38214 0 6.20 0 0.5040 8.040 86.5 3.2157 8 307 17.4 3.13 37.6
## 233 0.57529 0 6.20 0 0.5070 8.337 73.3 3.8384 8 307 17.4 2.47 41.7
## 234 0.33147 0 6.20 0 0.5070 8.247 70.4 3.6519 8 307 17.4 3.95 48.3
## 254 0.36894 22 5.86 0 0.4310 8.259 8.4 8.9067 7 330 19.1 3.54 42.8
## 258 0.61154 20 3.97 0 0.6470 8.704 86.9 1.8010 5 264 13.0 5.12 50.0
## 263 0.52014 20 3.97 0 0.6470 8.398 91.5 2.2885 5 264 13.0 5.91 48.8
## 268 0.57834 20 3.97 0 0.5750 8.297 67.0 2.4216 5 264 13.0 7.44 50.0
## 365 3.47428 0 18.10 1 0.7180 8.780 82.9 1.9047 24 666 20.2 5.29 21.9
library(class)
library(FNN)
##
## Attaching package: 'FNN'
## The following objects are masked from 'package:class':
##
## knn, knn.cv
data(Carseats)
install.packages("ISLR")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
library(ISLR)
##
## Attaching package: 'ISLR'
## The following objects are masked from 'package:ISLR2':
##
## Auto, Credit
data(Carseats)
model_full <- lm(Sales ~ Price + Urban + US, data = Carseats)
summary(model_full)
##
## Call:
## lm(formula = Sales ~ Price + Urban + US, data = Carseats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.9206 -1.6220 -0.0564 1.5786 7.0581
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.043469 0.651012 20.036 < 2e-16 ***
## Price -0.054459 0.005242 -10.389 < 2e-16 ***
## UrbanYes -0.021916 0.271650 -0.081 0.936
## USYes 1.200573 0.259042 4.635 4.86e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.472 on 396 degrees of freedom
## Multiple R-squared: 0.2393, Adjusted R-squared: 0.2335
## F-statistic: 41.52 on 3 and 396 DF, p-value: < 2.2e-16
summary(model_full)$coefficients
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.04346894 0.651012245 20.03567373 3.626602e-62
## Price -0.05445885 0.005241855 -10.38923205 1.609917e-22
## UrbanYes -0.02191615 0.271650277 -0.08067781 9.357389e-01
## USYes 1.20057270 0.259041508 4.63467306 4.860245e-06
coefs <- summary(model_full)$coefficients
intercept <- round(coefs["(Intercept)", "Estimate"], 2)
price_coef <- round(coefs["Price", "Estimate"], 2)
urban_coef <- round(coefs["UrbanYes", "Estimate"], 2)
us_coef <- round(coefs["USYes", "Estimate"], 2)
cat("Intercept:", intercept,
"- Expected Sales when Price = 0, Urban = 'No', US = 'No'\n")
## Intercept: 13.04 - Expected Sales when Price = 0, Urban = 'No', US = 'No'
cat("Price:", price_coef,
"- For each $1 increase in Price, Sales decreases by", abs(price_coef), "units\n")
## Price: -0.05 - For each $1 increase in Price, Sales decreases by 0.05 units
cat("UrbanYes:", urban_coef,
"- Difference in Sales between Urban = 'Yes' and Urban = 'No', holding other variables constant\n")
## UrbanYes: -0.02 - Difference in Sales between Urban = 'Yes' and Urban = 'No', holding other variables constant
cat("USYes:", us_coef,
"- Difference in Sales between US = 'Yes' and US = 'No', holding other variables constant\n")
## USYes: 1.2 - Difference in Sales between US = 'Yes' and US = 'No', holding other variables constant
coef(model_full)
## (Intercept) Price UrbanYes USYes
## 13.04346894 -0.05445885 -0.02191615 1.20057270
summary(model_full)$coefficients
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.04346894 0.651012245 20.03567373 3.626602e-62
## Price -0.05445885 0.005241855 -10.38923205 1.609917e-22
## UrbanYes -0.02191615 0.271650277 -0.08067781 9.357389e-01
## USYes 1.20057270 0.259041508 4.63467306 4.860245e-06
model_small <- lm(Sales ~ Price + US, data = Carseats)
summary(model_small)
##
## Call:
## lm(formula = Sales ~ Price + US, data = Carseats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.9269 -1.6286 -0.0574 1.5766 7.0515
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.03079 0.63098 20.652 < 2e-16 ***
## Price -0.05448 0.00523 -10.416 < 2e-16 ***
## USYes 1.19964 0.25846 4.641 4.71e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.469 on 397 degrees of freedom
## Multiple R-squared: 0.2393, Adjusted R-squared: 0.2354
## F-statistic: 62.43 on 2 and 397 DF, p-value: < 2.2e-16
summary(model_full)$r.squared
## [1] 0.2392754
summary(model_small)$r.squared
## [1] 0.2392629
confint(model_small, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 11.79032020 14.27126531
## Price -0.06475984 -0.04419543
## USYes 0.69151957 1.70776632
par(mfrow = c(2, 2))
plot(model_small)
#Chapter4 exercise12
beta0 <- 2
beta1 <- -1
alpha_orange0_c <- 2
alpha_apple0_c <- 0
alpha_orange1_c <- -1
alpha_apple1_c <- 0
cat("Part (c) - One possible solution for friend's coefficients:\n")
## Part (c) - One possible solution for friend's coefficients:
cat("α_orange0 =", alpha_orange0_c, "\n")
## α_orange0 = 2
cat("α_orange1 =", alpha_orange1_c, "\n")
## α_orange1 = -1
cat("α_apple0 =", alpha_apple0_c, "\n")
## α_apple0 = 0
cat("α_apple1 =", alpha_apple1_c, "\n\n")
## α_apple1 = 0
alpha_orange0_alt <- 0
alpha_apple0_alt <- -2
alpha_orange1_alt <- 0
alpha_apple1_alt <- 1
cat("Part (c) - Alternative solution for friend's coefficients:\n")
## Part (c) - Alternative solution for friend's coefficients:
cat("α_orange0 =", alpha_orange0_alt, "\n")
## α_orange0 = 0
cat("α_orange1 =", alpha_orange1_alt, "\n")
## α_orange1 = 0
cat("α_apple0 =", alpha_apple0_alt, "\n")
## α_apple0 = -2
cat("α_apple1 =", alpha_apple1_alt, "\n\n")
## α_apple1 = 1
alpha_orange0 <- 1.2
alpha_orange1 <- -2
alpha_apple0 <- 3
alpha_apple1 <- 0.6
your_beta0 <- alpha_orange0 - alpha_apple0
your_beta1 <- alpha_orange1 - alpha_apple1
cat("Part (d) - Your model coefficients:\n")
## Part (d) - Your model coefficients:
cat("β₀ =", your_beta0, "\n")
## β₀ = -1.8
cat("β₁ =", your_beta1, "\n\n")
## β₁ = -2.6
your_model_prob <- function(x, beta0, beta1) {
linear_pred <- beta0 + beta1 * x
exp(linear_pred) / (1 + exp(linear_pred))
}
friend_model_prob <- function(x, alpha_orange0, alpha_orange1, alpha_apple0, alpha_apple1) {
orange_score <- exp(alpha_orange0 + alpha_orange1 * x)
apple_score <- exp(alpha_apple0 + alpha_apple1 * x)
orange_score / (orange_score + apple_score)
}
set.seed(123)
test_data <- runif(2000, -5, 5)
your_preds <- ifelse(your_model_prob(test_data, your_beta0, your_beta1) > 0.5, "orange", "apple")
friend_preds <- ifelse(friend_model_prob(test_data, alpha_orange0, alpha_orange1, alpha_apple0, alpha_apple1) > 0.5, "orange", "apple")
agreement <- mean(your_preds == friend_preds)
cat("Part (e) - Fraction of agreement:", agreement, "\n\n")
## Part (e) - Fraction of agreement: 1
your_boundary <- -your_beta0/your_beta1
friend_boundary <- -(alpha_orange0 - alpha_apple0)/(alpha_orange1 - alpha_apple1)
cat("Decision boundary in your model: x =", your_boundary, "\n")
## Decision boundary in your model: x = -0.6923077
cat("Decision boundary in friend's model: x =", friend_boundary, "\n\n")
## Decision boundary in friend's model: x = -0.6923077
x_range <- seq(-5, 5, length.out = 100)
your_probs <- sapply(x_range, function(x) your_model_prob(x, your_beta0, your_beta1))
friend_probs <- sapply(x_range, function(x)
friend_model_prob(x, alpha_orange0, alpha_orange1, alpha_apple0, alpha_apple1))
plot_data <- data.frame(
x = rep(x_range, 2),
prob = c(your_probs, friend_probs),
model = rep(c("Your Model", "Friend's Model"), each = length(x_range))
)
par(mfrow=c(1,1))
plot(x_range, your_probs, type = "l", col = "blue", lwd = 2,
xlab = "X", ylab = "P(orange|X)",
main = "Probability of Orange by Model", ylim = c(0, 1))
lines(x_range, friend_probs, col = "red", lwd = 2)
abline(h = 0.5, lty = 2)
abline(v = your_boundary, col = "blue", lty = 3)
abline(v = friend_boundary, col = "red", lty = 3)
legend("topright", legend = c("Your Model", "Friend's Model", "Decision Boundary"),
col = c("blue", "red", "black"), lty = c(1, 1, 2))
prob_diff <- abs(your_probs - friend_probs)
max_diff <- max(prob_diff)
cat("Maximum difference in probabilities:", max_diff, "\n")
## Maximum difference in probabilities: 1.110223e-16
x_check <- seq(-5, 5, by = 0.01)
your_pred_check <- ifelse(your_model_prob(x_check, your_beta0, your_beta1) > 0.5, 1, 0)
friend_pred_check <- ifelse(friend_model_prob(x_check, alpha_orange0, alpha_orange1,
alpha_apple0, alpha_apple1) > 0.5, 1, 0)
all_equal <- all(your_pred_check == friend_pred_check)
cat("All predictions match:", all_equal)
## All predictions match: TRUE