Chapter2: Exercise№ 10

# Load required libraries
library(ISLR2)
library(GGally)
## Loading required package: ggplot2
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
library(ggplot2)
# Load Boston data
data(Boston)
# (a) View dataset
head(Boston)
##      crim zn indus chas   nox    rm  age    dis rad tax ptratio lstat medv
## 1 0.00632 18  2.31    0 0.538 6.575 65.2 4.0900   1 296    15.3  4.98 24.0
## 2 0.02731  0  7.07    0 0.469 6.421 78.9 4.9671   2 242    17.8  9.14 21.6
## 3 0.02729  0  7.07    0 0.469 7.185 61.1 4.9671   2 242    17.8  4.03 34.7
## 4 0.03237  0  2.18    0 0.458 6.998 45.8 6.0622   3 222    18.7  2.94 33.4
## 5 0.06905  0  2.18    0 0.458 7.147 54.2 6.0622   3 222    18.7  5.33 36.2
## 6 0.02985  0  2.18    0 0.458 6.430 58.7 6.0622   3 222    18.7  5.21 28.7
# (b) Pairwise scatterplots (first 6 variables for simplicity)
ggpairs(Boston[, 1:6])

# (c) Correlation of crim with other predictors
cor(Boston$crim, Boston)
##      crim         zn     indus        chas       nox         rm       age
## [1,]    1 -0.2004692 0.4065834 -0.05589158 0.4209717 -0.2192467 0.3527343
##             dis       rad       tax   ptratio     lstat       medv
## [1,] -0.3796701 0.6255051 0.5827643 0.2899456 0.4556215 -0.3883046
# (d) Summary of key predictors
summary(Boston$crim)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##  0.00632  0.08204  0.25651  3.61352  3.67708 88.97620
summary(Boston$tax)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   187.0   279.0   330.0   408.2   666.0   711.0
summary(Boston$ptratio)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   12.60   17.40   19.05   18.46   20.20   22.00
# (e) Tracts near the Charles River
sum(Boston$chas == 1)
## [1] 35
# (f) Median pupil-teacher ratio
median(Boston$ptratio)
## [1] 19.05
# (g) Tract with lowest median value
Boston[which.min(Boston$medv), ]
##        crim zn indus chas   nox    rm age    dis rad tax ptratio lstat medv
## 399 38.3518  0  18.1    0 0.693 5.453 100 1.4896  24 666    20.2 30.59    5
# (h) Tracts with high average rooms
sum(Boston$rm > 7)
## [1] 64
sum(Boston$rm > 8)
## [1] 13
Boston[Boston$rm > 8, ]
##        crim zn indus chas    nox    rm  age    dis rad tax ptratio lstat medv
## 98  0.12083  0  2.89    0 0.4450 8.069 76.0 3.4952   2 276    18.0  4.21 38.7
## 164 1.51902  0 19.58    1 0.6050 8.375 93.9 2.1620   5 403    14.7  3.32 50.0
## 205 0.02009 95  2.68    0 0.4161 8.034 31.9 5.1180   4 224    14.7  2.88 50.0
## 225 0.31533  0  6.20    0 0.5040 8.266 78.3 2.8944   8 307    17.4  4.14 44.8
## 226 0.52693  0  6.20    0 0.5040 8.725 83.0 2.8944   8 307    17.4  4.63 50.0
## 227 0.38214  0  6.20    0 0.5040 8.040 86.5 3.2157   8 307    17.4  3.13 37.6
## 233 0.57529  0  6.20    0 0.5070 8.337 73.3 3.8384   8 307    17.4  2.47 41.7
## 234 0.33147  0  6.20    0 0.5070 8.247 70.4 3.6519   8 307    17.4  3.95 48.3
## 254 0.36894 22  5.86    0 0.4310 8.259  8.4 8.9067   7 330    19.1  3.54 42.8
## 258 0.61154 20  3.97    0 0.6470 8.704 86.9 1.8010   5 264    13.0  5.12 50.0
## 263 0.52014 20  3.97    0 0.6470 8.398 91.5 2.2885   5 264    13.0  5.91 48.8
## 268 0.57834 20  3.97    0 0.5750 8.297 67.0 2.4216   5 264    13.0  7.44 50.0
## 365 3.47428  0 18.10    1 0.7180 8.780 82.9 1.9047  24 666    20.2  5.29 21.9

Chapter3: Exercise 2

Exercise 2: KNN Classifier vs. KNN Regression The K-Nearest Neighbors (KNN) method is a simple and versatile machine learning algorithm that can be used for both classification and regression tasks. The two variants of KNN operate differently based on the type of response variable (outcome):

🔹 KNN Classifier -Used for classification problems, where the response variable is categorical. -To classify a new observation:

  1. Identify the K observations in the training data that are closest (nearest) to the new observation using a distance metric (e.g., Euclidean distance).
  2. The most frequent class among the K neighbors is assigned as the predicted class.

Example: Predicting whether a person has a disease (Yes/No) based on features like age, weight, and blood pressure.

🔸 KNN Regression -Used for regression problems, where the response variable is quantitative (continuous). -To make a prediction:

  1. Identify the K nearest neighbors of the observation to be predicted.
  2. The predicted value is the average (mean) of the response values of those K neighbors.

Example: Predicting house price based on its size, location, and number of rooms.

# Install and load necessary package
install.packages("ggpubr")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
library(ggpubr)

# Create a data frame with the comparison info
# Install and load the required package
install.packages("ggpubr")  # Run only once
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
library(ggpubr)

# Create the comparison data frame
comparison_table <- data.frame(
  Feature = c("Type of Output", "Prediction Method", "Evaluation Metrics", "Use Cases"),
  KNN_Classifier = c(
    "Categorical (e.g., 'Yes'/'No')",
    "Majority vote among K neighbors",
    "Accuracy, Precision, Confusion Matrix",
    "Spam detection, disease prediction"
  ),
  KNN_Regression = c(
    "Continuous (e.g., 145.2, 23.9)",
    "Average of values among K neighbors",
    "MSE, RMSE",
    "House price prediction, temperature"
  )
)

# Generate a table plot
ggtexttable(comparison_table, 
            rows = NULL, 
            theme = ttheme("mBlue"))

cat("🔑 Summary:
• KNN Classification assigns the most common class among neighbors.
• KNN Regression averages the output values of neighbors.
• Both methods depend on the choice of K and the scale of input variables.")
## 🔑 Summary:
## • KNN Classification assigns the most common class among neighbors.
## • KNN Regression averages the output values of neighbors.
## • Both methods depend on the choice of K and the scale of input variables.

Chapter3: Exercise 10

# Load Carseats dataset
data(Carseats)
## (a) Fit a multiple regression model to predict Sales using Price, Urban, and US.
# Step 1: Install ISLR if you haven't already
install.packages("ISLR")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
# Step 2: Load the ISLR package
library(ISLR)
## 
## Attaching package: 'ISLR'
## The following objects are masked from 'package:ISLR2':
## 
##     Auto, Credit
# Step 3: Load the Carseats dataset
data(Carseats)

# Step 4: Fit the linear model
model_full <- lm(Sales ~ Price + Urban + US, data = Carseats)

# Step 5: View the model summary
summary(model_full)
## 
## Call:
## lm(formula = Sales ~ Price + Urban + US, data = Carseats)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.9206 -1.6220 -0.0564  1.5786  7.0581 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 13.043469   0.651012  20.036  < 2e-16 ***
## Price       -0.054459   0.005242 -10.389  < 2e-16 ***
## UrbanYes    -0.021916   0.271650  -0.081    0.936    
## USYes        1.200573   0.259042   4.635 4.86e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.472 on 396 degrees of freedom
## Multiple R-squared:  0.2393, Adjusted R-squared:  0.2335 
## F-statistic: 41.52 on 3 and 396 DF,  p-value: < 2.2e-16
summary(model_full)
## 
## Call:
## lm(formula = Sales ~ Price + Urban + US, data = Carseats)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.9206 -1.6220 -0.0564  1.5786  7.0581 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 13.043469   0.651012  20.036  < 2e-16 ***
## Price       -0.054459   0.005242 -10.389  < 2e-16 ***
## UrbanYes    -0.021916   0.271650  -0.081    0.936    
## USYes        1.200573   0.259042   4.635 4.86e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.472 on 396 degrees of freedom
## Multiple R-squared:  0.2393, Adjusted R-squared:  0.2335 
## F-statistic: 41.52 on 3 and 396 DF,  p-value: < 2.2e-16
## (b) Provide an interpretation of each coefficient in the model.

# Show coefficients
summary(model_full)$coefficients
##                Estimate  Std. Error      t value     Pr(>|t|)
## (Intercept) 13.04346894 0.651012245  20.03567373 3.626602e-62
## Price       -0.05445885 0.005241855 -10.38923205 1.609917e-22
## UrbanYes    -0.02191615 0.271650277  -0.08067781 9.357389e-01
## USYes        1.20057270 0.259041508   4.63467306 4.860245e-06
# Extract coefficients
coefs <- summary(model_full)$coefficients

intercept <- round(coefs["(Intercept)", "Estimate"], 2)
price_coef <- round(coefs["Price", "Estimate"], 2)
urban_coef <- round(coefs["UrbanYes", "Estimate"], 2)
us_coef <- round(coefs["USYes", "Estimate"], 2)

cat("Intercept:", intercept, 
    "- Expected Sales when Price = 0, Urban = 'No', US = 'No'\n")
## Intercept: 13.04 - Expected Sales when Price = 0, Urban = 'No', US = 'No'
cat("Price:", price_coef,
    "- For each $1 increase in Price, Sales decreases by", abs(price_coef), "units\n")
## Price: -0.05 - For each $1 increase in Price, Sales decreases by 0.05 units
cat("UrbanYes:", urban_coef, 
    "- Difference in Sales between Urban = 'Yes' and Urban = 'No', holding other variables constant\n")
## UrbanYes: -0.02 - Difference in Sales between Urban = 'Yes' and Urban = 'No', holding other variables constant
cat("USYes:", us_coef,
    "- Difference in Sales between US = 'Yes' and US = 'No', holding other variables constant\n")
## USYes: 1.2 - Difference in Sales between US = 'Yes' and US = 'No', holding other variables constant
## (c) Write out the model in equation form.
coef(model_full)
## (Intercept)       Price    UrbanYes       USYes 
## 13.04346894 -0.05445885 -0.02191615  1.20057270
## (d) Hypothesis Testing for Predictors
summary(model_full)$coefficients
##                Estimate  Std. Error      t value     Pr(>|t|)
## (Intercept) 13.04346894 0.651012245  20.03567373 3.626602e-62
## Price       -0.05445885 0.005241855 -10.38923205 1.609917e-22
## UrbanYes    -0.02191615 0.271650277  -0.08067781 9.357389e-01
## USYes        1.20057270 0.259041508   4.63467306 4.860245e-06
## (e) Fit Smaller Model Using Only Significant Predictors
model_small <- lm(Sales ~ Price + US, data = Carseats)
summary(model_small)
## 
## Call:
## lm(formula = Sales ~ Price + US, data = Carseats)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.9269 -1.6286 -0.0574  1.5766  7.0515 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 13.03079    0.63098  20.652  < 2e-16 ***
## Price       -0.05448    0.00523 -10.416  < 2e-16 ***
## USYes        1.19964    0.25846   4.641 4.71e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.469 on 397 degrees of freedom
## Multiple R-squared:  0.2393, Adjusted R-squared:  0.2354 
## F-statistic: 62.43 on 2 and 397 DF,  p-value: < 2.2e-16
## (f) Model Fit Comparison
# Compare R-squared values
summary(model_full)$r.squared
## [1] 0.2392754
summary(model_small)$r.squared
## [1] 0.2392629
## (g) 95% Confidence Intervals for Coefficients
confint(model_small, level = 0.95)
##                   2.5 %      97.5 %
## (Intercept) 11.79032020 14.27126531
## Price       -0.06475984 -0.04419543
## USYes        0.69151957  1.70776632
## (h) Checking for Outliers and High Leverage Points
par(mfrow = c(2, 2))
plot(model_small)

Chapter4: Exercise 12

# --- Part (a): Log odds in your model ---
# The log odds in the standard logistic regression model is β₀ + β₁x
# This is derived directly from the model equation and doesn't require computation

# --- Part (b): Log odds in your friend's softmax model ---
# The log odds in the softmax model is (α_orange0 - α_apple0) + (α_orange1 - α_apple1)x
# This comes from taking the log of the ratio of probabilities

# --- Part (c): Finding coefficients in friend's model given your model ---
# Given β₀ = 2 and β₁ = -1
beta0 <- 2
beta1 <- -1

# One possible solution:
alpha_orange0_c <- 2  # arbitrary choice
alpha_apple0_c <- 0   # arbitrary choice
alpha_orange1_c <- -1 # satisfies constraint
alpha_apple1_c <- 0   # arbitrary choice

cat("Part (c) - One possible solution for friend's coefficients:\n")
## Part (c) - One possible solution for friend's coefficients:
cat("α_orange0 =", alpha_orange0_c, "\n")
## α_orange0 = 2
cat("α_orange1 =", alpha_orange1_c, "\n")
## α_orange1 = -1
cat("α_apple0 =", alpha_apple0_c, "\n")
## α_apple0 = 0
cat("α_apple1 =", alpha_apple1_c, "\n\n")
## α_apple1 = 0
# Alternative solution:
alpha_orange0_alt <- 0
alpha_apple0_alt <- -2
alpha_orange1_alt <- 0
alpha_apple1_alt <- 1

cat("Part (c) - Alternative solution for friend's coefficients:\n")
## Part (c) - Alternative solution for friend's coefficients:
cat("α_orange0 =", alpha_orange0_alt, "\n")
## α_orange0 = 0
cat("α_orange1 =", alpha_orange1_alt, "\n")
## α_orange1 = 0
cat("α_apple0 =", alpha_apple0_alt, "\n")
## α_apple0 = -2
cat("α_apple1 =", alpha_apple1_alt, "\n\n")
## α_apple1 = 1
# --- Part (d): Finding your coefficients given your friend's model ---
# Given coefficients from friend's model
alpha_orange0 <- 1.2
alpha_orange1 <- -2
alpha_apple0 <- 3
alpha_apple1 <- 0.6

# Calculate your model's coefficients
your_beta0 <- alpha_orange0 - alpha_apple0
your_beta1 <- alpha_orange1 - alpha_apple1

cat("Part (d) - Your model coefficients:\n")
## Part (d) - Your model coefficients:
cat("β₀ =", your_beta0, "\n")
## β₀ = -1.8
cat("β₁ =", your_beta1, "\n\n")
## β₁ = -2.6
# --- Part (e): Agreement between models ---
# Function to calculate probabilities using your logistic regression model
your_model_prob <- function(x, beta0, beta1) {
  linear_pred <- beta0 + beta1 * x
  exp(linear_pred) / (1 + exp(linear_pred))
}

# Function to calculate probabilities using your friend's softmax model
friend_model_prob <- function(x, alpha_orange0, alpha_orange1, alpha_apple0, alpha_apple1) {
  orange_score <- exp(alpha_orange0 + alpha_orange1 * x)
  apple_score <- exp(alpha_apple0 + alpha_apple1 * x)
  orange_score / (orange_score + apple_score)
}

# Generate 2000 test observations
set.seed(123)
test_data <- runif(2000, -5, 5)

# Calculate predictions from both models
your_preds <- ifelse(your_model_prob(test_data, your_beta0, your_beta1) > 0.5, "orange", "apple")
friend_preds <- ifelse(friend_model_prob(test_data, alpha_orange0, alpha_orange1, alpha_apple0, alpha_apple1) > 0.5, "orange", "apple")

# Calculate agreement
agreement <- mean(your_preds == friend_preds)
cat("Part (e) - Fraction of agreement:", agreement, "\n\n")
## Part (e) - Fraction of agreement: 1
# Calculate decision boundaries for visualization
your_boundary <- -your_beta0/your_beta1
friend_boundary <- -(alpha_orange0 - alpha_apple0)/(alpha_orange1 - alpha_apple1)

cat("Decision boundary in your model: x =", your_boundary, "\n")
## Decision boundary in your model: x = -0.6923077
cat("Decision boundary in friend's model: x =", friend_boundary, "\n\n")
## Decision boundary in friend's model: x = -0.6923077
# --- Visualization of probability curves ---
x_range <- seq(-5, 5, length.out = 100)
your_probs <- sapply(x_range, function(x) your_model_prob(x, your_beta0, your_beta1))
friend_probs <- sapply(x_range, function(x) 
  friend_model_prob(x, alpha_orange0, alpha_orange1, alpha_apple0, alpha_apple1))

# Create data frame for plotting
plot_data <- data.frame(
  x = rep(x_range, 2),
  prob = c(your_probs, friend_probs),
  model = rep(c("Your Model", "Friend's Model"), each = length(x_range))
)

# Plot the probability curves
par(mfrow=c(1,1))
plot(x_range, your_probs, type = "l", col = "blue", lwd = 2,
     xlab = "X", ylab = "P(orange|X)", 
     main = "Probability of Orange by Model", ylim = c(0, 1))
lines(x_range, friend_probs, col = "red", lwd = 2)
abline(h = 0.5, lty = 2)
abline(v = your_boundary, col = "blue", lty = 3)
abline(v = friend_boundary, col = "red", lty = 3)
legend("topright", legend = c("Your Model", "Friend's Model", "Decision Boundary"), 
       col = c("blue", "red", "black"), lty = c(1, 1, 2))

# --- Demonstration that models are equivalent for binary classification ---
# Calculate absolute difference between probabilities at each point
prob_diff <- abs(your_probs - friend_probs)
max_diff <- max(prob_diff)
cat("Maximum difference in probabilities:", max_diff, "\n")
## Maximum difference in probabilities: 1.110223e-16
# Check if predictions match at all points
x_check <- seq(-5, 5, by = 0.01)
your_pred_check <- ifelse(your_model_prob(x_check, your_beta0, your_beta1) > 0.5, 1, 0)
friend_pred_check <- ifelse(friend_model_prob(x_check, alpha_orange0, alpha_orange1, 
                                            alpha_apple0, alpha_apple1) > 0.5, 1, 0)
all_equal <- all(your_pred_check == friend_pred_check)
cat("All predictions match:", all_equal)
## All predictions match: TRUE