# Chapter 2 Exercise 10
library(ISLR2)
data(Boston)
# Basic info: row and column counts, then the dataset's help page
c(nrow(Boston), ncol(Boston))
## [1] 506 13
help("Boston")
# Exploratory plots
# Pairwise scatterplots of the first six columns plus lstat and medv, so that
# the relationships described below (rm vs medv, lstat vs medv) are actually
# part of the plot; columns 1:6 alone contain neither lstat nor medv.
pairs(Boston[, c(names(Boston)[1:6], "lstat", "medv")])
#There are both linear and non-linear relationships among the variables. For example, medv (median home value) tends to increase with rm (average number of rooms), while lstat (percentage of lower-status population) shows a negative association with medv.
# Correlation of the crime rate with every other column of the data set
cor(Boston[["crim"]], Boston[, names(Boston) != "crim"])
## zn indus chas nox rm age dis
## [1,] -0.2004692 0.4065834 -0.05589158 0.4209717 -0.2192467 0.3527343 -0.3796701
## rad tax ptratio lstat medv
## [1,] 0.6255051 0.5827643 0.2899456 0.4556215 -0.3883046
# Scatterplot of crime rate against rad, the strongest correlate above
plot(Boston$rad, Boston$crim, xlab = "rad", ylab = "crim")
#The crime rate (crim) is positively related to rad (access to radial highways), tax, and ptratio (pupil-teacher ratio); all three correlations above are positive (0.63, 0.58, and 0.29). Areas with higher crime often have better highway access, higher taxes, and more pupils per teacher (i.e., fewer teachers per student).
# Summary statistics for crime rate, tax rate and pupil-teacher ratio
summary(Boston[, c("crim", "tax", "ptratio")])
## crim tax ptratio
## Min. : 0.00632 Min. :187.0 Min. :12.60
## 1st Qu.: 0.08205 1st Qu.:279.0 1st Qu.:17.40
## Median : 0.25651 Median :330.0 Median :19.05
## Mean : 3.61352 Mean :408.2 Mean :18.46
## 3rd Qu.: 3.67708 3rd Qu.:666.0 3rd Qu.:20.20
## Max. :88.97620 Max. :711.0 Max. :22.00
# High-value rows: the first tract attaining the maximum of each variable
Boston[match(max(Boston$crim), Boston$crim), ]
## crim zn indus chas nox rm age dis rad tax ptratio lstat medv
## 381 88.9762 0 18.1 0 0.671 6.968 91.9 1.4165 24 666 20.2 17.21 10.4
Boston[match(max(Boston$tax), Boston$tax), ]
## crim zn indus chas nox rm age dis rad tax ptratio lstat medv
## 489 0.15086 0 27.74 0 0.609 5.454 92.7 1.8209 4 711 20.1 18.06 15.2
Boston[match(max(Boston$ptratio), Boston$ptratio), ]
## crim zn indus chas nox rm age dis rad tax ptratio lstat medv
## 355 0.04301 80 1.91 0 0.413 5.663 21.9 10.5857 4 334 22 8.05 18.2
#The crim variable reaches as high as 88.98, indicating the presence of significant outliers. Tax values range from 187 to 711, and the pupil-teacher ratio varies between 12.6 and 22. Some tracts clearly deviate from the norm.
# Number of tracts with chas == 1, and the median pupil-teacher ratio
length(which(Boston$chas == 1))
## [1] 35
#A total of 35 census tracts border the Charles River.
median(Boston[["ptratio"]])
## [1] 19.05
#The median pupil-teacher ratio across all tracts is 19.05.
# First tract attaining the minimum medv, followed by the summary of every
# column so that tract's values can be compared with the overall ranges
Boston[match(min(Boston$medv), Boston$medv), ]
## crim zn indus chas nox rm age dis rad tax ptratio lstat medv
## 399 38.3518 0 18.1 0 0.693 5.453 100 1.4896 24 666 20.2 30.59 5
summary(object = Boston)
## crim zn indus chas
## Min. : 0.00632 Min. : 0.00 Min. : 0.46 Min. :0.00000
## 1st Qu.: 0.08205 1st Qu.: 0.00 1st Qu.: 5.19 1st Qu.:0.00000
## Median : 0.25651 Median : 0.00 Median : 9.69 Median :0.00000
## Mean : 3.61352 Mean : 11.36 Mean :11.14 Mean :0.06917
## 3rd Qu.: 3.67708 3rd Qu.: 12.50 3rd Qu.:18.10 3rd Qu.:0.00000
## Max. :88.97620 Max. :100.00 Max. :27.74 Max. :1.00000
## nox rm age dis
## Min. :0.3850 Min. :3.561 Min. : 2.90 Min. : 1.130
## 1st Qu.:0.4490 1st Qu.:5.886 1st Qu.: 45.02 1st Qu.: 2.100
## Median :0.5380 Median :6.208 Median : 77.50 Median : 3.207
## Mean :0.5547 Mean :6.285 Mean : 68.57 Mean : 3.795
## 3rd Qu.:0.6240 3rd Qu.:6.623 3rd Qu.: 94.08 3rd Qu.: 5.188
## Max. :0.8710 Max. :8.780 Max. :100.00 Max. :12.127
## rad tax ptratio lstat
## Min. : 1.000 Min. :187.0 Min. :12.60 Min. : 1.73
## 1st Qu.: 4.000 1st Qu.:279.0 1st Qu.:17.40 1st Qu.: 6.95
## Median : 5.000 Median :330.0 Median :19.05 Median :11.36
## Mean : 9.549 Mean :408.2 Mean :18.46 Mean :12.65
## 3rd Qu.:24.000 3rd Qu.:666.0 3rd Qu.:20.20 3rd Qu.:16.95
## Max. :24.000 Max. :711.0 Max. :22.00 Max. :37.97
## medv
## Min. : 5.00
## 1st Qu.:17.02
## Median :21.20
## Mean :22.53
## 3rd Qu.:25.00
## Max. :50.00
#The census tract with the lowest medv is valued at $5,000. It also exhibits extreme values—very high crime and lstat, and a low number of rooms—suggesting poor living conditions and lower socioeconomic status.
# Tracts averaging more than 7 and more than 8 rooms per dwelling
length(which(Boston$rm > 7))
## [1] 64
length(which(Boston$rm > 8))
## [1] 13
# Full rows for the tracts with rm above 8
subset(Boston, rm > 8)
## crim zn indus chas nox rm age dis rad tax ptratio lstat medv
## 98 0.12083 0 2.89 0 0.4450 8.069 76.0 3.4952 2 276 18.0 4.21 38.7
## 164 1.51902 0 19.58 1 0.6050 8.375 93.9 2.1620 5 403 14.7 3.32 50.0
## 205 0.02009 95 2.68 0 0.4161 8.034 31.9 5.1180 4 224 14.7 2.88 50.0
## 225 0.31533 0 6.20 0 0.5040 8.266 78.3 2.8944 8 307 17.4 4.14 44.8
## 226 0.52693 0 6.20 0 0.5040 8.725 83.0 2.8944 8 307 17.4 4.63 50.0
## 227 0.38214 0 6.20 0 0.5040 8.040 86.5 3.2157 8 307 17.4 3.13 37.6
## 233 0.57529 0 6.20 0 0.5070 8.337 73.3 3.8384 8 307 17.4 2.47 41.7
## 234 0.33147 0 6.20 0 0.5070 8.247 70.4 3.6519 8 307 17.4 3.95 48.3
## 254 0.36894 22 5.86 0 0.4310 8.259 8.4 8.9067 7 330 19.1 3.54 42.8
## 258 0.61154 20 3.97 0 0.6470 8.704 86.9 1.8010 5 264 13.0 5.12 50.0
## 263 0.52014 20 3.97 0 0.6470 8.398 91.5 2.2885 5 264 13.0 5.91 48.8
## 268 0.57834 20 3.97 0 0.5750 8.297 67.0 2.4216 5 264 13.0 7.44 50.0
## 365 3.47428 0 18.10 1 0.7180 8.780 82.9 1.9047 24 666 20.2 5.29 21.9
#There are 64 tracts where the average number of rooms exceeds 7, and 13 where it exceeds 8. These areas generally show higher medv, lower crim, and overall better socioeconomic characteristics.
# Chapter 3 Exercise 2
# Load required libraries
library(class)
library(FNN)
##
## Attaching package: 'FNN'
## The following objects are masked from 'package:class':
##
## knn, knn.cv
library(ISLR2)
# --- KNN Classification on iris ---
set.seed(123)
data(iris)
# Train-test split: 100 randomly drawn rows for training, the remainder for testing
idx <- sample.int(nrow(iris), size = 100)
train_iris <- iris[idx, ]
test_iris <- iris[-idx, ]
# 3-nearest-neighbour classifier on every column except the Species label.
# FNN masks class::knn (see the attach message above), so the call is
# namespaced to make explicit which implementation runs.
knn_pred <- FNN::knn(
  train = train_iris[, names(iris) != "Species"],
  test = test_iris[, names(iris) != "Species"],
  cl = train_iris[["Species"]],
  k = 3
)
# Share of test rows whose predicted species matches the true species
mean(knn_pred == test_iris[["Species"]])
## [1] 0.96
#The classification model predicts the species of a flower based on the majority vote from its 3 nearest neighbors.
# --- KNN Regression on Boston ---
set.seed(123)
data(Boston)
# Train-test split: 400 randomly drawn tracts for training, the rest for testing
idx_boston <- sample(nrow(Boston), 400)
train_boston <- Boston[idx_boston, ]
test_boston <- Boston[-idx_boston, ]
# Predictors are every column except the response medv, selected by name.
# ISLR2's Boston has 13 columns, so the former `[, -14]` removed nothing
# (an out-of-range negative index is silently ignored) and medv itself ended
# up in both feature matrices, leaking the target into the distance.
predictor_cols <- setdiff(names(Boston), "medv")
# Fit KNN Regression (k = 3)
knn_reg <- knn.reg(train = train_boston[, predictor_cols],
                   test = test_boston[, predictor_cols],
                   y = train_boston$medv,
                   k = 3)
# NOTE(review): predictors are used unscaled, so large-valued columns such as
# tax dominate the distance; consider scale() if that is not intended.
# Predictions and test-set MSE
head(knn_reg$pred)
mean((knn_reg$pred - test_boston$medv)^2)
# NOTE: the previously recorded console output for these two calls (MSE
# 19.35238) came from the fit that included medv as a feature; re-run to
# regenerate the values.
#The regression model estimates medv (median home value) by taking the average value of the 3 nearest neighbors.
# Chapter 3 Exercise 10
# Load data and libraries
library(ISLR2)
data(Carseats)
# Fit linear models: the full model, and a reduced model without Urban
mod_full <- lm(formula = Sales ~ Price + Urban + US, data = Carseats)
mod_simple <- lm(formula = Sales ~ Price + US, data = Carseats)
# Coefficient table and fit statistics for the full model
summary(object = mod_full)
##
## Call:
## lm(formula = Sales ~ Price + Urban + US, data = Carseats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.9206 -1.6220 -0.0564 1.5786 7.0581
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.043469 0.651012 20.036 < 2e-16 ***
## Price -0.054459 0.005242 -10.389 < 2e-16 ***
## UrbanYes -0.021916 0.271650 -0.081 0.936
## USYes 1.200573 0.259042 4.635 4.86e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.472 on 396 degrees of freedom
## Multiple R-squared: 0.2393, Adjusted R-squared: 0.2335
## F-statistic: 41.52 on 3 and 396 DF, p-value: < 2.2e-16
#b)Term | Estimate | Interpretation
#Intercept | 13.0435 → This is the predicted Sales (in thousands of units) when Price = 0, and both Urban and US are set to “No.” While not realistic in practice, it serves as a baseline value.
#Price | -0.0545 → For every $1 increase in price, Sales are expected to drop by 0.0545 thousand units (about 54 car seats), assuming other factors remain unchanged.
#UrbanYes | -0.0219 → On average, stores in urban locations sell 0.0219 thousand (about 22) fewer units than non-urban stores, holding other variables constant. However, this effect is statistically insignificant (p = 0.936).
#USYes | +1.2006 → Stores located in the US sell, on average, about 1.2 thousand (roughly 1,200) more units than those outside the US, assuming price and urban setting are constant. This effect is statistically significant.
#c)Model equation:
#Sales = 13.0435 − 0.0545 × Price − 0.0219 × UrbanYes + 1.2006 × USYes + ϵ
#Where:
#UrbanYes = 1 if the store is in an urban area, otherwise 0
#USYes = 1 if the store is in the US, otherwise 0
#d)Variable | p-value | Decision
#Price | < 2e-16 | Reject H₀ → Significant
#UrbanYes | 0.936 | Fail to reject H₀ → Not significant
#USYes | 4.86e-06 | Reject H₀ → Significant
# Coefficient table and fit statistics for the reduced model (Sales ~ Price + US)
summary(mod_simple)
##
## Call:
## lm(formula = Sales ~ Price + US, data = Carseats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.9269 -1.6286 -0.0574 1.5766 7.0515
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.03079 0.63098 20.652 < 2e-16 ***
## Price -0.05448 0.00523 -10.416 < 2e-16 ***
## USYes 1.19964 0.25846 4.641 4.71e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.469 on 397 degrees of freedom
## Multiple R-squared: 0.2393, Adjusted R-squared: 0.2354
## F-statistic: 62.43 on 2 and 397 DF, p-value: < 2.2e-16
# Adjusted R-squared of the full model, then of the reduced model
summary(mod_full)[["adj.r.squared"]]
## [1] 0.2335123
summary(mod_simple)[["adj.r.squared"]]
## [1] 0.2354305
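#When comparing models, look at the adjusted R-squared values. Here the reduced model scores slightly higher (0.2354 vs 0.2335) with one fewer predictor, so it is the better choice; more generally, if two models perform similarly, prefer the simpler one with fewer variables.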
# 95% confidence intervals for the coefficients of the reduced model
confint(mod_simple, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 11.79032020 14.27126531
## Price -0.06475984 -0.04419543
## USYes 0.69151957 1.70776632
#The smaller model also provides 95% confidence intervals for each coefficient, showing the range within which the true values likely fall.
# Diagnostic plots
# Draw the lm diagnostic plots on a 2 x 2 grid. par() returns the settings it
# replaced, so they are saved and restored afterwards; otherwise every later
# plot in the session would still be drawn into the 2 x 2 layout.
old_par <- par(mfrow = c(2, 2))
plot(mod_simple)
par(old_par)
# Chapter 4 Exercise 12
#a) log(Pr(Y=orange∣x)/Pr(Y=apple∣x))=β0+β1x
#b) log(Pr(Y=orange∣x)/Pr(Y=apple∣x))=(αorange0−αapple0)+(αorange1−αapple1)x
#c) Your model: 𝛽0=2, 𝛽1=−1
#Let’s find friend’s coefficients assuming softmax uses apple as the reference (arbitrary shift possible because softmax is invariant to adding same constant to both linear terms):
#One way to convert:
#Let: 𝛼orange0=2, 𝛼orange1=−1 𝛼apple0=0, 𝛼apple1=0
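#Then: Pr(Y=orange∣𝑥)=exp(2−𝑥)/(exp(2−𝑥)+exp(0)) and Pr(Y=apple∣𝑥)=exp(0)/(exp(2−𝑥)+exp(0)), so log(Pr(Y=orange∣𝑥)/Pr(Y=apple∣𝑥))=(2−𝑥)−0=2−𝑥, which matches 𝛽0+𝛽1𝑥=2−𝑥. So: 𝛼orange0=2 𝛼orange1=−1 𝛼apple0=0 𝛼apple1=0 (any coefficients with 𝛼orange0−𝛼apple0=2 and 𝛼orange1−𝛼apple1=−1 give the same model).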
#d) 𝛼orange0=1.2, 𝛼orange1=−2 𝛼apple0=3, 𝛼apple1=0.6
#Convert to the model form (log odds): log(Pr(𝑌=orange∣𝑥)/Pr(𝑌=apple∣𝑥))=(𝛼orange0−𝛼apple0)+(𝛼orange1−𝛼apple1)𝑥=(1.2−3)+(−2−0.6)𝑥=−1.8−2.6𝑥
#So the model estimates:
#𝛽0=−1.8 𝛽1=−2.6
# Nothing in this section draws random numbers; the seed call is kept so the
# RNG state left behind is unchanged.
set.seed(123)
# Evaluation grid: 1000 evenly spaced x values from -5 to 5
x <- seq(from = -5, to = 5, length.out = 1000)
# Friend's softmax model: one linear score per class
logit_o <- 1.2 - 2 * x
logit_a <- 3 + 0.6 * x
# Two-class softmax; the apple probability is the complement
p_soft_o <- exp(logit_o) / (exp(logit_a) + exp(logit_o))
p_soft_a <- 1 - p_soft_o
# Your logistic model with intercept -1.8 and slope -2.6
p_logit_o <- plogis(-1.8 - 2.6 * x)
# Hard labels at the 0.5 threshold: FALSE picks "apple", TRUE picks "orange"
pred_soft <- c("apple", "orange")[(p_soft_o > 0.5) + 1L]
pred_logit <- c("apple", "orange")[(p_logit_o > 0.5) + 1L]
# Share of grid points on which the two models predict the same class
mean(pred_logit == pred_soft)
## [1] 1
#This code computes the proportion of cases where your model and your friend’s model make the same prediction.