STA 6543 Predictive Modeling

Problem 13: Weekly Data Set

data(Weekly, package = "ISLR2")
cat("Dimensions:", nrow(Weekly), "rows x", ncol(Weekly), "columns\n")

## Dimensions: 1089 rows x 9 columns

head(Weekly)

##   Year   Lag1   Lag2   Lag3   Lag4   Lag5    Volume  Today Direction
## 1 1990  0.816  1.572 -3.936 -0.229 -3.484 0.1549760 -0.270      Down
## 2 1990 -0.270  0.816  1.572 -3.936 -0.229 0.1485740 -2.576      Down
## 3 1990 -2.576 -0.270  0.816  1.572 -3.936 0.1598375  3.514        Up
## 4 1990  3.514 -2.576 -0.270  0.816  1.572 0.1616300  0.712        Up
## 5 1990  0.712  3.514 -2.576 -0.270  0.816 0.1537280  1.178        Up
## 6 1990  1.178  0.712  3.514 -2.576 -0.270 0.1544440 -1.372      Down

(a) Numerical and graphical summaries

summary(Weekly)

##       Year           Lag1               Lag2               Lag3         
##  Min.   :1990   Min.   :-18.1950   Min.   :-18.1950   Min.   :-18.1950  
##  1st Qu.:1995   1st Qu.: -1.1540   1st Qu.: -1.1540   1st Qu.: -1.1580  
##  Median :2000   Median :  0.2410   Median :  0.2410   Median :  0.2410  
##  Mean   :2000   Mean   :  0.1506   Mean   :  0.1511   Mean   :  0.1472  
##  3rd Qu.:2005   3rd Qu.:  1.4050   3rd Qu.:  1.4090   3rd Qu.:  1.4090  
##  Max.   :2010   Max.   : 12.0260   Max.   : 12.0260   Max.   : 12.0260  
##       Lag4               Lag5              Volume            Today         
##  Min.   :-18.1950   Min.   :-18.1950   Min.   :0.08747   Min.   :-18.1950  
##  1st Qu.: -1.1580   1st Qu.: -1.1660   1st Qu.:0.33202   1st Qu.: -1.1540  
##  Median :  0.2380   Median :  0.2340   Median :1.00268   Median :  0.2410  
##  Mean   :  0.1458   Mean   :  0.1399   Mean   :1.57462   Mean   :  0.1499  
##  3rd Qu.:  1.4090   3rd Qu.:  1.4050   3rd Qu.:2.05373   3rd Qu.:  1.4050  
##  Max.   : 12.0260   Max.   : 12.0260   Max.   :9.32821   Max.   : 12.0260  
##  Direction 
##  Down:484  
##  Up  :605  
##            
##            
##            
##

round(cor(Weekly[, -9]), 3)

##          Year   Lag1   Lag2   Lag3   Lag4   Lag5 Volume  Today
## Year    1.000 -0.032 -0.033 -0.030 -0.031 -0.031  0.842 -0.032
## Lag1   -0.032  1.000 -0.075  0.059 -0.071 -0.008 -0.065 -0.075
## Lag2   -0.033 -0.075  1.000 -0.076  0.058 -0.072 -0.086  0.059
## Lag3   -0.030  0.059 -0.076  1.000 -0.075  0.061 -0.069 -0.071
## Lag4   -0.031 -0.071  0.058 -0.075  1.000 -0.076 -0.061 -0.008
## Lag5   -0.031 -0.008 -0.072  0.061 -0.076  1.000 -0.059  0.011
## Volume  0.842 -0.065 -0.086 -0.069 -0.061 -0.059  1.000 -0.033
## Today  -0.032 -0.075  0.059 -0.071 -0.008  0.011 -0.033  1.000

par(mfrow = c(2, 3))
for (v in c("Lag1", "Lag2", "Lag3", "Lag4", "Lag5", "Volume")) {
  boxplot(Weekly[[v]] ~ Weekly$Direction,
          main  = v,
          xlab  = "Direction",
          ylab  = v,
          col   = c("salmon", "steelblue"))
}

par(mfrow = c(1, 1))

plot(Weekly$Year, Weekly$Volume,
     xlab = "Year", ylab = "Volume",
     main = "Trading Volume over Time",
     pch  = 20, col = "steelblue", cex = 0.6)

The correlation matrix doesn’t show much between the lag variables and Direction. The only clear pattern is that Volume has been climbing steadily over the years. The lag variables themselves barely separate the Up and Down weeks in the boxplots.

(b) Logistic regression with all predictors

fit_all <- glm(Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + Volume,
               data   = Weekly,
               family = binomial)
summary(fit_all)

## 
## Call:
## glm(formula = Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + 
##     Volume, family = binomial, data = Weekly)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept)  0.26686    0.08593   3.106   0.0019 **
## Lag1        -0.04127    0.02641  -1.563   0.1181   
## Lag2         0.05844    0.02686   2.175   0.0296 * 
## Lag3        -0.01606    0.02666  -0.602   0.5469   
## Lag4        -0.02779    0.02646  -1.050   0.2937   
## Lag5        -0.01447    0.02638  -0.549   0.5833   
## Volume      -0.02274    0.03690  -0.616   0.5377   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1496.2  on 1088  degrees of freedom
## Residual deviance: 1486.4  on 1082  degrees of freedom
## AIC: 1500.4
## 
## Number of Fisher Scoring iterations: 4

Only Lag2 is significant at the 5% level. Everything else has large p-values.

(c) Confusion matrix on the full data

prob_all <- predict(fit_all, type = "response")
pred_all <- ifelse(prob_all > 0.5, "Up", "Down")
table(Predicted = pred_all, Truth = Weekly$Direction)

##          Truth
## Predicted Down  Up
##      Down   54  48
##      Up    430 557

cat("Overall fraction correct:", round(mean(pred_all == Weekly$Direction), 4), "\n")

## Overall fraction correct: 0.5611

The overall accuracy looks okay at first glance, but the confusion matrix tells the real story. The model is calling almost everything “Up” — it catches most of the actual Up weeks but misses almost all the Down weeks. So it’s not really learning anything useful, just predicting the majority class.

(d) Logistic regression: training 1990-2008, test on 2009-2010

train   <- Weekly$Year <= 2008
w_train <- Weekly[train, ]
w_test  <- Weekly[!train, ]

fit_d    <- glm(Direction ~ Lag2, data = w_train, family = binomial)
prob_d   <- predict(fit_d, newdata = w_test, type = "response")
pred_d   <- ifelse(prob_d > 0.5, "Up", "Down")
table(Predicted = pred_d, Truth = w_test$Direction)

##          Truth
## Predicted Down Up
##      Down    9  5
##      Up     34 56

cat("Logistic (Lag2) test accuracy:", round(mean(pred_d == w_test$Direction), 4), "\n")

## Logistic (Lag2) test accuracy: 0.625

(e) LDA with Lag2

fit_lda_d  <- lda(Direction ~ Lag2, data = w_train)
pred_lda_d <- predict(fit_lda_d, newdata = w_test)$class
table(Predicted = pred_lda_d, Truth = w_test$Direction)

##          Truth
## Predicted Down Up
##      Down    9  5
##      Up     34 56

cat("LDA test accuracy:", round(mean(pred_lda_d == w_test$Direction), 4), "\n")

## LDA test accuracy: 0.625

(f) QDA with Lag2

fit_qda_d  <- qda(Direction ~ Lag2, data = w_train)
pred_qda_d <- predict(fit_qda_d, newdata = w_test)$class
table(Predicted = pred_qda_d, Truth = w_test$Direction)

##          Truth
## Predicted Down Up
##      Down    0  0
##      Up     43 61

cat("QDA test accuracy:", round(mean(pred_qda_d == w_test$Direction), 4), "\n")

## QDA test accuracy: 0.5865

(g) KNN with K = 1

set.seed(1)
train_X    <- as.matrix(w_train$Lag2)
test_X     <- as.matrix(w_test$Lag2)
pred_knn1  <- knn(train_X, test_X, w_train$Direction, k = 1)
table(Predicted = pred_knn1, Truth = w_test$Direction)

##          Truth
## Predicted Down Up
##      Down   21 30
##      Up     22 31

cat("KNN (K=1) test accuracy:", round(mean(pred_knn1 == w_test$Direction), 4), "\n")

## KNN (K=1) test accuracy: 0.5

(h) Naive Bayes with Lag2

fit_nb_d  <- naiveBayes(Direction ~ Lag2, data = w_train)
pred_nb_d <- predict(fit_nb_d, newdata = w_test)
table(Predicted = pred_nb_d, Truth = w_test$Direction)

##          Truth
## Predicted Down Up
##      Down    0  0
##      Up     43 61

cat("Naive Bayes test accuracy:", round(mean(pred_nb_d == w_test$Direction), 4), "\n")

## Naive Bayes test accuracy: 0.5865

(i) Which method performs best?

Logistic regression and LDA tie for the best accuracy on the test set. QDA and naive Bayes both default to predicting “Up” almost every time, which gives a passable accuracy number but isn’t actually doing anything useful. KNN with K = 1 is too noisy. For this predictor, logistic regression or LDA are the better options.

(j) Experimenting with different predictors and methods

# Logistic: Lag1 + Lag2
fit_j1  <- glm(Direction ~ Lag1 + Lag2, data = w_train, family = binomial)
pred_j1 <- ifelse(predict(fit_j1, newdata = w_test, type = "response") > 0.5, "Up", "Down")
cat("Logistic Lag1+Lag2       :", round(mean(pred_j1 == w_test$Direction), 4), "\n")

## Logistic Lag1+Lag2       : 0.5769

# Logistic: Lag2 + Volume
fit_j2  <- glm(Direction ~ Lag2 + Volume, data = w_train, family = binomial)
pred_j2 <- ifelse(predict(fit_j2, newdata = w_test, type = "response") > 0.5, "Up", "Down")
cat("Logistic Lag2+Volume     :", round(mean(pred_j2 == w_test$Direction), 4), "\n")

## Logistic Lag2+Volume     : 0.5385

# Logistic: Lag2 quadratic
fit_j3  <- glm(Direction ~ Lag2 + I(Lag2^2), data = w_train, family = binomial)
pred_j3 <- ifelse(predict(fit_j3, newdata = w_test, type = "response") > 0.5, "Up", "Down")
cat("Logistic Lag2 + Lag2^2   :", round(mean(pred_j3 == w_test$Direction), 4), "\n")

## Logistic Lag2 + Lag2^2   : 0.625

# LDA: Lag1 + Lag2
fit_lda_j  <- lda(Direction ~ Lag1 + Lag2, data = w_train)
pred_lda_j <- predict(fit_lda_j, newdata = w_test)$class
cat("LDA Lag1+Lag2            :", round(mean(pred_lda_j == w_test$Direction), 4), "\n")

## LDA Lag1+Lag2            : 0.5769

# KNN with several K values on Lag2
set.seed(1)
for (k in c(3, 5, 10, 20)) {
  pk <- knn(train_X, test_X, w_train$Direction, k = k)
  cat("KNN K =", k, "               :", round(mean(pk == w_test$Direction), 4), "\n")
}

## KNN K = 3                : 0.5481 
## KNN K = 5                : 0.5385 
## KNN K = 10                : 0.5865 
## KNN K = 20                : 0.5962

Adding more predictors doesn’t consistently help — the Lag2-only models stay competitive. KNN improves with larger K compared to K = 1, but still doesn’t beat logistic regression or LDA. The best overall approach on this dataset is LDA or logistic regression with just Lag2.

# Best confusion matrix: LDA with Lag2
cat("Best model: LDA with Lag2\n")

## Best model: LDA with Lag2

table(Predicted = pred_lda_d, Truth = w_test$Direction)

##          Truth
## Predicted Down Up
##      Down    9  5
##      Up     34 56

cat("Test accuracy:", round(mean(pred_lda_d == w_test$Direction), 4), "\n")

## Test accuracy: 0.625

Problem 14: Auto Data Set

data(Auto, package = "ISLR2")
Auto <- na.omit(Auto)
cat("Dimensions:", nrow(Auto), "rows x", ncol(Auto), "columns\n")

## Dimensions: 392 rows x 9 columns

(a) Create mpg01

Auto$mpg01 <- factor(ifelse(Auto$mpg > median(Auto$mpg), 1, 0))
cat("Median mpg:", median(Auto$mpg), "\n")

## Median mpg: 22.75

table(Auto$mpg01)

## 
##   0   1 
## 196 196

(b) Exploratory data analysis

par(mfrow = c(2, 3))
for (v in c("cylinders", "displacement", "horsepower", "weight", "acceleration", "year")) {
  boxplot(Auto[[v]] ~ Auto$mpg01,
          main  = v,
          xlab  = "mpg01  (0 = low, 1 = high)",
          ylab  = v,
          col   = c("salmon", "steelblue"),
          names = c("Low mpg", "High mpg"))
}

par(mfrow = c(1, 1))

pairs(Auto[, c("cylinders", "displacement", "horsepower",
               "weight", "acceleration", "year")],
      col  = ifelse(Auto$mpg01 == 1, "steelblue", "salmon"),
      pch  = 20,
      cex  = 0.5,
      main = "Scatterplot Matrix (blue = high mpg, red = low mpg)")

round(cor(Auto[, c("cylinders", "displacement", "horsepower",
                   "weight", "acceleration", "year")])[, 1:6], 3)

##              cylinders displacement horsepower weight acceleration   year
## cylinders        1.000        0.951      0.843  0.898       -0.505 -0.346
## displacement     0.951        1.000      0.897  0.933       -0.544 -0.370
## horsepower       0.843        0.897      1.000  0.865       -0.689 -0.416
## weight           0.898        0.933      0.865  1.000       -0.417 -0.309
## acceleration    -0.505       -0.544     -0.689 -0.417        1.000  0.290
## year            -0.346       -0.370     -0.416 -0.309        0.290  1.000

The boxplots make the separation pretty clear. cylinders, displacement, horsepower, and weight all look strongly negatively associated with mpg01 — high-mpg cars tend to have fewer cylinders, smaller engines, less horsepower, and lower weight. year goes the other way: newer cars are more likely to fall in the high-mpg group, which makes sense given fuel efficiency improvements over time. acceleration shows weaker separation and is less useful as a predictor. Based on this, I’ll use cylinders, displacement, horsepower, weight, and year as predictors going forward.

(c) Train and test split

set.seed(1)
n_auto  <- nrow(Auto)
tr_idx  <- sample(n_auto, floor(n_auto * 0.7))
auto_tr <- Auto[tr_idx, ]
auto_te <- Auto[-tr_idx, ]
cat("Training:", nrow(auto_tr), "  Test:", nrow(auto_te), "\n")

## Training: 274   Test: 118

(d) LDA

fit_lda14  <- lda(mpg01 ~ cylinders + displacement + horsepower + weight + year,
                  data = auto_tr)
pred_lda14 <- predict(fit_lda14, newdata = auto_te)$class
table(Predicted = pred_lda14, Truth = auto_te$mpg01)

##          Truth
## Predicted  0  1
##         0 48  1
##         1 13 56

cat("LDA test error:", round(mean(pred_lda14 != auto_te$mpg01), 4), "\n")

## LDA test error: 0.1186

(e) QDA

fit_qda14  <- qda(mpg01 ~ cylinders + displacement + horsepower + weight + year,
                  data = auto_tr)
pred_qda14 <- predict(fit_qda14, newdata = auto_te)$class
table(Predicted = pred_qda14, Truth = auto_te$mpg01)

##          Truth
## Predicted  0  1
##         0 52  4
##         1  9 53

cat("QDA test error:", round(mean(pred_qda14 != auto_te$mpg01), 4), "\n")

## QDA test error: 0.1102

(f) Logistic regression

fit_log14  <- glm(mpg01 ~ cylinders + displacement + horsepower + weight + year,
                  data   = auto_tr,
                  family = binomial)
summary(fit_log14)

## 
## Call:
## glm(formula = mpg01 ~ cylinders + displacement + horsepower + 
##     weight + year, family = binomial, data = auto_tr)
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -13.990747   5.524488  -2.532 0.011325 *  
## cylinders      0.155724   0.475377   0.328 0.743230    
## displacement  -0.009207   0.011764  -0.783 0.433836    
## horsepower    -0.039348   0.019693  -1.998 0.045708 *  
## weight        -0.003955   0.001108  -3.570 0.000357 ***
## year           0.390736   0.084575   4.620 3.84e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 379.79  on 273  degrees of freedom
## Residual deviance: 114.05  on 268  degrees of freedom
## AIC: 126.05
## 
## Number of Fisher Scoring iterations: 8

prob_log14 <- predict(fit_log14, newdata = auto_te, type = "response")
pred_log14 <- factor(ifelse(prob_log14 > 0.5, 1, 0), levels = c("0", "1"))
table(Predicted = pred_log14, Truth = auto_te$mpg01)

##          Truth
## Predicted  0  1
##         0 53  4
##         1  8 53

cat("Logistic regression test error:", round(mean(pred_log14 != auto_te$mpg01), 4), "\n")

## Logistic regression test error: 0.1017

Logistic regression does well here. The test error is in a similar range to LDA and QDA. Looking at the model summary, weight and year tend to carry the most significance — heavier cars and older model years push the probability of being in the low-mpg group up. displacement and horsepower are highly correlated with weight, so their individual coefficients get shrunk down by collinearity.

(g) Naive Bayes

fit_nb14  <- naiveBayes(mpg01 ~ cylinders + displacement + horsepower + weight + year,
                        data = auto_tr)
pred_nb14 <- predict(fit_nb14, newdata = auto_te)
table(Predicted = pred_nb14, Truth = auto_te$mpg01)

##          Truth
## Predicted  0  1
##         0 52  3
##         1  9 54

cat("Naive Bayes test error:", round(mean(pred_nb14 != auto_te$mpg01), 4), "\n")

## Naive Bayes test error: 0.1017

(h) KNN

preds14   <- c("cylinders", "displacement", "horsepower", "weight", "year")
tr_mat14  <- scale(auto_tr[, preds14])
te_mat14  <- scale(auto_te[, preds14],
                   center = attr(tr_mat14, "scaled:center"),
                   scale  = attr(tr_mat14, "scaled:scale"))

set.seed(1)
k_vals    <- c(1, 3, 5, 7, 10, 15, 20)
knn_errs  <- sapply(k_vals, function(k) {
  pk <- knn(tr_mat14, te_mat14, auto_tr$mpg01, k = k)
  mean(pk != auto_te$mpg01)
})
names(knn_errs) <- paste0("K=", k_vals)
round(knn_errs, 4)

##    K=1    K=3    K=5    K=7   K=10   K=15   K=20 
## 0.0763 0.0847 0.0847 0.0678 0.0932 0.1102 0.0932

best_k   <- k_vals[which.min(knn_errs)]
best_err <- round(min(knn_errs), 4)
cat("Best K:", best_k, " test error:", best_err, "\n")

## Best K: 7  test error: 0.0678

# Confusion matrix for best K
pred_knn_best <- knn(tr_mat14, te_mat14, auto_tr$mpg01, k = best_k)
table(Predicted = pred_knn_best, Truth = auto_te$mpg01)

##          Truth
## Predicted  0  1
##         0 56  3
##         1  5 54

KNN with very small K tends to overfit and does worse. Performance improves with moderate K values. The best K on this split is 7 with a test error of 0.0678.

Problem 16: Boston Data Set

data(Boston, package = "ISLR2")
Boston$crim01 <- factor(ifelse(Boston$crim > median(Boston$crim), 1, 0))
cat("crim01 distribution:\n")

## crim01 distribution:

table(Boston$crim01)

## 
##   0   1 
## 253 253

cat("Dimensions:", nrow(Boston), "rows x", ncol(Boston), "columns\n")

## Dimensions: 506 rows x 14 columns

Explore predictors

bos_num <- Boston[, !names(Boston) %in% c("crim", "crim01")]
cor_vec  <- sapply(bos_num, function(x) cor(x, as.numeric(as.character(Boston$crim01))))
sort(round(cor_vec, 3), decreasing = TRUE)

##     nox     rad     age     tax   indus   lstat ptratio    chas      rm    medv 
##   0.723   0.620   0.614   0.609   0.603   0.453   0.254   0.070  -0.156  -0.263 
##      zn     dis 
##  -0.436  -0.616

par(mfrow = c(2, 3))
for (v in c("rad", "tax", "nox", "age", "lstat", "dis")) {
  boxplot(Boston[[v]] ~ Boston$crim01,
          main  = v,
          xlab  = "crim01  (0 = low, 1 = high)",
          col   = c("steelblue", "salmon"),
          names = c("Low crime", "High crime"))
}

par(mfrow = c(1, 1))

rad (highway accessibility), tax, nox, and age all show the clearest separation between low and high crime suburbs. I’ll use those along with lstat and dis as predictors.

Train and test split

set.seed(1)
n_bos  <- nrow(Boston)
tr_bos <- sample(n_bos, floor(n_bos * 0.7))
bos_tr <- Boston[tr_bos, ]
bos_te <- Boston[-tr_bos, ]
cat("Training:", nrow(bos_tr), "  Test:", nrow(bos_te), "\n")

## Training: 354   Test: 152

Logistic regression

fit_log16  <- glm(crim01 ~ nox + rad + tax + age + dis + lstat,
                  data   = bos_tr,
                  family = binomial)
prob_log16 <- predict(fit_log16, newdata = bos_te, type = "response")
pred_log16 <- factor(ifelse(prob_log16 > 0.5, 1, 0), levels = c("0", "1"))
table(Predicted = pred_log16, Truth = bos_te$crim01)

##          Truth
## Predicted  0  1
##         0 57 11
##         1 16 68

cat("Logistic regression test error:", round(mean(pred_log16 != bos_te$crim01), 4), "\n")

## Logistic regression test error: 0.1776

LDA

fit_lda16  <- lda(crim01 ~ nox + rad + tax + age + dis + lstat, data = bos_tr)
pred_lda16 <- predict(fit_lda16, newdata = bos_te)$class
table(Predicted = pred_lda16, Truth = bos_te$crim01)

##          Truth
## Predicted  0  1
##         0 71 19
##         1  2 60

cat("LDA test error:", round(mean(pred_lda16 != bos_te$crim01), 4), "\n")

## LDA test error: 0.1382

Naive Bayes

fit_nb16  <- naiveBayes(crim01 ~ nox + rad + tax + age + dis + lstat, data = bos_tr)
pred_nb16 <- predict(fit_nb16, newdata = bos_te)
table(Predicted = pred_nb16, Truth = bos_te$crim01)

##          Truth
## Predicted  0  1
##         0 66 18
##         1  7 61

cat("Naive Bayes test error:", round(mean(pred_nb16 != bos_te$crim01), 4), "\n")

## Naive Bayes test error: 0.1645

KNN

preds16    <- c("nox", "rad", "tax", "age", "dis", "lstat")
tr_bos_mat <- scale(bos_tr[, preds16])
te_bos_mat <- scale(bos_te[, preds16],
                    center = attr(tr_bos_mat, "scaled:center"),
                    scale  = attr(tr_bos_mat, "scaled:scale"))

set.seed(1)
k_bos      <- c(1, 3, 5, 10, 20)
knn_b_errs <- sapply(k_bos, function(k) {
  pk <- knn(tr_bos_mat, te_bos_mat, bos_tr$crim01, k = k)
  mean(pk != bos_te$crim01)
})
names(knn_b_errs) <- paste0("K=", k_bos)
round(knn_b_errs, 4)

##    K=1    K=3    K=5   K=10   K=20 
## 0.0789 0.0789 0.0921 0.0921 0.0921

Summary of findings

cat("Test errors:\n")

## Test errors:

cat("  Logistic regression:", round(mean(pred_log16 != bos_te$crim01), 4), "\n")

##   Logistic regression: 0.1776

cat("  LDA               :", round(mean(pred_lda16  != bos_te$crim01), 4), "\n")

##   LDA               : 0.1382

cat("  Naive Bayes       :", round(mean(pred_nb16   != bos_te$crim01), 4), "\n")

##   Naive Bayes       : 0.1645

cat("  KNN (best)        :", round(min(knn_b_errs), 4), "\n")

##   KNN (best)        : 0.0789

rad and nox are the strongest predictors — suburbs with better highway access and higher pollution levels tend to have above-median crime rates. Logistic regression and LDA give the lowest test errors here. KNN does well at moderate K values but is more sensitive to the choice of K. Naive Bayes lags behind slightly, likely because the independence assumption doesn’t hold well for these correlated predictors.