library(ISLR2)
library(ggplot2)
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:ISLR2':
##
## Boston
library(class)
library(e1071)
# (a) Summaries of the Weekly data
data(Weekly)
summary(Weekly)
## Year Lag1 Lag2 Lag3
## Min. :1990 Min. :-18.1950 Min. :-18.1950 Min. :-18.1950
## 1st Qu.:1995 1st Qu.: -1.1540 1st Qu.: -1.1540 1st Qu.: -1.1580
## Median :2000 Median : 0.2410 Median : 0.2410 Median : 0.2410
## Mean :2000 Mean : 0.1506 Mean : 0.1511 Mean : 0.1472
## 3rd Qu.:2005 3rd Qu.: 1.4050 3rd Qu.: 1.4090 3rd Qu.: 1.4090
## Max. :2010 Max. : 12.0260 Max. : 12.0260 Max. : 12.0260
## Lag4 Lag5 Volume Today
## Min. :-18.1950 Min. :-18.1950 Min. :0.08747 Min. :-18.1950
## 1st Qu.: -1.1580 1st Qu.: -1.1660 1st Qu.:0.33202 1st Qu.: -1.1540
## Median : 0.2380 Median : 0.2340 Median :1.00268 Median : 0.2410
## Mean : 0.1458 Mean : 0.1399 Mean :1.57462 Mean : 0.1499
## 3rd Qu.: 1.4090 3rd Qu.: 1.4050 3rd Qu.:2.05373 3rd Qu.: 1.4050
## Max. : 12.0260 Max. : 12.0260 Max. :9.32821 Max. : 12.0260
## Direction
## Down:484
## Up :605
##
##
##
##
plot(Weekly)

# (b) Logistic regression with all predictors
model <- glm(Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + Volume, data = Weekly, family = "binomial")
summary(model)
##
## Call:
## glm(formula = Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 +
## Volume, family = "binomial", data = Weekly)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.26686 0.08593 3.106 0.0019 **
## Lag1 -0.04127 0.02641 -1.563 0.1181
## Lag2 0.05844 0.02686 2.175 0.0296 *
## Lag3 -0.01606 0.02666 -0.602 0.5469
## Lag4 -0.02779 0.02646 -1.050 0.2937
## Lag5 -0.01447 0.02638 -0.549 0.5833
## Volume -0.02274 0.03690 -0.616 0.5377
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1496.2 on 1088 degrees of freedom
## Residual deviance: 1486.4 on 1082 degrees of freedom
## AIC: 1500.4
##
## Number of Fisher Scoring iterations: 4
# (c) Confusion matrix and overall fraction correct for logistic regression
predictions <- ifelse(predict(model, Weekly, type = "response") > 0.5, "Up", "Down")
confusion_matrix <- table(Actual = Weekly$Direction, Predicted = predictions)
fraction_correct <- sum(diag(confusion_matrix)) / sum(confusion_matrix)
confusion_matrix
## Predicted
## Actual Down Up
## Down 54 430
## Up 48 557
fraction_correct
## [1] 0.5610652
# (d) Logistic regression on training data and confusion matrix for held-out data
train_data <- Weekly[Weekly$Year < 2009, ]
test_data <- Weekly[Weekly$Year >= 2009, ]
train_model <- glm(Direction ~ Lag2, data = train_data, family = "binomial")
test_predictions <- ifelse(predict(train_model, test_data, type = "response") > 0.5, "Up", "Down")
test_confusion_matrix <- table(Actual = test_data$Direction, Predicted = test_predictions)
test_fraction_correct <- sum(diag(test_confusion_matrix)) / sum(test_confusion_matrix)
test_confusion_matrix
## Predicted
## Actual Down Up
## Down 9 34
## Up 5 56
test_fraction_correct
## [1] 0.625
# (e) LDA on training data and confusion matrix for held-out data
lda_model <- lda(Direction ~ Lag2, data = train_data)
lda_predictions <- predict(lda_model, test_data)$class
lda_confusion_matrix <- table(Actual = test_data$Direction, Predicted = lda_predictions)
lda_fraction_correct <- sum(diag(lda_confusion_matrix)) / sum(lda_confusion_matrix)
lda_confusion_matrix
## Predicted
## Actual Down Up
## Down 9 34
## Up 5 56
lda_fraction_correct
## [1] 0.625
# (f) QDA on training data and confusion matrix for held-out data
qda_model <- qda(Direction ~ Lag2, data = train_data)
qda_predictions <- predict(qda_model, test_data)$class
qda_confusion_matrix <- table(Actual = test_data$Direction, Predicted = qda_predictions)
qda_fraction_correct <- sum(diag(qda_confusion_matrix)) / sum(qda_confusion_matrix)
qda_confusion_matrix
## Predicted
## Actual Down Up
## Down 0 43
## Up 0 61
qda_fraction_correct
## [1] 0.5865385
# Convert predictor variables to matrices
train_matrix <- as.matrix(train_data[, "Lag2"])
test_matrix <- as.matrix(test_data[, "Lag2"])
# (g) KNN (K=1) on training data and confusion matrix for held-out data
knn_model <- knn(train_matrix, test_matrix, train_data$Direction, k = 1)
knn_confusion_matrix <- table(Actual = test_data$Direction, Predicted = knn_model)
knn_fraction_correct <- sum(diag(knn_confusion_matrix)) / sum(knn_confusion_matrix)
knn_confusion_matrix
## Predicted
## Actual Down Up
## Down 21 22
## Up 29 32
knn_fraction_correct
## [1] 0.5096154
# (h) Naive Bayes on training data and confusion matrix for held-out data
nb_model <- naiveBayes(Direction ~ Lag2, data = train_data)
nb_predictions <- predict(nb_model, test_data)
nb_confusion_matrix <- table(Actual = test_data$Direction, Predicted = nb_predictions)
nb_fraction_correct <- sum(diag(nb_confusion_matrix)) / sum(nb_confusion_matrix)
nb_confusion_matrix
## Predicted
## Actual Down Up
## Down 0 43
## Up 0 61
nb_fraction_correct
## [1] 0.5865385
# (i) Compare results of different methods
results <- c(fraction_correct, test_fraction_correct, lda_fraction_correct, qda_fraction_correct, knn_fraction_correct, nb_fraction_correct)
method_names <- c("Logistic Regression", "Logistic Regression (Hold-out)", "LDA", "QDA", "KNN (K=1)", "Naive Bayes")
best_method <- method_names[which.max(results)]
best_results <- data.frame(Method = method_names, Fraction_Correct = results)
best_results
## Method Fraction_Correct
## 1 Logistic Regression 0.5610652
## 2 Logistic Regression (Hold-out) 0.6250000
## 3 LDA 0.6250000
## 4 QDA 0.5865385
## 5 KNN (K=1) 0.5096154
## 6 Naive Bayes 0.5865385
# (j) Experiment with different predictors and interactions
# Example: Logistic regression with Lag2 and Lag3 as predictors
new_model <- glm(Direction ~ Lag2 + Lag3, data = train_data, family = "binomial")
new_predictions <- ifelse(predict(new_model, test_data, type = "response") > 0.5, "Up", "Down")
new_confusion_matrix <- table(Actual = test_data$Direction, Predicted = new_predictions)
new_fraction_correct <- sum(diag(new_confusion_matrix)) / sum(new_confusion_matrix)
new_confusion_matrix
## Predicted
## Actual Down Up
## Down 8 35
## Up 4 57
new_fraction_correct
## [1] 0.625