R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see <http://rmarkdown.rstudio.com>.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Print per-column summary statistics for `cars`; the rendered output is the
# run of `##` lines that follows.
summary(cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.

============================================================

Q13: Weekly Data Set - Logistic Regression & Classification

============================================================

# Packages used by the Q13 analysis below. Each call must be its own
# statement; several calls on one line without a separator is a parse error.
library(ISLR2)  # Weekly data set
library(MASS)   # lda(), qda()
library(class)  # knn()
library(e1071)  # naiveBayes()

(a) Numerical and graphical summaries

# Numerical summaries: per-column statistics, then the pairwise correlation
# matrix. Column 9 is dropped before cor(), which only accepts numeric input
# (presumably column 9 is the Direction factor -- confirm with names(Weekly)).
summary(Weekly)
cor(Weekly[, -9])

# Graphical summary: scatterplot matrix of every pair of columns.
pairs(Weekly)

Volume over time

# Plot Volume against row index. The title treats the index as time, which
# assumes the rows of Weekly are in chronological order -- TODO confirm.
plot(
  Weekly$Volume,
  main = "Volume Over Time",
  ylab = "Volume",
  xlab = "Index"
)

(b) Logistic regression: Direction ~ Lag1-5 + Volume

# Logistic regression of Direction on the five lag variables and Volume,
# fitted on the full Weekly data set.
glm.fit13 <- glm(
  Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + Volume,
  data = Weekly,
  family = binomial
)

# Coefficient table with standard errors and p-values.
summary(glm.fit13)
# Lag2 appears statistically significant (p < 0.05)

(c) Confusion matrix on full data

# Fitted probabilities on the same data the model was trained on
# (no `newdata` is passed to predict()).
glm.probs13 <- predict(glm.fit13, type = "response")

# Classify with a 0.5 cut-off. This assumes the modelled probability is
# P(Direction == "Up") -- TODO confirm with contrasts(Weekly$Direction).
glm.pred13 <- ifelse(glm.probs13 > 0.5, "Up", "Down")

# Confusion matrix (rows: predicted, columns: observed) and overall accuracy.
table(glm.pred13, Weekly$Direction)
mean(glm.pred13 == Weekly$Direction)

(d) Logistic regression with Lag2 only, train 1990-2008, test 2009-2010

# Logical mask: TRUE for rows with Year before 2009 (the training period).
train13 <- Weekly$Year < 2009

# Held-out rows (Year >= 2009) and their observed Direction labels.
test13 <- Weekly[!train13, ]
Direction.test13 <- Weekly$Direction[!train13]

# Logistic regression with Lag2 as the only predictor, fitted on the
# training rows selected by the logical mask `train13`.
glm.fit13d <- glm(
  Direction ~ Lag2,
  data = Weekly,
  family = binomial,
  subset = train13
)

# Predicted probabilities for the held-out rows, classified at a 0.5 cut-off.
# Assumes the modelled probability is P(Direction == "Up") -- TODO confirm
# with contrasts(Weekly$Direction).
glm.probs13d <- predict(glm.fit13d, test13, type = "response")
glm.pred13d <- ifelse(glm.probs13d > 0.5, "Up", "Down")

# Test-set confusion matrix (rows: predicted, columns: observed) and accuracy.
table(glm.pred13d, Direction.test13)
mean(glm.pred13d == Direction.test13)

(e) LDA with Lag2

# Linear discriminant analysis with Lag2 as the only predictor, fitted on
# the training rows selected by `train13`.
lda.fit13 <- lda(Direction ~ Lag2, data = Weekly, subset = train13)

# Predict on the held-out rows; the `class` element holds the predicted labels.
lda.pred13 <- predict(lda.fit13, test13)

# Test-set confusion matrix (rows: predicted, columns: observed) and accuracy.
table(lda.pred13$class, Direction.test13)
mean(lda.pred13$class == Direction.test13)

(f) QDA with Lag2

# Quadratic discriminant analysis with Lag2 as the only predictor, fitted on
# the training rows selected by `train13`.
qda.fit13 <- qda(Direction ~ Lag2, data = Weekly, subset = train13)

# Predict on the held-out rows; the `class` element holds the predicted labels.
qda.pred13 <- predict(qda.fit13, test13)

# Test-set confusion matrix (rows: predicted, columns: observed) and accuracy.
table(qda.pred13$class, Direction.test13)
mean(qda.pred13$class == Direction.test13)

(g) KNN with K=1

# knn() takes matrix (or data frame) inputs, so the single predictor Lag2 is
# wrapped in one-column matrices for the training and held-out rows.
train.X13 <- matrix(Weekly$Lag2[train13])
test.X13 <- matrix(Weekly$Lag2[!train13])
train.Y13 <- Weekly$Direction[train13]

# Fix the RNG state before calling knn() so the result is reproducible
# (knn() breaks ties at random).
set.seed(1)
knn.pred13 <- knn(train.X13, test.X13, train.Y13, k = 1)

# Test-set confusion matrix (rows: predicted, columns: observed) and accuracy.
table(knn.pred13, Direction.test13)
mean(knn.pred13 == Direction.test13)

(h) Naive Bayes with Lag2

# Naive Bayes classifier with Lag2 as the only predictor, fitted on the
# training rows selected by `train13`.
nb.fit13 <- naiveBayes(Direction ~ Lag2, data = Weekly, subset = train13)

# Predicted class labels for the held-out rows.
nb.pred13 <- predict(nb.fit13, test13)

# Test-set confusion matrix (rows: predicted, columns: observed) and accuracy.
table(nb.pred13, Direction.test13)
mean(nb.pred13 == Direction.test13)