The Stock Market Data
library("ISLR")
# Preview the first rows of the Smarket data frame (head() prints six rows by
# default), to see the column names and a few sample values.
head(Smarket)
## Year Lag1 Lag2 Lag3 Lag4 Lag5 Volume Today Direction
## 1 2001 0.381 -0.192 -2.624 -1.055 5.010 1.191 0.959 Up
## 2 2001 0.959 0.381 -0.192 -2.624 -1.055 1.296 1.032 Up
## 3 2001 1.032 0.959 0.381 -0.192 -2.624 1.411 -0.623 Down
## 4 2001 -0.623 1.032 0.959 0.381 -0.192 1.276 0.614 Up
## 5 2001 0.614 -0.623 1.032 0.959 0.381 1.206 0.213 Up
## 6 2001 0.213 0.614 -0.623 1.032 0.959 1.349 1.392 Up
# Column-by-column summary: min, quartiles, mean and max for the numeric
# columns, and counts per level (Down / Up) for Direction.
summary(Smarket)
## Year Lag1 Lag2 Lag3
## Min. :2001 Min. :-4.922 Min. :-4.922 Min. :-4.922
## 1st Qu.:2002 1st Qu.:-0.640 1st Qu.:-0.640 1st Qu.:-0.640
## Median :2003 Median : 0.039 Median : 0.039 Median : 0.038
## Mean :2003 Mean : 0.004 Mean : 0.004 Mean : 0.002
## 3rd Qu.:2004 3rd Qu.: 0.597 3rd Qu.: 0.597 3rd Qu.: 0.597
## Max. :2005 Max. : 5.733 Max. : 5.733 Max. : 5.733
## Lag4 Lag5 Volume Today
## Min. :-4.922 Min. :-4.922 Min. :0.356 Min. :-4.922
## 1st Qu.:-0.640 1st Qu.:-0.640 1st Qu.:1.257 1st Qu.:-0.640
## Median : 0.038 Median : 0.038 Median :1.423 Median : 0.038
## Mean : 0.002 Mean : 0.006 Mean :1.478 Mean : 0.003
## 3rd Qu.: 0.597 3rd Qu.: 0.597 3rd Qu.:1.642 3rd Qu.: 0.597
## Max. : 5.733 Max. : 5.733 Max. :3.152 Max. : 5.733
## Direction
## Down:602
## Up :648
##
##
##
##
# Number of rows (observations) and columns (variables) in Smarket.
dim(Smarket)
## [1] 1250 9
Pairwise correlations among the numeric variables of the Smarket data (Direction is excluded because it is categorical)
# Correlation matrix of the numeric Smarket columns. Direction is categorical,
# so it is dropped by name rather than by its position (column 9). The matrix
# is computed once and reused for both the printed table and the plot.
smarket_cor <- cor(Smarket[names(Smarket) != "Direction"])
round(smarket_cor, 2)
## Year Lag1 Lag2 Lag3 Lag4 Lag5 Volume Today
## Year 1.00 0.03 0.03 0.03 0.04 0.03 0.54 0.03
## Lag1 0.03 1.00 -0.03 -0.01 0.00 -0.01 0.04 -0.03
## Lag2 0.03 -0.03 1.00 -0.03 -0.01 0.00 -0.04 -0.01
## Lag3 0.03 -0.01 -0.03 1.00 -0.02 -0.02 -0.04 0.00
## Lag4 0.04 0.00 -0.01 -0.02 1.00 -0.03 -0.05 -0.01
## Lag5 0.03 -0.01 0.00 -0.02 -0.03 1.00 -0.02 -0.03
## Volume 0.54 0.04 -0.04 -0.04 -0.05 -0.02 1.00 0.01
## Today 0.03 -0.03 -0.01 0.00 -0.01 -0.03 0.01 1.00
# Draw the same matrix as a pie-glyph correlation plot. corrplot() is reached
# through its namespace because no library(corrplot) call is visible in this
# file; the corrplot package must be installed.
corrplot::corrplot(smarket_cor, method = "pie")

Fit a logistic regression model
Model Direction as a function of Lag1 through Lag5 and Volume, using a generalized linear model with a binomial family
# Logistic regression of Direction on Lag1-Lag5 and Volume. The binomial
# family (default logit link) makes glm() fit a logistic model; every row of
# Smarket is used for fitting.
glm.fit <- glm(
  Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + Volume,
  family = binomial,
  data = Smarket
)
# Print the coefficient table, deviances and AIC of the fitted model.
summary(glm.fit)
##
## Call:
## glm(formula = Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 +
## Volume, family = binomial, data = Smarket)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.45 -1.20 1.07 1.15 1.33
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.12600 0.24074 -0.52 0.60
## Lag1 -0.07307 0.05017 -1.46 0.15
## Lag2 -0.04230 0.05009 -0.84 0.40
## Lag3 0.01109 0.04994 0.22 0.82
## Lag4 0.00936 0.04997 0.19 0.85
## Lag5 0.01031 0.04951 0.21 0.83
## Volume 0.13544 0.15836 0.86 0.39
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1731.2 on 1249 degrees of freedom
## Residual deviance: 1727.6 on 1243 degrees of freedom
## AIC: 1742
##
## Number of Fisher Scoring iterations: 3
Predict, for each observation in the training data, the fitted probability that the market goes up
# Fitted probabilities on the response (probability) scale. No newdata is
# supplied, so predict() returns values for the rows the model was fit on.
glm.probs <- predict(glm.fit, type = "response")
# Show the first ten fitted probabilities.
head(glm.probs, 10)
## 1 2 3 4 5 6 7 8 9 10
## 0.5071 0.4815 0.4811 0.5152 0.5108 0.5070 0.4927 0.5092 0.5176 0.4888
Convert the predicted probabilities into class labels: "Up" when the probability exceeds 0.5, otherwise "Down"
# Turn the fitted probabilities into class labels: start with "Down" for every
# observation, then overwrite with "Up" wherever the probability exceeds 0.5.
# The length comes from glm.probs rather than a hard-coded 1250, so the labels
# always line up one-to-one with the predictions.
glm.pred <- rep("Down", length(glm.probs))
glm.pred[glm.probs > 0.5] <- "Up"
Confirm how many observations were correctly or incorrectly classified, using a confusion matrix and the overall accuracy
# Confusion matrix: predicted labels in rows, observed Direction in columns.
# It is stored so the accuracy below can be derived from it.
conf_mat <- table(Predict = glm.pred, Direction = Smarket$Direction)
conf_mat
## Direction
## Predict Down Up
## Down 145 141
## Up 457 507
# Share of observations whose predicted label matches the observed Direction.
# The predictions were made on the same data the model was fit on, so this is
# in-sample (training) accuracy.
mean(glm.pred == Smarket$Direction)
## [1] 0.5216
# Same figure from the confusion matrix: correct predictions sit on the
# diagonal. Computed from conf_mat instead of retyping 145, 507 and 1250.
sum(diag(conf_mat)) / sum(conf_mat)
## [1] 0.5216