glm(response ~ explanatory_variables, family=binomial) Load the data and split it into testing and training sets
library(ISLR)

# Split the Smarket data by year: rows with Year < 2005 form the training set,
# all other rows form the held-out test set.
# Columns are referenced through `Smarket$` rather than attach(Smarket), so the
# search path is left untouched and no workspace variable can be masked.
training <- Smarket$Year < 2005
testing <- !training
training_data <- Smarket[training, ]
testing_data <- Smarket[testing, ]
# Observed Direction values for the test rows, used later to score predictions.
Direction_testing <- Smarket$Direction[testing]
Fit a logistic regression model using training data
# Logistic regression of Direction on Lag1-Lag5 and Volume, estimated on the
# training rows only.
stock_model <- glm(
  formula = Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + Volume,
  family = binomial,
  data = training_data
)
# Print the coefficient table and deviance summary of the fit.
summary(stock_model)
##
## Call:
## glm(formula = Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 +
## Volume, family = binomial, data = training_data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.302 -1.190 1.079 1.160 1.350
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.191213 0.333690 0.573 0.567
## Lag1 -0.054178 0.051785 -1.046 0.295
## Lag2 -0.045805 0.051797 -0.884 0.377
## Lag3 0.007200 0.051644 0.139 0.889
## Lag4 0.006441 0.051706 0.125 0.901
## Lag5 -0.004223 0.051138 -0.083 0.934
## Volume -0.116257 0.239618 -0.485 0.628
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1383.3 on 997 degrees of freedom
## Residual deviance: 1381.1 on 991 degrees of freedom
## AIC: 1395.1
##
## Number of Fisher Scoring iterations: 3
Use the fitted model to make predictions for the test data
# Predicted probabilities for the test rows. For a binomial glm with a factor
# response the first level ("Down") is the failure class, so these are P(Up).
model_pred_probs <- predict(
  stock_model,
  newdata = testing_data,
  type = "response"
)
# Default every prediction to "Down", then flip to "Up" above the 0.5 cutoff.
# The vector is sized from the predictions instead of a hard-coded 252, so the
# code keeps working if the train/test split changes.
model_pred_Direction <- rep("Down", length(model_pred_probs))
model_pred_Direction[model_pred_probs > 0.5] <- "Up"
Create a confusion matrix and compute the misclassification rate
# Cross-tabulate predicted (rows) against observed (columns) test directions.
table(
  model_pred_Direction,
  Direction_testing,
  dnn = c("model_pred_Direction", "Direction_testing")
)
## Direction_testing
## model_pred_Direction Down Up
## Down 77 97
## Up 34 44
# Test error rate: share of test observations where the predicted and observed
# directions disagree. At roughly 0.52 it is worse than guessing, i.e.
# bad model/bad data.
mean(Direction_testing != model_pred_Direction)
## [1] 0.5198413