Notes: Generalized Linear Models

Logistic Regression

A logistic regression model is fitted with glm(response ~ explanatory_variables, family = binomial).
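A minimal sketch of this call pattern on a built-in data set (illustrative only, not part of the stock example): model transmission type in mtcars (am, coded 0 = automatic, 1 = manual) from fuel economy (mpg).

# Toy logistic regression: a 0/1 response on one predictor (illustration only)
toy_fit <- glm(am ~ mpg, data = mtcars, family = binomial)
summary(toy_fit)

Load the data and split it into training and testing sets.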

library(ISLR)    # Smarket: daily S&P 500 returns, 2001-2005
attach(Smarket)

# Observations before 2005 form the training set; 2005 is held out for testing
training <- (Year < 2005)
testing <- !training

training_data <- Smarket[training, ]
testing_data <- Smarket[testing, ]

# True market direction for the test period, used later to evaluate predictions
Direction_testing <- Direction[testing]
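The same split can be written without attach(), indexing Smarket directly; a sketch that selects the same rows:

# Equivalent split without attach(), using the same pre-2005 / 2005 cut
training <- Smarket$Year < 2005
training_data <- Smarket[training, ]
testing_data <- Smarket[!training, ]
Direction_testing <- Smarket$Direction[!training]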

Fit a logistic regression model using the training data

stock_model <- glm(Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + Volume, data = training_data, family = binomial)
summary(stock_model)
## 
## Call:
## glm(formula = Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + 
##     Volume, family = binomial, data = training_data)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.302  -1.190   1.079   1.160   1.350  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)
## (Intercept)  0.191213   0.333690   0.573    0.567
## Lag1        -0.054178   0.051785  -1.046    0.295
## Lag2        -0.045805   0.051797  -0.884    0.377
## Lag3         0.007200   0.051644   0.139    0.889
## Lag4         0.006441   0.051706   0.125    0.901
## Lag5        -0.004223   0.051138  -0.083    0.934
## Volume      -0.116257   0.239618  -0.485    0.628
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1383.3  on 997  degrees of freedom
## Residual deviance: 1381.1  on 991  degrees of freedom
## AIC: 1395.1
## 
## Number of Fisher Scoring iterations: 3
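None of the coefficients is statistically significant, but for interpretation the estimates are on the log-odds scale; exponentiating converts them to odds ratios. A sketch, using approximate Wald intervals from confint.default():

# Odds ratio per one-unit increase in each predictor
exp(coef(stock_model))
# Approximate 95% Wald confidence intervals on the odds-ratio scale
exp(confint.default(stock_model))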

Use the fitted model to make predictions on the test data

# Predicted probabilities of an "Up" market for each day in the test set
model_pred_probs <- predict(stock_model, testing_data, type = "response")
# Start with "Down" for every test observation (252 days), then flip to "Up"
# wherever the predicted probability exceeds 0.5
model_pred_Direction <- rep("Down", nrow(testing_data))
model_pred_Direction[model_pred_probs > 0.5] <- "Up"
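The 0.5 cut-off labels a day "Up" when the predicted probability of the market rising exceeds one half; contrasts() shows which factor level glm codes as 1, and ifelse() produces the same labels in one line. A sketch:

contrasts(Smarket$Direction)  # the level coded 1 is the event being modelled
model_pred_Direction <- ifelse(model_pred_probs > 0.5, "Up", "Down")  # same labels as above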

Create a confusion matrix and compute the misclassification rate

table(model_pred_Direction, Direction_testing)
##                     Direction_testing
## model_pred_Direction Down Up
##                 Down   77 97
##                 Up     34 44
mean(model_pred_Direction != Direction_testing) # test error above 50%: the model has no real predictive power
## [1] 0.5198413
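Equivalently, from the confusion matrix the accuracy is (77 + 44) / 252, about 48%, while always predicting "Up" would be right on 141 of 252 test days, about 56%, so the model does worse than that naive baseline:

mean(model_pred_Direction == Direction_testing)  # accuracy: about 0.48
mean(Direction_testing == "Up")                  # always-"Up" baseline: about 0.56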