Link : Multiple Regression Analysis, Testing
cor(Smarket[,-9])
## Year Lag1 Lag2 Lag3 Lag4
## Year 1.00000000 0.029699649 0.030596422 0.033194581 0.035688718
## Lag1 0.02969965 1.000000000 -0.026294328 -0.010803402 -0.002985911
## Lag2 0.03059642 -0.026294328 1.000000000 -0.025896670 -0.010853533
## Lag3 0.03319458 -0.010803402 -0.025896670 1.000000000 -0.024051036
## Lag4 0.03568872 -0.002985911 -0.010853533 -0.024051036 1.000000000
## Lag5 0.02978799 -0.005674606 -0.003557949 -0.018808338 -0.027083641
## Volume 0.53900647 0.040909908 -0.043383215 -0.041823686 -0.048414246
## Today 0.03009523 -0.026155045 -0.010250033 -0.002447647 -0.006899527
## Lag5 Volume Today
## Year 0.029787995 0.53900647 0.030095229
## Lag1 -0.005674606 0.04090991 -0.026155045
## Lag2 -0.003557949 -0.04338321 -0.010250033
## Lag3 -0.018808338 -0.04182369 -0.002447647
## Lag4 -0.027083641 -0.04841425 -0.006899527
## Lag5 1.000000000 -0.02200231 -0.034860083
## Volume -0.022002315 1.00000000 0.014591823
## Today -0.034860083 0.01459182 1.000000000
par(mfrow=c(1,2))
plot(Volume)
plot(Year, Volume)
###C. Logistic Regression 1) Construct Logistic Regression Model
glm.fits<-glm(Direction~Lag1+Lag2+Lag3+Lag4+Lag5+Volume, family=binomial)
summary(glm.fits)
##
## Call:
## glm(formula = Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 +
## Volume, family = binomial)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.446 -1.203 1.065 1.145 1.326
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.126000 0.240736 -0.523 0.601
## Lag1 -0.073074 0.050167 -1.457 0.145
## Lag2 -0.042301 0.050086 -0.845 0.398
## Lag3 0.011085 0.049939 0.222 0.824
## Lag4 0.009359 0.049974 0.187 0.851
## Lag5 0.010313 0.049511 0.208 0.835
## Volume 0.135441 0.158360 0.855 0.392
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1731.2 on 1249 degrees of freedom
## Residual deviance: 1727.6 on 1243 degrees of freedom
## AIC: 1741.6
##
## Number of Fisher Scoring iterations: 3
coef(glm.fits)
## (Intercept) Lag1 Lag2 Lag3 Lag4
## -0.126000257 -0.073073746 -0.042301344 0.011085108 0.009358938
## Lag5 Volume
## 0.010313068 0.135440659
summary(glm.fits)$coef
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.126000257 0.24073574 -0.5233966 0.6006983
## Lag1 -0.073073746 0.05016739 -1.4565986 0.1452272
## Lag2 -0.042301344 0.05008605 -0.8445733 0.3983491
## Lag3 0.011085108 0.04993854 0.2219750 0.8243333
## Lag4 0.009358938 0.04997413 0.1872757 0.8514445
## Lag5 0.010313068 0.04951146 0.2082966 0.8349974
## Volume 0.135440659 0.15835970 0.8552723 0.3924004
p_value<-summary(glm.fits)$coef[,4]
p_value
## (Intercept) Lag1 Lag2 Lag3 Lag4 Lag5
## 0.6006983 0.1452272 0.3983491 0.8243333 0.8514445 0.8349974
## Volume
## 0.3924004
glm.probs<-predict(glm.fits, type="response")
glm.probs[1:10]
## 1 2 3 4 5 6 7
## 0.5070841 0.4814679 0.4811388 0.5152224 0.5107812 0.5069565 0.4926509
## 8 9 10
## 0.5092292 0.5176135 0.4888378
contrasts(Direction)
## Up
## Down 0
## Up 1
length<-nrow(Smarket) #returns 1250
glm.pred<-rep("Down", 1250)
glm.pred[glm.probs>=0.5]="Up"
table<-table(glm.pred, Direction)
table
## Direction
## glm.pred Down Up
## Down 145 141
## Up 457 507
mean<-mean(glm.pred==Direction)
mean==(table[1]+table[4])/length
## [1] TRUE
train=(Year<2005)
Smarket.2005<-Smarket[!train,]
dim(Smarket.2005)
## [1] 252 9
Direction.2005<-Direction[!train]
length(Direction.2005)
## [1] 252
glm.fits<-glm(Direction~Lag1+Lag2+Lag3+Lag4+Lag5+Volume, family=binomial, subset = train)
glm.probs=predict(glm.fits, Smarket.2005, type="response")
summary(Smarket.2005)
## Year Lag1 Lag2 Lag3
## Min. :2005 Min. :-1.67200 Min. :-1.67200 Min. :-1.67200
## 1st Qu.:2005 1st Qu.:-0.41525 1st Qu.:-0.41525 1st Qu.:-0.41525
## Median :2005 Median : 0.05600 Median : 0.05600 Median : 0.05150
## Mean :2005 Mean : 0.01521 Mean : 0.01642 Mean : 0.01588
## 3rd Qu.:2005 3rd Qu.: 0.42950 3rd Qu.: 0.42950 3rd Qu.: 0.42950
## Max. :2005 Max. : 1.97400 Max. : 1.97400 Max. : 1.97400
## Lag4 Lag5 Volume Today
## Min. :-1.67200 Min. :-1.67200 Min. :0.7249 Min. :-1.6720
## 1st Qu.:-0.39125 1st Qu.:-0.41525 1st Qu.:1.6539 1st Qu.:-0.4377
## Median : 0.05600 Median : 0.05600 Median :1.8969 Median : 0.0560
## Mean : 0.02251 Mean : 0.02063 Mean :1.9173 Mean : 0.0138
## 3rd Qu.: 0.44200 3rd Qu.: 0.44200 3rd Qu.:2.1470 3rd Qu.: 0.4295
## Max. : 1.97400 Max. : 1.97400 Max. :3.1525 Max. : 1.9740
## Direction
## Down:111
## Up :141
##
##
##
##
dim(Smarket.2005)
## [1] 252 9
glm.pred<-rep("Down", 252)
glm.pred[glm.probs>.5]="Up"
table(glm.pred, Direction.2005)
## Direction.2005
## glm.pred Down Up
## Down 77 97
## Up 34 44
mean(glm.pred==Direction.2005)
## [1] 0.4801587
mean(glm.pred!=Direction.2005)
## [1] 0.5198413
summary(glm.fits)$coef
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.191212621 0.33368991 0.57302488 0.5666278
## Lag1 -0.054178292 0.05178534 -1.04620896 0.2954646
## Lag2 -0.045805334 0.05179687 -0.88432632 0.3765201
## Lag3 0.007200118 0.05164416 0.13941784 0.8891200
## Lag4 0.006440875 0.05170561 0.12456821 0.9008654
## Lag5 -0.004222672 0.05113838 -0.08257344 0.9341907
## Volume -0.116256960 0.23961830 -0.48517564 0.6275518
glm.fits<-glm(Direction~Lag1+Lag2, family=binomial, subset=train)
glm.probs<-predict(glm.fits, Smarket.2005, type="response")
glm.pred<-rep("Down", 252)
glm.pred[glm.probs>.5]="Up"
table(glm.pred,Direction.2005)
## Direction.2005
## glm.pred Down Up
## Down 35 35
## Up 76 106
mean(glm.pred==Direction.2005)
## [1] 0.5595238
106/(106+76)
## [1] 0.5824176
predict(glm.fits,newdata=data.frame(Lag1=c(1.2,1.5),Lag2=c(1.1,-0.8)),type="response")
## 1 2
## 0.4791462 0.4960939