library(ISLR)
# Put the columns of Smarket on the search path so they can be referenced by
# bare name. NOTE(review): attach() is generally discouraged; it is kept here
# because code further down may still refer to columns without `Smarket$`.
attach(what = Smarket)

# Open the help page for the Smarket data set.
help(topic = "Smarket")
## starting httpd help server ...
## done
# Draw a box plot of Lag1 and keep the statistics boxplot() returns.
a <- boxplot(x = Smarket[["Lag1"]])

# Print the observations flagged as outliers (points beyond the whiskers).
a[["out"]]
## [1] -4.318 -2.584 2.557 -3.439 4.368 2.707 3.889 2.845 -4.922 -3.106
## [11] 3.898 -2.861 3.750 2.869 3.673 -3.396 -2.702 -3.835 -3.291 -2.702
## [21] 5.733 5.408 -2.957 -3.430 2.992 3.272 4.005 -4.154 -3.006 2.486
## [31] -3.226 4.002 -2.729 3.497 3.906 4.734 2.463 2.799 3.320 -2.923
## [41] -2.583 3.446 3.543 -3.523 2.612
# One box plot per lag column, Lag1 through Lag5, drawn in that order.
# invisible() keeps the list of boxplot statistics from being printed, the
# same as the individual top-level boxplot() calls, which return invisibly.
invisible(lapply(
  paste0("Lag", 1:5),
  function(lag_col) boxplot(Smarket[[lag_col]])
))
library('ggplot2')

# Box plot of Today grouped by Direction.
# ggplot() + geom_boxplot() replaces qplot(), which is deprecated in current
# ggplot2. Mapping the columns through aes() also labels the axes "Direction"
# and "Today" instead of "Smarket$Direction" and "Smarket$Today".
# The former `guides(fill = F)` is dropped: no fill aesthetic is mapped, so
# there is no legend to hide, and it relied on the reassignable `F` shorthand
# and on a logical guide value that ggplot2 has deprecated in favour of "none".
ggplot(Smarket, aes(x = Direction, y = Today)) +
  geom_boxplot()

# Logistic regression of Direction on Lag1, Lag2 and Lag3, fitted on every
# row of Smarket.
logit <- glm(
  formula = Direction ~ Lag1 + Lag2 + Lag3,
  family = "binomial",
  data = Smarket
)

# Coefficient table, deviances and AIC for the fitted model.
summary(object = logit)
##
## Call:
## glm(formula = Direction ~ Lag1 + Lag2 + Lag3, family = "binomial",
## data = Smarket)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.384 -1.204 1.077 1.146 1.348
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.074230 0.056672 1.310 0.190
## Lag1 -0.071404 0.050102 -1.425 0.154
## Lag2 -0.044260 0.050019 -0.885 0.376
## Lag3 0.008873 0.049855 0.178 0.859
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1731.2 on 1249 degrees of freedom
## Residual deviance: 1728.4 on 1246 degrees of freedom
## AIC: 1736.4
##
## Number of Fisher Scoring iterations: 3
# Train/test split: rows before 2005 are used for fitting, rows from 2005 for
# evaluation. Year is read from Smarket explicitly instead of depending on
# attach() having placed the column on the search path.
train <- Smarket[Smarket$Year < 2005, ]
test <- Smarket[Smarket$Year == 2005, ]

# Refit the same three-lag logistic model on the training rows only.
logit <- glm(Direction ~ Lag1 + Lag2 + Lag3, data = train, family = "binomial")

# Fitted probabilities for the held-out rows (type = "response" returns
# probabilities rather than log-odds).
test.probs <- predict(logit, newdata = test, type = "response")

# Classify with a 0.5 cut-off: start from "Down" everywhere and overwrite
# with "Up" where the fitted probability reaches the threshold.
pred.logit <- rep("Down", length(test.probs))
pred.logit[test.probs >= 0.5] <- "Up"

# Cross-tabulate predicted class (rows) against observed Direction (columns).
table(pred.logit, test$Direction)
##
## pred.logit Down Up
## Down 39 31
## Up 72 110
library(caret)
## Loading required package: lattice
# caret::confusionMatrix() takes the predicted classes first (`data`) and the
# observed classes second (`reference`). Both must be factors with the same
# levels, so the character predictions are converted using the levels of
# test$Direction.
# NOTE: the transcript that follows was recorded with the two arguments
# swapped. Its "Prediction"/"Reference" labels and the class-wise statistics
# (sensitivity, specificity, predictive values, prevalence) are therefore
# transposed relative to what this call reports. Accuracy and kappa are not
# affected by the swap.
confusionMatrix(
  data = factor(pred.logit, levels = levels(test$Direction)),
  reference = test$Direction
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Down Up
## Down 39 72
## Up 31 110
##
## Accuracy : 0.5913
## 95% CI : (0.5278, 0.6526)
## No Information Rate : 0.7222
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.1369
## Mcnemar's Test P-Value : 8.104e-05
##
## Sensitivity : 0.5571
## Specificity : 0.6044
## Pos Pred Value : 0.3514
## Neg Pred Value : 0.7801
## Prevalence : 0.2778
## Detection Rate : 0.1548
## Detection Prevalence : 0.4405
## Balanced Accuracy : 0.5808
##
## 'Positive' Class : Down
##