# Load ISLR, the package this script takes the Smarket data frame from.
library(ISLR)
# Put the columns of Smarket on the search path so they can be used by bare
# name. NOTE(review): attach() is generally discouraged because attached
# columns can be masked silently; it is kept here in case code elsewhere in
# the script refers to Smarket columns without the `Smarket$` prefix.
attach(Smarket)
# Open the help page describing the data set (interactive use only).
help("Smarket")
## starting httpd help server ...
##  done
# Draw the Lag1 boxplot and keep the statistics boxplot() returns; the `out`
# component holds the points plotted beyond the whiskers.
a <- boxplot(Smarket[["Lag1"]])
a[["out"]]
##  [1] -4.318 -2.584  2.557 -3.439  4.368  2.707  3.889  2.845 -4.922 -3.106
## [11]  3.898 -2.861  3.750  2.869  3.673 -3.396 -2.702 -3.835 -3.291 -2.702
## [21]  5.733  5.408 -2.957 -3.430  2.992  3.272  4.005 -4.154 -3.006  2.486
## [31] -3.226  4.002 -2.729  3.497  3.906  4.734  2.463  2.799  3.320 -2.923
## [41] -2.583  3.446  3.543 -3.523  2.612
# One boxplot per lag column, Lag1 through Lag5, in that order. invisible()
# keeps the list of returned statistics from being printed.
invisible(lapply(
  paste0("Lag", 1:5),
  function(lag_name) boxplot(Smarket[[lag_name]])
))
library(ggplot2)

# Boxplot of the Today column for each level of Direction.
# The columns are mapped through aes() from the Smarket data frame, so the
# axes are labelled "Direction" and "Today". No fill aesthetic is mapped,
# hence there is no legend to suppress.
ggplot(Smarket, aes(x = Direction, y = Today)) +
  geom_boxplot()

# Logistic regression ----
# Fit Direction on the first three lag columns using every row of Smarket,
# then print the coefficient table and deviance summary.
logit <- glm(
  formula = Direction ~ Lag1 + Lag2 + Lag3,
  family = "binomial",
  data = Smarket
)
summary(logit)
## 
## Call:
## glm(formula = Direction ~ Lag1 + Lag2 + Lag3, family = "binomial", 
##     data = Smarket)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.384  -1.204   1.077   1.146   1.348  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)
## (Intercept)  0.074230   0.056672   1.310    0.190
## Lag1        -0.071404   0.050102  -1.425    0.154
## Lag2        -0.044260   0.050019  -0.885    0.376
## Lag3         0.008873   0.049855   0.178    0.859
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1731.2  on 1249  degrees of freedom
## Residual deviance: 1728.4  on 1246  degrees of freedom
## AIC: 1736.4
## 
## Number of Fisher Scoring iterations: 3
# Train/test split ----
# Rows with Year before 2005 are used for fitting; rows from 2005 are held
# out. Year is taken from the data frame explicitly so the split does not
# depend on attach(Smarket) or on any other object named `Year`.
train <- Smarket[Smarket$Year < 2005, ]
test <- Smarket[Smarket$Year == 2005, ]

# Refit the same three-lag model on the training rows only.
logit <- glm(Direction ~ Lag1 + Lag2 + Lag3, data = train, family = "binomial")

# type = "response" returns fitted probabilities rather than log-odds.
test.probs <- predict(logit, newdata = test, type = "response")

# Classify with a 0.5 cut-off: start from "Down" and overwrite with "Up"
# wherever the fitted probability reaches the threshold.
pred.logit <- rep("Down", length(test.probs))
pred.logit[test.probs >= 0.5] <- "Up"

# Cross-tabulate predictions (rows) against the observed Direction (columns).
table(pred.logit, test$Direction)
##           
## pred.logit Down  Up
##       Down   39  31
##       Up     72 110
library(caret)
## Loading required package: lattice
# confusionMatrix() expects the predicted classes first (`data`) and the
# observed classes second (`reference`). pred.logit is a character vector, so
# it is converted to a factor sharing the levels of the observed Direction.
# NOTE: any output pasted after this call that was produced with the two
# arguments swapped has Prediction/Reference transposed, which mislabels
# sensitivity, specificity, predictive values, prevalence and the
# no-information rate; re-run the call to refresh it.
confusionMatrix(
  data = factor(pred.logit, levels = levels(test$Direction)),
  reference = test$Direction
)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Down  Up
##       Down   39  72
##       Up     31 110
##                                           
##                Accuracy : 0.5913          
##                  95% CI : (0.5278, 0.6526)
##     No Information Rate : 0.7222          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0.1369          
##  Mcnemar's Test P-Value : 8.104e-05       
##                                           
##             Sensitivity : 0.5571          
##             Specificity : 0.6044          
##          Pos Pred Value : 0.3514          
##          Neg Pred Value : 0.7801          
##              Prevalence : 0.2778          
##          Detection Rate : 0.1548          
##    Detection Prevalence : 0.4405          
##       Balanced Accuracy : 0.5808          
##                                           
##        'Positive' Class : Down            
##