4.6.1 The Stock Market Data

Cargamos la librería del libro:

library(ISLR)
# Column names of the Smarket data set.
names(Smarket)
[1] "Year"      "Lag1"      "Lag2"      "Lag3"      "Lag4"     
[6] "Lag5"      "Volume"    "Today"     "Direction"
# Number of rows and columns in Smarket.
dim(Smarket)
[1] 1250    9
# Per-column summary statistics; Direction is summarised as Down/Up counts.
summary(Smarket)
      Year           Lag1                Lag2          
 Min.   :2001   Min.   :-4.922000   Min.   :-4.922000  
 1st Qu.:2002   1st Qu.:-0.639500   1st Qu.:-0.639500  
 Median :2003   Median : 0.039000   Median : 0.039000  
 Mean   :2003   Mean   : 0.003834   Mean   : 0.003919  
 3rd Qu.:2004   3rd Qu.: 0.596750   3rd Qu.: 0.596750  
 Max.   :2005   Max.   : 5.733000   Max.   : 5.733000  
      Lag3                Lag4                Lag5         
 Min.   :-4.922000   Min.   :-4.922000   Min.   :-4.92200  
 1st Qu.:-0.640000   1st Qu.:-0.640000   1st Qu.:-0.64000  
 Median : 0.038500   Median : 0.038500   Median : 0.03850  
 Mean   : 0.001716   Mean   : 0.001636   Mean   : 0.00561  
 3rd Qu.: 0.596750   3rd Qu.: 0.596750   3rd Qu.: 0.59700  
 Max.   : 5.733000   Max.   : 5.733000   Max.   : 5.73300  
     Volume           Today           Direction 
 Min.   :0.3561   Min.   :-4.922000   Down:602  
 1st Qu.:1.2574   1st Qu.:-0.639500   Up  :648  
 Median :1.4229   Median : 0.038500             
 Mean   :1.4783   Mean   : 0.003138             
 3rd Qu.:1.6417   3rd Qu.: 0.596750             
 Max.   :3.1525   Max.   : 5.733000             
# Scatterplot matrix of all pairs of Smarket columns.
pairs(Smarket)

Se debe eliminar la columna 9 debido a que no es numérica:

# Correlation matrix of the numeric columns only. cor() needs numeric input,
# so the non-numeric column (Direction, column 9) is dropped by type rather
# than by a hard-coded position.
cor(Smarket[, vapply(Smarket, is.numeric, logical(1))])
             Year         Lag1         Lag2         Lag3
Year   1.00000000  0.029699649  0.030596422  0.033194581
Lag1   0.02969965  1.000000000 -0.026294328 -0.010803402
Lag2   0.03059642 -0.026294328  1.000000000 -0.025896670
Lag3   0.03319458 -0.010803402 -0.025896670  1.000000000
Lag4   0.03568872 -0.002985911 -0.010853533 -0.024051036
Lag5   0.02978799 -0.005674606 -0.003557949 -0.018808338
Volume 0.53900647  0.040909908 -0.043383215 -0.041823686
Today  0.03009523 -0.026155045 -0.010250033 -0.002447647
               Lag4         Lag5      Volume        Today
Year    0.035688718  0.029787995  0.53900647  0.030095229
Lag1   -0.002985911 -0.005674606  0.04090991 -0.026155045
Lag2   -0.010853533 -0.003557949 -0.04338321 -0.010250033
Lag3   -0.024051036 -0.018808338 -0.04182369 -0.002447647
Lag4    1.000000000 -0.027083641 -0.04841425 -0.006899527
Lag5   -0.027083641  1.000000000 -0.02200231 -0.034860083
Volume -0.048414246 -0.022002315  1.00000000  0.014591823
Today  -0.006899527 -0.034860083  0.01459182  1.000000000
# attach() puts the Smarket columns on the search path. Later commands in this
# session refer to Volume, Direction and Year by bare name and rely on it.
attach(Smarket)
# Plot Volume against observation index.
plot(Volume)

4.6.2 Logistic Regression

# Logistic regression of Direction on the five lags and Volume, using all rows.
# data = Smarket is passed explicitly (as in the later fits) so the model does
# not depend on the columns having been attach()ed.
glm.fit <- glm(Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + Volume,
               data = Smarket, family = binomial)
summary(glm.fit)

Call:
glm(formula = Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + 
    Volume, family = binomial, data = Smarket)

Deviance Residuals: 
   Min      1Q  Median      3Q     Max  
-1.446  -1.203   1.065   1.145   1.326  

Coefficients:
             Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.126000   0.240736  -0.523    0.601
Lag1        -0.073074   0.050167  -1.457    0.145
Lag2        -0.042301   0.050086  -0.845    0.398
Lag3         0.011085   0.049939   0.222    0.824
Lag4         0.009359   0.049974   0.187    0.851
Lag5         0.010313   0.049511   0.208    0.835
Volume       0.135441   0.158360   0.855    0.392

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 1731.2  on 1249  degrees of freedom
Residual deviance: 1727.6  on 1243  degrees of freedom
AIC: 1741.6

Number of Fisher Scoring iterations: 3

Coeficientes de la regresión logística:

# Named vector of the fitted coefficients (intercept, Lag1..Lag5, Volume).
coef(glm.fit)
 (Intercept)         Lag1         Lag2         Lag3         Lag4 
-0.126000257 -0.073073746 -0.042301344  0.011085108  0.009358938 
        Lag5       Volume 
 0.010313068  0.135440659 

Otra manera de obtener los coeficientes es por medio de summary():

# Full coefficient table (estimate, std. error, z value, p-value).
# The element is spelled out as `coefficients` instead of relying on `$`
# partial matching of the abbreviation `coef`.
summary(glm.fit)$coefficients
                Estimate Std. Error    z value  Pr(>|z|)
(Intercept) -0.126000257 0.24073574 -0.5233966 0.6006983
Lag1        -0.073073746 0.05016739 -1.4565986 0.1452272
Lag2        -0.042301344 0.05008605 -0.8445733 0.3983491
Lag3         0.011085108 0.04993854  0.2219750 0.8243333
Lag4         0.009358938 0.04997413  0.1872757 0.8514445
Lag5         0.010313068 0.04951146  0.2082966 0.8349974
Volume       0.135440659 0.15835970  0.8552723 0.3924004
# p-values only: select the column by name rather than by position, and use
# the full element name `coefficients` instead of `$` partial matching.
summary(glm.fit)$coefficients[, "Pr(>|z|)"]
(Intercept)        Lag1        Lag2        Lag3        Lag4 
  0.6006983   0.1452272   0.3983491   0.8243333   0.8514445 
       Lag5      Volume 
  0.8349974   0.3924004 
# Fitted probabilities on the response scale for the data used in the fit
# (no newdata is supplied), followed by the first ten values.
glm.probs <- predict(glm.fit, type = "response")
head(glm.probs, 10)
        1         2         3         4         5         6 
0.5070841 0.4814679 0.4811388 0.5152224 0.5107812 0.5069565 
        7         8         9        10 
0.4926509 0.5092292 0.5176135 0.4888378 
# Dummy coding used for Direction: the output shows Down coded 0 and Up coded 1.
contrasts(Direction)
     Up
Down  0
Up    1
# Turn probabilities into class labels: start with "Down" everywhere, then
# relabel as "Up" where the fitted probability exceeds 0.5. The vector length
# is taken from glm.probs instead of a hard-coded row count.
glm.pred <- rep("Down", length(glm.probs))
glm.pred[glm.probs > .5] <- "Up"

Creamos la tabla de predicciones:

# Confusion matrix: predicted label (rows) against actual Direction (columns).
table(glm.pred, Direction)
        Direction
glm.pred Down  Up
    Down  145 141
    Up    457 507
# Fraction of observations whose predicted label equals Direction.
mean(glm.pred==Direction)
[1] 0.5216
# Logical mask selecting the rows from years before 2005; the remaining rows
# form the 2005 hold-out set. Year is read from Smarket directly so the split
# does not depend on attach().
train <- Smarket$Year < 2005
Smarket.2005 <- Smarket[!train, ]
dim(Smarket.2005)
[1] 252   9
# Actual Direction values for the hold-out rows, taken from Smarket directly
# rather than from the attach()ed copy.
Direction.2005 <- Smarket$Direction[!train]

# Refit the six-predictor logistic regression on the training rows only.
glm.fit <- glm(Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + Volume,
               data = Smarket, family = binomial, subset = train)

# Predicted probabilities for the hold-out rows, thresholded at 0.5.
# The label vector is sized from glm.probs instead of a hard-coded 252.
glm.probs <- predict(glm.fit, Smarket.2005, type = "response")
glm.pred <- rep("Down", length(glm.probs))
glm.pred[glm.probs > .5] <- "Up"

# Confusion matrix: predicted label (rows) against actual 2005 Direction.
table(glm.pred, Direction.2005)
        Direction.2005
glm.pred Down Up
    Down   77 97
    Up     34 44
# Fraction of hold-out observations predicted correctly.
mean(glm.pred==Direction.2005)
[1] 0.4801587
# Fraction of hold-out observations predicted incorrectly (1 - the value above).
mean(glm.pred!=Direction.2005)
[1] 0.5198413
# Refit on the training rows using only Lag1 and Lag2 as predictors.
glm.fit <- glm(Direction ~ Lag1 + Lag2,
               data = Smarket, family = binomial, subset = train)

# Predicted probabilities for the hold-out rows, thresholded at 0.5.
# The label vector is sized from glm.probs instead of a hard-coded 252.
glm.probs <- predict(glm.fit, Smarket.2005, type = "response")
glm.pred <- rep("Down", length(glm.probs))
glm.pred[glm.probs > .5] <- "Up"

# Confusion matrix: predicted label (rows) against actual 2005 Direction.
table(glm.pred, Direction.2005)
        Direction.2005
glm.pred Down  Up
    Down   35  35
    Up     76 106
# Fraction of hold-out observations predicted correctly by the two-lag model.
mean(glm.pred==Direction.2005)
[1] 0.5595238

Predicciones para un día en el que Lag1 y Lag2 son iguales a 1.2 y 1.1, respectivamente, y para otro día en el que son iguales a 1.5 y -0.8:

# Predicted probabilities from the two-lag model for two hypothetical days:
# (Lag1, Lag2) = (1.2, 1.1) and (1.5, -0.8).
predict(
  glm.fit,
  newdata = data.frame(Lag1 = c(1.2, 1.5), Lag2 = c(1.1, -0.8)),
  type = "response"
)
        1         2 
0.4791462 0.4960939 

4.6.3 Linear Discriminant Analysis

library(MASS)
# Fit LDA of Direction on Lag1 and Lag2 using the training rows.
# The data frame is passed via `data`, the argument lda()'s formula interface
# uses, so the fit does not depend on the Smarket columns being attach()ed.
lda.fit <- lda(Direction ~ Lag1 + Lag2, data = Smarket, subset = train)
lda.fit
Call:
lda(Direction ~ Lag1 + Lag2, data = Smarket, subset = train)

Prior probabilities of groups:
    Down       Up 
0.491984 0.508016 

Group means:
            Lag1        Lag2
Down  0.04279022  0.03389409
Up   -0.03954635 -0.03132544

Coefficients of linear discriminants:
            LD1
Lag1 -0.6420190
Lag2 -0.5135293
# Predict on the 2005 hold-out rows and list the components of the result.
lda.pred <- predict(lda.fit, newdata = Smarket.2005)
names(lda.pred)
[1] "class"     "posterior" "x"        
# Predicted class labels, cross-tabulated against the actual 2005 Direction
# (predictions in rows, actual values in columns).
lda.class <- lda.pred$class
table(lda.class, Direction.2005)
         Direction.2005
lda.class Down  Up
     Down   35  35
     Up     76 106
# Fraction of hold-out observations that LDA classifies correctly.
mean(lda.class==Direction.2005)
[1] 0.5595238
# Number of hold-out days whose first-column posterior is at least 0.5.
sum(lda.pred$posterior[,1]>=.5)
[1] 70
# Number of hold-out days whose first-column posterior is below 0.5.
# The comparison is strict so that, together with the `>= .5` count, every day
# is counted exactly once (a value of exactly 0.5 is not counted twice).
sum(lda.pred$posterior[, 1] < .5)
[1] 182
# First-column posterior probabilities for the first 20 hold-out observations.
lda.pred$posterior[1:20,1]
      999      1000      1001      1002      1003      1004 
0.4901792 0.4792185 0.4668185 0.4740011 0.4927877 0.4938562 
     1005      1006      1007      1008      1009      1010 
0.4951016 0.4872861 0.4907013 0.4844026 0.4906963 0.5119988 
     1011      1012      1013      1014      1015      1016 
0.4895152 0.4706761 0.4744593 0.4799583 0.4935775 0.5030894 
     1017      1018 
0.4978806 0.4886331 
# Predicted classes for the same 20 observations; in the output, "Down"
# appears exactly where the posterior printed above exceeds 0.5.
lda.class[1:20]
 [1] Up   Up   Up   Up   Up   Up   Up   Up   Up   Up   Up   Down
[13] Up   Up   Up   Up   Up   Down Up   Up  
Levels: Down Up
# Number of hold-out days whose first-column posterior exceeds 0.9.
sum(lda.pred$posterior[,1]>.9)
[1] 0
LS0tCnRpdGxlOiAiTGFib3JhdG9yaW8gNCIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKIyMjIyM0LjYuMSBUaGUgc3RvY2sgTWFya2V0IERhdGEKCkNhcmdhbW9zIGxhIGxpYnJlcmlhIGRlbCBsaWJybzoKCmBgYHtyfQpsaWJyYXJ5KElTTFIpCmBgYAoKCmBgYHtyfQpuYW1lcyhTbWFya2V0KQpkaW0oU21hcmtldCkKc3VtbWFyeShTbWFya2V0KQpwYWlycyhTbWFya2V0KQpgYGAKClNlIGRlYmUgZWxpbWluYXIgbGEgY29sdW1uYSA5IGRlYmlkbyBhIHF1ZSBubyBlcyBudW1lcmljYToKCmBgYHtyfQpjb3IoU21hcmtldFssLTldKQpgYGAKCmBgYHtyfQphdHRhY2goU21hcmtldCkKcGxvdChWb2x1bWUpCmBgYAoKCiMjIyM0LjYuMiBMb2dpc3RpYyBSZWdyZXNzaW9uCgoKYGBge3J9CmdsbS5maXQgPSBnbG0oRGlyZWN0aW9ufkxhZzErTGFnMitMYWczK0xhZzQrTGFnNStWb2x1bWUsIGZhbWlseSA9IGJpbm9taWFsKQoKc3VtbWFyeShnbG0uZml0KQpgYGAKCkNvZWZpY2llbnRlcyBkZSBsYSByZWdyZXNpb24gbG9naXN0aWNhOgoKYGBge3J9CmNvZWYoZ2xtLmZpdCkKYGBgCgoKT3RyYSBtYW5lcmEgZGUgb2J0ZW5lciBsb3MgY29lZmljaWVudGVzIGVzIHBvciBtZWRpbyBkZSBTdW1tYXJ5OgoKYGBge3J9CnN1bW1hcnkoZ2xtLmZpdCkkY29lZgpgYGAKCgpgYGB7cn0Kc3VtbWFyeShnbG0uZml0KSRjb2VmWyw0XQpgYGAKCgpgYGB7cn0KZ2xtLnByb2JzID0gcHJlZGljdChnbG0uZml0LCB0eXBlPSAicmVzcG9uc2UiKQpnbG0ucHJvYnNbMToxMF0KY29udHJhc3RzKERpcmVjdGlvbikKYGBgCgpgYGB7cn0KZ2xtLnByZWQgPSByZXAoIkRvd24iLCAxMjUwKQpnbG0ucHJlZFtnbG0ucHJvYnM+LjVdID0gIlVwIgpgYGAKCkNyZWFtb3MgbGEgdGFibGEgZGUgcHJlZGljY2lvbmVzOgoKYGBge3J9CnRhYmxlKGdsbS5wcmVkLCBEaXJlY3Rpb24pCm1lYW4oZ2xtLnByZWQ9PURpcmVjdGlvbikKYGBgCgoKYGBge3J9CnRyYWluID0gKFllYXI8MjAwNSkKU21hcmtldC4yMDA1ID0gU21hcmtldFshdHJhaW4sXQpkaW0oU21hcmtldC4yMDA1KQpEaXJlY3Rpb24uMjAwNSA9IERpcmVjdGlvblshdHJhaW5dCmBgYAoKYGBge3J9CmdsbS5maXQ9Z2xtKERpcmVjdGlvbn5MYWcxK0xhZzIrTGFnMytMYWc0K0xhZzUrVm9sdW1lICwKZGF0YT1TbWFya2V0ICxmYW1pbHk9Ymlub21pYWwgLHN1YnNldD10cmFpbikKCmdsbS5wcm9icyA9IHByZWRpY3QoZ2xtLmZpdCwgU21hcmtldC4yMDA1LCB0eXBlPSJyZXNwb25zZSIpCmBgYAoKCmBgYHtyfQpnbG0ucHJlZCA9IHJlcCgiRG93biIsMjUyKQpnbG0ucHJlZFtnbG0ucHJvYnM+LjVdPSJVcCIKdGFibGUoZ2xtLnByZWQsRGlyZWN0aW9uLjIwMDUpCm1lYW4oZ2xtLnByZWQ9PURpcmVjdGlvbi4yMDA1KQptZWFuKGdsbS5wcmVkIT1EaXJlY3Rpb24uMjAwNSkKYGBgCgoKYGBge3J9CmdsbS5maXQ9Z2xtKERpcmVjdGlvbn5MYWcxK0xhZzIgLGRhdGE9U21hcmtldCAsZmFtaWx5PWJpbm9taWFsICwKc3Vic2V0PXRy
YWluKQpnbG0ucHJvYnMgPSBwcmVkaWN0KGdsbS5maXQsU21hcmtldC4yMDA1LCB0eXBlPSJyZXNwb25zZSIpCmdsbS5wcmVkID0gcmVwKCJEb3duIiwyNTIpCmdsbS5wcmVkW2dsbS5wcm9icz4uNV09IlVwIgp0YWJsZShnbG0ucHJlZCxEaXJlY3Rpb24uMjAwNSkKbWVhbihnbG0ucHJlZD09RGlyZWN0aW9uLjIwMDUpCmBgYAoKUHJlZGljY2lvbmVzIHBhcmEgY3VhbmRvIExhZzEgeSBMYWcyIGVzIGlndWFsIGEgMS4yIHkgMS4xLCByZXNwZWN0aXZhbWVudGUsIGVuIHVuIGRpYSBlbiBlbCBxdWUgZnVlcm9uIDEuNSB5IC0wLjgKCmBgYHtyfQpwcmVkaWN0IChnbG0uZml0ICxuZXdkYXRhID1kYXRhLmZyYW1lKExhZzE9YygxLjIgLDEuNSksCkxhZzI9YygxLjEsLTAuOCkgKSx0eXBlPSJyZXNwb25zZSIpCmBgYAoKIyMjIzQuNi4zIExpbmVhciBEaXNjcmltaW5hbnQgQW5hbHlzaXMKCmBgYHtyfQpsaWJyYXJ5KE1BU1MpCmxkYS5maXQgPSBsZGEoRGlyZWN0aW9ufkxhZzErTGFnMiwgZGF0YXNldD1TbWFya2V0LCBzdWJzZXQ9dHJhaW4pCmxkYS5maXQKCmBgYAoKCmBgYHtyfQpsZGEucHJlZCA9IHByZWRpY3QobGRhLmZpdCwgU21hcmtldC4yMDA1KQpuYW1lcyhsZGEucHJlZCkKYGBgCgpgYGB7cn0KbGRhLmNsYXNzID0gbGRhLnByZWQkY2xhc3MKdGFibGUobGRhLmNsYXNzLERpcmVjdGlvbi4yMDA1KQptZWFuKGxkYS5jbGFzcz09RGlyZWN0aW9uLjIwMDUpCmBgYAoKYGBge3J9CnN1bShsZGEucHJlZCRwb3N0ZXJpb3JbLDFdPj0uNSkKc3VtKGxkYS5wcmVkJHBvc3RlcmlvclssMV08PS41KQoKYGBgCgpgYGB7cn0KbGRhLnByZWQkcG9zdGVyaW9yWzE6MjAsMV0KbGRhLmNsYXNzWzE6MjBdCmBgYAoKYGBge3J9CnN1bShsZGEucHJlZCRwb3N0ZXJpb3JbLDFdPi45KQpgYGAKCg==