suppressMessages(library(ISLR))
# Preview the first six rows of the raw data set.
head(Smarket, n = 6L)
## Year Lag1 Lag2 Lag3 Lag4 Lag5 Volume Today Direction
## 1 2001 0.381 -0.192 -2.624 -1.055 5.010 1.1913 0.959 Up
## 2 2001 0.959 0.381 -0.192 -2.624 -1.055 1.2965 1.032 Up
## 3 2001 1.032 0.959 0.381 -0.192 -2.624 1.4112 -0.623 Down
## 4 2001 -0.623 1.032 0.959 0.381 -0.192 1.2760 0.614 Up
## 5 2001 0.614 -0.623 1.032 0.959 0.381 1.2057 0.213 Up
## 6 2001 0.213 0.614 -0.623 1.032 0.959 1.3491 1.392 Up
# Work on a copy so the original data set stays as loaded.
smarket <- Smarket
colnames(smarket)
## [1] "Year" "Lag1" "Lag2" "Lag3" "Lag4" "Lag5"
## [7] "Volume" "Today" "Direction"
# Lower-case every column name, then show per-column summaries.
smarket <- setNames(smarket, tolower(names(smarket)))
summary(smarket)
## year lag1 lag2
## Min. :2001 Min. :-4.922000 Min. :-4.922000
## 1st Qu.:2002 1st Qu.:-0.639500 1st Qu.:-0.639500
## Median :2003 Median : 0.039000 Median : 0.039000
## Mean :2003 Mean : 0.003834 Mean : 0.003919
## 3rd Qu.:2004 3rd Qu.: 0.596750 3rd Qu.: 0.596750
## Max. :2005 Max. : 5.733000 Max. : 5.733000
## lag3 lag4 lag5
## Min. :-4.922000 Min. :-4.922000 Min. :-4.92200
## 1st Qu.:-0.640000 1st Qu.:-0.640000 1st Qu.:-0.64000
## Median : 0.038500 Median : 0.038500 Median : 0.03850
## Mean : 0.001716 Mean : 0.001636 Mean : 0.00561
## 3rd Qu.: 0.596750 3rd Qu.: 0.596750 3rd Qu.: 0.59700
## Max. : 5.733000 Max. : 5.733000 Max. : 5.73300
## volume today direction
## Min. :0.3561 Min. :-4.922000 Down:602
## 1st Qu.:1.2574 1st Qu.:-0.639500 Up :648
## Median :1.4229 Median : 0.038500
## Mean :1.4783 Mean : 0.003138
## 3rd Qu.:1.6417 3rd Qu.: 0.596750
## Max. :3.1525 Max. : 5.733000
# Pairwise correlations between the numeric columns.
# cor() needs numeric input, so the factor column (direction) must be dropped.
# Selecting by type instead of by the hard-coded position 9 keeps this correct
# if columns are ever added, removed or reordered.
cor(Filter(is.numeric, smarket))
## year lag1 lag2 lag3 lag4
## year 1.00000000 0.029699649 0.030596422 0.033194581 0.035688718
## lag1 0.02969965 1.000000000 -0.026294328 -0.010803402 -0.002985911
## lag2 0.03059642 -0.026294328 1.000000000 -0.025896670 -0.010853533
## lag3 0.03319458 -0.010803402 -0.025896670 1.000000000 -0.024051036
## lag4 0.03568872 -0.002985911 -0.010853533 -0.024051036 1.000000000
## lag5 0.02978799 -0.005674606 -0.003557949 -0.018808338 -0.027083641
## volume 0.53900647 0.040909908 -0.043383215 -0.041823686 -0.048414246
## today 0.03009523 -0.026155045 -0.010250033 -0.002447647 -0.006899527
## lag5 volume today
## year 0.029787995 0.53900647 0.030095229
## lag1 -0.005674606 0.04090991 -0.026155045
## lag2 -0.003557949 -0.04338321 -0.010250033
## lag3 -0.018808338 -0.04182369 -0.002447647
## lag4 -0.027083641 -0.04841425 -0.006899527
## lag5 1.000000000 -0.02200231 -0.034860083
## volume -0.022002315 1.00000000 0.014591823
## today -0.034860083 0.01459182 1.000000000
# Trading volume in row order (x axis is the observation index).
plot(smarket$volume, ylab = "volume")

# Trading volume against calendar year.
plot(volume ~ year, data = smarket)

# Hold out the 2005 observations as the test set; fit on all other years.
train <- smarket[smarket[["year"]] != 2005, ]
test <- smarket[smarket[["year"]] == 2005, ]
The lda() and qda() functions are part of the MASS package.
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
# Linear discriminant analysis of direction on the two most recent lags,
# fitted on the training rows only; printing shows priors, group means and
# the discriminant coefficients.
lda.fit <- lda(direction ~ lag1 + lag2, data = train)
lda.fit
## Call:
## lda(direction ~ lag1 + lag2, data = train)
##
## Prior probabilities of groups:
## Down Up
## 0.491984 0.508016
##
## Group means:
## lag1 lag2
## Down 0.04279022 0.03389409
## Up -0.03954635 -0.03132544
##
## Coefficients of linear discriminants:
## LD1
## lag1 -0.6420190
## lag2 -0.5135293
# summary() on the fitted object only tabulates its components (length, class,
# mode), as the listing below shows; print the object itself to see the priors,
# group means and coefficients.
summary(lda.fit)
## Length Class Mode
## prior 2 -none- numeric
## counts 2 -none- numeric
## means 4 -none- numeric
## scaling 2 -none- numeric
## lev 2 -none- character
## svd 1 -none- numeric
## N 1 -none- numeric
## call 3 -none- call
## terms 3 terms call
## xlevels 0 -none- list
# Draw the default plot for the fitted LDA object (produces a figure, no
# console output).
plot(lda.fit)

# Score the held-out rows with the fitted LDA model and list what the
# prediction object contains.
lda.pred <- predict(lda.fit, newdata = test)
names(lda.pred)
## [1] "class" "posterior" "x"
# Predicted class label for each test observation.
lda.pred[["class"]]
## [1] Up Up Up Up Up Up Up Up Up Up Up Down Up Up
## [15] Up Up Up Down Up Up Down Down Down Up Down Down Up Up
## [29] Up Down Down Up Up Up Up Up Up Down Down Up Up Up
## [43] Up Down Down Up Up Up Up Up Up Up Up Up Up Up
## [57] Up Up Up Up Down Down Up Up Down Down Down Up Up Up
## [71] Up Up Up Up Down Up Down Down Up Up Up Up Up Down
## [85] Up Down Down Up Up Up Up Up Up Down Down Down Down Up
## [99] Up Up Up Up Down Up Up Down Up Up Up Up Up Up
## [113] Up Up Up Up Down Up Up Up Up Up Up Down Down Up
## [127] Up Down Up Up Down Down Down Up Up Up Up Up Down Up
## [141] Up Up Up Down Down Up Up Down Down Up Up Up Up Up
## [155] Up Up Up Up Up Up Up Up Up Up Up Up Up Up
## [169] Down Down Up Down Down Up Up Up Up Up Up Down Up Up
## [183] Up Up Up Up Up Up Down Down Up Up Up Up Up Up
## [197] Up Up Up Down Down Up Down Up Up Down Down Up Up Down
## [211] Down Up Down Down Up Up Up Up Down Down Up Up Up Down
## [225] Down Down Down Down Up Up Up Up Down Down Up Up Up Up
## [239] Up Up Down Down Up Up Up Up Up Down Up Up Up Up
## Levels: Down Up
# Confusion matrix: rows are the LDA predictions, columns the observed
# directions in the test set.
table(lda.pred[["class"]], test[["direction"]])
##
## Down Up
## Down 35 35
## Up 76 106
# Share of test observations whose predicted class matches the observed one.
mean(lda.pred[["class"]] == test[["direction"]])
## [1] 0.5595238
# Posterior class probabilities for the first ten test observations.
lda.pred$posterior[seq_len(10), ]
## Down Up
## 999 0.4901792 0.5098208
## 1000 0.4792185 0.5207815
## 1001 0.4668185 0.5331815
## 1002 0.4740011 0.5259989
## 1003 0.4927877 0.5072123
## 1004 0.4938562 0.5061438
## 1005 0.4951016 0.5048984
## 1006 0.4872861 0.5127139
## 1007 0.4907013 0.5092987
## 1008 0.4844026 0.5155974
# Quadratic discriminant analysis with the same predictors and training rows
# as the LDA fit; printing shows priors and group means.
qda.fit <- qda(direction ~ lag1 + lag2, data = train)
qda.fit
## Call:
## qda(direction ~ lag1 + lag2, data = train)
##
## Prior probabilities of groups:
## Down Up
## 0.491984 0.508016
##
## Group means:
## lag1 lag2
## Down 0.04279022 0.03389409
## Up -0.03954635 -0.03132544
# summary() on the fitted object only tabulates its components (length, class,
# mode), as the listing below shows; print the object itself to see the priors
# and group means.
summary(qda.fit)
## Length Class Mode
## prior 2 -none- numeric
## counts 2 -none- numeric
## means 4 -none- numeric
## scaling 8 -none- numeric
## ldet 2 -none- numeric
## lev 2 -none- character
## N 1 -none- numeric
## call 3 -none- call
## terms 3 terms call
## xlevels 0 -none- list
# Score the held-out rows with the fitted QDA model and list what the
# prediction object contains.
qda.pred <- predict(qda.fit, newdata = test)
names(qda.pred)
## [1] "class" "posterior"
# Predicted class label for each test observation.
qda.pred[["class"]]
## [1] Up Up Up Up Up Up Up Up Up Up Up Down Up Up
## [15] Up Up Up Down Up Up Up Down Down Up Down Down Up Up
## [29] Up Down Up Up Up Up Up Up Up Down Down Up Up Up
## [43] Up Down Down Up Up Up Up Up Up Up Up Up Up Up
## [57] Up Up Up Up Down Down Up Up Up Up Up Up Up Up
## [71] Up Up Up Up Down Up Down Down Up Up Up Up Up Down
## [85] Up Down Down Up Up Up Up Up Up Up Down Down Down Up
## [99] Up Up Up Up Up Up Up Down Up Up Up Up Up Up
## [113] Up Up Up Up Up Up Up Up Up Up Up Down Up Up
## [127] Up Down Up Up Down Down Up Up Up Up Up Up Down Up
## [141] Up Up Up Up Down Up Up Up Up Up Up Up Up Up
## [155] Up Up Up Up Up Up Up Up Up Up Up Up Up Up
## [169] Up Down Up Down Down Up Up Up Up Up Up Down Up Up
## [183] Up Up Up Up Up Up Down Down Up Up Up Up Up Up
## [197] Up Up Up Up Down Up Down Up Up Down Down Up Up Down
## [211] Down Up Up Down Up Up Up Up Down Down Up Up Up Down
## [225] Down Down Down Up Up Up Up Up Up Down Up Up Up Up
## [239] Up Up Up Down Up Up Up Up Up Up Up Up Up Up
## Levels: Down Up
# Confusion matrix: rows are the QDA predictions, columns the observed
# directions in the test set.
table(qda.pred[["class"]], test[["direction"]])
##
## Down Up
## Down 30 20
## Up 81 121
# Share of test observations whose predicted class matches the observed one.
mean(qda.pred[["class"]] == test[["direction"]])
## [1] 0.5992063