# BB
# Week 7
# Source: https://datascienceplus.com/how-to-perform-logistic-regression-lda-qda-in-r/
library(MASS)
library(ggplot2)
library(ISLR)
# NOTE(review): attach() puts Smarket's columns on the search path so they can
# be referenced by bare name (e.g. `Direction`). It is kept only because code
# elsewhere in this script may rely on it; prefer explicit `Smarket$col`.
attach(Smarket)

# Check dimensions
dim(Smarket)
## [1] 1250 9

# Count distinct values per column.
# vapply() visits each column with its native type; apply() would first coerce
# the whole data frame (which contains a factor) to a character matrix.
vapply(Smarket, function(x) length(unique(x)), integer(1))
## Year Lag1 Lag2 Lag3 Lag4 Lag5 Volume Today
## 5 1044 1045 1045 1044 1044 1181 1044
## Direction
## 2

# Check for missing values: number of NA entries per column
vapply(Smarket, function(x) sum(is.na(x)), integer(1))
## Year Lag1 Lag2 Lag3 Lag4 Lag5 Volume Today
## 0 0 0 0 0 0 0 0
## Direction
## 0

# Check for blank entries: number of values equal to a single space per column
vapply(Smarket, function(x) sum(x == " ", na.rm = TRUE), integer(1))
## Year Lag1 Lag2 Lag3 Lag4 Lag5 Volume Today
## 0 0 0 0 0 0 0 0
## Direction
## 0
#LDA
# Split data: 60% of the rows for training, the remaining 40% for testing.
set.seed(1)  # fixed seed so the random split is reproducible
row.number <- sample(
  seq_len(nrow(Smarket)),
  size = round(0.6 * nrow(Smarket))
)
train <- Smarket[row.number, ]
# The trailing comma matters: `Smarket[-row.number]` (no comma) indexes
# columns, not rows, and would leave all 1250 rows in `test`.
test <- Smarket[-row.number, ]
dim(train)
## [1] 750 9
dim(test)
## [1] 500 9
# Fit LDA on the training rows only, so the test rows stay unseen by the model.
# NOTE(review): `.` pulls in every other column, including `Today`; per the ISLR
# Smarket documentation `Direction` reflects the sign of today's return, so the
# accuracies below are likely optimistic -- confirm the intended predictors
# (e.g. Lag1 + Lag2).
model1 <- lda(factor(Direction) ~ ., data = train)

# In-sample predictions. predict() for lda objects takes `newdata`; an argument
# called `data` is swallowed by `...` and the model's fitting data is scored.
premodel.train.lda <- predict(model1, newdata = train)
# Confusion matrix against the training labels (same rows as the predictions).
# Counts depend on the random split above; run the script to view them.
table(Predicted = premodel.train.lda$class, Direction = train$Direction)
# Histogram of the first discriminant scores, grouped by predicted class
ldahist(premodel.train.lda$x[, 1], g = premodel.train.lda$class)

# Out-of-sample predictions and confusion matrix on the held-out rows
premodel.test.lda <- predict(model1, newdata = test)
table(Predicted = premodel.test.lda$class, Direction = test$Direction)
# Histogram of the posterior probability of the second class level,
# grouped by predicted class
ldahist(premodel.test.lda$posterior[, 2], g = premodel.test.lda$class)
# Reset to a single-panel layout before the next plot
par(mfrow = c(1, 1))
# Posterior probability vs. predicted class, coloured by the true test label
plot(
  premodel.test.lda$posterior[, 2],
  premodel.test.lda$class,
  col = test$Direction
)
#QDA
# Fit QDA on the training rows only, so the test rows stay unseen by the model.
# NOTE(review): `.` includes `Today`, which per the ISLR Smarket documentation
# determines `Direction`; confirm the intended predictors.
model2 <- qda(factor(Direction) ~ ., data = train)

# In-sample predictions. predict() for qda objects takes `newdata`; an argument
# called `data` is swallowed by `...` and the model's fitting data is scored.
premodel.train.qda <- predict(model2, newdata = train)
# Confusion matrix against the training labels (same rows as the predictions).
# Counts depend on the random train/test split; run the script to view them.
table(Predicted = premodel.train.qda$class, Direction = train$Direction)

# Out-of-sample predictions and confusion matrix on the held-out rows
premodel.test.qda <- predict(model2, newdata = test)
table(Predicted = premodel.test.qda$class, Direction = test$Direction)
# Reset to a single-panel layout before plotting
par(mfrow = c(1, 1))
# Posterior probability vs. predicted class, coloured by the true test label
# (must be test$Direction so the colours line up with the test rows).
plot(
  premodel.test.qda$posterior[, 2],
  premodel.test.qda$class,
  col = test$Direction
)