# a
summary(Weekly)
## Year Lag1 Lag2 Lag3
## Min. :1990 Min. :-18.1950 Min. :-18.1950 Min. :-18.1950
## 1st Qu.:1995 1st Qu.: -1.1540 1st Qu.: -1.1540 1st Qu.: -1.1580
## Median :2000 Median : 0.2410 Median : 0.2410 Median : 0.2410
## Mean :2000 Mean : 0.1506 Mean : 0.1511 Mean : 0.1472
## 3rd Qu.:2005 3rd Qu.: 1.4050 3rd Qu.: 1.4090 3rd Qu.: 1.4090
## Max. :2010 Max. : 12.0260 Max. : 12.0260 Max. : 12.0260
## Lag4 Lag5 Volume Today
## Min. :-18.1950 Min. :-18.1950 Min. :0.08747 Min. :-18.1950
## 1st Qu.: -1.1580 1st Qu.: -1.1660 1st Qu.:0.33202 1st Qu.: -1.1540
## Median : 0.2380 Median : 0.2340 Median :1.00268 Median : 0.2410
## Mean : 0.1458 Mean : 0.1399 Mean :1.57462 Mean : 0.1499
## 3rd Qu.: 1.4090 3rd Qu.: 1.4050 3rd Qu.:2.05373 3rd Qu.: 1.4050
## Max. : 12.0260 Max. : 12.0260 Max. :9.32821 Max. : 12.0260
## Direction
## Down:484
## Up :605
##
##
##
##
plot(Weekly)
cor(Weekly[,-9])
## Year Lag1 Lag2 Lag3 Lag4
## Year 1.00000000 -0.032289274 -0.03339001 -0.03000649 -0.031127923
## Lag1 -0.03228927 1.000000000 -0.07485305 0.05863568 -0.071273876
## Lag2 -0.03339001 -0.074853051 1.00000000 -0.07572091 0.058381535
## Lag3 -0.03000649 0.058635682 -0.07572091 1.00000000 -0.075395865
## Lag4 -0.03112792 -0.071273876 0.05838153 -0.07539587 1.000000000
## Lag5 -0.03051910 -0.008183096 -0.07249948 0.06065717 -0.075675027
## Volume 0.84194162 -0.064951313 -0.08551314 -0.06928771 -0.061074617
## Today -0.03245989 -0.075031842 0.05916672 -0.07124364 -0.007825873
## Lag5 Volume Today
## Year -0.030519101 0.84194162 -0.032459894
## Lag1 -0.008183096 -0.06495131 -0.075031842
## Lag2 -0.072499482 -0.08551314 0.059166717
## Lag3 0.060657175 -0.06928771 -0.071243639
## Lag4 -0.075675027 -0.06107462 -0.007825873
## Lag5 1.000000000 -0.05851741 0.011012698
## Volume -0.058517414 1.00000000 -0.033077783
## Today 0.011012698 -0.03307778 1.000000000
plot(Weekly$Volume)
#### There may be a relationship between Year and Volume based on the
scatterplot and the matrix.
glm.fits <- glm(Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + Volume,
data = Weekly, family = binomial)
summary(glm.fits)
##
## Call:
## glm(formula = Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 +
## Volume, family = binomial, data = Weekly)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.6949 -1.2565 0.9913 1.0849 1.4579
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.26686 0.08593 3.106 0.0019 **
## Lag1 -0.04127 0.02641 -1.563 0.1181
## Lag2 0.05844 0.02686 2.175 0.0296 *
## Lag3 -0.01606 0.02666 -0.602 0.5469
## Lag4 -0.02779 0.02646 -1.050 0.2937
## Lag5 -0.01447 0.02638 -0.549 0.5833
## Volume -0.02274 0.03690 -0.616 0.5377
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1496.2 on 1088 degrees of freedom
## Residual deviance: 1486.4 on 1082 degrees of freedom
## AIC: 1500.4
##
## Number of Fisher Scoring iterations: 4
glm.probs = predict(glm.fits, type = 'response')
glm.pred = rep("Down", 1089)
glm.pred[glm.probs > 0.5] = "Up"
writeLines("Confusion Matrix and Overall Fraction of Correct Predictions:")
## Confusion Matrix and Overall Fraction of Correct Predictions:
table(glm.pred, Weekly$Direction)
##
## glm.pred Down Up
## Down 54 48
## Up 430 557
mean(glm.pred == Weekly$Direction)
## [1] 0.5610652
train <- (Weekly$Year<2009)
weekly09 <- Weekly[!train ,]
direction09 <- Weekly$Direction[!train]
dim(weekly09)
## [1] 104 9
glm_fit <- glm(Direction~Lag2, data = Weekly,family=binomial ,subset=train)
glm_probability <- predict (glm_fit,weekly09, type="response")
glm_prediction <- rep("Down",104)
glm_prediction[glm_probability >.5]=" Up"
table(glm_prediction ,direction09)
## direction09
## glm_prediction Down Up
## Up 34 56
## Down 9 5
lda.fit <- lda(Direction~Lag2 ,data = Weekly ,subset=train)
lda.fit
## Call:
## lda(Direction ~ Lag2, data = Weekly, subset = train)
##
## Prior probabilities of groups:
## Down Up
## 0.4477157 0.5522843
##
## Group means:
## Lag2
## Down -0.03568254
## Up 0.26036581
##
## Coefficients of linear discriminants:
## LD1
## Lag2 0.4414162
lda.prediction <- predict(lda.fit , weekly09)
names(lda.prediction)
## [1] "class" "posterior" "x"
lda.class <- lda.prediction$class
table(lda.class , direction09)
## direction09
## lda.class Down Up
## Down 9 5
## Up 34 56
weekly.qda <- qda(Direction~Lag2 ,data=Weekly ,subset=train)
weekly.qda
## Call:
## qda(Direction ~ Lag2, data = Weekly, subset = train)
##
## Prior probabilities of groups:
## Down Up
## 0.4477157 0.5522843
##
## Group means:
## Lag2
## Down -0.03568254
## Up 0.26036581
class.qda <- predict(weekly.qda ,weekly09)$class
table(class.qda ,direction09)
## direction09
## class.qda Down Up
## Down 0 0
## Up 43 61
train.X <- cbind(Weekly$Lag2)[train ,]
test.X <- cbind(Weekly$Lag2)[!train ,]
direction.train <- Weekly$Direction [train]
dim(train.X) <- c(985,1)
dim(test.X) <- c(104,1)
set.seed(1)
knn.pred <- knn(train.X,test.X,direction.train ,k=1)
table(knn.pred ,direction09)
## direction09
## knn.pred Down Up
## Down 21 30
## Up 22 31
n.bayes <- naiveBayes(Direction~Lag2 ,data=Weekly ,subset=train)
n.bayes
##
## Naive Bayes Classifier for Discrete Predictors
##
## Call:
## naiveBayes.default(x = X, y = Y, laplace = laplace)
##
## A-priori probabilities:
## Y
## Down Up
## 0.4477157 0.5522843
##
## Conditional probabilities:
## Lag2
## Y [,1] [,2]
## Down -0.03568254 2.199504
## Up 0.26036581 2.317485
mpg01 <- c(Auto$mpg > median(Auto$mpg))
Auto1 <- data.frame(Auto, mpg01)
plot(Auto1)
boxplot(Auto1$mpg01 ~ Auto1$cylinders)
boxplot(Auto1$mpg01 ~ Auto1$displacement)
boxplot(Auto1$mpg01 ~ Auto1$horsepower)
boxplot(Auto1$mpg01 ~ Auto1$weight)
boxplot(Auto1$mpg01 ~ Auto1$acceleration)
boxplot(Auto1$mpg01 ~ Auto1$year)
boxplot(Auto1$mpg01 ~ Auto1$origin)
set.seed(191)
Atrain = sample(nrow(Auto1), 314)
auto.train <- Auto1[Atrain,]
auto.test <- Auto1[!(as.numeric(rownames(Auto1)) %in% Atrain),]
lda.auto <- lda(mpg01 ~ displacement + horsepower + weight + acceleration, data = auto.train)
lda.auto
## Call:
## lda(mpg01 ~ displacement + horsepower + weight + acceleration,
## data = auto.train)
##
## Prior probabilities of groups:
## FALSE TRUE
## 0.5191083 0.4808917
##
## Group means:
## displacement horsepower weight acceleration
## FALSE 276.0307 132.0798 3637.454 14.53988
## TRUE 115.7185 78.5894 2328.781 16.49073
##
## Coefficients of linear discriminants:
## LD1
## displacement -0.0078245278
## horsepower 0.0036968317
## weight -0.0009948677
## acceleration -0.0094873543
lda.pred <- predict(lda.auto, auto.test)
names(lda.pred)
## [1] "class" "posterior" "x"
table(lda.pred$class, auto.test$mpg01)
##
## FALSE TRUE
## FALSE 29 2
## TRUE 4 48
1 - mean(lda.pred$class == auto.test$mpg01)
## [1] 0.07228916
qda.auto <- qda(mpg01 ~ displacement + horsepower + acceleration + weight, data = auto.train)
qda.pred <- predict(qda.auto, auto.test)
names(qda.pred)
## [1] "class" "posterior"
table(qda.pred$class, auto.test$mpg01)
##
## FALSE TRUE
## FALSE 32 2
## TRUE 1 48
1 - mean(qda.pred$class == auto.test$mpg01)
## [1] 0.03614458
lr.auto <- glm(mpg01 ~ displacement + horsepower + acceleration + weight, data = auto.train, family = binomial)
lr.auto
##
## Call: glm(formula = mpg01 ~ displacement + horsepower + acceleration +
## weight, family = binomial, data = auto.train)
##
## Coefficients:
## (Intercept) displacement horsepower acceleration weight
## 12.96911 -0.01260 -0.05740 -0.11074 -0.00129
##
## Degrees of Freedom: 313 Total (i.e. Null); 309 Residual
## Null Deviance: 434.8
## Residual Deviance: 172.2 AIC: 182.2
lr.probs = predict(lr.auto, auto.test, type = 'response')
lr.pred = rep("High", 83)
lr.pred[lr.probs > 0.5] = "Low"
mean(lr.pred == auto.test$mpg01)
## [1] 0
(5/83)*100
## [1] 6.024096
nb.auto <- naiveBayes(mpg01 ~ displacement + horsepower + weight + acceleration, data = auto.train)
nb.auto
##
## Naive Bayes Classifier for Discrete Predictors
##
## Call:
## naiveBayes.default(x = X, y = Y, laplace = laplace)
##
## A-priori probabilities:
## Y
## FALSE TRUE
## 0.5191083 0.4808917
##
## Conditional probabilities:
## displacement
## Y [,1] [,2]
## FALSE 276.0307 92.00204
## TRUE 115.7185 41.02814
##
## horsepower
## Y [,1] [,2]
## FALSE 132.0798 39.10574
## TRUE 78.5894 15.60482
##
## weight
## Y [,1] [,2]
## FALSE 3637.454 699.0066
## TRUE 2328.781 410.3934
##
## acceleration
## Y [,1] [,2]
## FALSE 14.53988 2.809981
## TRUE 16.49073 2.524001
nb.class <- predict(nb.auto, auto.test)
table(nb.class, auto.test$mpg01)
##
## nb.class FALSE TRUE
## FALSE 30 2
## TRUE 3 48
1 - mean(nb.class == auto.test$mpg01)
## [1] 0.06024096