# Chapter 4 page 168: 10, 11, 13

# 10

library(ISLR)
## Warning: package 'ISLR' was built under R version 4.1.3
summary(Weekly)
##       Year           Lag1               Lag2               Lag3         
##  Min.   :1990   Min.   :-18.1950   Min.   :-18.1950   Min.   :-18.1950  
##  1st Qu.:1995   1st Qu.: -1.1540   1st Qu.: -1.1540   1st Qu.: -1.1580  
##  Median :2000   Median :  0.2410   Median :  0.2410   Median :  0.2410  
##  Mean   :2000   Mean   :  0.1506   Mean   :  0.1511   Mean   :  0.1472  
##  3rd Qu.:2005   3rd Qu.:  1.4050   3rd Qu.:  1.4090   3rd Qu.:  1.4090  
##  Max.   :2010   Max.   : 12.0260   Max.   : 12.0260   Max.   : 12.0260  
##       Lag4               Lag5              Volume            Today         
##  Min.   :-18.1950   Min.   :-18.1950   Min.   :0.08747   Min.   :-18.1950  
##  1st Qu.: -1.1580   1st Qu.: -1.1660   1st Qu.:0.33202   1st Qu.: -1.1540  
##  Median :  0.2380   Median :  0.2340   Median :1.00268   Median :  0.2410  
##  Mean   :  0.1458   Mean   :  0.1399   Mean   :1.57462   Mean   :  0.1499  
##  3rd Qu.:  1.4090   3rd Qu.:  1.4050   3rd Qu.:2.05373   3rd Qu.:  1.4050  
##  Max.   : 12.0260   Max.   : 12.0260   Max.   :9.32821   Max.   : 12.0260  
##  Direction 
##  Down:484  
##  Up  :605  
##            
##            
##            
## 
cor(Weekly[,-9])
##               Year         Lag1        Lag2        Lag3         Lag4
## Year    1.00000000 -0.032289274 -0.03339001 -0.03000649 -0.031127923
## Lag1   -0.03228927  1.000000000 -0.07485305  0.05863568 -0.071273876
## Lag2   -0.03339001 -0.074853051  1.00000000 -0.07572091  0.058381535
## Lag3   -0.03000649  0.058635682 -0.07572091  1.00000000 -0.075395865
## Lag4   -0.03112792 -0.071273876  0.05838153 -0.07539587  1.000000000
## Lag5   -0.03051910 -0.008183096 -0.07249948  0.06065717 -0.075675027
## Volume  0.84194162 -0.064951313 -0.08551314 -0.06928771 -0.061074617
## Today  -0.03245989 -0.075031842  0.05916672 -0.07124364 -0.007825873
##                Lag5      Volume        Today
## Year   -0.030519101  0.84194162 -0.032459894
## Lag1   -0.008183096 -0.06495131 -0.075031842
## Lag2   -0.072499482 -0.08551314  0.059166717
## Lag3    0.060657175 -0.06928771 -0.071243639
## Lag4   -0.075675027 -0.06107462 -0.007825873
## Lag5    1.000000000 -0.05851741  0.011012698
## Volume -0.058517414  1.00000000 -0.033077783
## Today   0.011012698 -0.03307778  1.000000000
attach(Weekly)
plot(Volume)

fit.glm=glm(Direction~Lag1+Lag2+Lag3+Lag4+Lag5+Volume,data=Weekly,family=binomial)
summary(fit.glm)
## 
## Call:
## glm(formula = Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + 
##     Volume, family = binomial, data = Weekly)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.6949  -1.2565   0.9913   1.0849   1.4579  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept)  0.26686    0.08593   3.106   0.0019 **
## Lag1        -0.04127    0.02641  -1.563   0.1181   
## Lag2         0.05844    0.02686   2.175   0.0296 * 
## Lag3        -0.01606    0.02666  -0.602   0.5469   
## Lag4        -0.02779    0.02646  -1.050   0.2937   
## Lag5        -0.01447    0.02638  -0.549   0.5833   
## Volume      -0.02274    0.03690  -0.616   0.5377   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1496.2  on 1088  degrees of freedom
## Residual deviance: 1486.4  on 1082  degrees of freedom
## AIC: 1500.4
## 
## Number of Fisher Scoring iterations: 4
probs=predict(fit.glm,type="response")
pred.glm=rep("Down",length(probs))
pred.glm[probs>0.5]="Up"
table(pred.glm,Direction)
##         Direction
## pred.glm Down  Up
##     Down   54  48
##     Up    430 557
train=(Year<2009)
Weekly.20092010=Weekly[!train,]
Direction.20092010=Direction[!train]
fit.glm2=glm(Direction~Lag2,data=Weekly,family=binomial,subset=train)
summary(fit.glm2)
## 
## Call:
## glm(formula = Direction ~ Lag2, family = binomial, data = Weekly, 
##     subset = train)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.536  -1.264   1.021   1.091   1.368  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept)  0.20326    0.06428   3.162  0.00157 **
## Lag2         0.05810    0.02870   2.024  0.04298 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1354.7  on 984  degrees of freedom
## Residual deviance: 1350.5  on 983  degrees of freedom
## AIC: 1354.5
## 
## Number of Fisher Scoring iterations: 4
probs2=predict(fit.glm2,Weekly.20092010,type="response")
pred.glm2=rep("Down",length(probs2))
pred.glm2[probs2>0.5]="Up"
table(pred.glm2,Direction.20092010)
##          Direction.20092010
## pred.glm2 Down Up
##      Down    9  5
##      Up     34 56
library(MASS)
fit.lda=lda(Direction~Lag2,data=Weekly,subset=train)
fit.lda
## Call:
## lda(Direction ~ Lag2, data = Weekly, subset = train)
## 
## Prior probabilities of groups:
##      Down        Up 
## 0.4477157 0.5522843 
## 
## Group means:
##             Lag2
## Down -0.03568254
## Up    0.26036581
## 
## Coefficients of linear discriminants:
##            LD1
## Lag2 0.4414162
pred.lda=predict(fit.lda,Weekly.20092010)
table(pred.lda$class,Direction.20092010)
##       Direction.20092010
##        Down Up
##   Down    9  5
##   Up     34 56
fit.qda=qda(Direction~Lag2,data=Weekly,subset=train)
fit.qda
## Call:
## qda(Direction ~ Lag2, data = Weekly, subset = train)
## 
## Prior probabilities of groups:
##      Down        Up 
## 0.4477157 0.5522843 
## 
## Group means:
##             Lag2
## Down -0.03568254
## Up    0.26036581
pred.qda=predict(fit.qda,Weekly.20092010)
table(pred.qda$class,Direction.20092010)
##       Direction.20092010
##        Down Up
##   Down    0  0
##   Up     43 61
library(class)
train.X=as.matrix(Lag2[train])
test.X=as.matrix(Lag2[!train])
train.Direction=Direction[train]
set.seed(1)
pred.knn=knn(train.X,test.X,train.Direction,k=1)
table(pred.knn,Direction.20092010)
##         Direction.20092010
## pred.knn Down Up
##     Down   21 30
##     Up     22 31
# Logistic regression with Lag2:Lag1
fit.glm3=glm(Direction~Lag2:Lag1,data=Weekly,family=binomial,subset=train)
probs3=predict(fit.glm3,Weekly.20092010,type="response")
pred.glm3=rep("Down",length(probs3))
pred.glm3[probs3>0.5]="Up"
table(pred.glm3,Direction.20092010)
##          Direction.20092010
## pred.glm3 Down Up
##      Down    1  1
##      Up     42 60
mean(pred.glm3==Direction.20092010)
## [1] 0.5865385
# LDA with Lag2 interaction with Lag1
fit.lda2=lda(Direction~Lag2:Lag1,data=Weekly,subset=train)
pred.lda2=predict(fit.lda2,Weekly.20092010)
mean(pred.lda2$class==Direction.20092010)
## [1] 0.5769231
# KNN k = 10
pred.knn2=knn(train.X,test.X,train.Direction,k=10)
table(pred.knn2,Direction.20092010)
##          Direction.20092010
## pred.knn2 Down Up
##      Down   17 18
##      Up     26 43
mean(pred.knn2==Direction.20092010)
## [1] 0.5769231
#11

cor(Auto[,-9])
##                     mpg  cylinders displacement horsepower     weight
## mpg           1.0000000 -0.7776175   -0.8051269 -0.7784268 -0.8322442
## cylinders    -0.7776175  1.0000000    0.9508233  0.8429834  0.8975273
## displacement -0.8051269  0.9508233    1.0000000  0.8972570  0.9329944
## horsepower   -0.7784268  0.8429834    0.8972570  1.0000000  0.8645377
## weight       -0.8322442  0.8975273    0.9329944  0.8645377  1.0000000
## acceleration  0.4233285 -0.5046834   -0.5438005 -0.6891955 -0.4168392
## year          0.5805410 -0.3456474   -0.3698552 -0.4163615 -0.3091199
## origin        0.5652088 -0.5689316   -0.6145351 -0.4551715 -0.5850054
##              acceleration       year     origin
## mpg             0.4233285  0.5805410  0.5652088
## cylinders      -0.5046834 -0.3456474 -0.5689316
## displacement   -0.5438005 -0.3698552 -0.6145351
## horsepower     -0.6891955 -0.4163615 -0.4551715
## weight         -0.4168392 -0.3091199 -0.5850054
## acceleration    1.0000000  0.2903161  0.2127458
## year            0.2903161  1.0000000  0.1815277
## origin          0.2127458  0.1815277  1.0000000
pairs(Auto)

boxplot(cylinders~mpg,data=Auto,main="Cylinders vs mpg")

train=(Year%%2==0)
Auto.train=Auto[train,]
Auto.test=Auto[!train,]
fot.lda=lda(mpg~cylinders+weight+displacement+horsepower,data=Auto,subset=train)
fit.lda
## Call:
## lda(Direction ~ Lag2, data = Weekly, subset = train)
## 
## Prior probabilities of groups:
##      Down        Up 
## 0.4477157 0.5522843 
## 
## Group means:
##             Lag2
## Down -0.03568254
## Up    0.26036581
## 
## Coefficients of linear discriminants:
##            LD1
## Lag2 0.4414162
pred.lda<-predict(fit.lda,Auto.test)
## Warning: 'newdata' had 521 rows but variables found have 1089 rows