library (ISLR)
## Warning: package 'ISLR' was built under R version 3.4.4
library (MASS)
library(corrplot)
## Warning: package 'corrplot' was built under R version 3.4.4
## corrplot 0.84 loaded
library(RColorBrewer)
## Warning: package 'RColorBrewer' was built under R version 3.4.4
summary(Weekly)
## Year Lag1 Lag2 Lag3
## Min. :1990 Min. :-18.1950 Min. :-18.1950 Min. :-18.1950
## 1st Qu.:1995 1st Qu.: -1.1540 1st Qu.: -1.1540 1st Qu.: -1.1580
## Median :2000 Median : 0.2410 Median : 0.2410 Median : 0.2410
## Mean :2000 Mean : 0.1506 Mean : 0.1511 Mean : 0.1472
## 3rd Qu.:2005 3rd Qu.: 1.4050 3rd Qu.: 1.4090 3rd Qu.: 1.4090
## Max. :2010 Max. : 12.0260 Max. : 12.0260 Max. : 12.0260
## Lag4 Lag5 Volume
## Min. :-18.1950 Min. :-18.1950 Min. :0.08747
## 1st Qu.: -1.1580 1st Qu.: -1.1660 1st Qu.:0.33202
## Median : 0.2380 Median : 0.2340 Median :1.00268
## Mean : 0.1458 Mean : 0.1399 Mean :1.57462
## 3rd Qu.: 1.4090 3rd Qu.: 1.4050 3rd Qu.:2.05373
## Max. : 12.0260 Max. : 12.0260 Max. :9.32821
## Today Direction
## Min. :-18.1950 Down:484
## 1st Qu.: -1.1540 Up :605
## Median : 0.2410
## Mean : 0.1499
## 3rd Qu.: 1.4050
## Max. : 12.0260
cor(Weekly[,-9])
## Year Lag1 Lag2 Lag3 Lag4
## Year 1.00000000 -0.032289274 -0.03339001 -0.03000649 -0.031127923
## Lag1 -0.03228927 1.000000000 -0.07485305 0.05863568 -0.071273876
## Lag2 -0.03339001 -0.074853051 1.00000000 -0.07572091 0.058381535
## Lag3 -0.03000649 0.058635682 -0.07572091 1.00000000 -0.075395865
## Lag4 -0.03112792 -0.071273876 0.05838153 -0.07539587 1.000000000
## Lag5 -0.03051910 -0.008183096 -0.07249948 0.06065717 -0.075675027
## Volume 0.84194162 -0.064951313 -0.08551314 -0.06928771 -0.061074617
## Today -0.03245989 -0.075031842 0.05916672 -0.07124364 -0.007825873
## Lag5 Volume Today
## Year -0.030519101 0.84194162 -0.032459894
## Lag1 -0.008183096 -0.06495131 -0.075031842
## Lag2 -0.072499482 -0.08551314 0.059166717
## Lag3 0.060657175 -0.06928771 -0.071243639
## Lag4 -0.075675027 -0.06107462 -0.007825873
## Lag5 1.000000000 -0.05851741 0.011012698
## Volume -0.058517414 1.00000000 -0.033077783
## Today 0.011012698 -0.03307778 1.000000000
En forma gráfica es más fácil observar que hay una fuerte relación entre Year y Volume
M <-cor(Weekly[,-9])
corrplot(M, type="upper", order="hclust",
col=brewer.pal(n=8, name="RdYlBu"))
modelo <-
glm(formula = Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + Volume,
data = Weekly,
family = "binomial")
summary(modelo)
##
## Call:
## glm(formula = Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 +
## Volume, family = "binomial", data = Weekly)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.6949 -1.2565 0.9913 1.0849 1.4579
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.26686 0.08593 3.106 0.0019 **
## Lag1 -0.04127 0.02641 -1.563 0.1181
## Lag2 0.05844 0.02686 2.175 0.0296 *
## Lag3 -0.01606 0.02666 -0.602 0.5469
## Lag4 -0.02779 0.02646 -1.050 0.2937
## Lag5 -0.01447 0.02638 -0.549 0.5833
## Volume -0.02274 0.03690 -0.616 0.5377
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1496.2 on 1088 degrees of freedom
## Residual deviance: 1486.4 on 1082 degrees of freedom
## AIC: 1500.4
##
## Number of Fisher Scoring iterations: 4
predic <- predict (modelo, Weekly , type = "response")
glm.pred= rep("Down" ,length(predic))
glm.pred[predic > .5] = "Up"
table(glm.pred,Weekly$Direction)
##
## glm.pred Down Up
## Down 54 48
## Up 430 557
mean(glm.pred == Weekly$Direction)
## [1] 0.5610652
con estos resultados vemos que acertamos a 54 + 557 = 611 y tenemos una efectividad del 52%
train <- (Weekly$Year < 2009)
Weekly.train <- Weekly[train, ]
Weekly.test <- Weekly[!train, ]
fit.glm2 <- glm(Direction ~ Lag2, data = Weekly.train, family = binomial, subset = train)
summary(fit.glm2)
##
## Call:
## glm(formula = Direction ~ Lag2, family = binomial, data = Weekly.train,
## subset = train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.536 -1.264 1.021 1.091 1.368
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.20326 0.06428 3.162 0.00157 **
## Lag2 0.05810 0.02870 2.024 0.04298 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1354.7 on 984 degrees of freedom
## Residual deviance: 1350.5 on 983 degrees of freedom
## AIC: 1354.5
##
## Number of Fisher Scoring iterations: 4
predic <- predict(fit.glm2, Weekly.test , type = "response")
glm.pred = rep("Down" ,length(predic))
glm.pred[predic > .5] = "Up"
table(glm.pred,Weekly.test$Direction)
##
## glm.pred Down Up
## Down 9 5
## Up 34 56
mean(glm.pred == Weekly.test$Direction)
## [1] 0.625
fit.glm2 <- lda(Direction ~ Lag2, data = Weekly.train, subset = train)
summary(fit.glm2)
## Length Class Mode
## prior 2 -none- numeric
## counts 2 -none- numeric
## means 2 -none- numeric
## scaling 1 -none- numeric
## lev 2 -none- character
## svd 1 -none- numeric
## N 1 -none- numeric
## call 4 -none- call
## terms 3 terms call
## xlevels 0 -none- list
predic <- predict(fit.glm2, Weekly.test)
table(predic$class,Weekly.test$Direction)
##
## Down Up
## Down 9 5
## Up 34 56
mean(predic$class == Weekly.test$Direction)
## [1] 0.625
fit.glm2 <- qda(Direction ~ Lag2, data = Weekly.train, subset = train)
summary(fit.glm2)
## Length Class Mode
## prior 2 -none- numeric
## counts 2 -none- numeric
## means 2 -none- numeric
## scaling 2 -none- numeric
## ldet 2 -none- numeric
## lev 2 -none- character
## N 1 -none- numeric
## call 4 -none- call
## terms 3 terms call
## xlevels 0 -none- list
predic <- predict(fit.glm2, Weekly.test)
table(predic$class,Weekly.test$Direction)
##
## Down Up
## Down 0 0
## Up 43 61
mean(predic$class == Weekly.test$Direction)
## [1] 0.5865385
library (class)
train.X=cbind(Weekly.train$Lag1 ,Weekly.train$Lag2)
test.X=cbind (Weekly.test$Lag1 ,Weekly.test$Lag2)
train.Direction =Weekly.train$Direction
set.seed (1)
knn.pred = knn(train.X, test.X, train.Direction , k = 1)
table(knn.pred , Weekly.test$Direction)
##
## knn.pred Down Up
## Down 18 29
## Up 25 32
mean(knn.pred == Weekly.test$Direction)
## [1] 0.4807692
la regresion lineal y lda son los de mejor desempeño para este set de datos.
attach(Auto)
mpg01 <- rep(0, length(mpg))
mpg01[mpg > median(mpg)] <- 1
Auto <- data.frame(Auto, mpg01)
cor(Auto[, -9])
## mpg cylinders displacement horsepower weight
## mpg 1.0000000 -0.7776175 -0.8051269 -0.7784268 -0.8322442
## cylinders -0.7776175 1.0000000 0.9508233 0.8429834 0.8975273
## displacement -0.8051269 0.9508233 1.0000000 0.8972570 0.9329944
## horsepower -0.7784268 0.8429834 0.8972570 1.0000000 0.8645377
## weight -0.8322442 0.8975273 0.9329944 0.8645377 1.0000000
## acceleration 0.4233285 -0.5046834 -0.5438005 -0.6891955 -0.4168392
## year 0.5805410 -0.3456474 -0.3698552 -0.4163615 -0.3091199
## origin 0.5652088 -0.5689316 -0.6145351 -0.4551715 -0.5850054
## mpg01 0.8369392 -0.7591939 -0.7534766 -0.6670526 -0.7577566
## acceleration year origin mpg01
## mpg 0.4233285 0.5805410 0.5652088 0.8369392
## cylinders -0.5046834 -0.3456474 -0.5689316 -0.7591939
## displacement -0.5438005 -0.3698552 -0.6145351 -0.7534766
## horsepower -0.6891955 -0.4163615 -0.4551715 -0.6670526
## weight -0.4168392 -0.3091199 -0.5850054 -0.7577566
## acceleration 1.0000000 0.2903161 0.2127458 0.3468215
## year 0.2903161 1.0000000 0.1815277 0.4299042
## origin 0.2127458 0.1815277 1.0000000 0.5136984
## mpg01 0.3468215 0.4299042 0.5136984 1.0000000
train <- (year %% 2 == 0)
Auto.train <- Auto[train, ]
Auto.test <- Auto[!train, ]
mpg01.test <- mpg01[!train]
fit.lda <- lda(mpg01 ~ cylinders + weight + displacement + horsepower, data = Auto, subset = train)
fit.lda
## Call:
## lda(mpg01 ~ cylinders + weight + displacement + horsepower, data = Auto,
## subset = train)
##
## Prior probabilities of groups:
## 0 1
## 0.4571429 0.5428571
##
## Group means:
## cylinders weight displacement horsepower
## 0 6.812500 3604.823 271.7396 133.14583
## 1 4.070175 2314.763 111.6623 77.92105
##
## Coefficients of linear discriminants:
## LD1
## cylinders -0.6741402638
## weight -0.0011465750
## displacement 0.0004481325
## horsepower 0.0059035377
pred.lda <- predict(fit.lda, Auto.test)
table(pred.lda$class, mpg01.test)
## mpg01.test
## 0 1
## 0 86 9
## 1 14 73
mean(pred.lda$class != mpg01.test)
## [1] 0.1263736
fit.qda <- qda(mpg01 ~ cylinders + weight + displacement + horsepower, data = Auto, subset = train)
fit.qda
## Call:
## qda(mpg01 ~ cylinders + weight + displacement + horsepower, data = Auto,
## subset = train)
##
## Prior probabilities of groups:
## 0 1
## 0.4571429 0.5428571
##
## Group means:
## cylinders weight displacement horsepower
## 0 6.812500 3604.823 271.7396 133.14583
## 1 4.070175 2314.763 111.6623 77.92105
pred.qda <- predict(fit.qda, Auto.test)
table(pred.qda$class, mpg01.test)
## mpg01.test
## 0 1
## 0 89 13
## 1 11 69
mean(pred.qda$class != mpg01.test)
## [1] 0.1318681
fit.glm <-
glm(
mpg01 ~ cylinders + weight + displacement + horsepower,
data = Auto,
family = binomial,
subset = train
)
summary(fit.glm)
##
## Call:
## glm(formula = mpg01 ~ cylinders + weight + displacement + horsepower,
## family = binomial, data = Auto, subset = train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.48027 -0.03413 0.10583 0.29634 2.57584
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 17.658730 3.409012 5.180 2.22e-07 ***
## cylinders -1.028032 0.653607 -1.573 0.1158
## weight -0.002922 0.001137 -2.569 0.0102 *
## displacement 0.002462 0.015030 0.164 0.8699
## horsepower -0.050611 0.025209 -2.008 0.0447 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 289.58 on 209 degrees of freedom
## Residual deviance: 83.24 on 205 degrees of freedom
## AIC: 93.24
##
## Number of Fisher Scoring iterations: 7
probs <- predict(fit.glm, Auto.test, type = "response")
pred.glm <- rep(0, length(probs))
pred.glm[probs > 0.5] <- 1
table(pred.glm, mpg01.test)
## mpg01.test
## pred.glm 0 1
## 0 89 11
## 1 11 71
mean(pred.glm != mpg01.test)
## [1] 0.1208791