# Lectura de la data
data <- readr::read_delim(file = "bank-additional-full.csv", locale = readr::locale(encoding = "UTF-8"), delim = ";")
##
## -- Column specification --------------------------------------------------------
## cols(
## .default = col_character(),
## age = col_double(),
## duration = col_double(),
## campaign = col_double(),
## pdays = col_double(),
## previous = col_double(),
## emp.var.rate = col_double(),
## cons.price.idx = col_double(),
## cons.conf.idx = col_double(),
## euribor3m = col_double(),
## nr.employed = col_double()
## )
## i Use `spec()` for the full column specifications.
# Descripcion de variables
skimr::skim(data)
Data summary
| Name |
data |
| Number of rows |
41188 |
| Number of columns |
21 |
| _______________________ |
|
| Column type frequency: |
|
| character |
11 |
| numeric |
10 |
| ________________________ |
|
| Group variables |
None |
Variable type: character
| job |
0 |
1 |
6 |
13 |
0 |
12 |
0 |
| marital |
0 |
1 |
6 |
8 |
0 |
4 |
0 |
| education |
0 |
1 |
7 |
19 |
0 |
8 |
0 |
| default |
0 |
1 |
2 |
7 |
0 |
3 |
0 |
| housing |
0 |
1 |
2 |
7 |
0 |
3 |
0 |
| loan |
0 |
1 |
2 |
7 |
0 |
3 |
0 |
| contact |
0 |
1 |
8 |
9 |
0 |
2 |
0 |
| month |
0 |
1 |
3 |
3 |
0 |
10 |
0 |
| day_of_week |
0 |
1 |
3 |
3 |
0 |
5 |
0 |
| poutcome |
0 |
1 |
7 |
11 |
0 |
3 |
0 |
| y |
0 |
1 |
2 |
3 |
0 |
2 |
0 |
Variable type: numeric
| age |
0 |
1 |
40.02 |
10.42 |
17.00 |
32.00 |
38.00 |
47.00 |
98.00 |
▅▇▃▁▁ |
| duration |
0 |
1 |
258.29 |
259.28 |
0.00 |
102.00 |
180.00 |
319.00 |
4918.00 |
▇▁▁▁▁ |
| campaign |
0 |
1 |
2.57 |
2.77 |
1.00 |
1.00 |
2.00 |
3.00 |
56.00 |
▇▁▁▁▁ |
| pdays |
0 |
1 |
962.48 |
186.91 |
0.00 |
999.00 |
999.00 |
999.00 |
999.00 |
▁▁▁▁▇ |
| previous |
0 |
1 |
0.17 |
0.49 |
0.00 |
0.00 |
0.00 |
0.00 |
7.00 |
▇▁▁▁▁ |
| emp.var.rate |
0 |
1 |
0.08 |
1.57 |
-3.40 |
-1.80 |
1.10 |
1.40 |
1.40 |
▁▃▁▁▇ |
| cons.price.idx |
0 |
1 |
93.58 |
0.58 |
92.20 |
93.08 |
93.75 |
93.99 |
94.77 |
▁▆▃▇▂ |
| cons.conf.idx |
0 |
1 |
-40.50 |
4.63 |
-50.80 |
-42.70 |
-41.80 |
-36.40 |
-26.90 |
▅▇▁▇▁ |
| euribor3m |
0 |
1 |
3.62 |
1.73 |
0.63 |
1.34 |
4.86 |
4.96 |
5.04 |
▅▁▁▁▇ |
| nr.employed |
0 |
1 |
5167.04 |
72.25 |
4963.60 |
5099.10 |
5191.00 |
5228.10 |
5228.10 |
▁▁▃▁▇ |
# Seleccion de 4 variables cuantitativas y 1 cualtiativa
fil.data <-
dplyr::select(data, c(y, age, duration, emp.var.rate, cons.price.idx)) %>%
tidyr::drop_na() %>%
dplyr::mutate(y = as.factor(y))
head(fil.data)
Regresion logistica
# Regresion logistica
log.reg <- glm(formula = y ~ ., data = fil.data, family = binomial(link = "logit"))
summary(log.reg)
##
## Call:
## glm(formula = y ~ ., family = binomial(link = "logit"), data = fil.data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -5.9474 -0.3897 -0.1967 -0.1319 3.1785
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.309e+02 4.112e+00 -31.831 < 2e-16 ***
## age 6.191e-03 1.549e-03 3.998 6.4e-05 ***
## duration 4.520e-03 7.092e-05 63.732 < 2e-16 ***
## emp.var.rate -1.128e+00 1.920e-02 -58.764 < 2e-16 ***
## cons.price.idx 1.352e+00 4.385e-02 30.835 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 28999 on 41187 degrees of freedom
## Residual deviance: 19067 on 41183 degrees of freedom
## AIC: 19077
##
## Number of Fisher Scoring iterations: 6
# Regresion logistica: caret
RNGkind(sample.kind = "Rejection")
set.seed(100)
log.reg2 <- caret::train(y ~ ., data = fil.data, method = "glm", family = "binomial")
log.reg2
## Generalized Linear Model
##
## 41188 samples
## 4 predictor
## 2 classes: 'no', 'yes'
##
## No pre-processing
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 41188, 41188, 41188, 41188, 41188, 41188, ...
## Resampling results:
##
## Accuracy Kappa
## 0.901024 0.3576079
summary(log.reg2$finalModel)
##
## Call:
## NULL
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -5.9474 -0.3897 -0.1967 -0.1319 3.1785
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.309e+02 4.112e+00 -31.831 < 2e-16 ***
## age 6.191e-03 1.549e-03 3.998 6.4e-05 ***
## duration 4.520e-03 7.092e-05 63.732 < 2e-16 ***
## emp.var.rate -1.128e+00 1.920e-02 -58.764 < 2e-16 ***
## cons.price.idx 1.352e+00 4.385e-02 30.835 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 28999 on 41187 degrees of freedom
## Residual deviance: 19067 on 41183 degrees of freedom
## AIC: 19077
##
## Number of Fisher Scoring iterations: 6
Analisis descriminante lineal: caret
RNGkind(sample.kind = "Rejection")
# Estandarizacion de variables
pre.Proc <- caret::preProcess(x = fil.data, method = c("center", "scale"))
fil.data.z <- predict(pre.Proc, fil.data)
head(fil.data.z)
# Analisis discrminante lineal
set.seed(100)
lda <- caret::train(y ~ ., data = fil.data.z, method = "lda")
lda
## Linear Discriminant Analysis
##
## 41188 samples
## 4 predictor
## 2 classes: 'no', 'yes'
##
## No pre-processing
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 41188, 41188, 41188, 41188, 41188, 41188, ...
## Resampling results:
##
## Accuracy Kappa
## 0.9018298 0.4030479
K vecinos mas cercanos (KNN)
RNGkind(sample.kind = "Rejection")
# Estandarizacion de variables
pre.Proc <- caret::preProcess(x = fil.data, method = c("center", "scale"))
fil.data.z <- predict(pre.Proc, fil.data)
head(fil.data.z)
# Eleccion del valor optimo de k
set.seed(100)
knn <- caret::train(y ~ ., data = fil.data.z, method = "knn")
knn
## k-Nearest Neighbors
##
## 41188 samples
## 4 predictor
## 2 classes: 'no', 'yes'
##
## No pre-processing
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 41188, 41188, 41188, 41188, 41188, 41188, ...
## Resampling results across tuning parameters:
##
## k Accuracy Kappa
## 5 0.8929313 0.4384832
## 7 0.8971711 0.4513161
## 9 0.8991547 0.4562761
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was k = 9.
LS0tDQp0aXRsZTogIlRyYWJham8gMyINCmF1dGhvcjogIkNhcmxvIFZlZ2EiDQpkYXRlOiAiYHIgZm9ybWF0KFN5cy50aW1lKCksICclZCAlQiwgJVknKWAiDQpvdXRwdXQ6IA0KICAgIGh0bWxfZG9jdW1lbnQ6DQogICAgICAgIGRmX3ByaW50OiBwYWdlZA0KICAgICAgICB0b2M6IHRydWUNCiAgICAgICAgY29kZV9kb3dubG9hZDogdHJ1ZQ0KICAgICAgICBjb2RlX2ZvbGRpbmc6ICJzaG93Ig0KLS0tDQoNCmBgYHtyIHNldHVwLCBpbmNsdWRlPUZBTFNFLCBtZXNzYWdlPUZBTFNFLCB3YXJuaW5nPUZBTFNFfQ0Ka25pdHI6Om9wdHNfY2h1bmskc2V0KGVjaG8gPSBUUlVFKQ0KYGBgDQoNCmBgYHtyIGluY2x1ZGU9RkFMU0V9DQoNCmxpYnJhcnkodGlkeXZlcnNlKQ0KbGlicmFyeShNQVNTKQ0KbGlicmFyeShjYXJldCkNCmxpYnJhcnkoSVNMUikNCmxpYnJhcnkoZ2xtbmV0KQ0KbGlicmFyeShza2ltcikNCg0KYGBgDQoNCg0KYGBge3IgfQ0KDQojIExlY3R1cmEgZGUgbGEgZGF0YQ0KZGF0YSA8LSByZWFkcjo6cmVhZF9kZWxpbShmaWxlID0gImJhbmstYWRkaXRpb25hbC1mdWxsLmNzdiIsIGxvY2FsZSA9IHJlYWRyOjpsb2NhbGUoZW5jb2RpbmcgPSAiVVRGLTgiKSwgZGVsaW0gPSAiOyIpDQoNCiMgRGVzY3JpcGNpb24gZGUgdmFyaWFibGVzDQpza2ltcjo6c2tpbShkYXRhKQ0KDQojIFNlbGVjY2lvbiBkZSA0IHZhcmlhYmxlcyBjdWFudGl0YXRpdmFzIHkgMSBjdWFsdGlhdGl2YQ0KDQpmaWwuZGF0YSA8LSANCiAgZHBseXI6OnNlbGVjdChkYXRhLCBjKHksIGFnZSwgZHVyYXRpb24sIGVtcC52YXIucmF0ZSwgY29ucy5wcmljZS5pZHgpKSAlPiUgDQogIHRpZHlyOjpkcm9wX25hKCkgJT4lIA0KICBkcGx5cjo6bXV0YXRlKHkgPSBhcy5mYWN0b3IoeSkpDQpoZWFkKGZpbC5kYXRhKQ0KDQpgYGANCiMjIFJlZ3Jlc2lvbiBsb2dpc3RpY2ENCg0KYGBge3J9DQojIFJlZ3Jlc2lvbiBsb2dpc3RpY2ENCmxvZy5yZWcgPC0gZ2xtKGZvcm11bGEgPSB5IH4gLiwgZGF0YSA9IGZpbC5kYXRhLCBmYW1pbHkgPSBiaW5vbWlhbChsaW5rID0gImxvZ2l0IikpDQpzdW1tYXJ5KGxvZy5yZWcpDQoNCiMgUmVncmVzaW9uIGxvZ2lzdGljYTogY2FyZXQNClJOR2tpbmQoc2FtcGxlLmtpbmQgPSAiUmVqZWN0aW9uIikNCnNldC5zZWVkKDEwMCkNCmxvZy5yZWcyIDwtIGNhcmV0Ojp0cmFpbih5IH4gLiwgZGF0YSA9IGZpbC5kYXRhLCBtZXRob2QgPSAiZ2xtIiwgZmFtaWx5ID0gImJpbm9taWFsIikNCmxvZy5yZWcyDQpzdW1tYXJ5KGxvZy5yZWcyJGZpbmFsTW9kZWwpDQoNCmBgYA0KDQojIyBBbmFsaXNpcyBkZXNjcmltaW5hbnRlIGxpbmVhbDogY2FyZXQNCg0KYGBge3J9DQoNClJOR2tpbmQoc2FtcGxlLmtpbmQgPSAiUmVqZWN0aW9uIikNCg0KIyBFc3RhbmRhcml6YWNpb24gZGUgdmFyaWFibGVzDQoNCnByZS5Qcm9jIDwtIGNhcmV0OjpwcmVQcm9jZXNzKHggPSBmaWwuZGF0YSwgbWV0aG9kID0gYygiY2VudGVyIiwgInNjYWxlIikpDQpmaWwuZGF0YS56IDwtIHByZWRpY3QocHJlLlByb2MsIGZpbC5kYXRhKQ0KaGVhZChmaWwuZGF0YS56KQ0KDQojIEFuYWxpc2lzIGRpc2NybWluYW50ZSBsaW5lYWwNCg0Kc2V0LnNlZWQoMTAwKQ0KbGRhIDwtIGNhcmV0Ojp0cmFpbih5IH4gLiwgZGF0YSA9IGZpbC5kYXRhLnosIG1ldGhvZCA9ICJsZGEiKQ0KbGRhDQoNCmBgYA0KDQojIyBLIHZlY2lub3MgbWFzIGNlcmNhbm9zIChLTk4pDQoNCmBgYHtyfQ0KDQpSTkdraW5kKHNhbXBsZS5raW5kID0gIlJlamVjdGlvbiIpDQoNCiMgRXN0YW5kYXJpemFjaW9uIGRlIHZhcmlhYmxlcw0KDQpwcmUuUHJvYyA8LSBjYXJldDo6cHJlUHJvY2Vzcyh4ID0gZmlsLmRhdGEsIG1ldGhvZCA9IGMoImNlbnRlciIsICJzY2FsZSIpKQ0KZmlsLmRhdGEueiA8LSBwcmVkaWN0KHByZS5Qcm9jLCBmaWwuZGF0YSkNCmhlYWQoZmlsLmRhdGEueikNCg0KIyBFbGVjY2lvbiBkZWwgdmFsb3Igb3B0aW1vIGRlIGsNCg0Kc2V0LnNlZWQoMTAwKQ0Ka25uIDwtIGNhcmV0Ojp0cmFpbih5IH4gLiwgZGF0YSA9IGZpbC5kYXRhLnosIG1ldGhvZCA9ICJrbm4iKQ0Ka25uDQoNCmBgYA0KDQo=