# Lectura de la data
data <- readr::read_delim(file = "bank-additional-full.csv", locale = readr::locale(encoding = "UTF-8"), delim = ";")
## 
## -- Column specification --------------------------------------------------------
## cols(
##   .default = col_character(),
##   age = col_double(),
##   duration = col_double(),
##   campaign = col_double(),
##   pdays = col_double(),
##   previous = col_double(),
##   emp.var.rate = col_double(),
##   cons.price.idx = col_double(),
##   cons.conf.idx = col_double(),
##   euribor3m = col_double(),
##   nr.employed = col_double()
## )
## i Use `spec()` for the full column specifications.
# Descripcion de variables
skimr::skim(data)
Data summary
Name data
Number of rows 41188
Number of columns 21
_______________________
Column type frequency:
character 11
numeric 10
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
job 0 1 6 13 0 12 0
marital 0 1 6 8 0 4 0
education 0 1 7 19 0 8 0
default 0 1 2 7 0 3 0
housing 0 1 2 7 0 3 0
loan 0 1 2 7 0 3 0
contact 0 1 8 9 0 2 0
month 0 1 3 3 0 10 0
day_of_week 0 1 3 3 0 5 0
poutcome 0 1 7 11 0 3 0
y 0 1 2 3 0 2 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
age 0 1 40.02 10.42 17.00 32.00 38.00 47.00 98.00 ▅▇▃▁▁
duration 0 1 258.29 259.28 0.00 102.00 180.00 319.00 4918.00 ▇▁▁▁▁
campaign 0 1 2.57 2.77 1.00 1.00 2.00 3.00 56.00 ▇▁▁▁▁
pdays 0 1 962.48 186.91 0.00 999.00 999.00 999.00 999.00 ▁▁▁▁▇
previous 0 1 0.17 0.49 0.00 0.00 0.00 0.00 7.00 ▇▁▁▁▁
emp.var.rate 0 1 0.08 1.57 -3.40 -1.80 1.10 1.40 1.40 ▁▃▁▁▇
cons.price.idx 0 1 93.58 0.58 92.20 93.08 93.75 93.99 94.77 ▁▆▃▇▂
cons.conf.idx 0 1 -40.50 4.63 -50.80 -42.70 -41.80 -36.40 -26.90 ▅▇▁▇▁
euribor3m 0 1 3.62 1.73 0.63 1.34 4.86 4.96 5.04 ▅▁▁▁▇
nr.employed 0 1 5167.04 72.25 4963.60 5099.10 5191.00 5228.10 5228.10 ▁▁▃▁▇
# Seleccion de 4 variables cuantitativas y 1 cualtiativa

fil.data <- 
  dplyr::select(data, c(y, age, duration, emp.var.rate, cons.price.idx)) %>% 
  tidyr::drop_na() %>% 
  dplyr::mutate(y = as.factor(y))
head(fil.data)

Regresion logistica

# Regresion logistica
log.reg <- glm(formula = y ~ ., data = fil.data, family = binomial(link = "logit"))
summary(log.reg)
## 
## Call:
## glm(formula = y ~ ., family = binomial(link = "logit"), data = fil.data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -5.9474  -0.3897  -0.1967  -0.1319   3.1785  
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -1.309e+02  4.112e+00 -31.831  < 2e-16 ***
## age             6.191e-03  1.549e-03   3.998  6.4e-05 ***
## duration        4.520e-03  7.092e-05  63.732  < 2e-16 ***
## emp.var.rate   -1.128e+00  1.920e-02 -58.764  < 2e-16 ***
## cons.price.idx  1.352e+00  4.385e-02  30.835  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 28999  on 41187  degrees of freedom
## Residual deviance: 19067  on 41183  degrees of freedom
## AIC: 19077
## 
## Number of Fisher Scoring iterations: 6
# Regresion logistica: caret
RNGkind(sample.kind = "Rejection")
set.seed(100)
log.reg2 <- caret::train(y ~ ., data = fil.data, method = "glm", family = "binomial")
log.reg2
## Generalized Linear Model 
## 
## 41188 samples
##     4 predictor
##     2 classes: 'no', 'yes' 
## 
## No pre-processing
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 41188, 41188, 41188, 41188, 41188, 41188, ... 
## Resampling results:
## 
##   Accuracy  Kappa    
##   0.901024  0.3576079
summary(log.reg2$finalModel)
## 
## Call:
## NULL
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -5.9474  -0.3897  -0.1967  -0.1319   3.1785  
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -1.309e+02  4.112e+00 -31.831  < 2e-16 ***
## age             6.191e-03  1.549e-03   3.998  6.4e-05 ***
## duration        4.520e-03  7.092e-05  63.732  < 2e-16 ***
## emp.var.rate   -1.128e+00  1.920e-02 -58.764  < 2e-16 ***
## cons.price.idx  1.352e+00  4.385e-02  30.835  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 28999  on 41187  degrees of freedom
## Residual deviance: 19067  on 41183  degrees of freedom
## AIC: 19077
## 
## Number of Fisher Scoring iterations: 6

Analisis descriminante lineal: caret

RNGkind(sample.kind = "Rejection")

# Estandarizacion de variables

pre.Proc <- caret::preProcess(x = fil.data, method = c("center", "scale"))
fil.data.z <- predict(pre.Proc, fil.data)
head(fil.data.z)
# Analisis discrminante lineal

set.seed(100)
lda <- caret::train(y ~ ., data = fil.data.z, method = "lda")
lda
## Linear Discriminant Analysis 
## 
## 41188 samples
##     4 predictor
##     2 classes: 'no', 'yes' 
## 
## No pre-processing
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 41188, 41188, 41188, 41188, 41188, 41188, ... 
## Resampling results:
## 
##   Accuracy   Kappa    
##   0.9018298  0.4030479

K vecinos mas cercanos (KNN)

RNGkind(sample.kind = "Rejection")

# Estandarizacion de variables

pre.Proc <- caret::preProcess(x = fil.data, method = c("center", "scale"))
fil.data.z <- predict(pre.Proc, fil.data)
head(fil.data.z)
# Eleccion del valor optimo de k

set.seed(100)
knn <- caret::train(y ~ ., data = fil.data.z, method = "knn")
knn
## k-Nearest Neighbors 
## 
## 41188 samples
##     4 predictor
##     2 classes: 'no', 'yes' 
## 
## No pre-processing
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 41188, 41188, 41188, 41188, 41188, 41188, ... 
## Resampling results across tuning parameters:
## 
##   k  Accuracy   Kappa    
##   5  0.8929313  0.4384832
##   7  0.8971711  0.4513161
##   9  0.8991547  0.4562761
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was k = 9.
LS0tDQp0aXRsZTogIlRyYWJham8gMyINCmF1dGhvcjogIkNhcmxvIFZlZ2EiDQpkYXRlOiAiYHIgZm9ybWF0KFN5cy50aW1lKCksICclZCAlQiwgJVknKWAiDQpvdXRwdXQ6IA0KICAgIGh0bWxfZG9jdW1lbnQ6DQogICAgICAgIGRmX3ByaW50OiBwYWdlZA0KICAgICAgICB0b2M6IHRydWUNCiAgICAgICAgY29kZV9kb3dubG9hZDogdHJ1ZQ0KICAgICAgICBjb2RlX2ZvbGRpbmc6ICJzaG93Ig0KLS0tDQoNCmBgYHtyIHNldHVwLCBpbmNsdWRlPUZBTFNFLCBtZXNzYWdlPUZBTFNFLCB3YXJuaW5nPUZBTFNFfQ0Ka25pdHI6Om9wdHNfY2h1bmskc2V0KGVjaG8gPSBUUlVFKQ0KYGBgDQoNCmBgYHtyIGluY2x1ZGU9RkFMU0V9DQoNCmxpYnJhcnkodGlkeXZlcnNlKQ0KbGlicmFyeShNQVNTKQ0KbGlicmFyeShjYXJldCkNCmxpYnJhcnkoSVNMUikNCmxpYnJhcnkoZ2xtbmV0KQ0KbGlicmFyeShza2ltcikNCg0KYGBgDQoNCg0KYGBge3IgfQ0KDQojIExlY3R1cmEgZGUgbGEgZGF0YQ0KZGF0YSA8LSByZWFkcjo6cmVhZF9kZWxpbShmaWxlID0gImJhbmstYWRkaXRpb25hbC1mdWxsLmNzdiIsIGxvY2FsZSA9IHJlYWRyOjpsb2NhbGUoZW5jb2RpbmcgPSAiVVRGLTgiKSwgZGVsaW0gPSAiOyIpDQoNCiMgRGVzY3JpcGNpb24gZGUgdmFyaWFibGVzDQpza2ltcjo6c2tpbShkYXRhKQ0KDQojIFNlbGVjY2lvbiBkZSA0IHZhcmlhYmxlcyBjdWFudGl0YXRpdmFzIHkgMSBjdWFsdGlhdGl2YQ0KDQpmaWwuZGF0YSA8LSANCiAgZHBseXI6OnNlbGVjdChkYXRhLCBjKHksIGFnZSwgZHVyYXRpb24sIGVtcC52YXIucmF0ZSwgY29ucy5wcmljZS5pZHgpKSAlPiUgDQogIHRpZHlyOjpkcm9wX25hKCkgJT4lIA0KICBkcGx5cjo6bXV0YXRlKHkgPSBhcy5mYWN0b3IoeSkpDQpoZWFkKGZpbC5kYXRhKQ0KDQpgYGANCiMjIFJlZ3Jlc2lvbiBsb2dpc3RpY2ENCg0KYGBge3J9DQojIFJlZ3Jlc2lvbiBsb2dpc3RpY2ENCmxvZy5yZWcgPC0gZ2xtKGZvcm11bGEgPSB5IH4gLiwgZGF0YSA9IGZpbC5kYXRhLCBmYW1pbHkgPSBiaW5vbWlhbChsaW5rID0gImxvZ2l0IikpDQpzdW1tYXJ5KGxvZy5yZWcpDQoNCiMgUmVncmVzaW9uIGxvZ2lzdGljYTogY2FyZXQNClJOR2tpbmQoc2FtcGxlLmtpbmQgPSAiUmVqZWN0aW9uIikNCnNldC5zZWVkKDEwMCkNCmxvZy5yZWcyIDwtIGNhcmV0Ojp0cmFpbih5IH4gLiwgZGF0YSA9IGZpbC5kYXRhLCBtZXRob2QgPSAiZ2xtIiwgZmFtaWx5ID0gImJpbm9taWFsIikNCmxvZy5yZWcyDQpzdW1tYXJ5KGxvZy5yZWcyJGZpbmFsTW9kZWwpDQoNCmBgYA0KDQojIyBBbmFsaXNpcyBkZXNjcmltaW5hbnRlIGxpbmVhbDogY2FyZXQNCg0KYGBge3J9DQoNClJOR2tpbmQoc2FtcGxlLmtpbmQgPSAiUmVqZWN0aW9uIikNCg0KIyBFc3RhbmRhcml6YWNpb24gZGUgdmFyaWFibGVzDQoNCnByZS5Qcm9jIDwtIGNhcmV0OjpwcmVQcm9jZXNzKHggPSBmaWwuZGF0YSwgbWV0aG9kID0gYygiY2VudGVyIiwgInNjYWxlIikpDQpmaWwuZGF0YS56IDwtIHByZWRpY3QocHJlLlByb2MsIGZpbC5kYXRhKQ0KaGVhZChmaWwuZGF0YS56KQ0KDQojIEFuYWxpc2lzIGRpc2NybWluYW50ZSBsaW5lYWwNCg0Kc2V0LnNlZWQoMTAwKQ0KbGRhIDwtIGNhcmV0Ojp0cmFpbih5IH4gLiwgZGF0YSA9IGZpbC5kYXRhLnosIG1ldGhvZCA9ICJsZGEiKQ0KbGRhDQoNCmBgYA0KDQojIyBLIHZlY2lub3MgbWFzIGNlcmNhbm9zIChLTk4pDQoNCmBgYHtyfQ0KDQpSTkdraW5kKHNhbXBsZS5raW5kID0gIlJlamVjdGlvbiIpDQoNCiMgRXN0YW5kYXJpemFjaW9uIGRlIHZhcmlhYmxlcw0KDQpwcmUuUHJvYyA8LSBjYXJldDo6cHJlUHJvY2Vzcyh4ID0gZmlsLmRhdGEsIG1ldGhvZCA9IGMoImNlbnRlciIsICJzY2FsZSIpKQ0KZmlsLmRhdGEueiA8LSBwcmVkaWN0KHByZS5Qcm9jLCBmaWwuZGF0YSkNCmhlYWQoZmlsLmRhdGEueikNCg0KIyBFbGVjY2lvbiBkZWwgdmFsb3Igb3B0aW1vIGRlIGsNCg0Kc2V0LnNlZWQoMTAwKQ0Ka25uIDwtIGNhcmV0Ojp0cmFpbih5IH4gLiwgZGF0YSA9IGZpbC5kYXRhLnosIG1ldGhvZCA9ICJrbm4iKQ0Ka25uDQoNCmBgYA0KDQo=