Pregunta 1
* Usar el conjunto de datos Complementary que se encuentra en la carpeta compartida dentro del aula virtual del curso. La variable respuesta es ins y el resto son potenciales variables predictoras. Obtener un modelo predictivo apropiado usando todas las variables predictoras disponibles y set.seed(85291).
* El valor del indicador con máximo valor es: ?
Lectura de datos
# Lectura de la data
data <-
readr::read_tsv(file = "Complementary.txt",
locale = readr::locale(encoding = "UTF-8")) %>%
dplyr::mutate_if(is.character, as.factor)
##
## -- Column specification --------------------------------------------------------
## cols(
## ins = col_character(),
## retire = col_character(),
## age = col_double(),
## hstatusg = col_double(),
## hhincome = col_double(),
## educyear = col_double(),
## married = col_character(),
## hisp = col_character(),
## white = col_character(),
## female = col_character(),
## chronic = col_double(),
## sretire = col_character(),
## adl = col_double()
## )
# Descripcion de variables
skimr::skim(data)
Data summary
| Name |
data |
| Number of rows |
3206 |
| Number of columns |
13 |
| _______________________ |
|
| Column type frequency: |
|
| factor |
7 |
| numeric |
6 |
| ________________________ |
|
| Group variables |
None |
Variable type: factor
| ins |
0 |
1 |
FALSE |
2 |
no: 1965, yes: 1241 |
| retire |
0 |
1 |
FALSE |
2 |
yes: 2003, no: 1203 |
| married |
0 |
1 |
FALSE |
2 |
yes: 2350, no: 856 |
| hisp |
0 |
1 |
FALSE |
2 |
no: 2973, yes: 233 |
| white |
0 |
1 |
FALSE |
2 |
yes: 2631, no: 575 |
| female |
0 |
1 |
FALSE |
2 |
no: 1674, yes: 1532 |
| sretire |
0 |
1 |
FALSE |
2 |
no: 1961, yes: 1245 |
Variable type: numeric
| age |
0 |
1 |
66.91 |
3.68 |
52 |
65 |
67.0 |
69.0 |
86.00 |
▁▃▇▁▁ |
| hstatusg |
0 |
1 |
0.70 |
0.46 |
0 |
0 |
1.0 |
1.0 |
1.00 |
▃▁▁▁▇ |
| hhincome |
0 |
1 |
45.26 |
64.34 |
0 |
17 |
31.1 |
52.8 |
1312.12 |
▇▁▁▁▁ |
| educyear |
0 |
1 |
11.90 |
3.30 |
0 |
10 |
12.0 |
14.0 |
17.00 |
▁▁▃▇▅ |
| chronic |
0 |
1 |
2.06 |
1.42 |
0 |
1 |
2.0 |
3.0 |
8.00 |
▇▇▂▁▁ |
| adl |
0 |
1 |
0.30 |
0.83 |
0 |
0 |
0.0 |
0.0 |
5.00 |
▇▁▁▁▁ |
Regresion logistica
RNGkind(sample.kind = "Rejection")
# Regresion logistica
log.reg <- glm(formula = ins ~ ., data = data, family = binomial(link = "logit"))
summary(log.reg)
##
## Call:
## glm(formula = ins ~ ., family = binomial(link = "logit"), data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.3748 -1.0044 -0.6839 1.2076 2.3059
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.4338215 0.7939663 -1.806 0.07093 .
## retireyes 0.1566189 0.0865469 1.810 0.07035 .
## age -0.0180995 0.0115907 -1.562 0.11839
## hstatusg 0.2678891 0.1030306 2.600 0.00932 **
## hhincome 0.0021068 0.0007546 2.792 0.00524 **
## educyear 0.1155060 0.0143918 8.026 1.01e-15 ***
## marriedyes 0.5403183 0.1142086 4.731 2.23e-06 ***
## hispyes -0.7828790 0.1971847 -3.970 7.18e-05 ***
## whiteyes 0.0436225 0.1073383 0.406 0.68445
## femaleyes -0.1206100 0.0882492 -1.367 0.17172
## chronic 0.0545228 0.0304231 1.792 0.07311 .
## sretireyes -0.0191353 0.0929102 -0.206 0.83683
## adl -0.2003690 0.0610218 -3.284 0.00103 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 4279.5 on 3205 degrees of freedom
## Residual deviance: 3973.5 on 3193 degrees of freedom
## AIC: 3999.5
##
## Number of Fisher Scoring iterations: 4
log.reg
##
## Call: glm(formula = ins ~ ., family = binomial(link = "logit"), data = data)
##
## Coefficients:
## (Intercept) retireyes age hstatusg hhincome educyear
## -1.433821 0.156619 -0.018100 0.267889 0.002107 0.115506
## marriedyes hispyes whiteyes femaleyes chronic sretireyes
## 0.540318 -0.782879 0.043623 -0.120610 0.054523 -0.019135
## adl
## -0.200369
##
## Degrees of Freedom: 3205 Total (i.e. Null); 3193 Residual
## Null Deviance: 4280
## Residual Deviance: 3974 AIC: 4000
# Regresion logistica: caret
set.seed(85291)
log.reg2 <- caret::train(ins ~ ., data = data, method = "glm", family = "binomial")
# Resumen del modelo:
summary(log.reg2$finalModel)
##
## Call:
## NULL
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.3748 -1.0044 -0.6839 1.2076 2.3059
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.4338215 0.7939663 -1.806 0.07093 .
## retireyes 0.1566189 0.0865469 1.810 0.07035 .
## age -0.0180995 0.0115907 -1.562 0.11839
## hstatusg 0.2678891 0.1030306 2.600 0.00932 **
## hhincome 0.0021068 0.0007546 2.792 0.00524 **
## educyear 0.1155060 0.0143918 8.026 1.01e-15 ***
## marriedyes 0.5403183 0.1142086 4.731 2.23e-06 ***
## hispyes -0.7828790 0.1971847 -3.970 7.18e-05 ***
## whiteyes 0.0436225 0.1073383 0.406 0.68445
## femaleyes -0.1206100 0.0882492 -1.367 0.17172
## chronic 0.0545228 0.0304231 1.792 0.07311 .
## sretireyes -0.0191353 0.0929102 -0.206 0.83683
## adl -0.2003690 0.0610218 -3.284 0.00103 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 4279.5 on 3205 degrees of freedom
## Residual deviance: 3973.5 on 3193 degrees of freedom
## AIC: 3999.5
##
## Number of Fisher Scoring iterations: 4
# Accuary y Kappa del modelo
lr.Accuracy <- log.reg2$results$Accuracy;lr.Accuracy
## [1] 0.623117
lr.Kappa <- log.reg2$results$Kappa;lr.Kappa
## [1] 0.1359655
K vecinos mas cercanos (KNN)
# Estandarizacion de variables
set.seed(85291)
pre.Proc <- caret::preProcess(x = data, method = c("center", "scale"))
data.z <- predict(pre.Proc, data)
head(data.z)
# Eleccion del valor optimo de k
set.seed(85291)
knn <- caret::train(ins ~ ., data = data.z, method = "knn")
Knn.accu <- knn$results[, 1:2]; Knn.accu
Knn.kapp <- knn$results[, c(1, 3)]; Knn.kapp
Resultados: Comparacion de Accuracy y Kappa
Indicadores <-
data.frame(
Modelo = c('Regresion Logistica', rep('KNN', length(Knn.accu$k))),
orden = c(NA_integer_, Knn.accu$k),
Accuracy = c(lr.Accuracy, Knn.accu$Accuracy),
Kappa = c(lr.Kappa, Knn.kapp$Kappa)
)
Indicadores
Pregunta 2
* El conjunto de datos Banco contiene información sobre pagos predeterminados, factores demográficos, datos crediticios, historial de pagos y estados de cuenta de los clientes de tarjetas de crédito en Taiwán desde abril de 2005 hasta septiembre del mismo año. La variable respuesta es Pago que toma el valor 1 cuando el pago es determinado y 0 en caso contrario (definir la variable respuesta como factor). El resto de variables disponibles en el conjunto de datos se consideran variables predictoras potenciales.
Lectura de datos
# Lectura de la data
banco <-
readr::read_tsv(file = "Banco.txt",
locale = readr::locale(encoding = "UTF-8")) %>%
dplyr::mutate(Pagos = as.factor(Pagos))
##
## -- Column specification --------------------------------------------------------
## cols(
## Pagos = col_double(),
## Credito = col_double(),
## Edad = col_double(),
## EstadoSep = col_double(),
## EstadoAgo = col_double(),
## EstadoJul = col_double(),
## PagoSep = col_double(),
## PagoAgo = col_double(),
## PagoJul = col_double()
## )
# Descripcion de variables
skimr::skim(banco)
Data summary
| Name |
banco |
| Number of rows |
199 |
| Number of columns |
9 |
| _______________________ |
|
| Column type frequency: |
|
| factor |
1 |
| numeric |
8 |
| ________________________ |
|
| Group variables |
None |
Variable type: factor
| Pagos |
0 |
1 |
FALSE |
2 |
0: 154, 1: 45 |
Variable type: numeric
| Credito |
0 |
1 |
168643.22 |
137619.25 |
10000 |
50000.0 |
130000 |
250000.0 |
630000 |
▇▃▂▁▁ |
| Edad |
0 |
1 |
34.51 |
9.79 |
22 |
27.0 |
32 |
40.5 |
63 |
▇▅▂▂▁ |
| EstadoSep |
0 |
1 |
46344.93 |
72164.87 |
-2000 |
3586.5 |
17973 |
54275.0 |
422069 |
▇▁▁▁▁ |
| EstadoAgo |
0 |
1 |
46180.39 |
72907.33 |
-9850 |
3769.5 |
18114 |
55476.0 |
431342 |
▇▁▁▁▁ |
| EstadoJul |
0 |
1 |
43213.30 |
71862.25 |
-9850 |
3289.0 |
18631 |
49122.0 |
479432 |
▇▁▁▁▁ |
| PagoSep |
0 |
1 |
5056.39 |
9179.13 |
0 |
642.0 |
2000 |
5572.5 |
70010 |
▇▁▁▁▁ |
| PagoAgo |
0 |
1 |
3895.90 |
7318.33 |
0 |
367.0 |
1500 |
3564.5 |
55693 |
▇▁▁▁▁ |
| PagoJul |
0 |
1 |
5274.34 |
14246.89 |
0 |
384.5 |
1100 |
3597.5 |
133657 |
▇▁▁▁▁ |
Modelo de Regresion Logistica:
RNGkind(sample.kind = "Rejection")
# Regresion logistica: caret
set.seed(5781)
p2.log.reg <- caret::train(Pagos ~ ., data = banco, method = "glm", family = "binomial")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Resumen del modelo:
summary(p2.log.reg$finalModel)
##
## Call:
## NULL
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.28699 -0.79962 -0.56033 -0.04006 2.96325
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.328e+00 6.759e-01 -1.965 0.0495 *
## Credito -1.550e-06 1.658e-06 -0.935 0.3500
## Edad 2.578e-02 1.955e-02 1.318 0.1873
## EstadoSep -8.276e-05 4.863e-05 -1.702 0.0888 .
## EstadoAgo 1.031e-04 6.430e-05 1.604 0.1087
## EstadoJul -8.933e-06 2.948e-05 -0.303 0.7619
## PagoSep -2.030e-04 9.387e-05 -2.162 0.0306 *
## PagoAgo -8.721e-05 8.071e-05 -1.081 0.2799
## PagoJul -4.278e-05 4.871e-05 -0.878 0.3798
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 212.75 on 198 degrees of freedom
## Residual deviance: 188.23 on 190 degrees of freedom
## AIC: 206.23
##
## Number of Fisher Scoring iterations: 7
# Accuary y Kappa del modelo
p2.log.reg
## Generalized Linear Model
##
## 199 samples
## 8 predictor
## 2 classes: '0', '1'
##
## No pre-processing
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 199, 199, 199, 199, 199, 199, ...
## Resampling results:
##
## Accuracy Kappa
## 0.7463913 0.02230517
Analisis descriminante lineal: caret
RNGkind(sample.kind = "Rejection")
# Estandarizacion de variables
set.seed(3029)
pre.Proc <- caret::preProcess(x = banco, method = c("center", "scale"))
data.z <- predict(pre.Proc, banco)
head(data.z)
# Analisis discrminante lineal
set.seed(3029)
lda <- caret::train(Pagos ~ ., data = data.z, method = "lda")
# Resumen del modelo:
lda
## Linear Discriminant Analysis
##
## 199 samples
## 8 predictor
## 2 classes: '0', '1'
##
## No pre-processing
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 199, 199, 199, 199, 199, 199, ...
## Resampling results:
##
## Accuracy Kappa
## 0.7567164 0.04024851
K vecinos mas cercanos (KNN)
RNGkind(sample.kind = "Rejection")
# Estandarizacion de variables
set.seed(7582)
pre.Proc <- caret::preProcess(x = banco, method = c("center", "scale"))
data.z <- predict(pre.Proc, banco)
head(data.z)
# Eleccion del valor optimo de k
set.seed(7582)
knn <- caret::train(Pagos ~ ., data = data.z, method = "knn")
knn
## k-Nearest Neighbors
##
## 199 samples
## 8 predictor
## 2 classes: '0', '1'
##
## No pre-processing
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 199, 199, 199, 199, 199, 199, ...
## Resampling results across tuning parameters:
##
## k Accuracy Kappa
## 5 0.6875311 0.0019438390
## 7 0.7046010 -0.0004977253
## 9 0.7230706 0.0090671057
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was k = 9.
Pregunta 3
* Para las siguientes preguntas usar el conjunto de datos swiss dentro de la librería MASS. La variable respuesta es Fertility y el resto son variables predictoras potenciales .
Lectura de datos
# Descripcion de variables
skimr::skim(swiss)
Data summary
| Name |
swiss |
| Number of rows |
47 |
| Number of columns |
6 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
6 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| Fertility |
0 |
1 |
70.14 |
12.49 |
35.00 |
64.70 |
70.40 |
78.45 |
92.5 |
▂▂▇▇▅ |
| Agriculture |
0 |
1 |
50.66 |
22.71 |
1.20 |
35.90 |
54.10 |
67.65 |
89.7 |
▃▃▆▇▅ |
| Examination |
0 |
1 |
16.49 |
7.98 |
3.00 |
12.00 |
16.00 |
22.00 |
37.0 |
▅▇▆▂▂ |
| Education |
0 |
1 |
10.98 |
9.62 |
1.00 |
6.00 |
8.00 |
12.00 |
53.0 |
▇▃▁▁▁ |
| Catholic |
0 |
1 |
41.14 |
41.70 |
2.15 |
5.20 |
15.14 |
93.12 |
100.0 |
▇▁▁▁▅ |
| Infant.Mortality |
0 |
1 |
19.94 |
2.91 |
10.80 |
18.15 |
20.00 |
21.70 |
26.6 |
▁▂▇▆▂ |
Regresion lineal multiple
RNGkind(sample.kind = "Rejection")
set.seed(29507)
# Estimando el modelo de Regresion Lineal Multiple
lm1 <- lm(formula = Fertility ~ ., data = swiss)
summary(lm1)
##
## Call:
## lm(formula = Fertility ~ ., data = swiss)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.2743 -5.2617 0.5032 4.1198 15.3213
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 66.91518 10.70604 6.250 1.91e-07 ***
## Agriculture -0.17211 0.07030 -2.448 0.01873 *
## Examination -0.25801 0.25388 -1.016 0.31546
## Education -0.87094 0.18303 -4.758 2.43e-05 ***
## Catholic 0.10412 0.03526 2.953 0.00519 **
## Infant.Mortality 1.07705 0.38172 2.822 0.00734 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.165 on 41 degrees of freedom
## Multiple R-squared: 0.7067, Adjusted R-squared: 0.671
## F-statistic: 19.76 on 5 and 41 DF, p-value: 5.594e-10
# Estimando el modelo de Regresion Lineal Multiple: Caret
set.seed(29507)
lm2 <- caret::train(Fertility ~ ., data = swiss, method = "lm")
lm2
## Linear Regression
##
## 47 samples
## 5 predictor
##
## No pre-processing
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 47, 47, 47, 47, 47, 47, ...
## Resampling results:
##
## RMSE Rsquared MAE
## 8.010361 0.6504464 6.575596
##
## Tuning parameter 'intercept' was held constant at a value of TRUE
Regresion Ridge
RNGkind(sample.kind = "Rejection")
# Separacion de la matriz de predictores y respuesta
x <- model.matrix(Fertility ~ ., data = swiss)[ , -1]
y <- swiss$Fertility
# Grafico de la convergencia a 0 de las variables
reg.ridge <- glmnet::glmnet(x = x, y = y, alpha = 0)
plot(reg.ridge, xvar = "lambda")

# Hallando el valor de lambda optimo con validacion cruzada
set.seed(97352)
cv.out <- glmnet::cv.glmnet(x = x, y = y, alpha = 0, type.measure = "mse")
best.lambda <- cv.out$lambda.min;best.lambda
## [1] 0.8203164
# Para hallar el modelo de regresion Ridge usando el valor optimo de lambda
reg.ridge.m1 <- glmnet::glmnet(x = x, y = y, alpha = 0, lambda = best.lambda)
coef(reg.ridge.m1)
## 6 x 1 sparse Matrix of class "dgCMatrix"
## s0
## (Intercept) 64.44211084
## Agriculture -0.12736848
## Examination -0.31867808
## Education -0.73073509
## Catholic 0.08667922
## Infant.Mortality 1.09634393
Regresion Lasso
# Grafico de la convergencia a 0 de las variables
reg.lasso <- glmnet::glmnet(x = x, y = y, alpha = 1)
plot(reg.lasso, xvar = "lambda")

# Hallando el valor de lambda optimo con validacion cruzada
RNGkind(sample.kind = "Rejection")
set.seed(81472)
cv.out <- glmnet::cv.glmnet(x = x, y = y, alpha = 1, type.measure = "mse")
best.lambda <- cv.out$lambda.min;best.lambda
## [1] 0.02336291
# Para hallar el modelo de regresion Lasso usando el valor optimo de lambda
reg.lasso.m1 <- glmnet::glmnet(x = x, y = y, alpha = 1, lambda = best.lambda)
coef(reg.lasso.m1)
## 6 x 1 sparse Matrix of class "dgCMatrix"
## s0
## (Intercept) 66.6461865
## Agriculture -0.1680536
## Examination -0.2549077
## Education -0.8647621
## Catholic 0.1032342
## Infant.Mortality 1.0760753
---
title: "Practica Calificada I"
author: "Carlo Vega"
date: "`r format(Sys.time(), '%d %B, %Y')`"
output: 
    html_document:
        df_print: paged
        toc: true
        code_download: true
        code_folding: "show"
---

```{r setup, include=FALSE, message=F, warning=FALSE, error=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

```{r include=FALSE}

library(tidyverse)
library(MASS)
library(caret)
library(ISLR)
library(glmnet)
library(skimr)

```

# Pregunta 1

    * Usar el conjunto de datos Complementary que se encuentra en la carpeta compartida dentro del aula virtual del curso. La variable respuesta es ins y el resto son potenciales variables predictoras. Obtener un modelo predictivo apropiado usando todas las variables predictoras disponibles y set.seed(85291).

    * El valor del indicador con máximo valor es: ?

## Lectura de datos
```{r }

# Lectura de la data
data <- 
  readr::read_tsv(file = "Complementary.txt", 
                  locale = readr::locale(encoding = "UTF-8")) %>% 
  dplyr::mutate_if(is.character, as.factor)

# Descripcion de variables
skimr::skim(data)

```


## Regresion logistica

```{r}
RNGkind(sample.kind = "Rejection")

# Regresion logistica
log.reg <- glm(formula = ins ~ ., data = data, family = binomial(link = "logit"))
summary(log.reg)
log.reg

# Regresion logistica: caret
set.seed(85291)
log.reg2 <- caret::train(ins ~ ., data = data, method = "glm", family = "binomial")

# Resumen del modelo:
summary(log.reg2$finalModel)

# Accuary y Kappa del modelo

lr.Accuracy <- log.reg2$results$Accuracy;lr.Accuracy
lr.Kappa <- log.reg2$results$Kappa;lr.Kappa

```

## K vecinos mas cercanos (KNN)

```{r}

# Estandarizacion de variables
set.seed(85291)
pre.Proc <- caret::preProcess(x = data, method = c("center", "scale"))
data.z <- predict(pre.Proc, data)
head(data.z)

# Eleccion del valor optimo de k

set.seed(85291)
knn <- caret::train(ins ~ ., data = data.z, method = "knn")

Knn.accu <- knn$results[, 1:2]; Knn.accu
Knn.kapp <- knn$results[, c(1, 3)]; Knn.kapp

```

## Resultados: Comparacion de Accuracy y Kappa

```{r}

Indicadores <- 
  data.frame(
  Modelo = c('Regresion Logistica', rep('KNN', length(Knn.accu$k))), 
  orden = c(NA_integer_, Knn.accu$k), 
  Accuracy = c(lr.Accuracy, Knn.accu$Accuracy), 
  Kappa = c(lr.Kappa, Knn.kapp$Kappa)
  )

Indicadores
```

# Pregunta 2

    * El conjunto de datos Banco contiene información sobre pagos predeterminados, factores demográficos, datos crediticios, historial de pagos y estados de cuenta de los clientes de tarjetas de crédito en Taiwán desde abril de 2005 hasta septiembre del mismo año. La variable respuesta es Pago que toma el valor 1 cuando el pago es determinado y 0 en caso contrario (definir la variable respuesta como factor). El resto de variables disponibles en el conjunto de datos se consideran variables predictoras potenciales.

## Lectura de datos

```{r}

# Lectura de la data
banco <- 
  readr::read_tsv(file = "Banco.txt", 
                  locale = readr::locale(encoding = "UTF-8")) %>% 
  dplyr::mutate(Pagos = as.factor(Pagos))

# Descripcion de variables
skimr::skim(banco)

```

## Modelo de Regresion Logistica:

```{r}
RNGkind(sample.kind = "Rejection")
# Regresion logistica: caret
set.seed(5781)
p2.log.reg <- caret::train(Pagos ~ ., data = banco, method = "glm", family = "binomial")

# Resumen del modelo:
summary(p2.log.reg$finalModel)

# Accuary y Kappa del modelo
p2.log.reg

```

## Analisis descriminante lineal: caret

```{r}
RNGkind(sample.kind = "Rejection")
# Estandarizacion de variables
set.seed(3029)
pre.Proc <- caret::preProcess(x = banco, method = c("center", "scale"))
data.z <- predict(pre.Proc, banco)
head(data.z)

# Analisis discrminante lineal
set.seed(3029)
lda <- caret::train(Pagos ~ ., data = data.z, method = "lda")

# Resumen del modelo:
lda

```

## K vecinos mas cercanos (KNN)

```{r}
RNGkind(sample.kind = "Rejection")
# Estandarizacion de variables
set.seed(7582)
pre.Proc <- caret::preProcess(x = banco, method = c("center", "scale"))
data.z <- predict(pre.Proc, banco)
head(data.z)

# Eleccion del valor optimo de k
set.seed(7582)
knn <- caret::train(Pagos ~ ., data = data.z, method = "knn")
knn

```

# Pregunta 3

    * Para las siguientes preguntas usar el conjunto de datos swiss dentro de la librería MASS. La variable respuesta es Fertility y el resto son variables predictoras potenciales .

## Lectura de datos

```{r }

# Descripcion de variables
skimr::skim(swiss)

```


## Regresion lineal multiple

```{r }

RNGkind(sample.kind = "Rejection")
set.seed(29507)
# Estimando el modelo de Regresion Lineal Multiple
lm1 <- lm(formula = Fertility ~ ., data = swiss)
summary(lm1)

# Estimando el modelo de Regresion Lineal Multiple: Caret
set.seed(29507)
lm2 <- caret::train(Fertility ~ ., data = swiss, method = "lm")
lm2


```

## Regresion Ridge

```{r}

RNGkind(sample.kind = "Rejection")

# Separacion de la matriz de predictores y respuesta
x <- model.matrix(Fertility ~ ., data = swiss)[ , -1]
y <- swiss$Fertility

# Grafico de la convergencia a 0 de las variables
reg.ridge <- glmnet::glmnet(x = x, y = y, alpha = 0)
plot(reg.ridge, xvar = "lambda")

# Hallando el valor de lambda optimo con validacion cruzada
set.seed(97352)
cv.out <- glmnet::cv.glmnet(x = x, y = y, alpha = 0, type.measure = "mse")
best.lambda <- cv.out$lambda.min;best.lambda

# Para hallar el modelo de regresion Ridge usando el valor optimo de lambda
reg.ridge.m1 <- glmnet::glmnet(x = x, y = y, alpha = 0, lambda = best.lambda)
coef(reg.ridge.m1)

```

## Regresion Lasso

```{r}

# Grafico de la convergencia a 0 de las variables
reg.lasso <- glmnet::glmnet(x = x, y = y, alpha = 1)
plot(reg.lasso, xvar = "lambda")

# Hallando el valor de lambda optimo con validacion cruzada
RNGkind(sample.kind = "Rejection")
set.seed(81472)
cv.out <- glmnet::cv.glmnet(x = x, y = y, alpha = 1, type.measure = "mse")
best.lambda <- cv.out$lambda.min;best.lambda

# Para hallar el modelo de regresion Lasso usando el valor optimo de lambda
reg.lasso.m1 <- glmnet::glmnet(x = x, y = y, alpha = 1, lambda = best.lambda)
coef(reg.lasso.m1)

```

