Autor:

Email:

RPubs:

Twitter:

Linkedin:

Jack Bedoya Acosta

https://rpubs.com/jbedoya/

https://twitter.com/JacksitoEST

https://ec.linkedin.com/in/jack-bedoya-acosta-24ab9b1b4

some text

Acciones Preliminares

Carga de Paquetes

You can also embed plots, for example:

Lectura de los datos

head(base)
## # A tibble: 6 x 105
##   ANY_ANYACA PLAN                AREA  COD_ESTUDIANTES DESID1 NACIONALIDAD SEXO 
##   <chr>      <chr>               <chr>           <dbl> <chr>  <chr>        <chr>
## 1 2020-21    ESP.SUP. EN D.H., ~ ÁREA~               1 Admis~ ECUADOR      M    
## 2 2020-21    ESP.SUP. EN D.H., ~ ÁREA~               2 Admis~ ECUADOR      F    
## 3 2020-21    ESP.SUP. EN D.H., ~ ÁREA~               3 Admis~ ECUADOR      M    
## 4 2020-21    ESP.SUP. EN D.H., ~ ÁREA~               4 Admis~ ECUADOR      F    
## 5 2020-21    ESP.SUP. EN D.H., ~ ÁREA~               5 Admis~ ECUADOR      M    
## 6 2020-21    ESP.SUP. EN D.H., ~ ÁREA~               6 Admis~ ECUADOR      F    
## # ... with 98 more variables: FECHADENACIMIENTO <chr>, LUGARDENACIMINETO <chr>,
## #   EDAD <dbl>, TIPO_DISCAPACIDAD <chr>, PORCENTAJE_DISCAPACIDAD <chr>,
## #   NUMERO_HIJOS <chr>, ETNIA <chr>, MENCION <lgl>,
## #   MOTIVO_ESTUDIO_POSGRADO <chr>, NIVEL_ESTUDIOS1 <chr>,
## #   TITULO_OBTENIDO1 <chr>, PAIS1...19 <chr>, CIUDAD1...20 <chr>,
## #   INSTITUCION1 <chr>, TIPO_INSTITUCION1 <chr>, FECHA_OBTENCION1 <dttm>,
## #   NIVEL_ESTUDIOS2 <chr>, TIPO_INSTITUCION2 <chr>, NIVEL_ESTUDIOS3 <chr>, ...

Descripción de los datos

glimpse(base)
## Rows: 7,825
## Columns: 105
## $ ANY_ANYACA                     <chr> "2020-21", "2020-21", "2020-21", "2020-~
## $ PLAN                           <chr> "ESP.SUP. EN D.H., POLÍTICAS PÚBLICAS Y~
## $ AREA                           <chr> "ÁREA DE DERECHO", "ÁREA DE DERECHO", "~
## $ COD_ESTUDIANTES                <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, ~
## $ DESID1                         <chr> "Admisión OFERTA ECUADOR ciclo 2020-202~
## $ NACIONALIDAD                   <chr> "ECUADOR", "ECUADOR", "ECUADOR", "ECUAD~
## $ SEXO                           <chr> "M", "F", "M", "F", "M", "F", "M", "M",~
## $ FECHADENACIMIENTO              <chr> "02-02-1992", "26-11-1988", "31-08-1993~
## $ LUGARDENACIMINETO              <chr> "CARCHI", "SANTO DOMINGO DE LOS TSÁCHIL~
## $ EDAD                           <dbl> 29, 32, 27, 41, 43, 42, 44, 27, 27, 26,~
## $ TIPO_DISCAPACIDAD              <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
## $ PORCENTAJE_DISCAPACIDAD        <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
## $ NUMERO_HIJOS                   <chr> "0", "0", "0", "0", "2", "1", "2", "0",~
## $ ETNIA                          <chr> "Mestizo", "Mestizo", "Mestizo", "Mesti~
## $ MENCION                        <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
## $ MOTIVO_ESTUDIO_POSGRADO        <chr> "Desarrollar sus conocimientos profesio~
## $ NIVEL_ESTUDIOS1                <chr> "Pregrado (tercer nivel)", "Pregrado (t~
## $ TITULO_OBTENIDO1               <chr> "ABOGADO DE LOS TRIBUNALES Y JUZGADOS D~
## $ PAIS1...19                     <chr> "ECUADOR", "ECUADOR", "ECUADOR", "ECUAD~
## $ CIUDAD1...20                   <chr> "RIOBAMBA", "Quito", "Quito", "GUAYAQUI~
## $ INSTITUCION1                   <chr> "UNIVERSIDAD NACIONAL DE CHIMBORAZO", "~
## $ TIPO_INSTITUCION1              <chr> "Pública nacional", "Privada nacional",~
## $ FECHA_OBTENCION1               <dttm> 2015-11-27, 2013-09-09, 2019-03-01, 20~
## $ NIVEL_ESTUDIOS2                <chr> NA, NA, NA, NA, NA, "Doctorado", NA, "P~
## $ TIPO_INSTITUCION2              <chr> NA, NA, NA, NA, NA, "Pública nacional",~
## $ NIVEL_ESTUDIOS3                <chr> NA, NA, NA, NA, NA, "Especialización", ~
## $ TIPO_INSTITUCION3              <chr> NA, NA, NA, NA, NA, "Privada nacional",~
## $ NIVEL_ESTUDIOS4                <chr> NA, NA, NA, NA, NA, "Especialización", ~
## $ TIPO_INSTITUCION4              <chr> NA, NA, NA, NA, NA, "Pública nacional",~
## $ TIPO_INSTITI_SECUNDARIA        <chr> "Fiscomisional", "Fiscal", "Fiscal", "F~
## $ INSTITUCION_LABORAL1           <chr> "NOTARIA PRIMERA DEL CANTÓN RIOBAMBA", ~
## $ OCUPACION_ACTUAL1              <chr> "Personal de apoyo administrativo", "Pe~
## $ INSTITUCION_LABORAL            <chr> "Público", "Privado", "Público", "Públi~
## $ RAMA_INSTIRUCION1              <chr> "No aplica", "Servicios Legales", "Salu~
## $ PAIS1...35                     <chr> "ECUADOR", "ECUADOR", "ECUADOR", "ECUAD~
## $ CIUDAD1...36                   <chr> "RIOBAMBA", "Quito", "San Miguel de los~
## $ FUNCIONES1                     <chr> "MATRIZADOR", "Coordinadora del Área de~
## $ FECHADESDE1                    <dttm> 2015-06-01, 2020-01-15, 2019-01-08, 20~
## $ FECHAHASTA1                    <dttm> 2020-08-20, 2020-07-20, 2022-01-01, 20~
## $ NIVEL_RESP_EN_LA_INSTITU1      <chr> "Operativo", "Operativo", "Directivo", ~
## $ CORRESPONDENCIA_LAB_PREGRADO   <chr> "Directa", "Directa", "Directa", "Direc~
## $ CORRESPONDENCIA_LAB_UASB       <chr> "Indirecta", "No aplica", "Directa", "D~
## $ TIPO_RELA_LABORAL_MANTIENE     <chr> "Dependencia", "Dependencia", "Contrato~
## $ JORNADA_LABORAL                <chr> "Tiempo completo", "Tiempo completo", "~
## $ AFILIADO_SEGURO_SOCIAL         <chr> "S", "S", "S", "S", "S", "S", "S", "S",~
## $ SEGURO_MEDICO_PRIVADO          <chr> "N", "S", "N", "S", "N", "N", "N", "S",~
## $ LABORAL2                       <chr> NA, "Fundación Regional de Asesoría Leg~
## $ PAIS2                          <chr> NA, "ECUADOR", "ECUADOR", "ECUADOR", NA~
## $ CIUDAD2                        <chr> NA, "Quito", NA, "GUAYAQUIL", NA, "QUIT~
## $ FUNCIONES2                     <chr> NA, "Presidenta de la Fundación INREDH"~
## $ FECHADESDE2                    <dttm> NA, 2018-03-20, NA, NA, NA, 2005-06-25~
## $ FECHAHASTA2                    <dttm> NA, 2020-07-20, NA, NA, NA, 2017-11-28~
## $ LABORAL3                       <chr> NA, NA, NA, NA, NA, "MONISTERIO DEL INT~
## $ PAIS3                          <chr> NA, NA, "ECUADOR", NA, NA, "ECUADOR", "~
## $ CIUDAD3                        <chr> NA, NA, NA, NA, NA, "QUITO", "QUITO", "~
## $ FUNCIONES3                     <chr> NA, NA, NA, NA, NA, "TENIENTE POLITICO ~
## $ FECHADESDE3                    <dttm> NA, NA, NA, NA, NA, 2005-06-25, 2007-1~
## $ FECHAHASTA3                    <dttm> NA, NA, NA, NA, NA, 2007-02-07, 2008-0~
## $ ESPAÑOL_ESCRITURA              <chr> "Elemental", "Avanzado", "Avanzado", "A~
## $ ESPAÑOL_LECTURA                <chr> "Intermedio", "Avanzado", "Avanzado", "~
## $ INGLES_ESCRITURA               <chr> "Intermedio", "Intermedio", "Intermedio~
## $ INGLES_LECTURA                 <chr> "Intermedio", "Intermedio", "Elemental"~
## $ OTRO_IDIOMA                    <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
## $ OTRO_ESCRITURA                 <chr> "Elemental", NA, "Intermedio", NA, NA, ~
## $ OTRO_LECTURA                   <chr> "Elemental", NA, "Intermedio", NA, NA, ~
## $ COMO_ESPE_FINANCIAR_ESTUDIOS   <chr> "Fondos Propios", "Fondos Propios", "Fo~
## $ EXPLIQUE_LA_FORMA              <chr> "AL CONTADO", "Pago mediante tarjeta de~
## $ REBAJA_ESTUD_ANDINO_UNASUR     <chr> "Si", "Si", "No", "Si", "Si", "Si", "Si~
## $ CON_QUIEN_VIVE                 <chr> "Con su familia (Madre y/o Padre y Herm~
## $ CUANTAS_PERS_CONFORMA_HOGAR    <chr> "3", "2", "13", "3", "3", "2", "4", "4"~
## $ ES_USTED_JEFE_FAMILIA          <chr> "N", "S", "N", "S", "N", "S", "S", "N",~
## $ NUMERO_PER_DEPEND_DE_USTED     <chr> "2", "1", "0", "1", "0", "0", "1", "0",~
## $ CUANTAS_PER_DE_SU_HOG_TRABAJAN <chr> "0", "0", "4", "1", "1", "0", "1", "4",~
## $ LA_VIV_DONDE_ACTUA_RESIDE_ES   <chr> "Otros", "Propia y la esta pagando", "P~
## $ CUENTA_CON_SER_INTERNET        <chr> "S", "S", "S", "S", "S", "S", "S", "S",~
## $ CUENTA_CON_SER_CABLE           <chr> "N", "S", "S", "S", "N", "S", "S", "S",~
## $ RES_PRINCIPAL_MANTEN_ESTUDIOS  <chr> "Ustede mismo", "Ustede mismo", "Ustede~
## $ GRUPO_PINCIPAL_RESP_ECONO      <chr> "Profesional con título trabaja en empr~
## $ PRICIPAL_PROPIETARIO           <chr> "Ninguno", NA, "Tierra", "Ninguno", NA,~
## $ FUENTE_INGRESO_RESPONSA_ECONO  <chr> "Sueldo como técnico administrativo en ~
## $ REMUNERACION                   <chr> "543", "1002", "791", "1200", "300", "8~
## $ INGRESOS_DEL_CONYUGUE          <chr> "0", "0", "0", "0", "1200", "0", "1000"~
## $ EJERCICIO_PROFESIONAL          <chr> "0", "100", "0", "0", "0", "300", "0", ~
## $ NEGOCIO_PROPIO                 <dbl> 0, 0, 0, 0, 0, 500, 1000, 0, 0, 0, 0, 0~
## $ OTROS_INGRSOS_PROPIOS          <chr> "0", "0", "0", "0", "0", "0", "350", "1~
## $ OTROS_INGRSOS_DEL_HOGAR        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 100, 0~
## $ TOTAL.INGRESOS                 <dbl> 543, 1102, 791, 1200, 1500, 1600, 3550,~
## $ ARRIENDO                       <chr> "210", "85000", "0", "500", "0", "800",~
## $ SERVICIOS_BASICOS              <dbl> 30, 60, 40, 20, 80, 50, 100, 10, 40, 10~
## $ BANCO_TARJETAS                 <chr> "50", "50", "20", "200", "50", "400", "~
## $ SALUD_EDUCACION_ALIMENTACION   <dbl> 100, 400, 30, 100, 500, 300, 300, 50, 1~
## $ OTROS_GASTOS                   <chr> "30", "300", "50", "20", "300", "200", ~
## $ VEHICHULOS                     <chr> "0", "50000", "0", "2000", "6", "100000~
## $ BIENES_INMUEBLES               <chr> "0", "50000", "17000", "0", "60", "1000~
## $ CAJA_BANCOS_EFECTIVO           <chr> "0", "0", "4000", "0", "100", "1000", "~
## $ MUEBLES_ENSERES                <dbl> 0, 0, 3000, 0, 2, 25000, 15000, 0, 0, 0~
## $ TARJETA_CREDITO                <chr> "400", "100", "18", "1000", "100", "150~
## $ PRESTAMOS_BANCARIOS            <chr> "400", "100", "5000", "0", "0", "0", "0~
## $ OTROS                          <chr> "0", "0", "0", "0", "0", "0", "0", "0",~
## $ FECHA_INSCRIPCION              <dttm> 2020-08-19 16:41:32, 2020-07-20 16:11:~
## $ ESTADO_ADMISION                <chr> "RECHAZADO", "ADMITIDO", "ADMITIDO", "R~
## $ PAGO_inscripción               <chr> "Pagado admision", "Pagado admision", "~
## $ PAGO_MATRICULA                 <chr> "No Pagado matricula", "Pagado matricul~
## $ PROVINCIA_RESIDENCIA           <chr> "CHIMBORAZO", "PICHINCHA", "PICHINCHA",~
## $ PARROQUI_RESIDENCIA            <chr> "LIZARZABURU", "QUITO", "SAN MIGUEL DE ~

Estadística Descriptiva

summary(base)
##   ANY_ANYACA            PLAN               AREA           COD_ESTUDIANTES
##  Length:7825        Length:7825        Length:7825        Min.   :   1   
##  Class :character   Class :character   Class :character   1st Qu.:1957   
##  Mode  :character   Mode  :character   Mode  :character   Median :3913   
##                                                           Mean   :3913   
##                                                           3rd Qu.:5869   
##                                                           Max.   :7825   
##                                                                          
##     DESID1          NACIONALIDAD           SEXO           FECHADENACIMIENTO 
##  Length:7825        Length:7825        Length:7825        Length:7825       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  LUGARDENACIMINETO       EDAD       TIPO_DISCAPACIDAD  PORCENTAJE_DISCAPACIDAD
##  Length:7825        Min.   :21.00   Length:7825        Length:7825            
##  Class :character   1st Qu.:29.00   Class :character   Class :character       
##  Mode  :character   Median :33.00   Mode  :character   Mode  :character       
##                     Mean   :34.96                                             
##                     3rd Qu.:39.00                                             
##                     Max.   :78.00                                             
##                                                                               
##  NUMERO_HIJOS          ETNIA           MENCION        MOTIVO_ESTUDIO_POSGRADO
##  Length:7825        Length:7825        Mode:logical   Length:7825            
##  Class :character   Class :character   NA's:7825      Class :character       
##  Mode  :character   Mode  :character                  Mode  :character       
##                                                                              
##                                                                              
##                                                                              
##                                                                              
##  NIVEL_ESTUDIOS1    TITULO_OBTENIDO1    PAIS1...19        CIUDAD1...20      
##  Length:7825        Length:7825        Length:7825        Length:7825       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  INSTITUCION1       TIPO_INSTITUCION1  FECHA_OBTENCION1             
##  Length:7825        Length:7825        Min.   :1972-04-12 00:00:00  
##  Class :character   Class :character   1st Qu.:2011-02-02 00:00:00  
##  Mode  :character   Mode  :character   Median :2015-04-25 00:00:00  
##                                        Mean   :2013-10-12 17:33:35  
##                                        3rd Qu.:2017-10-04 00:00:00  
##                                        Max.   :2024-05-02 00:00:00  
##                                        NA's   :234                  
##  NIVEL_ESTUDIOS2    TIPO_INSTITUCION2  NIVEL_ESTUDIOS3    TIPO_INSTITUCION3 
##  Length:7825        Length:7825        Length:7825        Length:7825       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  NIVEL_ESTUDIOS4    TIPO_INSTITUCION4  TIPO_INSTITI_SECUNDARIA
##  Length:7825        Length:7825        Length:7825            
##  Class :character   Class :character   Class :character       
##  Mode  :character   Mode  :character   Mode  :character       
##                                                               
##                                                               
##                                                               
##                                                               
##  INSTITUCION_LABORAL1 OCUPACION_ACTUAL1  INSTITUCION_LABORAL RAMA_INSTIRUCION1 
##  Length:7825          Length:7825        Length:7825         Length:7825       
##  Class :character     Class :character   Class :character    Class :character  
##  Mode  :character     Mode  :character   Mode  :character    Mode  :character  
##                                                                                
##                                                                                
##                                                                                
##                                                                                
##   PAIS1...35        CIUDAD1...36        FUNCIONES1       
##  Length:7825        Length:7825        Length:7825       
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##                                                          
##                                                          
##                                                          
##                                                          
##   FECHADESDE1                   FECHAHASTA1                 
##  Min.   :1972-10-02 00:00:00   Min.   :1995-08-31 00:00:00  
##  1st Qu.:2014-11-03 00:00:00   1st Qu.:2020-05-26 00:00:00  
##  Median :2017-09-18 00:00:00   Median :2020-07-03 00:00:00  
##  Mean   :2016-05-30 00:00:33   Mean   :2020-03-08 11:14:41  
##  3rd Qu.:2019-05-15 00:00:00   3rd Qu.:2020-08-17 00:00:00  
##  Max.   :2022-06-01 00:00:00   Max.   :2050-12-31 00:00:00  
##  NA's   :101                   NA's   :101                  
##  NIVEL_RESP_EN_LA_INSTITU1 CORRESPONDENCIA_LAB_PREGRADO
##  Length:7825               Length:7825                 
##  Class :character          Class :character            
##  Mode  :character          Mode  :character            
##                                                        
##                                                        
##                                                        
##                                                        
##  CORRESPONDENCIA_LAB_UASB TIPO_RELA_LABORAL_MANTIENE JORNADA_LABORAL   
##  Length:7825              Length:7825                Length:7825       
##  Class :character         Class :character           Class :character  
##  Mode  :character         Mode  :character           Mode  :character  
##                                                                        
##                                                                        
##                                                                        
##                                                                        
##  AFILIADO_SEGURO_SOCIAL SEGURO_MEDICO_PRIVADO   LABORAL2        
##  Length:7825            Length:7825           Length:7825       
##  Class :character       Class :character      Class :character  
##  Mode  :character       Mode  :character      Mode  :character  
##                                                                 
##                                                                 
##                                                                 
##                                                                 
##     PAIS2             CIUDAD2           FUNCIONES2       
##  Length:7825        Length:7825        Length:7825       
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##                                                          
##                                                          
##                                                          
##                                                          
##   FECHADESDE2                   FECHAHASTA2                    LABORAL3        
##  Min.   :1964-08-15 00:00:00   Min.   :1992-07-31 00:00:00   Length:7825       
##  1st Qu.:2013-06-03 00:00:00   1st Qu.:2016-02-08 12:00:00   Class :character  
##  Median :2016-03-01 00:00:00   Median :2018-06-30 00:00:00   Mode  :character  
##  Mean   :2015-02-04 22:07:37   Mean   :2017-08-11 01:06:11                     
##  3rd Qu.:2018-03-05 00:00:00   3rd Qu.:2019-11-01 06:00:00                     
##  Max.   :2021-04-23 00:00:00   Max.   :2050-09-01 00:00:00                     
##  NA's   :3673                  NA's   :3713                                    
##     PAIS3             CIUDAD3           FUNCIONES3       
##  Length:7825        Length:7825        Length:7825       
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##                                                          
##                                                          
##                                                          
##                                                          
##   FECHADESDE3                   FECHAHASTA3                  ESPAÑOL_ESCRITURA 
##  Min.   :1980-07-03 00:00:00   Min.   :1990-03-30 00:00:00   Length:7825       
##  1st Qu.:2011-06-01 18:00:00   1st Qu.:2013-11-30 00:00:00   Class :character  
##  Median :2014-06-10 12:00:00   Median :2016-04-30 00:00:00   Mode  :character  
##  Mean   :2013-07-20 09:49:27   Mean   :2015-09-28 09:48:47                     
##  3rd Qu.:2016-11-11 00:00:00   3rd Qu.:2018-05-31 00:00:00                     
##  Max.   :2020-11-16 00:00:00   Max.   :2025-06-30 00:00:00                     
##  NA's   :4913                  NA's   :4922                                    
##  ESPAÑOL_LECTURA    INGLES_ESCRITURA   INGLES_LECTURA     OTRO_IDIOMA       
##  Length:7825        Length:7825        Length:7825        Length:7825       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  OTRO_ESCRITURA     OTRO_LECTURA       COMO_ESPE_FINANCIAR_ESTUDIOS
##  Length:7825        Length:7825        Length:7825                 
##  Class :character   Class :character   Class :character            
##  Mode  :character   Mode  :character   Mode  :character            
##                                                                    
##                                                                    
##                                                                    
##                                                                    
##  EXPLIQUE_LA_FORMA  REBAJA_ESTUD_ANDINO_UNASUR CON_QUIEN_VIVE    
##  Length:7825        Length:7825                Length:7825       
##  Class :character   Class :character           Class :character  
##  Mode  :character   Mode  :character           Mode  :character  
##                                                                  
##                                                                  
##                                                                  
##                                                                  
##  CUANTAS_PERS_CONFORMA_HOGAR ES_USTED_JEFE_FAMILIA NUMERO_PER_DEPEND_DE_USTED
##  Length:7825                 Length:7825           Length:7825               
##  Class :character            Class :character      Class :character          
##  Mode  :character            Mode  :character      Mode  :character          
##                                                                              
##                                                                              
##                                                                              
##                                                                              
##  CUANTAS_PER_DE_SU_HOG_TRABAJAN LA_VIV_DONDE_ACTUA_RESIDE_ES
##  Length:7825                    Length:7825                 
##  Class :character               Class :character            
##  Mode  :character               Mode  :character            
##                                                             
##                                                             
##                                                             
##                                                             
##  CUENTA_CON_SER_INTERNET CUENTA_CON_SER_CABLE RES_PRINCIPAL_MANTEN_ESTUDIOS
##  Length:7825             Length:7825          Length:7825                  
##  Class :character        Class :character     Class :character             
##  Mode  :character        Mode  :character     Mode  :character             
##                                                                            
##                                                                            
##                                                                            
##                                                                            
##  GRUPO_PINCIPAL_RESP_ECONO PRICIPAL_PROPIETARIO FUENTE_INGRESO_RESPONSA_ECONO
##  Length:7825               Length:7825          Length:7825                  
##  Class :character          Class :character     Class :character             
##  Mode  :character          Mode  :character     Mode  :character             
##                                                                              
##                                                                              
##                                                                              
##                                                                              
##  REMUNERACION       INGRESOS_DEL_CONYUGUE EJERCICIO_PROFESIONAL
##  Length:7825        Length:7825           Length:7825          
##  Class :character   Class :character      Class :character     
##  Mode  :character   Mode  :character      Mode  :character     
##                                                                
##                                                                
##                                                                
##                                                                
##  NEGOCIO_PROPIO     OTROS_INGRSOS_PROPIOS OTROS_INGRSOS_DEL_HOGAR
##  Min.   :    0.00   Length:7825           Min.   :    0.0        
##  1st Qu.:    0.00   Class :character      1st Qu.:    0.0        
##  Median :    0.00   Mode  :character      Median :    0.0        
##  Mean   :   93.92                         Mean   :  132.4        
##  3rd Qu.:    0.00                         3rd Qu.:    0.0        
##  Max.   :18000.00                         Max.   :80000.0        
##  NA's   :173                              NA's   :175            
##  TOTAL.INGRESOS      ARRIENDO         SERVICIOS_BASICOS  BANCO_TARJETAS    
##  Min.   :      0   Length:7825        Min.   :  -120.0   Length:7825       
##  1st Qu.:    876   Class :character   1st Qu.:    30.0   Class :character  
##  Median :   1500   Mode  :character   Median :    60.0   Mode  :character  
##  Mean   :   3268                      Mean   :   273.5                     
##  3rd Qu.:   2500                      3rd Qu.:   100.0                     
##  Max.   :3800000                      Max.   :800000.0                     
##                                       NA's   :177                          
##  SALUD_EDUCACION_ALIMENTACION OTROS_GASTOS        VEHICHULOS       
##  Min.   :  -2000              Length:7825        Length:7825       
##  1st Qu.:    100              Class :character   Class :character  
##  Median :    200              Mode  :character   Mode  :character  
##  Mean   :    471                                                   
##  3rd Qu.:    400                                                   
##  Max.   :1000000                                                   
##  NA's   :177                                                       
##  BIENES_INMUEBLES   CAJA_BANCOS_EFECTIVO MUEBLES_ENSERES   TARJETA_CREDITO   
##  Length:7825        Length:7825          Min.   :      0   Length:7825       
##  Class :character   Class :character     1st Qu.:      0   Class :character  
##  Mode  :character   Mode  :character     Median :   2000   Mode  :character  
##                                          Mean   :   5452                     
##                                          3rd Qu.:   5000                     
##                                          Max.   :6500000                     
##                                          NA's   :184                         
##  PRESTAMOS_BANCARIOS    OTROS           FECHA_INSCRIPCION            
##  Length:7825         Length:7825        Min.   :2020-05-21 07:16:02  
##  Class :character    Class :character   1st Qu.:2020-06-30 15:16:53  
##  Mode  :character    Mode  :character   Median :2020-08-01 21:24:18  
##                                         Mean   :2020-07-22 17:25:07  
##                                         3rd Qu.:2020-08-18 00:06:52  
##                                         Max.   :2021-02-06 21:38:05  
##                                                                      
##  ESTADO_ADMISION    PAGO_inscripción   PAGO_MATRICULA     PROVINCIA_RESIDENCIA
##  Length:7825        Length:7825        Length:7825        Length:7825         
##  Class :character   Class :character   Class :character   Class :character    
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character    
##                                                                               
##                                                                               
##                                                                               
##                                                                               
##  PARROQUI_RESIDENCIA
##  Length:7825        
##  Class :character   
##  Mode  :character   
##                     
##                     
##                     
## 

Modelar la Data

Aprendizaje Supervisado

# Build the modelling table `base2` by keeping 34 of the columns of `base`,
# in exactly the order listed below (the group headings are only a reading
# aid based on the column names).
base2 <- select(
  base,
  # programme area and applicant profile
  AREA, NACIONALIDAD, SEXO, EDAD, TIPO_DISCAPACIDAD,
  # language proficiency (writing / reading)
  ESPAÑOL_ESCRITURA, ESPAÑOL_LECTURA,
  INGLES_ESCRITURA, INGLES_LECTURA,
  # discount flag and household situation
  REBAJA_ESTUD_ANDINO_UNASUR, CON_QUIEN_VIVE, ES_USTED_JEFE_FAMILIA,
  NUMERO_PER_DEPEND_DE_USTED, CUENTA_CON_SER_INTERNET, CUENTA_CON_SER_CABLE,
  # income columns
  REMUNERACION, EJERCICIO_PROFESIONAL, NEGOCIO_PROPIO,
  OTROS_INGRSOS_PROPIOS, TOTAL.INGRESOS,
  # expense columns
  ARRIENDO, SERVICIOS_BASICOS, BANCO_TARJETAS,
  SALUD_EDUCACION_ALIMENTACION, OTROS_GASTOS,
  # asset and debt columns
  VEHICHULOS, CAJA_BANCOS_EFECTIVO, MUEBLES_ENSERES,
  TARJETA_CREDITO, PRESTAMOS_BANCARIOS,
  # application date, admission outcome and payment status
  FECHA_INSCRIPCION, ESTADO_ADMISION, PAGO_inscripción, PAGO_MATRICULA
)


# Recode the categorical predictors of `base2` into factors.
#
# factor(x, labels = ...) WITHOUT `levels` attaches the labels to
# sort(unique(x)), i.e. in alphabetical order of the raw values, not in the
# order the labels are written. Wherever the raw values are known from this
# script, `levels` is therefore given explicitly so that every value keeps its
# own name (e.g. "Si" stays "Si" instead of being relabelled "No").

# Disability flag: any non-empty description becomes "Si", missing becomes "No".
base2$TIPO_DISCAPACIDAD[base2$TIPO_DISCAPACIDAD != ""] <- "Si"
base2$TIPO_DISCAPACIDAD[is.na(base2$TIPO_DISCAPACIDAD)] <- "No"
base2$TIPO_DISCAPACIDAD <- factor(base2$TIPO_DISCAPACIDAD,
                                  levels = c("Si", "No"))

# Living arrangement: everything other than "Solo/a" counts as accompanied.
base2$CON_QUIEN_VIVE[base2$CON_QUIEN_VIVE != "Solo/a"] <- "Acompaniado"
base2$CON_QUIEN_VIVE <- factor(base2$CON_QUIEN_VIVE,
                               levels = c("Solo/a", "Acompaniado"))

# Sex: the raw codes shown by head(base) are "M" and "F".
base2$SEXO <- factor(base2$SEXO, levels = c("M", "F"))

# Nationality: Ecuador versus any other country.
base2$NACIONALIDAD[base2$NACIONALIDAD != "ECUADOR"] <- "OTRO PAIS"
base2$NACIONALIDAD <- factor(base2$NACIONALIDAD,
                             levels = c("ECUADOR", "OTRO PAIS"))

# NOTE(review): the raw codes of the columns below are not visible in this
# script, so the label-only calls are kept unchanged. Because labels are
# matched to the alphabetically sorted raw values, confirm with
# table(base2$<column>) that each label lands on the intended value
# (e.g. that "S" is not being attached to the raw "N").
base2$CUENTA_CON_SER_INTERNET <- factor(base2$CUENTA_CON_SER_INTERNET,
                                        labels = c("S", "N"))
base2$CUENTA_CON_SER_CABLE <- factor(base2$CUENTA_CON_SER_CABLE,
                                     labels = c("S", "N"))
base2$ES_USTED_JEFE_FAMILIA <- factor(base2$ES_USTED_JEFE_FAMILIA,
                                      labels = c("S", "N"))
base2$REBAJA_ESTUD_ANDINO_UNASUR <- factor(base2$REBAJA_ESTUD_ANDINO_UNASUR,
                                           labels = c("Si", "No"))
base2$ESPAÑOL_ESCRITURA <- factor(base2$ESPAÑOL_ESCRITURA,
                                  labels = c("Elemental", "Intermedio", "Avanzado"))
base2$ESPAÑOL_LECTURA <- factor(base2$ESPAÑOL_LECTURA,
                                labels = c("Elemental", "Intermedio", "Avanzado"))
base2$INGLES_ESCRITURA <- factor(base2$INGLES_ESCRITURA,
                                 labels = c("Elemental", "Intermedio", "Avanzado"))
base2$INGLES_LECTURA <- factor(base2$INGLES_LECTURA,
                               labels = c("Elemental", "Intermedio", "Avanzado"))
base2$PAGO_inscripción <- factor(base2$PAGO_inscripción,
                                 labels = c("Pagado admision", "No Pagado admision"))
base2$PAGO_MATRICULA <- factor(base2$PAGO_MATRICULA,
                               labels = c("Pagado matricula", "No Pagado matricula"))

# Academic area: the nine labels are written in alphabetical order, which is
# the order factor() assigns them in.
# NOTE(review): assumes the raw values are these same nine strings - confirm.
base2$AREA <- factor(
  base2$AREA,
  labels = c(
    "ÁREA DE AMBIENTE Y SUSTENTABILIDAD", "ÁREA DE COMUNICACIÓN",
    "ÁREA DE DERECHO", "ÁREA DE EDUCACIÓN",
    "ÁREA DE ESTUDIOS SOCIALES Y GLOBALES",
    "ÁREA DE GESTIÓN", "ÁREA DE HISTORIA",
    "ÁREA DE LETRAS Y ESTUDIOS CULTURALES",
    "ÁREA DE SALUD"
  )
)
# Outcome variable: only "ADMITIDO" and "RECHAZADO" are kept as levels (any
# other value becomes NA) and they are relabelled "Aprueba" / "Rechazado".
# "Aprueba" is the first level of the resulting factor.
base2$ESTADO_ADMISION <- factor(
  base2$ESTADO_ADMISION,
  levels = c("ADMITIDO", "RECHAZADO"),
  labels = c("Aprueba", "Rechazado")
)
# Convert the income, expense, asset and dependants columns from text to
# numbers. Every listed column goes through the same three steps:
#   1. replace the decimal comma with a dot (first comma only),
#   2. coerce to numeric (text that cannot be parsed becomes NA, with a
#      warning),
#   3. treat a missing amount as 0.
# All sixteen columns are handled uniformly: BANCO_TARJETAS also receives the
# comma -> dot replacement before coercion, and SALUD_EDUCACION_ALIMENTACION,
# OTROS_GASTOS and VEHICHULOS are zero-filled like the other amounts.
# NOTE(review): na.omit() below drops every row that still contains an NA, so
# zero-filling those three columns can change how many rows are kept. Re-run
# the downstream chunks and refresh any printed counts/metrics.
base2 <- base2 %>%
  mutate(across(
    all_of(c(
      "REMUNERACION",
      "EJERCICIO_PROFESIONAL",
      "NEGOCIO_PROPIO",
      "OTROS_INGRSOS_PROPIOS",
      "TOTAL.INGRESOS",
      "ARRIENDO",
      "SERVICIOS_BASICOS",
      "BANCO_TARJETAS",
      "SALUD_EDUCACION_ALIMENTACION",
      "OTROS_GASTOS",
      "VEHICHULOS",
      "CAJA_BANCOS_EFECTIVO",
      "MUEBLES_ENSERES",
      "TARJETA_CREDITO",
      "NUMERO_PER_DEPEND_DE_USTED",
      "PRESTAMOS_BANCARIOS"
    )),
    ~ {
      monto <- as.numeric(str_replace(.x, pattern = ",", replacement = "."))
      replace(monto, is.na(monto), 0)
    }
  ))

# Remove every row that still has a missing value in any remaining column.
base2 <- na.omit(base2)

Pasar a Tablas

# Convert the cleaned data to a tibble. as_tibble() replaces tbl_df(), which
# has been deprecated since dplyr 1.0.0.
base2 <- as_tibble(base2)

Aprendizaje Supervisado

Train - Test split

# Seed for the random draws on the socio-demographic data set, so the
# partition is reproducible.
set.seed(1234)
# 80/20 train-test partition, stratified on the admission outcome.
base_split <- initial_split(base2, prop = 0.8, strata = ESTADO_ADMISION)

Dimensiones

# Training portion of the split and its dimensions (rows, columns).
train <- base_split %>% training()
dim(train)
## [1] 6111   34
# Held-out test portion of the split and its dimensions.
test <- base_split %>% testing()
dim(test)
## [1] 1529   34

Preprocesamiento

set.seed(123)
# Preprocessing recipe: ESTADO_ADMISION is the outcome, every other column of
# `train` starts out as a predictor.
rct_base2 <- recipe(ESTADO_ADMISION ~ ., data = train) %>%
  # Drop the language-proficiency and fee-reduction columns.
  step_rm(
    ESPAÑOL_ESCRITURA, ESPAÑOL_LECTURA,
    INGLES_ESCRITURA, INGLES_LECTURA,
    REBAJA_ESTUD_ANDINO_UNASUR
  ) %>%
  # Centre and scale the numeric predictors.
  step_normalize(all_numeric(), -all_outcomes()) %>%
  # Pool factor levels seen in fewer than 7% of rows into "other".
  step_other(all_nominal(), -all_outcomes(),
             threshold = 0.07, other = "other") %>%
  # Reserve a "new" level for categories not present in the training data.
  step_novel(all_nominal(), -all_outcomes(), new_level = "new") %>%
  # One indicator column per remaining factor level.
  step_dummy(all_nominal(), -all_outcomes()) %>%
  # Remove near-zero-variance predictors.
  step_nzv(all_predictors()) %>%
  # Up-sample the minority outcome class to a 1:1 ratio; skip = TRUE keeps
  # this step from being applied when baking new data.
  themis::step_upsample(ESTADO_ADMISION,
                        over_ratio = 1, skip = TRUE, seed = 123)

rct_base2
## Data Recipe
## 
## Inputs:
## 
##       role #variables
##    outcome          1
##  predictor         33
## 
## Operations:
## 
## Delete terms ESPAÑOL_ESCRITURA, ESPAÑOL_LECTURA, ...
## Centering and scaling for all_numeric(), -all_outcomes()
## Collapsing factor levels for all_nominal(), -all_outcomes()
## Novel factor level assignment for all_nominal(), -all_outcomes()
## Dummy variables from all_nominal(), -all_outcomes()
## Sparse, unbalanced variable filter on all_predictors()
## Up-sampling based on ESTADO_ADMISION
Ajustar parámetros del preprocesamiento
# Estimate the recipe's preprocessing parameters from the training set.
rct_base2_prep <- rct_base2 %>% prep(training = train)
Preprocesar el train
# new_data = NULL returns the preprocessed training data stored in the
# prepped recipe.
train_prep <- rct_base2_prep %>% bake(new_data = NULL)
dim(train_prep)
## [1] 6548   26
Preprocesar el test
# Apply the already-trained preprocessing to the test set.
test_prep <- rct_base2_prep %>% bake(new_data = test)
dim(test_prep)
## [1] 1529   26

MARS

Remuestreo

# Reproducible 10-fold cross-validation (single repeat) on the training set,
# stratified on the admission outcome.
set.seed(1234)
cv_base2 <- train %>%
  vfold_cv(v = 10, repeats = 1, strata = ESTADO_ADMISION)
cv_base2
## #  10-fold cross-validation using stratification 
## # A tibble: 10 x 2
##    splits             id    
##    <list>             <chr> 
##  1 <split [5499/612]> Fold01
##  2 <split [5499/612]> Fold02
##  3 <split [5499/612]> Fold03
##  4 <split [5499/612]> Fold04
##  5 <split [5500/611]> Fold05
##  6 <split [5500/611]> Fold06
##  7 <split [5500/611]> Fold07
##  8 <split [5501/610]> Fold08
##  9 <split [5501/610]> Fold09
## 10 <split [5501/610]> Fold10

Especificacion del Modelo

# MARS classifier on the "earth" engine; the three main arguments are left as
# tune() placeholders to be chosen during hyperparameter tuning.
mars_sp <- mars(
  num_terms = tune(),
  prod_degree = tune(),
  prune_method = tune()
) %>%
  set_engine("earth") %>%
  set_mode("classification")
# Show how the specification maps onto the underlying engine call.
translate(mars_sp)
## MARS Model Specification (classification)
## 
## Main Arguments:
##   num_terms = tune()
##   prod_degree = tune()
##   prune_method = tune()
## 
## Engine-Specific Arguments:
##   glm = list(family = stats::binomial)
## 
## Computational engine: earth 
## 
## Model fit template:
## earth::earth(formula = missing_arg(), data = missing_arg(), weights = missing_arg(), 
##     nprune = tune(), degree = tune(), pmethod = tune(), glm = list(family = stats::binomial), 
##     keepxy = TRUE)

Valores a probar para los hiperparámetros

# Reproducible Latin-hypercube grid of candidate values for the tunable
# parameters of the model specification (requested size: 10).
set.seed(123)
mars_grid <- grid_latin_hypercube(parameters(mars_sp), size = 10)
mars_grid
## # A tibble: 9 x 3
##   num_terms prod_degree prune_method
##       <int>       <int> <chr>       
## 1         3           1 seqrep      
## 2         4           1 none        
## 3         4           2 cv          
## 4         5           1 backward    
## 5         2           2 exhaustive  
## 6         4           1 forward     
## 7         3           1 backward    
## 8         3           2 forward     
## 9         3           2 exhaustive

Workflow

# Bundle the (untrained) recipe and the model specification into one workflow.
mars_wflow <- add_model(add_recipe(workflow(), rct_base2), mars_sp)
mars_wflow
## == Workflow ====================================================================
## Preprocessor: Recipe
## Model: mars()
## 
## -- Preprocessor ----------------------------------------------------------------
## 7 Recipe Steps
## 
## * step_rm()
## * step_normalize()
## * step_other()
## * step_novel()
## * step_dummy()
## * step_nzv()
## * step_upsample()
## 
## -- Model -----------------------------------------------------------------------
## MARS Model Specification (classification)
## 
## Main Arguments:
##   num_terms = tune()
##   prod_degree = tune()
##   prune_method = tune()
## 
## Computational engine: earth

Métricas para el Modelo

# Metrics computed for every candidate during tuning.
metricas <- metric_set(
  roc_auc,
  accuracy,
  sens,
  spec,
  bal_accuracy
)
metricas
## # A tibble: 5 x 3
##   metric       class        direction
##   <chr>        <chr>        <chr>    
## 1 roc_auc      prob_metric  maximize 
## 2 accuracy     class_metric maximize 
## 3 sens         class_metric maximize 
## 4 spec         class_metric maximize 
## 5 bal_accuracy class_metric maximize

Tuning de hiperparametros

# Evaluate every row of `mars_grid` on every cross-validation fold and record
# the metrics in `metricas`. The seed makes the run reproducible.
set.seed(123)
mars_tuned <- tune_grid(
  mars_wflow,
  resamples = cv_base2,
  grid = mars_grid,
  metrics = metricas,
  # TRUE is spelled out: `T` is an ordinary variable that can be reassigned.
  control = control_grid(allow_par = TRUE)
)

mars_tuned
## # Tuning results
## # 10-fold cross-validation using stratification 
## # A tibble: 10 x 4
##    splits             id     .metrics          .notes          
##    <list>             <chr>  <list>            <list>          
##  1 <split [5499/612]> Fold01 <tibble [40 x 7]> <tibble [1 x 1]>
##  2 <split [5499/612]> Fold02 <tibble [40 x 7]> <tibble [3 x 1]>
##  3 <split [5499/612]> Fold03 <tibble [40 x 7]> <tibble [1 x 1]>
##  4 <split [5499/612]> Fold04 <tibble [40 x 7]> <tibble [1 x 1]>
##  5 <split [5500/611]> Fold05 <tibble [40 x 7]> <tibble [1 x 1]>
##  6 <split [5500/611]> Fold06 <tibble [40 x 7]> <tibble [1 x 1]>
##  7 <split [5500/611]> Fold07 <tibble [40 x 7]> <tibble [1 x 1]>
##  8 <split [5501/610]> Fold08 <tibble [40 x 7]> <tibble [1 x 1]>
##  9 <split [5501/610]> Fold09 <tibble [40 x 7]> <tibble [1 x 1]>
## 10 <split [5501/610]> Fold10 <tibble [40 x 7]> <tibble [1 x 1]>

Modelo Final

# Pick the candidate with the best balanced accuracy, plug its values into the
# workflow and refit on the whole training set.
mars_pars_fin <- mars_tuned %>% select_best(metric = "bal_accuracy")
mars_wflow_fin <- finalize_workflow(mars_wflow, mars_pars_fin)
mars_fitted <- mars_wflow_fin %>% fit(data = train)
mars_fitted
## == Workflow [trained] ==========================================================
## Preprocessor: Recipe
## Model: mars()
## 
## -- Preprocessor ----------------------------------------------------------------
## 7 Recipe Steps
## 
## * step_rm()
## * step_normalize()
## * step_other()
## * step_novel()
## * step_dummy()
## * step_nzv()
## * step_upsample()
## 
## -- Model -----------------------------------------------------------------------
## GLM (family binomial, link logit):
##  nulldev   df       dev   df   devratio     AIC iters converged
##  9077.45 6547   2498.64 6543      0.725    2509    19         1
## 
## Earth selected 5 of 7 terms, and 4 of 25 predictors (nprune=5)
## Termination condition: RSq changed by less than 0.001 at 7 terms
## Importance: PAGO_MATRICULA_No.Pagado.matricula, ...
## Number of terms at each degree of interaction: 1 4 (additive model)
## Earth GCV 0.06664619    RSS 435.2005    GRSq 0.7334966    RSq 0.7341475
# Extract the fitted parsnip model from the trained workflow.
# NOTE(review): newer workflows releases offer extract_fit_parsnip() as the
# replacement for pull_workflow_fit() - confirm against the installed version.
mars_model_fin <- mars_fitted %>% pull_workflow_fit()
mars_model_fin
## parsnip model object
## 
## Fit time:  91ms 
## GLM (family binomial, link logit):
##  nulldev   df       dev   df   devratio     AIC iters converged
##  9077.45 6547   2498.64 6543      0.725    2509    19         1
## 
## Earth selected 5 of 7 terms, and 4 of 25 predictors (nprune=5)
## Termination condition: RSq changed by less than 0.001 at 7 terms
## Importance: PAGO_MATRICULA_No.Pagado.matricula, ...
## Number of terms at each degree of interaction: 1 4 (additive model)
## Earth GCV 0.06664619    RSS 435.2005    GRSq 0.7334966    RSq 0.7341475

Evaluamos en el test

# Predict classes for the test set, attach the observed outcome and summarise
# the resulting confusion matrix into performance metrics.
predict(mars_fitted, new_data = test) %>%
  mutate(Real = test$ESTADO_ADMISION) %>%
  conf_mat(truth = Real, estimate = .pred_class) %>%
  summary()
## # A tibble: 13 x 3
##    .metric              .estimator .estimate
##    <chr>                <chr>          <dbl>
##  1 accuracy             binary         0.903
##  2 kap                  binary         0.806
##  3 sens                 binary         0.834
##  4 spec                 binary         0.982
##  5 ppv                  binary         0.981
##  6 npv                  binary         0.837
##  7 mcc                  binary         0.817
##  8 j_index              binary         0.816
##  9 bal_accuracy         binary         0.908
## 10 detection_prevalence binary         0.455
## 11 precision            binary         0.981
## 12 recall               binary         0.834
## 13 f_meas               binary         0.902

Matriz de Confusión

# Confusion matrix of predicted versus observed classes on the test set.
predict(mars_fitted, new_data = test) %>%
  mutate(Real = test$ESTADO_ADMISION) %>%
  conf_mat(truth = Real, estimate = .pred_class)
##            Truth
## Prediction  Aprueba Rechazado
##   Aprueba       683        13
##   Rechazado     136       697

Predecir probabilidades

# Class probabilities for the test set, with the observed outcome alongside.
predict(mars_fitted, new_data = test, type = "prob") %>%
  mutate(Real = test$ESTADO_ADMISION)
## # A tibble: 1,529 x 3
##    .pred_Aprueba .pred_Rechazado Real     
##            <dbl>           <dbl> <fct>    
##  1      1.68e- 1   0.832         Aprueba  
##  2      8.31e- 2   0.917         Rechazado
##  3      5.03e-10   1.00          Rechazado
##  4      4.92e-10   1.00          Rechazado
##  5      1.62e- 1   0.838         Aprueba  
##  6      1.24e- 1   0.876         Rechazado
##  7      7.06e-10   1.00          Rechazado
##  8      1.00e+ 0   0.00000000270 Aprueba  
##  9      6.84e-10   1.00          Rechazado
## 10      9.69e- 2   0.903         Rechazado
## # ... with 1,519 more rows

Conclusiones

  • La sens (Sensibilidad) en el set de Datos Test es de 0.83
  • El Balanced accuracy (Exactitud balanceada) en el set de Datos Test es de 0.91
  • Modelo Muy Bueno