Autor: Email: RPubs: Twitter: Linkedin: |
Jack Bedoya Acosta |
|
|
You can also embed plots, for example:
# Preview the first six rows of the raw table.
head(base, n = 6L)
## # A tibble: 6 x 105
## ANY_ANYACA PLAN AREA COD_ESTUDIANTES DESID1 NACIONALIDAD SEXO
## <chr> <chr> <chr> <dbl> <chr> <chr> <chr>
## 1 2020-21 ESP.SUP. EN D.H., ~ ÁREA~ 1 Admis~ ECUADOR M
## 2 2020-21 ESP.SUP. EN D.H., ~ ÁREA~ 2 Admis~ ECUADOR F
## 3 2020-21 ESP.SUP. EN D.H., ~ ÁREA~ 3 Admis~ ECUADOR M
## 4 2020-21 ESP.SUP. EN D.H., ~ ÁREA~ 4 Admis~ ECUADOR F
## 5 2020-21 ESP.SUP. EN D.H., ~ ÁREA~ 5 Admis~ ECUADOR M
## 6 2020-21 ESP.SUP. EN D.H., ~ ÁREA~ 6 Admis~ ECUADOR F
## # ... with 98 more variables: FECHADENACIMIENTO <chr>, LUGARDENACIMINETO <chr>,
## # EDAD <dbl>, TIPO_DISCAPACIDAD <chr>, PORCENTAJE_DISCAPACIDAD <chr>,
## # NUMERO_HIJOS <chr>, ETNIA <chr>, MENCION <lgl>,
## # MOTIVO_ESTUDIO_POSGRADO <chr>, NIVEL_ESTUDIOS1 <chr>,
## # TITULO_OBTENIDO1 <chr>, PAIS1...19 <chr>, CIUDAD1...20 <chr>,
## # INSTITUCION1 <chr>, TIPO_INSTITUCION1 <chr>, FECHA_OBTENCION1 <dttm>,
## # NIVEL_ESTUDIOS2 <chr>, TIPO_INSTITUCION2 <chr>, NIVEL_ESTUDIOS3 <chr>, ...
# Column-by-column overview of `base`: name, type and leading values.
dplyr::glimpse(base)
## Rows: 7,825
## Columns: 105
## $ ANY_ANYACA <chr> "2020-21", "2020-21", "2020-21", "2020-~
## $ PLAN <chr> "ESP.SUP. EN D.H., POLÍTICAS PÚBLICAS Y~
## $ AREA <chr> "ÁREA DE DERECHO", "ÁREA DE DERECHO", "~
## $ COD_ESTUDIANTES <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, ~
## $ DESID1 <chr> "Admisión OFERTA ECUADOR ciclo 2020-202~
## $ NACIONALIDAD <chr> "ECUADOR", "ECUADOR", "ECUADOR", "ECUAD~
## $ SEXO <chr> "M", "F", "M", "F", "M", "F", "M", "M",~
## $ FECHADENACIMIENTO <chr> "02-02-1992", "26-11-1988", "31-08-1993~
## $ LUGARDENACIMINETO <chr> "CARCHI", "SANTO DOMINGO DE LOS TSÁCHIL~
## $ EDAD <dbl> 29, 32, 27, 41, 43, 42, 44, 27, 27, 26,~
## $ TIPO_DISCAPACIDAD <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
## $ PORCENTAJE_DISCAPACIDAD <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
## $ NUMERO_HIJOS <chr> "0", "0", "0", "0", "2", "1", "2", "0",~
## $ ETNIA <chr> "Mestizo", "Mestizo", "Mestizo", "Mesti~
## $ MENCION <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
## $ MOTIVO_ESTUDIO_POSGRADO <chr> "Desarrollar sus conocimientos profesio~
## $ NIVEL_ESTUDIOS1 <chr> "Pregrado (tercer nivel)", "Pregrado (t~
## $ TITULO_OBTENIDO1 <chr> "ABOGADO DE LOS TRIBUNALES Y JUZGADOS D~
## $ PAIS1...19 <chr> "ECUADOR", "ECUADOR", "ECUADOR", "ECUAD~
## $ CIUDAD1...20 <chr> "RIOBAMBA", "Quito", "Quito", "GUAYAQUI~
## $ INSTITUCION1 <chr> "UNIVERSIDAD NACIONAL DE CHIMBORAZO", "~
## $ TIPO_INSTITUCION1 <chr> "Pública nacional", "Privada nacional",~
## $ FECHA_OBTENCION1 <dttm> 2015-11-27, 2013-09-09, 2019-03-01, 20~
## $ NIVEL_ESTUDIOS2 <chr> NA, NA, NA, NA, NA, "Doctorado", NA, "P~
## $ TIPO_INSTITUCION2 <chr> NA, NA, NA, NA, NA, "Pública nacional",~
## $ NIVEL_ESTUDIOS3 <chr> NA, NA, NA, NA, NA, "Especialización", ~
## $ TIPO_INSTITUCION3 <chr> NA, NA, NA, NA, NA, "Privada nacional",~
## $ NIVEL_ESTUDIOS4 <chr> NA, NA, NA, NA, NA, "Especialización", ~
## $ TIPO_INSTITUCION4 <chr> NA, NA, NA, NA, NA, "Pública nacional",~
## $ TIPO_INSTITI_SECUNDARIA <chr> "Fiscomisional", "Fiscal", "Fiscal", "F~
## $ INSTITUCION_LABORAL1 <chr> "NOTARIA PRIMERA DEL CANTÓN RIOBAMBA", ~
## $ OCUPACION_ACTUAL1 <chr> "Personal de apoyo administrativo", "Pe~
## $ INSTITUCION_LABORAL <chr> "Público", "Privado", "Público", "Públi~
## $ RAMA_INSTIRUCION1 <chr> "No aplica", "Servicios Legales", "Salu~
## $ PAIS1...35 <chr> "ECUADOR", "ECUADOR", "ECUADOR", "ECUAD~
## $ CIUDAD1...36 <chr> "RIOBAMBA", "Quito", "San Miguel de los~
## $ FUNCIONES1 <chr> "MATRIZADOR", "Coordinadora del Área de~
## $ FECHADESDE1 <dttm> 2015-06-01, 2020-01-15, 2019-01-08, 20~
## $ FECHAHASTA1 <dttm> 2020-08-20, 2020-07-20, 2022-01-01, 20~
## $ NIVEL_RESP_EN_LA_INSTITU1 <chr> "Operativo", "Operativo", "Directivo", ~
## $ CORRESPONDENCIA_LAB_PREGRADO <chr> "Directa", "Directa", "Directa", "Direc~
## $ CORRESPONDENCIA_LAB_UASB <chr> "Indirecta", "No aplica", "Directa", "D~
## $ TIPO_RELA_LABORAL_MANTIENE <chr> "Dependencia", "Dependencia", "Contrato~
## $ JORNADA_LABORAL <chr> "Tiempo completo", "Tiempo completo", "~
## $ AFILIADO_SEGURO_SOCIAL <chr> "S", "S", "S", "S", "S", "S", "S", "S",~
## $ SEGURO_MEDICO_PRIVADO <chr> "N", "S", "N", "S", "N", "N", "N", "S",~
## $ LABORAL2 <chr> NA, "Fundación Regional de Asesoría Leg~
## $ PAIS2 <chr> NA, "ECUADOR", "ECUADOR", "ECUADOR", NA~
## $ CIUDAD2 <chr> NA, "Quito", NA, "GUAYAQUIL", NA, "QUIT~
## $ FUNCIONES2 <chr> NA, "Presidenta de la Fundación INREDH"~
## $ FECHADESDE2 <dttm> NA, 2018-03-20, NA, NA, NA, 2005-06-25~
## $ FECHAHASTA2 <dttm> NA, 2020-07-20, NA, NA, NA, 2017-11-28~
## $ LABORAL3 <chr> NA, NA, NA, NA, NA, "MONISTERIO DEL INT~
## $ PAIS3 <chr> NA, NA, "ECUADOR", NA, NA, "ECUADOR", "~
## $ CIUDAD3 <chr> NA, NA, NA, NA, NA, "QUITO", "QUITO", "~
## $ FUNCIONES3 <chr> NA, NA, NA, NA, NA, "TENIENTE POLITICO ~
## $ FECHADESDE3 <dttm> NA, NA, NA, NA, NA, 2005-06-25, 2007-1~
## $ FECHAHASTA3 <dttm> NA, NA, NA, NA, NA, 2007-02-07, 2008-0~
## $ ESPAÑOL_ESCRITURA <chr> "Elemental", "Avanzado", "Avanzado", "A~
## $ ESPAÑOL_LECTURA <chr> "Intermedio", "Avanzado", "Avanzado", "~
## $ INGLES_ESCRITURA <chr> "Intermedio", "Intermedio", "Intermedio~
## $ INGLES_LECTURA <chr> "Intermedio", "Intermedio", "Elemental"~
## $ OTRO_IDIOMA <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
## $ OTRO_ESCRITURA <chr> "Elemental", NA, "Intermedio", NA, NA, ~
## $ OTRO_LECTURA <chr> "Elemental", NA, "Intermedio", NA, NA, ~
## $ COMO_ESPE_FINANCIAR_ESTUDIOS <chr> "Fondos Propios", "Fondos Propios", "Fo~
## $ EXPLIQUE_LA_FORMA <chr> "AL CONTADO", "Pago mediante tarjeta de~
## $ REBAJA_ESTUD_ANDINO_UNASUR <chr> "Si", "Si", "No", "Si", "Si", "Si", "Si~
## $ CON_QUIEN_VIVE <chr> "Con su familia (Madre y/o Padre y Herm~
## $ CUANTAS_PERS_CONFORMA_HOGAR <chr> "3", "2", "13", "3", "3", "2", "4", "4"~
## $ ES_USTED_JEFE_FAMILIA <chr> "N", "S", "N", "S", "N", "S", "S", "N",~
## $ NUMERO_PER_DEPEND_DE_USTED <chr> "2", "1", "0", "1", "0", "0", "1", "0",~
## $ CUANTAS_PER_DE_SU_HOG_TRABAJAN <chr> "0", "0", "4", "1", "1", "0", "1", "4",~
## $ LA_VIV_DONDE_ACTUA_RESIDE_ES <chr> "Otros", "Propia y la esta pagando", "P~
## $ CUENTA_CON_SER_INTERNET <chr> "S", "S", "S", "S", "S", "S", "S", "S",~
## $ CUENTA_CON_SER_CABLE <chr> "N", "S", "S", "S", "N", "S", "S", "S",~
## $ RES_PRINCIPAL_MANTEN_ESTUDIOS <chr> "Ustede mismo", "Ustede mismo", "Ustede~
## $ GRUPO_PINCIPAL_RESP_ECONO <chr> "Profesional con título trabaja en empr~
## $ PRICIPAL_PROPIETARIO <chr> "Ninguno", NA, "Tierra", "Ninguno", NA,~
## $ FUENTE_INGRESO_RESPONSA_ECONO <chr> "Sueldo como técnico administrativo en ~
## $ REMUNERACION <chr> "543", "1002", "791", "1200", "300", "8~
## $ INGRESOS_DEL_CONYUGUE <chr> "0", "0", "0", "0", "1200", "0", "1000"~
## $ EJERCICIO_PROFESIONAL <chr> "0", "100", "0", "0", "0", "300", "0", ~
## $ NEGOCIO_PROPIO <dbl> 0, 0, 0, 0, 0, 500, 1000, 0, 0, 0, 0, 0~
## $ OTROS_INGRSOS_PROPIOS <chr> "0", "0", "0", "0", "0", "0", "350", "1~
## $ OTROS_INGRSOS_DEL_HOGAR <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 100, 0~
## $ TOTAL.INGRESOS <dbl> 543, 1102, 791, 1200, 1500, 1600, 3550,~
## $ ARRIENDO <chr> "210", "85000", "0", "500", "0", "800",~
## $ SERVICIOS_BASICOS <dbl> 30, 60, 40, 20, 80, 50, 100, 10, 40, 10~
## $ BANCO_TARJETAS <chr> "50", "50", "20", "200", "50", "400", "~
## $ SALUD_EDUCACION_ALIMENTACION <dbl> 100, 400, 30, 100, 500, 300, 300, 50, 1~
## $ OTROS_GASTOS <chr> "30", "300", "50", "20", "300", "200", ~
## $ VEHICHULOS <chr> "0", "50000", "0", "2000", "6", "100000~
## $ BIENES_INMUEBLES <chr> "0", "50000", "17000", "0", "60", "1000~
## $ CAJA_BANCOS_EFECTIVO <chr> "0", "0", "4000", "0", "100", "1000", "~
## $ MUEBLES_ENSERES <dbl> 0, 0, 3000, 0, 2, 25000, 15000, 0, 0, 0~
## $ TARJETA_CREDITO <chr> "400", "100", "18", "1000", "100", "150~
## $ PRESTAMOS_BANCARIOS <chr> "400", "100", "5000", "0", "0", "0", "0~
## $ OTROS <chr> "0", "0", "0", "0", "0", "0", "0", "0",~
## $ FECHA_INSCRIPCION <dttm> 2020-08-19 16:41:32, 2020-07-20 16:11:~
## $ ESTADO_ADMISION <chr> "RECHAZADO", "ADMITIDO", "ADMITIDO", "R~
## $ PAGO_inscripción <chr> "Pagado admision", "Pagado admision", "~
## $ PAGO_MATRICULA <chr> "No Pagado matricula", "Pagado matricul~
## $ PROVINCIA_RESIDENCIA <chr> "CHIMBORAZO", "PICHINCHA", "PICHINCHA",~
## $ PARROQUI_RESIDENCIA <chr> "LIZARZABURU", "QUITO", "SAN MIGUEL DE ~
# Per-column summary of `base` (quantiles for numeric/date columns,
# length/class/mode for character columns).
summary(object = base)
## ANY_ANYACA PLAN AREA COD_ESTUDIANTES
## Length:7825 Length:7825 Length:7825 Min. : 1
## Class :character Class :character Class :character 1st Qu.:1957
## Mode :character Mode :character Mode :character Median :3913
## Mean :3913
## 3rd Qu.:5869
## Max. :7825
##
## DESID1 NACIONALIDAD SEXO FECHADENACIMIENTO
## Length:7825 Length:7825 Length:7825 Length:7825
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## LUGARDENACIMINETO EDAD TIPO_DISCAPACIDAD PORCENTAJE_DISCAPACIDAD
## Length:7825 Min. :21.00 Length:7825 Length:7825
## Class :character 1st Qu.:29.00 Class :character Class :character
## Mode :character Median :33.00 Mode :character Mode :character
## Mean :34.96
## 3rd Qu.:39.00
## Max. :78.00
##
## NUMERO_HIJOS ETNIA MENCION MOTIVO_ESTUDIO_POSGRADO
## Length:7825 Length:7825 Mode:logical Length:7825
## Class :character Class :character NA's:7825 Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## NIVEL_ESTUDIOS1 TITULO_OBTENIDO1 PAIS1...19 CIUDAD1...20
## Length:7825 Length:7825 Length:7825 Length:7825
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## INSTITUCION1 TIPO_INSTITUCION1 FECHA_OBTENCION1
## Length:7825 Length:7825 Min. :1972-04-12 00:00:00
## Class :character Class :character 1st Qu.:2011-02-02 00:00:00
## Mode :character Mode :character Median :2015-04-25 00:00:00
## Mean :2013-10-12 17:33:35
## 3rd Qu.:2017-10-04 00:00:00
## Max. :2024-05-02 00:00:00
## NA's :234
## NIVEL_ESTUDIOS2 TIPO_INSTITUCION2 NIVEL_ESTUDIOS3 TIPO_INSTITUCION3
## Length:7825 Length:7825 Length:7825 Length:7825
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## NIVEL_ESTUDIOS4 TIPO_INSTITUCION4 TIPO_INSTITI_SECUNDARIA
## Length:7825 Length:7825 Length:7825
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## INSTITUCION_LABORAL1 OCUPACION_ACTUAL1 INSTITUCION_LABORAL RAMA_INSTIRUCION1
## Length:7825 Length:7825 Length:7825 Length:7825
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## PAIS1...35 CIUDAD1...36 FUNCIONES1
## Length:7825 Length:7825 Length:7825
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## FECHADESDE1 FECHAHASTA1
## Min. :1972-10-02 00:00:00 Min. :1995-08-31 00:00:00
## 1st Qu.:2014-11-03 00:00:00 1st Qu.:2020-05-26 00:00:00
## Median :2017-09-18 00:00:00 Median :2020-07-03 00:00:00
## Mean :2016-05-30 00:00:33 Mean :2020-03-08 11:14:41
## 3rd Qu.:2019-05-15 00:00:00 3rd Qu.:2020-08-17 00:00:00
## Max. :2022-06-01 00:00:00 Max. :2050-12-31 00:00:00
## NA's :101 NA's :101
## NIVEL_RESP_EN_LA_INSTITU1 CORRESPONDENCIA_LAB_PREGRADO
## Length:7825 Length:7825
## Class :character Class :character
## Mode :character Mode :character
##
##
##
##
## CORRESPONDENCIA_LAB_UASB TIPO_RELA_LABORAL_MANTIENE JORNADA_LABORAL
## Length:7825 Length:7825 Length:7825
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## AFILIADO_SEGURO_SOCIAL SEGURO_MEDICO_PRIVADO LABORAL2
## Length:7825 Length:7825 Length:7825
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## PAIS2 CIUDAD2 FUNCIONES2
## Length:7825 Length:7825 Length:7825
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## FECHADESDE2 FECHAHASTA2 LABORAL3
## Min. :1964-08-15 00:00:00 Min. :1992-07-31 00:00:00 Length:7825
## 1st Qu.:2013-06-03 00:00:00 1st Qu.:2016-02-08 12:00:00 Class :character
## Median :2016-03-01 00:00:00 Median :2018-06-30 00:00:00 Mode :character
## Mean :2015-02-04 22:07:37 Mean :2017-08-11 01:06:11
## 3rd Qu.:2018-03-05 00:00:00 3rd Qu.:2019-11-01 06:00:00
## Max. :2021-04-23 00:00:00 Max. :2050-09-01 00:00:00
## NA's :3673 NA's :3713
## PAIS3 CIUDAD3 FUNCIONES3
## Length:7825 Length:7825 Length:7825
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## FECHADESDE3 FECHAHASTA3 ESPAÑOL_ESCRITURA
## Min. :1980-07-03 00:00:00 Min. :1990-03-30 00:00:00 Length:7825
## 1st Qu.:2011-06-01 18:00:00 1st Qu.:2013-11-30 00:00:00 Class :character
## Median :2014-06-10 12:00:00 Median :2016-04-30 00:00:00 Mode :character
## Mean :2013-07-20 09:49:27 Mean :2015-09-28 09:48:47
## 3rd Qu.:2016-11-11 00:00:00 3rd Qu.:2018-05-31 00:00:00
## Max. :2020-11-16 00:00:00 Max. :2025-06-30 00:00:00
## NA's :4913 NA's :4922
## ESPAÑOL_LECTURA INGLES_ESCRITURA INGLES_LECTURA OTRO_IDIOMA
## Length:7825 Length:7825 Length:7825 Length:7825
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## OTRO_ESCRITURA OTRO_LECTURA COMO_ESPE_FINANCIAR_ESTUDIOS
## Length:7825 Length:7825 Length:7825
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## EXPLIQUE_LA_FORMA REBAJA_ESTUD_ANDINO_UNASUR CON_QUIEN_VIVE
## Length:7825 Length:7825 Length:7825
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## CUANTAS_PERS_CONFORMA_HOGAR ES_USTED_JEFE_FAMILIA NUMERO_PER_DEPEND_DE_USTED
## Length:7825 Length:7825 Length:7825
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## CUANTAS_PER_DE_SU_HOG_TRABAJAN LA_VIV_DONDE_ACTUA_RESIDE_ES
## Length:7825 Length:7825
## Class :character Class :character
## Mode :character Mode :character
##
##
##
##
## CUENTA_CON_SER_INTERNET CUENTA_CON_SER_CABLE RES_PRINCIPAL_MANTEN_ESTUDIOS
## Length:7825 Length:7825 Length:7825
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## GRUPO_PINCIPAL_RESP_ECONO PRICIPAL_PROPIETARIO FUENTE_INGRESO_RESPONSA_ECONO
## Length:7825 Length:7825 Length:7825
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## REMUNERACION INGRESOS_DEL_CONYUGUE EJERCICIO_PROFESIONAL
## Length:7825 Length:7825 Length:7825
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## NEGOCIO_PROPIO OTROS_INGRSOS_PROPIOS OTROS_INGRSOS_DEL_HOGAR
## Min. : 0.00 Length:7825 Min. : 0.0
## 1st Qu.: 0.00 Class :character 1st Qu.: 0.0
## Median : 0.00 Mode :character Median : 0.0
## Mean : 93.92 Mean : 132.4
## 3rd Qu.: 0.00 3rd Qu.: 0.0
## Max. :18000.00 Max. :80000.0
## NA's :173 NA's :175
## TOTAL.INGRESOS ARRIENDO SERVICIOS_BASICOS BANCO_TARJETAS
## Min. : 0 Length:7825 Min. : -120.0 Length:7825
## 1st Qu.: 876 Class :character 1st Qu.: 30.0 Class :character
## Median : 1500 Mode :character Median : 60.0 Mode :character
## Mean : 3268 Mean : 273.5
## 3rd Qu.: 2500 3rd Qu.: 100.0
## Max. :3800000 Max. :800000.0
## NA's :177
## SALUD_EDUCACION_ALIMENTACION OTROS_GASTOS VEHICHULOS
## Min. : -2000 Length:7825 Length:7825
## 1st Qu.: 100 Class :character Class :character
## Median : 200 Mode :character Mode :character
## Mean : 471
## 3rd Qu.: 400
## Max. :1000000
## NA's :177
## BIENES_INMUEBLES CAJA_BANCOS_EFECTIVO MUEBLES_ENSERES TARJETA_CREDITO
## Length:7825 Length:7825 Min. : 0 Length:7825
## Class :character Class :character 1st Qu.: 0 Class :character
## Mode :character Mode :character Median : 2000 Mode :character
## Mean : 5452
## 3rd Qu.: 5000
## Max. :6500000
## NA's :184
## PRESTAMOS_BANCARIOS OTROS FECHA_INSCRIPCION
## Length:7825 Length:7825 Min. :2020-05-21 07:16:02
## Class :character Class :character 1st Qu.:2020-06-30 15:16:53
## Mode :character Mode :character Median :2020-08-01 21:24:18
## Mean :2020-07-22 17:25:07
## 3rd Qu.:2020-08-18 00:06:52
## Max. :2021-02-06 21:38:05
##
## ESTADO_ADMISION PAGO_inscripción PAGO_MATRICULA PROVINCIA_RESIDENCIA
## Length:7825 Length:7825 Length:7825 Length:7825
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## PARROQUI_RESIDENCIA
## Length:7825
## Class :character
## Mode :character
##
##
##
##
Aprendizaje Supervisado
# Keep only the columns used for modelling. The column order below is the
# order of the resulting table.
base2 <- select(
  base,
  # programme area and applicant profile
  AREA, NACIONALIDAD, SEXO, EDAD, TIPO_DISCAPACIDAD,
  # self-reported language proficiency
  ESPAÑOL_ESCRITURA, ESPAÑOL_LECTURA, INGLES_ESCRITURA, INGLES_LECTURA,
  # household information
  REBAJA_ESTUD_ANDINO_UNASUR, CON_QUIEN_VIVE, ES_USTED_JEFE_FAMILIA,
  NUMERO_PER_DEPEND_DE_USTED, CUENTA_CON_SER_INTERNET, CUENTA_CON_SER_CABLE,
  # income
  REMUNERACION, EJERCICIO_PROFESIONAL, NEGOCIO_PROPIO, OTROS_INGRSOS_PROPIOS,
  TOTAL.INGRESOS,
  # expenses
  ARRIENDO, SERVICIOS_BASICOS, BANCO_TARJETAS, SALUD_EDUCACION_ALIMENTACION,
  OTROS_GASTOS,
  # assets and debts
  VEHICHULOS, CAJA_BANCOS_EFECTIVO, MUEBLES_ENSERES, TARJETA_CREDITO,
  PRESTAMOS_BANCARIOS,
  # application record, including the outcome ESTADO_ADMISION
  FECHA_INSCRIPCION, ESTADO_ADMISION, PAGO_inscripción, PAGO_MATRICULA
)
# Recode the categorical columns of `base2` as factors.
#
# Each factor is built with an explicit `levels =` vector. Passing the names
# through `labels =` alone is not equivalent: `factor()` first sorts the
# observed values alphabetically and then renames them by position, so the
# sorted levels "F", "M" would be relabelled "M", "F", "No"/"Si" would become
# "Si"/"No", and so on, silently inverting the meaning of the column.

# Build a factor with the given level order, stopping if the data contain a
# value that is not listed (it would otherwise silently turn into NA).
as_checked_factor <- function(x, levels) {
  unexpected <- setdiff(unique(x[!is.na(x)]), levels)
  if (length(unexpected) > 0) {
    stop(
      "Unexpected value(s) while building factor: ",
      paste(unexpected, collapse = ", "),
      call. = FALSE
    )
  }
  factor(x, levels = levels)
}

# Disability flag: any reported type counts as "Si"; a missing or empty entry
# counts as "No".
base2$TIPO_DISCAPACIDAD <- ifelse(
  is.na(base2$TIPO_DISCAPACIDAD) | base2$TIPO_DISCAPACIDAD == "",
  "No",
  "Si"
)
base2$TIPO_DISCAPACIDAD <- as_checked_factor(
  base2$TIPO_DISCAPACIDAD, c("Si", "No")
)

# Living arrangement: everything other than "Solo/a" is collapsed into
# "Acompaniado"; missing values stay missing (which() skips NA comparisons).
base2$CON_QUIEN_VIVE[which(base2$CON_QUIEN_VIVE != "Solo/a")] <- "Acompaniado"
base2$CON_QUIEN_VIVE <- as_checked_factor(
  base2$CON_QUIEN_VIVE, c("Solo/a", "Acompaniado")
)

# Two-level columns; the level order follows the order originally listed.
base2$SEXO <- as_checked_factor(base2$SEXO, c("M", "F"))
base2$CUENTA_CON_SER_INTERNET <- as_checked_factor(
  base2$CUENTA_CON_SER_INTERNET, c("S", "N")
)
base2$CUENTA_CON_SER_CABLE <- as_checked_factor(
  base2$CUENTA_CON_SER_CABLE, c("S", "N")
)
base2$REBAJA_ESTUD_ANDINO_UNASUR <- as_checked_factor(
  base2$REBAJA_ESTUD_ANDINO_UNASUR, c("Si", "No")
)
base2$ES_USTED_JEFE_FAMILIA <- as_checked_factor(
  base2$ES_USTED_JEFE_FAMILIA, c("S", "N")
)
base2$PAGO_inscripción <- as_checked_factor(
  base2$PAGO_inscripción, c("Pagado admision", "No Pagado admision")
)
base2$PAGO_MATRICULA <- as_checked_factor(
  base2$PAGO_MATRICULA, c("Pagado matricula", "No Pagado matricula")
)

# Language proficiency, ordered from lowest to highest.
base2$ESPAÑOL_ESCRITURA <- as_checked_factor(
  base2$ESPAÑOL_ESCRITURA, c("Elemental", "Intermedio", "Avanzado")
)
base2$ESPAÑOL_LECTURA <- as_checked_factor(
  base2$ESPAÑOL_LECTURA, c("Elemental", "Intermedio", "Avanzado")
)
base2$INGLES_ESCRITURA <- as_checked_factor(
  base2$INGLES_ESCRITURA, c("Elemental", "Intermedio", "Avanzado")
)
base2$INGLES_LECTURA <- as_checked_factor(
  base2$INGLES_LECTURA, c("Elemental", "Intermedio", "Avanzado")
)

# Nationality: every country other than "ECUADOR" becomes "OTRO PAIS".
base2$NACIONALIDAD[which(base2$NACIONALIDAD != "ECUADOR")] <- "OTRO PAIS"
base2$NACIONALIDAD <- as_checked_factor(
  base2$NACIONALIDAD, c("ECUADOR", "OTRO PAIS")
)

# AREA keeps the positional `labels =` form: the nine labels below are listed
# in alphabetical order, which is the order factor() assigns to sorted levels.
# NOTE(review): this assumes the data contain exactly these nine areas --
# confirm against unique(base$AREA).
base2$AREA <- factor(
  base2$AREA,
  labels = c(
    "ÁREA DE AMBIENTE Y SUSTENTABILIDAD", "ÁREA DE COMUNICACIÓN",
    "ÁREA DE DERECHO", "ÁREA DE EDUCACIÓN",
    "ÁREA DE ESTUDIOS SOCIALES Y GLOBALES",
    "ÁREA DE GESTIÓN", "ÁREA DE HISTORIA",
    "ÁREA DE LETRAS Y ESTUDIOS CULTURALES",
    "ÁREA DE SALUD"
  )
)
# Outcome variable: "ADMITIDO" is the first level; the two levels are
# relabelled "Aprueba" and "Rechazado".
base2$ESTADO_ADMISION <- factor(
  base2$ESTADO_ADMISION,
  levels = c("ADMITIDO", "RECHAZADO"),
  labels = c("Aprueba", "Rechazado")
)
# Replace the first comma in each monetary/count field with a dot so that
# values typed with a decimal comma can later be parsed by as.numeric().
# BANCO_TARJETAS is included here: it is a character column that is also cast
# with as.numeric() afterwards, and without this step any comma-decimal value
# in it would silently become NA.
# Note: str_replace() returns character vectors, so columns that were already
# numeric are text after this step until they are cast back.
base2 <- base2 %>%
  mutate(across(
    all_of(c(
      "REMUNERACION", "EJERCICIO_PROFESIONAL", "NEGOCIO_PROPIO",
      "OTROS_INGRSOS_PROPIOS", "TOTAL.INGRESOS", "ARRIENDO",
      "SERVICIOS_BASICOS", "BANCO_TARJETAS", "SALUD_EDUCACION_ALIMENTACION",
      "OTROS_GASTOS", "VEHICHULOS", "CAJA_BANCOS_EFECTIVO",
      "MUEBLES_ENSERES", "TARJETA_CREDITO", "NUMERO_PER_DEPEND_DE_USTED",
      "PRESTAMOS_BANCARIOS"
    )),
    ~ str_replace(.x, pattern = ",", replacement = ".")
  ))
# Cast the monetary/count columns to numeric. Entries that cannot be parsed
# as numbers become NA (as.numeric() emits a coercion warning for them).
base2 <- base2 %>%
  mutate(across(
    all_of(c(
      "REMUNERACION", "EJERCICIO_PROFESIONAL", "NEGOCIO_PROPIO",
      "OTROS_INGRSOS_PROPIOS", "TOTAL.INGRESOS", "ARRIENDO",
      "SERVICIOS_BASICOS", "BANCO_TARJETAS", "SALUD_EDUCACION_ALIMENTACION",
      "OTROS_GASTOS", "VEHICHULOS", "CAJA_BANCOS_EFECTIVO",
      "MUEBLES_ENSERES", "TARJETA_CREDITO", "NUMERO_PER_DEPEND_DE_USTED",
      "PRESTAMOS_BANCARIOS"
    )),
    as.numeric
  ))
# Replace missing values with 0 in the listed columns.
# NOTE(review): SALUD_EDUCACION_ALIMENTACION, OTROS_GASTOS and VEHICHULOS are
# not in this list, so their NAs are left in place -- confirm whether that is
# intended.
base2 <- base2 %>%
  mutate(across(
    all_of(c(
      "REMUNERACION", "EJERCICIO_PROFESIONAL", "NEGOCIO_PROPIO",
      "OTROS_INGRSOS_PROPIOS", "TOTAL.INGRESOS", "ARRIENDO",
      "SERVICIOS_BASICOS", "BANCO_TARJETAS", "CAJA_BANCOS_EFECTIVO",
      "MUEBLES_ENSERES", "TARJETA_CREDITO", "NUMERO_PER_DEPEND_DE_USTED",
      "PRESTAMOS_BANCARIOS"
    )),
    ~ replace(.x, is.na(.x), 0)
  ))
# Drop every row that still has a missing value in any column, then make sure
# the result is a tibble. as_tibble() replaces the deprecated tbl_df().
base2 <- base2 %>%
  na.omit() %>%
  as_tibble()
# Seed for the random train/test partition of the socio-demographic table.
set.seed(1234)
# 80/20 split, stratified on the outcome ESTADO_ADMISION.
base_split <- initial_split(base2, prop = 0.8, strata = ESTADO_ADMISION)
Dimensiones
# Training partition of the split and its dimensions (rows, columns).
train <- base_split %>% training()
dim(train)
## [1] 6111 34
# Held-out test partition of the split and its dimensions (rows, columns).
test <- base_split %>% testing()
dim(test)
## [1] 1529 34
set.seed(123)
# Preprocessing recipe: ESTADO_ADMISION is the outcome, every other column of
# `train` is a predictor.
rct_base2 <- recipe(ESTADO_ADMISION ~ ., data = train) %>%
  # Remove the language-proficiency and tuition-rebate columns.
  step_rm(
    ESPAÑOL_ESCRITURA,
    ESPAÑOL_LECTURA,
    INGLES_ESCRITURA,
    INGLES_LECTURA,
    REBAJA_ESTUD_ANDINO_UNASUR
  ) %>%
  # Centre and scale the numeric predictors.
  step_normalize(all_numeric(), -all_outcomes()) %>%
  # Pool factor levels with a frequency below 7% into "other".
  step_other(
    all_nominal(), -all_outcomes(),
    threshold = 0.07, other = "other"
  ) %>%
  # Reserve a "new" level for categories not seen during training.
  step_novel(all_nominal(), -all_outcomes(), new_level = "new") %>%
  # Dummy-encode the nominal predictors.
  step_dummy(all_nominal(), -all_outcomes()) %>%
  # Drop near-zero-variance predictors.
  step_nzv(all_predictors()) %>%
  # Up-sample the minority outcome class to a 1:1 ratio; skip = TRUE means the
  # step is not applied when baking new data.
  themis::step_upsample(
    ESTADO_ADMISION,
    over_ratio = 1, skip = TRUE, seed = 123
  )
rct_base2
## Data Recipe
##
## Inputs:
##
## role #variables
## outcome 1
## predictor 33
##
## Operations:
##
## Delete terms ESPAÑOL_ESCRITURA, ESPAÑOL_LECTURA, ...
## Centering and scaling for all_numeric(), -all_outcomes()
## Collapsing factor levels for all_nominal(), -all_outcomes()
## Novel factor level assignment for all_nominal(), -all_outcomes()
## Dummy variables from all_nominal(), -all_outcomes()
## Sparse, unbalanced variable filter on all_predictors()
## Up-sampling based on ESTADO_ADMISION
# Estimate the recipe on the training partition, then retrieve the processed
# training data (new_data = NULL returns the data stored by prep()).
rct_base2_prep <- rct_base2 %>% prep(training = train)
train_prep <- rct_base2_prep %>% bake(new_data = NULL)
dim(train_prep)
## [1] 6548 26
# Apply the trained recipe to the held-out test partition.
test_prep <- rct_base2_prep %>% bake(new_data = test)
dim(test_prep)
## [1] 1529 26
set.seed(1234)
# 10-fold cross-validation (single repeat) on the training partition,
# stratified on the outcome.
cv_base2 <- train %>%
  vfold_cv(v = 10, repeats = 1, strata = ESTADO_ADMISION)
cv_base2
## # 10-fold cross-validation using stratification
## # A tibble: 10 x 2
## splits id
## <list> <chr>
## 1 <split [5499/612]> Fold01
## 2 <split [5499/612]> Fold02
## 3 <split [5499/612]> Fold03
## 4 <split [5499/612]> Fold04
## 5 <split [5500/611]> Fold05
## 6 <split [5500/611]> Fold06
## 7 <split [5500/611]> Fold07
## 8 <split [5501/610]> Fold08
## 9 <split [5501/610]> Fold09
## 10 <split [5501/610]> Fold10
# MARS classifier on the "earth" engine; the number of terms, the interaction
# degree and the pruning method are left as tuning placeholders.
mars_sp <- mars(
  num_terms = tune(),
  prod_degree = tune(),
  prune_method = tune()
)
mars_sp <- set_engine(mars_sp, "earth")
mars_sp <- set_mode(mars_sp, "classification")
# Show how the specification maps onto the underlying earth::earth() call.
translate(mars_sp)
## MARS Model Specification (classification)
##
## Main Arguments:
## num_terms = tune()
## prod_degree = tune()
## prune_method = tune()
##
## Engine-Specific Arguments:
## glm = list(family = stats::binomial)
##
## Computational engine: earth
##
## Model fit template:
## earth::earth(formula = missing_arg(), data = missing_arg(), weights = missing_arg(),
## nprune = tune(), degree = tune(), pmethod = tune(), glm = list(family = stats::binomial),
## keepxy = TRUE)
set.seed(123)
# Latin-hypercube grid over the tunable parameters of the MARS spec,
# requesting 10 candidate combinations.
mars_grid <- grid_latin_hypercube(parameters(mars_sp), size = 10)
mars_grid
## # A tibble: 9 x 3
## num_terms prod_degree prune_method
## <int> <int> <chr>
## 1 3 1 seqrep
## 2 4 1 none
## 3 4 2 cv
## 4 5 1 backward
## 5 2 2 exhaustive
## 6 4 1 forward
## 7 3 1 backward
## 8 3 2 forward
## 9 3 2 exhaustive
# Bundle the preprocessing recipe and the model specification in a workflow.
mars_wflow <- workflow()
mars_wflow <- add_recipe(mars_wflow, rct_base2)
mars_wflow <- add_model(mars_wflow, mars_sp)
mars_wflow
## == Workflow ====================================================================
## Preprocessor: Recipe
## Model: mars()
##
## -- Preprocessor ----------------------------------------------------------------
## 7 Recipe Steps
##
## * step_rm()
## * step_normalize()
## * step_other()
## * step_novel()
## * step_dummy()
## * step_nzv()
## * step_upsample()
##
## -- Model -----------------------------------------------------------------------
## MARS Model Specification (classification)
##
## Main Arguments:
## num_terms = tune()
## prod_degree = tune()
## prune_method = tune()
##
## Computational engine: earth
# Metrics computed on every resample during tuning.
metricas <- metric_set(
  roc_auc,
  accuracy,
  sens,
  spec,
  bal_accuracy
)
metricas
## # A tibble: 5 x 3
## metric class direction
## <chr> <chr> <chr>
## 1 roc_auc prob_metric maximize
## 2 accuracy class_metric maximize
## 3 sens class_metric maximize
## 4 spec class_metric maximize
## 5 bal_accuracy class_metric maximize
set.seed(123)
# Evaluate every candidate in `mars_grid` on the cross-validation folds.
# `allow_par` is spelled TRUE rather than T: T is an ordinary variable that
# can be reassigned, TRUE is a reserved constant.
mars_tuned <- tune_grid(
  mars_wflow,
  resamples = cv_base2,
  grid = mars_grid,
  metrics = metricas,
  control = control_grid(allow_par = TRUE)
)
mars_tuned
## # Tuning results
## # 10-fold cross-validation using stratification
## # A tibble: 10 x 4
## splits id .metrics .notes
## <list> <chr> <list> <list>
## 1 <split [5499/612]> Fold01 <tibble [40 x 7]> <tibble [1 x 1]>
## 2 <split [5499/612]> Fold02 <tibble [40 x 7]> <tibble [3 x 1]>
## 3 <split [5499/612]> Fold03 <tibble [40 x 7]> <tibble [1 x 1]>
## 4 <split [5499/612]> Fold04 <tibble [40 x 7]> <tibble [1 x 1]>
## 5 <split [5500/611]> Fold05 <tibble [40 x 7]> <tibble [1 x 1]>
## 6 <split [5500/611]> Fold06 <tibble [40 x 7]> <tibble [1 x 1]>
## 7 <split [5500/611]> Fold07 <tibble [40 x 7]> <tibble [1 x 1]>
## 8 <split [5501/610]> Fold08 <tibble [40 x 7]> <tibble [1 x 1]>
## 9 <split [5501/610]> Fold09 <tibble [40 x 7]> <tibble [1 x 1]>
## 10 <split [5501/610]> Fold10 <tibble [40 x 7]> <tibble [1 x 1]>
# Pick the parameter combination with the best balanced accuracy.
mars_pars_fin <- mars_tuned %>% select_best(metric = 'bal_accuracy')
# Plug the selected values into the workflow's tuning placeholders.
mars_wflow_fin <- finalize_workflow(mars_wflow, mars_pars_fin)
# Fit the finalized workflow on the full training partition.
mars_fitted <- mars_wflow_fin %>% fit(data = train)
mars_fitted
## == Workflow [trained] ==========================================================
## Preprocessor: Recipe
## Model: mars()
##
## -- Preprocessor ----------------------------------------------------------------
## 7 Recipe Steps
##
## * step_rm()
## * step_normalize()
## * step_other()
## * step_novel()
## * step_dummy()
## * step_nzv()
## * step_upsample()
##
## -- Model -----------------------------------------------------------------------
## GLM (family binomial, link logit):
## nulldev df dev df devratio AIC iters converged
## 9077.45 6547 2498.64 6543 0.725 2509 19 1
##
## Earth selected 5 of 7 terms, and 4 of 25 predictors (nprune=5)
## Termination condition: RSq changed by less than 0.001 at 7 terms
## Importance: PAGO_MATRICULA_No.Pagado.matricula, ...
## Number of terms at each degree of interaction: 1 4 (additive model)
## Earth GCV 0.06664619 RSS 435.2005 GRSq 0.7334966 RSq 0.7341475
# Extract the fitted parsnip model from the trained workflow.
# NOTE(review): newer workflows releases offer extract_fit_parsnip() for this;
# confirm the installed version before switching.
mars_model_fin <- mars_fitted %>% pull_workflow_fit()
mars_model_fin
## parsnip model object
##
## Fit time: 91ms
## GLM (family binomial, link logit):
## nulldev df dev df devratio AIC iters converged
## 9077.45 6547 2498.64 6543 0.725 2509 19 1
##
## Earth selected 5 of 7 terms, and 4 of 25 predictors (nprune=5)
## Termination condition: RSq changed by less than 0.001 at 7 terms
## Importance: PAGO_MATRICULA_No.Pagado.matricula, ...
## Number of terms at each degree of interaction: 1 4 (additive model)
## Earth GCV 0.06664619 RSS 435.2005 GRSq 0.7334966 RSq 0.7341475
# Class predictions on the test partition, paired with the observed outcome
# and summarised through the confusion-matrix metrics.
predict(mars_fitted, new_data = test) %>%
  mutate(Real = test$ESTADO_ADMISION) %>%
  conf_mat(truth = Real, estimate = .pred_class) %>%
  summary()
## # A tibble: 13 x 3
## .metric .estimator .estimate
## <chr> <chr> <dbl>
## 1 accuracy binary 0.903
## 2 kap binary 0.806
## 3 sens binary 0.834
## 4 spec binary 0.982
## 5 ppv binary 0.981
## 6 npv binary 0.837
## 7 mcc binary 0.817
## 8 j_index binary 0.816
## 9 bal_accuracy binary 0.908
## 10 detection_prevalence binary 0.455
## 11 precision binary 0.981
## 12 recall binary 0.834
## 13 f_meas binary 0.902
# Confusion matrix of predicted vs. observed outcome on the test partition.
predict(mars_fitted, new_data = test) %>%
  mutate(Real = test$ESTADO_ADMISION) %>%
  conf_mat(truth = Real, estimate = .pred_class)
## Truth
## Prediction Aprueba Rechazado
## Aprueba 683 13
## Rechazado 136 697
# Predicted class probabilities on the test partition, next to the observed
# outcome.
predict(mars_fitted, new_data = test, type = "prob") %>%
  mutate(Real = test$ESTADO_ADMISION)
## # A tibble: 1,529 x 3
## .pred_Aprueba .pred_Rechazado Real
## <dbl> <dbl> <fct>
## 1 1.68e- 1 0.832 Aprueba
## 2 8.31e- 2 0.917 Rechazado
## 3 5.03e-10 1.00 Rechazado
## 4 4.92e-10 1.00 Rechazado
## 5 1.62e- 1 0.838 Aprueba
## 6 1.24e- 1 0.876 Rechazado
## 7 7.06e-10 1.00 Rechazado
## 8 1.00e+ 0 0.00000000270 Aprueba
## 9 6.84e-10 1.00 Rechazado
## 10 9.69e- 2 0.903 Rechazado
## # ... with 1,519 more rows