# Leyendo librerias
library(readxl)
library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(writexl)
library(stringr)

# Leyendo data
Egresados <- read_excel("Egresados_Economía.xlsx",
                        sheet = "Estudiantes")
Mallas_estudiantes <- read_delim("20181019-171138-egresados/20181019-171138-ConsultaMallas.csv", 
                                 ";", 
                                 escape_double = FALSE,
                                 trim_ws = TRUE)
## Parsed with column specification:
## cols(
##   IdSede = col_integer(),
##   CodigoAlumno = col_integer(),
##   DocumentoAlumno = col_integer(),
##   IdPrograma = col_integer(),
##   IdEnfasis = col_integer(),
##   IdJornada = col_integer(),
##   OIdProgPensMateJornPeri = col_integer(),
##   SemestreMalla = col_integer(),
##   AreaMateMalla = col_integer(),
##   AreaMateVista = col_integer(),
##   NotaVista = col_character(),
##   IdTipoNotaVista = col_integer(),
##   PeriodoVista = col_character(),
##   EsInscripcionVista = col_integer(),
##   EsAprobadaVista = col_integer(),
##   OIdEstuProgPeriMate = col_integer()
## )
## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)
## Warning: 65854 parsing failures.
## row # A tibble: 5 x 5 col     row col         expected  actual   file                                expected   <int> <chr>       <chr>     <chr>    <chr>                               actual 1 22789 DocumentoA~ an integ~ 8504064~ '20181019-171138-egresados/2018101~ file 2 22790 DocumentoA~ an integ~ 8504064~ '20181019-171138-egresados/2018101~ row 3 22791 DocumentoA~ an integ~ 8504064~ '20181019-171138-egresados/2018101~ col 4 22792 DocumentoA~ an integ~ 8504064~ '20181019-171138-egresados/2018101~ expected 5 22793 DocumentoA~ an integ~ 8504064~ '20181019-171138-egresados/2018101~
## ... ................. ... .......................................................................... ........ .......................................................................... ...... .......................................................................... .... .......................................................................... ... .......................................................................... ... .......................................................................... ........ ..........................................................................
## See problems(...) for more details.
pensum <- read_excel("pensum.xlsx") 
pensum$AreaMateMalla <- as.numeric(substr(pensum$Materia, 4, 8))
pensum <- unique(pensum[,c(1,5,13)])

# Algun análisis
hist(Egresados$NumeroMateriasAprobadas)

hist(Egresados$NumeroMateriasFaltantes)

hist(Egresados$NumeroSemestresAlumno)

hist(Egresados$VelocidadMalla)

hist(Egresados$VelocidadAlumno)

# Cruzando data
Materias_faltantes <- subset(Mallas_estudiantes,
                             is.na(Mallas_estudiantes[,15]))
Materias_egresados <- left_join(Egresados,
                                Materias_faltantes,
                                by = c("IdSede","DocumentoAlumno", "CodigoAlumno","IdPrograma", "IdEnfasis", "IdJornada"))
Materias_egresados <- left_join(Materias_egresados, pensum, by = c("IdSede", "AreaMateMalla"))

# Escribiend resutado
write_xlsx(Materias_egresados,
           "Materias Faltantes de Egresados.xlsx")