options(repos = c(CRAN = "https://cloud.r-project.org"))
# PACKAGE INSTALLATION AND LOADING
# Install (first time only): the download is skipped when tidyverse is
# already available, so re-running the document does not reinstall it.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse") # dplyr, tidyr, ggplot2, readr, tibble
}
Installing package into 'C:/Users/john/AppData/Local/R/win-library/4.5'
(as 'lib' is unspecified)
package 'tidyverse' successfully unpacked and MD5 sums checked
The downloaded binary packages are in
C:\Users\john\AppData\Local\Temp\RtmpIVXfEx\downloaded_packages
# mice: multiple imputation. Install only when missing; the unconditional
# reinstall produced "cannot remove prior installation ... Permission denied"
# warnings in the rendered output.
if (!requireNamespace("mice", quietly = TRUE)) {
  install.packages("mice")
}
Installing package into 'C:/Users/john/AppData/Local/R/win-library/4.5'
(as 'lib' is unspecified)
package 'mice' successfully unpacked and MD5 sums checked
Warning: cannot remove prior installation of package 'mice'
Warning in file.copy(savedcopy, lib, recursive = TRUE): problema al copiar
C:\Users\john\AppData\Local\R\win-library\4.5\00LOCK\mice\libs\x64\mice.dll a
C:\Users\john\AppData\Local\R\win-library\4.5\mice\libs\x64\mice.dll:
Permission denied
Warning: restored 'mice'
The downloaded binary packages are in
C:\Users\john\AppData\Local\Temp\RtmpIVXfEx\downloaded_packages
# VIM: diagnosis and visualisation of missing values. Install only when
# missing; the unconditional reinstall produced "cannot remove prior
# installation ... Permission denied" warnings in the rendered output.
if (!requireNamespace("VIM", quietly = TRUE)) {
  install.packages("VIM")
}
Installing package into 'C:/Users/john/AppData/Local/R/win-library/4.5'
(as 'lib' is unspecified)
package 'VIM' successfully unpacked and MD5 sums checked
Warning: cannot remove prior installation of package 'VIM'
Warning in file.copy(savedcopy, lib, recursive = TRUE): problema al copiar
C:\Users\john\AppData\Local\R\win-library\4.5\00LOCK\VIM\libs\x64\VIM.dll a
C:\Users\john\AppData\Local\R\win-library\4.5\VIM\libs\x64\VIM.dll: Permission
denied
Warning: restored 'VIM'
The downloaded binary packages are in
C:\Users\john\AppData\Local\Temp\RtmpIVXfEx\downloaded_packages
# writexl: export to Excel. Install only when missing; the unconditional
# reinstall produced "cannot remove prior installation ... Permission denied"
# warnings in the rendered output.
if (!requireNamespace("writexl", quietly = TRUE)) {
  install.packages("writexl")
}
Installing package into 'C:/Users/john/AppData/Local/R/win-library/4.5'
(as 'lib' is unspecified)
package 'writexl' successfully unpacked and MD5 sums checked
Warning: cannot remove prior installation of package 'writexl'
Warning in file.copy(savedcopy, lib, recursive = TRUE): problema al copiar
C:\Users\john\AppData\Local\R\win-library\4.5\00LOCK\writexl\libs\x64\writexl.dll
a C:\Users\john\AppData\Local\R\win-library\4.5\writexl\libs\x64\writexl.dll:
Permission denied
Warning: restored 'writexl'
The downloaded binary packages are in
C:\Users\john\AppData\Local\Temp\RtmpIVXfEx\downloaded_packages
# MASS: statistics (its select() conflicts with dplyr's). Install only when
# missing; the unconditional reinstall produced "cannot remove prior
# installation ... Permission denied" warnings in the rendered output.
if (!requireNamespace("MASS", quietly = TRUE)) {
  install.packages("MASS")
}
Installing package into 'C:/Users/john/AppData/Local/R/win-library/4.5'
(as 'lib' is unspecified)
package 'MASS' successfully unpacked and MD5 sums checked
Warning: cannot remove prior installation of package 'MASS'
Warning in file.copy(savedcopy, lib, recursive = TRUE): problema al copiar
C:\Users\john\AppData\Local\R\win-library\4.5\00LOCK\MASS\libs\x64\MASS.dll a
C:\Users\john\AppData\Local\R\win-library\4.5\MASS\libs\x64\MASS.dll:
Permission denied
Warning: restored 'MASS'
The downloaded binary packages are in
C:\Users\john\AppData\Local\Temp\RtmpIVXfEx\downloaded_packages
# openxlsx: read/write Excel files. Install only when missing; the
# unconditional reinstall produced "cannot remove prior installation ...
# Permission denied" warnings in the rendered output.
if (!requireNamespace("openxlsx", quietly = TRUE)) {
  install.packages("openxlsx")
}
Installing package into 'C:/Users/john/AppData/Local/R/win-library/4.5'
(as 'lib' is unspecified)
package 'openxlsx' successfully unpacked and MD5 sums checked
Warning: cannot remove prior installation of package 'openxlsx'
Warning in file.copy(savedcopy, lib, recursive = TRUE): problema al copiar
C:\Users\john\AppData\Local\R\win-library\4.5\00LOCK\openxlsx\libs\x64\openxlsx.dll
a C:\Users\john\AppData\Local\R\win-library\4.5\openxlsx\libs\x64\openxlsx.dll:
Permission denied
Warning: restored 'openxlsx'
The downloaded binary packages are in
C:\Users\john\AppData\Local\Temp\RtmpIVXfEx\downloaded_packages
# stringi: advanced text handling. Install only when missing; the
# unconditional reinstall produced "cannot remove prior installation ...
# Permission denied" warnings in the rendered output.
if (!requireNamespace("stringi", quietly = TRUE)) {
  install.packages("stringi")
}
Installing package into 'C:/Users/john/AppData/Local/R/win-library/4.5'
(as 'lib' is unspecified)
package 'stringi' successfully unpacked and MD5 sums checked
Warning: cannot remove prior installation of package 'stringi'
Warning in file.copy(savedcopy, lib, recursive = TRUE): problema al copiar
C:\Users\john\AppData\Local\R\win-library\4.5\00LOCK\stringi\libs\x64\stringi.dll
a C:\Users\john\AppData\Local\R\win-library\4.5\stringi\libs\x64\stringi.dll:
Permission denied
Warning: restored 'stringi'
The downloaded binary packages are in
C:\Users\john\AppData\Local\Temp\RtmpIVXfEx\downloaded_packages
# janitor: cleaning of column names and tables. Install only when missing so
# that re-running the document does not download the package again.
if (!requireNamespace("janitor", quietly = TRUE)) {
  install.packages("janitor")
}
Installing package into 'C:/Users/john/AppData/Local/R/win-library/4.5'
(as 'lib' is unspecified)
package 'janitor' successfully unpacked and MD5 sums checked
The downloaded binary packages are in
C:\Users\john\AppData\Local\Temp\RtmpIVXfEx\downloaded_packages
# lubridate: date handling. Install only when missing; the unconditional
# reinstall produced "cannot remove prior installation ... Permission denied"
# warnings in the rendered output.
if (!requireNamespace("lubridate", quietly = TRUE)) {
  install.packages("lubridate")
}
Installing package into 'C:/Users/john/AppData/Local/R/win-library/4.5'
(as 'lib' is unspecified)
package 'lubridate' successfully unpacked and MD5 sums checked
Warning: cannot remove prior installation of package 'lubridate'
Warning in file.copy(savedcopy, lib, recursive = TRUE): problema al copiar
C:\Users\john\AppData\Local\R\win-library\4.5\00LOCK\lubridate\libs\x64\lubridate.dll
a
C:\Users\john\AppData\Local\R\win-library\4.5\lubridate\libs\x64\lubridate.dll:
Permission denied
Warning: restored 'lubridate'
The downloaded binary packages are in
C:\Users\john\AppData\Local\Temp\RtmpIVXfEx\downloaded_packages
# stringr: text functions. Install only when missing so that re-running the
# document does not download the package again.
if (!requireNamespace("stringr", quietly = TRUE)) {
  install.packages("stringr")
}
Installing package into 'C:/Users/john/AppData/Local/R/win-library/4.5'
(as 'lib' is unspecified)
package 'stringr' successfully unpacked and MD5 sums checked
The downloaded binary packages are in
C:\Users\john\AppData\Local\Temp\RtmpIVXfEx\downloaded_packages
# readr: efficient CSV reading. Install only when missing; the unconditional
# reinstall produced "cannot remove prior installation ... Permission denied"
# warnings in the rendered output.
if (!requireNamespace("readr", quietly = TRUE)) {
  install.packages("readr")
}
Installing package into 'C:/Users/john/AppData/Local/R/win-library/4.5'
(as 'lib' is unspecified)
package 'readr' successfully unpacked and MD5 sums checked
Warning: cannot remove prior installation of package 'readr'
Warning in file.copy(savedcopy, lib, recursive = TRUE): problema al copiar
C:\Users\john\AppData\Local\R\win-library\4.5\00LOCK\readr\libs\x64\readr.dll a
C:\Users\john\AppData\Local\R\win-library\4.5\readr\libs\x64\readr.dll:
Permission denied
Warning: restored 'readr'
The downloaded binary packages are in
C:\Users\john\AppData\Local\Temp\RtmpIVXfEx\downloaded_packages
# dplyr: data manipulation. Install only when missing; the unconditional
# reinstall produced "cannot remove prior installation ... Permission denied"
# warnings in the rendered output.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
Installing package into 'C:/Users/john/AppData/Local/R/win-library/4.5'
(as 'lib' is unspecified)
package 'dplyr' successfully unpacked and MD5 sums checked
Warning: cannot remove prior installation of package 'dplyr'
Warning in file.copy(savedcopy, lib, recursive = TRUE): problema al copiar
C:\Users\john\AppData\Local\R\win-library\4.5\00LOCK\dplyr\libs\x64\dplyr.dll a
C:\Users\john\AppData\Local\R\win-library\4.5\dplyr\libs\x64\dplyr.dll:
Permission denied
Warning: restored 'dplyr'
The downloaded binary packages are in
C:\Users\john\AppData\Local\Temp\RtmpIVXfEx\downloaded_packages
# purrr: functional programming. Install only when missing; the unconditional
# reinstall produced "cannot remove prior installation ... Permission denied"
# warnings in the rendered output.
if (!requireNamespace("purrr", quietly = TRUE)) {
  install.packages("purrr")
}
Installing package into 'C:/Users/john/AppData/Local/R/win-library/4.5'
(as 'lib' is unspecified)
package 'purrr' successfully unpacked and MD5 sums checked
Warning: cannot remove prior installation of package 'purrr'
Warning in file.copy(savedcopy, lib, recursive = TRUE): problema al copiar
C:\Users\john\AppData\Local\R\win-library\4.5\00LOCK\purrr\libs\x64\purrr.dll a
C:\Users\john\AppData\Local\R\win-library\4.5\purrr\libs\x64\purrr.dll:
Permission denied
Warning: restored 'purrr'
The downloaded binary packages are in
C:\Users\john\AppData\Local\Temp\RtmpIVXfEx\downloaded_packages
# fs: file and directory handling. Install only when missing; the
# unconditional reinstall produced "cannot remove prior installation ...
# Permission denied" warnings in the rendered output.
if (!requireNamespace("fs", quietly = TRUE)) {
  install.packages("fs")
}
Installing package into 'C:/Users/john/AppData/Local/R/win-library/4.5'
(as 'lib' is unspecified)
package 'fs' successfully unpacked and MD5 sums checked
Warning: cannot remove prior installation of package 'fs'
Warning in file.copy(savedcopy, lib, recursive = TRUE): problema al copiar
C:\Users\john\AppData\Local\R\win-library\4.5\00LOCK\fs\libs\x64\fs.dll a
C:\Users\john\AppData\Local\R\win-library\4.5\fs\libs\x64\fs.dll: Permission
denied
Warning: restored 'fs'
The downloaded binary packages are in
C:\Users\john\AppData\Local\Temp\RtmpIVXfEx\downloaded_packages
# openxlsx: create, read and write Excel files (.xlsx). This package is
# already requested earlier in the file; the guard makes this repeated call a
# no-op when it is installed instead of triggering another locked reinstall.
if (!requireNamespace("openxlsx", quietly = TRUE)) {
  install.packages("openxlsx")
}
Installing package into 'C:/Users/john/AppData/Local/R/win-library/4.5'
(as 'lib' is unspecified)
package 'openxlsx' successfully unpacked and MD5 sums checked
Warning: cannot remove prior installation of package 'openxlsx'
Warning in file.copy(savedcopy, lib, recursive = TRUE): problema al copiar
C:\Users\john\AppData\Local\R\win-library\4.5\00LOCK\openxlsx\libs\x64\openxlsx.dll
a C:\Users\john\AppData\Local\R\win-library\4.5\openxlsx\libs\x64\openxlsx.dll:
Permission denied
Warning: restored 'openxlsx'
The downloaded binary packages are in
C:\Users\john\AppData\Local\Temp\RtmpIVXfEx\downloaded_packages
# Core libraries for handling, cleaning and exporting the data
library("readr") # fast, efficient reading of CSV files
library("dplyr") # data manipulation (filter, mutate, group_by)
Adjuntando el paquete: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
library("purrr") # functional programming (map, walk)
Warning: package 'purrr' was built under R version 4.5.2
library("fs") # file handling and directory creation
Warning: package 'fs' was built under R version 4.5.2
library("openxlsx") # provides write.xlsx() used for the Excel exports
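Esta sección carga las librerías necesarias para procesar las bases de datos del Saber 11. En este bloque no se carga MASS; si se necesita más adelante, conviene cargarlo con `library(MASS, exclude = "select")` para evitar el conflicto entre `MASS::select()` y `dplyr::select()`.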
# LOAD THE ORIGINAL DATA
# readr guesses the four score columns below as logical (they show up as
# all-NA <lgl> in the rendered output), which would turn any numeric score
# further down the file into a parsing failure. Pin them to double so they
# share the type of PUNT_INGLES / PUNT_MATEMATICAS; every other column is
# still guessed.
# NOTE(review): COLE_COD_DANE_ESTABLECIMIENTO is guessed as double while
# COLE_COD_DANE_SEDE is character - confirm whether this code should be text.
saber11_2010 <- read_csv(
  "C:/Users/john/Desktop/Saber_11_2025/data/UNIDOS/Saber11_2010_UNIDO.csv",
  col_types = cols(
    .default = col_guess(),
    PUNT_SOCIALES_CIUDADANAS = col_double(),
    PUNT_C_NATURALES = col_double(),
    PUNT_LECTURA_CRITICA = col_double(),
    PUNT_GLOBAL = col_double()
  )
)
Rows: 670030 Columns: 52
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (43): ESTU_TIPODOCUMENTO, ESTU_CONSECUTIVO, COLE_AREA_UBICACION, COLE_BI...
dbl (5): PERIODO, COLE_COD_DANE_ESTABLECIMIENTO, PUNT_INGLES, PUNT_MATEMATI...
lgl (4): PUNT_SOCIALES_CIUDADANAS, PUNT_C_NATURALES, PUNT_LECTURA_CRITICA, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Confirm the load: print the column names, types and first values.
saber11_2010 %>%
  glimpse()
Rows: 670,030
Columns: 52
$ PERIODO <dbl> 20101, 20101, 20101, 20101, 20101, 20101…
$ ESTU_TIPODOCUMENTO <chr> "CC", "CC", "CC", "CC", "TI", "CC", "CC"…
$ ESTU_CONSECUTIVO <chr> "SB11201010113253", "SB11201010113253", …
$ COLE_AREA_UBICACION <chr> "URBANO", "URBANO", "URBANO", "URBANO", …
$ COLE_BILINGUE <chr> "N", "N", "N", NA, "N", "N", "N", "N", "…
$ COLE_CALENDARIO <chr> "A", "A", "A", "A", "A", "A", "A", "B", …
$ COLE_CARACTER <chr> "ACADÉMICO", "ACADÉMICO", "ACADÉMICO", "…
$ COLE_COD_DANE_ESTABLECIMIENTO <dbl> 325269003879, 325269003879, 325269003879…
$ COLE_COD_DANE_SEDE <chr> "325269003879", "325269003879", "3252690…
$ COLE_COD_DEPTO_UBICACION <chr> "25", "25", "25", "05", "25", "05", "05"…
$ COLE_COD_MCPIO_UBICACION <chr> "25269", "25269", "25269", "05172", "258…
$ COLE_CODIGO_ICFES <chr> "097022", "097022", "097022", "130948", …
$ COLE_DEPTO_UBICACION <chr> "CUNDINAMARCA", "CUNDINAMARCA", "CUNDINA…
$ COLE_GENERO <chr> "MIXTO", "MIXTO", "MIXTO", "MIXTO", "MIX…
$ COLE_JORNADA <chr> "MAÑANA", "MAÑANA", "MAÑANA", "SABATINA"…
$ COLE_MCPIO_UBICACION <chr> "FACATATIVÁ", "FACATATIVÁ", "FACATATIVÁ"…
$ COLE_NATURALEZA <chr> "NO OFICIAL", "NO OFICIAL", "NO OFICIAL"…
$ COLE_NOMBRE_ESTABLECIMIENTO <chr> "CENTRO DE ESTUDIOS HUITAKA", "CENTRO DE…
$ COLE_NOMBRE_SEDE <chr> "CENTRO DE ESTUDIOS HUITAKA", "CENTRO DE…
$ COLE_SEDE_PRINCIPAL <chr> "S", "S", "S", "S", "S", "S", "S", "S", …
$ ESTU_COD_DEPTO_PRESENTACION <chr> "25", "25", "25", "05", "25", "05", "05"…
$ ESTU_COD_MCPIO_PRESENTACION <chr> "25269", "25269", "25269", "05045", "258…
$ ESTU_COD_RESIDE_DEPTO <chr> "25", "25", "25", "05", "25", "05", "05"…
$ ESTU_COD_RESIDE_MCPIO <chr> "25260", "25260", "25260", "05172", "258…
$ ESTU_DEPTO_PRESENTACION <chr> "CUNDINAMARCA", "CUNDINAMARCA", "CUNDINA…
$ ESTU_DEPTO_RESIDE <chr> "CUNDINAMARCA", "CUNDINAMARCA", "CUNDINA…
$ ESTU_ESTADOINVESTIGACION <chr> "PUBLICAR", "PUBLICAR", "PUBLICAR", "PUB…
$ ESTU_ESTUDIANTE <chr> "ESTUDIANTE", "ESTUDIANTE", "ESTUDIANTE"…
$ ESTU_FECHANACIMIENTO <chr> "08/03/1990", "08/03/1990", "08/03/1990"…
$ ESTU_GENERO <chr> "M", "M", "M", "M", "F", "F", "F", "M", …
$ ESTU_MCPIO_PRESENTACION <chr> "FACATATIVÁ", "FACATATIVÁ", "FACATATIVÁ"…
$ ESTU_MCPIO_RESIDE <chr> "EL ROSAL", "EL ROSAL", "EL ROSAL", "CHI…
$ ESTU_NACIONALIDAD <chr> "COLOMBIA", "COLOMBIA", "COLOMBIA", "COL…
$ ESTU_PAIS_RESIDE <chr> "COLOMBIA", "COLOMBIA", "COLOMBIA", "COL…
$ ESTU_PRIVADO_LIBERTAD <chr> "N", "N", "N", "N", "N", "N", "N", "N", …
$ FAMI_CUARTOSHOGAR <chr> "Tres", "Tres", "Tres", "Dos", "Dos", "T…
$ FAMI_EDUCACIONMADRE <chr> "Secundaria (Bachillerato) completa", "S…
$ FAMI_EDUCACIONPADRE <chr> "Secundaria (Bachillerato) incompleta", …
$ FAMI_ESTRATOVIVIENDA <chr> "Estrato 2", "Estrato 2", "Estrato 2", "…
$ FAMI_PERSONASHOGAR <chr> "Tres", "Tres", "Tres", "Cinco", "Tres",…
$ FAMI_TIENEAUTOMOVIL <chr> "Si", "Si", "Si", "No", "No", "No", "No"…
$ FAMI_TIENECOMPUTADOR <chr> "Si", "Si", "Si", "No", "No", "No", "No"…
$ FAMI_TIENEINTERNET <chr> "Si", "Si", "Si", "No", "No", "No", "No"…
$ FAMI_TIENELAVADORA <chr> "Si", "Si", "Si", "No", "Si", "Si", "No"…
$ DESEMP_INGLES <chr> "A1", "A1", "A1", "A1", "A1", "A-", "A-"…
$ PUNT_INGLES <dbl> 44.92, 44.92, 44.92, 43.48, 43.48, 38.78…
$ PUNT_MATEMATICAS <dbl> 40.93, 40.93, 40.93, 49.52, 40.78, 29.11…
$ PUNT_SOCIALES_CIUDADANAS <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ PUNT_C_NATURALES <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ PUNT_LECTURA_CRITICA <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ PUNT_GLOBAL <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ ANIO <dbl> 2010, 2010, 2010, 2010, 2010, 2010, 2010…
# Print the column names of the raw 2010 table.
colnames(saber11_2010)
[1] "PERIODO" "ESTU_TIPODOCUMENTO"
[3] "ESTU_CONSECUTIVO" "COLE_AREA_UBICACION"
[5] "COLE_BILINGUE" "COLE_CALENDARIO"
[7] "COLE_CARACTER" "COLE_COD_DANE_ESTABLECIMIENTO"
[9] "COLE_COD_DANE_SEDE" "COLE_COD_DEPTO_UBICACION"
[11] "COLE_COD_MCPIO_UBICACION" "COLE_CODIGO_ICFES"
[13] "COLE_DEPTO_UBICACION" "COLE_GENERO"
[15] "COLE_JORNADA" "COLE_MCPIO_UBICACION"
[17] "COLE_NATURALEZA" "COLE_NOMBRE_ESTABLECIMIENTO"
[19] "COLE_NOMBRE_SEDE" "COLE_SEDE_PRINCIPAL"
[21] "ESTU_COD_DEPTO_PRESENTACION" "ESTU_COD_MCPIO_PRESENTACION"
[23] "ESTU_COD_RESIDE_DEPTO" "ESTU_COD_RESIDE_MCPIO"
[25] "ESTU_DEPTO_PRESENTACION" "ESTU_DEPTO_RESIDE"
[27] "ESTU_ESTADOINVESTIGACION" "ESTU_ESTUDIANTE"
[29] "ESTU_FECHANACIMIENTO" "ESTU_GENERO"
[31] "ESTU_MCPIO_PRESENTACION" "ESTU_MCPIO_RESIDE"
[33] "ESTU_NACIONALIDAD" "ESTU_PAIS_RESIDE"
[35] "ESTU_PRIVADO_LIBERTAD" "FAMI_CUARTOSHOGAR"
[37] "FAMI_EDUCACIONMADRE" "FAMI_EDUCACIONPADRE"
[39] "FAMI_ESTRATOVIVIENDA" "FAMI_PERSONASHOGAR"
[41] "FAMI_TIENEAUTOMOVIL" "FAMI_TIENECOMPUTADOR"
[43] "FAMI_TIENEINTERNET" "FAMI_TIENELAVADORA"
[45] "DESEMP_INGLES" "PUNT_INGLES"
[47] "PUNT_MATEMATICAS" "PUNT_SOCIALES_CIUDADANAS"
[49] "PUNT_C_NATURALES" "PUNT_LECTURA_CRITICA"
[51] "PUNT_GLOBAL" "ANIO"
# Keep one row per ESTU_CONSECUTIVO: sort so that, within each id, the row
# with the highest PERIODO comes first, then drop every later repeat of the id.
saber11_2010_limpio <- saber11_2010 %>%
  arrange(ESTU_CONSECUTIVO, desc(PERIODO)) %>%
  filter(!duplicated(ESTU_CONSECUTIVO))

# List the ids that occur more than once in the raw table.
saber11_2010 %>%
  count(ESTU_CONSECUTIVO) %>%
  filter(n > 1)
# A tibble: 25,509 × 2
ESTU_CONSECUTIVO n
<chr> <int>
1 SB11201010005533 2
2 SB11201010005539 2
3 SB11201010005540 2
4 SB11201010005552 2
5 SB11201010005568 2
6 SB11201010005569 2
7 SB11201010005575 2
8 SB11201010005582 2
9 SB11201010005583 2
10 SB11201010005584 2
# ℹ 25,499 more rows
# Sanity check on the cleaned table: list any ESTU_CONSECUTIVO that still
# appears more than once.
saber11_2010_limpio %>%
  count(ESTU_CONSECUTIVO) %>%
  filter(n > 1)
# A tibble: 0 × 2
# ℹ 2 variables: ESTU_CONSECUTIVO <chr>, n <int>
# Inspect the structure of the de-duplicated 2010 table.
saber11_2010_limpio %>%
  glimpse()
Rows: 570,944
Columns: 52
$ PERIODO <dbl> 20101, 20101, 20101, 20101, 20101, 20101…
$ ESTU_TIPODOCUMENTO <chr> "CC", "CC", "CC", "TI", "TI", "CC", "TI"…
$ ESTU_CONSECUTIVO <chr> "SB11201010000001", "SB11201010000020", …
$ COLE_AREA_UBICACION <chr> "URBANO", "URBANO", "URBANO", "URBANO", …
$ COLE_BILINGUE <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ COLE_CALENDARIO <chr> "A", "A", "A", "A", "A", "A", "A", "A", …
$ COLE_CARACTER <chr> "NO APLICA", "NO APLICA", "NO APLICA", "…
$ COLE_COD_DANE_ESTABLECIMIENTO <dbl> 341078000785, 341078000785, 341078000785…
$ COLE_COD_DANE_SEDE <chr> "341078000785", "341078000785", "3410780…
$ COLE_COD_DEPTO_UBICACION <chr> "41", "41", "41", "41", "41", "41", "41"…
$ COLE_COD_MCPIO_UBICACION <chr> "41078", "41078", "41078", "41078", "410…
$ COLE_CODIGO_ICFES <chr> "146415", "146415", "146415", "146415", …
$ COLE_DEPTO_UBICACION <chr> "HUILA", "HUILA", "HUILA", "HUILA", "HUI…
$ COLE_GENERO <chr> "MIXTO", "MIXTO", "MIXTO", "MIXTO", "MIX…
$ COLE_JORNADA <chr> "SABATINA", "SABATINA", "SABATINA", "SAB…
$ COLE_MCPIO_UBICACION <chr> "BARAYA", "BARAYA", "BARAYA", "BARAYA", …
$ COLE_NATURALEZA <chr> "NO OFICIAL", "NO OFICIAL", "NO OFICIAL"…
$ COLE_NOMBRE_ESTABLECIMIENTO <chr> "INSTITUTO DE EXCELENCIA HUMANA Y ACADÉM…
$ COLE_NOMBRE_SEDE <chr> "INSTITUTO DE EXCELENCIA HUMANA Y ACADÉM…
$ COLE_SEDE_PRINCIPAL <chr> "S", "S", "S", "S", "S", "S", "S", "S", …
$ ESTU_COD_DEPTO_PRESENTACION <chr> "95", "95", "95", "95", "95", "95", "95"…
$ ESTU_COD_MCPIO_PRESENTACION <chr> "95001", "95001", "95001", "95001", "950…
$ ESTU_COD_RESIDE_DEPTO <chr> "95", "95", NA, "95", "95", "95", "95", …
$ ESTU_COD_RESIDE_MCPIO <chr> "95001", "95001", NA, "95001", "95001", …
$ ESTU_DEPTO_PRESENTACION <chr> "GUAVIARE", "GUAVIARE", "GUAVIARE", "GUA…
$ ESTU_DEPTO_RESIDE <chr> "GUAVIARE", "GUAVIARE", NA, "GUAVIARE", …
$ ESTU_ESTADOINVESTIGACION <chr> "PUBLICAR", "PUBLICAR", "PUBLICAR", "PUB…
$ ESTU_ESTUDIANTE <chr> "ESTUDIANTE", "ESTUDIANTE", "ESTUDIANTE"…
$ ESTU_FECHANACIMIENTO <chr> "03/01/1973", "10/01/1989", "04/10/1984"…
$ ESTU_GENERO <chr> "F", "F", "M", "F", "M", "F", "M", "F", …
$ ESTU_MCPIO_PRESENTACION <chr> "SAN JOSÉ DEL GUAVIARE", "SAN JOSÉ DEL G…
$ ESTU_MCPIO_RESIDE <chr> "SAN JOSÉ DEL GUAVIARE", "SAN JOSÉ DEL G…
$ ESTU_NACIONALIDAD <chr> "COLOMBIA", "COLOMBIA", "COLOMBIA", "COL…
$ ESTU_PAIS_RESIDE <chr> "COLOMBIA", "COLOMBIA", "COLOMBIA", "COL…
$ ESTU_PRIVADO_LIBERTAD <chr> "N", "N", "N", "N", "N", "N", "N", "N", …
$ FAMI_CUARTOSHOGAR <chr> "Uno", "Dos", "Dos", "Uno", "Dos", "Dos"…
$ FAMI_EDUCACIONMADRE <chr> "No sabe", "No sabe", "No sabe", "No sab…
$ FAMI_EDUCACIONPADRE <chr> "No sabe", "No sabe", "No sabe", "No sab…
$ FAMI_ESTRATOVIVIENDA <chr> "Estrato 1", "Estrato 1", NA, "Estrato 1…
$ FAMI_PERSONASHOGAR <chr> "Dos", "Cinco", "Tres", "Tres", "Seis", …
$ FAMI_TIENEAUTOMOVIL <chr> "No", "No", "No", "No", "Si", "Si", "Si"…
$ FAMI_TIENECOMPUTADOR <chr> "No", "No", "No", "No", "Si", "No", "No"…
$ FAMI_TIENEINTERNET <chr> "No", "No", "No", "No", "No", "No", "No"…
$ FAMI_TIENELAVADORA <chr> "No", "No", "No", "No", "Si", "No", "No"…
$ DESEMP_INGLES <chr> "A-", "A-", "A1", "A-", "A1", "A1", "A1"…
$ PUNT_INGLES <dbl> 38.78, 37.10, 44.92, 35.25, 47.79, 43.48…
$ PUNT_MATEMATICAS <dbl> 40.78, 43.88, 37.56, 37.36, 40.78, 43.88…
$ PUNT_SOCIALES_CIUDADANAS <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ PUNT_C_NATURALES <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ PUNT_LECTURA_CRITICA <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ PUNT_GLOBAL <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ ANIO <dbl> 2010, 2010, 2010, 2010, 2010, 2010, 2010…
# Export the de-duplicated 2010 table as CSV and as an Excel workbook.
dir_salida_2010 <- "C:/Users/john/Desktop/Saber_11_2025/data/sinDuplicados"
# Create the output folder when it is missing; otherwise both writers stop
# with an error before producing any file.
dir.create(dir_salida_2010, recursive = TRUE, showWarnings = FALSE)
write_csv(
  saber11_2010_limpio,
  file.path(dir_salida_2010, "saber11_2010_limpio.csv")
)
write.xlsx(
  saber11_2010_limpio,
  file = file.path(dir_salida_2010, "saber11_2010_limpio.xlsx"),
  asTable = TRUE
)
# LOAD THE ORIGINAL DATA
# readr guesses the four score columns below as logical (they show up as
# all-NA <lgl> in the rendered output), which would turn any numeric score
# further down the file into a parsing failure. Pin them to double so they
# share the type of PUNT_INGLES / PUNT_MATEMATICAS; every other column is
# still guessed.
# NOTE(review): COLE_COD_DANE_ESTABLECIMIENTO is guessed as double while
# COLE_COD_DANE_SEDE is character - confirm whether this code should be text.
saber11_2011 <- read_csv(
  "C:/Users/john/Desktop/Saber_11_2025/data/UNIDOS/Saber11_2011_UNIDO.csv",
  col_types = cols(
    .default = col_guess(),
    PUNT_SOCIALES_CIUDADANAS = col_double(),
    PUNT_C_NATURALES = col_double(),
    PUNT_LECTURA_CRITICA = col_double(),
    PUNT_GLOBAL = col_double()
  )
)
Rows: 650420 Columns: 52
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (43): ESTU_TIPODOCUMENTO, ESTU_CONSECUTIVO, COLE_AREA_UBICACION, COLE_BI...
dbl (5): PERIODO, COLE_COD_DANE_ESTABLECIMIENTO, PUNT_INGLES, PUNT_MATEMATI...
lgl (4): PUNT_SOCIALES_CIUDADANAS, PUNT_C_NATURALES, PUNT_LECTURA_CRITICA, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Confirm the load: print the column names, types and first values.
saber11_2011 %>%
  glimpse()
Rows: 650,420
Columns: 52
$ PERIODO <dbl> 20111, 20111, 20111, 20111, 20111, 20111…
$ ESTU_TIPODOCUMENTO <chr> "TI", "TI", "TI", "TI", "TI", "CC", "CC"…
$ ESTU_CONSECUTIVO <chr> "SB11201110031540", "SB11201110071456", …
$ COLE_AREA_UBICACION <chr> "URBANO", "URBANO", "URBANO", "URBANO", …
$ COLE_BILINGUE <chr> "N", "N", "N", "S", NA, "N", "N", "N", "…
$ COLE_CALENDARIO <chr> "B", "OTRO", "B", "B", "A", "B", "A", "B…
$ COLE_CARACTER <chr> "ACADÉMICO", "NO APLICA", "ACADÉMICO", "…
$ COLE_COD_DANE_ESTABLECIMIENTO <dbl> 376001002082, 319001004475, 376001026925…
$ COLE_COD_DANE_SEDE <chr> "376001002082", "319001004475", "3760010…
$ COLE_COD_DEPTO_UBICACION <chr> "76", "19", "76", "11", "76", "25", "76"…
$ COLE_COD_MCPIO_UBICACION <chr> "76001", "19001", "76001", "11001", "767…
$ COLE_CODIGO_ICFES <chr> "017137", "093740", "036681", "130062", …
$ COLE_DEPTO_UBICACION <chr> "VALLE", "CAUCA", "VALLE", "BOGOTÁ", "VA…
$ COLE_GENERO <chr> "FEMENINO", "MIXTO", "MIXTO", "MIXTO", "…
$ COLE_JORNADA <chr> "MAÑANA", "MAÑANA", "MAÑANA", "COMPLETA"…
$ COLE_MCPIO_UBICACION <chr> "CALI", "POPAYÁN", "CALI", "BOGOTÁ D.C."…
$ COLE_NATURALEZA <chr> "NO OFICIAL", "NO OFICIAL", "NO OFICIAL"…
$ COLE_NOMBRE_ESTABLECIMIENTO <chr> "COLEGIO MARIA AUXILIADORA", "COL. LOS A…
$ COLE_NOMBRE_SEDE <chr> "COLEGIO MARIA AUXILIADORA", "COL. LOS A…
$ COLE_SEDE_PRINCIPAL <chr> "S", "S", "S", "S", "S", "S", "S", "S", …
$ ESTU_COD_DEPTO_PRESENTACION <chr> "76", "19", "76", "11", "76", "25", "76"…
$ ESTU_COD_MCPIO_PRESENTACION <chr> "76001", "19001", "76001", "11001", "767…
$ ESTU_COD_RESIDE_DEPTO <chr> "76", "19", "76", "11", "76", "25", NA, …
$ ESTU_COD_RESIDE_MCPIO <chr> "76001", "19001", "76001", "11001", "767…
$ ESTU_DEPTO_PRESENTACION <chr> "VALLE", "CAUCA", "VALLE", "BOGOTÁ", "VA…
$ ESTU_DEPTO_RESIDE <chr> "VALLE", "CAUCA", "VALLE", "BOGOTÁ", "VA…
$ ESTU_ESTADOINVESTIGACION <chr> "PUBLICAR", "PUBLICAR", "PUBLICAR", "PUB…
$ ESTU_ESTUDIANTE <chr> "ESTUDIANTE", "ESTUDIANTE", "ESTUDIANTE"…
$ ESTU_FECHANACIMIENTO <chr> "09/11/1994", "17/01/1994", "05/05/1993"…
$ ESTU_GENERO <chr> "F", "F", "M", "F", "F", "F", "M", "F", …
$ ESTU_MCPIO_PRESENTACION <chr> "CALI", "POPAYÁN", "CALI", "BOGOTÁ D.C."…
$ ESTU_MCPIO_RESIDE <chr> "CALI", "POPAYÁN", "CALI", "BOGOTÁ D.C."…
$ ESTU_NACIONALIDAD <chr> "COLOMBIA", "COLOMBIA", "COLOMBIA", "COL…
$ ESTU_PAIS_RESIDE <chr> "COLOMBIA", "COLOMBIA", "COLOMBIA", "COL…
$ ESTU_PRIVADO_LIBERTAD <chr> "N", "N", "N", "N", "N", "N", "N", "N", …
$ FAMI_CUARTOSHOGAR <chr> "Tres", "Dos", "Cinco", "Tres", "Tres", …
$ FAMI_EDUCACIONMADRE <chr> "Educación profesional incompleta", "Sec…
$ FAMI_EDUCACIONPADRE <chr> "Postgrado", "No sabe", "Postgrado", "Po…
$ FAMI_ESTRATOVIVIENDA <chr> "Estrato 4", "Estrato 1", "Estrato 4", "…
$ FAMI_PERSONASHOGAR <chr> "Cuatro", "Cuatro", "Cinco", "Cinco", "T…
$ FAMI_TIENEAUTOMOVIL <chr> "Si", "No", "Si", "Si", "No", "Si", "No"…
$ FAMI_TIENECOMPUTADOR <chr> "Si", "No", "Si", "Si", "Si", "Si", "Si"…
$ FAMI_TIENEINTERNET <chr> "Si", "No", "Si", "Si", "No", "Si", "Si"…
$ FAMI_TIENELAVADORA <chr> "Si", "No", "Si", "Si", "Si", "Si", "Si"…
$ DESEMP_INGLES <chr> "B1", "A-", "A2", "A2", "A1", "A2", "A1"…
$ PUNT_INGLES <dbl> 79.55, 36.06, 58.59, 59.90, 45.63, 61.22…
$ PUNT_MATEMATICAS <dbl> 60.14, 52.39, 59.94, 49.57, 38.84, 62.92…
$ PUNT_SOCIALES_CIUDADANAS <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ PUNT_C_NATURALES <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ PUNT_LECTURA_CRITICA <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ PUNT_GLOBAL <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ ANIO <dbl> 2011, 2011, 2011, 2011, 2011, 2011, 2011…
# Print the column names of the raw 2011 table.
colnames(saber11_2011)
[1] "PERIODO" "ESTU_TIPODOCUMENTO"
[3] "ESTU_CONSECUTIVO" "COLE_AREA_UBICACION"
[5] "COLE_BILINGUE" "COLE_CALENDARIO"
[7] "COLE_CARACTER" "COLE_COD_DANE_ESTABLECIMIENTO"
[9] "COLE_COD_DANE_SEDE" "COLE_COD_DEPTO_UBICACION"
[11] "COLE_COD_MCPIO_UBICACION" "COLE_CODIGO_ICFES"
[13] "COLE_DEPTO_UBICACION" "COLE_GENERO"
[15] "COLE_JORNADA" "COLE_MCPIO_UBICACION"
[17] "COLE_NATURALEZA" "COLE_NOMBRE_ESTABLECIMIENTO"
[19] "COLE_NOMBRE_SEDE" "COLE_SEDE_PRINCIPAL"
[21] "ESTU_COD_DEPTO_PRESENTACION" "ESTU_COD_MCPIO_PRESENTACION"
[23] "ESTU_COD_RESIDE_DEPTO" "ESTU_COD_RESIDE_MCPIO"
[25] "ESTU_DEPTO_PRESENTACION" "ESTU_DEPTO_RESIDE"
[27] "ESTU_ESTADOINVESTIGACION" "ESTU_ESTUDIANTE"
[29] "ESTU_FECHANACIMIENTO" "ESTU_GENERO"
[31] "ESTU_MCPIO_PRESENTACION" "ESTU_MCPIO_RESIDE"
[33] "ESTU_NACIONALIDAD" "ESTU_PAIS_RESIDE"
[35] "ESTU_PRIVADO_LIBERTAD" "FAMI_CUARTOSHOGAR"
[37] "FAMI_EDUCACIONMADRE" "FAMI_EDUCACIONPADRE"
[39] "FAMI_ESTRATOVIVIENDA" "FAMI_PERSONASHOGAR"
[41] "FAMI_TIENEAUTOMOVIL" "FAMI_TIENECOMPUTADOR"
[43] "FAMI_TIENEINTERNET" "FAMI_TIENELAVADORA"
[45] "DESEMP_INGLES" "PUNT_INGLES"
[47] "PUNT_MATEMATICAS" "PUNT_SOCIALES_CIUDADANAS"
[49] "PUNT_C_NATURALES" "PUNT_LECTURA_CRITICA"
[51] "PUNT_GLOBAL" "ANIO"
# Keep one row per ESTU_CONSECUTIVO: sort so that, within each id, the row
# with the highest PERIODO comes first, then drop every later repeat of the id.
saber11_2011_limpio <- saber11_2011 %>%
  arrange(ESTU_CONSECUTIVO, desc(PERIODO)) %>%
  filter(!duplicated(ESTU_CONSECUTIVO))

# List the ids that occur more than once in the raw table.
saber11_2011 %>%
  count(ESTU_CONSECUTIVO) %>%
  filter(n > 1)
# A tibble: 21,848 × 2
ESTU_CONSECUTIVO n
<chr> <int>
1 SB11201110004555 2
2 SB11201110004574 2
3 SB11201110004578 2
4 SB11201110004581 2
5 SB11201110004586 2
6 SB11201110004597 2
7 SB11201110004599 2
8 SB11201110004600 2
9 SB11201110004610 2
10 SB11201110004612 2
# ℹ 21,838 more rows
# Sanity check on the cleaned table: list any ESTU_CONSECUTIVO that still
# appears more than once.
saber11_2011_limpio %>%
  count(ESTU_CONSECUTIVO) %>%
  filter(n > 1)
# A tibble: 0 × 2
# ℹ 2 variables: ESTU_CONSECUTIVO <chr>, n <int>
# Inspect the structure of the de-duplicated 2011 table.
saber11_2011_limpio %>%
  glimpse()
Rows: 572,417
Columns: 52
$ PERIODO <dbl> 20111, 20111, 20111, 20111, 20111, 20111…
$ ESTU_TIPODOCUMENTO <chr> "TI", "CC", "CC", "CC", "TI", "CC", "CC"…
$ ESTU_CONSECUTIVO <chr> "SB11201110000001", "SB11201110000009", …
$ COLE_AREA_UBICACION <chr> "URBANO", "URBANO", "URBANO", "URBANO", …
$ COLE_BILINGUE <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ COLE_CALENDARIO <chr> "A", "A", "A", "A", "A", "A", "A", "A", …
$ COLE_CARACTER <chr> "NO APLICA", "NO APLICA", "NO APLICA", "…
$ COLE_COD_DANE_ESTABLECIMIENTO <dbl> 341078000785, 341078000785, 341078000785…
$ COLE_COD_DANE_SEDE <chr> "341078000785", "341078000785", "3410780…
$ COLE_COD_DEPTO_UBICACION <chr> "41", "41", "41", "41", "41", "41", "41"…
$ COLE_COD_MCPIO_UBICACION <chr> "41078", "41078", "41078", "41078", "410…
$ COLE_CODIGO_ICFES <chr> "146415", "146415", "146415", "146415", …
$ COLE_DEPTO_UBICACION <chr> "HUILA", "HUILA", "HUILA", "HUILA", "HUI…
$ COLE_GENERO <chr> "MIXTO", "MIXTO", "MIXTO", "MIXTO", "MIX…
$ COLE_JORNADA <chr> "SABATINA", "SABATINA", "SABATINA", "SAB…
$ COLE_MCPIO_UBICACION <chr> "BARAYA", "BARAYA", "BARAYA", "BARAYA", …
$ COLE_NATURALEZA <chr> "NO OFICIAL", "NO OFICIAL", "NO OFICIAL"…
$ COLE_NOMBRE_ESTABLECIMIENTO <chr> "INSTITUTO DE EXCELENCIA HUMANA Y ACADÉM…
$ COLE_NOMBRE_SEDE <chr> "INSTITUTO DE EXCELENCIA HUMANA Y ACADÉM…
$ COLE_SEDE_PRINCIPAL <chr> "S", "S", "S", "S", "S", "S", "S", "S", …
$ ESTU_COD_DEPTO_PRESENTACION <chr> "95", "95", "95", "95", "95", "95", "95"…
$ ESTU_COD_MCPIO_PRESENTACION <chr> "95001", "95001", "95001", "95001", "950…
$ ESTU_COD_RESIDE_DEPTO <chr> "95", "95", "95", "95", "95", "95", "95"…
$ ESTU_COD_RESIDE_MCPIO <chr> "95001", "95001", "95001", "95001", "950…
$ ESTU_DEPTO_PRESENTACION <chr> "GUAVIARE", "GUAVIARE", "GUAVIARE", "GUA…
$ ESTU_DEPTO_RESIDE <chr> "GUAVIARE", "GUAVIARE", "GUAVIARE", "GUA…
$ ESTU_ESTADOINVESTIGACION <chr> "PUBLICAR", "PUBLICAR", "PUBLICAR", "PUB…
$ ESTU_ESTUDIANTE <chr> "ESTUDIANTE", "ESTUDIANTE", "ESTUDIANTE"…
$ ESTU_FECHANACIMIENTO <chr> "04/08/1994", "17/02/1992", "21/07/1977"…
$ ESTU_GENERO <chr> "F", "F", "F", "M", "F", "M", "M", "F", …
$ ESTU_MCPIO_PRESENTACION <chr> "SAN JOSÉ DEL GUAVIARE", "SAN JOSÉ DEL G…
$ ESTU_MCPIO_RESIDE <chr> "SAN JOSÉ DEL GUAVIARE", "SAN JOSÉ DEL G…
$ ESTU_NACIONALIDAD <chr> "COLOMBIA", "COLOMBIA", "COLOMBIA", "COL…
$ ESTU_PAIS_RESIDE <chr> "COLOMBIA", "COLOMBIA", "COLOMBIA", "COL…
$ ESTU_PRIVADO_LIBERTAD <chr> "N", "N", "N", "N", "N", "N", "N", "N", …
$ FAMI_CUARTOSHOGAR <chr> "Dos", "Uno", "Uno", "Tres", "Uno", "Dos…
$ FAMI_EDUCACIONMADRE <chr> "Primaria completa", "Secundaria (Bachil…
$ FAMI_EDUCACIONPADRE <chr> "Primaria incompleta", "Secundaria (Bach…
$ FAMI_ESTRATOVIVIENDA <chr> "Estrato 1", NA, "Estrato 1", "Estrato 1…
$ FAMI_PERSONASHOGAR <chr> "Cuatro", "Dos", "Una", "Cinco", "Tres",…
$ FAMI_TIENEAUTOMOVIL <chr> "No", "No", "No", "Si", "No", "No", "No"…
$ FAMI_TIENECOMPUTADOR <chr> "Si", "No", "No", "Si", "No", "No", "No"…
$ FAMI_TIENEINTERNET <chr> "Si", "No", "No", "Si", "No", "No", "No"…
$ FAMI_TIENELAVADORA <chr> "Si", "No", "No", "Si", "No", "No", "No"…
$ DESEMP_INGLES <chr> "A-", "A-", "A1", "A-", "A1", "A-", "A-"…
$ PUNT_INGLES <dbl> 42.76, 39.87, 44.71, 39.87, 47.17, 38.08…
$ PUNT_MATEMATICAS <dbl> 59.77, 44.23, 32.42, 38.94, 41.69, 35.84…
$ PUNT_SOCIALES_CIUDADANAS <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ PUNT_C_NATURALES <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ PUNT_LECTURA_CRITICA <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ PUNT_GLOBAL <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ ANIO <dbl> 2011, 2011, 2011, 2011, 2011, 2011, 2011…
# Export the de-duplicated 2011 table as CSV and as an Excel workbook.
dir_salida_2011 <- "C:/Users/john/Desktop/Saber_11_2025/data/sinDuplicados"
# Create the output folder when it is missing; otherwise both writers stop
# with an error before producing any file.
dir.create(dir_salida_2011, recursive = TRUE, showWarnings = FALSE)
write_csv(
  saber11_2011_limpio,
  file.path(dir_salida_2011, "saber11_2011_limpio.csv")
)
write.xlsx(
  saber11_2011_limpio,
  file = file.path(dir_salida_2011, "saber11_2011_limpio.xlsx"),
  asTable = TRUE
)
# LOAD THE ORIGINAL DATA
# readr guesses the four score columns below as logical (they show up as
# all-NA <lgl> in the rendered output), which would turn any numeric score
# further down the file into a parsing failure. Pin them to double so they
# share the type of PUNT_INGLES / PUNT_MATEMATICAS; every other column is
# still guessed.
# NOTE(review): COLE_COD_DANE_ESTABLECIMIENTO is guessed as double while
# COLE_COD_DANE_SEDE is character - confirm whether this code should be text.
saber11_2012 <- read_csv(
  "C:/Users/john/Desktop/Saber_11_2025/data/UNIDOS/Saber11_2012_UNIDO.csv",
  col_types = cols(
    .default = col_guess(),
    PUNT_SOCIALES_CIUDADANAS = col_double(),
    PUNT_C_NATURALES = col_double(),
    PUNT_LECTURA_CRITICA = col_double(),
    PUNT_GLOBAL = col_double()
  )
)
Rows: 680388 Columns: 52
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (43): ESTU_TIPODOCUMENTO, ESTU_CONSECUTIVO, COLE_AREA_UBICACION, COLE_BI...
dbl (5): PERIODO, COLE_COD_DANE_ESTABLECIMIENTO, PUNT_INGLES, PUNT_MATEMATI...
lgl (4): PUNT_SOCIALES_CIUDADANAS, PUNT_C_NATURALES, PUNT_LECTURA_CRITICA, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Confirm the load: print the column names, types and first values.
saber11_2012 %>%
  glimpse()
Rows: 680,388
Columns: 52
$ PERIODO <dbl> 20121, 20121, 20121, 20121, 20121, 20121…
$ ESTU_TIPODOCUMENTO <chr> "CC", "TI", "TI", "TI", "TI", "TI", "TI"…
$ ESTU_CONSECUTIVO <chr> "SB11201210053765", "SB11201210038103", …
$ COLE_AREA_UBICACION <chr> "URBANO", "URBANO", "URBANO", "URBANO", …
$ COLE_BILINGUE <chr> "N", "N", "N", "N", "N", "N", "N", "N", …
$ COLE_CALENDARIO <chr> "A", "B", "B", "B", "B", "B", "B", "B", …
$ COLE_CARACTER <chr> "ACADÉMICO", "TÉCNICO", "TÉCNICO", "TÉCN…
$ COLE_COD_DANE_ESTABLECIMIENTO <dbl> 308001001633, 376001005910, 376001005910…
$ COLE_COD_DANE_SEDE <chr> "308001001633", "376001005910", "3760010…
$ COLE_COD_DEPTO_UBICACION <chr> "08", "76", "76", "76", "76", "76", "76"…
$ COLE_COD_MCPIO_UBICACION <chr> "08001", "76001", "76001", "76001", "760…
$ COLE_CODIGO_ICFES <chr> "085241", "034629", "034629", "034629", …
$ COLE_DEPTO_UBICACION <chr> "ATLANTICO", "VALLE", "VALLE", "VALLE", …
$ COLE_GENERO <chr> "MIXTO", "MIXTO", "MIXTO", "MIXTO", "MIX…
$ COLE_JORNADA <chr> "NOCHE", "MAÑANA", "MAÑANA", "MAÑANA", "…
$ COLE_MCPIO_UBICACION <chr> "BARRANQUILLA", "CALI", "CALI", "CALI", …
$ COLE_NATURALEZA <chr> "NO OFICIAL", "NO OFICIAL", "NO OFICIAL"…
$ COLE_NOMBRE_ESTABLECIMIENTO <chr> "CORPORACION EDUCATIVA Y TECNICA BOLIVAR…
$ COLE_NOMBRE_SEDE <chr> "CORP EDUC Y TEC BOLIVARIANA", "COL DE L…
$ COLE_SEDE_PRINCIPAL <chr> "S", "S", "S", "S", "S", "S", "S", "S", …
$ ESTU_COD_DEPTO_PRESENTACION <chr> "08", "76", "76", "76", "76", "76", "76"…
$ ESTU_COD_MCPIO_PRESENTACION <chr> "08001", "76001", "76001", "76001", "760…
$ ESTU_COD_RESIDE_DEPTO <chr> "08", "76", "76", "76", "76", "76", "76"…
$ ESTU_COD_RESIDE_MCPIO <chr> "08001", "76001", "76001", "76001", "760…
$ ESTU_DEPTO_PRESENTACION <chr> "ATLANTICO", "VALLE", "VALLE", "VALLE", …
$ ESTU_DEPTO_RESIDE <chr> "ATLANTICO", "VALLE", "VALLE", "VALLE", …
$ ESTU_ESTADOINVESTIGACION <chr> "PUBLICAR", "PUBLICAR", "PUBLICAR", "PUB…
$ ESTU_ESTUDIANTE <chr> "ESTUDIANTE", "ESTUDIANTE", "ESTUDIANTE"…
$ ESTU_FECHANACIMIENTO <chr> "19/10/1989", "29/08/1995", "29/08/1995"…
$ ESTU_GENERO <chr> "M", "F", "F", "F", "F", "F", "F", "F", …
$ ESTU_MCPIO_PRESENTACION <chr> "BARRANQUILLA", "CALI", "CALI", "CALI", …
$ ESTU_MCPIO_RESIDE <chr> "BARRANQUILLA", "CALI", "CALI", "CALI", …
$ ESTU_NACIONALIDAD <chr> "COLOMBIA", "COLOMBIA", "COLOMBIA", "COL…
$ ESTU_PAIS_RESIDE <chr> "COLOMBIA", "COLOMBIA", "COLOMBIA", "COL…
$ ESTU_PRIVADO_LIBERTAD <chr> "N", "N", "N", "N", "N", "N", "N", "N", …
$ FAMI_CUARTOSHOGAR <chr> "Tres", "Dos", "Dos", "Dos", "Dos", "Dos…
$ FAMI_EDUCACIONMADRE <chr> "Secundaria (Bachillerato) completa", "S…
$ FAMI_EDUCACIONPADRE <chr> "Secundaria (Bachillerato) completa", "S…
$ FAMI_ESTRATOVIVIENDA <chr> "Estrato 1", "Estrato 3", "Estrato 3", "…
$ FAMI_PERSONASHOGAR <chr> "Siete", "Tres", "Tres", "Tres", "Tres",…
$ FAMI_TIENEAUTOMOVIL <chr> "No", "No", "No", "No", "No", "No", "No"…
$ FAMI_TIENECOMPUTADOR <chr> "Si", "Si", "Si", "Si", "Si", "Si", "Si"…
$ FAMI_TIENEINTERNET <chr> "Si", "Si", "Si", "Si", "Si", "Si", "Si"…
$ FAMI_TIENELAVADORA <chr> "Si", "Si", "Si", "Si", "Si", "Si", "Si"…
$ DESEMP_INGLES <chr> "A-", "A-", "A-", "A-", "A-", "A-", "A-"…
$ PUNT_INGLES <dbl> 43, 42, 42, 42, 42, 42, 42, 42, 42, 42, …
$ PUNT_MATEMATICAS <dbl> 30, 46, 46, 46, 46, 46, 46, 46, 46, 46, …
$ PUNT_SOCIALES_CIUDADANAS <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ PUNT_C_NATURALES <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ PUNT_LECTURA_CRITICA <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ PUNT_GLOBAL <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ ANIO <dbl> 2012, 2012, 2012, 2012, 2012, 2012, 2012…
# List the column names of the raw 2012 table.
colnames(saber11_2012)
[1] "PERIODO" "ESTU_TIPODOCUMENTO"
[3] "ESTU_CONSECUTIVO" "COLE_AREA_UBICACION"
[5] "COLE_BILINGUE" "COLE_CALENDARIO"
[7] "COLE_CARACTER" "COLE_COD_DANE_ESTABLECIMIENTO"
[9] "COLE_COD_DANE_SEDE" "COLE_COD_DEPTO_UBICACION"
[11] "COLE_COD_MCPIO_UBICACION" "COLE_CODIGO_ICFES"
[13] "COLE_DEPTO_UBICACION" "COLE_GENERO"
[15] "COLE_JORNADA" "COLE_MCPIO_UBICACION"
[17] "COLE_NATURALEZA" "COLE_NOMBRE_ESTABLECIMIENTO"
[19] "COLE_NOMBRE_SEDE" "COLE_SEDE_PRINCIPAL"
[21] "ESTU_COD_DEPTO_PRESENTACION" "ESTU_COD_MCPIO_PRESENTACION"
[23] "ESTU_COD_RESIDE_DEPTO" "ESTU_COD_RESIDE_MCPIO"
[25] "ESTU_DEPTO_PRESENTACION" "ESTU_DEPTO_RESIDE"
[27] "ESTU_ESTADOINVESTIGACION" "ESTU_ESTUDIANTE"
[29] "ESTU_FECHANACIMIENTO" "ESTU_GENERO"
[31] "ESTU_MCPIO_PRESENTACION" "ESTU_MCPIO_RESIDE"
[33] "ESTU_NACIONALIDAD" "ESTU_PAIS_RESIDE"
[35] "ESTU_PRIVADO_LIBERTAD" "FAMI_CUARTOSHOGAR"
[37] "FAMI_EDUCACIONMADRE" "FAMI_EDUCACIONPADRE"
[39] "FAMI_ESTRATOVIVIENDA" "FAMI_PERSONASHOGAR"
[41] "FAMI_TIENEAUTOMOVIL" "FAMI_TIENECOMPUTADOR"
[43] "FAMI_TIENEINTERNET" "FAMI_TIENELAVADORA"
[45] "DESEMP_INGLES" "PUNT_INGLES"
[47] "PUNT_MATEMATICAS" "PUNT_SOCIALES_CIUDADANAS"
[49] "PUNT_C_NATURALES" "PUNT_LECTURA_CRITICA"
[51] "PUNT_GLOBAL" "ANIO"
# Keep one row per student: sort by ESTU_CONSECUTIVO with the highest PERIODO
# first, then drop every repeat of an ID after its first occurrence.
saber11_2012_limpio <- saber11_2012 %>%
  arrange(ESTU_CONSECUTIVO, desc(PERIODO)) %>%
  filter(!duplicated(ESTU_CONSECUTIVO))

# Show the IDs that occur more than once in the raw 2012 table.
saber11_2012 %>%
  count(ESTU_CONSECUTIVO) %>%
  filter(n > 1)
# A tibble: 25,181 × 2
ESTU_CONSECUTIVO n
<chr> <int>
1 SB11201210003000 5
2 SB11201210003004 5
3 SB11201210003006 5
4 SB11201210003008 5
5 SB11201210003009 5
6 SB11201210003010 5
7 SB11201210003011 5
8 SB11201210003013 5
9 SB11201210003016 5
10 SB11201210003017 5
# ℹ 25,171 more rows
# Sanity check: list any ESTU_CONSECUTIVO that still occurs more than once in
# the cleaned 2012 table (an empty result means no duplicates remain).
saber11_2012_limpio %>%
  count(ESTU_CONSECUTIVO) %>%
  filter(n > 1)
# A tibble: 0 × 2
# ℹ 2 variables: ESTU_CONSECUTIVO <chr>, n <int>
# Inspect the cleaned 2012 table (row count, column types, first values).
dplyr::glimpse(saber11_2012_limpio)
Rows: 580,551
Columns: 52
$ PERIODO <dbl> 20121, 20121, 20121, 20121, 20121, 20121…
$ ESTU_TIPODOCUMENTO <chr> "TI", "CC", "TI", "CC", "CC", "CC", "CC"…
$ ESTU_CONSECUTIVO <chr> "SB11201210000001", "SB11201210000002", …
$ COLE_AREA_UBICACION <chr> "URBANO", "URBANO", "URBANO", "URBANO", …
$ COLE_BILINGUE <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ COLE_CALENDARIO <chr> "A", "A", "A", "A", "A", "A", "A", "A", …
$ COLE_CARACTER <chr> "NO APLICA", "NO APLICA", "NO APLICA", "…
$ COLE_COD_DANE_ESTABLECIMIENTO <dbl> 341078000785, 341078000785, 341078000785…
$ COLE_COD_DANE_SEDE <chr> "341078000785", "341078000785", "3410780…
$ COLE_COD_DEPTO_UBICACION <chr> "41", "41", "41", "41", "41", "41", "41"…
$ COLE_COD_MCPIO_UBICACION <chr> "41078", "41078", "41078", "41078", "410…
$ COLE_CODIGO_ICFES <chr> "146415", "146415", "146415", "146415", …
$ COLE_DEPTO_UBICACION <chr> "HUILA", "HUILA", "HUILA", "HUILA", "HUI…
$ COLE_GENERO <chr> "MIXTO", "MIXTO", "MIXTO", "MIXTO", "MIX…
$ COLE_JORNADA <chr> "SABATINA", "SABATINA", "SABATINA", "SAB…
$ COLE_MCPIO_UBICACION <chr> "BARAYA", "BARAYA", "BARAYA", "BARAYA", …
$ COLE_NATURALEZA <chr> "NO OFICIAL", "NO OFICIAL", "NO OFICIAL"…
$ COLE_NOMBRE_ESTABLECIMIENTO <chr> "INSTITUTO DE EXCELENCIA HUMANA Y ACADÉM…
$ COLE_NOMBRE_SEDE <chr> "INSTITUTO DE EXCELENCIA HUMANA Y ACADÉM…
$ COLE_SEDE_PRINCIPAL <chr> "S", "S", "S", "S", "S", "S", "S", "S", …
$ ESTU_COD_DEPTO_PRESENTACION <chr> "95", "95", "95", "95", "95", "95", "95"…
$ ESTU_COD_MCPIO_PRESENTACION <chr> "95001", "95001", "95001", "95001", "950…
$ ESTU_COD_RESIDE_DEPTO <chr> "95", "95", "95", "95", "95", "95", "95"…
$ ESTU_COD_RESIDE_MCPIO <chr> "95001", "95001", "95001", "95001", "950…
$ ESTU_DEPTO_PRESENTACION <chr> "GUAVIARE", "GUAVIARE", "GUAVIARE", "GUA…
$ ESTU_DEPTO_RESIDE <chr> "GUAVIARE", "GUAVIARE", "GUAVIARE", "GUA…
$ ESTU_ESTADOINVESTIGACION <chr> "PUBLICAR", "PUBLICAR", "PUBLICAR", "PUB…
$ ESTU_ESTUDIANTE <chr> "ESTUDIANTE", "ESTUDIANTE", "ESTUDIANTE"…
$ ESTU_FECHANACIMIENTO <chr> "30/05/1994", "30/01/1969", "16/08/1994"…
$ ESTU_GENERO <chr> "M", "M", "M", "F", "F", "M", "M", "M", …
$ ESTU_MCPIO_PRESENTACION <chr> "SAN JOSÉ DEL GUAVIARE", "SAN JOSÉ DEL G…
$ ESTU_MCPIO_RESIDE <chr> "SAN JOSÉ DEL GUAVIARE", "SAN JOSÉ DEL G…
$ ESTU_NACIONALIDAD <chr> "COLOMBIA", "COLOMBIA", "COLOMBIA", "COL…
$ ESTU_PAIS_RESIDE <chr> "COLOMBIA", "COLOMBIA", "COLOMBIA", "COL…
$ ESTU_PRIVADO_LIBERTAD <chr> "N", "N", "N", "N", "N", "N", "N", "N", …
$ FAMI_CUARTOSHOGAR <chr> "Tres", "Dos", "Dos", "Dos", "Tres", "Tr…
$ FAMI_EDUCACIONMADRE <chr> "Educación profesional completa", "Educa…
$ FAMI_EDUCACIONPADRE <chr> "Secundaria (Bachillerato) incompleta", …
$ FAMI_ESTRATOVIVIENDA <chr> "Estrato 1", "Estrato 1", "Estrato 1", "…
$ FAMI_PERSONASHOGAR <chr> "Cuatro", "Cuatro", "Tres", "Tres", "Cin…
$ FAMI_TIENEAUTOMOVIL <chr> "No", "No", "No", "Si", "No", "No", "Si"…
$ FAMI_TIENECOMPUTADOR <chr> "Si", "Si", "No", "No", "No", "No", "Si"…
$ FAMI_TIENEINTERNET <chr> "No", "No", "No", "No", "No", "No", "Si"…
$ FAMI_TIENELAVADORA <chr> "No", "No", "No", "No", "No", "No", "Si"…
$ DESEMP_INGLES <chr> "A-", "A-", "A-", "A-", "A-", "A-", "A-"…
$ PUNT_INGLES <dbl> 37, 37, 33, 20, 42, 33, 26, 40, 37, 33, …
$ PUNT_MATEMATICAS <dbl> 51, 40, 54, 40, 37, 25, 33, 46, 19, 37, …
$ PUNT_SOCIALES_CIUDADANAS <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ PUNT_C_NATURALES <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ PUNT_LECTURA_CRITICA <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ PUNT_GLOBAL <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ ANIO <dbl> 2012, 2012, 2012, 2012, 2012, 2012, 2012…
# Export the cleaned 2012 table to the sinDuplicados folder, once as CSV and
# once as an Excel workbook (asTable = TRUE requests table formatting).
write_csv(
  x = saber11_2012_limpio,
  file = "C:/Users/john/Desktop/Saber_11_2025/data/sinDuplicados/saber11_2012_limpio.csv"
)
write.xlsx(
  x = saber11_2012_limpio,
  file = "C:/Users/john/Desktop/Saber_11_2025/data/sinDuplicados/saber11_2012_limpio.xlsx",
  asTable = TRUE
)
# LOAD ORIGINAL DATA (2013)
# readr guesses the four score columns below as logical for this file, while
# the 2014 and 2015 imports parse the same columns as double. Pin them to
# double so the per-year tables share column types; every other column keeps
# readr's guessed type via `.default = col_guess()`.
# `show_col_types = TRUE` keeps the column-specification message printed even
# though `col_types` is now supplied.
# NOTE(review): COLE_COD_DANE_SEDE is guessed as double here but as character
# in the 2012 import -- consider pinning it too if the years are combined.
saber11_2013 <- read_csv(
  "C:/Users/john/Desktop/Saber_11_2025/data/UNIDOS/Saber11_2013_UNIDO.csv",
  col_types = cols(
    PUNT_SOCIALES_CIUDADANAS = col_double(),
    PUNT_C_NATURALES = col_double(),
    PUNT_LECTURA_CRITICA = col_double(),
    PUNT_GLOBAL = col_double(),
    .default = col_guess()
  ),
  show_col_types = TRUE
)
Rows: 583775 Columns: 52
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (42): ESTU_TIPODOCUMENTO, ESTU_CONSECUTIVO, COLE_AREA_UBICACION, COLE_BI...
dbl (6): PERIODO, COLE_COD_DANE_ESTABLECIMIENTO, COLE_COD_DANE_SEDE, PUNT_I...
lgl (4): PUNT_SOCIALES_CIUDADANAS, PUNT_C_NATURALES, PUNT_LECTURA_CRITICA, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Confirm the import: print every column of the raw 2013 table with its type
# and first values.
dplyr::glimpse(saber11_2013)
Rows: 583,775
Columns: 52
$ PERIODO <dbl> 20131, 20131, 20131, 20131, 20131, 20131…
$ ESTU_TIPODOCUMENTO <chr> "CR", "TI", "CC", "CC", "CC", "CC", "CC"…
$ ESTU_CONSECUTIVO <chr> "SB11201310000414", "SB11201310023873", …
$ COLE_AREA_UBICACION <chr> "URBANO", "URBANO", "URBANO", "URBANO", …
$ COLE_BILINGUE <chr> "N", "N", "N", "N", NA, "N", "N", "N", "…
$ COLE_CALENDARIO <chr> "B", "B", "A", "A", "A", "A", "B", "A", …
$ COLE_CARACTER <chr> "ACADÉMICO", "ACADÉMICO", "ACADÉMICO", "…
$ COLE_COD_DANE_ESTABLECIMIENTO <dbl> 311848000812, 376001033859, 311001107599…
$ COLE_COD_DANE_SEDE <dbl> 311848000812, 376001033859, 311001107599…
$ COLE_COD_DEPTO_UBICACION <chr> "11", "76", "11", "11", "73", "54", "76"…
$ COLE_COD_MCPIO_UBICACION <chr> "11001", "76001", "11001", "11001", "732…
$ COLE_CODIGO_ICFES <chr> "022574", "118026", "108381", "074484", …
$ COLE_DEPTO_UBICACION <chr> "BOGOTA", "VALLE", "BOGOTA", "BOGOTA", "…
$ COLE_GENERO <chr> "FEMENINO", "MIXTO", "MIXTO", "MIXTO", "…
$ COLE_JORNADA <chr> "COMPLETA", "MAÑANA", "MAÑANA", "NOCHE",…
$ COLE_MCPIO_UBICACION <chr> "BOGOTÁ D.C.", "CALI", "BOGOTÁ D.C.", "B…
$ COLE_NATURALEZA <chr> "NO OFICIAL", "NO OFICIAL", "NO OFICIAL"…
$ COLE_NOMBRE_ESTABLECIMIENTO <chr> "GIMN FEMENINO", "COLEGIO PARROQUIAL NUE…
$ COLE_NOMBRE_SEDE <chr> "GIMN FEMENINO", "COLEGIO PARROQUIAL NUE…
$ COLE_SEDE_PRINCIPAL <chr> "S", "S", "S", "S", "S", "S", "S", "S", …
$ ESTU_COD_DEPTO_PRESENTACION <chr> "11", "76", "11", "11", "73", "54", "76"…
$ ESTU_COD_MCPIO_PRESENTACION <chr> "11001", "76001", "11001", "11001", "730…
$ ESTU_COD_RESIDE_DEPTO <chr> "11", "76", "25", "11", "73", "54", "76"…
$ ESTU_COD_RESIDE_MCPIO <chr> "11001", "76001", "25214", "11001", "732…
$ ESTU_DEPTO_PRESENTACION <chr> "BOGOTÁ", "VALLE", "BOGOTÁ", "BOGOTÁ", "…
$ ESTU_DEPTO_RESIDE <chr> "BOGOTÁ", "VALLE", "CUNDINAMARCA", "BOGO…
$ ESTU_ESTADOINVESTIGACION <chr> "PUBLICAR", "PUBLICAR", "PUBLICAR", "PUB…
$ ESTU_ESTUDIANTE <chr> "ESTUDIANTE", "ESTUDIANTE", "ESTUDIANTE"…
$ ESTU_FECHANACIMIENTO <chr> "08/08/1994", "17/12/1996", "18/12/1994"…
$ ESTU_GENERO <chr> "F", "F", "F", "M", "M", "F", "F", "M", …
$ ESTU_MCPIO_PRESENTACION <chr> "BOGOTÁ D.C.", "CALI", "BOGOTÁ D.C.", "B…
$ ESTU_MCPIO_RESIDE <chr> "BOGOTÁ D.C.", "CALI", "COTA", "BOGOTÁ D…
$ ESTU_NACIONALIDAD <chr> "COLOMBIA", "COLOMBIA", "COLOMBIA", "COL…
$ ESTU_PAIS_RESIDE <chr> "COLOMBIA", "COLOMBIA", "COLOMBIA", "COL…
$ ESTU_PRIVADO_LIBERTAD <chr> "N", "N", "N", "N", "N", "N", "N", "N", …
$ FAMI_CUARTOSHOGAR <chr> "Cuatro", "Tres", "Cuatro", "Cuatro", "C…
$ FAMI_EDUCACIONMADRE <chr> "Educación profesional completa", "Secun…
$ FAMI_EDUCACIONPADRE <chr> "Educación profesional completa", "Secun…
$ FAMI_ESTRATOVIVIENDA <chr> "Estrato 6", "Estrato 3", "Estrato 6", "…
$ FAMI_PERSONASHOGAR <chr> "Cuatro", "Cinco", "Cinco", "Dos", "Cuat…
$ FAMI_TIENEAUTOMOVIL <chr> "Si", "No", "Si", "No", "No", "No", "No"…
$ FAMI_TIENECOMPUTADOR <chr> "Si", "Si", "Si", "No", "Si", "No", "Si"…
$ FAMI_TIENEINTERNET <chr> "Si", "Si", "Si", "Si", "No", "No", "Si"…
$ FAMI_TIENELAVADORA <chr> "Si", "Si", "Si", "No", "No", "Si", "Si"…
$ DESEMP_INGLES <chr> "B+", "B1", "B1", "A-", "A-", "A-", "A1"…
$ PUNT_INGLES <dbl> 94, 68, 77, 39, 39, 36, 45, 42, 45, 72, …
$ PUNT_MATEMATICAS <dbl> 88, 57, 70, 38, 45, 45, 29, 49, 23, 51, …
$ PUNT_SOCIALES_CIUDADANAS <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ PUNT_C_NATURALES <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ PUNT_LECTURA_CRITICA <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ PUNT_GLOBAL <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ ANIO <dbl> 2013, 2013, 2013, 2013, 2013, 2013, 2013…
# names(saber11_2013)
# Keep one row per student: sort by ESTU_CONSECUTIVO with the highest PERIODO
# first, then drop every repeat of an ID after its first occurrence.
saber11_2013_limpio <- saber11_2013 %>%
  arrange(ESTU_CONSECUTIVO, desc(PERIODO)) %>%
  filter(!duplicated(ESTU_CONSECUTIVO))

# Show the IDs that occur more than once in the raw 2013 table.
saber11_2013 %>%
  count(ESTU_CONSECUTIVO) %>%
  filter(n > 1)
# A tibble: 1,891 × 2
ESTU_CONSECUTIVO n
<chr> <int>
1 SB11201310000127 11
2 SB11201310000429 11
3 SB11201310000885 11
4 SB11201310000903 11
5 SB11201310000917 11
6 SB11201310000949 11
7 SB11201310001087 11
8 SB11201310001405 11
9 SB11201310001520 11
10 SB11201310001521 11
# ℹ 1,881 more rows
# Sanity check: list any ESTU_CONSECUTIVO that still occurs more than once in
# the cleaned 2013 table (an empty result means no duplicates remain).
saber11_2013_limpio %>%
  count(ESTU_CONSECUTIVO) %>%
  filter(n > 1)
# A tibble: 0 × 2
# ℹ 2 variables: ESTU_CONSECUTIVO <chr>, n <int>
# Inspect the cleaned 2013 table (row count, column types, first values).
dplyr::glimpse(saber11_2013_limpio)
Rows: 576,094
Columns: 52
$ PERIODO <dbl> 20131, 20131, 20131, 20131, 20131, 20131…
$ ESTU_TIPODOCUMENTO <chr> "TI", "TI", "TI", "TI", "TI", "TI", "TI"…
$ ESTU_CONSECUTIVO <chr> "SB11201310000081", "SB11201310000110", …
$ COLE_AREA_UBICACION <chr> "URBANO", "URBANO", "URBANO", "URBANO", …
$ COLE_BILINGUE <chr> "N", "N", "N", "N", "N", "N", "N", "N", …
$ COLE_CALENDARIO <chr> "B", "B", "B", "B", "B", "B", "B", "B", …
$ COLE_CARACTER <chr> "ACADÉMICO", "ACADÉMICO", "ACADÉMICO", "…
$ COLE_COD_DANE_ESTABLECIMIENTO <dbl> 311848000812, 311848000812, 311848000812…
$ COLE_COD_DANE_SEDE <dbl> 311848000812, 311848000812, 311848000812…
$ COLE_COD_DEPTO_UBICACION <chr> "11", "11", "11", "11", "11", "76", "11"…
$ COLE_COD_MCPIO_UBICACION <chr> "11001", "11001", "11001", "11001", "110…
$ COLE_CODIGO_ICFES <chr> "022574", "022574", "022574", "022574", …
$ COLE_DEPTO_UBICACION <chr> "BOGOTA", "BOGOTA", "BOGOTA", "BOGOTA", …
$ COLE_GENERO <chr> "FEMENINO", "FEMENINO", "FEMENINO", "FEM…
$ COLE_JORNADA <chr> "COMPLETA", "COMPLETA", "COMPLETA", "COM…
$ COLE_MCPIO_UBICACION <chr> "BOGOTÁ D.C.", "BOGOTÁ D.C.", "BOGOTÁ D.…
$ COLE_NATURALEZA <chr> "NO OFICIAL", "NO OFICIAL", "NO OFICIAL"…
$ COLE_NOMBRE_ESTABLECIMIENTO <chr> "GIMN FEMENINO", "GIMN FEMENINO", "GIMN …
$ COLE_NOMBRE_SEDE <chr> "GIMN FEMENINO", "GIMN FEMENINO", "GIMN …
$ COLE_SEDE_PRINCIPAL <chr> "S", "S", "S", "S", "S", "S", "S", "S", …
$ ESTU_COD_DEPTO_PRESENTACION <chr> "25", "11", "11", "11", "11", "76", "11"…
$ ESTU_COD_MCPIO_PRESENTACION <chr> "25899", "11001", "11001", "11001", "110…
$ ESTU_COD_RESIDE_DEPTO <chr> "25", "11", "11", "11", "11", "76", "11"…
$ ESTU_COD_RESIDE_MCPIO <chr> "25126", "11001", "11001", "11001", "110…
$ ESTU_DEPTO_PRESENTACION <chr> "CUNDINAMARCA", "BOGOTÁ", "BOGOTÁ", "BOG…
$ ESTU_DEPTO_RESIDE <chr> "CUNDINAMARCA", "BOGOTÁ", "BOGOTÁ", "BOG…
$ ESTU_ESTADOINVESTIGACION <chr> "PUBLICAR", "PUBLICAR", "PUBLICAR", "PUB…
$ ESTU_ESTUDIANTE <chr> "ESTUDIANTE", "ESTUDIANTE", "ESTUDIANTE"…
$ ESTU_FECHANACIMIENTO <chr> "28/10/1995", "27/09/1995", "13/02/1996"…
$ ESTU_GENERO <chr> "F", "F", "F", "F", "F", "F", "F", "F", …
$ ESTU_MCPIO_PRESENTACION <chr> "ZIPAQUIRÁ", "BOGOTÁ D.C.", "BOGOTÁ D.C.…
$ ESTU_MCPIO_RESIDE <chr> "CAJICÁ", "BOGOTÁ D.C.", "BOGOTÁ D.C.", …
$ ESTU_NACIONALIDAD <chr> "COLOMBIA", "COLOMBIA", "COLOMBIA", "COL…
$ ESTU_PAIS_RESIDE <chr> "COLOMBIA", "COLOMBIA", "COLOMBIA", "COL…
$ ESTU_PRIVADO_LIBERTAD <chr> "N", "N", "N", "N", "N", "N", "N", "N", …
$ FAMI_CUARTOSHOGAR <chr> "Dos", "Tres", "Cuatro", "Tres", "Tres",…
$ FAMI_EDUCACIONMADRE <chr> "Educación profesional completa", "Educa…
$ FAMI_EDUCACIONPADRE <chr> "Educación profesional completa", "Educa…
$ FAMI_ESTRATOVIVIENDA <chr> "Estrato 2", "Estrato 6", "Estrato 4", "…
$ FAMI_PERSONASHOGAR <chr> "Tres", "Cuatro", "Seis", "Cuatro", "Cua…
$ FAMI_TIENEAUTOMOVIL <chr> "Si", "Si", "Si", "Si", "Si", "Si", "Si"…
$ FAMI_TIENECOMPUTADOR <chr> "Si", "Si", "Si", "Si", "Si", "Si", "Si"…
$ FAMI_TIENEINTERNET <chr> "Si", "Si", "Si", "Si", "Si", "Si", "Si"…
$ FAMI_TIENELAVADORA <chr> "Si", "Si", "Si", "Si", "Si", "Si", "Si"…
$ DESEMP_INGLES <chr> "B+", "B+", "B+", "B+", "B+", "A1", "B+"…
$ PUNT_INGLES <dbl> 92, 94, 83, 94, 88, 48, 100, 100, 36, 94…
$ PUNT_MATEMATICAS <dbl> 70, 83, 54, 63, 54, 45, 77, 74, 54, 82, …
$ PUNT_SOCIALES_CIUDADANAS <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ PUNT_C_NATURALES <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ PUNT_LECTURA_CRITICA <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ PUNT_GLOBAL <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ ANIO <dbl> 2013, 2013, 2013, 2013, 2013, 2013, 2013…
# Export the cleaned 2013 table to the sinDuplicados folder, once as CSV and
# once as an Excel workbook (asTable = TRUE requests table formatting).
write_csv(
  x = saber11_2013_limpio,
  file = "C:/Users/john/Desktop/Saber_11_2025/data/sinDuplicados/saber11_2013_limpio.csv"
)
write.xlsx(
  x = saber11_2013_limpio,
  file = "C:/Users/john/Desktop/Saber_11_2025/data/sinDuplicados/saber11_2013_limpio.xlsx",
  asTable = TRUE
)
# LOAD ORIGINAL DATA (2014)
# Read the 2014 CSV from the UNIDOS folder; column types are left to readr's
# guessing.
saber11_2014 <- read_csv(
  file = "C:/Users/john/Desktop/Saber_11_2025/data/UNIDOS/Saber11_2014_UNIDO.csv"
)
Rows: 571637 Columns: 52
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (42): ESTU_TIPODOCUMENTO, ESTU_CONSECUTIVO, COLE_AREA_UBICACION, COLE_BI...
dbl (10): PERIODO, COLE_COD_DANE_ESTABLECIMIENTO, COLE_COD_DANE_SEDE, PUNT_I...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Confirm the import: print every column of the raw 2014 table with its type
# and first values.
dplyr::glimpse(saber11_2014)
Rows: 571,637
Columns: 52
$ PERIODO <dbl> 20141, 20141, 20141, 20141, 20141, 20141…
$ ESTU_TIPODOCUMENTO <chr> "TI", "CC", "TI", "TI", "TI", "CC", "CC"…
$ ESTU_CONSECUTIVO <chr> "SB11201410008271", "SB11201410056678", …
$ COLE_AREA_UBICACION <chr> "URBANO", "URBANO", "URBANO", "RURAL", "…
$ COLE_BILINGUE <chr> "N", NA, NA, "N", "N", "N", NA, "N", "N"…
$ COLE_CALENDARIO <chr> "B", "A", "B", "B", "A", "A", "A", "B", …
$ COLE_CARACTER <chr> "ACADÉMICO", "ACADÉMICO", "ACADÉMICO", "…
$ COLE_COD_DANE_ESTABLECIMIENTO <dbl> 308573074895, 305266019977, 466001006826…
$ COLE_COD_DANE_SEDE <dbl> 308573074895, 305266019977, 466001006826…
$ COLE_COD_DEPTO_UBICACION <chr> "08", "05", "66", "25", "08", "05", "05"…
$ COLE_COD_MCPIO_UBICACION <chr> "08573", "05266", "66001", "25175", "080…
$ COLE_CODIGO_ICFES <chr> "063792", "129569", "164574", "062190", …
$ COLE_DEPTO_UBICACION <chr> "ATLANTICO", "ANTIOQUIA", "RISARALDA", "…
$ COLE_GENERO <chr> "MIXTO", "MIXTO", "MIXTO", "MIXTO", "MIX…
$ COLE_JORNADA <chr> "COMPLETA", "MAÑANA", "COMPLETA", "COMPL…
$ COLE_MCPIO_UBICACION <chr> "PUERTO COLOMBIA", "ENVIGADO", "PEREIRA"…
$ COLE_NATURALEZA <chr> "NO OFICIAL", "NO OFICIAL", "NO OFICIAL"…
$ COLE_NOMBRE_ESTABLECIMIENTO <chr> "COLEGIO CRISTIANO J. VENDER MURPHY", "P…
$ COLE_NOMBRE_SEDE <chr> "COLEGIO CRISTIANO J. VENDER MURPHY - SE…
$ COLE_SEDE_PRINCIPAL <chr> "S", "S", "S", "S", "S", "S", "S", "S", …
$ ESTU_COD_DEPTO_PRESENTACION <chr> "08", "05", "66", "11", "08", "05", "05"…
$ ESTU_COD_MCPIO_PRESENTACION <chr> "08001", "05001", "66001", "11001", "080…
$ ESTU_COD_RESIDE_DEPTO <chr> "08", "05", "66", "11", "08", "05", "05"…
$ ESTU_COD_RESIDE_MCPIO <chr> "08001", "05266", "66001", "11001", "080…
$ ESTU_DEPTO_PRESENTACION <chr> "ATLANTICO", "ANTIOQUIA", "RISARALDA", "…
$ ESTU_DEPTO_RESIDE <chr> "ATLANTICO", "ANTIOQUIA", "RISARALDA", "…
$ ESTU_ESTADOINVESTIGACION <chr> "PUBLICAR", "PUBLICAR", "PUBLICAR", "PUB…
$ ESTU_ESTUDIANTE <chr> "ESTUDIANTE", "ESTUDIANTE", "ESTUDIANTE"…
$ ESTU_FECHANACIMIENTO <chr> "03/02/1997", "19/01/1991", "02/10/1997"…
$ ESTU_GENERO <chr> "M", "F", "F", "F", "F", "F", "F", "M", …
$ ESTU_MCPIO_PRESENTACION <chr> "BARRANQUILLA", "MEDELLÍN", "PEREIRA", "…
$ ESTU_MCPIO_RESIDE <chr> "BARRANQUILLA", "ENVIGADO", "PEREIRA", "…
$ ESTU_NACIONALIDAD <chr> "COLOMBIA", "COLOMBIA", "COLOMBIA", "COL…
$ ESTU_PAIS_RESIDE <chr> "COLOMBIA", "COLOMBIA", "COLOMBIA", "COL…
$ ESTU_PRIVADO_LIBERTAD <chr> "N", "N", "N", "N", "N", "N", "N", "N", …
$ FAMI_CUARTOSHOGAR <chr> "Tres", "Dos", "Tres", "Tres", "Tres", "…
$ FAMI_EDUCACIONMADRE <chr> "Técnica o tecnológica completa", "Ningu…
$ FAMI_EDUCACIONPADRE <chr> "Educación profesional completa", "Ningu…
$ FAMI_ESTRATOVIVIENDA <chr> "Estrato 5", "Estrato 3", "Estrato 3", "…
$ FAMI_PERSONASHOGAR <chr> "Cuatro", "Dos", "Cuatro", "Cuatro", "Cu…
$ FAMI_TIENEAUTOMOVIL <chr> "Si", "No", "Si", "Si", "No", "No", "No"…
$ FAMI_TIENECOMPUTADOR <chr> "Si", "Si", "Si", "Si", "Si", "Si", "No"…
$ FAMI_TIENEINTERNET <chr> "Si", "Si", "Si", "Si", "Si", "Si", "No"…
$ FAMI_TIENELAVADORA <chr> "Si", "Si", "Si", "Si", "Si", "No", "No"…
$ DESEMP_INGLES <chr> "B1", "A-", "A1", "B1", "A-", "A1", "A-"…
$ PUNT_INGLES <dbl> 75, 40, 52, 80, 36, 45, 26, 72, 37, 94, …
$ PUNT_MATEMATICAS <dbl> 55, 48, 46, 63, 52, 30, 43, 73, 51, 63, …
$ PUNT_SOCIALES_CIUDADANAS <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ PUNT_C_NATURALES <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ PUNT_LECTURA_CRITICA <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ PUNT_GLOBAL <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ ANIO <dbl> 2014, 2014, 2014, 2014, 2014, 2014, 2014…
# names(saber11_2014)
# Keep one row per student: sort by ESTU_CONSECUTIVO with the highest PERIODO
# first, then drop every repeat of an ID after its first occurrence.
saber11_2014_limpio <- saber11_2014 %>%
  arrange(ESTU_CONSECUTIVO, desc(PERIODO)) %>%
  filter(!duplicated(ESTU_CONSECUTIVO))

# Show the IDs that occur more than once in the raw 2014 table.
saber11_2014 %>%
  count(ESTU_CONSECUTIVO) %>%
  filter(n > 1)
# A tibble: 2,209 × 2
ESTU_CONSECUTIVO n
<chr> <int>
1 SB11201410036433 2
2 SB11201410036437 2
3 SB11201410036471 2
4 SB11201410036502 2
5 SB11201410036536 2
6 SB11201410038302 2
7 SB11201410038466 2
8 SB11201410038542 2
9 SB11201410038836 2
10 SB11201410057803 2
# ℹ 2,199 more rows
# Sanity check: list any ESTU_CONSECUTIVO that still occurs more than once in
# the cleaned 2014 table (an empty result means no duplicates remain).
saber11_2014_limpio %>%
  count(ESTU_CONSECUTIVO) %>%
  filter(n > 1)
# A tibble: 0 × 2
# ℹ 2 variables: ESTU_CONSECUTIVO <chr>, n <int>
# Inspect the cleaned 2014 table (row count, column types, first values).
dplyr::glimpse(saber11_2014_limpio)
Rows: 569,428
Columns: 52
$ PERIODO <dbl> 20141, 20141, 20141, 20141, 20141, 20141…
$ ESTU_TIPODOCUMENTO <chr> "TI", "TI", "CC", "TI", "TI", "TI", "TI"…
$ ESTU_CONSECUTIVO <chr> "SB11201410000007", "SB11201410000008", …
$ COLE_AREA_UBICACION <chr> "URBANO", "URBANO", "URBANO", "URBANO", …
$ COLE_BILINGUE <chr> "S", "S", "S", "S", "S", "S", "S", "S", …
$ COLE_CALENDARIO <chr> "B", "B", "B", "B", "B", "B", "B", "B", …
$ COLE_CARACTER <chr> "ACADÉMICO", "ACADÉMICO", "ACADÉMICO", "…
$ COLE_COD_DANE_ESTABLECIMIENTO <dbl> 408573012835, 408573012835, 408573012835…
$ COLE_COD_DANE_SEDE <dbl> 408573012835, 408573012835, 408573012835…
$ COLE_COD_DEPTO_UBICACION <chr> "08", "08", "08", "08", "08", "08", "08"…
$ COLE_COD_MCPIO_UBICACION <chr> "08573", "08573", "08573", "08573", "085…
$ COLE_CODIGO_ICFES <chr> "003418", "003418", "003418", "003418", …
$ COLE_DEPTO_UBICACION <chr> "ATLANTICO", "ATLANTICO", "ATLANTICO", "…
$ COLE_GENERO <chr> "MIXTO", "MIXTO", "MIXTO", "MIXTO", "MIX…
$ COLE_JORNADA <chr> "COMPLETA", "COMPLETA", "COMPLETA", "COM…
$ COLE_MCPIO_UBICACION <chr> "PUERTO COLOMBIA", "PUERTO COLOMBIA", "P…
$ COLE_NATURALEZA <chr> "NO OFICIAL", "NO OFICIAL", "NO OFICIAL"…
$ COLE_NOMBRE_ESTABLECIMIENTO <chr> "COLEGIO KARL C PARRISH", "COLEGIO KARL …
$ COLE_NOMBRE_SEDE <chr> "COLEGIO KARL C PARRISH", "COLEGIO KARL …
$ COLE_SEDE_PRINCIPAL <chr> "S", "S", "S", "S", "S", "S", "S", "S", …
$ ESTU_COD_DEPTO_PRESENTACION <chr> "08", "08", "08", "08", "08", "08", "08"…
$ ESTU_COD_MCPIO_PRESENTACION <chr> "08001", "08001", "08001", "08001", "080…
$ ESTU_COD_RESIDE_DEPTO <chr> "08", "08", "08", "08", "08", "08", "08"…
$ ESTU_COD_RESIDE_MCPIO <chr> "08001", "08001", "08001", "08001", "080…
$ ESTU_DEPTO_PRESENTACION <chr> "ATLANTICO", "ATLANTICO", "ATLANTICO", "…
$ ESTU_DEPTO_RESIDE <chr> "ATLANTICO", "ATLANTICO", "ATLANTICO", "…
$ ESTU_ESTADOINVESTIGACION <chr> "PUBLICAR", "PUBLICAR", "PUBLICAR", "PUB…
$ ESTU_ESTUDIANTE <chr> "ESTUDIANTE", "ESTUDIANTE", "ESTUDIANTE"…
$ ESTU_FECHANACIMIENTO <chr> "23/02/1996", "16/05/1996", "21/01/1996"…
$ ESTU_GENERO <chr> "F", "M", "M", "F", "F", "M", "F", "F", …
$ ESTU_MCPIO_PRESENTACION <chr> "BARRANQUILLA", "BARRANQUILLA", "BARRANQ…
$ ESTU_MCPIO_RESIDE <chr> "BARRANQUILLA", "BARRANQUILLA", "BARRANQ…
$ ESTU_NACIONALIDAD <chr> "COLOMBIA", "COLOMBIA", "COLOMBIA", "COL…
$ ESTU_PAIS_RESIDE <chr> "COLOMBIA", "COLOMBIA", "COLOMBIA", "COL…
$ ESTU_PRIVADO_LIBERTAD <chr> "N", "N", "N", "N", "N", "N", "N", "N", …
$ FAMI_CUARTOSHOGAR <chr> "Tres", "Tres", "Cuatro", "Tres", "Cuatr…
$ FAMI_EDUCACIONMADRE <chr> "Postgrado", "Postgrado", "Educación pro…
$ FAMI_EDUCACIONPADRE <chr> "Postgrado", "Postgrado", "Educación pro…
$ FAMI_ESTRATOVIVIENDA <chr> "Estrato 6", "Estrato 6", "Estrato 6", "…
$ FAMI_PERSONASHOGAR <chr> "Cinco", "Cinco", "Cuatro", "Cuatro", "C…
$ FAMI_TIENEAUTOMOVIL <chr> "Si", "Si", "Si", "Si", "Si", "Si", "Si"…
$ FAMI_TIENECOMPUTADOR <chr> "Si", "Si", "Si", "Si", "Si", "Si", "Si"…
$ FAMI_TIENEINTERNET <chr> "Si", "Si", "Si", "Si", "Si", "Si", "Si"…
$ FAMI_TIENELAVADORA <chr> "Si", "Si", "Si", "Si", "Si", "Si", "Si"…
$ DESEMP_INGLES <chr> "B+", "B1", "B+", "B+", "B+", "B+", "B1"…
$ PUNT_INGLES <dbl> 89, 80, 89, 89, 83, 89, 80, 83, 77, 83, …
$ PUNT_MATEMATICAS <dbl> 84, 63, 60, 92, 69, 66, 53, 55, 69, 91, …
$ PUNT_SOCIALES_CIUDADANAS <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ PUNT_C_NATURALES <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ PUNT_LECTURA_CRITICA <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ PUNT_GLOBAL <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ ANIO <dbl> 2014, 2014, 2014, 2014, 2014, 2014, 2014…
# Export the cleaned 2014 table to the sinDuplicados folder, once as CSV and
# once as an Excel workbook (asTable = TRUE requests table formatting).
write_csv(
  x = saber11_2014_limpio,
  file = "C:/Users/john/Desktop/Saber_11_2025/data/sinDuplicados/saber11_2014_limpio.csv"
)
write.xlsx(
  x = saber11_2014_limpio,
  file = "C:/Users/john/Desktop/Saber_11_2025/data/sinDuplicados/saber11_2014_limpio.xlsx",
  asTable = TRUE
)
# LOAD ORIGINAL DATA (2015)
# Read the 2015 CSV from the UNIDOS folder; column types are left to readr's
# guessing.
saber11_2015 <- read_csv(
  file = "C:/Users/john/Desktop/Saber_11_2025/data/UNIDOS/Saber11_2015_UNIDO.csv"
)
Rows: 570464 Columns: 52
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (42): ESTU_TIPODOCUMENTO, ESTU_CONSECUTIVO, COLE_AREA_UBICACION, COLE_BI...
dbl (10): PERIODO, COLE_COD_DANE_ESTABLECIMIENTO, COLE_COD_DANE_SEDE, PUNT_I...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Confirm the import: print every column of the raw 2015 table with its type
# and first values.
dplyr::glimpse(saber11_2015)
Rows: 570,464
Columns: 52
$ PERIODO <dbl> 20151, 20151, 20151, 20151, 20151, 20151…
$ ESTU_TIPODOCUMENTO <chr> "CC", "TI", "CC", "TI", "CC", "CC", "TI"…
$ ESTU_CONSECUTIVO <chr> "SB11201510082054", "SB11201510022100", …
$ COLE_AREA_UBICACION <chr> "URBANO", "RURAL", "URBANO", "RURAL", "U…
$ COLE_BILINGUE <chr> "N", "S", "S", NA, "N", "N", "N", "N", "…
$ COLE_CALENDARIO <chr> "OTRO", "B", "B", "B", "A", "B", "B", "B…
$ COLE_CARACTER <chr> "ACADÉMICO", "ACADÉMICO", "ACADÉMICO", "…
$ COLE_COD_DANE_ESTABLECIMIENTO <dbl> 311001105863, 325175000549, 311848003072…
$ COLE_COD_DANE_SEDE <dbl> 311001105863, 325175000549, 311848003072…
$ COLE_COD_DEPTO_UBICACION <chr> "11", "25", "11", "25", "05", "76", "76"…
$ COLE_COD_MCPIO_UBICACION <chr> "11001", "25175", "11001", "25126", "050…
$ COLE_CODIGO_ICFES <chr> "088617", "042473", "066431", "164145", …
$ COLE_DEPTO_UBICACION <chr> "BOGOTA", "CUNDINAMARCA", "BOGOTA", "CUN…
$ COLE_GENERO <chr> "MIXTO", "MIXTO", "MIXTO", "MIXTO", "MIX…
$ COLE_JORNADA <chr> "MAÑANA", "COMPLETA", "COMPLETA", "COMPL…
$ COLE_MCPIO_UBICACION <chr> "BOGOTÁ D.C.", "CHIA", "BOGOTÁ D.C.", "C…
$ COLE_NATURALEZA <chr> "NO OFICIAL", "NO OFICIAL", "NO OFICIAL"…
$ COLE_NOMBRE_ESTABLECIMIENTO <chr> "CENTRO JOHANN KEPLER", "GIMN BRITANICO"…
$ COLE_NOMBRE_SEDE <chr> "CENT JOHANN KEPLER", "GIMN BRITANICO", …
$ COLE_SEDE_PRINCIPAL <chr> "S", "S", "S", "S", "S", "S", "S", "S", …
$ ESTU_COD_DEPTO_PRESENTACION <chr> "25", "11", "11", "11", "05", "76", "76"…
$ ESTU_COD_MCPIO_PRESENTACION <chr> "25899", "11001", "11001", "11001", "050…
$ ESTU_COD_RESIDE_DEPTO <chr> "25", "11", "11", "11", "05", "76", "76"…
$ ESTU_COD_RESIDE_MCPIO <chr> "25175", "11001", "11001", "11001", "050…
$ ESTU_DEPTO_PRESENTACION <chr> "CUNDINAMARCA", "BOGOTÁ", "BOGOTÁ", "BOG…
$ ESTU_DEPTO_RESIDE <chr> "CUNDINAMARCA", "BOGOTÁ", "BOGOTÁ", "BOG…
$ ESTU_ESTADOINVESTIGACION <chr> "PUBLICAR", "PUBLICAR", "PUBLICAR", "PUB…
$ ESTU_ESTUDIANTE <chr> "ESTUDIANTE", "ESTUDIANTE", "ESTUDIANTE"…
$ ESTU_FECHANACIMIENTO <chr> "06/09/1990", "27/08/1998", "19/12/1996"…
$ ESTU_GENERO <chr> "F", "F", "F", "F", "F", "F", "F", "F", …
$ ESTU_MCPIO_PRESENTACION <chr> "ZIPAQUIRÁ", "BOGOTÁ D.C.", "BOGOTÁ D.C.…
$ ESTU_MCPIO_RESIDE <chr> "CHÍA", "BOGOTÁ D.C.", "BOGOTÁ D.C.", "B…
$ ESTU_NACIONALIDAD <chr> "COLOMBIA", "COLOMBIA", "COLOMBIA", "COL…
$ ESTU_PAIS_RESIDE <chr> "COLOMBIA", "COLOMBIA", "COLOMBIA", "COL…
$ ESTU_PRIVADO_LIBERTAD <chr> "N", "N", "N", "N", "N", "N", "N", "N", …
$ FAMI_CUARTOSHOGAR <chr> "Dos", "Cuatro", "Tres", "Dos", "Tres", …
$ FAMI_EDUCACIONMADRE <chr> "Técnica o tecnológica completa", "Educa…
$ FAMI_EDUCACIONPADRE <chr> "Técnica o tecnológica completa", "Educa…
$ FAMI_ESTRATOVIVIENDA <chr> "Estrato 2", "Estrato 4", "Estrato 5", "…
$ FAMI_PERSONASHOGAR <chr> "Tres", "Cinco", "Cuatro", "Tres", "Cinc…
$ FAMI_TIENEAUTOMOVIL <chr> "No", "Si", "Si", "Si", "No", "No", "No"…
$ FAMI_TIENECOMPUTADOR <chr> "Si", "Si", "Si", "Si", "No", "Si", "Si"…
$ FAMI_TIENEINTERNET <chr> "Si", "Si", "Si", "Si", "No", "Si", "Si"…
$ FAMI_TIENELAVADORA <chr> "Si", "Si", "Si", "Si", "Si", "Si", "No"…
$ DESEMP_INGLES <chr> "A-", "B1", "B+", "B+", "A-", "A-", "A2"…
$ PUNT_INGLES <dbl> 42, 80, 90, 90, 45, 47, 64, 65, 41, 43, …
$ PUNT_MATEMATICAS <dbl> 43, 68, 61, 81, 39, 48, 56, 64, 47, 41, …
$ PUNT_SOCIALES_CIUDADANAS <dbl> 46, 58, 72, 87, 36, 54, 64, 66, 43, 48, …
$ PUNT_C_NATURALES <dbl> 52, 75, 81, 93, 42, 52, 58, 64, 28, 34, …
$ PUNT_LECTURA_CRITICA <dbl> 45, 64, 72, 72, 39, 52, 49, 71, 44, 41, …
$ PUNT_GLOBAL <dbl> 231, 337, 365, 419, 197, 256, 287, 331, …
$ ANIO <dbl> 2015, 2015, 2015, 2015, 2015, 2015, 2015…
# names(saber11_2015)
# Keep one row per student: sort by ESTU_CONSECUTIVO with the highest PERIODO
# first, then drop every repeat of an ID after its first occurrence.
saber11_2015_limpio <- saber11_2015 %>%
  arrange(ESTU_CONSECUTIVO, desc(PERIODO)) %>%
  filter(!duplicated(ESTU_CONSECUTIVO))

# Show the IDs that occur more than once in the raw 2015 table.
saber11_2015 %>%
  count(ESTU_CONSECUTIVO) %>%
  filter(n > 1)
# A tibble: 2,067 × 2
ESTU_CONSECUTIVO n
<chr> <int>
1 SB11201510035421 2
2 SB11201510035483 2
3 SB11201510051831 2
4 SB11201510052037 2
5 SB11201510052282 2
6 SB11201510052374 2
7 SB11201510052433 2
8 SB11201510052603 2
9 SB11201510053177 2
10 SB11201510053239 2
# ℹ 2,057 more rows
# Sanity check: list any ESTU_CONSECUTIVO that still occurs more than once in
# the cleaned 2015 table (an empty result means no duplicates remain).
saber11_2015_limpio %>%
  count(ESTU_CONSECUTIVO) %>%
  filter(n > 1)
# A tibble: 0 × 2
# ℹ 2 variables: ESTU_CONSECUTIVO <chr>, n <int>
# Inspect the cleaned 2015 table (row count, column types, first values).
dplyr::glimpse(saber11_2015_limpio)
Rows: 568,397
Columns: 52
$ PERIODO <dbl> 20151, 20151, 20151, 20151, 20151, 20151…
$ ESTU_TIPODOCUMENTO <chr> "TI", "TI", "TI", "TI", "TI", "TI", "TI"…
$ ESTU_CONSECUTIVO <chr> "SB11201510000182", "SB11201510000183", …
$ COLE_AREA_UBICACION <chr> "URBANO", "URBANO", "URBANO", "URBANO", …
$ COLE_BILINGUE <chr> "S", "S", "N", "N", "N", "N", "N", "N", …
$ COLE_CALENDARIO <chr> "B", "B", "B", "B", "B", "B", "B", "B", …
$ COLE_CARACTER <chr> "ACADÉMICO", "ACADÉMICO", "ACADÉMICO", "…
$ COLE_COD_DANE_ESTABLECIMIENTO <dbl> 308001000882, 308001000882, 319001001180…
$ COLE_COD_DANE_SEDE <dbl> 308001000882, 308001000882, 319001001180…
$ COLE_COD_DEPTO_UBICACION <chr> "08", "08", "19", "19", "19", "19", "19"…
$ COLE_COD_MCPIO_UBICACION <chr> "08001", "08001", "19001", "19001", "190…
$ COLE_CODIGO_ICFES <chr> "003046", "003046", "050625", "050625", …
$ COLE_DEPTO_UBICACION <chr> "ATLANTICO", "ATLANTICO", "CAUCA", "CAUC…
$ COLE_GENERO <chr> "MIXTO", "MIXTO", "MIXTO", "MIXTO", "MIX…
$ COLE_JORNADA <chr> "COMPLETA", "COMPLETA", "COMPLETA", "COM…
$ COLE_MCPIO_UBICACION <chr> "BARRANQUILLA", "BARRANQUILLA", "POPAYAN…
$ COLE_NATURALEZA <chr> "NO OFICIAL", "NO OFICIAL", "NO OFICIAL"…
$ COLE_NOMBRE_ESTABLECIMIENTO <chr> "CORP CULT COL ALEMAN", "CORP CULT COL A…
$ COLE_NOMBRE_SEDE <chr> "CORP CULT COL ALEMAN", "CORP CULT COL A…
$ COLE_SEDE_PRINCIPAL <chr> "S", "S", "S", "S", "S", "S", "S", "S", …
$ ESTU_COD_DEPTO_PRESENTACION <chr> "08", "08", "19", "19", "19", "19", "19"…
$ ESTU_COD_MCPIO_PRESENTACION <chr> "08001", "08001", "19001", "19001", "190…
$ ESTU_COD_RESIDE_DEPTO <chr> "08", "08", "19", "19", "19", "19", "19"…
$ ESTU_COD_RESIDE_MCPIO <chr> "08001", "08001", "19001", "19001", "190…
$ ESTU_DEPTO_PRESENTACION <chr> "ATLANTICO", "ATLANTICO", "CAUCA", "CAUC…
$ ESTU_DEPTO_RESIDE <chr> "ATLANTICO", "ATLANTICO", "CAUCA", "CAUC…
$ ESTU_ESTADOINVESTIGACION <chr> "PUBLICAR", "PUBLICAR", "PUBLICAR", "PUB…
$ ESTU_ESTUDIANTE <chr> "ESTUDIANTE", "ESTUDIANTE", "ESTUDIANTE"…
$ ESTU_FECHANACIMIENTO <chr> "08/11/1997", "20/12/1997", "14/12/1998"…
$ ESTU_GENERO <chr> "M", "M", "F", "M", "M", "F", "M", "F", …
$ ESTU_MCPIO_PRESENTACION <chr> "BARRANQUILLA", "BARRANQUILLA", "POPAYÁN…
$ ESTU_MCPIO_RESIDE <chr> "BARRANQUILLA", "BARRANQUILLA", "POPAYÁN…
$ ESTU_NACIONALIDAD <chr> "COLOMBIA", "COLOMBIA", "COLOMBIA", "COL…
$ ESTU_PAIS_RESIDE <chr> "COLOMBIA", "COLOMBIA", "COLOMBIA", "COL…
$ ESTU_PRIVADO_LIBERTAD <chr> "N", "N", "N", "N", "N", "N", "N", "N", …
$ FAMI_CUARTOSHOGAR <chr> "Cuatro", "Tres", "Tres", "Dos", "Seis",…
$ FAMI_EDUCACIONMADRE <chr> "Postgrado", "Educación profesional comp…
$ FAMI_EDUCACIONPADRE <chr> "Postgrado", "Educación profesional comp…
$ FAMI_ESTRATOVIVIENDA <chr> "Estrato 5", "Estrato 4", "Estrato 3", "…
$ FAMI_PERSONASHOGAR <chr> "Cinco", "Cuatro", "Tres", "Tres", "Seis…
$ FAMI_TIENEAUTOMOVIL <chr> "Si", "Si", "Si", "Si", "Si", "Si", "Si"…
$ FAMI_TIENECOMPUTADOR <chr> "Si", "Si", "Si", "Si", "No", "Si", "Si"…
$ FAMI_TIENEINTERNET <chr> "Si", "Si", "Si", "Si", "No", "Si", "Si"…
$ FAMI_TIENELAVADORA <chr> "Si", "Si", "Si", "Si", "Si", "Si", "Si"…
$ DESEMP_INGLES <chr> "B+", "A2", "A1", "B1", "A2", "B1", "A2"…
$ PUNT_INGLES <dbl> 90, 68, 53, 76, 66, 76, 59, 72, 85, 78, …
$ PUNT_MATEMATICAS <dbl> 58, 62, 61, 86, 79, 67, 65, 68, 76, 91, …
$ PUNT_SOCIALES_CIUDADANAS <dbl> 62, 53, 60, 77, 60, 51, 64, 55, 87, 72, …
$ PUNT_C_NATURALES <dbl> 63, 64, 55, 74, 76, 57, 76, 69, 73, 78, …
$ PUNT_LECTURA_CRITICA <dbl> 77, 64, 58, 69, 55, 58, 62, 62, 77, 59, …
$ PUNT_GLOBAL <dbl> 335, 307, 290, 382, 337, 298, 331, 321, …
$ ANIO <dbl> 2015, 2015, 2015, 2015, 2015, 2015, 2015…
# Persist the deduplicated 2015 data in two formats.
# 1) CSV copy.
write_csv(
  saber11_2015_limpio,
  "C:/Users/john/Desktop/Saber_11_2025/data/sinDuplicados/saber11_2015_limpio.csv"
)
# 2) Excel workbook. asTable = TRUE is forwarded to write.xlsx()
#    (presumably openxlsx, which writes the sheet as an Excel table
#    -- TODO confirm which package provides write.xlsx here).
write.xlsx(
  x = saber11_2015_limpio,
  asTable = TRUE,
  file = "C:/Users/john/Desktop/Saber_11_2025/data/sinDuplicados/saber11_2015_limpio.xlsx"
)
# LOAD ORIGINAL DATA
# Read the merged ("UNIDO") Saber 11 file for 2016; column types are left
# to read_csv()'s automatic guessing.
saber11_2016 <- read_csv(
  file = "C:/Users/john/Desktop/Saber_11_2025/data/UNIDOS/Saber11_2016_UNIDO.csv"
)
Rows: 563370 Columns: 52
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (42): ESTU_TIPODOCUMENTO, ESTU_CONSECUTIVO, COLE_AREA_UBICACION, COLE_BI...
dbl (10): PERIODO, COLE_COD_DANE_ESTABLECIMIENTO, COLE_COD_DANE_SEDE, PUNT_I...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Confirm the load: show dimensions, column types and leading values of
# the raw 2016 data.
saber11_2016 |>
  glimpse()
Rows: 563,370
Columns: 52
$ PERIODO <dbl> 20161, 20161, 20161, 20161, 20161, 20161…
$ ESTU_TIPODOCUMENTO <chr> "TI", "TI", "CC", "CC", "TI", "TI", "TI"…
$ ESTU_CONSECUTIVO <chr> "SB11201610005757", "SB11201610040978", …
$ COLE_AREA_UBICACION <chr> "URBANO", "URBANO", "URBANO", "RURAL", "…
$ COLE_BILINGUE <chr> "N", "N", "S", "S", "S", "N", "S", "N", …
$ COLE_CALENDARIO <chr> "B", "B", "B", "B", "B", "B", "B", "B", …
$ COLE_CARACTER <chr> "TÉCNICO/ACADÉMICO", "TÉCNICO", "ACADÉMI…
$ COLE_COD_DANE_ESTABLECIMIENTO <dbl> 311848000286, 376001028219, 376001001221…
$ COLE_COD_DANE_SEDE <dbl> 311848000286, 376001028219, 376001001221…
$ COLE_COD_DEPTO_UBICACION <chr> "11", "76", "76", "76", "66", "76", "11"…
$ COLE_COD_MCPIO_UBICACION <chr> "11001", "76001", "76001", "76001", "660…
$ COLE_CODIGO_ICFES <chr> "025411", "058727", "016741", "016774", …
$ COLE_DEPTO_UBICACION <chr> "BOGOTA", "VALLE", "VALLE", "VALLE", "RI…
$ COLE_GENERO <chr> "MASCULINO", "MIXTO", "MIXTO", "MIXTO", …
$ COLE_JORNADA <chr> "COMPLETA", "MAÑANA", "COMPLETA", "MAÑAN…
$ COLE_MCPIO_UBICACION <chr> "BOGOTÁ D.C.", "CALI", "CALI", "CALI", "…
$ COLE_NATURALEZA <chr> "NO OFICIAL", "NO OFICIAL", "NO OFICIAL"…
$ COLE_NOMBRE_ESTABLECIMIENTO <chr> "GIMN DE LOS CERROS", "COLEGIO COMFANDI …
$ COLE_NOMBRE_SEDE <chr> "GIMN DE LOS CERROS", "COLEGIO COMFANDI …
$ COLE_SEDE_PRINCIPAL <chr> "S", "S", "S", "S", "S", "S", "S", "S", …
$ ESTU_COD_DEPTO_PRESENTACION <chr> "11", "76", "76", "76", "66", "76", "11"…
$ ESTU_COD_MCPIO_PRESENTACION <chr> "11001", "76001", "76001", "76001", "660…
$ ESTU_COD_RESIDE_DEPTO <chr> "11", "76", "76", "76", "66", "76", "11"…
$ ESTU_COD_RESIDE_MCPIO <chr> "11001", "76001", "76001", "76001", "660…
$ ESTU_DEPTO_PRESENTACION <chr> "BOGOTÁ", "VALLE", "VALLE", "VALLE", "RI…
$ ESTU_DEPTO_RESIDE <chr> "BOGOTÁ", "VALLE", "VALLE", "VALLE", "RI…
$ ESTU_ESTADOINVESTIGACION <chr> "PUBLICAR", "PUBLICAR", "PUBLICAR", "PUB…
$ ESTU_ESTUDIANTE <chr> "ESTUDIANTE", "ESTUDIANTE", "ESTUDIANTE"…
$ ESTU_FECHANACIMIENTO <chr> "06/02/1999", "22/12/1999", "04/06/1997"…
$ ESTU_GENERO <chr> "M", "M", "M", "M", "M", "F", "F", "F", …
$ ESTU_MCPIO_PRESENTACION <chr> "BOGOTÁ D.C.", "CALI", "CALI", "CALI", "…
$ ESTU_MCPIO_RESIDE <chr> "BOGOTÁ D.C.", "CALI", "CALI", "CALI", "…
$ ESTU_NACIONALIDAD <chr> "COLOMBIA", "COLOMBIA", "COLOMBIA", "COL…
$ ESTU_PAIS_RESIDE <chr> "COLOMBIA", "COLOMBIA", "COLOMBIA", "COL…
$ ESTU_PRIVADO_LIBERTAD <chr> "N", "N", "N", "N", "N", "N", "N", "N", …
$ FAMI_CUARTOSHOGAR <chr> "Tres", "Dos", "Cuatro", "Tres", "Dos", …
$ FAMI_EDUCACIONMADRE <chr> "Postgrado", "Postgrado", "Educación pro…
$ FAMI_EDUCACIONPADRE <chr> "Educación profesional completa", "Secun…
$ FAMI_ESTRATOVIVIENDA <chr> "Estrato 4", "Estrato 3", "Estrato 6", "…
$ FAMI_PERSONASHOGAR <chr> "Tres", "Tres", "Cinco", "Tres", "Dos", …
$ FAMI_TIENEAUTOMOVIL <chr> "Si", "Si", "Si", "Si", "Si", "Si", "Si"…
$ FAMI_TIENECOMPUTADOR <chr> "Si", "Si", "Si", "Si", "Si", "No", "Si"…
$ FAMI_TIENEINTERNET <chr> "Si", "Si", "Si", "Si", "Si", "Si", "Si"…
$ FAMI_TIENELAVADORA <chr> "Si", "Si", "Si", "Si", "Si", "Si", "Si"…
$ DESEMP_INGLES <chr> "B+", "A2", "B+", "B1", "B+", "A1", "B+"…
$ PUNT_INGLES <dbl> 92, 64, 95, 82, 90, 53, 87, 87, 80, 82, …
$ PUNT_MATEMATICAS <dbl> 100, 71, 73, 78, 84, 71, 66, 70, 74, 79,…
$ PUNT_SOCIALES_CIUDADANAS <dbl> 96, 68, 78, 68, 74, 72, 68, 70, 68, 68, …
$ PUNT_C_NATURALES <dbl> 83, 78, 72, 70, 69, 66, 63, 67, 72, 75, …
$ PUNT_LECTURA_CRITICA <dbl> 69, 69, 60, 60, 71, 56, 68, 62, 64, 68, …
$ PUNT_GLOBAL <dbl> 437, 355, 363, 350, 378, 326, 339, 344, …
$ ANIO <dbl> 2016, 2016, 2016, 2016, 2016, 2016, 2016…
# names(saber11_2016)
# Collapse repeated ESTU_CONSECUTIVO values to a single row each.
# Rows are sorted by ESTU_CONSECUTIVO and then by PERIODO descending, so
# the first row per id -- the one distinct() keeps -- is the one with the
# highest PERIODO. .keep_all = TRUE retains every other column of that row.
saber11_2016_limpio <- saber11_2016 |>
  arrange(ESTU_CONSECUTIVO, desc(PERIODO)) |>
  distinct(ESTU_CONSECUTIVO, .keep_all = TRUE)
# Sanity check on the raw 2016 data: list every ESTU_CONSECUTIVO that
# appears on more than one row, together with its row count (n).
saber11_2016 |>
  count(ESTU_CONSECUTIVO) |>
  filter(n > 1)
# A tibble: 2,100 × 2
ESTU_CONSECUTIVO n
<chr> <int>
1 SB11201610048924 2
2 SB11201610048960 2
3 SB11201610048980 2
4 SB11201610049008 2
5 SB11201610049032 2
6 SB11201610049067 2
7 SB11201610049084 2
8 SB11201610049107 2
9 SB11201610049129 2
10 SB11201610049147 2
# ℹ 2,090 more rows
# Verify the deduplicated 2016 data: an empty result means no
# ESTU_CONSECUTIVO occurs on more than one row.
saber11_2016_limpio |>
  count(ESTU_CONSECUTIVO) |>
  filter(n > 1)
# A tibble: 0 × 2
# ℹ 2 variables: ESTU_CONSECUTIVO <chr>, n <int>
# Inspect the deduplicated 2016 data: dimensions, column types and the
# first values of each column.
saber11_2016_limpio |>
  glimpse()
Rows: 561,270
Columns: 52
$ PERIODO <dbl> 20161, 20161, 20161, 20161, 20161, 20161…
$ ESTU_TIPODOCUMENTO <chr> "TI", "TI", "TI", "TI", "TI", "TI", "TI"…
$ ESTU_CONSECUTIVO <chr> "SB11201610000003", "SB11201610000004", …
$ COLE_AREA_UBICACION <chr> "URBANO", "URBANO", "URBANO", "URBANO", …
$ COLE_BILINGUE <chr> "N", "N", "N", "N", "N", "N", "N", "N", …
$ COLE_CALENDARIO <chr> "B", "B", "B", "B", "B", "B", "B", "B", …
$ COLE_CARACTER <chr> "ACADÉMICO", "ACADÉMICO", "ACADÉMICO", "…
$ COLE_COD_DANE_ESTABLECIMIENTO <dbl> 376111000280, 376111000280, 376111000280…
$ COLE_COD_DANE_SEDE <dbl> 376111000280, 376111000280, 376111000280…
$ COLE_COD_DEPTO_UBICACION <chr> "76", "76", "76", "76", "11", "76", "11"…
$ COLE_COD_MCPIO_UBICACION <chr> "76111", "76111", "76111", "76111", "110…
$ COLE_CODIGO_ICFES <chr> "018184", "018184", "018184", "018184", …
$ COLE_DEPTO_UBICACION <chr> "VALLE", "VALLE", "VALLE", "VALLE", "BOG…
$ COLE_GENERO <chr> "MIXTO", "MIXTO", "MIXTO", "MIXTO", "FEM…
$ COLE_JORNADA <chr> "MAÑANA", "MAÑANA", "MAÑANA", "MAÑANA", …
$ COLE_MCPIO_UBICACION <chr> "BUGA", "BUGA", "BUGA", "BUGA", "BOGOTÁ …
$ COLE_NATURALEZA <chr> "NO OFICIAL", "NO OFICIAL", "NO OFICIAL"…
$ COLE_NOMBRE_ESTABLECIMIENTO <chr> "LICEO DE LOS ANDES", "LICEO DE LOS ANDE…
$ COLE_NOMBRE_SEDE <chr> "LIC DE LOS ANDES", "LIC DE LOS ANDES", …
$ COLE_SEDE_PRINCIPAL <chr> "S", "S", "S", "S", "S", "S", "S", "S", …
$ ESTU_COD_DEPTO_PRESENTACION <chr> "76", "76", "76", "76", "11", "76", "11"…
$ ESTU_COD_MCPIO_PRESENTACION <chr> "76111", "76111", "76111", "76111", "110…
$ ESTU_COD_RESIDE_DEPTO <chr> "76", "76", "76", "76", "11", "76", "11"…
$ ESTU_COD_RESIDE_MCPIO <chr> "76111", "76111", "76111", "76111", "110…
$ ESTU_DEPTO_PRESENTACION <chr> "VALLE", "VALLE", "VALLE", "VALLE", "BOG…
$ ESTU_DEPTO_RESIDE <chr> "VALLE", "VALLE", "VALLE", "VALLE", "BOG…
$ ESTU_ESTADOINVESTIGACION <chr> "PUBLICAR", "PUBLICAR", "PUBLICAR", "PUB…
$ ESTU_ESTUDIANTE <chr> "ESTUDIANTE", "ESTUDIANTE", "ESTUDIANTE"…
$ ESTU_FECHANACIMIENTO <chr> "08/06/1999", "30/09/2000", "24/02/1999"…
$ ESTU_GENERO <chr> "F", "M", "M", "M", "F", "M", "F", "M", …
$ ESTU_MCPIO_PRESENTACION <chr> "GUADALAJARA DE BUGA", "GUADALAJARA DE B…
$ ESTU_MCPIO_RESIDE <chr> "GUADALAJARA DE BUGA", "GUADALAJARA DE B…
$ ESTU_NACIONALIDAD <chr> "COLOMBIA", "COLOMBIA", "COLOMBIA", "COL…
$ ESTU_PAIS_RESIDE <chr> "COLOMBIA", "COLOMBIA", "COLOMBIA", "COL…
$ ESTU_PRIVADO_LIBERTAD <chr> "N", "N", "N", "N", "N", "N", "N", "N", …
$ FAMI_CUARTOSHOGAR <chr> "Tres", "Tres", "Dos", "Tres", "Cuatro",…
$ FAMI_EDUCACIONMADRE <chr> "Educación profesional completa", "Educa…
$ FAMI_EDUCACIONPADRE <chr> "Educación profesional completa", "Educa…
$ FAMI_ESTRATOVIVIENDA <chr> "Estrato 3", "Estrato 2", "Estrato 4", "…
$ FAMI_PERSONASHOGAR <chr> "Tres", "Cuatro", "Tres", "Tres", "Cinco…
$ FAMI_TIENEAUTOMOVIL <chr> "Si", "No", "Si", "Si", "Si", "Si", "Si"…
$ FAMI_TIENECOMPUTADOR <chr> "Si", "No", "Si", "Si", "Si", "Si", "Si"…
$ FAMI_TIENEINTERNET <chr> "Si", "No", "Si", "Si", "Si", "Si", "Si"…
$ FAMI_TIENELAVADORA <chr> "Si", "Si", "Si", "Si", "Si", "Si", "Si"…
$ DESEMP_INGLES <chr> "A1", "B1", "B1", "B+", "B+", "A2", "B1"…
$ PUNT_INGLES <dbl> 55, 70, 80, 95, 95, 64, 80, 90, 97, 61, …
$ PUNT_MATEMATICAS <dbl> 78, 88, 94, 87, 75, 84, 93, 98, 76, 57, …
$ PUNT_SOCIALES_CIUDADANAS <dbl> 64, 78, 74, 70, 72, 78, 74, 79, 78, 58, …
$ PUNT_C_NATURALES <dbl> 63, 72, 73, 72, 69, 72, 79, 76, 79, 59, …
$ PUNT_LECTURA_CRITICA <dbl> 63, 71, 67, 74, 70, 64, 68, 74, 69, 56, …
$ PUNT_GLOBAL <dbl> 330, 383, 386, 386, 367, 368, 393, 412, …
$ ANIO <dbl> 2016, 2016, 2016, 2016, 2016, 2016, 2016…
# Persist the deduplicated 2016 data in two formats.
# 1) CSV copy.
write_csv(
  saber11_2016_limpio,
  "C:/Users/john/Desktop/Saber_11_2025/data/sinDuplicados/saber11_2016_limpio.csv"
)
# 2) Excel workbook. asTable = TRUE is forwarded to write.xlsx()
#    (presumably openxlsx, which writes the sheet as an Excel table
#    -- TODO confirm which package provides write.xlsx here).
write.xlsx(
  x = saber11_2016_limpio,
  asTable = TRUE,
  file = "C:/Users/john/Desktop/Saber_11_2025/data/sinDuplicados/saber11_2016_limpio.xlsx"
)
# LOAD ORIGINAL DATA
# Read the merged ("UNIDO") Saber 11 file for 2017; column types are left
# to read_csv()'s automatic guessing.
saber11_2017 <- read_csv(
  file = "C:/Users/john/Desktop/Saber_11_2025/data/UNIDOS/Saber11_2017_UNIDO.csv"
)
Rows: 561287 Columns: 52
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (42): ESTU_TIPODOCUMENTO, ESTU_CONSECUTIVO, COLE_AREA_UBICACION, COLE_BI...
dbl (10): PERIODO, COLE_COD_DANE_ESTABLECIMIENTO, COLE_COD_DANE_SEDE, PUNT_I...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Confirm the load: show dimensions, column types and leading values of
# the raw 2017 data.
saber11_2017 |>
  glimpse()
Rows: 561,287
Columns: 52
$ PERIODO <dbl> 20171, 20171, 20171, 20171, 20171, 20171…
$ ESTU_TIPODOCUMENTO <chr> "TI", "TI", "TI", "TI", "TI", "TI", "TI"…
$ ESTU_CONSECUTIVO <chr> "SB11201710031305", "SB11201710029895", …
$ COLE_AREA_UBICACION <chr> "URBANO", "URBANO", "URBANO", "URBANO", …
$ COLE_BILINGUE <chr> NA, "N", "N", "N", "N", "N", "N", "N", "…
$ COLE_CALENDARIO <chr> "B", "B", "B", "B", "B", "B", "B", "B", …
$ COLE_CARACTER <chr> "ACADÉMICO", "ACADÉMICO", "TÉCNICO", "TÉ…
$ COLE_COD_DANE_ESTABLECIMIENTO <dbl> 376001041761, 376001000128, 476520007207…
$ COLE_COD_DANE_SEDE <dbl> 376001041761, 376001000128, 476520007207…
$ COLE_COD_DEPTO_UBICACION <chr> "76", "76", "76", "76", "52", "76", "76"…
$ COLE_COD_MCPIO_UBICACION <chr> "76001", "76001", "76520", "76001", "520…
$ COLE_CODIGO_ICFES <chr> "130138", "017111", "136127", "111807", …
$ COLE_DEPTO_UBICACION <chr> "VALLE", "VALLE", "VALLE", "VALLE", "NAR…
$ COLE_GENERO <chr> "MIXTO", "MIXTO", "MIXTO", "MIXTO", "MIX…
$ COLE_JORNADA <chr> "MAÑANA", "MAÑANA", "MAÑANA", "TARDE", "…
$ COLE_MCPIO_UBICACION <chr> "CALI", "CALI", "PALMIRA", "CALI", "PAST…
$ COLE_NATURALEZA <chr> "NO OFICIAL", "NO OFICIAL", "NO OFICIAL"…
$ COLE_NOMBRE_ESTABLECIMIENTO <chr> "COLEGIO SANTA ISABEL DE HUNGRIA - CIUDA…
$ COLE_NOMBRE_SEDE <chr> "FUND COL SANTA ISABEL DE HUNGRIA", "COL…
$ COLE_SEDE_PRINCIPAL <chr> "S", "S", "S", "S", "S", "S", "S", "S", …
$ ESTU_COD_DEPTO_PRESENTACION <chr> "76", "76", "76", "76", "52", "76", "76"…
$ ESTU_COD_MCPIO_PRESENTACION <chr> "76001", "76001", "76001", "76001", "520…
$ ESTU_COD_RESIDE_DEPTO <chr> "76", "76", "76", "76", "52", "76", "76"…
$ ESTU_COD_RESIDE_MCPIO <chr> "76001", "76001", "76001", "76001", "520…
$ ESTU_DEPTO_PRESENTACION <chr> "VALLE", "VALLE", "VALLE", "VALLE", "NAR…
$ ESTU_DEPTO_RESIDE <chr> "VALLE", "VALLE", "VALLE", "VALLE", "NAR…
$ ESTU_ESTADOINVESTIGACION <chr> "PUBLICAR", "PUBLICAR", "PUBLICAR", "PUB…
$ ESTU_ESTUDIANTE <chr> "ESTUDIANTE", "ESTUDIANTE", "ESTUDIANTE"…
$ ESTU_FECHANACIMIENTO <chr> "31/01/2000", "14/01/2000", "23/08/1999"…
$ ESTU_GENERO <chr> "M", "M", "M", "M", "F", "M", "F", "F", …
$ ESTU_MCPIO_PRESENTACION <chr> "CALI", "CALI", "CALI", "CALI", "PASTO",…
$ ESTU_MCPIO_RESIDE <chr> "CALI", "CALI", "CALI", "CALI", "PASTO",…
$ ESTU_NACIONALIDAD <chr> "COLOMBIA", "COLOMBIA", "COLOMBIA", "COL…
$ ESTU_PAIS_RESIDE <chr> "COLOMBIA", "COLOMBIA", "COLOMBIA", "COL…
$ ESTU_PRIVADO_LIBERTAD <chr> "N", "N", "N", "N", "N", "N", "N", "N", …
$ FAMI_CUARTOSHOGAR <chr> "Tres", "Dos", "Tres", "Dos", "Cuatro", …
$ FAMI_EDUCACIONMADRE <chr> "Educación profesional completa", "Secun…
$ FAMI_EDUCACIONPADRE <chr> "Educación profesional completa", "Técni…
$ FAMI_ESTRATOVIVIENDA <chr> "Estrato 4", "Estrato 3", "Estrato 3", "…
$ FAMI_PERSONASHOGAR <chr> "3 a 4", "3 a 4", "3 a 4", "3 a 4", "5 a…
$ FAMI_TIENEAUTOMOVIL <chr> "Si", "Si", "Si", "Si", "No", "Si", NA, …
$ FAMI_TIENECOMPUTADOR <chr> "Si", "Si", "Si", "Si", "Si", NA, NA, NA…
$ FAMI_TIENEINTERNET <chr> "Si", NA, "Si", "Si", "Si", "No", "Si", …
$ FAMI_TIENELAVADORA <chr> "Si", "Si", "Si", "Si", "No", "Si", NA, …
$ DESEMP_INGLES <chr> "A-", "A2", "A1", "A1", "A2", "A-", "A2"…
$ PUNT_INGLES <dbl> 26, 60, 54, 55, 62, 47, 62, 92, 83, 41, …
$ PUNT_MATEMATICAS <dbl> 42, 64, 51, 54, 58, 54, 65, 54, 76, 49, …
$ PUNT_SOCIALES_CIUDADANAS <dbl> 35, 81, 42, 61, 61, 64, 57, 64, 60, 52, …
$ PUNT_C_NATURALES <dbl> 45, 56, 50, 56, 64, 65, 61, 70, 70, 58, …
$ PUNT_LECTURA_CRITICA <dbl> 41, 60, 54, 52, 59, 64, 58, 69, 58, 52, …
$ PUNT_GLOBAL <dbl> 198, 324, 248, 278, 303, 303, 302, 332, …
$ ANIO <dbl> 2017, 2017, 2017, 2017, 2017, 2017, 2017…
# names(saber11_2017)
# Collapse repeated ESTU_CONSECUTIVO values to a single row each.
# Rows are sorted by ESTU_CONSECUTIVO and then by PERIODO descending, so
# the first row per id -- the one distinct() keeps -- is the one with the
# highest PERIODO. .keep_all = TRUE retains every other column of that row.
saber11_2017_limpio <- saber11_2017 |>
  arrange(ESTU_CONSECUTIVO, desc(PERIODO)) |>
  distinct(ESTU_CONSECUTIVO, .keep_all = TRUE)
# Sanity check on the raw 2017 data: list every ESTU_CONSECUTIVO that
# appears on more than one row, together with its row count (n).
saber11_2017 |>
  count(ESTU_CONSECUTIVO) |>
  filter(n > 1)
# A tibble: 2,008 × 2
ESTU_CONSECUTIVO n
<chr> <int>
1 SB11201720036019 2
2 SB11201720036142 2
3 SB11201720036242 2
4 SB11201720036281 2
5 SB11201720036360 2
6 SB11201720036448 2
7 SB11201720036488 2
8 SB11201720036517 2
9 SB11201720036556 2
10 SB11201720036593 2
# ℹ 1,998 more rows
# Verify the deduplicated 2017 data: an empty result means no
# ESTU_CONSECUTIVO occurs on more than one row.
saber11_2017_limpio |>
  count(ESTU_CONSECUTIVO) |>
  filter(n > 1)
# A tibble: 0 × 2
# ℹ 2 variables: ESTU_CONSECUTIVO <chr>, n <int>
# Inspect the deduplicated 2017 data: dimensions, column types and the
# first values of each column.
saber11_2017_limpio |>
  glimpse()
Rows: 559,279
Columns: 52
$ PERIODO <dbl> 20171, 20171, 20171, 20171, 20171, 20171…
$ ESTU_TIPODOCUMENTO <chr> "CC", "TI", "TI", "TI", "TI", "TI", "TI"…
$ ESTU_CONSECUTIVO <chr> "SB11201710000001", "SB11201710000006", …
$ COLE_AREA_UBICACION <chr> "URBANO", "URBANO", "URBANO", "URBANO", …
$ COLE_BILINGUE <chr> NA, "N", "N", "N", "N", "N", "N", "N", "…
$ COLE_CALENDARIO <chr> "B", "B", "B", "B", "B", "B", "B", "B", …
$ COLE_CARACTER <chr> "ACADÉMICO", "ACADÉMICO", "ACADÉMICO", "…
$ COLE_COD_DANE_ESTABLECIMIENTO <dbl> 308001077745, 319001000175, 319001000175…
$ COLE_COD_DANE_SEDE <dbl> 308001077745, 319001000175, 319001000175…
$ COLE_COD_DEPTO_UBICACION <chr> "08", "19", "19", "19", "19", "19", "19"…
$ COLE_COD_MCPIO_UBICACION <chr> "08001", "19001", "19001", "19001", "190…
$ COLE_CODIGO_ICFES <chr> "135558", "106179", "106179", "106179", …
$ COLE_DEPTO_UBICACION <chr> "ATLANTICO", "CAUCA", "CAUCA", "CAUCA", …
$ COLE_GENERO <chr> "MIXTO", "FEMENINO", "FEMENINO", "FEMENI…
$ COLE_JORNADA <chr> "COMPLETA", "MAÑANA", "MAÑANA", "MAÑANA"…
$ COLE_MCPIO_UBICACION <chr> "BARRANQUILLA", "POPAYÁN", "POPAYÁN", "P…
$ COLE_NATURALEZA <chr> "NO OFICIAL", "NO OFICIAL", "NO OFICIAL"…
$ COLE_NOMBRE_ESTABLECIMIENTO <chr> "COLEGIO REAL (ROYAL SCHOOL)", "COL HOGA…
$ COLE_NOMBRE_SEDE <chr> "COLEGIO REAL (ROYAL SCHOOL) - SEDE PRIN…
$ COLE_SEDE_PRINCIPAL <chr> "S", "S", "S", "S", "S", "S", "S", "S", …
$ ESTU_COD_DEPTO_PRESENTACION <chr> "08", "19", "19", "19", "19", "19", "19"…
$ ESTU_COD_MCPIO_PRESENTACION <chr> "08001", "19001", "19001", "19001", "190…
$ ESTU_COD_RESIDE_DEPTO <chr> "08", "19", "19", "19", "19", "19", "19"…
$ ESTU_COD_RESIDE_MCPIO <chr> "08001", "19001", "19001", "19001", "190…
$ ESTU_DEPTO_PRESENTACION <chr> "ATLANTICO", "CAUCA", "CAUCA", "CAUCA", …
$ ESTU_DEPTO_RESIDE <chr> "ATLANTICO", "CAUCA", "CAUCA", "CAUCA", …
$ ESTU_ESTADOINVESTIGACION <chr> "PUBLICAR", "PUBLICAR", "PUBLICAR", "PUB…
$ ESTU_ESTUDIANTE <chr> "ESTUDIANTE", "ESTUDIANTE", "ESTUDIANTE"…
$ ESTU_FECHANACIMIENTO <chr> "16/05/1998", "05/04/2000", "01/03/2000"…
$ ESTU_GENERO <chr> "F", "F", "F", "F", "F", "F", "F", "F", …
$ ESTU_MCPIO_PRESENTACION <chr> "BARRANQUILLA", "POPAYÁN", "POPAYÁN", "P…
$ ESTU_MCPIO_RESIDE <chr> "BARRANQUILLA", "POPAYÁN", "POPAYÁN", "P…
$ ESTU_NACIONALIDAD <chr> "COLOMBIA", "COLOMBIA", "COLOMBIA", "COL…
$ ESTU_PAIS_RESIDE <chr> "COLOMBIA", "COLOMBIA", "COLOMBIA", "COL…
$ ESTU_PRIVADO_LIBERTAD <chr> "N", "N", "N", "N", "N", "N", "N", "N", …
$ FAMI_CUARTOSHOGAR <chr> "Tres", "Tres", "Tres", "Tres", "Dos", "…
$ FAMI_EDUCACIONMADRE <chr> "Técnica o tecnológica completa", "Educa…
$ FAMI_EDUCACIONPADRE <chr> "No Aplica", "Técnica o tecnológica inco…
$ FAMI_ESTRATOVIVIENDA <chr> "Estrato 5", "Estrato 2", "Estrato 3", "…
$ FAMI_PERSONASHOGAR <chr> "3 a 4", "5 a 6", "3 a 4", "3 a 4", "3 a…
$ FAMI_TIENEAUTOMOVIL <chr> "Si", "No", "Si", "Si", "No", "Si", "Si"…
$ FAMI_TIENECOMPUTADOR <chr> "Si", "Si", "Si", "Si", "Si", "Si", "Si"…
$ FAMI_TIENEINTERNET <chr> "Si", "Si", "Si", "Si", "Si", "Si", "Si"…
$ FAMI_TIENELAVADORA <chr> "Si", "Si", "Si", "Si", "No", "Si", "No"…
$ DESEMP_INGLES <chr> "B1", "A-", "A2", "A1", "A-", "A2", "A1"…
$ PUNT_INGLES <dbl> 73, 47, 59, 53, 37, 58, 50, 45, 70, 49, …
$ PUNT_MATEMATICAS <dbl> 49, 49, 51, 67, 48, 57, 58, 57, 72, 68, …
$ PUNT_SOCIALES_CIUDADANAS <dbl> 54, 55, 54, 54, 42, 39, 66, 52, 69, 63, …
$ PUNT_C_NATURALES <dbl> 56, 56, 62, 64, 48, 59, 57, 59, 62, 53, …
$ PUNT_LECTURA_CRITICA <dbl> 57, 64, 60, 71, 48, 68, 57, 55, 66, 64, …
$ PUNT_GLOBAL <dbl> 277, 277, 285, 316, 229, 280, 294, 275, …
$ ANIO <dbl> 2017, 2017, 2017, 2017, 2017, 2017, 2017…
# Persist the deduplicated 2017 data in two formats.
# 1) CSV copy.
write_csv(
  saber11_2017_limpio,
  "C:/Users/john/Desktop/Saber_11_2025/data/sinDuplicados/saber11_2017_limpio.csv"
)
# 2) Excel workbook. asTable = TRUE is forwarded to write.xlsx()
#    (presumably openxlsx, which writes the sheet as an Excel table
#    -- TODO confirm which package provides write.xlsx here).
write.xlsx(
  x = saber11_2017_limpio,
  asTable = TRUE,
  file = "C:/Users/john/Desktop/Saber_11_2025/data/sinDuplicados/saber11_2017_limpio.xlsx"
)
# LOAD ORIGINAL DATA
# Read the merged ("UNIDO") Saber 11 file for 2018; column types are left
# to read_csv()'s automatic guessing.
saber11_2018 <- read_csv(
  file = "C:/Users/john/Desktop/Saber_11_2025/data/UNIDOS/Saber11_2018_UNIDO.csv"
)
Rows: 32348 Columns: 52
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (42): ESTU_TIPODOCUMENTO, ESTU_CONSECUTIVO, COLE_AREA_UBICACION, COLE_BI...
dbl (10): PERIODO, COLE_COD_DANE_ESTABLECIMIENTO, COLE_COD_DANE_SEDE, PUNT_I...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Confirm the load: show dimensions, column types and leading values of
# the raw 2018 data.
saber11_2018 |>
  glimpse()
Rows: 32,348
Columns: 52
$ PERIODO <dbl> 20181, 20181, 20181, 20181, 20181, 20181…
$ ESTU_TIPODOCUMENTO <chr> "CC", "CC", "CC", "TI", "TI", "TI", "TI"…
$ ESTU_CONSECUTIVO <chr> "SB11201810028234", "SB11201810028234", …
$ COLE_AREA_UBICACION <chr> "URBANO", "URBANO", "URBANO", "URBANO", …
$ COLE_BILINGUE <chr> "N", "N", "N", "S", "S", "N", "N", "N", …
$ COLE_CALENDARIO <chr> "B", "B", "A", "B", "B", "B", "B", "OTRO…
$ COLE_CARACTER <chr> "ACADÉMICO", "ACADÉMICO", NA, "ACADÉMICO…
$ COLE_COD_DANE_ESTABLECIMIENTO <dbl> 376001031007, 376001031007, 373001010485…
$ COLE_COD_DANE_SEDE <dbl> 376001031007, 376001031007, 373001010485…
$ COLE_COD_DEPTO_UBICACION <chr> "76", "76", "73", "11", "11", "52", "52"…
$ COLE_COD_MCPIO_UBICACION <chr> "76001", "76001", "73001", "11001", "110…
$ COLE_CODIGO_ICFES <chr> "084368", "084368", "192393", "034306", …
$ COLE_DEPTO_UBICACION <chr> "VALLE", "VALLE", "TOLIMA", "BOGOTA", "B…
$ COLE_GENERO <chr> "MIXTO", "MIXTO", "MIXTO", "MIXTO", "MIX…
$ COLE_JORNADA <chr> "MAÑANA", "MAÑANA", "SABATINA", "COMPLET…
$ COLE_MCPIO_UBICACION <chr> "CALI", "CALI", "IBAGUÉ", "BOGOTÁ D.C.",…
$ COLE_NATURALEZA <chr> "NO OFICIAL", "NO OFICIAL", "NO OFICIAL"…
$ COLE_NOMBRE_ESTABLECIMIENTO <chr> "INST SER INTERNACIONAL COMUNA 17", "INS…
$ COLE_NOMBRE_SEDE <chr> "INST SER INTERNACIONAL, COMUNA 17", "IN…
$ COLE_SEDE_PRINCIPAL <chr> "S", "S", "S", "S", "S", "S", "S", "S", …
$ ESTU_COD_DEPTO_PRESENTACION <chr> "76", "76", "73", "11", "11", "52", "52"…
$ ESTU_COD_MCPIO_PRESENTACION <chr> "76001", "76001", "73001", "11001", "110…
$ ESTU_COD_RESIDE_DEPTO <chr> "76", "76", "73", "11", "11", "52", "52"…
$ ESTU_COD_RESIDE_MCPIO <chr> "76001", "76001", "73001", "11001", "110…
$ ESTU_DEPTO_PRESENTACION <chr> "VALLE", "VALLE", "TOLIMA", "BOGOTÁ", "B…
$ ESTU_DEPTO_RESIDE <chr> "VALLE", "VALLE", "TOLIMA", "BOGOTÁ", "B…
$ ESTU_ESTADOINVESTIGACION <chr> "PUBLICAR", "PUBLICAR", "PUBLICAR", "PUB…
$ ESTU_ESTUDIANTE <chr> "ESTUDIANTE", "ESTUDIANTE", "ESTUDIANTE"…
$ ESTU_FECHANACIMIENTO <chr> "25/03/1998", "25/03/1998", "21/11/1998"…
$ ESTU_GENERO <chr> "M", "M", "F", "M", "M", "F", "F", "F", …
$ ESTU_MCPIO_PRESENTACION <chr> "CALI", "CALI", "IBAGUÉ", "BOGOTÁ D.C.",…
$ ESTU_MCPIO_RESIDE <chr> "CALI", "CALI", "IBAGUÉ", "BOGOTÁ D.C.",…
$ ESTU_NACIONALIDAD <chr> "COLOMBIA", "COLOMBIA", "COLOMBIA", "COL…
$ ESTU_PAIS_RESIDE <chr> "COLOMBIA", "COLOMBIA", "COLOMBIA", "COL…
$ ESTU_PRIVADO_LIBERTAD <chr> "N", "N", "N", "N", "N", "N", "N", "N", …
$ FAMI_CUARTOSHOGAR <chr> "Tres", "Tres", "Seis o mas", "Tres", "T…
$ FAMI_EDUCACIONMADRE <chr> "Educación profesional completa", "Educa…
$ FAMI_EDUCACIONPADRE <chr> "Técnica o tecnológica completa", "Técni…
$ FAMI_ESTRATOVIVIENDA <chr> "Estrato 5", "Estrato 5", "Estrato 6", "…
$ FAMI_PERSONASHOGAR <chr> "3 a 4", "3 a 4", "7 a 8", "3 a 4", "3 a…
$ FAMI_TIENEAUTOMOVIL <chr> "No", "No", "Si", "Si", "Si", "Si", "Si"…
$ FAMI_TIENECOMPUTADOR <chr> "Si", "Si", "Si", "Si", "Si", "Si", "Si"…
$ FAMI_TIENEINTERNET <chr> "Si", "Si", "Si", "Si", "Si", "Si", "Si"…
$ FAMI_TIENELAVADORA <chr> "Si", "Si", "Si", "Si", "Si", "Si", "Si"…
$ DESEMP_INGLES <chr> "A1", "A1", "A-", "B+", "B+", "B+", "B+"…
$ PUNT_INGLES <dbl> 52, 52, 47, 87, 87, 81, 81, 45, 89, 89, …
$ PUNT_MATEMATICAS <dbl> 34, 34, 47, 79, 79, 81, 81, 36, 78, 78, …
$ PUNT_SOCIALES_CIUDADANAS <dbl> 32, 32, 46, 74, 74, 66, 66, 32, 77, 77, …
$ PUNT_C_NATURALES <dbl> 41, 41, 52, 77, 77, 75, 75, 39, 78, 78, …
$ PUNT_LECTURA_CRITICA <dbl> 44, 44, 58, 71, 71, 73, 73, 46, 77, 77, …
$ PUNT_GLOBAL <dbl> 194, 194, 252, 381, 381, 372, 372, 194, …
$ ANIO <dbl> 2018, 2018, 2018, 2018, 2018, 2018, 2018…
# names(saber11_2018)
# Collapse repeated ESTU_CONSECUTIVO values to a single row each.
# Rows are sorted by ESTU_CONSECUTIVO and then by PERIODO descending, so
# the first row per id -- the one distinct() keeps -- is the one with the
# highest PERIODO. .keep_all = TRUE retains every other column of that row.
saber11_2018_limpio <- saber11_2018 |>
  arrange(ESTU_CONSECUTIVO, desc(PERIODO)) |>
  distinct(ESTU_CONSECUTIVO, .keep_all = TRUE)
# Sanity check on the raw 2018 data: list every ESTU_CONSECUTIVO that
# appears on more than one row, together with its row count (n).
saber11_2018 |>
  count(ESTU_CONSECUTIVO) |>
  filter(n > 1)
# A tibble: 12,527 × 2
ESTU_CONSECUTIVO n
<chr> <int>
1 SB11201810000026 2
2 SB11201810000027 2
3 SB11201810000029 2
4 SB11201810000032 2
5 SB11201810000034 2
6 SB11201810000035 2
7 SB11201810000036 2
8 SB11201810000037 2
9 SB11201810000038 2
10 SB11201810000039 2
# ℹ 12,517 more rows
# Verify the deduplicated 2018 data: an empty result means no
# ESTU_CONSECUTIVO occurs on more than one row.
saber11_2018_limpio |>
  count(ESTU_CONSECUTIVO) |>
  filter(n > 1)
# A tibble: 0 × 2
# ℹ 2 variables: ESTU_CONSECUTIVO <chr>, n <int>
# Inspect the deduplicated 2018 data: dimensions, column types and the
# first values of each column.
saber11_2018_limpio |>
  glimpse()
Rows: 19,798
Columns: 52
$ PERIODO <dbl> 20181, 20181, 20181, 20181, 20181, 20181…
$ ESTU_TIPODOCUMENTO <chr> "TI", "TI", "TI", "TI", "TI", "TI", "TI"…
$ ESTU_CONSECUTIVO <chr> "SB11201810000026", "SB11201810000027", …
$ COLE_AREA_UBICACION <chr> "URBANO", "URBANO", "URBANO", "URBANO", …
$ COLE_BILINGUE <chr> NA, NA, "N", NA, NA, "S", NA, "N", "S", …
$ COLE_CALENDARIO <chr> "B", "B", "B", "B", "B", "B", "B", "B", …
$ COLE_CARACTER <chr> "ACADÉMICO", "ACADÉMICO", "ACADÉMICO", "…
$ COLE_COD_DANE_ESTABLECIMIENTO <dbl> 376520005498, 376520005498, 311769000998…
$ COLE_COD_DANE_SEDE <dbl> 376520005498, 376520005498, 311769000998…
$ COLE_COD_DEPTO_UBICACION <chr> "76", "76", "11", "76", "76", "76", "76"…
$ COLE_COD_MCPIO_UBICACION <chr> "76520", "76520", "11001", "76520", "765…
$ COLE_CODIGO_ICFES <chr> "124289", "124289", "019380", "124289", …
$ COLE_DEPTO_UBICACION <chr> "VALLE", "VALLE", "BOGOTA", "VALLE", "VA…
$ COLE_GENERO <chr> "MIXTO", "MIXTO", "MIXTO", "MIXTO", "MIX…
$ COLE_JORNADA <chr> "MAÑANA", "MAÑANA", "COMPLETA", "MAÑANA"…
$ COLE_MCPIO_UBICACION <chr> "PALMIRA", "PALMIRA", "BOGOTÁ D.C.", "PA…
$ COLE_NATURALEZA <chr> "NO OFICIAL", "NO OFICIAL", "NO OFICIAL"…
$ COLE_NOMBRE_ESTABLECIMIENTO <chr> "LIC LA ENSEÑANZA", "LIC LA ENSEÑANZA", …
$ COLE_NOMBRE_SEDE <chr> "LIC LA ENSEÑANZA", "LIC LA ENSEÑANZA", …
$ COLE_SEDE_PRINCIPAL <chr> "S", "S", "S", "S", "S", "S", "S", "S", …
$ ESTU_COD_DEPTO_PRESENTACION <chr> "76", "76", "11", "76", "76", "76", "76"…
$ ESTU_COD_MCPIO_PRESENTACION <chr> "76520", "76520", "11001", "76520", "765…
$ ESTU_COD_RESIDE_DEPTO <chr> "76", "76", "11", "76", "76", "76", "76"…
$ ESTU_COD_RESIDE_MCPIO <chr> "76520", "76520", "11001", "76520", "765…
$ ESTU_DEPTO_PRESENTACION <chr> "VALLE", "VALLE", "BOGOTÁ", "VALLE", "VA…
$ ESTU_DEPTO_RESIDE <chr> "VALLE", "VALLE", "BOGOTÁ", "VALLE", "VA…
$ ESTU_ESTADOINVESTIGACION <chr> "PUBLICAR", "PUBLICAR", "PUBLICAR", "PUB…
$ ESTU_ESTUDIANTE <chr> "ESTUDIANTE", "ESTUDIANTE", "ESTUDIANTE"…
$ ESTU_FECHANACIMIENTO <chr> "31/05/2000", "11/04/2001", "03/03/2000"…
$ ESTU_GENERO <chr> "F", "F", "F", "M", "M", "F", "F", "F", …
$ ESTU_MCPIO_PRESENTACION <chr> "PALMIRA", "PALMIRA", "BOGOTÁ D.C.", "PA…
$ ESTU_MCPIO_RESIDE <chr> "PALMIRA", "PALMIRA", "BOGOTÁ D.C.", "PA…
$ ESTU_NACIONALIDAD <chr> "COLOMBIA", "COLOMBIA", "COLOMBIA", "COL…
$ ESTU_PAIS_RESIDE <chr> "COLOMBIA", "COLOMBIA", "COLOMBIA", "COL…
$ ESTU_PRIVADO_LIBERTAD <chr> "N", "N", "N", "N", "N", "N", "N", "N", …
$ FAMI_CUARTOSHOGAR <chr> "Dos", "Cinco", "Tres", NA, "Tres", NA, …
$ FAMI_EDUCACIONMADRE <chr> NA, "Educación profesional completa", "E…
$ FAMI_EDUCACIONPADRE <chr> NA, "Educación profesional completa", "P…
$ FAMI_ESTRATOVIVIENDA <chr> NA, "Estrato 3", "Estrato 5", NA, "Estra…
$ FAMI_PERSONASHOGAR <chr> "1 a 2", "5 a 6", "3 a 4", NA, "3 a 4", …
$ FAMI_TIENEAUTOMOVIL <chr> "Si", "No", "Si", NA, "Si", NA, "Si", "N…
$ FAMI_TIENECOMPUTADOR <chr> "Si", "Si", "Si", NA, "Si", NA, "Si", "S…
$ FAMI_TIENEINTERNET <chr> NA, "Si", "Si", NA, "Si", "Si", "Si", "S…
$ FAMI_TIENELAVADORA <chr> "Si", "Si", "Si", NA, "Si", NA, "Si", "S…
$ DESEMP_INGLES <chr> "B1", "A2", "B+", "A-", "B1", "B+", "B1"…
$ PUNT_INGLES <dbl> 68, 63, 82, 0, 77, 81, 78, 71, 81, 71, 8…
$ PUNT_MATEMATICAS <dbl> 55, 57, 73, 75, 71, 61, 71, 64, 68, 60, …
$ PUNT_SOCIALES_CIUDADANAS <dbl> 58, 53, 70, 68, 80, 68, 69, 59, 71, 66, …
$ PUNT_C_NATURALES <dbl> 51, 59, 69, 75, 69, 65, 72, 69, 69, 59, …
$ PUNT_LECTURA_CRITICA <dbl> 63, 61, 66, 77, 76, 72, 70, 64, 68, 59, …
$ PUNT_GLOBAL <dbl> 288, 290, 352, 369, 371, 338, 355, 323, …
$ ANIO <dbl> 2018, 2018, 2018, 2018, 2018, 2018, 2018…
# Persist the deduplicated 2018 data in two formats.
# 1) CSV copy.
write_csv(
  saber11_2018_limpio,
  "C:/Users/john/Desktop/Saber_11_2025/data/sinDuplicados/saber11_2018_limpio.csv"
)
# 2) Excel workbook. asTable = TRUE is forwarded to write.xlsx()
#    (presumably openxlsx, which writes the sheet as an Excel table
#    -- TODO confirm which package provides write.xlsx here).
write.xlsx(
  x = saber11_2018_limpio,
  asTable = TRUE,
  file = "C:/Users/john/Desktop/Saber_11_2025/data/sinDuplicados/saber11_2018_limpio.xlsx"
)
# LOAD ORIGINAL DATA
# Read the merged ("UNIDO") Saber 11 file for 2019; column types are left
# to read_csv()'s automatic guessing.
saber11_2019 <- read_csv(
  file = "C:/Users/john/Desktop/Saber_11_2025/data/UNIDOS/Saber11_2019_UNIDO.csv"
)
Rows: 1109085 Columns: 52
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (42): ESTU_TIPODOCUMENTO, ESTU_CONSECUTIVO, COLE_AREA_UBICACION, COLE_BI...
dbl (10): PERIODO, COLE_COD_DANE_ESTABLECIMIENTO, COLE_COD_DANE_SEDE, PUNT_I...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Confirm the load: show dimensions, column types and leading values of
# the raw 2019 data.
saber11_2019 |>
  glimpse()
Rows: 1,109,085
Columns: 52
$ PERIODO <dbl> 20191, 20191, 20191, 20191, 20191, 20191…
$ ESTU_TIPODOCUMENTO <chr> "TI", "TI", "CC", "TI", "TI", "TI", "TI"…
$ ESTU_CONSECUTIVO <chr> "SB11201910019254", "SB11201910005652", …
$ COLE_AREA_UBICACION <chr> "URBANO", "URBANO", "URBANO", "URBANO", …
$ COLE_BILINGUE <chr> "S", "N", "N", "S", "N", "S", NA, "N", "…
$ COLE_CALENDARIO <chr> "B", "B", "B", "B", "B", "B", "B", "B", …
$ COLE_CARACTER <chr> "ACADÉMICO", "ACADÉMICO", "TÉCNICO", "AC…
$ COLE_COD_DANE_ESTABLECIMIENTO <dbl> 311769000475, 308573074895, 376001000900…
$ COLE_COD_DANE_SEDE <dbl> 311769000475, 308573074895, 376001000900…
$ COLE_COD_DEPTO_UBICACION <chr> "11", "08", "76", "76", "76", "25", "05"…
$ COLE_COD_MCPIO_UBICACION <chr> "11001", "08573", "76001", "76001", "760…
$ COLE_CODIGO_ICFES <chr> "022632", "063792", "070094", "143396", …
$ COLE_DEPTO_UBICACION <chr> "BOGOTA", "ATLANTICO", "VALLE", "VALLE",…
$ COLE_GENERO <chr> "FEMENINO", "MIXTO", "MIXTO", "MIXTO", "…
$ COLE_JORNADA <chr> "COMPLETA", "COMPLETA", "MAÑANA", "COMPL…
$ COLE_MCPIO_UBICACION <chr> "BOGOTÁ D.C.", "PUERTO COLOMBIA", "CALI"…
$ COLE_NATURALEZA <chr> "NO OFICIAL", "NO OFICIAL", "NO OFICIAL"…
$ COLE_NOMBRE_ESTABLECIMIENTO <chr> "ASPAEN GIMNASIO IRAGUA", "COLEGIO CRIST…
$ COLE_NOMBRE_SEDE <chr> "ASPAEN GIMNASIO IRAGUA", "COLEGIO CRIST…
$ COLE_SEDE_PRINCIPAL <chr> "S", "S", "S", "S", "S", "S", "S", "S", …
$ ESTU_COD_DEPTO_PRESENTACION <chr> "11", "08", "76", "76", "76", "11", "05"…
$ ESTU_COD_MCPIO_PRESENTACION <chr> "11001", "08001", "76001", "76001", "760…
$ ESTU_COD_RESIDE_DEPTO <chr> "11", "08", "76", "76", "76", "11", "05"…
$ ESTU_COD_RESIDE_MCPIO <chr> "11001", "08001", "76001", "76001", "760…
$ ESTU_DEPTO_PRESENTACION <chr> "BOGOTÁ", "ATLANTICO", "VALLE", "VALLE",…
$ ESTU_DEPTO_RESIDE <chr> "BOGOTÁ", "ATLANTICO", "VALLE", "VALLE",…
$ ESTU_ESTADOINVESTIGACION <chr> "PUBLICAR", "PUBLICAR", "PUBLICAR", "PUB…
$ ESTU_ESTUDIANTE <chr> "ESTUDIANTE", "ESTUDIANTE", "ESTUDIANTE"…
$ ESTU_FECHANACIMIENTO <chr> "29/10/2001", "08/08/2001", "08/06/2001"…
$ ESTU_GENERO <chr> "F", "F", "M", "F", "M", "M", "M", "M", …
$ ESTU_MCPIO_PRESENTACION <chr> "BOGOTÁ D.C.", "BARRANQUILLA", "CALI", "…
$ ESTU_MCPIO_RESIDE <chr> "BOGOTÁ D.C.", "BARRANQUILLA", "CALI", "…
$ ESTU_NACIONALIDAD <chr> "COLOMBIA", "COLOMBIA", "COLOMBIA", "COL…
$ ESTU_PAIS_RESIDE <chr> "COLOMBIA", "COLOMBIA", "COLOMBIA", "COL…
$ ESTU_PRIVADO_LIBERTAD <chr> "N", "N", "N", "N", "N", "N", "N", "N", …
$ FAMI_CUARTOSHOGAR <chr> "Tres", "Tres", "Tres", "Cuatro", "Seis …
$ FAMI_EDUCACIONMADRE <chr> "Educación profesional completa", "Educa…
$ FAMI_EDUCACIONPADRE <chr> "Postgrado", "Educación profesional comp…
$ FAMI_ESTRATOVIVIENDA <chr> "Estrato 5", "Estrato 5", "Estrato 3", "…
$ FAMI_PERSONASHOGAR <chr> "3 a 4", "3 a 4", "3 a 4", "5 a 6", "7 a…
$ FAMI_TIENEAUTOMOVIL <chr> "Si", "Si", "Si", "Si", "Si", "Si", "Si"…
$ FAMI_TIENECOMPUTADOR <chr> "Si", "Si", "No", "Si", "Si", "Si", "Si"…
$ FAMI_TIENEINTERNET <chr> "Si", "Si", "Si", "Si", "Si", "Si", "Si"…
$ FAMI_TIENELAVADORA <chr> "Si", "Si", "Si", "Si", "Si", "Si", "Si"…
$ DESEMP_INGLES <chr> "B+", "A2", "A2", "B+", "A1", "B+", "B+"…
$ PUNT_INGLES <dbl> 81, 66, 61, 80, 48, 84, 87, 77, 83, 80, …
$ PUNT_MATEMATICAS <dbl> 72, 62, 70, 72, 56, 64, 73, 64, 75, 59, …
$ PUNT_SOCIALES_CIUDADANAS <dbl> 73, 58, 58, 66, 45, 68, 70, 42, 66, 48, …
$ PUNT_C_NATURALES <dbl> 68, 60, 64, 63, 54, 66, 71, 58, 68, 61, …
$ PUNT_LECTURA_CRITICA <dbl> 65, 63, 59, 66, 54, 65, 70, 58, 75, 59, …
$ PUNT_GLOBAL <dbl> 352, 306, 313, 339, 260, 336, 361, 286, …
$ ANIO <dbl> 2019, 2019, 2019, 2019, 2019, 2019, 2019…
# To list the column names: names(saber11_2019)
# Keep one row per ESTU_CONSECUTIVO: sort by ID and, within each ID, by
# PERIODO descending, then keep the first row found for every ID.
# NOTE(review): if the repeated rows of an ID share the same PERIODO, the
# sort cannot rank them and the surviving copy depends on input order --
# confirm that repeated rows are exact copies.
saber11_2019_limpio <- saber11_2019 |>
  arrange(ESTU_CONSECUTIVO, desc(PERIODO)) |>
  distinct(ESTU_CONSECUTIVO, .keep_all = TRUE)

# IDs that occur more than once in the raw 2019 table.
saber11_2019 |>
  count(ESTU_CONSECUTIVO) |>
  filter(n > 1)
# A tibble: 546,232 × 2
ESTU_CONSECUTIVO n
<chr> <int>
1 SB11201910009506 2
2 SB11201910009559 2
3 SB11201910015837 2
4 SB11201910015865 2
5 SB11201910015905 2
6 SB11201910015940 2
7 SB11201910023037 2
8 SB11201910023145 2
9 SB11201910023191 2
10 SB11201910023241 2
# ℹ 546,222 more rows
# After deduplication no ID should occur more than once (expect 0 rows).
saber11_2019_limpio |>
  count(ESTU_CONSECUTIVO) |>
  filter(n > 1)
# A tibble: 0 × 2
# ℹ 2 variables: ESTU_CONSECUTIVO <chr>, n <int>
# Inspect the structure of the deduplicated 2019 table.
saber11_2019_limpio |> glimpse()
Rows: 558,751
Columns: 52
$ PERIODO <dbl> 20191, 20191, 20191, 20191, 20191, 20191…
$ ESTU_TIPODOCUMENTO <chr> "CC", "TI", "TI", "TI", "TI", "TI", "TI"…
$ ESTU_CONSECUTIVO <chr> "SB11201910000008", "SB11201910000010", …
$ COLE_AREA_UBICACION <chr> "URBANO", "URBANO", "URBANO", "URBANO", …
$ COLE_BILINGUE <chr> "N", "N", "N", "N", "N", "N", "N", "N", …
$ COLE_CALENDARIO <chr> "B", "B", "B", "B", "B", "B", "B", "B", …
$ COLE_CARACTER <chr> "ACADÉMICO", "ACADÉMICO", "ACADÉMICO", "…
$ COLE_COD_DANE_ESTABLECIMIENTO <dbl> 376001013246, 376001013246, 376001013246…
$ COLE_COD_DANE_SEDE <dbl> 376001013246, 376001013246, 376001013246…
$ COLE_COD_DEPTO_UBICACION <chr> "76", "76", "76", "76", "76", "76", "76"…
$ COLE_COD_MCPIO_UBICACION <chr> "76001", "76001", "76001", "76111", "761…
$ COLE_CODIGO_ICFES <chr> "016873", "016873", "016873", "087486", …
$ COLE_DEPTO_UBICACION <chr> "VALLE", "VALLE", "VALLE", "VALLE", "VAL…
$ COLE_GENERO <chr> "MIXTO", "MIXTO", "MIXTO", "MIXTO", "MIX…
$ COLE_JORNADA <chr> "MAÑANA", "MAÑANA", "MAÑANA", "MAÑANA", …
$ COLE_MCPIO_UBICACION <chr> "CALI", "CALI", "CALI", "GUADALAJARA DE …
$ COLE_NATURALEZA <chr> "NO OFICIAL", "NO OFICIAL", "NO OFICIAL"…
$ COLE_NOMBRE_ESTABLECIMIENTO <chr> "COLEGIO DEL SAGRADO CORAZÓN DE JESÚS-VA…
$ COLE_NOMBRE_SEDE <chr> "COLEGIO DEL SAGRADO CORAZÓN DE JESÚS-VA…
$ COLE_SEDE_PRINCIPAL <chr> "S", "S", "S", "S", "S", "S", "S", "S", …
$ ESTU_COD_DEPTO_PRESENTACION <chr> "76", "76", "76", "76", "76", "76", "76"…
$ ESTU_COD_MCPIO_PRESENTACION <chr> "76001", "76001", "76001", "76111", "761…
$ ESTU_COD_RESIDE_DEPTO <chr> "76", "76", "76", "76", "76", "76", "76"…
$ ESTU_COD_RESIDE_MCPIO <chr> "76001", "76001", "76001", "76111", "761…
$ ESTU_DEPTO_PRESENTACION <chr> "VALLE", "VALLE", "VALLE", "VALLE", "VAL…
$ ESTU_DEPTO_RESIDE <chr> "VALLE", "VALLE", "VALLE", "VALLE", "VAL…
$ ESTU_ESTADOINVESTIGACION <chr> "PUBLICAR", "PUBLICAR", "PUBLICAR", "PUB…
$ ESTU_ESTUDIANTE <chr> "ESTUDIANTE", "ESTUDIANTE", "ESTUDIANTE"…
$ ESTU_FECHANACIMIENTO <chr> "30/06/2000", "12/05/2001", "30/04/2002"…
$ ESTU_GENERO <chr> "M", "M", "F", "F", "M", "M", "M", "F", …
$ ESTU_MCPIO_PRESENTACION <chr> "CALI", "CALI", "CALI", "GUADALAJARA DE …
$ ESTU_MCPIO_RESIDE <chr> "CALI", "CALI", "CALI", "GUADALAJARA DE …
$ ESTU_NACIONALIDAD <chr> "COLOMBIA", "COLOMBIA", "COLOMBIA", "COL…
$ ESTU_PAIS_RESIDE <chr> "COLOMBIA", "COLOMBIA", "COLOMBIA", "COL…
$ ESTU_PRIVADO_LIBERTAD <chr> "N", "N", "N", "N", "N", "N", "N", "N", …
$ FAMI_CUARTOSHOGAR <chr> "Dos", "Tres", "Cuatro", "Uno", NA, "Cua…
$ FAMI_EDUCACIONMADRE <chr> "Educación profesional completa", "Educa…
$ FAMI_EDUCACIONPADRE <chr> "Educación profesional completa", "No sa…
$ FAMI_ESTRATOVIVIENDA <chr> "Estrato 3", "Estrato 5", "Estrato 5", "…
$ FAMI_PERSONASHOGAR <chr> "3 a 4", "3 a 4", "5 a 6", "1 a 2", NA, …
$ FAMI_TIENEAUTOMOVIL <chr> "Si", "Si", "Si", "No", NA, "No", "Si", …
$ FAMI_TIENECOMPUTADOR <chr> "Si", "Si", "Si", "Si", NA, "Si", "Si", …
$ FAMI_TIENEINTERNET <chr> "Si", "Si", "Si", "Si", "Si", NA, "Si", …
$ FAMI_TIENELAVADORA <chr> "Si", "Si", "Si", "Si", NA, "Si", "Si", …
$ DESEMP_INGLES <chr> "B1", "A2", "B1", "A2", "B1", "A2", "A2"…
$ PUNT_INGLES <dbl> 70, 65, 78, 61, 68, 63, 67, 55, 81, 73, …
$ PUNT_MATEMATICAS <dbl> 70, 70, 64, 56, 60, 63, 64, 65, 67, 68, …
$ PUNT_SOCIALES_CIUDADANAS <dbl> 69, 62, 69, 44, 54, 62, 63, 63, 68, 61, …
$ PUNT_C_NATURALES <dbl> 71, 65, 66, 49, 59, 62, 69, 62, 68, 66, …
$ PUNT_LECTURA_CRITICA <dbl> 68, 62, 62, 63, 57, 58, 75, 64, 67, 74, …
$ PUNT_GLOBAL <dbl> 348, 324, 331, 268, 292, 307, 338, 314, …
$ ANIO <dbl> 2019, 2019, 2019, 2019, 2019, 2019, 2019…
# Export the deduplicated 2019 table: first as CSV, then as an .xlsx file.
saber11_2019_limpio |>
  write_csv(
    "C:/Users/john/Desktop/Saber_11_2025/data/sinDuplicados/saber11_2019_limpio.csv"
  )
saber11_2019_limpio |>
  write.xlsx(
    file = "C:/Users/john/Desktop/Saber_11_2025/data/sinDuplicados/saber11_2019_limpio.xlsx",
    asTable = TRUE
  )
# LOAD THE ORIGINAL DATA (2020)
# No col_types is given, so read_csv() guesses every column type.
# NOTE(review): the printed column spec shows the DANE codes parsed as
# double -- confirm that is intended for identifier-like columns.
saber11_2020 <- read_csv(
  file = "C:/Users/john/Desktop/Saber_11_2025/data/UNIDOS/Saber11_2020_UNIDO.csv"
)
Rows: 15435 Columns: 52
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (42): ESTU_TIPODOCUMENTO, ESTU_CONSECUTIVO, COLE_AREA_UBICACION, COLE_BI...
dbl (10): PERIODO, COLE_COD_DANE_ESTABLECIMIENTO, COLE_COD_DANE_SEDE, PUNT_I...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Confirm the load by inspecting the structure of the raw 2020 table.
saber11_2020 |> glimpse()
Rows: 15,435
Columns: 52
$ PERIODO <dbl> 20201, 20201, 20201, 20201, 20201, 20201…
$ ESTU_TIPODOCUMENTO <chr> "TI", "TI", "TI", "TI", "TI", "TI", "TI"…
$ ESTU_CONSECUTIVO <chr> "SB11202010021926", "SB11202010027371", …
$ COLE_AREA_UBICACION <chr> "URBANO", "URBANO", "URBANO", "RURAL", "…
$ COLE_BILINGUE <chr> "S", "N", "N", "S", "N", "N", "N", NA, "…
$ COLE_CALENDARIO <chr> "B", "B", "B", "B", "B", "B", "B", "B", …
$ COLE_CARACTER <chr> "ACADÉMICO", "ACADÉMICO", "TÉCNICO/ACADÉ…
$ COLE_COD_DANE_ESTABLECIMIENTO <dbl> 376001001221, 311769000866, 376001000314…
$ COLE_COD_DANE_SEDE <dbl> 376001001221, 311769000866, 376001000314…
$ COLE_COD_DEPTO_UBICACION <chr> "76", "11", "76", "05", "76", "11", "76"…
$ COLE_COD_MCPIO_UBICACION <chr> "76001", "11001", "76001", "05380", "760…
$ COLE_CODIGO_ICFES <chr> "016741", "019513", "017228", "156604", …
$ COLE_DEPTO_UBICACION <chr> "VALLE", "BOGOTÁ", "VALLE", "ANTIOQUIA",…
$ COLE_GENERO <chr> "MIXTO", "FEMENINO", "MIXTO", "MIXTO", "…
$ COLE_JORNADA <chr> "COMPLETA", "COMPLETA", "MAÑANA", "COMPL…
$ COLE_MCPIO_UBICACION <chr> "CALI", "BOGOTÁ D.C.", "CALI", "LA ESTRE…
$ COLE_NATURALEZA <chr> "NO OFICIAL", "NO OFICIAL", "NO OFICIAL"…
$ COLE_NOMBRE_ESTABLECIMIENTO <chr> "COL BOLIVAR", "FUNDACION NUEVO MARYMOUN…
$ COLE_NOMBRE_SEDE <chr> "COL BOLIVAR", "FUND NUEVO MARYMOUNT", "…
$ COLE_SEDE_PRINCIPAL <chr> "S", "S", "S", "S", "S", "S", "S", "S", …
$ ESTU_COD_DEPTO_PRESENTACION <chr> "76", "11", "76", "05", "76", "11", "76"…
$ ESTU_COD_MCPIO_PRESENTACION <chr> "76001", "11001", "76001", "05001", "760…
$ ESTU_COD_RESIDE_DEPTO <chr> "76", "11", "76", "05", "76", "11", "76"…
$ ESTU_COD_RESIDE_MCPIO <chr> "76001", "11001", "76001", "05380", "760…
$ ESTU_DEPTO_PRESENTACION <chr> "VALLE", "BOGOTÁ", "VALLE", "ANTIOQUIA",…
$ ESTU_DEPTO_RESIDE <chr> "VALLE", "BOGOTÁ", "VALLE", "ANTIOQUIA",…
$ ESTU_ESTADOINVESTIGACION <chr> "PUBLICAR", "PUBLICAR", "PUBLICAR", "PUB…
$ ESTU_ESTUDIANTE <chr> "ESTUDIANTE", "ESTUDIANTE", "ESTUDIANTE"…
$ ESTU_FECHANACIMIENTO <chr> "17/01/0002", "30/10/2001", "18/09/0003"…
$ ESTU_GENERO <chr> "F", "F", "F", "M", "M", "F", "F", "F", …
$ ESTU_MCPIO_PRESENTACION <chr> "CALI", "BOGOTÁ D.C.", "CALI", "MEDELLÍN…
$ ESTU_MCPIO_RESIDE <chr> "CALI", "BOGOTÁ D.C.", "CALI", "LA ESTRE…
$ ESTU_NACIONALIDAD <chr> "COLOMBIA", "COLOMBIA", "COLOMBIA", "COL…
$ ESTU_PAIS_RESIDE <chr> "COLOMBIA", "COLOMBIA", "COLOMBIA", "COL…
$ ESTU_PRIVADO_LIBERTAD <chr> "N", "N", "N", "N", "N", "N", "N", "N", …
$ FAMI_CUARTOSHOGAR <chr> "Cinco", "Tres", "Tres", "Tres", "Cuatro…
$ FAMI_EDUCACIONMADRE <chr> "Educación profesional completa", "Educa…
$ FAMI_EDUCACIONPADRE <chr> "Educación profesional completa", "Postg…
$ FAMI_ESTRATOVIVIENDA <chr> "Estrato 6", "Estrato 6", "Estrato 2", "…
$ FAMI_PERSONASHOGAR <chr> "5 a 6", "3 a 4", "3 a 4", "3 a 4", "3 a…
$ FAMI_TIENEAUTOMOVIL <chr> "Si", "Si", "Si", "Si", "No", "Si", "Si"…
$ FAMI_TIENECOMPUTADOR <chr> "Si", "Si", "Si", "Si", "Si", "Si", "Si"…
$ FAMI_TIENEINTERNET <chr> "Si", "Si", "Si", "Si", "Si", "Si", "Si"…
$ FAMI_TIENELAVADORA <chr> "Si", "Si", "Si", "Si", "Si", "Si", "Si"…
$ DESEMP_INGLES <chr> "B+", "B+", "B1", "B+", "A-", "B1", "A1"…
$ PUNT_INGLES <dbl> 85, 79, 72, 81, 47, 77, 54, 72, 34, 81, …
$ PUNT_MATEMATICAS <dbl> 76, 73, 62, 74, 32, 70, 53, 62, 19, 73, …
$ PUNT_SOCIALES_CIUDADANAS <dbl> 66, 80, 55, 55, 48, 75, 54, 48, 30, 71, …
$ PUNT_C_NATURALES <dbl> 71, 75, 55, 61, 48, 65, 56, 59, 38, 77, …
$ PUNT_LECTURA_CRITICA <dbl> 70, 72, 61, 70, 52, 68, 58, 55, 37, 80, …
$ PUNT_GLOBAL <dbl> 359, 377, 297, 331, 226, 350, 276, 286, …
$ ANIO <dbl> 2020, 2020, 2020, 2020, 2020, 2020, 2020…
# To list the column names: names(saber11_2020)
# Keep one row per ESTU_CONSECUTIVO: sort by ID and, within each ID, by
# PERIODO descending, then keep the first row found for every ID.
saber11_2020_limpio <- saber11_2020 |>
  arrange(ESTU_CONSECUTIVO, desc(PERIODO)) |>
  distinct(ESTU_CONSECUTIVO, .keep_all = TRUE)

# IDs that occur more than once in the raw 2020 table.
saber11_2020 |>
  count(ESTU_CONSECUTIVO) |>
  filter(n > 1)
# A tibble: 0 × 2
# ℹ 2 variables: ESTU_CONSECUTIVO <chr>, n <int>
# After deduplication no ID should occur more than once (expect 0 rows).
saber11_2020_limpio |>
  count(ESTU_CONSECUTIVO) |>
  filter(n > 1)
# A tibble: 0 × 2
# ℹ 2 variables: ESTU_CONSECUTIVO <chr>, n <int>
# Inspect the structure of the deduplicated 2020 table.
saber11_2020_limpio |> glimpse()
Rows: 15,435
Columns: 52
$ PERIODO <dbl> 20201, 20201, 20201, 20201, 20201, 20201…
$ ESTU_TIPODOCUMENTO <chr> "TI", "TI", "TI", "TI", "TI", "TI", "TI"…
$ ESTU_CONSECUTIVO <chr> "SB11202010000025", "SB11202010000051", …
$ COLE_AREA_UBICACION <chr> "URBANO", "URBANO", "URBANO", "URBANO", …
$ COLE_BILINGUE <chr> "N", NA, NA, NA, NA, "N", "N", "N", "N",…
$ COLE_CALENDARIO <chr> "B", "B", "B", "B", "B", "B", "B", "B", …
$ COLE_CARACTER <chr> "ACADÉMICO", "ACADÉMICO", "ACADÉMICO", "…
$ COLE_COD_DANE_ESTABLECIMIENTO <dbl> 376001000306, 347001051105, 347001051105…
$ COLE_COD_DANE_SEDE <dbl> 376001000306, 347001051105, 347001051105…
$ COLE_COD_DEPTO_UBICACION <chr> "76", "47", "47", "47", "47", "76", "76"…
$ COLE_COD_MCPIO_UBICACION <chr> "76001", "47001", "47001", "47001", "470…
$ COLE_CODIGO_ICFES <chr> "017079", "143644", "143644", "143644", …
$ COLE_DEPTO_UBICACION <chr> "VALLE", "MAGDALENA", "MAGDALENA", "MAGD…
$ COLE_GENERO <chr> "MIXTO", "MIXTO", "MIXTO", "MIXTO", "MIX…
$ COLE_JORNADA <chr> "COMPLETA", "MAÑANA", "MAÑANA", "MAÑANA"…
$ COLE_MCPIO_UBICACION <chr> "CALI", "SANTA MARTA", "SANTA MARTA", "S…
$ COLE_NATURALEZA <chr> "NO OFICIAL", "NO OFICIAL", "NO OFICIAL"…
$ COLE_NOMBRE_ESTABLECIMIENTO <chr> "COLEGIO LEONÍSTICO LA MERCED", "REDCOL …
$ COLE_NOMBRE_SEDE <chr> "COLEGIO LEONÍSTICO LA MERCED", "COLEGIO…
$ COLE_SEDE_PRINCIPAL <chr> "S", "S", "S", "S", "S", "S", "S", "S", …
$ ESTU_COD_DEPTO_PRESENTACION <chr> "76", "47", "47", "47", "47", "76", "76"…
$ ESTU_COD_MCPIO_PRESENTACION <chr> "76001", "47001", "47001", "47001", "470…
$ ESTU_COD_RESIDE_DEPTO <chr> "76", "47", "47", "47", "47", "76", "76"…
$ ESTU_COD_RESIDE_MCPIO <chr> "76122", "47660", "47001", "47001", "470…
$ ESTU_DEPTO_PRESENTACION <chr> "VALLE", "MAGDALENA", "MAGDALENA", "MAGD…
$ ESTU_DEPTO_RESIDE <chr> "VALLE", "MAGDALENA", "MAGDALENA", "MAGD…
$ ESTU_ESTADOINVESTIGACION <chr> "PUBLICAR", "PUBLICAR", "PUBLICAR", "PUB…
$ ESTU_ESTUDIANTE <chr> "ESTUDIANTE", "ESTUDIANTE", "ESTUDIANTE"…
$ ESTU_FECHANACIMIENTO <chr> "15/02/2002", "20/09/2002", "18/03/2003"…
$ ESTU_GENERO <chr> "M", "F", "F", "F", "M", "M", "F", "M", …
$ ESTU_MCPIO_PRESENTACION <chr> "CALI", "SANTA MARTA", "SANTA MARTA", "S…
$ ESTU_MCPIO_RESIDE <chr> "CAICEDONIA", "SABANAS DE SAN ÁNGEL", "S…
$ ESTU_NACIONALIDAD <chr> "COLOMBIA", "COLOMBIA", "COLOMBIA", "COL…
$ ESTU_PAIS_RESIDE <chr> "COLOMBIA", "COLOMBIA", "COLOMBIA", "COL…
$ ESTU_PRIVADO_LIBERTAD <chr> "N", "N", "N", "N", "N", "N", "N", "N", …
$ FAMI_CUARTOSHOGAR <chr> "Uno", NA, "Tres", "Tres", "Tres", "Tres…
$ FAMI_EDUCACIONMADRE <chr> "Secundaria (Bachillerato) incompleta", …
$ FAMI_EDUCACIONPADRE <chr> "Educación profesional completa", "Postg…
$ FAMI_ESTRATOVIVIENDA <chr> "Estrato 3", "Estrato 6", "Estrato 4", "…
$ FAMI_PERSONASHOGAR <chr> "1 a 2", NA, "3 a 4", "3 a 4", "5 a 6", …
$ FAMI_TIENEAUTOMOVIL <chr> "No", NA, "Si", "Si", "Si", "Si", "No", …
$ FAMI_TIENECOMPUTADOR <chr> "No", NA, "Si", "Si", "Si", "Si", "Si", …
$ FAMI_TIENEINTERNET <chr> "No", "Si", "Si", "Si", "Si", "Si", "Si"…
$ FAMI_TIENELAVADORA <chr> "Si", NA, "Si", "Si", "Si", "Si", "Si", …
$ DESEMP_INGLES <chr> "A2", "B+", "B+", "B+", "B+", "B1", "A-"…
$ PUNT_INGLES <dbl> 58, 83, 79, 81, 81, 76, 37, 65, 53, 49, …
$ PUNT_MATEMATICAS <dbl> 57, 60, 69, 74, 62, 68, 42, 53, 50, 52, …
$ PUNT_SOCIALES_CIUDADANAS <dbl> 33, 70, 62, 61, 43, 81, 40, 65, 41, 51, …
$ PUNT_C_NATURALES <dbl> 46, 65, 70, 55, 49, 72, 50, 58, 54, 42, …
$ PUNT_LECTURA_CRITICA <dbl> 53, 67, 70, 66, 46, 68, 49, 57, 48, 46, …
$ PUNT_GLOBAL <dbl> 240, 334, 343, 327, 262, 363, 223, 294, …
$ ANIO <dbl> 2020, 2020, 2020, 2020, 2020, 2020, 2020…
# Export the deduplicated 2020 table: first as CSV, then as an .xlsx file.
saber11_2020_limpio |>
  write_csv(
    "C:/Users/john/Desktop/Saber_11_2025/data/sinDuplicados/saber11_2020_limpio.csv"
  )
saber11_2020_limpio |>
  write.xlsx(
    file = "C:/Users/john/Desktop/Saber_11_2025/data/sinDuplicados/saber11_2020_limpio.xlsx",
    asTable = TRUE
  )
# LOAD THE ORIGINAL DATA (2021)
# No col_types is given, so read_csv() guesses every column type.
saber11_2021 <- read_csv(
  file = "C:/Users/john/Desktop/Saber_11_2025/data/UNIDOS/Saber11_2021_UNIDO.csv"
)
Rows: 15528 Columns: 52
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (42): ESTU_TIPODOCUMENTO, ESTU_CONSECUTIVO, COLE_AREA_UBICACION, COLE_BI...
dbl (10): PERIODO, COLE_COD_DANE_ESTABLECIMIENTO, COLE_COD_DANE_SEDE, PUNT_I...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Confirm the load by inspecting the structure of the raw 2021 table.
saber11_2021 |> glimpse()
Rows: 15,528
Columns: 52
$ PERIODO <dbl> 20211, 20211, 20211, 20211, 20211, 20211…
$ ESTU_TIPODOCUMENTO <chr> "TI", "CR", "TI", "TI", "TI", "TI", "TI"…
$ ESTU_CONSECUTIVO <chr> "SB11202110001547", "SB11202110021004", …
$ COLE_AREA_UBICACION <chr> "RURAL", "URBANO", "URBANO", "URBANO", "…
$ COLE_BILINGUE <chr> "S", NA, "S", "N", "S", NA, "S", NA, "N"…
$ COLE_CALENDARIO <chr> "B", "A", "B", "B", "B", "B", "B", "B", …
$ COLE_CARACTER <chr> "ACADÉMICO", "ACADÉMICO", "ACADÉMICO", "…
$ COLE_COD_DANE_ESTABLECIMIENTO <dbl> 425758800009, 425799000637, 447001004284…
$ COLE_COD_DANE_SEDE <dbl> 425758800009, 425799000637, 447001004284…
$ COLE_COD_DEPTO_UBICACION <chr> "25", "25", "47", "76", "11", "76", "11"…
$ COLE_COD_MCPIO_UBICACION <chr> "25758", "25799", "47001", "76001", "110…
$ COLE_CODIGO_ICFES <chr> "043562", "136754", "053363", "066522", …
$ COLE_DEPTO_UBICACION <chr> "CUNDINAMARCA", "CUNDINAMARCA", "MAGDALE…
$ COLE_GENERO <chr> "MIXTO", "MIXTO", "MIXTO", "MIXTO", "MIX…
$ COLE_JORNADA <chr> "COMPLETA", "MAÑANA", "COMPLETA", "MAÑAN…
$ COLE_MCPIO_UBICACION <chr> "SOPÓ", "TENJO", "SANTA MARTA", "CALI", …
$ COLE_NATURALEZA <chr> "NO OFICIAL", "NO OFICIAL", "NO OFICIAL"…
$ COLE_NOMBRE_ESTABLECIMIENTO <chr> "COL CAMPOALEGRE LTDA", "COLEGIO CAMPEST…
$ COLE_NOMBRE_SEDE <chr> "COLEGIO CAMPOALEGRE", "COLEGIO CAMPESTR…
$ COLE_SEDE_PRINCIPAL <chr> "S", "S", "S", "S", "S", "S", "S", "S", …
$ ESTU_COD_DEPTO_PRESENTACION <chr> "25", "25", "47", "76", "11", "76", "11"…
$ ESTU_COD_MCPIO_PRESENTACION <chr> "25899", "25269", "47001", "76001", "110…
$ ESTU_COD_RESIDE_DEPTO <chr> "25", "25", "47", "76", "11", "76", "11"…
$ ESTU_COD_RESIDE_MCPIO <chr> "25175", "25214", "47001", "76001", "110…
$ ESTU_DEPTO_PRESENTACION <chr> "CUNDINAMARCA", "CUNDINAMARCA", "MAGDALE…
$ ESTU_DEPTO_RESIDE <chr> "CUNDINAMARCA", "CUNDINAMARCA", "MAGDALE…
$ ESTU_ESTADOINVESTIGACION <chr> "PUBLICAR", "PUBLICAR", "PUBLICAR", "PUB…
$ ESTU_ESTUDIANTE <chr> "ESTUDIANTE", "ESTUDIANTE", "ESTUDIANTE"…
$ ESTU_FECHANACIMIENTO <chr> "29/08/2002", "01/07/2003", "21/06/2003"…
$ ESTU_GENERO <chr> "M", "M", "M", "F", "M", "M", "F", "F", …
$ ESTU_MCPIO_PRESENTACION <chr> "ZIPAQUIRÁ", "FACATATIVÁ", "SANTA MARTA"…
$ ESTU_MCPIO_RESIDE <chr> "CHÍA", "COTA", "SANTA MARTA", "CALI", "…
$ ESTU_NACIONALIDAD <chr> "COLOMBIA", "COLOMBIA", "COLOMBIA", "COL…
$ ESTU_PAIS_RESIDE <chr> "COLOMBIA", "COLOMBIA", "COLOMBIA", "COL…
$ ESTU_PRIVADO_LIBERTAD <chr> "N", "N", "N", "N", "N", "N", "N", "N", …
$ FAMI_CUARTOSHOGAR <chr> "Tres", "Tres", NA, "Tres", "Dos", "Cinc…
$ FAMI_EDUCACIONMADRE <chr> "Educación profesional completa", "Educa…
$ FAMI_EDUCACIONPADRE <chr> "Postgrado", "Educación profesional comp…
$ FAMI_ESTRATOVIVIENDA <chr> "Estrato 4", "Estrato 5", NA, "Estrato 3…
$ FAMI_PERSONASHOGAR <chr> "3 a 4", "1 a 2", NA, "3 a 4", "1 a 2", …
$ FAMI_TIENEAUTOMOVIL <chr> "Si", "Si", NA, "Si", "Si", "No", "Si", …
$ FAMI_TIENECOMPUTADOR <chr> "Si", "Si", NA, "Si", "Si", "Si", "Si", …
$ FAMI_TIENEINTERNET <chr> "Si", "Si", NA, "Si", "Si", "Si", "Si", …
$ FAMI_TIENELAVADORA <chr> "Si", "Si", NA, "Si", "Si", "Si", "Si", …
$ DESEMP_INGLES <chr> "B+", "A2", "B+", "B1", "B+", "A-", "B+"…
$ PUNT_INGLES <dbl> 83, 66, 87, 70, 89, 45, 100, 79, 33, 83,…
$ PUNT_MATEMATICAS <dbl> 74, 59, 80, 69, 65, 39, 76, 56, 51, 72, …
$ PUNT_SOCIALES_CIUDADANAS <dbl> 64, 60, 74, 63, 74, 55, 70, 56, 51, 77, …
$ PUNT_C_NATURALES <dbl> 61, 56, 62, 71, 65, 52, 63, 57, 44, 68, …
$ PUNT_LECTURA_CRITICA <dbl> 69, 66, 69, 70, 71, 50, 73, 65, 51, 69, …
$ PUNT_GLOBAL <dbl> 341, 303, 362, 342, 352, 243, 364, 300, …
$ ANIO <dbl> 2021, 2021, 2021, 2021, 2021, 2021, 2021…
# To list the column names: names(saber11_2021)
# Keep one row per ESTU_CONSECUTIVO: sort by ID and, within each ID, by
# PERIODO descending, then keep the first row found for every ID.
saber11_2021_limpio <- saber11_2021 |>
  arrange(ESTU_CONSECUTIVO, desc(PERIODO)) |>
  distinct(ESTU_CONSECUTIVO, .keep_all = TRUE)

# IDs that occur more than once in the raw 2021 table.
saber11_2021 |>
  count(ESTU_CONSECUTIVO) |>
  filter(n > 1)
# A tibble: 0 × 2
# ℹ 2 variables: ESTU_CONSECUTIVO <chr>, n <int>
# After deduplication no ID should occur more than once (expect 0 rows).
saber11_2021_limpio |>
  count(ESTU_CONSECUTIVO) |>
  filter(n > 1)
# A tibble: 0 × 2
# ℹ 2 variables: ESTU_CONSECUTIVO <chr>, n <int>
# Optional check (disabled): does a variable keep the same set of distinct
# values before and after deduplication?
# unique_antes <- unique(saber11_2021$COLE_BILINGUE)
# unique_despues <- unique(saber11_2021_limpio$COLE_BILINGUE)
# unique_antes
# unique_despues
# setequal(unique_antes, unique_despues)

# Inspect the structure of the deduplicated 2021 table.
saber11_2021_limpio |> glimpse()
Rows: 15,528
Columns: 52
$ PERIODO <dbl> 20211, 20211, 20211, 20211, 20211, 20211…
$ ESTU_TIPODOCUMENTO <chr> "TI", "TI", "TI", "TI", "TI", "TI", "TI"…
$ ESTU_CONSECUTIVO <chr> "SB11202110000026", "SB11202110000029", …
$ COLE_AREA_UBICACION <chr> "RURAL", "RURAL", "RURAL", "RURAL", "RUR…
$ COLE_BILINGUE <chr> NA, NA, NA, NA, NA, NA, "N", NA, NA, NA,…
$ COLE_CALENDARIO <chr> "B", "B", "B", "B", "B", "B", "B", "B", …
$ COLE_CARACTER <chr> "ACADÉMICO", "ACADÉMICO", "ACADÉMICO", "…
$ COLE_COD_DANE_ESTABLECIMIENTO <dbl> 417001005373, 417001005373, 417001005373…
$ COLE_COD_DANE_SEDE <dbl> 417001005373, 417001005373, 417001005373…
$ COLE_COD_DEPTO_UBICACION <chr> "17", "17", "17", "17", "17", "17", "76"…
$ COLE_COD_MCPIO_UBICACION <chr> "17873", "17873", "17873", "17873", "178…
$ COLE_CODIGO_ICFES <chr> "156091", "156091", "156091", "156091", …
$ COLE_DEPTO_UBICACION <chr> "CALDAS", "CALDAS", "CALDAS", "CALDAS", …
$ COLE_GENERO <chr> "MIXTO", "MIXTO", "MIXTO", "MIXTO", "MIX…
$ COLE_JORNADA <chr> "COMPLETA", "COMPLETA", "COMPLETA", "COM…
$ COLE_MCPIO_UBICACION <chr> "VILLAMARÍA", "VILLAMARÍA", "VILLAMARÍA"…
$ COLE_NATURALEZA <chr> "NO OFICIAL", "NO OFICIAL", "NO OFICIAL"…
$ COLE_NOMBRE_ESTABLECIMIENTO <chr> "COL. ANGLOHISPANO", "COL. ANGLOHISPANO"…
$ COLE_NOMBRE_SEDE <chr> "COL. ANGLOHISPANO - SEDE PRINCIPAL", "C…
$ COLE_SEDE_PRINCIPAL <chr> "S", "S", "S", "S", "S", "S", "S", "S", …
$ ESTU_COD_DEPTO_PRESENTACION <chr> "17", "17", "17", "17", "17", "17", "76"…
$ ESTU_COD_MCPIO_PRESENTACION <chr> "17001", "17001", "17001", "17001", "170…
$ ESTU_COD_RESIDE_DEPTO <chr> "17", "17", "17", "17", "17", "17", "76"…
$ ESTU_COD_RESIDE_MCPIO <chr> "17001", "17873", "17001", "17873", "170…
$ ESTU_DEPTO_PRESENTACION <chr> "CALDAS", "CALDAS", "CALDAS", "CALDAS", …
$ ESTU_DEPTO_RESIDE <chr> "CALDAS", "CALDAS", "CALDAS", "CALDAS", …
$ ESTU_ESTADOINVESTIGACION <chr> "PUBLICAR", "PUBLICAR", "PUBLICAR", "PUB…
$ ESTU_ESTUDIANTE <chr> "ESTUDIANTE", "ESTUDIANTE", "ESTUDIANTE"…
$ ESTU_FECHANACIMIENTO <chr> "22/04/2003", "26/06/2003", "25/12/2003"…
$ ESTU_GENERO <chr> "F", "M", "M", "F", "M", "M", "M", "F", …
$ ESTU_MCPIO_PRESENTACION <chr> "MANIZALES", "MANIZALES", "MANIZALES", "…
$ ESTU_MCPIO_RESIDE <chr> "MANIZALES", "VILLAMARÍA", "MANIZALES", …
$ ESTU_NACIONALIDAD <chr> "COLOMBIA", "COLOMBIA", "COLOMBIA", "COL…
$ ESTU_PAIS_RESIDE <chr> "COLOMBIA", "COLOMBIA", "COLOMBIA", "COL…
$ ESTU_PRIVADO_LIBERTAD <chr> "N", "N", "N", "N", "N", "N", "N", "N", …
$ FAMI_CUARTOSHOGAR <chr> "Dos", "Tres", "Cuatro", "Cinco", "Uno",…
$ FAMI_EDUCACIONMADRE <chr> "Técnica o tecnológica completa", "Educa…
$ FAMI_EDUCACIONPADRE <chr> "Secundaria (Bachillerato) completa", "S…
$ FAMI_ESTRATOVIVIENDA <chr> "Estrato 5", "Estrato 4", "Estrato 6", "…
$ FAMI_PERSONASHOGAR <chr> "1 a 2", "3 a 4", "5 a 6", "5 a 6", "3 a…
$ FAMI_TIENEAUTOMOVIL <chr> "Si", "Si", "Si", "Si", "Si", "Si", NA, …
$ FAMI_TIENECOMPUTADOR <chr> "Si", "Si", "Si", "Si", "Si", "Si", NA, …
$ FAMI_TIENEINTERNET <chr> "Si", "Si", "Si", "Si", "Si", "Si", NA, …
$ FAMI_TIENELAVADORA <chr> "Si", "Si", "Si", "Si", "Si", "Si", NA, …
$ DESEMP_INGLES <chr> "A2", "B1", "B1", "B1", "A2", "A2", "A1"…
$ PUNT_INGLES <dbl> 65, 68, 77, 76, 59, 59, 57, 85, 81, 60, …
$ PUNT_MATEMATICAS <dbl> 47, 61, 67, 48, 56, 30, 51, 65, 48, 46, …
$ PUNT_SOCIALES_CIUDADANAS <dbl> 54, 63, 66, 54, 60, 38, 31, 51, 46, 45, …
$ PUNT_C_NATURALES <dbl> 45, 50, 68, 51, 59, 35, 42, 61, 48, 39, …
$ PUNT_LECTURA_CRITICA <dbl> 49, 69, 69, 60, 61, 32, 41, 61, 55, 40, …
$ PUNT_GLOBAL <dbl> 250, 307, 341, 275, 295, 178, 212, 307, …
$ ANIO <dbl> 2021, 2021, 2021, 2021, 2021, 2021, 2021…
# Export the deduplicated 2021 table: first as CSV, then as an .xlsx file.
saber11_2021_limpio |>
  write_csv(
    "C:/Users/john/Desktop/Saber_11_2025/data/sinDuplicados/saber11_2021_limpio.csv"
  )
saber11_2021_limpio |>
  write.xlsx(
    file = "C:/Users/john/Desktop/Saber_11_2025/data/sinDuplicados/saber11_2021_limpio.xlsx",
    asTable = TRUE
  )
# LOAD THE ORIGINAL DATA (2022)
# No col_types is given, so read_csv() guesses every column type.
saber11_2022 <- read_csv(
  file = "C:/Users/john/Desktop/Saber_11_2025/data/UNIDOS/Saber11_2022_UNIDO.csv"
)
Rows: 1085937 Columns: 52
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (42): ESTU_TIPODOCUMENTO, ESTU_CONSECUTIVO, COLE_AREA_UBICACION, COLE_BI...
dbl (10): PERIODO, COLE_COD_DANE_ESTABLECIMIENTO, COLE_COD_DANE_SEDE, PUNT_I...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Confirm the load by inspecting the structure of the raw 2022 table.
saber11_2022 |> glimpse()
Rows: 1,085,937
Columns: 52
$ PERIODO <dbl> 20221, 20221, 20221, 20221, 20221, 20221…
$ ESTU_TIPODOCUMENTO <chr> "TI", "TI", "TI", "TI", "TI", "CC", "TI"…
$ ESTU_CONSECUTIVO <chr> "AC202210004339", "AC202210008120", "AC2…
$ COLE_AREA_UBICACION <chr> "URBANO", "RURAL", "URBANO", "URBANO", "…
$ COLE_BILINGUE <chr> NA, "N", "N", "S", "N", "N", "N", "N", "…
$ COLE_CALENDARIO <chr> "B", "B", "B", "B", "B", "A", "B", "A", …
$ COLE_CARACTER <chr> "ACADÉMICO", "ACADÉMICO", "TÉCNICO/ACADÉ…
$ COLE_COD_DANE_ESTABLECIMIENTO <dbl> 376520005862, 376001000721, 376001000543…
$ COLE_COD_DANE_SEDE <dbl> 376520005862, 376001000721, 376001000543…
$ COLE_COD_DEPTO_UBICACION <chr> "76", "76", "76", "11", "76", "11", "76"…
$ COLE_COD_MCPIO_UBICACION <chr> "76520", "76001", "76001", "11001", "760…
$ COLE_CODIGO_ICFES <chr> "129601", "016980", "628131", "055988", …
$ COLE_DEPTO_UBICACION <chr> "VALLE", "VALLE", "VALLE", "BOGOTÁ", "VA…
$ COLE_GENERO <chr> "MIXTO", "MIXTO", "MIXTO", "MIXTO", "MIX…
$ COLE_JORNADA <chr> "COMPLETA", "COMPLETA", "COMPLETA", "COM…
$ COLE_MCPIO_UBICACION <chr> "PALMIRA", "CALI", "CALI", "BOGOTÁ D.C."…
$ COLE_NATURALEZA <chr> "NO OFICIAL", "NO OFICIAL", "NO OFICIAL"…
$ COLE_NOMBRE_ESTABLECIMIENTO <chr> "LIC CAMPESTRE CRECER", "COLEGIO FRANCIS…
$ COLE_NOMBRE_SEDE <chr> "LIC CAMPESTRE CRECER", "COLEGIO FRANCIS…
$ COLE_SEDE_PRINCIPAL <chr> "S", "S", "S", "S", "S", "S", "S", "S", …
$ ESTU_COD_DEPTO_PRESENTACION <chr> "76", "76", "76", "11", "76", "11", "76"…
$ ESTU_COD_MCPIO_PRESENTACION <chr> "76520", "76001", "76001", "11001", "760…
$ ESTU_COD_RESIDE_DEPTO <chr> "76", "76", "76", "11", "76", "11", "76"…
$ ESTU_COD_RESIDE_MCPIO <chr> "76520", "76001", "76001", "11001", "760…
$ ESTU_DEPTO_PRESENTACION <chr> "VALLE", "VALLE", "VALLE", "BOGOTÁ", "VA…
$ ESTU_DEPTO_RESIDE <chr> "VALLE", "VALLE", "VALLE", "BOGOTÁ", "VA…
$ ESTU_ESTADOINVESTIGACION <chr> "PUBLICAR", "PUBLICAR", "PUBLICAR", "PUB…
$ ESTU_ESTUDIANTE <chr> "ESTUDIANTE", "ESTUDIANTE", "ESTUDIANTE"…
$ ESTU_FECHANACIMIENTO <chr> "27/06/2005", "24/01/2005", "29/09/2006"…
$ ESTU_GENERO <chr> "M", "M", "F", "F", "F", "F", "F", "M", …
$ ESTU_MCPIO_PRESENTACION <chr> "PALMIRA", "CALI", "CALI", "BOGOTÁ D.C."…
$ ESTU_MCPIO_RESIDE <chr> "PALMIRA", "CALI", "CALI", "BOGOTÁ D.C."…
$ ESTU_NACIONALIDAD <chr> "COLOMBIA", "COLOMBIA", "COLOMBIA", "COL…
$ ESTU_PAIS_RESIDE <chr> "COLOMBIA", "COLOMBIA", "COLOMBIA", "COL…
$ ESTU_PRIVADO_LIBERTAD <chr> "N", "N", "N", "N", "N", "N", "N", "N", …
$ FAMI_CUARTOSHOGAR <chr> "Tres", "Tres", "Cinco", "Cuatro", "Tres…
$ FAMI_EDUCACIONMADRE <chr> "Educación profesional completa", "Postg…
$ FAMI_EDUCACIONPADRE <chr> "Postgrado", "Postgrado", "Secundaria (B…
$ FAMI_ESTRATOVIVIENDA <chr> "Estrato 3", "Estrato 4", "Estrato 2", "…
$ FAMI_PERSONASHOGAR <chr> "3 a 4", "5 a 6", "7 a 8", "5 a 6", "3 a…
$ FAMI_TIENEAUTOMOVIL <chr> "Si", "No", "No", "Si", "Si", NA, "No", …
$ FAMI_TIENECOMPUTADOR <chr> "Si", "Si", "Si", "Si", "Si", NA, "Si", …
$ FAMI_TIENEINTERNET <chr> "Si", "Si", "Si", "Si", "Si", NA, "Si", …
$ FAMI_TIENELAVADORA <chr> "Si", "Si", "Si", "Si", "Si", NA, "Si", …
$ DESEMP_INGLES <chr> "B+", "B1", "B1", "B1", "B1", "A-", "A1"…
$ PUNT_INGLES <dbl> 80, 74, 68, 78, 69, 47, 57, 32, 80, 82, …
$ PUNT_MATEMATICAS <dbl> 77, 100, 69, 63, 56, 54, 71, 32, 56, 53,…
$ PUNT_SOCIALES_CIUDADANAS <dbl> 81, 76, 70, 62, 62, 50, 59, 33, 59, 59, …
$ PUNT_C_NATURALES <dbl> 74, 74, 67, 64, 59, 43, 62, 50, 59, 58, …
$ PUNT_LECTURA_CRITICA <dbl> 67, 66, 75, 58, 66, 54, 64, 57, 64, 65, …
$ PUNT_GLOBAL <dbl> 376, 393, 350, 315, 307, 250, 317, 211, …
$ ANIO <dbl> 2022, 2022, 2022, 2022, 2022, 2022, 2022…
# To list the column names: names(saber11_2022)
# Keep one row per ESTU_CONSECUTIVO: sort by ID and, within each ID, by
# PERIODO descending, then keep the first row found for every ID.
# NOTE(review): if the repeated rows of an ID share the same PERIODO, the
# sort cannot rank them and the surviving copy depends on input order --
# confirm that repeated rows are exact copies.
saber11_2022_limpio <- saber11_2022 |>
  arrange(ESTU_CONSECUTIVO, desc(PERIODO)) |>
  distinct(ESTU_CONSECUTIVO, .keep_all = TRUE)

# IDs that occur more than once in the raw 2022 table.
saber11_2022 |>
  count(ESTU_CONSECUTIVO) |>
  filter(n > 1)
# A tibble: 532,792 × 2
ESTU_CONSECUTIVO n
<chr> <int>
1 SB11202240000034 2
2 SB11202240000036 2
3 SB11202240000037 2
4 SB11202240000039 2
5 SB11202240000040 2
6 SB11202240000044 2
7 SB11202240000046 2
8 SB11202240000048 2
9 SB11202240000051 2
10 SB11202240000055 2
# ℹ 532,782 more rows
# After deduplication no ID should occur more than once (expect 0 rows).
saber11_2022_limpio |>
  count(ESTU_CONSECUTIVO) |>
  filter(n > 1)
# A tibble: 0 × 2
# ℹ 2 variables: ESTU_CONSECUTIVO <chr>, n <int>
# Verificar si una variable tiene los mismos valores de la otra:
#unique_antes <- unique(saber11_2021$COLE_BILINGUE)
#unique_despues <- unique(saber11_2021_limpio$COLE_BILINGUE)
#unique_antes
#unique_despues
#setequal(unique_antes, unique_despues)
# Print a compact column-by-column overview (row count, column count, types
# and first values) of the de-duplicated 2022 table.
glimpse(saber11_2022_limpio)
Rows: 552,841
Columns: 52
$ PERIODO <dbl> 20221, 20221, 20221, 20221, 20221, 20221…
$ ESTU_TIPODOCUMENTO <chr> "TI", "CC", "TI", "CC", "TI", "TI", "TI"…
$ ESTU_CONSECUTIVO <chr> "AC202210000131", "AC202210000142", "AC2…
$ COLE_AREA_UBICACION <chr> "URBANO", "URBANO", "RURAL", "RURAL", "R…
$ COLE_BILINGUE <chr> "N", NA, "N", "N", "N", "N", "N", NA, "N…
$ COLE_CALENDARIO <chr> "A", "B", "B", "B", "B", "B", "B", "A", …
$ COLE_CARACTER <chr> "ACADÉMICO", NA, "ACADÉMICO", "ACADÉMICO…
$ COLE_COD_DANE_ESTABLECIMIENTO <dbl> 311001108684, 376001043463, 425377000529…
$ COLE_COD_DANE_SEDE <dbl> 311001108684, 376001043463, 425377000529…
$ COLE_COD_DEPTO_UBICACION <chr> "11", "76", "25", "25", "25", "25", "25"…
$ COLE_COD_MCPIO_UBICACION <chr> "11001", "76001", "25377", "25377", "253…
$ COLE_CODIGO_ICFES <chr> "140632", "714519", "106286", "106286", …
$ COLE_DEPTO_UBICACION <chr> "BOGOTÁ", "VALLE", "CUNDINAMARCA", "CUND…
$ COLE_GENERO <chr> "MIXTO", "MIXTO", "MIXTO", "MIXTO", "MIX…
$ COLE_JORNADA <chr> "MAÑANA", "TARDE", "COMPLETA", "COMPLETA…
$ COLE_MCPIO_UBICACION <chr> "BOGOTÁ D.C.", "CALI", "LA CALERA", "LA …
$ COLE_NATURALEZA <chr> "NO OFICIAL", "NO OFICIAL", "NO OFICIAL"…
$ COLE_NOMBRE_ESTABLECIMIENTO <chr> "COLEGIO INSTITUTO SANTIAGO DE COMPOSTEL…
$ COLE_NOMBRE_SEDE <chr> "COLEGIO INSTITUTO SANTIAGO DE COMPOSTEL…
$ COLE_SEDE_PRINCIPAL <chr> "S", "S", "S", "S", "S", "S", "S", "N", …
$ ESTU_COD_DEPTO_PRESENTACION <chr> "11", "76", "11", "11", "11", "11", "11"…
$ ESTU_COD_MCPIO_PRESENTACION <chr> "11001", "76001", "11001", "11001", "110…
$ ESTU_COD_RESIDE_DEPTO <chr> "25", "76", "11", "11", "11", "11", "11"…
$ ESTU_COD_RESIDE_MCPIO <chr> "25377", "76001", "11001", "11001", "110…
$ ESTU_DEPTO_PRESENTACION <chr> "BOGOTÁ", "VALLE", "BOGOTÁ", "BOGOTÁ", "…
$ ESTU_DEPTO_RESIDE <chr> "CUNDINAMARCA", "VALLE", "BOGOTÁ", "BOGO…
$ ESTU_ESTADOINVESTIGACION <chr> "PUBLICAR", "PUBLICAR", "PUBLICAR", "PUB…
$ ESTU_ESTUDIANTE <chr> "ESTUDIANTE", "ESTUDIANTE", "ESTUDIANTE"…
$ ESTU_FECHANACIMIENTO <chr> "08/01/2004", "01/10/2000", "03/03/2004"…
$ ESTU_GENERO <chr> "F", "M", "F", "F", "F", "M", "M", "M", …
$ ESTU_MCPIO_PRESENTACION <chr> "BOGOTÁ D.C.", "CALI", "BOGOTÁ D.C.", "B…
$ ESTU_MCPIO_RESIDE <chr> "LA CALERA", "CALI", "BOGOTÁ D.C.", "BOG…
$ ESTU_NACIONALIDAD <chr> "COLOMBIA", "COLOMBIA", "COLOMBIA", "COL…
$ ESTU_PAIS_RESIDE <chr> "COLOMBIA", "COLOMBIA", "COLOMBIA", "COL…
$ ESTU_PRIVADO_LIBERTAD <chr> "N", "N", "N", "N", "N", "N", "N", "S", …
$ FAMI_CUARTOSHOGAR <chr> "Dos", "Dos", "Cuatro", "Tres", "Seis o …
$ FAMI_EDUCACIONMADRE <chr> "Postgrado", "Secundaria (Bachillerato) …
$ FAMI_EDUCACIONPADRE <chr> "Educación profesional completa", "Secun…
$ FAMI_ESTRATOVIVIENDA <chr> NA, "Estrato 1", "Estrato 2", "Estrato 6…
$ FAMI_PERSONASHOGAR <chr> "3 a 4", "3 a 4", "5 a 6", "3 a 4", "5 a…
$ FAMI_TIENEAUTOMOVIL <chr> "Si", "No", "Si", "Si", "Si", "Si", NA, …
$ FAMI_TIENECOMPUTADOR <chr> "Si", "No", "Si", "Si", "Si", "Si", NA, …
$ FAMI_TIENEINTERNET <chr> "Si", "No", "Si", "Si", "Si", "Si", "Si"…
$ FAMI_TIENELAVADORA <chr> "Si", "Si", "Si", "Si", "Si", "Si", NA, …
$ DESEMP_INGLES <chr> "B+", "A2", "B1", "B+", "B+", "B+", "B+"…
$ PUNT_INGLES <dbl> 81, 67, 78, 100, 90, 83, 82, 50, 85, 79,…
$ PUNT_MATEMATICAS <dbl> 70, 47, 66, 65, 72, 71, 71, 68, 65, 69, …
$ PUNT_SOCIALES_CIUDADANAS <dbl> 73, 61, 69, 65, 71, 65, 68, 57, 68, 73, …
$ PUNT_C_NATURALES <dbl> 74, 59, 66, 65, 74, 66, 68, 55, 66, 70, …
$ PUNT_LECTURA_CRITICA <dbl> 72, 55, 76, 67, 66, 70, 69, 60, 75, 68, …
$ PUNT_GLOBAL <dbl> 365, 282, 350, 341, 361, 346, 350, 296, …
$ ANIO <dbl> 2022, 2022, 2022, 2022, 2022, 2022, 2022…
# Export the de-duplicated 2022 table as CSV.
# NOTE(review): the destination is an absolute, machine-specific path; the
# target folder must already exist for the write to succeed.
write_csv(
  x = saber11_2022_limpio,
  file = "C:/Users/john/Desktop/Saber_11_2025/data/sinDuplicados/saber11_2022_limpio.csv"
)
# Export the same de-duplicated table as an Excel workbook, with the data
# written as a formatted Excel table (asTable = TRUE).
# NOTE(review): `write.xlsx()` with `asTable` matches the openxlsx signature;
# confirm that package is loaded earlier in the file.
write.xlsx(
  x = saber11_2022_limpio,
  file = "C:/Users/john/Desktop/Saber_11_2025/data/sinDuplicados/saber11_2022_limpio.xlsx",
  asTable = TRUE
)
| Año | Registros totales | Registros sin duplicado | Duplicados eliminados | % Eliminado |
|---|---|---|---|---|
| 2010 | 670,030 | 570,944 | 99,086 | 14.79% |
| 2011 | 650,420 | 572,417 | 78,003 | 11.99% |
| 2012 | 680,388 | 580,551 | 99,837 | 14.67% |
| 2013 | 583,775 | 576,094 | 7,681 | 1.32% |
| 2014 | 571,637 | 569,428 | 2,209 | 0.39% |
| 2015 | 570,464 | 568,397 | 2,067 | 0.36% |
| 2016 | 563,370 | 561,270 | 2,100 | 0.37% |
| 2017 | 561,287 | 559,279 | 2,008 | 0.36% |
| 2018 | 32,348 | 19,798 | 12,550 | 38.80% |
| 2019 | 1,109,085 | 558,751 | 550,334 | 49.62% |
| 2020 | 15,435 | 15,435 | 0 | 0.00% |
| 2021 | 15,528 | 15,528 | 0 | 0.00% |
| 2022 | 1,085,937 | 552,841 | 533,096 | 49.09% |
El proceso de depuración de los microdatos del Saber 11 correspondientes al periodo 2010–2022 permitió identificar y corregir la presencia de registros duplicados asociados a estudiantes que aparecían más de una vez en los archivos originales. Este procedimiento se realizó utilizando la variable ESTU_CONSECUTIVO como identificador principal y aplicando un ordenamiento descendente por PERIODO, con el fin de conservar únicamente la versión más reciente del registro cuando existían duplicados. Esta metodología garantizó la selección de información actualizada, evitando inconsistencias y aumentando la confiabilidad de los datos para análisis posteriores.
Los resultados obtenidos confirman la existencia de duplicados en múltiples años, con magnitudes variables según la cohorte analizada. En 2010, por ejemplo, se pasó de 670.030 registros originales a 570.944 observaciones únicas, eliminando 99.086 duplicados. En 2011, el total se redujo de 650.420 a 572.417 registros, con 78.003 duplicados eliminados. Para 2012, los datos disminuyeron de 680.388 a 580.551 registros, depurándose 99.837 duplicados. En 2013, se eliminaron 7.681 duplicados, pasando de 583.775 a 576.094 registros. Entre 2014 y 2017 las reducciones fueron menores: de 571.637 a 569.428 registros en 2014 (2.209 duplicados), de 570.464 a 568.397 en 2015 (2.067 duplicados), de 563.370 a 561.270 en 2016 (2.100 duplicados) y de 561.287 a 559.279 en 2017 (2.008 duplicados). En 2018, el archivo pasó de 32.348 a 19.798 registros, eliminando 12.550 duplicados, y en 2019 se presentó una reducción significativa, de 1.109.085 a 558.751 registros, eliminando 550.334 duplicados. Las bases de 2020 y 2021, con 15.435 y 15.528 registros respectivamente, no presentaron duplicados y permanecieron sin cambios tras la depuración. Finalmente, en 2022 el total pasó de 1.085.937 a 552.841 observaciones, eliminando 533.096 duplicados.
En conjunto, estas reducciones reflejan una mejora sustancial en la calidad y coherencia de los datos, permitiendo disponer de una base unificada en la que cada estudiante está representado una sola vez, condición indispensable para análisis estadísticos robustos, modelamientos predictivos, estudios longitudinales y procesos de construcción de grafos institucionales. Asimismo, la eliminación de duplicados evita sesgos en los resultados y garantiza la integridad de la información al integrar las distintas cohortes del periodo 2010–2022.