Técnica de minería de datos para la promoción de una educación inclusiva

Este es un documento R Markdown. Contiene información de la base de datos relacionada con aspectos de inclusión y grupos de especial protección constitucional en la Universidad de Santander. Para la minería de datos se emplearon el paquete Rattle y R.

Datos

Se carga la base de datos

library (tidyverse)

# Load the inclusion data set; the file is read as semicolon-separated text.
# NOTE(review): this is an absolute, machine-specific path. Prefer a path
# relative to the project so the document can be rendered on other machines.
data_path <- "C:/Users/coordinador.analitic/OneDrive - Universidad de Santander/Alertas tempranas UDES/Publicaciones/Capitulo de libro ELSERVIER 2024/Markdown/base_datos_inclusion.csv"
dataset <- read.csv(data_path, sep = ";")

# attach() is deliberately not called: the visible analysis steps always reach
# columns through `dataset`, and attaching a data frame can silently mask
# other objects that share a column name.
str(dataset)

## 'data.frame':    1539 obs. of  32 variables:
##  $ Tipo_Est  : chr  "Nuevo" "Nuevo" "Nuevo" "Nuevo" ...
##  $ Prog      : chr  "ANTROPOLOGÍA" "ANTROPOLOGÍA" "INSTRUMENTACIÓN QUIRÚRGICA" "INSTRUMENTACIÓN QUIRÚRGICA" ...
##  $ Prom      : num  4.04 3.58 4.04 3.9 3.96 4.12 4.06 4 3.95 4.08 ...
##  $ Edad      : int  24 23 28 21 21 20 20 21 21 20 ...
##  $ Edad_categ: chr  "21-25años" "21-25años" ">26años" "21-25años" ...
##  $ Gen       : chr  "F" "F" "F" "F" ...
##  $ Es_civil  : chr  "Soltero" "Soltero" "Soltero" "Soltero" ...
##  $ Origen    : chr  "COLOMBIA" "COLOMBIA" "COLOMBIA" "COLOMBIA" ...
##  $ Origen_Col: chr  "Colombia" "Colombia" "Colombia" "Colombia" ...
##  $ Dto_origen: chr  "NORTE DE SANTANDER" "GUAJIRA" "PUTUMAYO" "CESAR" ...
##  $ Zona      : chr  "Urbana" "Urbana" "Urbana" "Urbana" ...
##  $ Estrato   : chr  "Bajo" "Bajo" "Bajo" "Bajo" ...
##  $ Trab      : chr  "No" "No" "Si" "No" ...
##  $ coh       : chr  "20-ene" "20-feb" "20-ene" "20-ene" ...
##  $ Campus    : chr  "Bucaramanga" "Bucaramanga" "Bucaramanga" "Bucaramanga" ...
##  $ Prom_cat  : chr  "Superior" "Inferior" "Superior" "Superior" ...
##  $ Area      : chr  "Sociales" "Sociales" "Salud" "Salud" ...
##  $ NING      : chr  "A-" "A-" "A-" "A-" ...
##  $ NLC       : chr  "N3" "N3" "N2" "N3" ...
##  $ NMAT      : chr  "N2" "N2" "N2" "N3" ...
##  $ NPSC      : chr  "N1" "N2" "N2" "N2" ...
##  $ NCN       : chr  "N2" "N1" "N2" "N2" ...
##  $ NING2     : chr  "A-A2" "A-A2" "A-A2" "A-A2" ...
##  $ NLC2      : chr  "N3N4" "N3N4" "N1N2" "N3N4" ...
##  $ NMAT2     : chr  "N1N2" "N1N2" "N1N2" "N3N4" ...
##  $ NPSC2     : chr  "N1N2" "N1N2" "N1N2" "N1N2" ...
##  $ NCN2      : chr  "N1N2" "N1N2" "N1N2" "N1N2" ...
##  $ PLC       : int  51 51 47 61 52 51 62 67 61 47 ...
##  $ PMA       : int  41 46 38 53 50 65 58 67 61 45 ...
##  $ PSC       : int  40 48 50 49 30 39 53 63 69 47 ...
##  $ PCN       : int  49 38 41 53 42 56 52 68 69 41 ...
##  $ PIN       : int  38 40 33 46 39 48 51 69 55 52 ...

# List the column names of the data frame.
colnames(dataset)

##  [1] "Tipo_Est"   "Prog"       "Prom"       "Edad"       "Edad_categ"
##  [6] "Gen"        "Es_civil"   "Origen"     "Origen_Col" "Dto_origen"
## [11] "Zona"       "Estrato"    "Trab"       "coh"        "Campus"    
## [16] "Prom_cat"   "Area"       "NING"       "NLC"        "NMAT"      
## [21] "NPSC"       "NCN"        "NING2"      "NLC2"       "NMAT2"     
## [26] "NPSC2"      "NCN2"       "PLC"        "PMA"        "PSC"       
## [31] "PCN"        "PIN"

Explorar

library(moments)
summary(dataset) # Summarises every column of the data frame in one call; no per-column `$` access is involved.

##    Tipo_Est             Prog                Prom            Edad      
##  Length:1539        Length:1539        Min.   :1.040   Min.   :16.00  
##  Class :character   Class :character   1st Qu.:3.640   1st Qu.:19.00  
##  Mode  :character   Mode  :character   Median :3.880   Median :20.00  
##                                        Mean   :3.835   Mean   :20.86  
##                                        3rd Qu.:4.090   3rd Qu.:22.00  
##                                        Max.   :5.000   Max.   :37.00  
##   Edad_categ            Gen              Es_civil            Origen         
##  Length:1539        Length:1539        Length:1539        Length:1539       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##   Origen_Col         Dto_origen            Zona             Estrato         
##  Length:1539        Length:1539        Length:1539        Length:1539       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##      Trab               coh               Campus            Prom_cat        
##  Length:1539        Length:1539        Length:1539        Length:1539       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##      Area               NING               NLC                NMAT          
##  Length:1539        Length:1539        Length:1539        Length:1539       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##      NPSC               NCN               NING2               NLC2          
##  Length:1539        Length:1539        Length:1539        Length:1539       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##     NMAT2              NPSC2               NCN2                PLC       
##  Length:1539        Length:1539        Length:1539        Min.   :28.00  
##  Class :character   Class :character   Class :character   1st Qu.:48.00  
##  Mode  :character   Mode  :character   Mode  :character   Median :54.00  
##                                                           Mean   :54.25  
##                                                           3rd Qu.:61.00  
##                                                           Max.   :81.00  
##       PMA              PSC            PCN             PIN        
##  Min.   : 21.00   Min.   :22.0   Min.   :27.00   Min.   :  0.00  
##  1st Qu.: 45.00   1st Qu.:42.0   1st Qu.:43.50   1st Qu.: 42.00  
##  Median : 52.00   Median :49.0   Median :50.00   Median : 49.00  
##  Mean   : 51.73   Mean   :49.3   Mean   :50.17   Mean   : 49.95  
##  3rd Qu.: 59.00   3rd Qu.:57.0   3rd Qu.:57.00   3rd Qu.: 57.00  
##  Max.   :100.00   Max.   :82.0   Max.   :75.00   Max.   :100.00

Estadísticas descriptivas

# The 'Hmisc' package provides the 'describe' function used below.

library(Hmisc, quietly=TRUE)

# Summarise the continuous variables, starting with Prom.
# (The call was previously issued twice in a row with identical output.)
describe(dataset$Prom)

## dataset$Prom 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##     1539        0      206        1    3.835   0.4452    3.160    3.368 
##      .25      .50      .75      .90      .95 
##    3.640    3.880    4.090    4.300    4.430 
## 
## lowest : 1.04 1.08 1.1  1.28 1.33, highest: 4.69 4.72 4.75 4.76 5

# Distribution of Edad (age), an integer column.
describe(dataset$Edad)

## dataset$Edad 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##     1539        0       21    0.975    20.86    2.657       18       18 
##      .25      .50      .75      .90      .95 
##       19       20       22       24       26 
## 
## lowest : 16 17 18 19 20, highest: 32 33 34 35 37

# Distribution of PLC (integer column; presumably a test score -- confirm).
describe(dataset$PLC)

## dataset$PLC 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##     1539        0       50    0.999    54.25    9.981       40       43 
##      .25      .50      .75      .90      .95 
##       48       54       61       65       69 
## 
## lowest : 28 30 31 32 33, highest: 74 75 76 77 81

# Distribution of PMA (integer column; presumably a test score -- confirm).
describe(dataset$PMA)

## dataset$PMA 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##     1539        0       62    0.999    51.73    11.42       35       39 
##      .25      .50      .75      .90      .95 
##       45       52       59       65       68 
## 
## lowest :  21  22  24  25  26, highest:  79  80  83  87 100

# Distribution of PSC (integer column; presumably a test score -- confirm).
describe(dataset$PSC)

## dataset$PSC 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##     1539        0       57    0.999     49.3    11.76     32.0     36.0 
##      .25      .50      .75      .90      .95 
##     42.0     49.0     57.0     62.0     65.1 
## 
## lowest : 22 24 25 26 27, highest: 75 76 79 80 82

# Distribution of PCN (integer column; presumably a test score -- confirm).
describe(dataset$PCN)

## dataset$PCN 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##     1539        0       49    0.999    50.17    10.31     36.0     38.0 
##      .25      .50      .75      .90      .95 
##     43.5     50.0     57.0     62.0     65.0 
## 
## lowest : 27 28 29 30 31, highest: 71 72 73 74 75

# Distribution of PIN (integer column; presumably a test score -- confirm).
# NOTE(review): the reported minimum is 0 while the next lowest value is 22;
# check whether 0 encodes a missing score rather than a real result.
describe(dataset$PIN)

## dataset$PIN 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##     1539        0       57    0.999    49.95    12.02       33       37 
##      .25      .50      .75      .90      .95 
##       42       49       57       64       69 
## 
## lowest :   0  22  23  28  29, highest:  78  79  81  93 100

# Summarise the categorical variables.
# Gen: frequency and proportion of each value (F / M).
describe(dataset$Gen)

## dataset$Gen 
##        n  missing distinct 
##     1539        0        2 
##                       
## Value          F     M
## Frequency    980   559
## Proportion 0.637 0.363

# Edad_categ: frequency and proportion of each age band.
describe(dataset$Edad_categ)

## dataset$Edad_categ 
##        n  missing distinct 
##     1539        0        3 
##                                         
## Value        <20años   >26años 21-25años
## Frequency        807        97       635
## Proportion     0.524     0.063     0.413

# Es_civil: frequency and proportion of each marital-status value.
describe(dataset$Es_civil)

## dataset$Es_civil 
##        n  missing distinct 
##     1539        0        4 
##                                                           
## Value           Casado   Religioso     Soltero Unión libre
## Frequency            8           1        1509          21
## Proportion       0.005       0.001       0.981       0.014

# Origen_Col: two-level indicator (Colombia / FueraColom).
describe(dataset$Origen_Col)

## dataset$Origen_Col 
##        n  missing distinct 
##     1539        0        2 
##                                 
## Value        Colombia FueraColom
## Frequency        1532          7
## Proportion      0.995      0.005

# Origen: frequency and proportion of each country of origin.
describe(dataset$Origen)

## dataset$Origen 
##        n  missing distinct 
##     1539        0        4 
##                                                   
## Value        AUSTRIA  COLOMBIA   ECUADOR VENEZUELA
## Frequency          1      1532         1         5
## Proportion     0.001     0.995     0.001     0.003

# Dto_origen: many distinct values, so only the lowest/highest are listed.
# NOTE(review): "-" appears among the values; it presumably marks a missing
# department (e.g. students from outside Colombia) -- confirm and recode to NA.
describe(dataset$Dto_origen)

## dataset$Dto_origen 
##        n  missing distinct 
##     1539        0       30 
## 
## lowest : -                        ANTIOQUIA                ARAUCA                   ATLANTICO                BOGOTA D.C              
## highest: SAN ANDRES Y PROVIDENCIA SANTANDER                SUCRE                    TOLIMA                   VALLE DEL CAUCA

# Zona: frequency and proportion of Rural vs Urbana.
describe(dataset$Zona)

## dataset$Zona 
##        n  missing distinct 
##     1539        0        2 
##                         
## Value       Rural Urbana
## Frequency     204   1335
## Proportion  0.133  0.867

# Estrato: frequency and proportion of each level (Alto / Bajo / Medio).
describe(dataset$Estrato)

## dataset$Estrato 
##        n  missing distinct 
##     1539        0        3 
##                             
## Value       Alto  Bajo Medio
## Frequency     14  1296   229
## Proportion 0.009 0.842 0.149

# Trab: frequency and proportion of the Si / No values.
describe(dataset$Trab)

## dataset$Trab 
##        n  missing distinct 
##     1539        0        2 
##                       
## Value         No    Si
## Frequency   1456    83
## Proportion 0.946 0.054

# coh: frequency and proportion of each cohort label.
# NOTE(review): labels such as "20-ene" look like values that a spreadsheet
# auto-formatted as dates -- confirm the intended encoding.
describe(dataset$coh)

## dataset$coh 
##        n  missing distinct 
##     1539        0        8 
##                                                                   
## Value      20-ene 20-feb 21-ene 21-feb 22-ene 22-feb 23-ene 23-feb
## Frequency     417    131    213    138    214    200    222      4
## Proportion  0.271  0.085  0.138  0.090  0.139  0.130  0.144  0.003

# Campus: frequency and proportion of each campus.
describe(dataset$Campus)

## dataset$Campus 
##        n  missing distinct 
##     1539        0        3 
##                                               
## Value      Bucaramanga      Cúcuta  Valledupar
## Frequency          281          72        1186
## Proportion       0.183       0.047       0.771

# Prom_cat: two-level categorisation (Inferior / Superior).
describe(dataset$Prom_cat)

## dataset$Prom_cat 
##        n  missing distinct 
##     1539        0        2 
##                             
## Value      Inferior Superior
## Frequency       704      835
## Proportion    0.457    0.543

# Area: frequency and proportion of each area.
describe(dataset$Area)

## dataset$Area 
##        n  missing distinct 
##     1539        0        6 
##                                                                             
## Value      Economicas    Exactas Ingenieria      Salud   Sociales Tecnologia
## Frequency         160         97        149        581        517         35
## Proportion      0.104      0.063      0.097      0.378      0.336      0.023

# The numeric columns PLC, PMA, PSC, PCN and PIN are summarised with the other
# continuous variables above, so they are not repeated in this categorical
# section.

Gráficos descriptivos

### Histogramas

# Density plots of the numeric variables, split by a grouping variable.
# These are density curves (geom_density), not histograms.

#' Overlay an overall density curve with one filled density per group.
#'
#' @param data A data frame containing both columns.
#' @param value_var Name (string) of the column drawn on the x axis.
#' @param group_var Name (string) of the grouping column; it is converted
#'   to a factor before plotting.
#' @return A ggplot object titled "Distribution of <value_var> by <group_var>".
plot_density_by_group <- function(data, value_var, group_var) {
  stopifnot(
    is.data.frame(data),
    is.character(value_var), length(value_var) == 1L,
    is.character(group_var), length(group_var) == 1L,
    all(c(value_var, group_var) %in% names(data))
  )

  # Keep only the two columns that the plot needs.
  plot_data <- data[, c(value_var, group_var), drop = FALSE]
  plot_data[[group_var]] <- as.factor(plot_data[[group_var]])

  ggplot2::ggplot(plot_data, ggplot2::aes(x = .data[[value_var]])) +
    # Dotted line: density of the whole sample, ignoring the groups.
    ggplot2::geom_density(lty = 3) +
    ggplot2::geom_density(
      ggplot2::aes(fill = .data[[group_var]], colour = .data[[group_var]]),
      alpha = 0.55
    ) +
    ggplot2::xlab(value_var) +
    ggplot2::ggtitle(paste("Distribution of", value_var, "by", group_var)) +
    # Name both legends explicitly so fill and colour merge into one legend.
    ggplot2::labs(fill = group_var, colour = group_var, y = "Density")
}

# Edad by Gen.

p01 <- plot_density_by_group(dataset, "Edad", "Gen")

gridExtra::grid.arrange(p01)

# PLC, PMA, PSC, PCN and PIN by Campus.

p01 <- plot_density_by_group(dataset, "PLC", "Campus")
p02 <- plot_density_by_group(dataset, "PMA", "Campus")
p03 <- plot_density_by_group(dataset, "PSC", "Campus")
p04 <- plot_density_by_group(dataset, "PCN", "Campus")
p05 <- plot_density_by_group(dataset, "PIN", "Campus")

gridExtra::grid.arrange(p01, p02, p03, p04, p05)

# PLC, PMA, PSC, PCN and PIN by Gen.

p01 <- plot_density_by_group(dataset, "PLC", "Gen")
p02 <- plot_density_by_group(dataset, "PMA", "Gen")
p03 <- plot_density_by_group(dataset, "PSC", "Gen")
p04 <- plot_density_by_group(dataset, "PCN", "Gen")
p05 <- plot_density_by_group(dataset, "PIN", "Gen")

gridExtra::grid.arrange(p01, p02, p03, p04, p05)

# Box plot of Edad: one overall box labelled "All" plus one box per Campus.
# The shape-8 points mark the mean of each box.

p01 <- dataset %>%
  dplyr::mutate(Campus=as.factor(Campus)) %>%
  ggplot2::ggplot(ggplot2::aes(y=Edad)) +
  ggplot2::geom_boxplot(ggplot2::aes(x="All"), notch=TRUE, fill="grey") +
  # `fun` replaces the `fun.y` argument, which ggplot2 deprecated in 3.3.0.
  ggplot2::stat_summary(ggplot2::aes(x="All"), fun=mean, geom="point", shape=8) +
  ggplot2::geom_boxplot(ggplot2::aes(x=Campus, fill=Campus), notch=TRUE) +
  ggplot2::stat_summary(ggplot2::aes(x=Campus), fun=mean, geom="point", shape=8) +
  # NOTE(review): the axis label embeds a fixed Rattle timestamp and a user
  # name; consider reducing it to "Campus" before publication.
  ggplot2::xlab("Campus\n\nRattle 2024-jul.-16 12:51:58 coordinador.analitic") +
  ggplot2::ggtitle("Distribution of Edad by Campus") +
  ggplot2::theme(legend.position="none")

# Display the plot.

gridExtra::grid.arrange(p01)

Diagramas bivariados

Correlación

library(corrplot, quietly=TRUE) # El paquete 'corrplot' ofrece la función 'corrplot'.
library(dplyr)
library(PerformanceAnalytics)

# Numeric columns used for the correlation analysis. They are selected by
# name (formerly positions 28:32) so the subset does not silently change if
# columns are added, removed or reordered in the source file.
num1 <- dataset[, c("PLC", "PMA", "PSC", "PCN", "PIN")]

summary(num1)

##       PLC             PMA              PSC            PCN       
##  Min.   :28.00   Min.   : 21.00   Min.   :22.0   Min.   :27.00  
##  1st Qu.:48.00   1st Qu.: 45.00   1st Qu.:42.0   1st Qu.:43.50  
##  Median :54.00   Median : 52.00   Median :49.0   Median :50.00  
##  Mean   :54.25   Mean   : 51.73   Mean   :49.3   Mean   :50.17  
##  3rd Qu.:61.00   3rd Qu.: 59.00   3rd Qu.:57.0   3rd Qu.:57.00  
##  Max.   :81.00   Max.   :100.00   Max.   :82.0   Max.   :75.00  
##       PIN        
##  Min.   :  0.00  
##  1st Qu.: 42.00  
##  Median : 49.00  
##  Mean   : 49.95  
##  3rd Qu.: 57.00  
##  Max.   :100.00

# Correlation chart of the columns in num1 using Pearson's method, with the
# histogram option enabled.
chart.Correlation(num1, histogram = TRUE, method = "pearson")

Gráficos dinámicos

# Interactive box plots of PLC: first grouped by Campus, then by Prog.
# plotly only needs to be loaded once for both charts.
library(plotly)
plot_ly(data = dataset, x = ~Campus, y = ~PLC, color = ~Campus, type = "box")

plot_ly(data = dataset, x = ~Prog, y = ~PLC, color = ~Prog, type = "box")

A Knowledge Database Discovery approach for improving quality in Higher Education Institutions

Universidad de Santander - Analítica Académica

2024-07-12