Las lbrerías

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.6.2
library(vcd)
## Warning: package 'vcd' was built under R version 3.6.3
## Loading required package: grid

Los datos

matricula <- as.factor(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1,0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1,0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1))

matematicas <- c(41, 53, 54, 47, 57, 51, 42, 45, 54, 52, 51, 51, 71, 57, 50, 43, 51, 60, 62, 57, 35, 75, 45, 57, 45, 46, 66, 57, 49, 49, 57, 64, 63, 57, 50, 58, 75, 68, 44, 40, 41, 62, 57, 43, 48, 63, 39, 70,
63, 59, 61, 38, 61, 49, 73, 44, 42, 39, 55, 52, 45, 61, 39, 41,
50, 40, 60, 47, 59, 49, 46, 58, 71, 58, 46, 43, 54, 56, 46, 54,
57, 54, 71, 48, 40, 64, 51, 39, 40, 61, 66, 49, 65, 52, 46, 61,
72, 71, 40, 69, 64, 56, 49, 54, 53, 66, 67, 40, 46, 69, 40, 41,
57, 58, 57, 37, 55, 62, 64, 40, 50, 46, 53, 52, 45, 56, 45, 54,
56, 41, 54, 72, 56, 47, 49, 60, 54, 55, 33, 49, 43, 50, 52, 48,
58, 43, 41, 43, 46, 44, 43, 61, 40, 49, 56, 61, 50, 51, 42, 67,
53, 50, 51, 72, 48, 40, 53, 39, 63, 51, 45, 39, 42, 62, 44, 65,
63, 54, 45, 60, 49, 48, 57, 55, 66, 64, 55, 42, 56, 53, 41, 42,
53, 42, 60, 52, 38, 57, 58, 65)

datos <- data.frame(matricula, matematicas)
head(datos, 6)
##   matricula matematicas
## 1         0          41
## 2         0          53
## 3         0          54
## 4         0          47
## 5         0          57
## 6         0          51
tail(datos,6)
##     matricula matematicas
## 195         1          60
## 196         0          52
## 197         0          38
## 198         0          57
## 199         1          58
## 200         1          65

Tabla de Frecuencias

# Tabla de casos de Honor y No Honor
table(datos$matricula)
## 
##   0   1 
## 151  49

Graficando

ggplot(data = datos, aes(x = matricula, y = matematicas, color = matricula)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(width = 0.1) +
  theme_bw() +
  theme(legend.position = "null")

Generar el modelo de regresión logística

# Generar el modelo de regresión logística
modelo <- glm(matricula ~ matematicas, data = datos, family = "binomial")
summary(modelo)
## 
## Call:
## glm(formula = matricula ~ matematicas, family = "binomial", data = datos)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.0332  -0.6785  -0.3506  -0.1565   2.6143  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -9.79394    1.48174  -6.610 3.85e-11 ***
## matematicas  0.15634    0.02561   6.105 1.03e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 222.71  on 199  degrees of freedom
## Residual deviance: 167.07  on 198  degrees of freedom
## AIC: 171.07
## 
## Number of Fisher Scoring iterations: 5

Otra gráfica con datos codificados

datos$matricula <- as.character(datos$matricula)
datos$matricula <- as.numeric(datos$matricula)

plot(matricula ~ matematicas, datos, col = "darkblue",
     main = "Modelo regresión logística",
     ylab = "P(matrícula=1|matemáticas)",
     xlab = "matemáticas", pch = "I")

# type = "response" devuelve las predicciones en forma de probabilidad en lugar de en log_ODDs
curve(predict(modelo, data.frame(matematicas = x), type = "response"),
      col = "firebrick", lwd = 2.5, add = TRUE)

Nuevos valores a predecir

nuevos_puntos <- seq(from = min(datos$matematicas), to = max(datos$matematicas),
                     by = 0.5)

nuevos_puntos
##  [1] 33.0 33.5 34.0 34.5 35.0 35.5 36.0 36.5 37.0 37.5 38.0 38.5 39.0 39.5 40.0
## [16] 40.5 41.0 41.5 42.0 42.5 43.0 43.5 44.0 44.5 45.0 45.5 46.0 46.5 47.0 47.5
## [31] 48.0 48.5 49.0 49.5 50.0 50.5 51.0 51.5 52.0 52.5 53.0 53.5 54.0 54.5 55.0
## [46] 55.5 56.0 56.5 57.0 57.5 58.0 58.5 59.0 59.5 60.0 60.5 61.0 61.5 62.0 62.5
## [61] 63.0 63.5 64.0 64.5 65.0 65.5 66.0 66.5 67.0 67.5 68.0 68.5 69.0 69.5 70.0
## [76] 70.5 71.0 71.5 72.0 72.5 73.0 73.5 74.0 74.5 75.0

Predicciones de los nuevos puntos

predicciones <- predict(modelo, data.frame(matematicas = nuevos_puntos), se.fit = TRUE)
predicciones
## $fit
##           1           2           3           4           5           6 
## -4.63471038 -4.55654020 -4.47837002 -4.40019984 -4.32202967 -4.24385949 
##           7           8           9          10          11          12 
## -4.16568931 -4.08751913 -4.00934896 -3.93117878 -3.85300860 -3.77483842 
##          13          14          15          16          17          18 
## -3.69666824 -3.61849807 -3.54032789 -3.46215771 -3.38398753 -3.30581736 
##          19          20          21          22          23          24 
## -3.22764718 -3.14947700 -3.07130682 -2.99313664 -2.91496647 -2.83679629 
##          25          26          27          28          29          30 
## -2.75862611 -2.68045593 -2.60228576 -2.52411558 -2.44594540 -2.36777522 
##          31          32          33          34          35          36 
## -2.28960504 -2.21143487 -2.13326469 -2.05509451 -1.97692433 -1.89875415 
##          37          38          39          40          41          42 
## -1.82058398 -1.74241380 -1.66424362 -1.58607344 -1.50790327 -1.42973309 
##          43          44          45          46          47          48 
## -1.35156291 -1.27339273 -1.19522255 -1.11705238 -1.03888220 -0.96071202 
##          49          50          51          52          53          54 
## -0.88254184 -0.80437167 -0.72620149 -0.64803131 -0.56986113 -0.49169095 
##          55          56          57          58          59          60 
## -0.41352078 -0.33535060 -0.25718042 -0.17901024 -0.10084007 -0.02266989 
##          61          62          63          64          65          66 
##  0.05550029  0.13367047  0.21184065  0.29001082  0.36818100  0.44635118 
##          67          68          69          70          71          72 
##  0.52452136  0.60269153  0.68086171  0.75903189  0.83720207  0.91537225 
##          73          74          75          76          77          78 
##  0.99354242  1.07171260  1.14988278  1.22805296  1.30622313  1.38439331 
##          79          80          81          82          83          84 
##  1.46256349  1.54073367  1.61890385  1.69707402  1.77524420  1.85341438 
##          85 
##  1.93158456 
## 
## $se.fit
##         1         2         3         4         5         6         7         8 
## 0.6532674 0.6410488 0.6288535 0.6166829 0.6045386 0.5924220 0.5803350 0.5682794 
##         9        10        11        12        13        14        15        16 
## 0.5562574 0.5442710 0.5323227 0.5204152 0.5085513 0.4967341 0.4849670 0.4732538 
##        17        18        19        20        21        22        23        24 
## 0.4615987 0.4500059 0.4384807 0.4270283 0.4156549 0.4043671 0.3931722 0.3820784 
##        25        26        27        28        29        30        31        32 
## 0.3710949 0.3602317 0.3494999 0.3389122 0.3284823 0.3182259 0.3081603 0.2983048 
##        33        34        35        36        37        38        39        40 
## 0.2886808 0.2793124 0.2702261 0.2614513 0.2530204 0.2449690 0.2373357 0.2301621 
##        41        42        43        44        45        46        47        48 
## 0.2234924 0.2173730 0.2118517 0.2069762 0.2027933 0.1993464 0.1966743 0.1948089 
##        49        50        51        52        53        54        55        56 
## 0.1937734 0.1935812 0.1942349 0.1957258 0.1980352 0.2011347 0.2049887 0.2095554 
##        57        58        59        60        61        62        63        64 
## 0.2147894 0.2206432 0.2270689 0.2340194 0.2414494 0.2493160 0.2575791 0.2662020 
##        65        66        67        68        69        70        71        72 
## 0.2751507 0.2843945 0.2939056 0.3036588 0.3136315 0.3238036 0.3341567 0.3446746 
##        73        74        75        76        77        78        79        80 
## 0.3553426 0.3661476 0.3770779 0.3881227 0.3992728 0.4105194 0.4218548 0.4332721 
##        81        82        83        84        85 
## 0.4447650 0.4563277 0.4679551 0.4796425 0.4913856 
## 
## $residual.scale
## [1] 1

Convertido a probabilidades las predicciones

predicciones_logit <- exp(predicciones$fit) / (1 + exp(predicciones$fit))
predicciones_logit
##           1           2           3           4           5           6 
## 0.009615562 0.010389249 0.011224482 0.012126041 0.013099054 0.014149025 
##           7           8           9          10          11          12 
## 0.015281854 0.016503865 0.017821824 0.019242973 0.020775051 0.022426319 
##          13          14          15          16          17          18 
## 0.024205592 0.026122257 0.028186305 0.030408352 0.032799659 0.035372158 
##          19          20          21          22          23          24 
## 0.038138464 0.041111891 0.044306459 0.047736902 0.051418657 0.055367861 
##          25          26          27          28          29          30 
## 0.059601325 0.064136505 0.068991458 0.074184786 0.079735560 0.085663235 
##          31          32          33          34          35          36 
## 0.091987534 0.098728324 0.105905462 0.113538621 0.121647088 0.130249544 
##          37          38          39          40          41          42 
## 0.139363815 0.149006597 0.159193163 0.169937053 0.181249737 0.193140276 
##          43          44          45          46          47          48 
## 0.205614973 0.218677025 0.232326188 0.246558447 0.261365732 0.276735659 
##          49          50          51          52          53          54 
## 0.292651324 0.309091155 0.326028838 0.343433314 0.361268868 0.379495303 
##          55          56          57          58          59          60 
## 0.398068206 0.416939312 0.436056949 0.455366564 0.474811325 0.494332771 
##          61          62          63          64          65          66 
## 0.513871512 0.533367947 0.552762991 0.571998783 0.591019371 0.609771345 
##          67          68          69          70          71          72 
## 0.628204401 0.646271845 0.663930996 0.681143510 0.697875612 0.714098231 
##          73          74          75          76          77          78 
## 0.729787049 0.744922469 0.759489505 0.773477614 0.786880463 0.799695659 
##          79          80          81          82          83          84 
## 0.811924441 0.823571354 0.834643901 0.845152199 0.855108627 0.864527492 
##          85 
## 0.873424703

Generando una matriz de confusión

predicciones <- ifelse(test = modelo$fitted.values > 0.5, yes = 1, no = 0)
matriz_confusion <- table(modelo$model$matricula, predicciones,
                          dnn = c("observaciones", "predicciones"))
matriz_confusion
##              predicciones
## observaciones   0   1
##             0 140  11
##             1  27  22

Interpretación de la matriz de confusión