library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.6.2
library(vcd)
## Warning: package 'vcd' was built under R version 3.6.3
## Loading required package: grid
matricula <- as.factor(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1,0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1,0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1))
matematicas <- c(41, 53, 54, 47, 57, 51, 42, 45, 54, 52, 51, 51, 71, 57, 50, 43, 51, 60, 62, 57, 35, 75, 45, 57, 45, 46, 66, 57, 49, 49, 57, 64, 63, 57, 50, 58, 75, 68, 44, 40, 41, 62, 57, 43, 48, 63, 39, 70,
63, 59, 61, 38, 61, 49, 73, 44, 42, 39, 55, 52, 45, 61, 39, 41,
50, 40, 60, 47, 59, 49, 46, 58, 71, 58, 46, 43, 54, 56, 46, 54,
57, 54, 71, 48, 40, 64, 51, 39, 40, 61, 66, 49, 65, 52, 46, 61,
72, 71, 40, 69, 64, 56, 49, 54, 53, 66, 67, 40, 46, 69, 40, 41,
57, 58, 57, 37, 55, 62, 64, 40, 50, 46, 53, 52, 45, 56, 45, 54,
56, 41, 54, 72, 56, 47, 49, 60, 54, 55, 33, 49, 43, 50, 52, 48,
58, 43, 41, 43, 46, 44, 43, 61, 40, 49, 56, 61, 50, 51, 42, 67,
53, 50, 51, 72, 48, 40, 53, 39, 63, 51, 45, 39, 42, 62, 44, 65,
63, 54, 45, 60, 49, 48, 57, 55, 66, 64, 55, 42, 56, 53, 41, 42,
53, 42, 60, 52, 38, 57, 58, 65)
datos <- data.frame(matricula, matematicas)
head(datos, 6)
## matricula matematicas
## 1 0 41
## 2 0 53
## 3 0 54
## 4 0 47
## 5 0 57
## 6 0 51
tail(datos,6)
## matricula matematicas
## 195 1 60
## 196 0 52
## 197 0 38
## 198 0 57
## 199 1 58
## 200 1 65
# Tabla de casos de Honor y No Honor
table(datos$matricula)
##
## 0 1
## 151 49
ggplot(data = datos, aes(x = matricula, y = matematicas, color = matricula)) +
geom_boxplot(outlier.shape = NA) +
geom_jitter(width = 0.1) +
theme_bw() +
theme(legend.position = "null")
# Generar el modelo de regresión logística
modelo <- glm(matricula ~ matematicas, data = datos, family = "binomial")
summary(modelo)
##
## Call:
## glm(formula = matricula ~ matematicas, family = "binomial", data = datos)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0332 -0.6785 -0.3506 -0.1565 2.6143
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -9.79394 1.48174 -6.610 3.85e-11 ***
## matematicas 0.15634 0.02561 6.105 1.03e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 222.71 on 199 degrees of freedom
## Residual deviance: 167.07 on 198 degrees of freedom
## AIC: 171.07
##
## Number of Fisher Scoring iterations: 5
datos$matricula <- as.character(datos$matricula)
datos$matricula <- as.numeric(datos$matricula)
plot(matricula ~ matematicas, datos, col = "darkblue",
main = "Modelo regresión logística",
ylab = "P(matrícula=1|matemáticas)",
xlab = "matemáticas", pch = "I")
# type = "response" devuelve las predicciones en forma de probabilidad en lugar de en log_ODDs
curve(predict(modelo, data.frame(matematicas = x), type = "response"),
col = "firebrick", lwd = 2.5, add = TRUE)
nuevos_puntos <- seq(from = min(datos$matematicas), to = max(datos$matematicas),
by = 0.5)
nuevos_puntos
## [1] 33.0 33.5 34.0 34.5 35.0 35.5 36.0 36.5 37.0 37.5 38.0 38.5 39.0 39.5 40.0
## [16] 40.5 41.0 41.5 42.0 42.5 43.0 43.5 44.0 44.5 45.0 45.5 46.0 46.5 47.0 47.5
## [31] 48.0 48.5 49.0 49.5 50.0 50.5 51.0 51.5 52.0 52.5 53.0 53.5 54.0 54.5 55.0
## [46] 55.5 56.0 56.5 57.0 57.5 58.0 58.5 59.0 59.5 60.0 60.5 61.0 61.5 62.0 62.5
## [61] 63.0 63.5 64.0 64.5 65.0 65.5 66.0 66.5 67.0 67.5 68.0 68.5 69.0 69.5 70.0
## [76] 70.5 71.0 71.5 72.0 72.5 73.0 73.5 74.0 74.5 75.0
predicciones <- predict(modelo, data.frame(matematicas = nuevos_puntos), se.fit = TRUE)
predicciones
## $fit
## 1 2 3 4 5 6
## -4.63471038 -4.55654020 -4.47837002 -4.40019984 -4.32202967 -4.24385949
## 7 8 9 10 11 12
## -4.16568931 -4.08751913 -4.00934896 -3.93117878 -3.85300860 -3.77483842
## 13 14 15 16 17 18
## -3.69666824 -3.61849807 -3.54032789 -3.46215771 -3.38398753 -3.30581736
## 19 20 21 22 23 24
## -3.22764718 -3.14947700 -3.07130682 -2.99313664 -2.91496647 -2.83679629
## 25 26 27 28 29 30
## -2.75862611 -2.68045593 -2.60228576 -2.52411558 -2.44594540 -2.36777522
## 31 32 33 34 35 36
## -2.28960504 -2.21143487 -2.13326469 -2.05509451 -1.97692433 -1.89875415
## 37 38 39 40 41 42
## -1.82058398 -1.74241380 -1.66424362 -1.58607344 -1.50790327 -1.42973309
## 43 44 45 46 47 48
## -1.35156291 -1.27339273 -1.19522255 -1.11705238 -1.03888220 -0.96071202
## 49 50 51 52 53 54
## -0.88254184 -0.80437167 -0.72620149 -0.64803131 -0.56986113 -0.49169095
## 55 56 57 58 59 60
## -0.41352078 -0.33535060 -0.25718042 -0.17901024 -0.10084007 -0.02266989
## 61 62 63 64 65 66
## 0.05550029 0.13367047 0.21184065 0.29001082 0.36818100 0.44635118
## 67 68 69 70 71 72
## 0.52452136 0.60269153 0.68086171 0.75903189 0.83720207 0.91537225
## 73 74 75 76 77 78
## 0.99354242 1.07171260 1.14988278 1.22805296 1.30622313 1.38439331
## 79 80 81 82 83 84
## 1.46256349 1.54073367 1.61890385 1.69707402 1.77524420 1.85341438
## 85
## 1.93158456
##
## $se.fit
## 1 2 3 4 5 6 7 8
## 0.6532674 0.6410488 0.6288535 0.6166829 0.6045386 0.5924220 0.5803350 0.5682794
## 9 10 11 12 13 14 15 16
## 0.5562574 0.5442710 0.5323227 0.5204152 0.5085513 0.4967341 0.4849670 0.4732538
## 17 18 19 20 21 22 23 24
## 0.4615987 0.4500059 0.4384807 0.4270283 0.4156549 0.4043671 0.3931722 0.3820784
## 25 26 27 28 29 30 31 32
## 0.3710949 0.3602317 0.3494999 0.3389122 0.3284823 0.3182259 0.3081603 0.2983048
## 33 34 35 36 37 38 39 40
## 0.2886808 0.2793124 0.2702261 0.2614513 0.2530204 0.2449690 0.2373357 0.2301621
## 41 42 43 44 45 46 47 48
## 0.2234924 0.2173730 0.2118517 0.2069762 0.2027933 0.1993464 0.1966743 0.1948089
## 49 50 51 52 53 54 55 56
## 0.1937734 0.1935812 0.1942349 0.1957258 0.1980352 0.2011347 0.2049887 0.2095554
## 57 58 59 60 61 62 63 64
## 0.2147894 0.2206432 0.2270689 0.2340194 0.2414494 0.2493160 0.2575791 0.2662020
## 65 66 67 68 69 70 71 72
## 0.2751507 0.2843945 0.2939056 0.3036588 0.3136315 0.3238036 0.3341567 0.3446746
## 73 74 75 76 77 78 79 80
## 0.3553426 0.3661476 0.3770779 0.3881227 0.3992728 0.4105194 0.4218548 0.4332721
## 81 82 83 84 85
## 0.4447650 0.4563277 0.4679551 0.4796425 0.4913856
##
## $residual.scale
## [1] 1
predicciones_logit <- exp(predicciones$fit) / (1 + exp(predicciones$fit))
predicciones_logit
## 1 2 3 4 5 6
## 0.009615562 0.010389249 0.011224482 0.012126041 0.013099054 0.014149025
## 7 8 9 10 11 12
## 0.015281854 0.016503865 0.017821824 0.019242973 0.020775051 0.022426319
## 13 14 15 16 17 18
## 0.024205592 0.026122257 0.028186305 0.030408352 0.032799659 0.035372158
## 19 20 21 22 23 24
## 0.038138464 0.041111891 0.044306459 0.047736902 0.051418657 0.055367861
## 25 26 27 28 29 30
## 0.059601325 0.064136505 0.068991458 0.074184786 0.079735560 0.085663235
## 31 32 33 34 35 36
## 0.091987534 0.098728324 0.105905462 0.113538621 0.121647088 0.130249544
## 37 38 39 40 41 42
## 0.139363815 0.149006597 0.159193163 0.169937053 0.181249737 0.193140276
## 43 44 45 46 47 48
## 0.205614973 0.218677025 0.232326188 0.246558447 0.261365732 0.276735659
## 49 50 51 52 53 54
## 0.292651324 0.309091155 0.326028838 0.343433314 0.361268868 0.379495303
## 55 56 57 58 59 60
## 0.398068206 0.416939312 0.436056949 0.455366564 0.474811325 0.494332771
## 61 62 63 64 65 66
## 0.513871512 0.533367947 0.552762991 0.571998783 0.591019371 0.609771345
## 67 68 69 70 71 72
## 0.628204401 0.646271845 0.663930996 0.681143510 0.697875612 0.714098231
## 73 74 75 76 77 78
## 0.729787049 0.744922469 0.759489505 0.773477614 0.786880463 0.799695659
## 79 80 81 82 83 84
## 0.811924441 0.823571354 0.834643901 0.845152199 0.855108627 0.864527492
## 85
## 0.873424703
predicciones <- ifelse(test = modelo$fitted.values > 0.5, yes = 1, no = 0)
matriz_confusion <- table(modelo$model$matricula, predicciones,
dnn = c("observaciones", "predicciones"))
matriz_confusion
## predicciones
## observaciones 0 1
## 0 140 11
## 1 27 22