library(ggplot2)
library(vcd)
## Loading required package: grid
library(knitr) # Para ver tablas mas amigables en formato html markdown
matricula <- as.factor(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1,0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1,0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1))
matematicas <- c(41, 53, 54, 47, 57, 51, 42, 45, 54, 52, 51, 51, 71, 57, 50, 43, 51, 60, 62, 57, 35, 75, 45, 57, 45, 46, 66, 57, 49, 49, 57, 64, 63, 57, 50, 58, 75, 68, 44, 40, 41, 62, 57, 43, 48, 63, 39, 70,
63, 59, 61, 38, 61, 49, 73, 44, 42, 39, 55, 52, 45, 61, 39, 41,
50, 40, 60, 47, 59, 49, 46, 58, 71, 58, 46, 43, 54, 56, 46, 54,
57, 54, 71, 48, 40, 64, 51, 39, 40, 61, 66, 49, 65, 52, 46, 61,
72, 71, 40, 69, 64, 56, 49, 54, 53, 66, 67, 40, 46, 69, 40, 41,
57, 58, 57, 37, 55, 62, 64, 40, 50, 46, 53, 52, 45, 56, 45, 54,
56, 41, 54, 72, 56, 47, 49, 60, 54, 55, 33, 49, 43, 50, 52, 48,
58, 43, 41, 43, 46, 44, 43, 61, 40, 49, 56, 61, 50, 51, 42, 67,
53, 50, 51, 72, 48, 40, 53, 39, 63, 51, 45, 39, 42, 62, 44, 65,
63, 54, 45, 60, 49, 48, 57, 55, 66, 64, 55, 42, 56, 53, 41, 42,
53, 42, 60, 52, 38, 57, 58, 65)
datos <- data.frame(matricula, matematicas)
head(datos, 10)
## matricula matematicas
## 1 0 41
## 2 0 53
## 3 0 54
## 4 0 47
## 5 0 57
## 6 0 51
## 7 0 42
## 8 0 45
## 9 0 54
## 10 0 52
tail(datos,10)
## matricula matematicas
## 191 0 41
## 192 0 42
## 193 0 53
## 194 0 42
## 195 1 60
## 196 0 52
## 197 0 38
## 198 0 57
## 199 1 58
## 200 1 65
n <- nrow(datos) # Total de casos
n
## [1] 200
table(datos$matricula)
##
## 0 1
## 151 49
ggplot(data = datos, aes(x = matricula, y = matematicas, color = matricula)) +
geom_boxplot(outlier.shape = NA) +
geom_jitter(width = 0.1) +
theme_bw() +
theme(legend.position = "null")
Probabilidad:
# Generar el modelo de regresión logística
modelo <- glm(matricula ~ matematicas, data = datos, family = "binomial")
summary(modelo)
##
## Call:
## glm(formula = matricula ~ matematicas, family = "binomial", data = datos)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0332 -0.6785 -0.3506 -0.1565 2.6143
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -9.79394 1.48174 -6.610 3.85e-11 ***
## matematicas 0.15634 0.02561 6.105 1.03e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 222.71 on 199 degrees of freedom
## Residual deviance: 167.07 on 198 degrees of freedom
## AIC: 171.07
##
## Number of Fisher Scoring iterations: 5
datos$matricula <- as.character(datos$matricula)
datos$matricula <- as.numeric(datos$matricula)
plot(matricula ~ matematicas, datos, col = "darkblue",
main = "Modelo regresión logística",
ylab = "P(matrícula=1|matemáticas)",
xlab = "matemáticas", pch = "I")
# type = "response" devuelve las predicciones en forma de probabilidad en lugar de en log_ODDs
curve(predict(modelo, data.frame(matematicas = x), type = "response"),
col = "firebrick", lwd = 2.5, add = TRUE)
predicciones_modelo <- ifelse(modelo$fitted.values > 0.5, 1, 0)
datos$predicciones <- (as.vector(predicciones_modelo))
kable(datos)
| matricula | matematicas | predicciones |
|---|---|---|
| 0 | 41 | 0 |
| 0 | 53 | 0 |
| 0 | 54 | 0 |
| 0 | 47 | 0 |
| 0 | 57 | 0 |
| 0 | 51 | 0 |
| 0 | 42 | 0 |
| 0 | 45 | 0 |
| 0 | 54 | 0 |
| 0 | 52 | 0 |
| 0 | 51 | 0 |
| 1 | 51 | 0 |
| 0 | 71 | 1 |
| 1 | 57 | 0 |
| 0 | 50 | 0 |
| 0 | 43 | 0 |
| 0 | 51 | 0 |
| 0 | 60 | 0 |
| 1 | 62 | 0 |
| 0 | 57 | 0 |
| 0 | 35 | 0 |
| 1 | 75 | 1 |
| 0 | 45 | 0 |
| 0 | 57 | 0 |
| 0 | 45 | 0 |
| 0 | 46 | 0 |
| 1 | 66 | 1 |
| 0 | 57 | 0 |
| 0 | 49 | 0 |
| 0 | 49 | 0 |
| 0 | 57 | 0 |
| 0 | 64 | 1 |
| 1 | 63 | 1 |
| 0 | 57 | 0 |
| 0 | 50 | 0 |
| 1 | 58 | 0 |
| 0 | 75 | 1 |
| 1 | 68 | 1 |
| 0 | 44 | 0 |
| 0 | 40 | 0 |
| 0 | 41 | 0 |
| 0 | 62 | 0 |
| 0 | 57 | 0 |
| 0 | 43 | 0 |
| 1 | 48 | 0 |
| 0 | 63 | 1 |
| 0 | 39 | 0 |
| 0 | 70 | 1 |
| 0 | 63 | 1 |
| 0 | 59 | 0 |
| 1 | 61 | 0 |
| 0 | 38 | 0 |
| 0 | 61 | 0 |
| 0 | 49 | 0 |
| 1 | 73 | 1 |
| 0 | 44 | 0 |
| 0 | 42 | 0 |
| 0 | 39 | 0 |
| 0 | 55 | 0 |
| 0 | 52 | 0 |
| 0 | 45 | 0 |
| 1 | 61 | 0 |
| 0 | 39 | 0 |
| 0 | 41 | 0 |
| 0 | 50 | 0 |
| 0 | 40 | 0 |
| 0 | 60 | 0 |
| 0 | 47 | 0 |
| 0 | 59 | 0 |
| 0 | 49 | 0 |
| 0 | 46 | 0 |
| 0 | 58 | 0 |
| 1 | 71 | 1 |
| 0 | 58 | 0 |
| 0 | 46 | 0 |
| 0 | 43 | 0 |
| 1 | 54 | 0 |
| 0 | 56 | 0 |
| 0 | 46 | 0 |
| 0 | 54 | 0 |
| 0 | 57 | 0 |
| 0 | 54 | 0 |
| 0 | 71 | 1 |
| 1 | 48 | 0 |
| 0 | 40 | 0 |
| 1 | 64 | 1 |
| 0 | 51 | 0 |
| 0 | 39 | 0 |
| 0 | 40 | 0 |
| 0 | 61 | 0 |
| 1 | 66 | 1 |
| 0 | 49 | 0 |
| 1 | 65 | 1 |
| 0 | 52 | 0 |
| 0 | 46 | 0 |
| 1 | 61 | 0 |
| 1 | 72 | 1 |
| 1 | 71 | 1 |
| 0 | 40 | 0 |
| 1 | 69 | 1 |
| 0 | 64 | 1 |
| 0 | 56 | 0 |
| 0 | 49 | 0 |
| 0 | 54 | 0 |
| 0 | 53 | 0 |
| 0 | 66 | 1 |
| 1 | 67 | 1 |
| 0 | 40 | 0 |
| 0 | 46 | 0 |
| 1 | 69 | 1 |
| 0 | 40 | 0 |
| 0 | 41 | 0 |
| 0 | 57 | 0 |
| 1 | 58 | 0 |
| 1 | 57 | 0 |
| 0 | 37 | 0 |
| 0 | 55 | 0 |
| 1 | 62 | 0 |
| 0 | 64 | 1 |
| 0 | 40 | 0 |
| 0 | 50 | 0 |
| 0 | 46 | 0 |
| 0 | 53 | 0 |
| 0 | 52 | 0 |
| 1 | 45 | 0 |
| 0 | 56 | 0 |
| 0 | 45 | 0 |
| 0 | 54 | 0 |
| 0 | 56 | 0 |
| 0 | 41 | 0 |
| 0 | 54 | 0 |
| 1 | 72 | 1 |
| 1 | 56 | 0 |
| 0 | 47 | 0 |
| 0 | 49 | 0 |
| 1 | 60 | 0 |
| 0 | 54 | 0 |
| 0 | 55 | 0 |
| 0 | 33 | 0 |
| 0 | 49 | 0 |
| 0 | 43 | 0 |
| 0 | 50 | 0 |
| 0 | 52 | 0 |
| 0 | 48 | 0 |
| 0 | 58 | 0 |
| 0 | 43 | 0 |
| 1 | 41 | 0 |
| 0 | 43 | 0 |
| 0 | 46 | 0 |
| 0 | 44 | 0 |
| 0 | 43 | 0 |
| 0 | 61 | 0 |
| 0 | 40 | 0 |
| 0 | 49 | 0 |
| 1 | 56 | 0 |
| 0 | 61 | 0 |
| 0 | 50 | 0 |
| 0 | 51 | 0 |
| 0 | 42 | 0 |
| 1 | 67 | 1 |
| 1 | 53 | 0 |
| 0 | 50 | 0 |
| 1 | 51 | 0 |
| 1 | 72 | 1 |
| 0 | 48 | 0 |
| 0 | 40 | 0 |
| 0 | 53 | 0 |
| 0 | 39 | 0 |
| 1 | 63 | 1 |
| 0 | 51 | 0 |
| 0 | 45 | 0 |
| 0 | 39 | 0 |
| 0 | 42 | 0 |
| 0 | 62 | 0 |
| 0 | 44 | 0 |
| 0 | 65 | 1 |
| 1 | 63 | 1 |
| 0 | 54 | 0 |
| 0 | 45 | 0 |
| 1 | 60 | 0 |
| 1 | 49 | 0 |
| 0 | 48 | 0 |
| 1 | 57 | 0 |
| 1 | 55 | 0 |
| 1 | 66 | 1 |
| 1 | 64 | 1 |
| 0 | 55 | 0 |
| 0 | 42 | 0 |
| 1 | 56 | 0 |
| 0 | 53 | 0 |
| 0 | 41 | 0 |
| 0 | 42 | 0 |
| 0 | 53 | 0 |
| 0 | 42 | 0 |
| 1 | 60 | 0 |
| 0 | 52 | 0 |
| 0 | 38 | 0 |
| 0 | 57 | 0 |
| 1 | 58 | 0 |
| 1 | 65 | 1 |
matriz_confusion <- table(datos$matricula, datos$predicciones, dnn = c("matrícula original", "predicciones"))
matriz_confusion
## predicciones
## matrícula original 0 1
## 0 140 11
## 1 27 22
cat ("Total de casos ", n)
## Total de casos 200
cat ("Total de aciertos = ", (matriz_confusion[1,1] + matriz_confusion[2,2]) / n)
## Total de aciertos = 0.81
Probabilidad:
nuevos_valores <- c(33, 50, 60, 70)
prediccion_manual <- exp (-9.793942 + 0.15634 * nuevos_valores) / (1 + exp (-9.793942 + 0.15634 * nuevos_valores))
prediccion_manual
## [1] 0.009615451 0.121645200 0.398063121 0.759484979
nuevos_puntos <- seq(from = min(datos$matematicas), to = max(datos$matematicas),by = 0.5)
nuevos_puntos
## [1] 33.0 33.5 34.0 34.5 35.0 35.5 36.0 36.5 37.0 37.5 38.0 38.5 39.0 39.5 40.0
## [16] 40.5 41.0 41.5 42.0 42.5 43.0 43.5 44.0 44.5 45.0 45.5 46.0 46.5 47.0 47.5
## [31] 48.0 48.5 49.0 49.5 50.0 50.5 51.0 51.5 52.0 52.5 53.0 53.5 54.0 54.5 55.0
## [46] 55.5 56.0 56.5 57.0 57.5 58.0 58.5 59.0 59.5 60.0 60.5 61.0 61.5 62.0 62.5
## [61] 63.0 63.5 64.0 64.5 65.0 65.5 66.0 66.5 67.0 67.5 68.0 68.5 69.0 69.5 70.0
## [76] 70.5 71.0 71.5 72.0 72.5 73.0 73.5 74.0 74.5 75.0
predicciones <- predict(modelo, data.frame(matematicas = nuevos_puntos), se.fit = TRUE)
predicciones
## $fit
## 1 2 3 4 5 6
## -4.63471038 -4.55654020 -4.47837002 -4.40019984 -4.32202967 -4.24385949
## 7 8 9 10 11 12
## -4.16568931 -4.08751913 -4.00934896 -3.93117878 -3.85300860 -3.77483842
## 13 14 15 16 17 18
## -3.69666824 -3.61849807 -3.54032789 -3.46215771 -3.38398753 -3.30581736
## 19 20 21 22 23 24
## -3.22764718 -3.14947700 -3.07130682 -2.99313664 -2.91496647 -2.83679629
## 25 26 27 28 29 30
## -2.75862611 -2.68045593 -2.60228576 -2.52411558 -2.44594540 -2.36777522
## 31 32 33 34 35 36
## -2.28960504 -2.21143487 -2.13326469 -2.05509451 -1.97692433 -1.89875415
## 37 38 39 40 41 42
## -1.82058398 -1.74241380 -1.66424362 -1.58607344 -1.50790327 -1.42973309
## 43 44 45 46 47 48
## -1.35156291 -1.27339273 -1.19522255 -1.11705238 -1.03888220 -0.96071202
## 49 50 51 52 53 54
## -0.88254184 -0.80437167 -0.72620149 -0.64803131 -0.56986113 -0.49169095
## 55 56 57 58 59 60
## -0.41352078 -0.33535060 -0.25718042 -0.17901024 -0.10084007 -0.02266989
## 61 62 63 64 65 66
## 0.05550029 0.13367047 0.21184065 0.29001082 0.36818100 0.44635118
## 67 68 69 70 71 72
## 0.52452136 0.60269153 0.68086171 0.75903189 0.83720207 0.91537225
## 73 74 75 76 77 78
## 0.99354242 1.07171260 1.14988278 1.22805296 1.30622313 1.38439331
## 79 80 81 82 83 84
## 1.46256349 1.54073367 1.61890385 1.69707402 1.77524420 1.85341438
## 85
## 1.93158456
##
## $se.fit
## 1 2 3 4 5 6 7 8
## 0.6532674 0.6410488 0.6288535 0.6166829 0.6045386 0.5924220 0.5803350 0.5682794
## 9 10 11 12 13 14 15 16
## 0.5562574 0.5442710 0.5323227 0.5204152 0.5085513 0.4967341 0.4849670 0.4732538
## 17 18 19 20 21 22 23 24
## 0.4615987 0.4500059 0.4384807 0.4270283 0.4156549 0.4043671 0.3931722 0.3820784
## 25 26 27 28 29 30 31 32
## 0.3710949 0.3602317 0.3494999 0.3389122 0.3284823 0.3182259 0.3081603 0.2983048
## 33 34 35 36 37 38 39 40
## 0.2886808 0.2793124 0.2702261 0.2614513 0.2530204 0.2449690 0.2373357 0.2301621
## 41 42 43 44 45 46 47 48
## 0.2234924 0.2173730 0.2118517 0.2069762 0.2027933 0.1993464 0.1966743 0.1948089
## 49 50 51 52 53 54 55 56
## 0.1937734 0.1935812 0.1942349 0.1957258 0.1980352 0.2011347 0.2049887 0.2095554
## 57 58 59 60 61 62 63 64
## 0.2147894 0.2206432 0.2270689 0.2340194 0.2414494 0.2493160 0.2575791 0.2662020
## 65 66 67 68 69 70 71 72
## 0.2751507 0.2843945 0.2939056 0.3036588 0.3136315 0.3238036 0.3341567 0.3446746
## 73 74 75 76 77 78 79 80
## 0.3553426 0.3661476 0.3770779 0.3881227 0.3992728 0.4105194 0.4218548 0.4332721
## 81 82 83 84 85
## 0.4447650 0.4563277 0.4679551 0.4796425 0.4913856
##
## $residual.scale
## [1] 1
predicciones_prob <- exp(predicciones$fit) / (1 + exp(predicciones$fit))
predicciones_prob
## 1 2 3 4 5 6
## 0.009615562 0.010389249 0.011224482 0.012126041 0.013099054 0.014149025
## 7 8 9 10 11 12
## 0.015281854 0.016503865 0.017821824 0.019242973 0.020775051 0.022426319
## 13 14 15 16 17 18
## 0.024205592 0.026122257 0.028186305 0.030408352 0.032799659 0.035372158
## 19 20 21 22 23 24
## 0.038138464 0.041111891 0.044306459 0.047736902 0.051418657 0.055367861
## 25 26 27 28 29 30
## 0.059601325 0.064136505 0.068991458 0.074184786 0.079735560 0.085663235
## 31 32 33 34 35 36
## 0.091987534 0.098728324 0.105905462 0.113538621 0.121647088 0.130249544
## 37 38 39 40 41 42
## 0.139363815 0.149006597 0.159193163 0.169937053 0.181249737 0.193140276
## 43 44 45 46 47 48
## 0.205614973 0.218677025 0.232326188 0.246558447 0.261365732 0.276735659
## 49 50 51 52 53 54
## 0.292651324 0.309091155 0.326028838 0.343433314 0.361268868 0.379495303
## 55 56 57 58 59 60
## 0.398068206 0.416939312 0.436056949 0.455366564 0.474811325 0.494332771
## 61 62 63 64 65 66
## 0.513871512 0.533367947 0.552762991 0.571998783 0.591019371 0.609771345
## 67 68 69 70 71 72
## 0.628204401 0.646271845 0.663930996 0.681143510 0.697875612 0.714098231
## 73 74 75 76 77 78
## 0.729787049 0.744922469 0.759489505 0.773477614 0.786880463 0.799695659
## 79 80 81 82 83 84
## 0.811924441 0.823571354 0.834643901 0.845152199 0.855108627 0.864527492
## 85
## 0.873424703
las.predicciones <- data.frame(nuevos_puntos, predicciones_prob)
colnames(las.predicciones) <- c('matematicas', 'prob.prediccion.matricula')
las.predicciones
## matematicas prob.prediccion.matricula
## 1 33.0 0.009615562
## 2 33.5 0.010389249
## 3 34.0 0.011224482
## 4 34.5 0.012126041
## 5 35.0 0.013099054
## 6 35.5 0.014149025
## 7 36.0 0.015281854
## 8 36.5 0.016503865
## 9 37.0 0.017821824
## 10 37.5 0.019242973
## 11 38.0 0.020775051
## 12 38.5 0.022426319
## 13 39.0 0.024205592
## 14 39.5 0.026122257
## 15 40.0 0.028186305
## 16 40.5 0.030408352
## 17 41.0 0.032799659
## 18 41.5 0.035372158
## 19 42.0 0.038138464
## 20 42.5 0.041111891
## 21 43.0 0.044306459
## 22 43.5 0.047736902
## 23 44.0 0.051418657
## 24 44.5 0.055367861
## 25 45.0 0.059601325
## 26 45.5 0.064136505
## 27 46.0 0.068991458
## 28 46.5 0.074184786
## 29 47.0 0.079735560
## 30 47.5 0.085663235
## 31 48.0 0.091987534
## 32 48.5 0.098728324
## 33 49.0 0.105905462
## 34 49.5 0.113538621
## 35 50.0 0.121647088
## 36 50.5 0.130249544
## 37 51.0 0.139363815
## 38 51.5 0.149006597
## 39 52.0 0.159193163
## 40 52.5 0.169937053
## 41 53.0 0.181249737
## 42 53.5 0.193140276
## 43 54.0 0.205614973
## 44 54.5 0.218677025
## 45 55.0 0.232326188
## 46 55.5 0.246558447
## 47 56.0 0.261365732
## 48 56.5 0.276735659
## 49 57.0 0.292651324
## 50 57.5 0.309091155
## 51 58.0 0.326028838
## 52 58.5 0.343433314
## 53 59.0 0.361268868
## 54 59.5 0.379495303
## 55 60.0 0.398068206
## 56 60.5 0.416939312
## 57 61.0 0.436056949
## 58 61.5 0.455366564
## 59 62.0 0.474811325
## 60 62.5 0.494332771
## 61 63.0 0.513871512
## 62 63.5 0.533367947
## 63 64.0 0.552762991
## 64 64.5 0.571998783
## 65 65.0 0.591019371
## 66 65.5 0.609771345
## 67 66.0 0.628204401
## 68 66.5 0.646271845
## 69 67.0 0.663930996
## 70 67.5 0.681143510
## 71 68.0 0.697875612
## 72 68.5 0.714098231
## 73 69.0 0.729787049
## 74 69.5 0.744922469
## 75 70.0 0.759489505
## 76 70.5 0.773477614
## 77 71.0 0.786880463
## 78 71.5 0.799695659
## 79 72.0 0.811924441
## 80 72.5 0.823571354
## 81 73.0 0.834643901
## 82 73.5 0.845152199
## 83 74.0 0.855108627
## 84 74.5 0.864527492
## 85 75.0 0.873424703
plot(las.predicciones)