REGRESIÓN LOGÍSTICA SIMPLE

# Load the semicolon-separated CSV (read.csv2 conventions) that holds the
# binary outcome `vdependiente` and the predictor `P.GEN`.
rlsimple <- read.csv2("../R. LOGISTICA SIMPLE YY PGEN/reglogisticayypgen.csv")

# Coerce both analysis columns to numeric. Going through character first
# converts the printed values rather than the internal codes should a column
# have been read as a factor.
columnas_modelo <- c("vdependiente", "P.GEN")
rlsimple[columnas_modelo] <- lapply(
  rlsimple[columnas_modelo],
  function(columna) as.numeric(as.character(columna))
)

# Print the full data set.
rlsimple

##     vdependiente P.GEN
## 1              0   4.5
## 2              0   4.9
## 3              0   4.5
## 4              0   4.4
## 5              0   6.0
## 6              1   6.4
## 7              1   5.8
## 8              0   5.3
## 9              0   5.7
## 10             0   4.6
## 11             1   5.7
## 12             0   4.8
## 13             1   5.1
## 14             0   5.2
## 15             0   4.9
## 16             0   4.7
## 17             0   4.8
## 18             1   5.8
## 19             1   6.6
## 20             1   6.4
## 21             0   5.4
## 22             0   4.9
## 23             1   6.0
## 24             1   5.0
## 25             0   5.0
## 26             0   4.4
## 27             1   6.3
## 28             1   6.0
## 29             0   4.7
## 30             0   4.5
## 31             0   5.1
## 32             1   5.9
## 33             0   4.8
## 34             0   4.3
## 35             1   5.1
## 36             0   5.0
## 37             0   5.0
## 38             0   4.1
## 39             0   4.7
## 40             0   5.7
## 41             1   5.3
## 42             1   5.0
## 43             1   5.1
## 44             0   5.0
## 45             1   6.4
## 46             1   5.1
## 47             1   6.7
## 48             1   5.4
## 49             0   4.9
## 50             0   5.0
## 51             1   6.4
## 52             0   4.8
## 53             0   5.3
## 54             1   5.7
## 55             1   5.2
## 56             1   5.1
## 57             1   5.2
## 58             1   5.7
## 59             0   5.5
## 60             1   5.0
## 61             0   5.1
## 62             0   4.3
## 63             0   5.5
## 64             1   5.2
## 65             0   5.3
## 66             1   5.6
## 67             0   5.0
## 68             1   6.7
## 69             0   5.4
## 70             1   6.6
## 71             1   6.1
## 72             1   6.5
## 73             1   6.6
## 74             1   6.7
## 75             1   6.3
## 76             1   6.2
## 77             1   5.7
## 78             0   5.0
## 79             1   5.0
## 80             0   5.0
## 81             1   6.6
## 82             1   6.2
## 83             1   5.1
## 84             1   5.1
## 85             0   4.8
## 86             1   4.9
## 87             0   5.1
## 88             1   6.3
## 89             0   5.1
## 90             1   6.4
## 91             1   5.0
## 92             1   5.2
## 93             0   5.2
## 94             0   4.4
## 95             1   4.5
## 96             0   5.0
## 97             1   5.5
## 98             1   6.5
## 99             0   4.9
## 100            0   5.4
## 101            0   4.3
## 102            1   4.7
## 103            0   4.9
## 104            0   5.2
## 105            1   6.3
## 106            0   4.8
## 107            0   4.6
## 108            1   5.9
## 109            0   5.1
## 110            0   6.1
## 111            1   6.2
## 112            1   4.8
## 113            1   4.8
## 114            1   6.3
## 115            1   5.2
## 116            1   5.7
## 117            0   5.0
## 118            1   5.1
## 119            0   4.4
## 120            0   4.9
## 121            1   6.2
## 122            0   4.2
## 123            1   6.1
## 124            0   4.7
## 125            1   5.9
## 126            1   4.7
## 127            0   4.8
## 128            0   4.4
## 129            1   5.2
## 130            1   5.3
## 131            1   6.4
## 132            1   6.1
## 133            0   4.7
## 134            0   4.1
## 135            0   4.0
## 136            0   4.6
## 137            0   4.8
## 138            1   6.0
## 139            1   6.1
## 140            0   4.7
## 141            0   4.6
## 142            0   4.8
## 143            0   5.6
## 144            0   5.6
## 145            1   6.4
## 146            1   4.5
## 147            1   5.9
## 148            1   5.0
## 149            0   4.3
## 150            1   5.2
## 151            1   6.1
## 152            0   4.6
## 153            1   5.4
## 154            1   5.4
## 155            1   6.2
## 156            1   6.5
## 157            1   6.7
## 158            1   5.4
## 159            1   6.7
## 160            1   6.3
## 161            1   5.0
## 162            0   4.4
## 163            1   5.0
## 164            0   5.0
## 165            1   4.3
## 166            1   6.0
## 167            1   5.1
## 168            0   4.8
## 169            1   6.4
## 170            0   4.9
## 171            0   4.5
## 172            1   4.3
## 173            1   5.2
## 174            1   4.3
## 175            0   5.4
## 176            1   6.2
## 177            0   4.1
## 178            0   5.1
## 179            1   4.3
## 180            1   5.3
## 181            1   6.4
## 182            1   5.4
## 183            0   5.5
## 184            0   5.3
## 185            1   5.6

# Open the spreadsheet-style data viewer only in interactive sessions.
# View() needs a GUI data viewer, so calling it unconditionally can fail when
# the script is run in batch mode (e.g. Rscript or when knitting a report).
if (interactive()) {
  View(rlsimple)
}

# The binary dependent variable is built from 16 yes/no questions:
#   1 = success: the student gives more than 10 positive answers (Yi > 10)
#   0 = failure: the student gives at most 10 positive answers (Yi <= 10)
# The limits below express those answer counts as a percentage of the total
# (rule of three: 16 -> 100%, 10 -> X%).

TOTAL_PREGUNTAS <- 16
MAX_RESPUESTAS_FRACASO <- 10
# Success starts at the first count above the failure limit.
MIN_RESPUESTAS_EXITO <- MAX_RESPUESTAS_FRACASO + 1

LIM_MAX_FRACASO_PORC <- (MAX_RESPUESTAS_FRACASO * 100) / TOTAL_PREGUNTAS
LIM_MAX_FRACASO_PORC

## [1] 62.5

# Upper limit for failure: 62.5% (10 positive answers).

LIM_MIN_EXITO_PORC <- (MIN_RESPUESTAS_EXITO * 100) / TOTAL_PREGUNTAS
LIM_MIN_EXITO_PORC

## [1] 68.75

# Lower limit for success: 68.75% (11 positive answers).

library(ggplot2)
library(MASS)

# Fit a logistic regression of the binary outcome on the general grade average.
rlsimple2 <- glm(vdependiente ~ P.GEN, data = rlsimple, family = "binomial")

# Plot the observations, the fitted probability curve and four reference lines
# (cut-offs chosen by the author):
#   green vertical line   = 5.4     grade cut-off associated with Y = 0
#   red vertical line     = 5.5     grade cut-off associated with Y = 1
#   red horizontal line   = 68.75%  share of positive answers required for Y = 1
#   green horizontal line = 62.5%   largest share of positive answers for Y = 0
#
# The intercepts are passed as plain arguments rather than through aes():
# a constant wrapped in aes() makes the layer inherit the plot data and draw
# one overlapping line per row of `rlsimple`.
ggplot(data = rlsimple, aes(x = P.GEN, y = vdependiente)) +
  geom_point(aes(color = as.factor(vdependiente)), shape = 1) +
  geom_hline(yintercept = 0.6875, color = "red") +
  geom_vline(xintercept = 5.5, color = "red") +
  geom_hline(yintercept = 0.625, color = "green") +
  geom_vline(xintercept = 5.4, color = "green") +
  # Fitted curve: predicted probability of Y = 1 for each x on the grade axis.
  stat_function(fun = function(x) {
    predict(rlsimple2, newdata = data.frame(P.GEN = x), type = "response")
  }) +
  theme_bw() +
  labs(title = "REGRESION LOGISTICA",
       y = "Probabilidad de Exito = 1 >= 0.6875 >= nota 5.5") +
  theme(legend.position = "none")

# Coefficient table, deviances and AIC of the fitted model.
summary(rlsimple2)

## 
## Call:
## glm(formula = vdependiente ~ P.GEN, family = "binomial", data = rlsimple)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.1370  -0.8392   0.2633   0.7891   2.0758  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -12.0817     1.8936  -6.380 1.77e-10 ***
## P.GEN         2.3373     0.3654   6.397 1.58e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 255.25  on 184  degrees of freedom
## Residual deviance: 185.91  on 183  degrees of freedom
## AIC: 189.91
## 
## Number of Fisher Scoring iterations: 5

# With geom_smooth() the plot can be obtained directly: the layer fits a
# binomial glm itself instead of relying on the model object fitted earlier.
ggplot(rlsimple, aes(P.GEN, vdependiente)) +
  list(
    geom_point(aes(color = as.factor(vdependiente)), shape = 1),
    geom_smooth(
      se = FALSE,
      color = "gray20",
      method = "glm",
      method.args = list(family = "binomial")
    ),
    theme_bw(),
    theme(legend.position = "none")
  )

library(ggplot2)
# Number of observations in each outcome class.
table(rlsimple[["vdependiente"]])

##
##   0   1
##  85 100

# Default (vertical) boxplot of the general grade average.
boxplot(rlsimple[["P.GEN"]])

# Horizontal, notched version with an axis label.
boxplot(
  rlsimple[["P.GEN"]],
  horizontal = TRUE,
  notch = TRUE,
  col = "palegreen",
  xlab = "PROMEDIO DE NOTAS GENERAL"
)

# Limits of the notch, i.e. the confidence interval around the median.
boxplot.stats(rlsimple[["P.GEN"]])[["conf"]]

## [1] 4.97222 5.22778

# Horizontal notched boxplot without axes, annotated with its own statistics.
data <- rlsimple$P.GEN
boxplot(
  data,
  horizontal = TRUE,
  notch = TRUE,
  col = "palegreen",
  axes = FALSE,
  staplewex = 1
)
# Labels: notch limits (rounded to one decimal) followed by the five boxplot
# statistics; each label is written at the position of its own value.
values <- with(boxplot.stats(data), c(round(conf, 1), stats))
text(x = values, y = 1.25, labels = values)

# Vertical version of the same annotated boxplot.
data2 <- rlsimple$P.GEN
boxplot(
  data2,
  notch = TRUE,
  col = "palegreen",
  axes = FALSE,
  staplewex = 1
)
values <- with(boxplot.stats(data2), c(round(conf, 1), stats))
text(x = 1.25, y = values, labels = values)

# Six-number summary of the general grade average.
promgeneral <- rlsimple$P.GEN
summary(promgeneral)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
##   4.000   4.800   5.100   5.314   5.900   6.700

# Besides the point estimates of the model coefficients, it is advisable to
# compute their confidence intervals. For logistic regression these intervals
# are usually obtained with the profile likelihood method.

confint(rlsimple2, level = 0.95)

## Waiting for profiling to be done...

##                  2.5 %    97.5 %
## (Intercept) -16.105152 -8.632246
## P.GEN         1.674523  3.116597

REGRESIÓN LOGÍSTICA SIMPLE

JOSÉ BAEZA

07-10-2019