# Regresion-Logistica.R

#UNIVERSIDAD NACIONAL DEL ALTIPLANO
#INGENIERIA ESTADISTICA E INFORMATICA
#REGRESION LOGISTICA

library(readxl)

## Warning: package 'readxl' was built under R version 4.0.2

# Read the workbook into a tibble (columns Abandona, Nivel_Satifaccion,
# Ultima_Evaluacion) and print it.
# NOTE(review): the path is absolute and machine-specific; adjust it when
# running the script elsewhere.
Datos <- read_excel(
  path = "E:/VII SEMESTRE/REGRESION AVANZADA/Trabajo 4/Datos.xlsx"
)
Datos

## # A tibble: 40 x 3
##    Abandona Nivel_Satifaccion Ultima_Evaluacion
##    <chr>                <dbl>             <dbl>
##  1 No                    0.22              0.98
##  2 Sí                    0.89              0.65
##  3 No                    0.63              0.76
##  4 No                    0.8               0.96
##  5 Sí                    0.42              0.48
##  6 No                    0.77              0.82
##  7 No                    0.4               0.98
##  8 No                    0.6               0.65
##  9 Sí                    0.4               0.76
## 10 No                    0.76              0.96
## # ... with 30 more rows

# Preview the first six rows (the default for head()).
head(Datos, n = 6L)

## # A tibble: 6 x 3
##   Abandona Nivel_Satifaccion Ultima_Evaluacion
##   <chr>                <dbl>             <dbl>
## 1 No                    0.22              0.98
## 2 Sí                    0.89              0.65
## 3 No                    0.63              0.76
## 4 No                    0.8               0.96
## 5 Sí                    0.42              0.48
## 6 No                    0.77              0.82

# Open the spreadsheet-style data viewer only in interactive sessions:
# View() needs a GUI, so it is skipped when the script runs
# non-interactively (e.g. through Rscript or while rendering a report).
if (interactive()) {
  View(Datos)
}

# Turn the character response into a factor and count the observations
# in each class.
Datos[["Abandona"]] <- as.factor(Datos[["Abandona"]])
table(Datos[["Abandona"]])

## 
## No Sí 
## 21 19

# Five-number summary and mean of each numeric predictor:
# satisfaction level first.
summary(Datos[["Nivel_Satifaccion"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0100  0.3200  0.5250  0.5212  0.7625  0.9800

# Five-number summary and mean of the last evaluation score.
summary(Datos[["Ultima_Evaluacion"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0100  0.4800  0.6500  0.6352  0.8450  0.9800

#Scatter plot of the two predictors, colored by Abandona, using the ggplot2 library
library(ggplot2)

## Warning: package 'ggplot2' was built under R version 4.0.3

# Satisfaction level (x) against last evaluation (y), one point per row,
# with the point colour showing the Abandona class.
ggplot(
  data = Datos,
  mapping = aes(
    x = Nivel_Satifaccion,
    y = Ultima_Evaluacion,
    colour = Abandona
  )
) +
  geom_point()

# Fit a binomial (logistic) GLM of Abandona on the two numeric scores.
modelo.logit <- glm(
  formula = Abandona ~ Ultima_Evaluacion + Nivel_Satifaccion,
  family = "binomial",
  data = Datos
)

# Print the coefficient table, deviances and AIC of the fitted model.
summary(modelo.logit)

## 
## Call:
## glm(formula = Abandona ~ Ultima_Evaluacion + Nivel_Satifaccion, 
##     family = "binomial", data = Datos)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.2421  -1.1568  -0.9374   1.2043   1.4931  
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)
## (Intercept)        -0.2840     0.8138  -0.349    0.727
## Ultima_Evaluacion  -0.5663     1.3222  -0.428    0.668
## Nivel_Satifaccion   1.0390     1.3525   0.768    0.442
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 55.352  on 39  degrees of freedom
## Residual deviance: 54.747  on 37  degrees of freedom
## AIC: 60.747
## 
## Number of Fisher Scoring iterations: 4

# The coefficients are on the log-odds scale; exponentiating them makes
# them easier to interpret (multiplicative change in the odds per unit
# increase of each predictor).
exp(coef(modelo.logit))

##       (Intercept) Ultima_Evaluacion Nivel_Satifaccion 
##         0.7527843         0.5676293         2.8263157

# Linear predictor (log-odds) for an individual with, for example, a last
# evaluation of 0.75 and a satisfaction level of 0.6.
log.odds <- predict(
  modelo.logit,
  newdata = data.frame(
    Ultima_Evaluacion = 0.75,
    Nivel_Satifaccion = 0.6
  ),
  type = "link"
)
log.odds

##           1 
## -0.08530712

# Probability of leaving the company: the inverse-logit of the log-odds.
# plogis(x) equals exp(x) / (1 + exp(x)) but stays finite for large x,
# where the hand-written ratio becomes Inf / Inf = NaN.
plogis(log.odds)

##         1 
## 0.4786861

# Earlier attempts, kept commented out for reference:
# merge(Datos, probs, by = "position")
# Datos$Abandona <- as.character(Datos$Abandona)
# Datos$Abandona[Datos$Abandona == "Sí"] <- 1
# Datos$Abandona[Datos$Abandona == "No"] <- 0
# Datos$Abandona <- as.numeric(Datos$Abandona) - 1

# Recode the factor as a numeric 0/1 response for plotting: the factor's
# integer codes start at 1, so subtracting 1 maps the first level ("No")
# to 0 and the second level ("Sí") to 1.
Datos$Abandona <- as.integer(Datos$Abandona) - 1

# Smoothing layer that fits a binomial GLM to the plotted data.
#
# Arguments:
#   ...  forwarded unchanged to geom_smooth() (e.g. `formula`).
# Returns the layer produced by geom_smooth(), to be added to a ggplot.
binomial_sm <- function(...) {
  glm_args <- list(family = "binomial")
  geom_smooth(method = "glm", method.args = glm_args, ...)
}

# Jittered 0/1 outcomes against satisfaction level, overlaid with a
# binomial GLM smooth fitted on a natural-spline basis of x.
ggplot(data = Datos, mapping = aes(x = Nivel_Satifaccion, y = Abandona)) +
  geom_jitter(height = 0.05) +
  binomial_sm(formula = y ~ splines::ns(x, 3))

# Regresion-Logistica.R

# BRITMAN SALCEDO

# 2021-08-20