#UNIVERSIDAD NACIONAL DEL ALTIPLANO
#INGENIERIA ESTADISTICA E INFORMATICA
#REGRESION LOGISTICA
library(readxl)
## Warning: package 'readxl' was built under R version 4.0.2
# Location of the workbook. The default is the original absolute path; set the
# DATOS_XLSX environment variable to run the script on another machine without
# editing the code.
ruta_datos <- Sys.getenv(
  "DATOS_XLSX",
  unset = "E:/VII SEMESTRE/REGRESION AVANZADA/Trabajo 4/Datos.xlsx"
)
Datos <- read_excel(ruta_datos)
# Print the tibble to check the column names and types that were read.
Datos
## # A tibble: 40 x 3
## Abandona Nivel_Satifaccion Ultima_Evaluacion
## <chr> <dbl> <dbl>
## 1 No 0.22 0.98
## 2 Sí 0.89 0.65
## 3 No 0.63 0.76
## 4 No 0.8 0.96
## 5 Sí 0.42 0.48
## 6 No 0.77 0.82
## 7 No 0.4 0.98
## 8 No 0.6 0.65
## 9 Sí 0.4 0.76
## 10 No 0.76 0.96
## # ... with 30 more rows
# Show the first six rows (head()'s default, written out explicitly).
head(Datos, n = 6L)
## # A tibble: 6 x 3
## Abandona Nivel_Satifaccion Ultima_Evaluacion
## <chr> <dbl> <dbl>
## 1 No 0.22 0.98
## 2 Sí 0.89 0.65
## 3 No 0.63 0.76
## 4 No 0.8 0.96
## 5 Sí 0.42 0.48
## 6 No 0.77 0.82
# View() opens a spreadsheet-style viewer, which only makes sense in an
# interactive session; skip it when the script is run with Rscript or knitted,
# where a viewer may be unavailable.
if (interactive()) {
  View(Datos)
}
# Encode the response as a factor (levels come from the distinct values in
# sorted order) and count how many rows fall in each class.
Datos[["Abandona"]] <- as.factor(Datos[["Abandona"]])
table(Datos[["Abandona"]])
##
## No Sí
## 21 19
# Five-number summary plus mean of the satisfaction score.
summary(Datos[["Nivel_Satifaccion"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0100 0.3200 0.5250 0.5212 0.7625 0.9800
# Five-number summary plus mean of the last-evaluation score.
summary(Datos[["Ultima_Evaluacion"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0100 0.4800 0.6500 0.6352 0.8450 0.9800
# Scatter plot of the two predictors, colored by Abandona (ggplot2 library)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.3
# x = satisfaction level, y = last evaluation; geom_point() draws one point
# per row of Datos, with the point color mapped to Abandona.
ggplot(
  data = Datos,
  mapping = aes(
    x = Nivel_Satifaccion,
    y = Ultima_Evaluacion,
    color = Abandona
  )
) +
  geom_point()

# Fit a logistic regression of Abandona on the two scores.
# With a factor response, glm() treats the first level as "failure", so the
# coefficients describe the log-odds of the second level.
modelo.logit <- glm(
  formula = Abandona ~ Ultima_Evaluacion + Nivel_Satifaccion,
  family = "binomial",
  data = Datos
)
# Coefficient table, deviances and AIC of the fitted model.
summary(modelo.logit)
##
## Call:
## glm(formula = Abandona ~ Ultima_Evaluacion + Nivel_Satifaccion,
## family = "binomial", data = Datos)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.2421 -1.1568 -0.9374 1.2043 1.4931
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.2840 0.8138 -0.349 0.727
## Ultima_Evaluacion -0.5663 1.3222 -0.428 0.668
## Nivel_Satifaccion 1.0390 1.3525 0.768 0.442
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 55.352 on 39 degrees of freedom
## Residual deviance: 54.747 on 37 degrees of freedom
## AIC: 60.747
##
## Number of Fisher Scoring iterations: 4
# Exponentiate the coefficients so they can be read on the odds scale
# (odds ratios for the predictors, baseline odds for the intercept).
exp(coef(modelo.logit))
## (Intercept) Ultima_Evaluacion Nivel_Satifaccion
## 0.7527843 0.5676293 2.8263157
# Linear predictor (log-odds) for a hypothetical individual with
# Ultima_Evaluacion = 0.75 and Nivel_Satifaccion = 0.6.
# type = "link" is predict.glm()'s default, written out here for clarity.
log.odds <- predict(
  modelo.logit,
  newdata = data.frame(Ultima_Evaluacion = 0.75, Nivel_Satifaccion = 0.6),
  type = "link"
)
log.odds
## 1
## -0.08530712
# Probability of leaving the company: apply the inverse logit to the log-odds.
# plogis() is the logistic CDF, 1 / (1 + exp(-x)). Unlike the hand-written
# exp(x) / (1 + exp(x)), it does not produce NaN (Inf / Inf) when exp(x)
# overflows for large log-odds.
plogis(log.odds)
## 1
## 0.4786861
# Commented-out alternatives (not run):
# merge(Datos, probs, by = "position")
# Datos$Abandona <- as.character(Datos$Abandona)
# Datos$Abandona[Datos$Abandona == "Sí"] <- 1
# Datos$Abandona[Datos$Abandona == "No"] <- 0
# Datos$Abandona <- as.numeric(Datos$Abandona) - 1
# Replace the factor by its integer codes shifted to start at 0, so the first
# level becomes 0 and the second becomes 1 (needed for the 0/1 y-axis below).
Datos[["Abandona"]] <- as.numeric(Datos[["Abandona"]]) - 1
#' Logistic-regression smoother layer.
#'
#' Thin wrapper around `geom_smooth()` that fixes `method = "glm"` and passes
#' `family = "binomial"` to the fitting function through `method.args`.
#'
#' @param ... Further arguments forwarded unchanged to `geom_smooth()`
#'   (for example `formula`).
#' @return Whatever `geom_smooth()` returns for those arguments.
binomial_sm <- function(...) {
  glm_args <- list(family = "binomial")
  geom_smooth(method = "glm", method.args = glm_args, ...)
}
# Jittered 0/1 outcomes against satisfaction level, overlaid with a binomial
# GLM smoother whose linear predictor is a natural spline (df = 3) in x.
ggplot(Datos, aes(x = Nivel_Satifaccion, y = Abandona)) +
  geom_jitter(height = 0.05) +
  binomial_sm(formula = y ~ splines::ns(x, 3))
