1. Preparación del ambiente

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(broom)
# Fix the random-number seed so that random draws are reproducible across runs.
set.seed(2025)

2. Simulación de datos

Se simulan datos con una variable respuesta binaria `y` que depende de una variable continua `x1` y de una variable categórica `x2`.

# Simulate a binary outcome driven by one continuous predictor (x1) and one
# two-level categorical predictor (x2).
n <- 200
x1 <- rnorm(n)
x2 <- sample(c("A", "B"), n, replace = TRUE)

# Linear predictor on the log-odds scale: slope 0.5 for x1, plus 1 for "B".
log_odds <- 0.5 * x1 + ifelse(x2 == "B", 1, 0)

# Inverse-logit transform maps the log-odds to success probabilities.
prob <- 1 / (1 + exp(-log_odds))
y <- rbinom(n, 1, prob)

# Assemble the analysis data set; outcome and group are stored as factors.
df <- data.frame(
  y = as.factor(y),
  x1 = x1,
  x2 = as.factor(x2)
)

3. Exploración de datos

# Density of x1 for each outcome class, drawn in a separate panel per x2 level.
ggplot(data = df, mapping = aes(x = x1, fill = y)) +
  geom_density(alpha = 0.5) +
  facet_wrap(vars(x2)) +
  ggtitle("Distribución de x1 por clase y categoría x2")
Distribución de `x1` por clase `y` y categoría `x2`

Distribución de `x1` por clase `y` y categoría `x2`

4. Ajuste del modelo de regresión logística

# Fit a binomial-family GLM of y on x1 and x2 using df, then print the
# coefficient table and deviance summary.
modelo_logit <- glm(y ~ x1 + x2, data = df, family = "binomial")
summary(modelo_logit)
## 
## Call:
## glm(formula = y ~ x1 + x2, family = "binomial", data = df)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   0.1209     0.2036   0.594 0.552669    
## x1            0.6034     0.1649   3.659 0.000253 ***
## x2B           0.9121     0.3142   2.903 0.003695 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 265.63  on 199  degrees of freedom
## Residual deviance: 242.18  on 197  degrees of freedom
## AIC: 248.18
## 
## Number of Fisher Scoring iterations: 4

5. Evaluación del modelo

# Fitted success probabilities (response scale) for the rows used in the fit.
df$prob_pred <- predict(modelo_logit, type = "response")
# Classify as 1 when the fitted probability exceeds the 0.5 cut-off.
df$y_pred <- ifelse(df$prob_pred > 0.5, 1, 0)

# Confusion matrix. Predictions are tabulated as a factor with both levels
# fixed, so rows 0 and 1 always appear even if only one class is predicted.
table(
  Predicho = factor(df$y_pred, levels = c(0, 1)),
  Real = df$y
)
##         Real
## Predicho   0   1
##        0  29  21
##        1  47 103
# Visualization: overlaid histograms of the predicted probabilities, one per
# observed class.
ggplot(data = df, mapping = aes(x = prob_pred, fill = y)) +
  geom_histogram(position = "identity", alpha = 0.6, bins = 30) +
  labs(
    title = "Distribución de probabilidades predichas",
    x = "Probabilidad predicha",
    y = "Frecuencia"
  )

6. Interpretación de coeficientes

# Tidy coefficient table with exponentiated estimates (exponentiate = TRUE)
# and confidence intervals (conf.int = TRUE).
broom::tidy(modelo_logit, exponentiate = TRUE, conf.int = TRUE)
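  • El coeficiente de `x1`, al estar exponenciado, se interpreta como una razón de momios (odds ratio): es el factor por el que se multiplican los odds de éxito por cada unidad adicional de `x1`, manteniendo `x2` constante.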
  • El término `x2B` compara los odds de éxito del grupo `B` contra los del grupo de referencia `A`, manteniendo `x1` constante.
LS0tDQp0aXRsZTogIkFuw6FsaXNpcyBjb24gUmVncmVzacOzbiBMb2fDrXN0aWNhIg0KYXV0aG9yOiAiTWFudWVsIFJvbWVybyINCmRhdGU6ICJgciBTeXMuRGF0ZSgpYCINCm91dHB1dDogDQogIGh0bWxfZG9jdW1lbnQ6DQogICAgdG9jOiB0cnVlDQogICAgdG9jX2Zsb2F0OiB0cnVlDQogICAgdG9jX2RlcHRoOiAzDQogICAgbnVtYmVyX3NlY3Rpb25zOiB0cnVlDQogICAgY29kZV9mb2xkaW5nOiBzaG93DQogICAgY29kZV9kb3dubG9hZDogdHJ1ZQ0KICAgIHRoZW1lOiBmbGF0bHkNCiAgICBoaWdobGlnaHQ6IHRhbmdvDQogICAgZmlnX2NhcHRpb246IHRydWUNCiAgICBkZl9wcmludDogcGFnZWQNCi0tLQ0KDQojIDEuIFByZXBhcmFjacOzbiBkZWwgYW1iaWVudGUNCg0KYGBge3J9DQpsaWJyYXJ5KGdncGxvdDIpDQpsaWJyYXJ5KGRwbHlyKQ0KbGlicmFyeShicm9vbSkNCnNldC5zZWVkKDIwMjUpDQpgYGANCg0KIyAyLiBTaW11bGFjacOzbiBkZSBkYXRvcw0KDQpTZSBzaW11bGFuIGRhdG9zIGNvbiB1bmEgdmFyaWFibGUgcmVzcHVlc3RhIGJpbmFyaWEgYHlgIHF1ZSBkZXBlbmRlIGRlIHVuYSB2YXJpYWJsZSBjb250aW51YSBgeDFgIHkgdW5hIGNhdGVnw7NyaWNhIGB4MmAuDQoNCmBgYHtyfQ0KbiA8LSAyMDANCngxIDwtIHJub3JtKG4pDQp4MiA8LSBzYW1wbGUoYygiQSIsICJCIiksIG4sIHJlcGxhY2UgPSBUUlVFKQ0KcHJvYiA8LSAxIC8gKDEgKyBleHAoLSgwLjUgKiB4MSArIGlmZWxzZSh4MiA9PSAiQiIsIDEsIDApKSkpDQp5IDwtIHJiaW5vbShuLCAxLCBwcm9iKQ0KDQpkZiA8LSBkYXRhLmZyYW1lKHkgPSBhcy5mYWN0b3IoeSksIHgxID0geDEsIHgyID0gYXMuZmFjdG9yKHgyKSkNCmBgYA0KDQojIDMuIEV4cGxvcmFjacOzbiBkZSBkYXRvcw0KDQpgYGB7ciBmaWcuY2FwPSJEaXN0cmlidWNpw7NuIGRlIHgxIHBvciBjbGFzZSB5IHkgY2F0ZWdvcsOtYSB4MiJ9DQpnZ3Bsb3QoZGYsIGFlcyh4ID0geDEsIGZpbGwgPSB5KSkgKw0KICBnZW9tX2RlbnNpdHkoYWxwaGEgPSAwLjUpICsNCiAgZmFjZXRfd3JhcCh+eDIpICsNCiAgbGFicyh0aXRsZSA9ICJEaXN0cmlidWNpw7NuIGRlIHgxIHBvciBjbGFzZSB5IGNhdGVnb3LDrWEgeDIiKQ0KYGBgDQoNCiMgNC4gQWp1c3RlIGRlbCBtb2RlbG8gZGUgcmVncmVzacOzbiBsb2fDrXN0aWNhDQoNCmBgYHtyfQ0KbW9kZWxvX2xvZ2l0IDwtIGdsbSh5IH4geDEgKyB4MiwgZGF0YSA9IGRmLCBmYW1pbHkgPSAiYmlub21pYWwiKQ0Kc3VtbWFyeShtb2RlbG9fbG9naXQpDQpgYGANCg0KIyA1LiBFdmFsdWFjacOzbiBkZWwgbW9kZWxvDQoNCmBgYHtyfQ0KZGYkcHJvYl9wcmVkIDwtIHByZWRpY3QobW9kZWxvX2xvZ2l0LCB0eXBlID0gInJlc3BvbnNlIikNCmRmJHlfcHJlZCA8LSBpZmVsc2UoZGYkcHJvYl9wcmVkID4gMC41LCAxLCAwKQ0KDQojIE1hdHJpeiBkZSBjb25mdXNpw7NuDQp0YWJsZShQcmVkaWNobyA9IGRmJHlfcHJlZCwgUmVhbCA9IGRmJHkpDQoNCiMgVmlzdWFsaXphY2nD
s24NCmdncGxvdChkZiwgYWVzKHggPSBwcm9iX3ByZWQsIGZpbGwgPSB5KSkgKw0KICBnZW9tX2hpc3RvZ3JhbShiaW5zID0gMzAsIHBvc2l0aW9uID0gImlkZW50aXR5IiwgYWxwaGEgPSAwLjYpICsNCiAgbGFicyh0aXRsZSA9ICJEaXN0cmlidWNpw7NuIGRlIHByb2JhYmlsaWRhZGVzIHByZWRpY2hhcyIsIHggPSAiUHJvYmFiaWxpZGFkIHByZWRpY2hhIiwgeSA9ICJGcmVjdWVuY2lhIikNCmBgYA0KDQojIDYuIEludGVycHJldGFjacOzbiBkZSBjb2VmaWNpZW50ZXMNCg0KYGBge3J9DQpicm9vbTo6dGlkeShtb2RlbG9fbG9naXQsIGV4cG9uZW50aWF0ZSA9IFRSVUUsIGNvbmYuaW50ID0gVFJVRSkNCmBgYA0KDQotIEVsIGNvZWZpY2llbnRlIGRlIGB4MWAgaW5kaWNhIGVsIGVmZWN0byBkZSBlc3RhIHZhcmlhYmxlIHNvYnJlIGxhIHByb2JhYmlsaWRhZCBkZSDDqXhpdG8uDQotIEVsIHTDqXJtaW5vIGB4MkJgIGNvbXBhcmEgZWwgZ3J1cG8gYEJgIGNvbnRyYSBgQWAuDQoNCg==