1. Preparación del ambiente
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'dplyr' was built under R version 4.4.3
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Attach the packages used below: ggplot2 for the plots and broom for the
# tidy coefficient table. dplyr is attached as well, matching the startup
# messages shown above.
library(ggplot2)
library(dplyr)
library(broom)
# Fix the RNG seed so the simulated data are reproducible.
set.seed(2025)
2. Simulación de datos
Se simulan datos con una variable respuesta binaria `y` que depende de una variable continua `x1` y una categórica `x2`.
# Simulate a binary outcome driven by one continuous predictor (x1) and one
# two-level categorical predictor (x2) through a logistic link.
n <- 200
x1 <- rnorm(n)
x2 <- sample(c("A", "B"), size = n, replace = TRUE)

# Linear predictor on the log-odds scale: slope 0.5 for x1, shift of +1 for
# observations in group "B".
eta <- 0.5 * x1 + ifelse(x2 == "B", 1, 0)
prob <- 1 / (1 + exp(-eta))
y <- rbinom(n, size = 1, prob = prob)

# Store the response and the grouping variable as factors.
df <- data.frame(
  y = as.factor(y),
  x1 = x1,
  x2 = as.factor(x2)
)
3. Exploración de datos
# Density of x1 for each response class, one panel per level of x2.
ggplot(data = df, mapping = aes(x = x1, fill = y)) +
  geom_density(alpha = 0.5) +
  facet_wrap(vars(x2)) +
  labs(
    title = "Distribución de x1 por clase y categoría x2"
  )
4. Ajuste del modelo de regresión logística
# Fit a binomial GLM of y on x1 and x2, then print the coefficient table
# and deviance summary.
modelo_logit <- glm(
  formula = y ~ x1 + x2,
  family = "binomial",
  data = df
)
summary(modelo_logit)
##
## Call:
## glm(formula = y ~ x1 + x2, family = "binomial", data = df)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.1209 0.2036 0.594 0.552669
## x1 0.6034 0.1649 3.659 0.000253 ***
## x2B 0.9121 0.3142 2.903 0.003695 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 265.63 on 199 degrees of freedom
## Residual deviance: 242.18 on 197 degrees of freedom
## AIC: 248.18
##
## Number of Fisher Scoring iterations: 4
5. Evaluación del modelo
# Predicted success probabilities from the fitted model, on the response scale.
df$prob_pred <- predict(modelo_logit, type = "response")
# Classify an observation as 1 when its predicted probability exceeds 0.5.
df$y_pred <- ifelse(df$prob_pred > 0.5, 1, 0)
# Confusion matrix. The predictions are tabulated as a factor with the same
# levels as the observed response, so the table stays square even when the
# model never predicts one of the classes.
table(
  Predicho = factor(df$y_pred, levels = levels(df$y)),
  Real = df$y
)
## Real
## Predicho 0 1
## 0 29 21
## 1 47 103
# Visualization: overlaid histograms of the predicted probabilities, with the
# fill mapped to the observed response y.
ggplot(data = df, mapping = aes(x = prob_pred, fill = y)) +
  geom_histogram(alpha = 0.6, position = "identity", bins = 30) +
  labs(
    title = "Distribución de probabilidades predichas",
    x = "Probabilidad predicha",
    y = "Frecuencia"
  )

6. Interpretación de coeficientes
# Coefficient table with exponentiated estimates (odds ratios for this
# logistic model) and their confidence intervals.
broom::tidy(
  x = modelo_logit,
  conf.int = TRUE,
  exponentiate = TRUE
)
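- El coeficiente de `x1` indica el efecto de esta variable sobre los log-odds de éxito; al exponenciarlo (`exponentiate = TRUE`) se interpreta como la razón de momios (odds ratio) asociada a un aumento de una unidad en `x1`, manteniendo `x2` constante.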
- El término `x2B` compara el grupo `B` contra `A` (la categoría de referencia).
LS0tDQp0aXRsZTogIkFuw6FsaXNpcyBjb24gUmVncmVzacOzbiBMb2fDrXN0aWNhIg0KYXV0aG9yOiAiTWFudWVsIFJvbWVybyINCmRhdGU6ICJgciBTeXMuRGF0ZSgpYCINCm91dHB1dDogDQogIGh0bWxfZG9jdW1lbnQ6DQogICAgdG9jOiB0cnVlDQogICAgdG9jX2Zsb2F0OiB0cnVlDQogICAgdG9jX2RlcHRoOiAzDQogICAgbnVtYmVyX3NlY3Rpb25zOiB0cnVlDQogICAgY29kZV9mb2xkaW5nOiBzaG93DQogICAgY29kZV9kb3dubG9hZDogdHJ1ZQ0KICAgIHRoZW1lOiBmbGF0bHkNCiAgICBoaWdobGlnaHQ6IHRhbmdvDQogICAgZmlnX2NhcHRpb246IHRydWUNCiAgICBkZl9wcmludDogcGFnZWQNCi0tLQ0KDQojIDEuIFByZXBhcmFjacOzbiBkZWwgYW1iaWVudGUNCg0KYGBge3J9DQpsaWJyYXJ5KGdncGxvdDIpDQpsaWJyYXJ5KGRwbHlyKQ0KbGlicmFyeShicm9vbSkNCnNldC5zZWVkKDIwMjUpDQpgYGANCg0KIyAyLiBTaW11bGFjacOzbiBkZSBkYXRvcw0KDQpTZSBzaW11bGFuIGRhdG9zIGNvbiB1bmEgdmFyaWFibGUgcmVzcHVlc3RhIGJpbmFyaWEgYHlgIHF1ZSBkZXBlbmRlIGRlIHVuYSB2YXJpYWJsZSBjb250aW51YSBgeDFgIHkgdW5hIGNhdGVnw7NyaWNhIGB4MmAuDQoNCmBgYHtyfQ0KbiA8LSAyMDANCngxIDwtIHJub3JtKG4pDQp4MiA8LSBzYW1wbGUoYygiQSIsICJCIiksIG4sIHJlcGxhY2UgPSBUUlVFKQ0KcHJvYiA8LSAxIC8gKDEgKyBleHAoLSgwLjUgKiB4MSArIGlmZWxzZSh4MiA9PSAiQiIsIDEsIDApKSkpDQp5IDwtIHJiaW5vbShuLCAxLCBwcm9iKQ0KDQpkZiA8LSBkYXRhLmZyYW1lKHkgPSBhcy5mYWN0b3IoeSksIHgxID0geDEsIHgyID0gYXMuZmFjdG9yKHgyKSkNCmBgYA0KDQojIDMuIEV4cGxvcmFjacOzbiBkZSBkYXRvcw0KDQpgYGB7ciBmaWcuY2FwPSJEaXN0cmlidWNpw7NuIGRlIHgxIHBvciBjbGFzZSB5IHkgY2F0ZWdvcsOtYSB4MiJ9DQpnZ3Bsb3QoZGYsIGFlcyh4ID0geDEsIGZpbGwgPSB5KSkgKw0KICBnZW9tX2RlbnNpdHkoYWxwaGEgPSAwLjUpICsNCiAgZmFjZXRfd3JhcCh+eDIpICsNCiAgbGFicyh0aXRsZSA9ICJEaXN0cmlidWNpw7NuIGRlIHgxIHBvciBjbGFzZSB5IGNhdGVnb3LDrWEgeDIiKQ0KYGBgDQoNCiMgNC4gQWp1c3RlIGRlbCBtb2RlbG8gZGUgcmVncmVzacOzbiBsb2fDrXN0aWNhDQoNCmBgYHtyfQ0KbW9kZWxvX2xvZ2l0IDwtIGdsbSh5IH4geDEgKyB4MiwgZGF0YSA9IGRmLCBmYW1pbHkgPSAiYmlub21pYWwiKQ0Kc3VtbWFyeShtb2RlbG9fbG9naXQpDQpgYGANCg0KIyA1LiBFdmFsdWFjacOzbiBkZWwgbW9kZWxvDQoNCmBgYHtyfQ0KZGYkcHJvYl9wcmVkIDwtIHByZWRpY3QobW9kZWxvX2xvZ2l0LCB0eXBlID0gInJlc3BvbnNlIikNCmRmJHlfcHJlZCA8LSBpZmVsc2UoZGYkcHJvYl9wcmVkID4gMC41LCAxLCAwKQ0KDQojIE1hdHJpeiBkZSBjb25mdXNpw7NuDQp0YWJsZShQcmVkaWNobyA9IGRmJHlfcHJlZCwgUmVhbCA9IGRmJHkpDQoNCiMgVmlzdWFsaXphY2nD
s24NCmdncGxvdChkZiwgYWVzKHggPSBwcm9iX3ByZWQsIGZpbGwgPSB5KSkgKw0KICBnZW9tX2hpc3RvZ3JhbShiaW5zID0gMzAsIHBvc2l0aW9uID0gImlkZW50aXR5IiwgYWxwaGEgPSAwLjYpICsNCiAgbGFicyh0aXRsZSA9ICJEaXN0cmlidWNpw7NuIGRlIHByb2JhYmlsaWRhZGVzIHByZWRpY2hhcyIsIHggPSAiUHJvYmFiaWxpZGFkIHByZWRpY2hhIiwgeSA9ICJGcmVjdWVuY2lhIikNCmBgYA0KDQojIDYuIEludGVycHJldGFjacOzbiBkZSBjb2VmaWNpZW50ZXMNCg0KYGBge3J9DQpicm9vbTo6dGlkeShtb2RlbG9fbG9naXQsIGV4cG9uZW50aWF0ZSA9IFRSVUUsIGNvbmYuaW50ID0gVFJVRSkNCmBgYA0KDQotIEVsIGNvZWZpY2llbnRlIGRlIGB4MWAgaW5kaWNhIGVsIGVmZWN0byBkZSBlc3RhIHZhcmlhYmxlIHNvYnJlIGxhIHByb2JhYmlsaWRhZCBkZSDDqXhpdG8uDQotIEVsIHTDqXJtaW5vIGB4MkJgIGNvbXBhcmEgZWwgZ3J1cG8gYEJgIGNvbnRyYSBgQWAuDQoNCg==