Regressão Logística
# Load the speed-dating CSV. Any column without an explicit type below is
# parsed as double (`.default`); the listed columns are forced to character
# or logical.
data <- read_csv(
  file = here::here("data/speed-dating2.csv"),
  col_types = cols(
    .default = col_double(),
    # columns kept as text
    field = col_character(),
    from = col_character(),
    career = col_character(),
    dec = col_character(),
    # columns parsed as logical
    attr3_s = col_logical(),
    sinc3_s = col_logical(),
    intel3_s = col_logical(),
    fun3_s = col_logical(),
    amb3_s = col_logical()
  )
)
# Keep only `dec` and the four predictor columns used in the models, then
# drop every row that has a missing value in any of those columns.
data <- na.omit(select(data, dec, attr, intel, fun, shar))

# Print a compact overview of the reduced data set.
glimpse(data)
## Rows: 4,235
## Columns: 5
## $ dec <chr> "yes", "yes", "yes", "yes", "yes", "no", "yes", "no", "yes", "ye…
## $ attr <dbl> 6, 7, 5, 7, 5, 4, 7, 4, 7, 5, 5, 8, 5, 7, 6, 8, 7, 5, 7, 6, 7, 9…
## $ intel <dbl> 7, 7, 9, 8, 7, 7, 7, 7, 8, 6, 8, 6, 9, 7, 7, 8, 9, 7, 8, 8, 10, …
## $ fun <dbl> 7, 8, 8, 7, 7, 4, 4, 6, 9, 8, 4, 6, 6, 6, 9, 3, 6, 5, 9, 7, 7, 8…
## $ shar <dbl> 5, 6, 7, 8, 6, 4, 7, 6, 8, 8, 3, 6, 4, 7, 8, 2, 9, 5, 5, 8, 9, 7…
# Distribution of `attr` within each `dec` group: one violin per group with
# count-sized points on top, laid out horizontally through coord_flip().
ggplot(data = data, mapping = aes(x = dec, y = attr)) +
  geom_violin(mapping = aes(fill = dec), alpha = 0.4) +
  geom_count() +
  coord_flip() +
  labs(
    title = "Relação entre dec e attr",
    subtitle = "dec = match dos dois participantes\nattr = quão atraente p1 achou p2"
  )

# Distribution of `intel` within each `dec` group: one violin per group with
# count-sized points on top, laid out horizontally through coord_flip().
ggplot(data = data, mapping = aes(x = dec, y = intel)) +
  geom_violin(mapping = aes(fill = dec), alpha = 0.4) +
  geom_count() +
  coord_flip() +
  labs(
    title = "Relação entre dec e intel",
    subtitle = "dec = match dos dois participantes\nintel = quão inteligente p1 achou p2"
  )

# Distribution of `fun` within each `dec` group: one violin per group with
# count-sized points on top, laid out horizontally through coord_flip().
ggplot(data = data, mapping = aes(x = dec, y = fun)) +
  geom_violin(mapping = aes(fill = dec), alpha = 0.4) +
  geom_count() +
  coord_flip() +
  labs(
    title = "Relação entre dec e fun",
    subtitle = "dec = match dos dois participantes\nfun = quão divertido p1 achou p2"
  )

# Distribution of `shar` within each `dec` group: one violin per group with
# count-sized points on top, laid out horizontally through coord_flip().
ggplot(data = data, mapping = aes(x = dec, y = shar)) +
  geom_violin(mapping = aes(fill = dec), alpha = 0.4) +
  geom_count() +
  coord_flip() +
  labs(
    title = "Relação entre dec e shar",
    subtitle = "dec = match dos dois participantes\nshar = quanto p1 achou que compartilha interesses e hobbies com p2"
  )

# Copy of `data` with `dec` converted from character to factor: the glm calls
# below, which are fitted on `data_t`, handle a factor response better than a
# character one.
data_t <- data %>%
  mutate(dec = as.factor(dec))
# Logistic regression of `dec` on all four predictors.
bm <- glm(
  formula = dec ~ attr + intel + fun + shar,
  family = "binomial",
  data = data_t
)

# Exponentiated coefficients with confidence intervals; the test statistic
# and p-value columns are dropped from the table.
select(
  tidy(bm, conf.int = TRUE, exponentiate = TRUE),
  -statistic, -p.value
)

# Pseudo-R2 measures for this model.
pscl::pR2(bm)
## fitting null model for pseudo-r2
## llh llhNull G2 McFadden r2ML
## -2145.3372779 -2888.2944334 1485.9143112 0.2572304 0.2959214
## r2CU
## 0.3975478
# Logistic regression of `dec` on `attr` alone.
bm_just_attr <- glm(
  formula = dec ~ attr,
  family = "binomial",
  data = data_t
)

# Exponentiated coefficients with confidence intervals; the test statistic
# and p-value columns are dropped from the table.
select(
  tidy(bm_just_attr, conf.int = TRUE, exponentiate = TRUE),
  -statistic, -p.value
)

# Pseudo-R2 measures for this model.
pscl::pR2(bm_just_attr)
## fitting null model for pseudo-r2
## llh llhNull G2 McFadden r2ML
## -2315.4085878 -2888.2944334 1145.7716913 0.1983475 0.2370389
## r2CU
## 0.3184437
# Logistic regression of `dec` on `intel` alone.
bm_just_intel <- glm(
  formula = dec ~ intel,
  family = "binomial",
  data = data_t
)

# Exponentiated coefficients with confidence intervals; the test statistic
# and p-value columns are dropped from the table.
select(
  tidy(bm_just_intel, conf.int = TRUE, exponentiate = TRUE),
  -statistic, -p.value
)

# Pseudo-R2 measures for this model.
pscl::pR2(bm_just_intel)
## fitting null model for pseudo-r2
## llh llhNull G2 McFadden r2ML
## -2.806183e+03 -2.888294e+03 1.642224e+02 2.842895e-02 3.803520e-02
## r2CU
## 5.109739e-02
# Logistic regression of `dec` on `fun` alone.
bm_just_fun <- glm(
  formula = dec ~ fun,
  family = "binomial",
  data = data_t
)

# Exponentiated coefficients with confidence intervals; the test statistic
# and p-value columns are dropped from the table.
select(
  tidy(bm_just_fun, conf.int = TRUE, exponentiate = TRUE),
  -statistic, -p.value
)

# Pseudo-R2 measures for this model.
pscl::pR2(bm_just_fun)
## fitting null model for pseudo-r2
## llh llhNull G2 McFadden r2ML
## -2500.7109046 -2888.2944334 775.1670576 0.1341911 0.1672637
## r2CU
## 0.2247060
# Logistic regression of `dec` on `shar` alone.
bm_just_shar <- glm(
  formula = dec ~ shar,
  family = "binomial",
  data = data_t
)

# Exponentiated coefficients with confidence intervals; the test statistic
# and p-value columns are dropped from the table.
select(
  tidy(bm_just_shar, conf.int = TRUE, exponentiate = TRUE),
  -statistic, -p.value
)

# Pseudo-R2 measures for this model.
pscl::pR2(bm_just_shar)
## fitting null model for pseudo-r2
## llh llhNull G2 McFadden r2ML
## -2492.5566405 -2888.2944334 791.4755859 0.1370144 0.1704643
## r2CU
## 0.2290058