Regressão Logística

# Load the speed-dating CSV (path resolved through here::here) with an
# explicit column specification instead of relying on type guessing.
data <- read_csv(
  file = here::here("data/speed-dating2.csv"),
  col_types = cols(
    # Any column not listed below is parsed as double.
    .default = col_double(),
    # Columns kept as character.
    field = col_character(),
    from = col_character(),
    career = col_character(),
    # Columns parsed as logical.
    attr3_s = col_logical(),
    sinc3_s = col_logical(),
    intel3_s = col_logical(),
    fun3_s = col_logical(),
    amb3_s = col_logical(),
    # The decision label is read as character ("yes"/"no" in the preview).
    dec = col_character()
  )
)

# Keep only the decision and the four ratings used by the models further
# down, then drop every row with a missing value in any of those columns.
data <- select(data, dec, attr, intel, fun, shar)
data <- na.omit(data)

# Structural overview of the cleaned table.
glimpse(data)
## Rows: 4,235
## Columns: 5
## $ dec   <chr> "yes", "yes", "yes", "yes", "yes", "no", "yes", "no", "yes", "ye…
## $ attr  <dbl> 6, 7, 5, 7, 5, 4, 7, 4, 7, 5, 5, 8, 5, 7, 6, 8, 7, 5, 7, 6, 7, 9…
## $ intel <dbl> 7, 7, 9, 8, 7, 7, 7, 7, 8, 6, 8, 6, 9, 7, 7, 8, 9, 7, 8, 8, 10, …
## $ fun   <dbl> 7, 8, 8, 7, 7, 4, 4, 6, 9, 8, 4, 6, 6, 6, 9, 3, 6, 5, 9, 7, 7, 8…
## $ shar  <dbl> 5, 6, 7, 8, 6, 4, 7, 6, 8, 8, 3, 6, 4, 7, 8, 2, 9, 5, 5, 8, 9, 7…
# Distribution of the attractiveness rating (attr) for each value of dec:
# one translucent violin per group, overlaid with points sized by the number
# of observations at each rating; the axes are flipped.
# NOTE(review): the subtitle describes dec as a mutual match; confirm against
# the dataset codebook that dec is not p1's individual decision instead.
ggplot(data, aes(x = dec, y = attr)) +
  geom_violin(mapping = aes(fill = dec), alpha = 0.4) +
  geom_count() +
  labs(
    title = "Relação entre dec e attr",
    subtitle = "dec = match dos dois participantes\nattr = quão atraente p1 achou p2"
  ) +
  coord_flip()

# Distribution of the intelligence rating (intel) for each value of dec:
# one translucent violin per group, overlaid with points sized by the number
# of observations at each rating; the axes are flipped.
# NOTE(review): the subtitle describes dec as a mutual match; confirm against
# the dataset codebook that dec is not p1's individual decision instead.
ggplot(data, aes(x = dec, y = intel)) +
  geom_violin(mapping = aes(fill = dec), alpha = 0.4) +
  geom_count() +
  labs(
    title = "Relação entre dec e intel",
    subtitle = "dec = match dos dois participantes\nintel = quão inteligente p1 achou p2"
  ) +
  coord_flip()

# Distribution of the fun rating (fun) for each value of dec: one translucent
# violin per group, overlaid with points sized by the number of observations
# at each rating; the axes are flipped.
# NOTE(review): the subtitle describes dec as a mutual match; confirm against
# the dataset codebook that dec is not p1's individual decision instead.
ggplot(data, aes(x = dec, y = fun)) +
  geom_violin(mapping = aes(fill = dec), alpha = 0.4) +
  geom_count() +
  labs(
    title = "Relação entre dec e fun",
    subtitle = "dec = match dos dois participantes\nfun = quão divertido p1 achou p2"
  ) +
  coord_flip()

# Distribution of the shared-interests rating (shar) for each value of dec:
# one translucent violin per group, overlaid with points sized by the number
# of observations at each rating; the axes are flipped.
# NOTE(review): the subtitle describes dec as a mutual match; confirm against
# the dataset codebook that dec is not p1's individual decision instead.
ggplot(data, aes(x = dec, y = shar)) +
  geom_violin(mapping = aes(fill = dec), alpha = 0.4) +
  geom_count() +
  labs(
    title = "Relação entre dec e shar",
    subtitle = "dec = match dos dois participantes\nshar = quanto p1 achou que compartilha interesses e hobbies com p2"
  ) +
  coord_flip()

# The glm() calls below handle a factor response better than a character one,
# so dec is converted here. as.factor() orders character levels
# alphabetically, so (assuming dec only holds "no"/"yes", as the glimpse()
# preview suggests) "no" is the first level and the models estimate the odds
# of "yes".
data_t <- data %>%
  mutate(dec = as.factor(dec))
  
# Full model: logistic regression of dec on all four ratings.
bm <- glm(
  formula = dec ~ attr + intel + fun + shar,
  data = data_t,
  family = "binomial"
)

# Coefficient table with exponentiated estimates (odds ratios) and their
# confidence intervals; the test statistic and p-value columns are dropped.
bm %>%
  tidy(conf.int = TRUE, exponentiate = TRUE) %>%
  select(-statistic, -p.value)

# Pseudo-R2 measures for the fitted model.
pscl::pR2(bm)
## fitting null model for pseudo-r2
##           llh       llhNull            G2      McFadden          r2ML 
## -2145.3372779 -2888.2944334  1485.9143112     0.2572304     0.2959214 
##          r2CU 
##     0.3975478
# Single-predictor model: logistic regression of dec on attr only.
bm_just_attr <- glm(
  formula = dec ~ attr,
  data = data_t,
  family = "binomial"
)

# Coefficient table with exponentiated estimates (odds ratios) and their
# confidence intervals; the test statistic and p-value columns are dropped.
bm_just_attr %>%
  tidy(conf.int = TRUE, exponentiate = TRUE) %>%
  select(-statistic, -p.value)

# Pseudo-R2 measures for the fitted model.
pscl::pR2(bm_just_attr)
## fitting null model for pseudo-r2
##           llh       llhNull            G2      McFadden          r2ML 
## -2315.4085878 -2888.2944334  1145.7716913     0.1983475     0.2370389 
##          r2CU 
##     0.3184437
# Single-predictor model: logistic regression of dec on intel only.
bm_just_intel <- glm(
  formula = dec ~ intel,
  data = data_t,
  family = "binomial"
)

# Coefficient table with exponentiated estimates (odds ratios) and their
# confidence intervals; the test statistic and p-value columns are dropped.
bm_just_intel %>%
  tidy(conf.int = TRUE, exponentiate = TRUE) %>%
  select(-statistic, -p.value)

# Pseudo-R2 measures for the fitted model.
pscl::pR2(bm_just_intel)
## fitting null model for pseudo-r2
##           llh       llhNull            G2      McFadden          r2ML 
## -2.806183e+03 -2.888294e+03  1.642224e+02  2.842895e-02  3.803520e-02 
##          r2CU 
##  5.109739e-02
# Single-predictor model: logistic regression of dec on fun only.
bm_just_fun <- glm(
  formula = dec ~ fun,
  data = data_t,
  family = "binomial"
)

# Coefficient table with exponentiated estimates (odds ratios) and their
# confidence intervals; the test statistic and p-value columns are dropped.
bm_just_fun %>%
  tidy(conf.int = TRUE, exponentiate = TRUE) %>%
  select(-statistic, -p.value)

# Pseudo-R2 measures for the fitted model.
pscl::pR2(bm_just_fun)
## fitting null model for pseudo-r2
##           llh       llhNull            G2      McFadden          r2ML 
## -2500.7109046 -2888.2944334   775.1670576     0.1341911     0.1672637 
##          r2CU 
##     0.2247060
# Single-predictor model: logistic regression of dec on shar only.
bm_just_shar <- glm(
  formula = dec ~ shar,
  data = data_t,
  family = "binomial"
)

# Coefficient table with exponentiated estimates (odds ratios) and their
# confidence intervals; the test statistic and p-value columns are dropped.
bm_just_shar %>%
  tidy(conf.int = TRUE, exponentiate = TRUE) %>%
  select(-statistic, -p.value)

# Pseudo-R2 measures for the fitted model.
pscl::pR2(bm_just_shar)
## fitting null model for pseudo-r2
##           llh       llhNull            G2      McFadden          r2ML 
## -2492.5566405 -2888.2944334   791.4755859     0.1370144     0.1704643 
##          r2CU 
##     0.2290058