# Load the speed-dating data with an explicit column specification.
# When readr guessed the types it inferred `logical` for the five `*3_s`
# columns (the rows it samples for guessing hold no values there), which
# produced 10220 parsing failures and replaced the real numeric ratings in
# those columns with NA. Declaring every non-text column as double avoids that.
speeddating <- read_csv(
  "data/speed-dating2.csv",
  col_types = cols(
    .default = col_double(),
    field = col_character(),
    from = col_character(),
    career = col_character(),
    dec = col_character()
  )
)
── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────
cols(
.default = col_double(),
field = col_character(),
from = col_character(),
career = col_character(),
attr3_s = col_logical(),
sinc3_s = col_logical(),
intel3_s = col_logical(),
fun3_s = col_logical(),
amb3_s = col_logical(),
dec = col_character()
)
ℹ Use `spec()` for the full column specifications.
Warning: 10220 parsing failures.
row col expected actual file
1847 attr3_s 1/0/T/F/TRUE/FALSE 8.00 'data/speed-dating2.csv'
1847 sinc3_s 1/0/T/F/TRUE/FALSE 10.00 'data/speed-dating2.csv'
1847 intel3_s 1/0/T/F/TRUE/FALSE 9.00 'data/speed-dating2.csv'
1847 fun3_s 1/0/T/F/TRUE/FALSE 10 'data/speed-dating2.csv'
1847 amb3_s 1/0/T/F/TRUE/FALSE 10 'data/speed-dating2.csv'
.... ........ .................. ...... ........................
See problems(...) for more details.
# Quick structural overview: one line per column with its type and first values.
speeddating %>%
  glimpse()
Rows: 4,918
Columns: 44
$ iid <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4,…
$ gender <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ order <dbl> 4, 3, 10, 5, 7, 6, 1, 2, 8, 9, 10, 9, 6, 1, 3, 2, 7, 8, 4, 5, 6, 5, 2, 7, 9, 8, 3, 4, 10, 1, 3, 2,…
$ pid <dbl> 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 11, 12, 13, 14, 15…
$ int_corr <dbl> 0.14, 0.54, 0.16, 0.61, 0.21, 0.25, 0.34, 0.50, 0.28, -0.36, 0.29, 0.18, 0.10, -0.21, 0.32, 0.73, …
$ samerace <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0,…
$ age_o <dbl> 27, 22, 22, 23, 24, 25, 30, 27, 28, 24, 27, 22, 22, 23, 24, 25, 30, 27, 28, 24, 27, 22, 22, 23, 24…
$ age <dbl> 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25…
$ field <chr> "Law", "Law", "Law", "Law", "Law", "Law", "Law", "Law", "Law", "Law", "law", "law", "law", "law", …
$ race <dbl> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,…
$ from <chr> "Chicago", "Chicago", "Chicago", "Chicago", "Chicago", "Chicago", "Chicago", "Chicago", "Chicago",…
$ career <chr> "lawyer", "lawyer", "lawyer", "lawyer", "lawyer", "lawyer", "lawyer", "lawyer", "lawyer", "lawyer"…
$ sports <dbl> 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1,…
$ tvsports <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 1, 1, 1,…
$ exercise <dbl> 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6,…
$ dining <dbl> 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8…
$ museums <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6,…
$ art <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 7, 7, 7,…
$ hiking <dbl> 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 7, 7,…
$ gaming <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5,…
$ clubbing <dbl> 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 7, 7, 7,…
$ reading <dbl> 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7…
$ tv <dbl> 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 7, 7,…
$ theater <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 9, 9, 9,…
$ movies <dbl> 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7…
$ concerts <dbl> 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7…
$ music <dbl> 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 7, 7, 7,…
$ shopping <dbl> 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 1, 1, 1,…
$ yoga <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8,…
$ attr <dbl> 6, 7, 5, 7, 5, 4, 7, 4, 7, 5, 5, 8, 5, 7, 6, 8, 7, 5, 7, 6, 7, 9, 7, 9, 9, 8, 8, 7, 9, 8, 4, 8, 4,…
$ sinc <dbl> 9, 8, 8, 6, 6, 9, 6, 9, 6, 6, 7, 5, 8, 9, 8, 7, 5, 8, 6, 7, 9, 7, 9, 7, 10, 10, 9, 9, 9, 7, 10, 7,…
$ intel <dbl> 7, 7, 9, 8, 7, 7, 7, 7, 8, 6, 8, 6, 9, 7, 7, 8, 9, 7, 8, 8, 10, 9, 9, 9, 10, 10, 10, 9, 9, 9, 8, 8…
$ fun <dbl> 7, 8, 8, 7, 7, 4, 4, 6, 9, 8, 4, 6, 6, 6, 9, 3, 6, 5, 9, 7, 7, 8, 7, 7, 10, 7, 7, 8, 9, 7, 5, 10, …
$ amb <dbl> 6, 5, 5, 6, 6, 6, 6, 5, 8, 10, 6, 9, 3, 5, 7, 6, 7, 9, 4, 9, 8, 9, 9, 9, 10, 9, 7, 9, 9, 9, 8, 7, …
$ shar <dbl> 5, 6, 7, 8, 6, 4, 7, 6, 8, 8, 3, 6, 4, 7, 8, 2, 9, 5, 5, 8, 9, 7, 7, 7, 10, 9, 9, 7, 9, 7, 7, 8, 7…
$ like <dbl> 7, 7, 7, 7, 6, 6, 6, 6, 7, 6, 6, 7, 6, 7, 8, 6, 8, 5, 5, 8, 8, 8, 8, 8, 9, 8, 8, 8, 9, 8, 6, 8, 4,…
$ prob <dbl> 6, 5, NA, 6, 6, 5, 5, 7, 7, 6, 4, 3, 7, 8, 6, 5, 7, 6, 6, 7, 7, 7, 7, 7, NA, NA, 7, 7, 7, 7, 7, 1,…
$ match_es <dbl> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ attr3_s <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ sinc3_s <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ intel3_s <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ fun3_s <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ amb3_s <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ dec <chr> "yes", "yes", "yes", "yes", "yes", "no", "yes", "no", "yes", "yes", "no", "no", "no", "yes", "no",…
# Number of "yes"/"no" decisions (`dec`) per gender, bars placed side by side.
# `gender` is read as a double holding a category code; mapping it as a factor
# gives a discrete x axis instead of a continuous scale with meaningless
# fractional ticks between the codes.
speeddating %>%
  ggplot(aes(x = factor(gender), fill = dec)) +
  geom_bar(position = "dodge") +
  labs(x = "gender") # keep the plain column name as the axis title
Para a nossa regressão logística, utilizamos 4 variáveis explicativas:
samerace, que indica se as duas pessoas do par são da mesma raça ou não;
movies, que mede o grau de interesse do entrevistado 1 por filmes;
sinc, que relata o quão sincero o entrevistado 1 achou o entrevistado 2;
intel, que relata o quão inteligente o entrevistado 1 achou o entrevistado 2.
# Copy of the data with the response `dec` stored as a factor: the glm fitted
# from this data handles a factor response better than a character one.
speeddating_s <- mutate(speeddating, dec = as.factor(dec))
# Logistic regression of the decision `dec` on four predictors:
# samerace, movies, sinc and intel.
# With a factor response, a binomial glm treats the first factor level as
# "failure" and every other level as "success".
bm <- glm(
  dec ~ samerace + movies + sinc + intel,
  data = speeddating_s,
  family = "binomial"
)
# Coefficient table on the log-odds scale with confidence intervals;
# the test statistic and p-value columns are dropped.
bm %>%
  tidy(conf.int = TRUE) %>%
  select(-c(statistic, p.value))
# Exponentiated coefficients: same table, with estimates and interval bounds
# passed through exp().
bm %>%
  tidy(conf.int = TRUE, exponentiate = TRUE) %>%
  select(-c(statistic, p.value))
## Since here y = exp(b0) * exp(b1 * x1), increasing x1 by one unit multiplies
## y by exp(b1), which is the estimate shown in the table above.
Esses resultados indicam que:
Pares da mesma raça têm odds de match 16% maiores do que pares de raças diferentes, ou seja, quando as pessoas são da mesma raça, o valor das odds é multiplicado por 1.16. Com 95% de confiança, esperamos que esse aumento esteja entre 2% e 30% em relação a pares de raças diferentes, IC [1.02, 1.30].
Cada ponto a mais em movies diminui em 9% as odds de ocorrer um match entre as pessoas (as odds são multiplicadas por 0.91). Com 95% de confiança, esperamos que essa diminuição esteja entre 5% e 12%, IC [0.88, 0.95].
Cada ponto a mais na sinceridade percebida da outra pessoa aumenta em 16% as odds de ocorrer um reencontro depois. Com 95% de confiança, esse aumento está entre cerca de 11% e 21%, IC [1.11, 1.21].
Cada ponto a mais na inteligência percebida da outra pessoa aumenta em 19% as odds de ocorrer um match. Estamos 95% confiantes de que, a cada ponto a mais na avaliação de inteligência feita por p1, as odds aumentam de 14% a 26%, IC [1.14, 1.26].
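Temos, portanto, a fórmula do modelo (na escala das odds, em que os coeficientes exponenciados se multiplicam em vez de se somar):
odds(dec = "yes") = 0.12 * 1.16^samerace * 0.91^movies * 1.16^sinc * 1.19^intel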