Init

options(
  digits = 3
)

library(pacman)
p_load(
  kirkegaard,
  rms,
  mirt,
  ggeffects
)

#nice theme
theme_set(theme_bw())

Data

d = read_rds("data/parsed_data.rds")
variables = read_csv2("data/question_data.csv")

## ℹ Using "','" as decimal and "'.'" as grouping mark. Use `read_delim()` for more control.

## Rows: 2620 Columns: 10

## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (9): question, text, option_1, option_2, option_3, option_4, Type, Order...
## dbl (1): N

## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

iq_items_meta = read_csv("data/test_items.csv")

## Rows: 28 Columns: 8

## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): variable, text, option_1, option_2, option_3, option_4
## dbl (2): ID, option_correct

## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Recode

Misc

d %<>% mutate(
  #basic
  age = d_age,
  sex2 = case_when(d_gender %in% c("Man", "Cis Man") ~ "Man",
                   d_gender %in% c("Woman", "Cis Woman") ~ "Woman",
                   TRUE ~ NA_character_)
)

Intelligence

#items
CA_items = intersect(iq_items_meta$variable, names(d))

#get scoring key
v_correct_answers_text = apply(iq_items_meta %>% filter(variable %in% CA_items), MARGIN = 1, function(item) {
  item["option_" + 1:4][item["option_correct"] %>% as.numeric()]
})

#score items
CA_scored_items = score_items(d[CA_items], key = v_correct_answers_text)

#CA counts
CA_items_counts = miss_by_case(CA_scored_items, reverse = T)
CA_items_counts %>% table2()

#factor analysis / IRT
# irt_CA = irt.fa(CA_scored_items)
irt_CA = mirt(CA_scored_items, model = 1, technical = list(removeEmptyRows=TRUE))

## Warning: removeEmptyRows option has been deprecated. Complete NA response
## vectors now supported by using NA placeholders

## Warning: data contains response patterns with only NAs

## 
Iteration: 1, Log-Lik: -188768.194, Max-Change: 5.04127
Iteration: 2, Log-Lik: -167586.674, Max-Change: 0.93778
Iteration: 3, Log-Lik: -163943.799, Max-Change: 0.34531
Iteration: 4, Log-Lik: -162360.344, Max-Change: 0.28657
Iteration: 5, Log-Lik: -161709.787, Max-Change: 0.22768
Iteration: 6, Log-Lik: -161353.642, Max-Change: 0.16053
Iteration: 7, Log-Lik: -161173.781, Max-Change: 0.09737
Iteration: 8, Log-Lik: -161064.493, Max-Change: 0.22971
Iteration: 9, Log-Lik: -160991.363, Max-Change: 0.12791
Iteration: 10, Log-Lik: -160959.392, Max-Change: 0.07594
Iteration: 11, Log-Lik: -160939.094, Max-Change: 0.04297
Iteration: 12, Log-Lik: -160925.798, Max-Change: 0.04225
Iteration: 13, Log-Lik: -160916.474, Max-Change: 0.02643
Iteration: 14, Log-Lik: -160913.117, Max-Change: 0.01116
Iteration: 15, Log-Lik: -160909.698, Max-Change: 0.00764
Iteration: 16, Log-Lik: -160907.840, Max-Change: 0.00427
Iteration: 17, Log-Lik: -160907.466, Max-Change: 0.00338
Iteration: 18, Log-Lik: -160907.281, Max-Change: 0.00242
Iteration: 19, Log-Lik: -160907.163, Max-Change: 0.00138
Iteration: 20, Log-Lik: -160907.136, Max-Change: 0.00128
Iteration: 21, Log-Lik: -160907.121, Max-Change: 0.00118
Iteration: 22, Log-Lik: -160907.111, Max-Change: 0.00045
Iteration: 23, Log-Lik: -160907.109, Max-Change: 0.00028
Iteration: 24, Log-Lik: -160907.108, Max-Change: 0.00025
Iteration: 25, Log-Lik: -160907.108, Max-Change: 0.00024
Iteration: 26, Log-Lik: -160907.107, Max-Change: 0.00008

#score
# irt_CA_scores = scoreIrt(irt_CA, CA_scored_items)
irt_CA_scores = fscores(irt_CA, full.scores = T, full.scores.SE = T)

#reliabilities
empirical_rxx(irt_CA_scores)

##    F1 
## 0.549

marginal_rxx(irt_CA)

## [1] 0.764

#save
d$CA_old = d$CA %>% standardize()
d$CA = NA
# d$CA[CA_items_counts != 0] = irt_CA_scores[, 1] %>% standardize(focal_group = d$race[CA_items_counts != 0] == "White")
d$CA = irt_CA_scores[, 1] %>% standardize(focal_group = d$race == "White")

## Warning in standardize(., focal_group = d$race == "White"): `focal_group`
## contains `NA` values. These were converted to `FALSE` following tidyverse
## convention.

Reliability by minimal cognitive item count

reliability_by_item_count <- map_df(0:14, function(i) {
  #subset
  d_subset = CA_scored_items[CA_items_counts >= i, ]
  
  #fit
  irt_CA = mirt(d_subset, model = 1, technical = list(removeEmptyRows=TRUE), verbose = F)

  #score
  irt_CA_scores = fscores(irt_CA, full.scores = T, full.scores.SE = T)
  
  #reliabilities
  tibble(
    minimal_item_count = i,
    n = nrow(d_subset),
    empirical = empirical_rxx(irt_CA_scores) %>% as.vector(),
    marginal = marginal_rxx(irt_CA)
  )
})

## Warning: removeEmptyRows option has been deprecated. Complete NA response
## vectors now supported by using NA placeholders

## Warning: data contains response patterns with only NAs

## Warning: removeEmptyRows option has been deprecated. Complete NA response
## vectors now supported by using NA placeholders

## Warning: removeEmptyRows option has been deprecated. Complete NA response
## vectors now supported by using NA placeholders

## Warning: removeEmptyRows option has been deprecated. Complete NA response
## vectors now supported by using NA placeholders

## Warning: removeEmptyRows option has been deprecated. Complete NA response
## vectors now supported by using NA placeholders

## Warning: removeEmptyRows option has been deprecated. Complete NA response
## vectors now supported by using NA placeholders

## Warning: removeEmptyRows option has been deprecated. Complete NA response
## vectors now supported by using NA placeholders

## Warning: removeEmptyRows option has been deprecated. Complete NA response
## vectors now supported by using NA placeholders

## Warning: removeEmptyRows option has been deprecated. Complete NA response
## vectors now supported by using NA placeholders

## Warning: removeEmptyRows option has been deprecated. Complete NA response
## vectors now supported by using NA placeholders

## Warning: removeEmptyRows option has been deprecated. Complete NA response
## vectors now supported by using NA placeholders

## Warning: removeEmptyRows option has been deprecated. Complete NA response
## vectors now supported by using NA placeholders

## Warning: removeEmptyRows option has been deprecated. Complete NA response
## vectors now supported by using NA placeholders

## Warning: removeEmptyRows option has been deprecated. Complete NA response
## vectors now supported by using NA placeholders

## Warning: removeEmptyRows option has been deprecated. Complete NA response
## vectors now supported by using NA placeholders

reliability_by_item_count

#plot version
reliability_by_item_count %>% 
  select(-n) %>% 
  pivot_longer(cols = -minimal_item_count) %>% 
  ggplot(aes(minimal_item_count, value, color = name)) +
  geom_line() +
  scale_color_discrete("Metric") +
  scale_x_continuous("Minimum number of questions answered", 
                     breaks = 0:14, 
                     labels = str_glue("{0:14}\nn={reliability_by_item_count$n}"),
                     guide = guide_axis(n.dodge = 2))

GG_save("figs/minimum_item_count_reliability.png")

Fertility

d %<>% mutate(
  #desired children
  desired_children = q979,
  
  #has a child
  has_children = q105 %>% factor(levels = c("No", "Yes"))
)

Subsets

#at least 8 items for IQ
d_IQ8 = d[CA_items_counts >= 8, ]

#heteros
d_het = d %>% filter(str_detect(gender_orientation, "Hetero"))

#combo
d2 = d[CA_items_counts >= 8, ] %>% filter(str_detect(gender_orientation, "Hetero"))
nrow(d2)

## [1] 18219

Analyses

Descriptive stats

d %>% select(age) %>% describe2()

Basic

#desired
GG_group_means(d2, "CA", groupvar = "desired_children", type = "violin") +
  xlab("Desired number of children\n'How many children would you ideally like to have?'") +
  ylab("Intelligence (z score)")

## Missing values were removed.

GG_save("figs/desired_kids_violin.png")

#current
GG_group_means(d2, "CA", groupvar = "has_children", type = "violin") +
  xlab("Has at least one child\n'Do you have a child or children?'") +
  ylab("Intelligence (z score)")

## Missing values were removed.

GG_save("figs/has_kids_violin.png")

#numbers
SMD_matrix(-d2$CA, group = d2$desired_children, extended_output = T)

## $d
##              None    1-2    3-4 5 or more!
## None           NA -0.113 -0.238     -0.410
## 1-2        -0.113     NA -0.125     -0.297
## 3-4        -0.238 -0.125     NA     -0.172
## 5 or more! -0.410 -0.297 -0.172         NA
## 
## $d_string
##            None                   1-2                    3-4                   
## None       NA                     "-0.11 [-0.15 -0.076]" "-0.24 [-0.29 -0.19]" 
## 1-2        "-0.11 [-0.15 -0.076]" NA                     "-0.13 [-0.17 -0.081]"
## 3-4        "-0.24 [-0.29 -0.19]"  "-0.13 [-0.17 -0.081]" NA                    
## 5 or more! "-0.41 [-0.54 -0.28]"  "-0.30 [-0.42 -0.17]"  "-0.17 [-0.30 -0.042]"
##            5 or more!            
## None       "-0.41 [-0.54 -0.28]" 
## 1-2        "-0.30 [-0.42 -0.17]" 
## 3-4        "-0.17 [-0.30 -0.042]"
## 5 or more! NA                    
## 
## $CI_lower
##              None    1-2    3-4 5 or more!
## None           NA -0.150 -0.289     -0.538
## 1-2        -0.150     NA -0.169     -0.422
## 3-4        -0.289 -0.169     NA     -0.302
## 5 or more! -0.538 -0.422 -0.302         NA
## 
## $CI_upper
##               None     1-2     3-4 5 or more!
## None            NA -0.0757 -0.1870    -0.2826
## 1-2        -0.0757      NA -0.0809    -0.1724
## 3-4        -0.1870 -0.0809      NA    -0.0424
## 5 or more! -0.2826 -0.1724 -0.0424         NA
## 
## $se_z
## [1] 1.96
## 
## $se
##              None    1-2    3-4 5 or more!
## None           NA 0.0191 0.0261     0.0651
## 1-2        0.0191     NA 0.0226     0.0637
## 3-4        0.0261 0.0226     NA     0.0661
## 5 or more! 0.0651 0.0637 0.0661         NA
## 
## $p
##                None      1-2      3-4 5 or more!
## None             NA 3.09e-09 7.59e-20   2.98e-10
## 1-2        3.09e-09       NA 2.92e-08   3.06e-06
## 3-4        7.59e-20 2.92e-08       NA   9.27e-03
## 5 or more! 2.98e-10 3.06e-06 9.27e-03         NA
## 
## $pairwise_n
##             None   1-2   3-4 5 or more!
## None          NA 14047  6183       4006
## 1-2        14047    NA 12724      10547
## 3-4         6183 12724    NA       2683
## 5 or more!  4006 10547  2683         NA

SMD_matrix(-d2$CA, group = d2$has_children, extended_output = T)

## $d
##        No   Yes
## No     NA -0.39
## Yes -0.39    NA
## 
## $d_string
##     No                    Yes                  
## No  NA                    "-0.39 [-0.43 -0.35]"
## Yes "-0.39 [-0.43 -0.35]" NA                   
## 
## $CI_lower
##         No    Yes
## No      NA -0.425
## Yes -0.425     NA
## 
## $CI_upper
##         No    Yes
## No      NA -0.354
## Yes -0.354     NA
## 
## $se_z
## [1] 1.96
## 
## $se
##         No    Yes
## No      NA 0.0182
## Yes 0.0182     NA
## 
## $p
##            No       Yes
## No         NA 2.96e-101
## Yes 2.96e-101        NA
## 
## $pairwise_n
##        No   Yes
## No     NA 17554
## Yes 17554    NA

#pearson cors
d2 %>% 
  select(CA, desired_children, has_children) %>% 
  df_as_num() %>% 
  wtd.cors()

##                       CA desired_children has_children
## CA                1.0000          -0.0799       -0.160
## desired_children -0.0799           1.0000        0.149
## has_children     -0.1602           0.1490        1.000

#latent cors
d2 %>% 
  select(CA, desired_children, has_children) %>% 
  df_as_num() %>% 
  mixedCor()

## Call: mixedCor(data = .)
##                  CA    dsrd_ hs_ch
## CA                1.00            
## desired_children -0.09  1.00      
## has_children     -0.22  0.22  1.00

#for 35+ year olds
d2 %>% 
  filter(age >= 35) %>% 
  select(CA, desired_children, has_children) %>% 
  df_as_num() %>% 
  mixedCor()

## Call: mixedCor(data = .)
##                  CA    dsrd_ hs_ch
## CA                1.00            
## desired_children -0.07  1.00      
## has_children     -0.22  0.28  1.00

d2 %>% 
  filter(age >= 35) %>% 
  nrow()

## [1] 8321

Regressions

Desired number

#desired numbers
desired_models = list(
  basic = lrm(desired_children ~ CA, data = d2),
  add_ASR = lrm(desired_children ~ CA + race + sex2 * rcs(age), data = d2),
  add_sex_interaction = lrm(desired_children ~ CA*sex2 + race + sex2 * rcs(age), data = d2)
)

desired_models

## $basic
## Frequencies of Missing Values Due to Each Variable
## desired_children               CA 
##             1489                0 
## 
## Logistic Regression Model
##  
##  lrm(formula = desired_children ~ CA, data = d2)
##  
##  
##  Frequencies of Responses
##  
##        None        1-2        3-4 5 or more! 
##        3753      10294       2430        253 
##  
##  
##                         Model Likelihood    Discrimination    Rank Discrim.    
##                               Ratio Test           Indexes          Indexes    
##  Obs         16730    LR chi2     102.67    R2       0.007    C       0.541    
##  max |deriv| 7e-13    d.f.             1    g        0.180    Dxy     0.082    
##                       Pr(> chi2) <0.0001    gr       1.197    gamma   0.083    
##                                             gp       0.024    tau-a   0.045    
##                                             Brier    0.134                     
##  
##                Coef    S.E.   Wald Z Pr(>|Z|)
##  y>=1-2         1.2632 0.0188  67.35 <0.0001 
##  y>=3-4        -1.6478 0.0211 -78.09 <0.0001 
##  y>=5 or more! -4.1728 0.0634 -65.86 <0.0001 
##  CA            -0.1478 0.0146 -10.12 <0.0001 
##  
## 
## $add_ASR
## Frequencies of Missing Values Due to Each Variable
## desired_children               CA             race             sex2 
##             1489                0              974                0 
##              age 
##                0 
## 
## Logistic Regression Model
##  
##  lrm(formula = desired_children ~ CA + race + sex2 * rcs(age), 
##      data = d2)
##  
##  
##  Frequencies of Responses
##  
##        None        1-2        3-4 5 or more! 
##        3533       9787       2303        236 
##  
##  
##                         Model Likelihood    Discrimination    Rank Discrim.    
##                               Ratio Test           Indexes          Indexes    
##  Obs         15859    LR chi2     474.08    R2       0.034    C       0.589    
##  max |deriv| 1e-09    d.f.            19    g        0.392    Dxy     0.178    
##                       Pr(> chi2) <0.0001    gr       1.479    gamma   0.178    
##                                             gp       0.051    tau-a   0.097    
##                                             Brier    0.133                     
##  
##                        Coef    S.E.   Wald Z Pr(>|Z|)
##  y>=1-2                 1.9175 0.4508  4.25  <0.0001 
##  y>=3-4                -1.0634 0.4506 -2.36  0.0183  
##  y>=5 or more!         -3.6127 0.4547 -7.95  <0.0001 
##  CA                    -0.1392 0.0153 -9.09  <0.0001 
##  race=Mixed             0.2608 0.0552  4.73  <0.0001 
##  race=Asian             0.3863 0.0814  4.75  <0.0001 
##  race=Hispanic / Latin  0.4481 0.0852  5.26  <0.0001 
##  race=Black             0.4817 0.0873  5.52  <0.0001 
##  race=Other             0.1669 0.1078  1.55  0.1218  
##  race=Indian            0.0844 0.1677  0.50  0.6149  
##  race=Middle Eastern    0.5533 0.3025  1.83  0.0674  
##  race=Native American   0.3164 0.3223  0.98  0.3264  
##  race=Pacific Islander  0.6169 0.4374  1.41  0.1584  
##  sex2=Woman            -0.4243 0.7750 -0.55  0.5841  
##  age                   -0.0235 0.0168 -1.40  0.1601  
##  age'                   0.2149 0.1568  1.37  0.1706  
##  age''                 -0.8834 0.5495 -1.61  0.1079  
##  age'''                 1.0716 0.6590  1.63  0.1039  
##  sex2=Woman * age       0.0278 0.0291  0.96  0.3393  
##  sex2=Woman * age'     -0.3636 0.2936 -1.24  0.2156  
##  sex2=Woman * age''     0.6555 1.0871  0.60  0.5465  
##  sex2=Woman * age'''    0.0948 1.4063  0.07  0.9463  
##  
## 
## $add_sex_interaction
## Frequencies of Missing Values Due to Each Variable
## desired_children               CA             sex2             race 
##             1489                0                0              974 
##              age 
##                0 
## 
## Logistic Regression Model
##  
##  lrm(formula = desired_children ~ CA * sex2 + race + sex2 * rcs(age), 
##      data = d2)
##  
##  
##  Frequencies of Responses
##  
##        None        1-2        3-4 5 or more! 
##        3533       9787       2303        236 
##  
##  
##                         Model Likelihood    Discrimination    Rank Discrim.    
##                               Ratio Test           Indexes          Indexes    
##  Obs         15859    LR chi2     474.13    R2       0.034    C       0.589    
##  max |deriv| 1e-09    d.f.            20    g        0.392    Dxy     0.178    
##                       Pr(> chi2) <0.0001    gr       1.480    gamma   0.178    
##                                             gp       0.051    tau-a   0.097    
##                                             Brier    0.133                     
##  
##                        Coef    S.E.   Wald Z Pr(>|Z|)
##  y>=1-2                 1.9164 0.4508  4.25  <0.0001 
##  y>=3-4                -1.0644 0.4506 -2.36  0.0182  
##  y>=5 or more!         -3.6137 0.4547 -7.95  <0.0001 
##  CA                    -0.1410 0.0174 -8.09  <0.0001 
##  sex2=Woman            -0.4255 0.7751 -0.55  0.5830  
##  race=Mixed             0.2608 0.0552  4.73  <0.0001 
##  race=Asian             0.3863 0.0814  4.75  <0.0001 
##  race=Hispanic / Latin  0.4480 0.0852  5.26  <0.0001 
##  race=Black             0.4818 0.0873  5.52  <0.0001 
##  race=Other             0.1665 0.1078  1.54  0.1225  
##  race=Indian            0.0841 0.1677  0.50  0.6158  
##  race=Middle Eastern    0.5523 0.3025  1.83  0.0679  
##  race=Native American   0.3153 0.3224  0.98  0.3281  
##  race=Pacific Islander  0.6162 0.4374  1.41  0.1589  
##  age                   -0.0235 0.0168 -1.40  0.1610  
##  age'                   0.2147 0.1568  1.37  0.1710  
##  age''                 -0.8833 0.5495 -1.61  0.1080  
##  age'''                 1.0718 0.6590  1.63  0.1038  
##  CA * sex2=Woman        0.0075 0.0359  0.21  0.8353  
##  sex2=Woman * age       0.0278 0.0291  0.96  0.3386  
##  sex2=Woman * age'     -0.3642 0.2937 -1.24  0.2149  
##  sex2=Woman * age''     0.6587 1.0873  0.61  0.5446  
##  sex2=Woman * age'''    0.0902 1.4065  0.06  0.9489  
##

#plot predictions for CA
pred_data = ggeffects::new_data(desired_models$add_ASR, terms = c("CA", "sex2"), typical = c("race", "age"))

## Warning in if (fun == "median") myfun <- get("median", asNamespace("stats"))
## else if (fun == : the condition has length > 1 and only the first element will
## be used

## Warning in if (fun == "weighted.mean") myfun <- get("weighted.mean",
## asNamespace("stats")) else if (fun == : the condition has length > 1 and only
## the first element will be used

## Warning in if (fun == "mode") myfun <- get(".mode_value",
## asNamespace("ggeffects")) else if (fun == : the condition has length > 1 and
## only the first element will be used

## Warning in if (fun == "zero") return(0) else myfun <- get("mean",
## asNamespace("base")): the condition has length > 1 and only the first element
## will be used

## Warning in if (fun != "mode") out <- levels(x)[1] else out <- .mode_value(x):
## the condition has length > 1 and only the first element will be used

## Warning in if (fun == "median") myfun <- get("median", asNamespace("stats"))
## else if (fun == : the condition has length > 1 and only the first element will
## be used

## Warning in if (fun == "weighted.mean") myfun <- get("weighted.mean",
## asNamespace("stats")) else if (fun == : the condition has length > 1 and only
## the first element will be used

## Warning in if (fun == "mode") myfun <- get(".mode_value",
## asNamespace("ggeffects")) else if (fun == : the condition has length > 1 and
## only the first element will be used

## Warning in if (fun == "zero") return(0) else myfun <- get("mean",
## asNamespace("base")): the condition has length > 1 and only the first element
## will be used

pred_data = bind_cols(
  pred_data,
  rms:::predict.lrm(desired_models$add_ASR, newdata = pred_data, type = "fitted.ind") %>% as.data.frame()
) %>%
  mutate(
    id = 1:n()
    ) %>%
  pivot_longer(cols = contains("desired_children")) %>%
  mutate(
    desired_children = str_match(name, "=(.+)")[, 2] %>% ordered(levels = levels(d$desired_children))
  )

pred_data

#plot
pred_data %>% 
  #sample rows for speedup
  # sample_frac(size = .01) %>% 
  ggplot(aes(CA, value, color = desired_children, linetype = sex2)) +
  geom_line() +
  scale_y_continuous("Probability", labels = scales::percent) +
  scale_x_continuous("Intelligence (z-score)") +
  scale_color_ordinal("Ideal number of children") +
  scale_linetype_discrete("Sex")

GG_save("figs/desired_kids.png")

#predictions for race
pred_data = ggeffects::new_data(desired_models$add_ASR, terms = c("race"), typical = c("CA", "sex2", "age"))

## Warning in if (fun == "median") myfun <- get("median", asNamespace("stats"))
## else if (fun == : the condition has length > 1 and only the first element will
## be used

## Warning in if (fun == "weighted.mean") myfun <- get("weighted.mean",
## asNamespace("stats")) else if (fun == : the condition has length > 1 and only
## the first element will be used

## Warning in if (fun == "mode") myfun <- get(".mode_value",
## asNamespace("ggeffects")) else if (fun == : the condition has length > 1 and
## only the first element will be used

## Warning in if (fun == "zero") return(0) else myfun <- get("mean",
## asNamespace("base")): the condition has length > 1 and only the first element
## will be used

## Warning in if (fun == "weighted.mean") out <- do.call(myfun, args = list(x =
## x, : the condition has length > 1 and only the first element will be used

## Warning in if (fun == "median") myfun <- get("median", asNamespace("stats"))
## else if (fun == : the condition has length > 1 and only the first element will
## be used

## Warning in if (fun == "weighted.mean") myfun <- get("weighted.mean",
## asNamespace("stats")) else if (fun == : the condition has length > 1 and only
## the first element will be used

## Warning in if (fun == "mode") myfun <- get(".mode_value",
## asNamespace("ggeffects")) else if (fun == : the condition has length > 1 and
## only the first element will be used

## Warning in if (fun == "zero") return(0) else myfun <- get("mean",
## asNamespace("base")): the condition has length > 1 and only the first element
## will be used

## Warning in if (fun == "median") myfun <- get("median", asNamespace("stats"))
## else if (fun == : the condition has length > 1 and only the first element will
## be used

## Warning in if (fun == "weighted.mean") myfun <- get("weighted.mean",
## asNamespace("stats")) else if (fun == : the condition has length > 1 and only
## the first element will be used

## Warning in if (fun == "mode") myfun <- get(".mode_value",
## asNamespace("ggeffects")) else if (fun == : the condition has length > 1 and
## only the first element will be used

## Warning in if (fun == "zero") return(0) else myfun <- get("mean",
## asNamespace("base")): the condition has length > 1 and only the first element
## will be used

pred_data = bind_cols(
  pred_data,
  rms:::predict.lrm(desired_models$add_ASR, newdata = pred_data, type = "fitted.ind") %>% as.data.frame()
) %>%
  mutate(
    id = 1:n()
    ) %>%
  pivot_longer(cols = contains("desired_children")) %>%
  mutate(
    desired_children = str_match(name, "=(.+)")[, 2] %>% ordered(levels = levels(d$desired_children))
  )

pred_data

#plot
pred_data %>% 
  #sample rows for speedup
  # sample_frac(size = .01) %>% 
  ggplot(aes(race, value, fill = desired_children)) +
  geom_bar(stat = "identity") +
  scale_y_continuous("Distribution", labels = scales::percent) +
  scale_x_discrete(guide = guide_axis(n.dodge = 3)) +
  scale_fill_ordinal("Ideal number of children")

GG_save("figs/desired_kids_race.png")

Has children

#desired numbers
has_models = list(
  basic = lrm(has_children ~ CA, data = d2),
  add_ASR = lrm(has_children ~ CA + race + sex2 * rcs(age), data = d2),
  add_sex_interaction = lrm(has_children ~ CA*sex2 + race + sex2 * rcs(age), data = d2)
)

has_models

## $basic
## Frequencies of Missing Values Due to Each Variable
## has_children           CA 
##          665            0 
## 
## Logistic Regression Model
##  
##  lrm(formula = has_children ~ CA, data = d2)
##  
##  
##                         Model Likelihood    Discrimination    Rank Discrim.    
##                               Ratio Test           Indexes          Indexes    
##  Obs         17554    LR chi2     444.49    R2       0.038    C       0.611    
##   No         13635    d.f.             1    g        0.432    Dxy     0.222    
##   Yes         3919    Pr(> chi2) <0.0001    gr       1.540    gamma   0.223    
##  max |deriv| 2e-12                          gp       0.075    tau-a   0.077    
##                                             Brier    0.169                     
##  
##            Coef    S.E.   Wald Z Pr(>|Z|)
##  Intercept -1.2382 0.0184 -67.23 <0.0001 
##  CA        -0.3551 0.0170 -20.93 <0.0001 
##  
## 
## $add_ASR
## Frequencies of Missing Values Due to Each Variable
## has_children           CA         race         sex2          age 
##          665            0          974            0            0 
## 
## Logistic Regression Model
##  
##  lrm(formula = has_children ~ CA + race + sex2 * rcs(age), data = d2)
##  
##  
##                         Model Likelihood    Discrimination    Rank Discrim.    
##                               Ratio Test           Indexes          Indexes    
##  Obs         16637    LR chi2    2915.63    R2       0.245    C       0.776    
##   No         12892    d.f.            19    g        1.326    Dxy     0.552    
##   Yes         3745    Pr(> chi2) <0.0001    gr       3.766    gamma   0.552    
##  max |deriv| 1e-07                          gp       0.191    tau-a   0.193    
##                                             Brier    0.144                     
##  
##                        Coef     S.E.   Wald Z Pr(>|Z|)
##  Intercept             -10.5128 1.3214  -7.96 <0.0001 
##  CA                     -0.3589 0.0193 -18.59 <0.0001 
##  race=Mixed             -0.0335 0.0707  -0.47 0.6353  
##  race=Asian             -1.1707 0.1538  -7.61 <0.0001 
##  race=Hispanic / Latin  -0.0273 0.1139  -0.24 0.8106  
##  race=Black              0.2118 0.1093   1.94 0.0526  
##  race=Other             -0.0709 0.1343  -0.53 0.5977  
##  race=Indian            -1.1109 0.3402  -3.27 0.0011  
##  race=Middle Eastern    -0.8829 0.4524  -1.95 0.0510  
##  race=Native American   -0.5326 0.4320  -1.23 0.2176  
##  race=Pacific Islander  -0.8568 0.6192  -1.38 0.1664  
##  sex2=Woman              3.4758 1.8247   1.90 0.0568  
##  age                     0.2831 0.0475   5.96 <0.0001 
##  age'                   -0.7981 0.3387  -2.36 0.0184  
##  age''                   2.5486 1.0473   2.43 0.0150  
##  age'''                 -3.0498 1.0825  -2.82 0.0048  
##  sex2=Woman * age       -0.1037 0.0662  -1.57 0.1172  
##  sex2=Woman * age'       0.2866 0.5181   0.55 0.5802  
##  sex2=Woman * age''     -0.0474 1.6973  -0.03 0.9777  
##  sex2=Woman * age'''    -1.2320 1.9061  -0.65 0.5180  
##  
## 
## $add_sex_interaction
## Frequencies of Missing Values Due to Each Variable
## has_children           CA         sex2         race          age 
##          665            0            0          974            0 
## 
## Logistic Regression Model
##  
##  lrm(formula = has_children ~ CA * sex2 + race + sex2 * rcs(age), 
##      data = d2)
##  
##  
##                         Model Likelihood    Discrimination    Rank Discrim.    
##                               Ratio Test           Indexes          Indexes    
##  Obs         16637    LR chi2    2916.65    R2       0.245    C       0.776    
##   No         12892    d.f.            20    g        1.326    Dxy     0.552    
##   Yes         3745    Pr(> chi2) <0.0001    gr       3.767    gamma   0.552    
##  max |deriv| 1e-07                          gp       0.191    tau-a   0.193    
##                                             Brier    0.144                     
##  
##                        Coef     S.E.   Wald Z Pr(>|Z|)
##  Intercept             -10.4969 1.3210  -7.95 <0.0001 
##  CA                     -0.3484 0.0219 -15.89 <0.0001 
##  sex2=Woman              3.4401 1.8259   1.88 0.0596  
##  race=Mixed             -0.0328 0.0707  -0.46 0.6423  
##  race=Asian             -1.1714 0.1539  -7.61 <0.0001 
##  race=Hispanic / Latin  -0.0268 0.1139  -0.24 0.8140  
##  race=Black              0.2123 0.1093   1.94 0.0521  
##  race=Other             -0.0684 0.1343  -0.51 0.6103  
##  race=Indian            -1.1102 0.3401  -3.26 0.0011  
##  race=Middle Eastern    -0.8778 0.4523  -1.94 0.0523  
##  race=Native American   -0.5250 0.4319  -1.22 0.2241  
##  race=Pacific Islander  -0.8502 0.6189  -1.37 0.1695  
##  age                     0.2825 0.0474   5.96 <0.0001 
##  age'                   -0.7958 0.3385  -2.35 0.0187  
##  age''                   2.5417 1.0467   2.43 0.0152  
##  age'''                 -3.0425 1.0819  -2.81 0.0049  
##  CA * sex2=Woman        -0.0462 0.0457  -1.01 0.3124  
##  sex2=Woman * age       -0.1029 0.0663  -1.55 0.1203  
##  sex2=Woman * age'       0.2884 0.5186   0.56 0.5782  
##  sex2=Woman * age''     -0.0584 1.6993  -0.03 0.9726  
##  sex2=Woman * age'''    -1.2166 1.9089  -0.64 0.5239  
##

#plot predictions
ggpredict(has_models$add_sex_interaction, terms = c("CA", "sex2")) %>% plot()

GG_save("figs/has_kids.png")

ggpredict(has_models$add_sex_interaction, terms = c("CA", "race")) %>% 
  #plot()
  as.data.frame() %>% 
  ggplot(aes(x, predicted, color = group)) +
  geom_line() +
  scale_y_continuous("Probability", labels = scales::percent)

GG_save("figs/has_kids_race.png")

OKCupid: dysgenics

Init

Data

Recode

Misc

Intelligence

Reliability by minimal cognitive item count

Fertility

Subsets

Analyses

Descriptive stats

Basic

Regressions

Desired number

Has children

Meta