This figure may seem mysterious, but it is not. Well, less so!
My attempt at explaining:
https://twitter.com/KirkegaardEmil/status/1626448996246888449
Blacks from 98th centile income homes are far above their racial mean, so they regress a lot. White kids less so. The relative effect size of the regression becomes larger the stronger the selection, so will the difference between the children. Thus, the ratio goes up.
library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0 ✔ purrr 0.3.5
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## Loading required package: magrittr
##
##
## Attaching package: 'magrittr'
##
##
## The following object is masked from 'package:purrr':
##
## set_names
##
##
## The following object is masked from 'package:tidyr':
##
## extract
##
##
## Loading required package: weights
##
## Loading required package: Hmisc
##
## Loading required package: lattice
##
## Loading required package: survival
##
## Loading required package: Formula
##
##
## Attaching package: 'Hmisc'
##
##
## The following objects are masked from 'package:dplyr':
##
## src, summarize
##
##
## The following objects are masked from 'package:base':
##
## format.pval, units
##
##
## Loading required package: assertthat
##
##
## Attaching package: 'assertthat'
##
##
## The following object is masked from 'package:tibble':
##
## has_name
##
##
## Loading required package: psych
##
##
## Attaching package: 'psych'
##
##
## The following object is masked from 'package:Hmisc':
##
## describe
##
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
##
##
##
## Attaching package: 'kirkegaard'
##
##
## The following object is masked from 'package:psych':
##
## rescale
##
##
## The following object is masked from 'package:assertthat':
##
## are_equal
##
##
## The following objects are masked from 'package:purrr':
##
## is_logical, is_numeric
##
##
## The following object is masked from 'package:base':
##
## +
load_packages(
ggeffects
)
theme_set(theme_bw())
options(
digits = 3
)
n = 10000
threshold = -1
family_transfer = .5
random_component_sd = .5
#assume 1 SD gap in human capital, simulate parents' data
set.seed(1)
d = tibble(
family = (1:(2*n)) %>% as.factor(),
race = c(
rep("black", n), rep("white", n)
),
hc_parent = rnorm(n = n*2, mean = 0, sd = 1),
hc_children = c(
hc_parent * family_transfer + rnorm(n = n*2, mean = 0, sd = sqrt(1 - family_transfer^2))
)
)
#adjust group means, add threshold binary
d %<>% mutate(
hc_parent = if_else(race=="black", true = hc_parent-1, false = hc_parent),
hc_children = if_else(race=="black", true = hc_children-1, false = hc_children),
below_threshold = ((rnorm(n, mean = 0, sd = random_component_sd) + hc_children) < threshold) %>% as.factor(),
delta_parent_child = hc_children - hc_parent
)
#stats
describe2(d)
describe2(d, group = d$race)
cor_matrix(d)
## hc_parent hc_children delta_parent_child
## hc_parent 1.000 0.595 -0.454
## hc_children 0.595 1.000 0.446
## delta_parent_child -0.454 0.446 1.000
cor_matrix(d, by = d$race)
## $black
## hc_parent hc_children delta_parent_child
## hc_parent 1.000 0.501 -0.502
## hc_children 0.501 1.000 0.497
## delta_parent_child -0.502 0.497 1.000
##
## $white
## hc_parent hc_children delta_parent_child
## hc_parent 1.000 0.488 -0.506
## hc_children 0.488 1.000 0.506
## delta_parent_child -0.506 0.506 1.000
#wide format for ratio
d_wide = pivot_wider(
d,
names_from = "race",
values_from = "hc_children"
)
#plot regression lines
d %>%
ggplot(aes(hc_parent, hc_children, color = race)) +
geom_smooth() +
scale_x_continuous(breaks = seq(-5, 5)) +
scale_y_continuous(breaks = seq(-5, 5))
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
#distance to children as function as parent
d %>%
ggplot(aes(hc_parent, delta_parent_child, color = race)) +
geom_smooth() +
scale_x_continuous(breaks = seq(-5, 5)) +
scale_y_continuous(breaks = seq(-5, 5, by = .5))
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
#ratio threshold as function of parent hc
#logistic function
logis_fit = glm(formula = below_threshold ~ hc_parent + race, data = d, family = "binomial")
logis_fit %>% summary()
##
## Call:
## glm(formula = below_threshold ~ hc_parent + race, family = "binomial",
## data = d)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.121 -0.830 -0.509 0.967 2.726
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.8198 0.0286 -28.7 <2e-16 ***
## hc_parent -0.8096 0.0188 -43.1 <2e-16 ***
## racewhite -0.8161 0.0360 -22.7 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 25756 on 19999 degrees of freedom
## Residual deviance: 21377 on 19997 degrees of freedom
## AIC: 21383
##
## Number of Fisher Scoring iterations: 4
#fancy package to ensure we did it correctly
ggpredict(logis_fit, terms = c("hc_parent [all]", "race")) %>%
plot()
#compute ratios from logistic prediction lines
d %<>% mutate(
pred_below = predict(logis_fit, newdata = d) %>% plogis()
)
#manual plot
d %>%
ggplot(aes(hc_parent, pred_below, color = race)) +
geom_line()
#new smooth data for ratios
d_pred = tibble(
hc_parent = c(
seq(-3, 3, length.out = n/2),
seq(-3, 3, length.out = n/2)
),
race = rep(c("black", "white"), each = n/2)
) %>% mutate(
pred_below = predict(logis_fit, newdata = .) %>% plogis()
)
#wide format so we can get the ratio
d_pred_wide = pivot_wider(
d_pred,
id_cols = c("hc_parent"),
names_from = "race",
values_from = "pred_below"
)
#add ratio of below
d_pred_wide %<>% mutate(
ratio_below = black / white
)
#final plot
d_pred_wide %>%
ggplot(aes(hc_parent, ratio_below)) +
geom_line()
In the real data, the ratio of Black/White is about 3x at the lowest parental income, and about 6-7x at the highest. These values correspond to ours if you multiply by 3. This scaling factor would reflect an extra Black proneness to crime, controlling for income.
#versions
write_sessioninfo()
## R version 4.2.2 Patched (2022-11-10 r83330)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 21
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_DK.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_DK.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggeffects_1.1.4 kirkegaard_2023-02-17 psych_2.2.9
## [4] assertthat_0.2.1 weights_1.0.4 Hmisc_4.7-1
## [7] Formula_1.2-4 survival_3.4-0 lattice_0.20-45
## [10] magrittr_2.0.3 forcats_0.5.2 stringr_1.4.1
## [13] dplyr_1.0.10 purrr_0.3.5 readr_2.1.3
## [16] tidyr_1.2.1 tibble_3.1.8 ggplot2_3.4.0
## [19] tidyverse_1.3.2
##
## loaded via a namespace (and not attached):
## [1] nlme_3.1-160 fs_1.5.2 lubridate_1.9.0
## [4] insight_0.18.8 RColorBrewer_1.1-3 httr_1.4.4
## [7] tools_4.2.2 backports_1.4.1 bslib_0.4.1
## [10] sjlabelled_1.2.0 utf8_1.2.2 R6_2.5.1
## [13] rpart_4.1.19 mgcv_1.8-41 DBI_1.1.3
## [16] colorspace_2.0-3 nnet_7.3-18 withr_2.5.0
## [19] mnormt_2.1.1 tidyselect_1.2.0 gridExtra_2.3
## [22] compiler_4.2.2 cli_3.4.1 rvest_1.0.3
## [25] htmlTable_2.4.1 mice_3.14.0 xml2_1.3.3
## [28] labeling_0.4.2 sass_0.4.2 scales_1.2.1
## [31] checkmate_2.1.0 digest_0.6.30 minqa_1.2.5
## [34] foreign_0.8-82 rmarkdown_2.18 base64enc_0.1-3
## [37] jpeg_0.1-9 pkgconfig_2.0.3 htmltools_0.5.4
## [40] lme4_1.1-31 highr_0.9 dbplyr_2.2.1
## [43] fastmap_1.1.0 htmlwidgets_1.6.1 rlang_1.0.6
## [46] readxl_1.4.1 rstudioapi_0.14 farver_2.1.1
## [49] jquerylib_0.1.4 generics_0.1.3 jsonlite_1.8.3
## [52] gtools_3.9.3 googlesheets4_1.0.1 interp_1.1-3
## [55] Matrix_1.5-3 Rcpp_1.0.9 munsell_0.5.0
## [58] fansi_1.0.3 lifecycle_1.0.3 stringi_1.7.8
## [61] yaml_2.3.6 MASS_7.3-58 plyr_1.8.8
## [64] grid_4.2.2 parallel_4.2.2 gdata_2.18.0.1
## [67] crayon_1.5.2 deldir_1.0-6 haven_2.5.1
## [70] splines_4.2.2 hms_1.1.2 knitr_1.40
## [73] pillar_1.8.1 boot_1.3-28 reprex_2.0.2
## [76] glue_1.6.2 evaluate_0.18 latticeExtra_0.6-30
## [79] data.table_1.14.6 modelr_0.1.10 nloptr_2.0.3
## [82] png_0.1-7 vctrs_0.5.1 tzdb_0.3.0
## [85] cellranger_1.1.0 gtable_0.3.1 cachem_1.0.6
## [88] xfun_0.35 broom_1.0.1 googledrive_2.0.0
## [91] gargle_1.2.1 cluster_2.1.4 timechange_0.1.1
## [94] ellipsis_0.3.2