This figure may seem mysterious, but it is not. Well, less so!

My attempt at explaining:

https://twitter.com/KirkegaardEmil/status/1626448996246888449

Blacks from 98th centile income homes are far above their racial mean, so they regress a lot. White kids less so. The relative effect size of the regression becomes larger the stronger the selection, so will the difference between the children. Thus, the ratio goes up.

Init

library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0      ✔ purrr   0.3.5 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## Loading required package: magrittr
## 
## 
## Attaching package: 'magrittr'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
## 
## 
## Loading required package: weights
## 
## Loading required package: Hmisc
## 
## Loading required package: lattice
## 
## Loading required package: survival
## 
## Loading required package: Formula
## 
## 
## Attaching package: 'Hmisc'
## 
## 
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## 
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## 
## 
## Loading required package: assertthat
## 
## 
## Attaching package: 'assertthat'
## 
## 
## The following object is masked from 'package:tibble':
## 
##     has_name
## 
## 
## Loading required package: psych
## 
## 
## Attaching package: 'psych'
## 
## 
## The following object is masked from 'package:Hmisc':
## 
##     describe
## 
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## 
## 
## 
## Attaching package: 'kirkegaard'
## 
## 
## The following object is masked from 'package:psych':
## 
##     rescale
## 
## 
## The following object is masked from 'package:assertthat':
## 
##     are_equal
## 
## 
## The following objects are masked from 'package:purrr':
## 
##     is_logical, is_numeric
## 
## 
## The following object is masked from 'package:base':
## 
##     +
load_packages(
  ggeffects
)

theme_set(theme_bw())

options(
    digits = 3
)

Data

n = 10000
threshold = -1
family_transfer = .5
random_component_sd = .5

#assume 1 SD gap in human capital, simulate parents' data
set.seed(1)
d = tibble(
  family = (1:(2*n)) %>% as.factor(),
  
  race = c(
    rep("black", n), rep("white", n)
  ),
  
  hc_parent = rnorm(n = n*2, mean = 0, sd = 1),
  
  hc_children = c(
    hc_parent * family_transfer + rnorm(n = n*2, mean = 0, sd = sqrt(1 - family_transfer^2))
  )
)

#adjust group means, add threshold binary
d %<>% mutate(
  hc_parent = if_else(race=="black", true = hc_parent-1, false = hc_parent),
  hc_children = if_else(race=="black", true = hc_children-1, false = hc_children),
  below_threshold = ((rnorm(n, mean = 0, sd = random_component_sd) + hc_children) < threshold) %>% as.factor(),
  delta_parent_child = hc_children - hc_parent
)

#stats
describe2(d)
describe2(d, group = d$race)
cor_matrix(d)
##                    hc_parent hc_children delta_parent_child
## hc_parent              1.000       0.595             -0.454
## hc_children            0.595       1.000              0.446
## delta_parent_child    -0.454       0.446              1.000
cor_matrix(d, by = d$race)
## $black
##                    hc_parent hc_children delta_parent_child
## hc_parent              1.000       0.501             -0.502
## hc_children            0.501       1.000              0.497
## delta_parent_child    -0.502       0.497              1.000
## 
## $white
##                    hc_parent hc_children delta_parent_child
## hc_parent              1.000       0.488             -0.506
## hc_children            0.488       1.000              0.506
## delta_parent_child    -0.506       0.506              1.000
#wide format for ratio
d_wide = pivot_wider(
  d,
  names_from = "race",
  values_from = "hc_children"
)

#plot regression lines
d %>% 
  ggplot(aes(hc_parent, hc_children, color = race)) +
  geom_smooth() +
  scale_x_continuous(breaks = seq(-5, 5)) +
  scale_y_continuous(breaks = seq(-5, 5))
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

#distance to children as function as parent
d %>% 
  ggplot(aes(hc_parent, delta_parent_child, color = race)) +
  geom_smooth() +
  scale_x_continuous(breaks = seq(-5, 5)) +
  scale_y_continuous(breaks = seq(-5, 5, by = .5))
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

#ratio threshold as function of parent hc
#logistic function
logis_fit = glm(formula = below_threshold ~ hc_parent + race, data = d, family = "binomial")
logis_fit %>% summary()
## 
## Call:
## glm(formula = below_threshold ~ hc_parent + race, family = "binomial", 
##     data = d)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -2.121  -0.830  -0.509   0.967   2.726  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -0.8198     0.0286   -28.7   <2e-16 ***
## hc_parent    -0.8096     0.0188   -43.1   <2e-16 ***
## racewhite    -0.8161     0.0360   -22.7   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 25756  on 19999  degrees of freedom
## Residual deviance: 21377  on 19997  degrees of freedom
## AIC: 21383
## 
## Number of Fisher Scoring iterations: 4
#fancy package to ensure we did it correctly
ggpredict(logis_fit, terms = c("hc_parent [all]", "race")) %>% 
  plot()

#compute ratios from logistic prediction lines
d %<>% mutate(
  pred_below = predict(logis_fit, newdata = d) %>% plogis()
)

#manual plot
d %>% 
  ggplot(aes(hc_parent, pred_below, color = race)) +
  geom_line()

#new smooth data for ratios
d_pred = tibble(
  hc_parent = c(
    seq(-3, 3, length.out = n/2),
    seq(-3, 3, length.out = n/2)
  ),
  race = rep(c("black", "white"), each = n/2)
) %>% mutate(
  pred_below = predict(logis_fit, newdata = .) %>% plogis()
)

#wide format so we can get the ratio
d_pred_wide = pivot_wider(
  d_pred,
  id_cols = c("hc_parent"),
  names_from = "race",
  values_from = "pred_below"
)

#add ratio of below
d_pred_wide %<>% mutate(
  ratio_below = black / white
)

#final plot
d_pred_wide %>% 
  ggplot(aes(hc_parent, ratio_below)) +
  geom_line()

In the real data, the ratio of Black/White is about 3x at the lowest parental income, and about 6-7x at the highest. These values correspond to ours if you multiply by 3. This scaling factor would reflect an extra Black proneness to crime, controlling for income.

Meta

#versions
write_sessioninfo()
## R version 4.2.2 Patched (2022-11-10 r83330)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 21
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_DK.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_DK.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] ggeffects_1.1.4       kirkegaard_2023-02-17 psych_2.2.9          
##  [4] assertthat_0.2.1      weights_1.0.4         Hmisc_4.7-1          
##  [7] Formula_1.2-4         survival_3.4-0        lattice_0.20-45      
## [10] magrittr_2.0.3        forcats_0.5.2         stringr_1.4.1        
## [13] dplyr_1.0.10          purrr_0.3.5           readr_2.1.3          
## [16] tidyr_1.2.1           tibble_3.1.8          ggplot2_3.4.0        
## [19] tidyverse_1.3.2      
## 
## loaded via a namespace (and not attached):
##  [1] nlme_3.1-160        fs_1.5.2            lubridate_1.9.0    
##  [4] insight_0.18.8      RColorBrewer_1.1-3  httr_1.4.4         
##  [7] tools_4.2.2         backports_1.4.1     bslib_0.4.1        
## [10] sjlabelled_1.2.0    utf8_1.2.2          R6_2.5.1           
## [13] rpart_4.1.19        mgcv_1.8-41         DBI_1.1.3          
## [16] colorspace_2.0-3    nnet_7.3-18         withr_2.5.0        
## [19] mnormt_2.1.1        tidyselect_1.2.0    gridExtra_2.3      
## [22] compiler_4.2.2      cli_3.4.1           rvest_1.0.3        
## [25] htmlTable_2.4.1     mice_3.14.0         xml2_1.3.3         
## [28] labeling_0.4.2      sass_0.4.2          scales_1.2.1       
## [31] checkmate_2.1.0     digest_0.6.30       minqa_1.2.5        
## [34] foreign_0.8-82      rmarkdown_2.18      base64enc_0.1-3    
## [37] jpeg_0.1-9          pkgconfig_2.0.3     htmltools_0.5.4    
## [40] lme4_1.1-31         highr_0.9           dbplyr_2.2.1       
## [43] fastmap_1.1.0       htmlwidgets_1.6.1   rlang_1.0.6        
## [46] readxl_1.4.1        rstudioapi_0.14     farver_2.1.1       
## [49] jquerylib_0.1.4     generics_0.1.3      jsonlite_1.8.3     
## [52] gtools_3.9.3        googlesheets4_1.0.1 interp_1.1-3       
## [55] Matrix_1.5-3        Rcpp_1.0.9          munsell_0.5.0      
## [58] fansi_1.0.3         lifecycle_1.0.3     stringi_1.7.8      
## [61] yaml_2.3.6          MASS_7.3-58         plyr_1.8.8         
## [64] grid_4.2.2          parallel_4.2.2      gdata_2.18.0.1     
## [67] crayon_1.5.2        deldir_1.0-6        haven_2.5.1        
## [70] splines_4.2.2       hms_1.1.2           knitr_1.40         
## [73] pillar_1.8.1        boot_1.3-28         reprex_2.0.2       
## [76] glue_1.6.2          evaluate_0.18       latticeExtra_0.6-30
## [79] data.table_1.14.6   modelr_0.1.10       nloptr_2.0.3       
## [82] png_0.1-7           vctrs_0.5.1         tzdb_0.3.0         
## [85] cellranger_1.1.0    gtable_0.3.1        cachem_1.0.6       
## [88] xfun_0.35           broom_1.0.1         googledrive_2.0.0  
## [91] gargle_1.2.1        cluster_2.1.4       timechange_0.1.1   
## [94] ellipsis_0.3.2