Init

library(kirkegaard)

## Loading required package: tidyverse

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──

## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.7     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

## Loading required package: magrittr

## 
## Attaching package: 'magrittr'

## The following object is masked from 'package:purrr':
## 
##     set_names

## The following object is masked from 'package:tidyr':
## 
##     extract

## Loading required package: weights

## Loading required package: Hmisc

## Loading required package: lattice

## Loading required package: survival

## Loading required package: Formula

## 
## Attaching package: 'Hmisc'

## The following objects are masked from 'package:dplyr':
## 
##     src, summarize

## The following objects are masked from 'package:base':
## 
##     format.pval, units

## Loading required package: assertthat

## 
## Attaching package: 'assertthat'

## The following object is masked from 'package:tibble':
## 
##     has_name

## Loading required package: psych

## 
## Attaching package: 'psych'

## The following object is masked from 'package:Hmisc':
## 
##     describe

## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

## 
## Attaching package: 'kirkegaard'

## The following object is masked from 'package:psych':
## 
##     rescale

## The following object is masked from 'package:assertthat':
## 
##     are_equal

## The following objects are masked from 'package:purrr':
## 
##     is_logical, is_numeric

## The following object is masked from 'package:base':
## 
##     +

load_packages(
   googlesheets4,
   metafor,
   broom,
   rms,
   readxl,
   DescTools,
   future, furrr
)

## Loading required package: Matrix

## 
## Attaching package: 'Matrix'

## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack

## Loading required package: metadat

## 
## Loading the 'metafor' package (version 3.4-0). For an
## introduction to the package please type: help(metafor)

## Loading required package: SparseM

## 
## Attaching package: 'SparseM'

## The following object is masked from 'package:base':
## 
##     backsolve

## 
## Attaching package: 'rms'

## The following object is masked from 'package:metafor':
## 
##     vif

## Registered S3 method overwritten by 'DescTools':
##   method         from 
##   reorder.factor gdata

## 
## Attaching package: 'DescTools'

## The following objects are masked from 'package:psych':
## 
##     AUC, ICC, SD

## The following objects are masked from 'package:Hmisc':
## 
##     %nin%, Label, Mean, Quantile

## 
## Attaching package: 'future'

## The following object is masked from 'package:survival':
## 
##     cluster

theme_set(theme_bw())

options(
    digits = 3
)

plan(multisession(workers = 6))

Functions

#google sheets fuck up the data with NULLs in lists
#convert these back to vectors is a bit annoying
fix_listcol = function(x) {
  map_dbl(x, function(x) {
    if (is.null(x)) return(NA_real_)
    as.numeric(x)
  })
}

#extract data to long format function
extract_data = function(x, topic) {
  #data are only in some cols, without proper headers, but they are in a specific column from the header of PISA X.
  idx_cols = x %>% names() %>% str_detect("^PISA \\d") %>% which()
  
  map_df(idx_cols, function(idx) {
    #rows to keep
    #remove top 3, and any row without a name
    #the last row is used for human friendly means
    no_names = x[[1]] %>% is.na()
    i_d = x[!no_names, c(idx, idx+1)]
    
    #conver to vectors
    i_d = map_df(i_d, fix_listcol)
    
    #fix names
    names(i_d) = c("SD", "SD_se")
    
    #add year identifier and region
    bind_cols(
      tibble(
        name = x[[1]][!no_names],
        topic = topic,
        year = names(x)[idx] %>% str_extract("\\d+")
      ),
      i_d
    )
    })
  }

Data

#PISA data from google sheets
gs4_auth("the.dfx@gmail.com")
data_sheet = "https://docs.google.com/spreadsheets/d/1nmVEouPz8vgT5EUIEvZNoZ4JNPl6b6GKEXhvUa3E6Yw/edit#gid=4345659"

d_math = read_sheet(data_sheet, sheet = "Mathematics")

## Auto-refreshing stale OAuth token.

## ✔ Reading from "Data for cognitive and income inequality".

## ✔ Range ''Mathematics''.

## New names:
## • `` -> `...1`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...17`
## • `` -> `...19`
## • `` -> `...20`
## • `` -> `...21`
## • `` -> `...23`
## • `` -> `...24`
## • `` -> `...25`
## • `` -> `...26`
## • `` -> `...27`

d_read = read_sheet(data_sheet, sheet = "Reading")

## ✔ Reading from "Data for cognitive and income inequality".
## ✔ Range ''Reading''.
## New names:

d_sci = read_sheet(data_sheet, sheet = "Science")

## ✔ Reading from "Data for cognitive and income inequality".
## ✔ Range ''Science''.
## New names:

#inequality data
inequality = read_sheet(data_sheet, sheet = "income_inequality") %>% 
  df_legalize_names()

## ✔ Reading from "Data for cognitive and income inequality".
## ✔ Range ''income_inequality''.

#Becker national IQ data
becker = read_excel("data/NIQ-DATASET-V1.3.3/NIQ-DATA (V1.3.3).xlsx", sheet = 2, range = "A2:N205") %>% 
  df_legalize_names()

#Heiner Rindermann dataset
rindermann = read_excel("data/cognitive_capitalism_appendix.xlsx") %>% 
  df_legalize_names()

#make the cognitive inequality variables
rindermann %<>% mutate(
  R_95_5 = x95pct_IQc - x05pct_IQc,
  R_95_5_z = standardize(R_95_5)
)

Data restructure

Data are not in machine friendly format, so we make them. It’s probably difficult to generalize by year, but relatively easy to hardcode (don’t hate me).

#loop over these indexes
pisa = bind_rows(
  extract_data(d_math, "math"),
  extract_data(d_read, "reading"),
  extract_data(d_sci, "science")
)

#get rid of OECD cols, or no data otherwise
pisa %<>% filter(!str_detect(name, "OECD"), !is.na(SD))

#counts by country
pisa %>% filter(!is.na(SD)) %>% pull(name) %>% table2()

#set Hungary as the baseline country
pisa$name %<>% fct_relevel("Hungary")

Meta analysis of PISA standard deviations

#fit meta overall
overall_fit = rma(
  yi = pisa$SD,
  sei = pisa$SD_se
)

overall_fit

## 
## Random-Effects Model (k = 1073; tau^2 estimator: REML)
## 
## tau^2 (estimated amount of total heterogeneity): 88.4596 (SE = 3.9504)
## tau (square root of estimated tau^2 value):      9.4053
## I^2 (total heterogeneity / total variability):   97.73%
## H^2 (total variability / sampling variability):  44.04
## 
## Test for Heterogeneity:
## Q(df = 1072) = 46440.2200, p-val < .0001
## 
## Model Results:
## 
## estimate      se      zval    pval    ci.lb    ci.ub      
##  91.7160  0.2920  314.1033  <.0001  91.1437  92.2883  *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#with countries as moderator (fixed effects)
country_fit = rma(
  yi = pisa$SD,
  sei = pisa$SD_se,
  data = pisa,
  mods = ~ name
)

country_fit

## 
## Mixed-Effects Model (k = 1073; tau^2 estimator: REML)
## 
## tau^2 (estimated amount of residual heterogeneity):     31.0059 (SE = 1.5207)
## tau (square root of estimated tau^2 value):             5.5683
## I^2 (residual heterogeneity / unaccounted variability): 93.72%
## H^2 (unaccounted variability / sampling variability):   15.93
## R^2 (amount of heterogeneity accounted for):            64.95%
## 
## Test for Residual Heterogeneity:
## QE(df = 994) = 14666.2287, p-val < .0001
## 
## Test of Moderators (coefficients 2:79):
## QM(df = 78) = 1868.9712, p-val < .0001
## 
## Model Results:
## 
##                             estimate      se     zval    pval     ci.lb 
## intrcpt                      92.6772  1.3943  66.4669  <.0001   89.9444 
## nameAlbania                  -1.8739  2.1314  -0.8792  0.3793   -6.0515 
## nameArgentina                 3.9387  2.2083   1.7836  0.0745   -0.3895 
## nameAustralia                 5.3456  1.9362   2.7609  0.0058    1.5507 
## nameAustria                   4.3146  2.0607   2.0938  0.0363    0.2757 
## nameBaku Azerbaijan         -13.6779  3.6340  -3.7639  0.0002  -20.8004 
## nameBelarus                  -3.5580  3.5878  -0.9917  0.3214  -10.5900 
## nameBelgium                  10.1464  1.9660   5.1611  <.0001    6.2932 
## nameBosnia and Herzegovina  -13.3800  3.5798  -3.7377  0.0002  -20.3963 
## nameBrazil                   -1.8659  1.9647  -0.9497  0.3423   -5.7167 
## nameBrunei Darussalam         2.2370  3.5490   0.6303  0.5285   -4.7190 
## nameBulgaria                 11.1672  2.0782   5.3735  <.0001    7.0940 
## nameCanada                   -1.1573  1.9298  -0.5997  0.5487   -4.9396 
## nameChile                    -6.8597  2.0124  -3.4087  0.0007  -10.8040 
## nameChina                    -9.0856  3.6444  -2.4930  0.0127  -16.2284 
## nameChinese Taipei            3.6307  2.0554   1.7664  0.0773   -0.3979 
## nameColombia                 -9.0084  2.0545  -4.3847  <.0001  -13.0351 
## nameCosta Rica              -18.8505  2.1796  -8.6487  <.0001  -23.1224 
## nameCroatia                  -5.1766  2.0531  -2.5214  0.0117   -9.2007 
## nameCyprus                    4.4130  2.3508   1.8773  0.0605   -0.1944 
## nameCzech Republic            3.2611  1.9662   1.6586  0.0972   -0.5925 
## nameDenmark                  -4.2669  1.9446  -2.1942  0.0282   -8.0783 
## nameDominican Republic      -17.7163  2.7720  -6.3911  <.0001  -23.1494 
## nameEstonia                  -8.7672  2.0328  -4.3129  <.0001  -12.7513 
## nameFinland                  -4.7545  1.9389  -2.4521  0.0142   -8.5547 
## nameFrance                    6.9563  1.9729   3.5259  0.0004    3.0894 
## nameGeorgia                  -2.1182  2.3844  -0.8884  0.3743   -6.7915 
## nameGermany                   7.3614  1.9643   3.7476  0.0002    3.5114 
## nameGreece                    0.8324  1.9739   0.4217  0.6732   -3.0363 
## nameHong Kong                -3.6693  1.9683  -1.8642  0.0623   -7.5271 
## nameIceland                   1.9644  1.9391   1.0130  0.3110   -1.8362 
## nameIndonesia               -19.2163  1.9901  -9.6559  <.0001  -23.1169 
## nameIreland                  -4.6803  1.9495  -2.4008  0.0164   -8.5012 
## nameIsrael                   17.5270  2.0571   8.5203  <.0001   13.4952 
## nameItaly                     2.6385  1.9542   1.3502  0.1770   -1.1916 
## nameJapan                     2.6062  1.9834   1.3140  0.1888   -1.2811 
## nameJordan                   -5.4740  2.0715  -2.6425  0.0082   -9.5340 
## nameKazakhstan              -12.6837  2.3805  -5.3283  <.0001  -17.3493 
## nameKorea                    -2.3320  1.9766  -1.1798  0.2381   -6.2060 
## nameKosovo                  -20.2178  2.7038  -7.4776  <.0001  -25.5171 
## nameLatvia                   -8.3458  1.9477  -4.2850  <.0001  -12.1631 
## nameLebanon                  10.7053  2.7756   3.8570  0.0001    5.2652 
## nameLithuania                -3.0266  2.0406  -1.4832  0.1380   -7.0261 
## nameLuxembourg                7.1513  1.9580   3.6522  0.0003    3.3136 
## nameMacao                   -11.1664  1.9611  -5.6939  <.0001  -15.0102 
## nameMalaysia                -12.9597  2.3764  -5.4534  <.0001  -17.6174 
## nameMalta                    19.4119  2.3714   8.1857  <.0001   14.7640 
## nameMexico                  -12.1132  1.9481  -6.2181  <.0001  -15.9313 
## nameMoldova                  -2.4532  2.3815  -1.0301  0.3030   -7.1208 
## nameMontenegro               -6.3051  2.0239  -3.1153  0.0018  -10.2718 
## nameMorocco                 -20.1432  3.5808  -5.6254  <.0001  -27.1613 
## nameNetherlands               1.9428  1.9996   0.9716  0.3312   -1.9763 
## nameNew Zealand               9.2660  1.9469   4.7593  <.0001    5.4501 
## nameNorth Macedonia           0.6170  2.5685   0.2402  0.8102   -4.4172 
## nameNorway                    2.8835  1.9469   1.4811  0.1386   -0.9323 
## namePanama                   -6.2064  2.8712  -2.1616  0.0306  -11.8338 
## namePeru                     -5.5845  2.1464  -2.6018  0.0093   -9.7913 
## namePhilippines             -14.8353  3.7277  -3.9797  <.0001  -22.1415 
## namePoland                   -1.7866  1.9504  -0.9160  0.3596   -5.6093 
## namePortugal                 -0.6217  1.9515  -0.3186  0.7500   -4.4466 
## nameQatar                     9.8638  2.0146   4.8962  <.0001    5.9152 
## nameRomania                  -6.1691  2.0811  -2.9643  0.0030  -10.2481 
## nameRussia                   -4.2525  1.9560  -2.1741  0.0297   -8.0863 
## nameSaudi Arabia            -12.1050  3.6146  -3.3489  0.0008  -19.1895 
## nameSerbia                   -2.4919  2.1856  -1.1402  0.2542   -6.7757 
## nameSingapore                 8.5307  2.1497   3.9683  <.0001    4.3174 
## nameSlovak Republic           4.9398  2.0101   2.4574  0.0140    1.0000 
## nameSlovenia                 -0.9466  2.0223  -0.4681  0.6397   -4.9102 
## nameSpain                    -4.0611  1.9603  -2.0717  0.0383   -7.9032 
## nameSweden                    4.2758  1.9486   2.1944  0.0282    0.4567 
## nameSwitzerland               3.7829  1.9508   1.9392  0.0525   -0.0405 
## nameThailand                -13.0893  1.9609  -6.6751  <.0001  -16.9326 
## nameTurkey                   -6.1063  2.0466  -2.9837  0.0028  -10.1175 
## nameUkraine                   0.2356  3.6553   0.0645  0.9486   -6.9285 
## nameUnited Arab Emirates      6.3952  2.1580   2.9635  0.0030    2.1656 
## nameUnited Kingdom            4.0657  2.0385   1.9945  0.0461    0.0704 
## nameUnited States             3.9528  1.9812   1.9952  0.0460    0.0698 
## nameUruguay                   3.6879  1.9857   1.8572  0.0633   -0.2040 
## nameViet Nam                -14.4421  2.8465  -5.0735  <.0001  -20.0212 
##                                ci.ub 
## intrcpt                      95.4101  *** 
## nameAlbania                   2.3036      
## nameArgentina                 8.2669    . 
## nameAustralia                 9.1405   ** 
## nameAustria                   8.3535    * 
## nameBaku Azerbaijan          -6.5554  *** 
## nameBelarus                   3.4741      
## nameBelgium                  13.9997  *** 
## nameBosnia and Herzegovina   -6.3638  *** 
## nameBrazil                    1.9849      
## nameBrunei Darussalam         9.1929      
## nameBulgaria                 15.2405  *** 
## nameCanada                    2.6250      
## nameChile                    -2.9154  *** 
## nameChina                    -1.9428    * 
## nameChinese Taipei            7.6593    . 
## nameColombia                 -4.9816  *** 
## nameCosta Rica              -14.5786  *** 
## nameCroatia                  -1.1526    * 
## nameCyprus                    9.0204    . 
## nameCzech Republic            7.1148    . 
## nameDenmark                  -0.4555    * 
## nameDominican Republic      -12.2832  *** 
## nameEstonia                  -4.7830  *** 
## nameFinland                  -0.9542    * 
## nameFrance                   10.8232  *** 
## nameGeorgia                   2.5550      
## nameGermany                  11.2114  *** 
## nameGreece                    4.7011      
## nameHong Kong                 0.1886    . 
## nameIceland                   5.7649      
## nameIndonesia               -15.3157  *** 
## nameIreland                  -0.8595    * 
## nameIsrael                   21.5589  *** 
## nameItaly                     6.4686      
## nameJapan                     6.4936      
## nameJordan                   -1.4140   ** 
## nameKazakhstan               -8.0181  *** 
## nameKorea                     1.5421      
## nameKosovo                  -14.9185  *** 
## nameLatvia                   -4.5284  *** 
## nameLebanon                  16.1453  *** 
## nameLithuania                 0.9730      
## nameLuxembourg               10.9889  *** 
## nameMacao                    -7.3227  *** 
## nameMalaysia                 -8.3020  *** 
## nameMalta                    24.0598  *** 
## nameMexico                   -8.2951  *** 
## nameMoldova                   2.2145      
## nameMontenegro               -2.3383   ** 
## nameMorocco                 -13.1250  *** 
## nameNetherlands               5.8619      
## nameNew Zealand              13.0820  *** 
## nameNorth Macedonia           5.6513      
## nameNorway                    6.6993      
## namePanama                   -0.5790    * 
## namePeru                     -1.3776   ** 
## namePhilippines              -7.5291  *** 
## namePoland                    2.0361      
## namePortugal                  3.2031      
## nameQatar                    13.8123  *** 
## nameRomania                  -2.0902   ** 
## nameRussia                   -0.4188    * 
## nameSaudi Arabia             -5.0205  *** 
## nameSerbia                    1.7918      
## nameSingapore                12.7440  *** 
## nameSlovak Republic           8.8796    * 
## nameSlovenia                  3.0169      
## nameSpain                    -0.2190    * 
## nameSweden                    8.0950    * 
## nameSwitzerland               7.6064    . 
## nameThailand                 -9.2459  *** 
## nameTurkey                   -2.0951   ** 
## nameUkraine                   7.3998      
## nameUnited Arab Emirates     10.6249   ** 
## nameUnited Kingdom            8.0610    * 
## nameUnited States             7.8359    * 
## nameUruguay                   7.5798    . 
## nameViet Nam                 -8.8630  *** 
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#save data for reuse, write to gsheets
SD_results = country_fit %>% tidy() %>% 
  mutate(
    term = mapvalues(term, "intercept", "Hungary"),
    term = str_replace(term, "^name", "")
  )

#for non-Hungary, add Hungary's value
SD_results$estimate = c(
  SD_results$estimate[1],
  SD_results$estimate[1] + SD_results$estimate[-1]
)

#individual country meta-analysis approach
#maybe more appropriate SEs
SD_results_indiv = map_df(unique(pisa$name), function(i_name) {
  # browser()
  #subset data to this country
  i_pisa = pisa %>% filter(name == i_name)
  
  #fit meta
  i_fit = rma(
    yi = i_pisa$SD,
    sei = i_pisa$SD_se
  )
  
  
  
  tidy(i_fit) %>% 
    mutate(
      name = i_name,
      simple_mean = mean(i_pisa$SD, na.rm = T)
      )
})


#join
SD_results_combined = full_join(
  SD_results %>% rename(name = term, SD_joint = estimate, SD_joint_SE = std.error) %>% select(-p.value, -type, -statistic),
  SD_results_indiv %>% select(name, estimate, std.error, simple_mean) %>% rename(SD_indiv = estimate, SD_indiv_SE = std.error),
  by = "name"
)

SD_results_combined[-1] %>% wtd.cors()

##             SD_joint SD_joint_SE SD_indiv SD_indiv_SE simple_mean
## SD_joint      1.0000      -0.366   0.9999     -0.0953      0.9998
## SD_joint_SE  -0.3661       1.000  -0.3659      0.4686     -0.3674
## SD_indiv      0.9999      -0.366   1.0000     -0.0903      0.9999
## SD_indiv_SE  -0.0953       0.469  -0.0903      1.0000     -0.0894
## simple_mean   0.9998      -0.367   0.9999     -0.0894      1.0000

#plot comparison with errors shown
GG_scatter(SD_results_combined, "SD_joint", "SD_indiv", case_names = "name")

## `geom_smooth()` using formula 'y ~ x'

Main analysis

#add ISO's
inequality$ISO = pu_translate(inequality$Country)
SD_results_combined$ISO = pu_translate(SD_results_combined$name)

## No exact match: Baku Azerbaijan

## Best fuzzy match found: Baku Azerbaijan -> Azerbaijan with distance 5.00

becker$ISO = pu_translate(becker$Country)

## No exact match: Central African Rep.

## No exact match: Korea, North

## No exact match: Saint Helena, Ascension, and Tristan da Cunha

## No exact match: Virgin Islands

## Best fuzzy match found: Central African Rep. -> Central African Republic with distance 5.00

## Best fuzzy match found: Korea, North -> Korea North with distance 1.00

## Best fuzzy match found: Saint Helena, Ascension, and Tristan da Cunha -> Saint Helena, Ascension and Tristan da Cunha with distance 1.00

## Warning: There were multiple equally good matches for Virgin Islands: Cayman
## Islands | Faroe Islands | Mariana Islands | Pitcairn Islands | Jarvis Island |
## Midway Islands | U.S. Virgin Islands. All with distance 5.00

rindermann$ISO = pu_translate(rindermann$Country)

## No exact match: Antigua-Barbuda

## No exact match: Benin (Dahomey)

## No exact match: Central Afric R

## No exact match: Congo (Brazz)

## No exact match: Dominican Repub

## No exact match: Equat. Guinea

## No exact match: Korea-North

## No exact match: Korea-South

## No exact match: Nether Antilles

## No exact match: Papua N-Guinea

## No exact match: Sao Tome/Princi

## No exact match: St. Kitts & Nevis

## No exact match: St. Vincent/Gre

## No exact match: Trinidad Tobago

## No exact match: United Arab Emi

## Best fuzzy match found: Antigua-Barbuda -> Antigua & Barbuda with distance 3.00

## Best fuzzy match found: Benin (Dahomey) -> Benin (ex Dahomey) with distance 3.00

## Best fuzzy match found: Central Afric R -> Central Africa with distance 2.00

## Best fuzzy match found: Congo (Brazz) -> Congo (Brazz rep) with distance 4.00

## Best fuzzy match found: Dominican Repub -> Dominican Rep. with distance 2.00

## Best fuzzy match found: Equat. Guinea -> Equ. Guinea with distance 2.00

## Best fuzzy match found: Korea-North -> Korea North with distance 1.00

## Best fuzzy match found: Korea-South -> Korea South with distance 1.00

## Best fuzzy match found: Nether Antilles -> Neth. Antilles with distance 2.00

## Best fuzzy match found: Papua N-Guinea -> Papua New Guinea with distance 3.00

## Best fuzzy match found: Sao Tome/Princi -> Sao Tome & Principe with distance 5.00

## Best fuzzy match found: St. Kitts & Nevis -> St. Kitts en Nevis with distance 2.00

## Best fuzzy match found: St. Vincent/Gre -> St. Vincent with distance 4.00

## Best fuzzy match found: Trinidad Tobago -> Trinidad & Tobago with distance 2.00

## Best fuzzy match found: United Arab Emi -> United Arab Emirates with distance 5.00

d = full_join(
  SD_results_combined,
  inequality,
  by = "ISO"
) %>% full_join(
  becker,
  by = "ISO"
) %>% full_join(
  rindermann,
  by = "ISO"
)

#no fuckups
assert_that(!anyDuplicated(d$ISO))

## [1] TRUE

#sum stats
d %>% select(SD_indiv, UN_10:WB_gini, R, R_95_5) %>% describe2()

#inequality factor
fa_ineq = fa(d %>% select(UN_10:WB_gini) %>% miss_impute())
d$ineq_fct = fa_ineq$scores[, 1] %>% standardize()

#correlations
d %>% select(SD_indiv, ineq_fct, UN_10:WB_gini, R, R_95_5) %>% cor_matrix(p_val = T, asterisks_only = F)

##          SD_indiv               ineq_fct                UN_10                  
## SD_indiv "1"                    "-0.29%a [p=0.016]"     "-0.22%a [p=0.083]"    
## ineq_fct "-0.29%a [p=0.016]"    "1"                     "0.70%a [p=<0.001***]" 
## UN_10    "-0.22%a [p=0.083]"    "0.70%a [p=<0.001***]"  "1"                    
## UN_20    "-0.26%a [p=0.036]"    "0.93%a [p=<0.001***]"  "0.60%a [p=<0.001***]" 
## WB_gini  "-0.29%a [p=0.016]"    "0.97%a [p=<0.001***]"  "0.61%a [p=<0.001***]" 
## R        "0.42%a [p=<0.001***]" "-0.51%a [p=<0.001***]" "-0.40%a [p=<0.001***]"
## R_95_5   "0.54%a [p=<0.001***]" "0.21%a [p=0.056]"      "0.01%a [p=0.927]"     
##          UN_20                   WB_gini                
## SD_indiv "-0.26%a [p=0.036]"     "-0.29%a [p=0.016]"    
## ineq_fct "0.93%a [p=<0.001***]"  "0.97%a [p=<0.001***]" 
## UN_10    "0.60%a [p=<0.001***]"  "0.61%a [p=<0.001***]" 
## UN_20    "1"                     "0.82%a [p=<0.001***]" 
## WB_gini  "0.82%a [p=<0.001***]"  "1"                    
## R        "-0.40%a [p=<0.001***]" "-0.51%a [p=<0.001***]"
## R_95_5   "0.18%a [p=0.107]"      "0.20%a [p=0.066]"     
##          R                       R_95_5                 
## SD_indiv "0.42%a [p=<0.001***]"  "0.54%a [p=<0.001***]" 
## ineq_fct "-0.51%a [p=<0.001***]" "0.21%a [p=0.056]"     
## UN_10    "-0.40%a [p=<0.001***]" "0.01%a [p=0.927]"     
## UN_20    "-0.40%a [p=<0.001***]" "0.18%a [p=0.107]"     
## WB_gini  "-0.51%a [p=<0.001***]" "0.20%a [p=0.066]"     
## R        "1"                     "-0.35%a [p=<0.001***]"
## R_95_5   "-0.35%a [p=<0.001***]" "1"

#sample sizes
d %>% select(ineq_fct, SD_indiv, UN_10:WB_gini, R, R_95_5, Region) %>% 
  pairwiseCount()

##          ineq_fct SD_indiv UN_10 UN_20 WB_gini   R R_95_5 Region
## ineq_fct      154       69   122   148     154 151     86    154
## SD_indiv       69       79    65    68      71  78     75     78
## UN_10         122       65   126   116     122 126     80    126
## UN_20         148       68   116   148     148 145     83    148
## WB_gini       154       71   122   148     159 155     88    159
## R             151       78   126   145     155 199     98    172
## R_95_5         86       75    80    83      88  98     99     98
## Region        154       78   126   148     159 172     98    176

#plot it
GG_scatter(d, "SD_indiv", "ineq_fct", case_names = "ISO") +
  scale_x_continuous("PISA cognitive inequality [standard deviation]") +
  scale_y_continuous("Economic inequality (index of gini and 10/20 top-bottom ratios)")

## `geom_smooth()` using formula 'y ~ x'

GG_save("figs/PISA_SD_income_ineq.png")

## `geom_smooth()` using formula 'y ~ x'

GG_scatter(d, "SD_indiv", "WB_gini", case_names = "ISO") +
  scale_x_continuous("PISA cognitive inequality [standard deviation]") +
  scale_y_continuous("Economic inequality (Worldbank gini coef)")

## `geom_smooth()` using formula 'y ~ x'

#standardize for models
d %<>% mutate(
  SD_indiv_z = standardize(SD_indiv),
  R_z = standardize(R),
  WB_gini_z = standardize(WB_gini)
)

#models
#primary outcome
list(
  ols(ineq_fct ~ SD_indiv_z, data = d),
  ols(ineq_fct ~ SD_indiv_z + R_z, data = d),
  ols(ineq_fct ~ SD_indiv_z + R_z + Region, data = d)
) %>% summarize_models(asterisks_only = F)

#secondary outcome with more data
list(
  ols(WB_gini_z ~ SD_indiv_z, data = d),
  ols(WB_gini_z ~ SD_indiv_z + R_z, data = d),
  ols(WB_gini_z ~ SD_indiv_z + R_z + Region, data = d)
) %>% summarize_models(asterisks_only = F)

#other cog ineq predicators
list(
  ols(WB_gini_z ~ R_95_5_z, data = d),
  ols(WB_gini_z ~ R_95_5_z + R_z, data = d),
  ols(WB_gini_z ~ R_95_5_z + R_z + Region, data = d)
) %>% summarize_models(asterisks_only = F)

Simulation

Simulate countries with different mean and SD’s of intelligence, then simulate incomes, and then compute income inequality.

Income of each person is determined by a log-normal function as glanced on this post (https://humanvarieties.org/2016/01/31/iq-and-permanent-income-sizing-up-the-iq-paradox/). I also randomly vary the slope otherwise every country will have about the same return to skill.

#make up some countries
plan(multisession(workers = 20))

set.seed(1)

sim_countries = future_map_dfr(1:1000, .options = furrr_options(seed = T), .f = function(i) {

  #make up country IQ stats
  y = tibble(
    IQ_mean = rnorm(1, mean = 85, sd = 10),
    IQ_SD = runif(1, 12, 18)
  )
  
  #simulate a population
  y_pop = tibble(
    IQ = rnorm(1e6, y$IQ_mean, y$IQ_SD),
    income = exp(runif(1, .01, .05) * IQ + (rnorm(1, sd = 0.5) + y$IQ_mean/13) + rnorm(1e6, sd = 0.7)) %>% mapvalues(from = NA, to = 0, warn_missing = F)
  )
  
  #inequality income
  y$income_gini = Gini(y_pop$income)
  y$income_ratio_10_90 = quantile(y_pop$income, probs = .9)/quantile(y_pop$income, probs = .1)
  y$mean_income = mean(y_pop$income, na.rm = T)
  y$median_income = median(y_pop$income, na.rm = T)
  y$r_IQ_income = wtd.cors(y_pop)[1, 2]
  
  y
})

#does the data look halfway reasonable?
describe2(sim_countries)

#country level correlations
wtd.cors(sim_countries)

##                    IQ_mean   IQ_SD income_gini income_ratio_10_90 mean_income
## IQ_mean             1.0000 0.03410      0.0457             0.0457     0.28612
## IQ_SD               0.0341 1.00000      0.3082             0.3269     0.00874
## income_gini         0.0457 0.30818      1.0000             0.9876     0.20432
## income_ratio_10_90  0.0457 0.32692      0.9876             1.0000     0.21316
## mean_income         0.2861 0.00874      0.2043             0.2132     1.00000
## median_income       0.2994 0.00186      0.2024             0.2090     0.99933
## r_IQ_income         0.0435 0.25042      0.9205             0.8537     0.16189
##                    median_income r_IQ_income
## IQ_mean                  0.29936      0.0435
## IQ_SD                    0.00186      0.2504
## income_gini              0.20238      0.9205
## income_ratio_10_90       0.20901      0.8537
## mean_income              0.99933      0.1619
## median_income            1.00000      0.1645
## r_IQ_income              0.16450      1.0000

GG_scatter(sim_countries, "IQ_SD", "income_gini") +
  labs(x = "IQ inequality [standard deviation]",
       y = "Income inequality [gini]")

## `geom_smooth()` using formula 'y ~ x'

GG_save("figs/sim IQ SD income gini.png")

## `geom_smooth()` using formula 'y ~ x'

#z scored variables
sim_countries_z = df_standardize(sim_countries, exclude_range_01 = F)

#models
list(
  ols(income_gini ~ IQ_SD, data = sim_countries_z),
  ols(income_gini ~ IQ_SD + IQ_mean, data = sim_countries_z),
  ols(income_gini ~ IQ_SD + IQ_mean + mean_income, data = sim_countries_z),
  ols(income_gini ~ IQ_SD + IQ_mean + mean_income + r_IQ_income, data = sim_countries_z)
) %>% summarize_models()

Cognitive inequality and economic inequality

26 August, 2022