Main analysis
#add ISO's
inequality$ISO = pu_translate(inequality$Country)
SD_results_combined$ISO = pu_translate(SD_results_combined$name)
## No exact match: Baku Azerbaijan
## Best fuzzy match found: Baku Azerbaijan -> Azerbaijan with distance 5.00
becker$ISO = pu_translate(becker$Country)
## No exact match: Central African Rep.
## No exact match: Korea, North
## No exact match: Saint Helena, Ascension, and Tristan da Cunha
## No exact match: Virgin Islands
## Best fuzzy match found: Central African Rep. -> Central African Republic with distance 5.00
## Best fuzzy match found: Korea, North -> Korea North with distance 1.00
## Best fuzzy match found: Saint Helena, Ascension, and Tristan da Cunha -> Saint Helena, Ascension and Tristan da Cunha with distance 1.00
## Warning: There were multiple equally good matches for Virgin Islands: Cayman
## Islands | Faroe Islands | Mariana Islands | Pitcairn Islands | Jarvis Island |
## Midway Islands | U.S. Virgin Islands. All with distance 5.00
rindermann$ISO = pu_translate(rindermann$Country)
## No exact match: Antigua-Barbuda
## No exact match: Benin (Dahomey)
## No exact match: Central Afric R
## No exact match: Congo (Brazz)
## No exact match: Dominican Repub
## No exact match: Equat. Guinea
## No exact match: Korea-North
## No exact match: Korea-South
## No exact match: Nether Antilles
## No exact match: Papua N-Guinea
## No exact match: Sao Tome/Princi
## No exact match: St. Kitts & Nevis
## No exact match: St. Vincent/Gre
## No exact match: Trinidad Tobago
## No exact match: United Arab Emi
## Best fuzzy match found: Antigua-Barbuda -> Antigua & Barbuda with distance 3.00
## Best fuzzy match found: Benin (Dahomey) -> Benin (ex Dahomey) with distance 3.00
## Best fuzzy match found: Central Afric R -> Central Africa with distance 2.00
## Best fuzzy match found: Congo (Brazz) -> Congo (Brazz rep) with distance 4.00
## Best fuzzy match found: Dominican Repub -> Dominican Rep. with distance 2.00
## Best fuzzy match found: Equat. Guinea -> Equ. Guinea with distance 2.00
## Best fuzzy match found: Korea-North -> Korea North with distance 1.00
## Best fuzzy match found: Korea-South -> Korea South with distance 1.00
## Best fuzzy match found: Nether Antilles -> Neth. Antilles with distance 2.00
## Best fuzzy match found: Papua N-Guinea -> Papua New Guinea with distance 3.00
## Best fuzzy match found: Sao Tome/Princi -> Sao Tome & Principe with distance 5.00
## Best fuzzy match found: St. Kitts & Nevis -> St. Kitts en Nevis with distance 2.00
## Best fuzzy match found: St. Vincent/Gre -> St. Vincent with distance 4.00
## Best fuzzy match found: Trinidad Tobago -> Trinidad & Tobago with distance 2.00
## Best fuzzy match found: United Arab Emi -> United Arab Emirates with distance 5.00
d = full_join(
SD_results_combined,
inequality,
by = "ISO"
) %>% full_join(
becker,
by = "ISO"
) %>% full_join(
rindermann,
by = "ISO"
)
#no fuckups
assert_that(!anyDuplicated(d$ISO))
## [1] TRUE
#sum stats
d %>% select(SD_indiv, UN_10:WB_gini, R, R_95_5) %>% describe2()
#inequality factor
fa_ineq = fa(d %>% select(UN_10:WB_gini) %>% miss_impute())
d$ineq_fct = fa_ineq$scores[, 1] %>% standardize()
#correlations
d %>% select(SD_indiv, ineq_fct, UN_10:WB_gini, R, R_95_5) %>% cor_matrix(p_val = T, asterisks_only = F)
## SD_indiv ineq_fct UN_10
## SD_indiv "1" "-0.29%a [p=0.016]" "-0.22%a [p=0.083]"
## ineq_fct "-0.29%a [p=0.016]" "1" "0.70%a [p=<0.001***]"
## UN_10 "-0.22%a [p=0.083]" "0.70%a [p=<0.001***]" "1"
## UN_20 "-0.26%a [p=0.036]" "0.93%a [p=<0.001***]" "0.60%a [p=<0.001***]"
## WB_gini "-0.29%a [p=0.016]" "0.97%a [p=<0.001***]" "0.61%a [p=<0.001***]"
## R "0.42%a [p=<0.001***]" "-0.51%a [p=<0.001***]" "-0.40%a [p=<0.001***]"
## R_95_5 "0.54%a [p=<0.001***]" "0.21%a [p=0.056]" "0.01%a [p=0.927]"
## UN_20 WB_gini
## SD_indiv "-0.26%a [p=0.036]" "-0.29%a [p=0.016]"
## ineq_fct "0.93%a [p=<0.001***]" "0.97%a [p=<0.001***]"
## UN_10 "0.60%a [p=<0.001***]" "0.61%a [p=<0.001***]"
## UN_20 "1" "0.82%a [p=<0.001***]"
## WB_gini "0.82%a [p=<0.001***]" "1"
## R "-0.40%a [p=<0.001***]" "-0.51%a [p=<0.001***]"
## R_95_5 "0.18%a [p=0.107]" "0.20%a [p=0.066]"
## R R_95_5
## SD_indiv "0.42%a [p=<0.001***]" "0.54%a [p=<0.001***]"
## ineq_fct "-0.51%a [p=<0.001***]" "0.21%a [p=0.056]"
## UN_10 "-0.40%a [p=<0.001***]" "0.01%a [p=0.927]"
## UN_20 "-0.40%a [p=<0.001***]" "0.18%a [p=0.107]"
## WB_gini "-0.51%a [p=<0.001***]" "0.20%a [p=0.066]"
## R "1" "-0.35%a [p=<0.001***]"
## R_95_5 "-0.35%a [p=<0.001***]" "1"
#sample sizes
d %>% select(ineq_fct, SD_indiv, UN_10:WB_gini, R, R_95_5, Region) %>%
pairwiseCount()
## ineq_fct SD_indiv UN_10 UN_20 WB_gini R R_95_5 Region
## ineq_fct 154 69 122 148 154 151 86 154
## SD_indiv 69 79 65 68 71 78 75 78
## UN_10 122 65 126 116 122 126 80 126
## UN_20 148 68 116 148 148 145 83 148
## WB_gini 154 71 122 148 159 155 88 159
## R 151 78 126 145 155 199 98 172
## R_95_5 86 75 80 83 88 98 99 98
## Region 154 78 126 148 159 172 98 176
#plot it
GG_scatter(d, "SD_indiv", "ineq_fct", case_names = "ISO") +
scale_x_continuous("PISA cognitive inequality [standard deviation]") +
scale_y_continuous("Economic inequality (index of gini and 10/20 top-bottom ratios)")
## `geom_smooth()` using formula 'y ~ x'

GG_save("figs/PISA_SD_income_ineq.png")
## `geom_smooth()` using formula 'y ~ x'
GG_scatter(d, "SD_indiv", "WB_gini", case_names = "ISO") +
scale_x_continuous("PISA cognitive inequality [standard deviation]") +
scale_y_continuous("Economic inequality (Worldbank gini coef)")
## `geom_smooth()` using formula 'y ~ x'

#standardize for models
d %<>% mutate(
SD_indiv_z = standardize(SD_indiv),
R_z = standardize(R),
WB_gini_z = standardize(WB_gini)
)
#models
#primary outcome
list(
ols(ineq_fct ~ SD_indiv_z, data = d),
ols(ineq_fct ~ SD_indiv_z + R_z, data = d),
ols(ineq_fct ~ SD_indiv_z + R_z + Region, data = d)
) %>% summarize_models(asterisks_only = F)
#secondary outcome with more data
list(
ols(WB_gini_z ~ SD_indiv_z, data = d),
ols(WB_gini_z ~ SD_indiv_z + R_z, data = d),
ols(WB_gini_z ~ SD_indiv_z + R_z + Region, data = d)
) %>% summarize_models(asterisks_only = F)
#other cog ineq predicators
list(
ols(WB_gini_z ~ R_95_5_z, data = d),
ols(WB_gini_z ~ R_95_5_z + R_z, data = d),
ols(WB_gini_z ~ R_95_5_z + R_z + Region, data = d)
) %>% summarize_models(asterisks_only = F)
Simulation
Simulate countries with different mean and SD’s of intelligence, then
simulate incomes, and then compute income inequality.
Income of each person is determined by a log-normal function as
glanced on this post (https://humanvarieties.org/2016/01/31/iq-and-permanent-income-sizing-up-the-iq-paradox/).
I also randomly vary the slope otherwise every country will have about
the same return to skill.
#make up some countries
plan(multisession(workers = 20))
set.seed(1)
sim_countries = future_map_dfr(1:1000, .options = furrr_options(seed = T), .f = function(i) {
#make up country IQ stats
y = tibble(
IQ_mean = rnorm(1, mean = 85, sd = 10),
IQ_SD = runif(1, 12, 18)
)
#simulate a population
y_pop = tibble(
IQ = rnorm(1e6, y$IQ_mean, y$IQ_SD),
income = exp(runif(1, .01, .05) * IQ + (rnorm(1, sd = 0.5) + y$IQ_mean/13) + rnorm(1e6, sd = 0.7)) %>% mapvalues(from = NA, to = 0, warn_missing = F)
)
#inequality income
y$income_gini = Gini(y_pop$income)
y$income_ratio_10_90 = quantile(y_pop$income, probs = .9)/quantile(y_pop$income, probs = .1)
y$mean_income = mean(y_pop$income, na.rm = T)
y$median_income = median(y_pop$income, na.rm = T)
y$r_IQ_income = wtd.cors(y_pop)[1, 2]
y
})
#does the data look halfway reasonable?
describe2(sim_countries)
#country level correlations
wtd.cors(sim_countries)
## IQ_mean IQ_SD income_gini income_ratio_10_90 mean_income
## IQ_mean 1.0000 0.03410 0.0457 0.0457 0.28612
## IQ_SD 0.0341 1.00000 0.3082 0.3269 0.00874
## income_gini 0.0457 0.30818 1.0000 0.9876 0.20432
## income_ratio_10_90 0.0457 0.32692 0.9876 1.0000 0.21316
## mean_income 0.2861 0.00874 0.2043 0.2132 1.00000
## median_income 0.2994 0.00186 0.2024 0.2090 0.99933
## r_IQ_income 0.0435 0.25042 0.9205 0.8537 0.16189
## median_income r_IQ_income
## IQ_mean 0.29936 0.0435
## IQ_SD 0.00186 0.2504
## income_gini 0.20238 0.9205
## income_ratio_10_90 0.20901 0.8537
## mean_income 0.99933 0.1619
## median_income 1.00000 0.1645
## r_IQ_income 0.16450 1.0000
GG_scatter(sim_countries, "IQ_SD", "income_gini") +
labs(x = "IQ inequality [standard deviation]",
y = "Income inequality [gini]")
## `geom_smooth()` using formula 'y ~ x'

GG_save("figs/sim IQ SD income gini.png")
## `geom_smooth()` using formula 'y ~ x'
#z scored variables
sim_countries_z = df_standardize(sim_countries, exclude_range_01 = F)
#models
list(
ols(income_gini ~ IQ_SD, data = sim_countries_z),
ols(income_gini ~ IQ_SD + IQ_mean, data = sim_countries_z),
ols(income_gini ~ IQ_SD + IQ_mean + mean_income, data = sim_countries_z),
ols(income_gini ~ IQ_SD + IQ_mean + mean_income + r_IQ_income, data = sim_countries_z)
) %>% summarize_models()