What

Rindermann et al have used some poor methods for testing their cognitive elite model of aggregate social performance. Here we use better methods, and do so using the 51 indicators in the Social Progress Index, a very comprehensive version of Human Development Index.

Init

options(digits = 3)
library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0      ✔ purrr   0.3.5 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## Loading required package: magrittr
## 
## 
## Attaching package: 'magrittr'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
## 
## 
## Loading required package: weights
## 
## Loading required package: Hmisc
## 
## Loading required package: lattice
## 
## Loading required package: survival
## 
## Loading required package: Formula
## 
## 
## Attaching package: 'Hmisc'
## 
## 
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## 
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## 
## 
## Loading required package: assertthat
## 
## 
## Attaching package: 'assertthat'
## 
## 
## The following object is masked from 'package:tibble':
## 
##     has_name
## 
## 
## Loading required package: psych
## 
## 
## Attaching package: 'psych'
## 
## 
## The following object is masked from 'package:Hmisc':
## 
##     describe
## 
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## 
## 
## 
## Attaching package: 'kirkegaard'
## 
## 
## The following object is masked from 'package:psych':
## 
##     rescale
## 
## 
## The following object is masked from 'package:assertthat':
## 
##     are_equal
## 
## 
## The following objects are masked from 'package:purrr':
## 
##     is_logical, is_numeric
## 
## 
## The following object is masked from 'package:base':
## 
##     +
load_packages(
  readxl,
  sf,
  tmap,
  psych,
  rms,
  broom,
  GGally
  )
## Linking to GEOS 3.10.2, GDAL 3.4.1, PROJ 8.2.1; sf_use_s2() is TRUE
## Loading required package: SparseM
## 
## Attaching package: 'SparseM'
## 
## The following object is masked from 'package:base':
## 
##     backsolve
## 
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
theme_set(theme_bw())

tmap_options(check.and.fix = TRUE)

Functions

#get R2
r2get = function(x) {
  y = x %>% 
    summary.lm() %>% 
    glance()
  
  y$adj.r.squared
}

describe = function(...) {
  psych::describe(...) %>% as.matrix()
}

Data

#Rindermann
#Cognitive Capitalism cognitive data
cc = read_excel("data/cognitive_capitalism_appendix.xlsx") %>% 
  df_legalize_names() %>% 
  mutate(ISO = pu_translate(Country))
## No exact match: Antigua-Barbuda
## No exact match: Benin (Dahomey)
## No exact match: Central Afric R
## No exact match: Congo (Brazz)
## No exact match: Dominican Repub
## No exact match: Equat. Guinea
## No exact match: Korea-North
## No exact match: Korea-South
## No exact match: Nether Antilles
## No exact match: Papua N-Guinea
## No exact match: Sao Tome/Princi
## No exact match: St. Kitts & Nevis
## No exact match: St. Vincent/Gre
## No exact match: Trinidad Tobago
## No exact match: United Arab Emi
## Best fuzzy match found: Antigua-Barbuda -> Antigua & Barbuda with distance 3.00
## Best fuzzy match found: Benin (Dahomey) -> Benin (ex Dahomey) with distance 3.00
## Best fuzzy match found: Central Afric R -> Central Africa with distance 2.00
## Best fuzzy match found: Congo (Brazz) -> Congo (Brazz rep) with distance 4.00
## Best fuzzy match found: Dominican Repub -> Dominican Rep. with distance 2.00
## Best fuzzy match found: Equat. Guinea -> Equ. Guinea with distance 2.00
## Best fuzzy match found: Korea-North -> Korea North with distance 1.00
## Best fuzzy match found: Korea-South -> Korea South with distance 1.00
## Best fuzzy match found: Nether Antilles -> Neth. Antilles with distance 2.00
## Best fuzzy match found: Papua N-Guinea -> Papua New Guinea with distance 3.00
## Best fuzzy match found: Sao Tome/Princi -> Sao Tome & Principe with distance 5.00
## Best fuzzy match found: St. Kitts & Nevis -> St. Kitts en Nevis with distance 2.00
## Best fuzzy match found: St. Vincent/Gre -> St. Vincent with distance 4.00
## Best fuzzy match found: Trinidad Tobago -> Trinidad & Tobago with distance 2.00
## Best fuzzy match found: United Arab Emi -> United Arab Emirates with distance 5.00
#safety data
cc_chap8 = haven::read_sav("data/RindCogCapTable8-1.sav") %>% 
  rename(
    ISO = ISOSKODE,
    airline_safety = FLUGSICH,
    road_safety = VTOTW10B,
    occu_safety = ARBUNFGB,
    tech_safety = TECHSICH
  )
## Failed to find HARMONY
## Failed to find EMBEDD
## Failed to find HIERARCH
## Failed to find MASTERY
## Failed to find AFF_AUTO
## Failed to find INT_AUTO
## Failed to find EGALIT
#Math Olympics data
IMO = read_excel("data/IMO_Rindermann2011.xlsx") %>% 
  mutate(
    ISO = pu_translate(Country),
    IMO_rr = IMO_rr %>% as.numeric()
  )
## No exact match: Cote d'Ivore
## No exact match: Korea, North
## Best fuzzy match found: Cote d'Ivore -> Cote d'Ivoire with distance 1.00
## Best fuzzy match found: Korea, North -> Korea North with distance 1.00
## Warning in IMO_rr %>% as.numeric(): NAs introduced by coercion
#Social Progress Index
#https://www.socialprogress.org/download
spi = read_excel("data/2014-2019-SPI-Public.xlsx", sheet = 2) %>% 
  df_legalize_names() %>% 
  mutate(ISO = pu_translate(Country))
## No exact match: World
## No exact match: Congo, Democratic Republic of
## No exact match: Korea, Democratic Republic of
## No exact match: Korea, Republic of
## No exact match: Virgin Islands (U.S.)
## No exact match: North Cyprus
## No exact match: Svalbard and Jan Mayen Islands
## Best fuzzy match found: World -> world with distance 1.00
## Best fuzzy match found: Congo, Democratic Republic of -> Congo, The Democratic Republic of with distance 4.00
## Best fuzzy match found: Korea, Democratic Republic of -> Korea, Democratic Republic with distance 3.00
## Best fuzzy match found: Korea, Republic of -> Korea Republic of with distance 1.00
## Warning: There were multiple equally good matches for Virgin Islands (U.S.):
## Jarvis Island (USA) | Midway Islands (USA) | U.S. Virgin Islands (USA). All with
## distance 7.00
## Best fuzzy match found: North Cyprus -> Northern Cyprus with distance 3.00
## Best fuzzy match found: Svalbard and Jan Mayen Islands -> Svalbard and Jan Mayen (NOR) with distance 7.00
#UN regions
#not used for study
UN = read_rds("data/chess_kirkegaard2019.rds")

#HDI data
#http://hdr.undp.org/en/composite/HDI
HDI = read_excel("data/2018_Statistical_Annex_Table_1.xlsx", sheet = 2) %>% 
  #fix names
  df_legalize_names() %>% 
  #remove groups
  filter(!str_detect(Country, "DEVELOPMENT")) %>% 
  #add ISO
  mutate(ISO = pu_translate(Country))

#world map spatial data
#http://thematicmapping.org/downloads/world_borders.php
#these data are now broken...
# world = st_read("data/TM_WORLD_BORDERS-0.3/TM_WORLD_BORDERS-0.3.shp") %>% 
#   rename(ISO = ISO3)
# world2 = st_read("data/TM_WORLD_BORDERS_SIMPL-0.3/TM_WORLD_BORDERS_SIMPL-0.3.shp") %>% 
#   rename(ISO = ISO3)
#but we use this one
#https://tapiquen-sig.jimdofree.com/english-version/free-downloads/world/
world3 = st_read("data/World_Countries/World_Countries.shp")
## Reading layer `World_Countries' from data source 
##   `/media/emil/8tb_ssd_3/projects/Smart fraction reanalysis/data/World_Countries/World_Countries.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 252 features and 1 field
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -180 ymin: -90 xmax: 180 ymax: 83.6
## Geodetic CRS:  WGS 84
#need to filter the non-countries
#these are marked by parens
world3 = world3 %>% 
  filter(!str_detect(COUNTRY, "\\(")) %>% 
  mutate(
    ISO = pu_translate(COUNTRY)
  )

#brain drain emigration data
#https://www.iab.de/en/daten/iab-brain-drain-data.aspx
#not used for study
emi = read_excel("data/iabbd_8010_v1_emigration.xls", sheet = 3, col_names = F)
## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...17`
## • `` -> `...18`
## • `` -> `...19`
## • `` -> `...20`
## • `` -> `...21`
## • `` -> `...22`
## • `` -> `...23`
## • `` -> `...24`
## • `` -> `...25`
## • `` -> `...26`
## • `` -> `...27`
## • `` -> `...28`
## • `` -> `...29`
#fix data
names(emi) = (emi[1, ] %>% unlist() %>% miss_locf()) + "_" +  (emi[2, ] %>% unlist())
emi %<>% df_legalize_names()
emi = emi[-c(1:3), ]
for (i in seq_along(emi)[-1]) emi[[i]] = as.numeric(emi[[i]])
emi$high_mean = emi %>% select(contains("High")) %>% rowMeans()
emi$med_mean = emi %>% select(contains("Medium")) %>% rowMeans()
emi$medhigh_mean = emi$med_mean + emi$high_mean
emi$low_mean = emi %>% select(contains("Low")) %>% rowMeans()
emi$total_mean = emi %>% select(contains("Total")) %>% rowMeans()
emi$ISO = pu_translate(emi[[1]])
## No exact match: World
## No exact match: Congo, Dem. Rep. of the
## No exact match: Congo, Rep. of the
## No exact match: Holy See (Vatican City)
## No exact match: Timor Leste
## Best fuzzy match found: World -> world with distance 1.00
## Best fuzzy match found: Congo, Dem. Rep. of the -> Congo, Dem. Rep. with distance 7.00
## Best fuzzy match found: Congo, Rep. of the -> Congo, Republic of the with distance 5.00
## Best fuzzy match found: Holy See (Vatican City) -> Holy See (Vatican City State) with distance 6.00
## Best fuzzy match found: Timor Leste -> Timor–Leste with distance 1.00
#ecnomics data
#world bank data
#https://data.worldbank.org/indicator/NY.GNP.PCAP.PP.CD
#https://data.worldbank.org/indicator/NY.GDP.PCAP.PP.CD
#these have missing data, which we can impute
GNI = read_excel("data/API_NY.GNP.PCAP.PP.CD_DS2_en_excel_v2_1218005.xls", skip = 3)
GDP = read_excel("data/API_NY.GDP.PCAP.PP.CD_DS2_en_excel_v2_1217845.xls", skip = 3)

#save back, add growth
GNI %<>% mutate(
  GNI_growth = (`2019` / `1990`) %>% log10(),
  GNI_2019 = `2019` %>% log10(),
  ISO = `Country Code`
)

GDP %<>% mutate(
  GDP_growth = (`2019` / `1990`) %>% log10(),
  GDP_2019 = `2019` %>% log10(),
  ISO = `Country Code`
)

#these are based on self-report data gathered by Gallup over the years
#https://news.gallup.com/poll/166211/worldwide-median-household-income-000.aspx
median_income = read_csv("data/median income gallup.csv") %>% 
  mutate(
    median_income = medianPerCapitaIncome %>% log10(),
    median_income_hh = medianHouseholdIncome %>% log10(),
    ISO = pu_translate(country)
  )
## Rows: 131 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): country
## dbl (3): medianHouseholdIncome, medianPerCapitaIncome, pop2020
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#economic outcomes
econ_outcomes = c("median_income", "median_income_hh", "GNI_2019", "GNI_growth", "GDP_2019", "GDP_growth")

#no dup ISOs
assert_that(!any(duplicated(cc$ISO)))
## [1] TRUE
assert_that(!any(duplicated(spi$ISO)))
## [1] TRUE
assert_that(!any(duplicated(UN$ISO)))
## [1] TRUE
assert_that(!any(duplicated(HDI$ISO)))
## [1] TRUE
assert_that(!any(duplicated(world3$ISO)))
## [1] TRUE
assert_that(!any(duplicated(emi$ISO)))
## [1] TRUE
assert_that(!any(duplicated(cc_chap8$ISO)))
## [1] TRUE
assert_that(!any(duplicated(IMO$ISO)))
## [1] TRUE
#join
d = cc %>% 
  full_join(spi, by = "ISO") %>% 
  full_join(UN, by = "ISO") %>% 
  full_join(emi %>% select(ISO, total_mean, high_mean, med_mean, low_mean, medhigh_mean), by = "ISO") %>% 
  full_join(HDI, by = "ISO") %>% 
  full_join(cc_chap8 %>% select(ISO, airline_safety:tech_safety), by = "ISO") %>% 
  full_join(IMO, by = "ISO") %>% 
  left_join(median_income %>% select(ISO, median_income, median_income_hh), by = "ISO") %>% 
  left_join(GNI %>% select(ISO, GNI_2019, GNI_growth), by = "ISO") %>% 
  left_join(GDP %>% select(ISO, GDP_2019, GDP_growth), by = "ISO")

#check
assert_that(!any(duplicated(d$ISO)))
## [1] TRUE
assert_that(!any(duplicated(names(d))))
## [1] TRUE

Intelligence variables

#recode cognitive class measures
#intellectual class
d$IC_reg = lm(x95pct_IQc ~ SAS_IQc, data = d, na.action = na.exclude) %>% resid() %>% standardize()
#dummy class
d$LC_reg = lm(x05pct_IQc ~ SAS_IQc, data = d, na.action = na.exclude) %>% resid() %>% standardize()

#direct versions of tails
d$IC_delta = d$x95pct_IQc - d$SAS_IQc
d$LC_delta = d$x05pct_IQc - d$SAS_IQc

#standardized versions of average
d$SAS_IQc_z = d$SAS_IQc %>% standardize()

#advantage of IC and LC as function of mean
GG_scatter(d, "SAS_IQc", "IC_delta", case_names = "Country.x") +
  scale_x_continuous("Mean IQ") +
  scale_y_continuous("IQ advantage of 95th over the mean")
## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/direct_IC_mean.png")
## `geom_smooth()` using formula = 'y ~ x'
GG_scatter(d, "SAS_IQc", "LC_delta", case_names = "Country.x") +
  scale_x_continuous("Mean IQ") +
  scale_y_continuous("IQ advantage of 5th over the mean")
## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/direct_LC_mean.png")
## `geom_smooth()` using formula = 'y ~ x'

Missing cognitive data

#count
d %>% 
  select(SAS_IQc, x95pct_IQc, x05pct_IQc, IC_reg, LC_reg) %>% 
  miss_by_var(reverse = T)
##    SAS_IQc x95pct_IQc x05pct_IQc     IC_reg     LC_reg 
##        107         99         99         99         99

General factor of SPI

Following prior study (Kirkegaard 2014).

#collect data
spi_indicators = d %>% select(Undernourishment_pct_of_pop:Percent_of_tertiary_students_enrolled_in_globally_ranked_universities)
spi_indicators %>% miss_amount()
## cases with missing data  vars with missing data cells with missing data 
##                   0.680                   1.000                   0.231
#impute
spi_indicators_imp = spi_indicators %>% miss_impute(max_na = 10)

#amount after imputing
spi_indicators_imp %>% miss_by_case() %>% table2()
#factor analyze
spi_fa = fa(spi_indicators_imp)
## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was done
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
spi_fa
## Factor Analysis using method =  minres
## Call: fa(r = spi_indicators_imp)
## Standardized loadings (pattern matrix) based upon correlation matrix
##                                                                                       MR1
## Undernourishment_pct_of_pop                                                         -0.80
## Maternal_mortality_rate_deaths_100_000_live_births                                  -0.83
## Child_mortality_rate_deaths_1_000_live_births                                       -0.88
## Child_stunting_pct_of_children                                                      -0.88
## Deaths_from_infectious_diseases_deaths_100_000_people                               -0.79
## Access_to_at_least_basic_drinking_water_pct_of_pop                                   0.86
## Access_to_piped_water_pct_of_pop                                                     0.84
## Access_to_at_least_basic_sanitation_facilities_pct_of_pop                            0.84
## Rural_open_defecation_pct_of_pop                                                    -0.64
## Access_to_electricity_pct_of_pop                                                     0.80
## Quality_of_electricity_supply_1_low_7_high                                           0.88
## Household_air_pollution_attributable_deaths_deaths_100_000_people                   -0.83
## Access_to_clean_fuels_and_technology_for_cooking_pct_of_pop                          0.85
## Homicide_rate_deaths_100_000_people                                                 -0.14
## Perceived_criminality_1_low_5_high                                                  -0.60
## Political_killings_and_torture_0_low_freedom_1_high_freedom                          0.65
## Traffic_deaths_deaths_100_000_people                                                -0.68
## Adult_literacy_rate_pct_of_pop_aged_15plus                                           0.78
## Primary_school_enrollment_pct_of_children                                            0.64
## Secondary_school_enrollment_pct_of_children                                          0.90
## Gender_parity_in_secondary_enrollment_distance_from_parity                           0.54
## Access_to_quality_education_0_unequal_4_equal                                        0.76
## Mobile_telephone_subscriptions_subscriptions_100_people                              0.64
## Internet_users_pct_of_pop                                                            0.90
## Access_to_online_governance_0_low_1_high                                             0.75
## Media_censorship_0_frequent_4_rare                                                   0.46
## Life_expectancy_at_60_years                                                          0.83
## Premature_deaths_from_non_communicable_diseases_deaths_100_000_people               -0.59
## Access_to_essential_services_0_none_100_full_coverage                                0.94
## Access_to_quality_healthcare_0_unequal_4_equal                                       0.84
## Outdoor_air_pollution_attributable_deaths_deaths_100_000_people                     -0.19
## Greenhouse_gas_emissions_CO2_equivalents_GDP                                        -0.45
## Biome_protection                                                                     0.23
## Political_rights_0_no_rights_40_full_rights                                          0.62
## Freedom_of_expression_0_no_freedom_1_full_freedom                                    0.47
## Freedom_of_religion_0_no_freedom_4_full_freedom                                      0.33
## Access_to_justice_0_non_existent_1_observed                                          0.68
## Property_rights_for_women_0_no_rights_5_full_rights                                  0.68
## Vulnerable_employment_pct_of_employees                                              -0.86
## Early_marriage_pct_of_women                                                         -0.75
## Satisfied_demand_for_contraception_pct_of_women                                      0.66
## Corruption_0_high_100_low                                                            0.80
## Acceptance_of_gays_and_lesbians_0_low_100_high                                       0.63
## Discrimination_and_violence_against_minorities_1_low_10_high                        -0.47
## Equality_of_political_power_by_gender_0_unequal_power_4_equal_power                  0.52
## Equality_of_political_power_by_socioeconomic_position_0_unequal_power_4_equal_power  0.49
## Equality_of_political_power_by_social_group_0_unequal_power_4_equal_power            0.47
## Years_of_tertiary_schooling                                                          0.79
## Women_s_average_years_in_school                                                      0.87
## Globally_ranked_universities_points                                                  0.28
## Percent_of_tertiary_students_enrolled_in_globally_ranked_universities                0.57
##                                                                                        h2
## Undernourishment_pct_of_pop                                                         0.644
## Maternal_mortality_rate_deaths_100_000_live_births                                  0.697
## Child_mortality_rate_deaths_1_000_live_births                                       0.770
## Child_stunting_pct_of_children                                                      0.767
## Deaths_from_infectious_diseases_deaths_100_000_people                               0.624
## Access_to_at_least_basic_drinking_water_pct_of_pop                                  0.734
## Access_to_piped_water_pct_of_pop                                                    0.701
## Access_to_at_least_basic_sanitation_facilities_pct_of_pop                           0.699
## Rural_open_defecation_pct_of_pop                                                    0.411
## Access_to_electricity_pct_of_pop                                                    0.635
## Quality_of_electricity_supply_1_low_7_high                                          0.767
## Household_air_pollution_attributable_deaths_deaths_100_000_people                   0.693
## Access_to_clean_fuels_and_technology_for_cooking_pct_of_pop                         0.715
## Homicide_rate_deaths_100_000_people                                                 0.020
## Perceived_criminality_1_low_5_high                                                  0.355
## Political_killings_and_torture_0_low_freedom_1_high_freedom                         0.428
## Traffic_deaths_deaths_100_000_people                                                0.469
## Adult_literacy_rate_pct_of_pop_aged_15plus                                          0.602
## Primary_school_enrollment_pct_of_children                                           0.412
## Secondary_school_enrollment_pct_of_children                                         0.815
## Gender_parity_in_secondary_enrollment_distance_from_parity                          0.290
## Access_to_quality_education_0_unequal_4_equal                                       0.578
## Mobile_telephone_subscriptions_subscriptions_100_people                             0.413
## Internet_users_pct_of_pop                                                           0.808
## Access_to_online_governance_0_low_1_high                                            0.560
## Media_censorship_0_frequent_4_rare                                                  0.211
## Life_expectancy_at_60_years                                                         0.696
## Premature_deaths_from_non_communicable_diseases_deaths_100_000_people               0.352
## Access_to_essential_services_0_none_100_full_coverage                               0.890
## Access_to_quality_healthcare_0_unequal_4_equal                                      0.706
## Outdoor_air_pollution_attributable_deaths_deaths_100_000_people                     0.036
## Greenhouse_gas_emissions_CO2_equivalents_GDP                                        0.202
## Biome_protection                                                                    0.054
## Political_rights_0_no_rights_40_full_rights                                         0.381
## Freedom_of_expression_0_no_freedom_1_full_freedom                                   0.224
## Freedom_of_religion_0_no_freedom_4_full_freedom                                     0.110
## Access_to_justice_0_non_existent_1_observed                                         0.457
## Property_rights_for_women_0_no_rights_5_full_rights                                 0.463
## Vulnerable_employment_pct_of_employees                                              0.744
## Early_marriage_pct_of_women                                                         0.561
## Satisfied_demand_for_contraception_pct_of_women                                     0.435
## Corruption_0_high_100_low                                                           0.647
## Acceptance_of_gays_and_lesbians_0_low_100_high                                      0.397
## Discrimination_and_violence_against_minorities_1_low_10_high                        0.217
## Equality_of_political_power_by_gender_0_unequal_power_4_equal_power                 0.268
## Equality_of_political_power_by_socioeconomic_position_0_unequal_power_4_equal_power 0.236
## Equality_of_political_power_by_social_group_0_unequal_power_4_equal_power           0.221
## Years_of_tertiary_schooling                                                         0.629
## Women_s_average_years_in_school                                                     0.761
## Globally_ranked_universities_points                                                 0.079
## Percent_of_tertiary_students_enrolled_in_globally_ranked_universities               0.325
##                                                                                       u2
## Undernourishment_pct_of_pop                                                         0.36
## Maternal_mortality_rate_deaths_100_000_live_births                                  0.30
## Child_mortality_rate_deaths_1_000_live_births                                       0.23
## Child_stunting_pct_of_children                                                      0.23
## Deaths_from_infectious_diseases_deaths_100_000_people                               0.38
## Access_to_at_least_basic_drinking_water_pct_of_pop                                  0.27
## Access_to_piped_water_pct_of_pop                                                    0.30
## Access_to_at_least_basic_sanitation_facilities_pct_of_pop                           0.30
## Rural_open_defecation_pct_of_pop                                                    0.59
## Access_to_electricity_pct_of_pop                                                    0.36
## Quality_of_electricity_supply_1_low_7_high                                          0.23
## Household_air_pollution_attributable_deaths_deaths_100_000_people                   0.31
## Access_to_clean_fuels_and_technology_for_cooking_pct_of_pop                         0.28
## Homicide_rate_deaths_100_000_people                                                 0.98
## Perceived_criminality_1_low_5_high                                                  0.64
## Political_killings_and_torture_0_low_freedom_1_high_freedom                         0.57
## Traffic_deaths_deaths_100_000_people                                                0.53
## Adult_literacy_rate_pct_of_pop_aged_15plus                                          0.40
## Primary_school_enrollment_pct_of_children                                           0.59
## Secondary_school_enrollment_pct_of_children                                         0.19
## Gender_parity_in_secondary_enrollment_distance_from_parity                          0.71
## Access_to_quality_education_0_unequal_4_equal                                       0.42
## Mobile_telephone_subscriptions_subscriptions_100_people                             0.59
## Internet_users_pct_of_pop                                                           0.19
## Access_to_online_governance_0_low_1_high                                            0.44
## Media_censorship_0_frequent_4_rare                                                  0.79
## Life_expectancy_at_60_years                                                         0.30
## Premature_deaths_from_non_communicable_diseases_deaths_100_000_people               0.65
## Access_to_essential_services_0_none_100_full_coverage                               0.11
## Access_to_quality_healthcare_0_unequal_4_equal                                      0.29
## Outdoor_air_pollution_attributable_deaths_deaths_100_000_people                     0.96
## Greenhouse_gas_emissions_CO2_equivalents_GDP                                        0.80
## Biome_protection                                                                    0.95
## Political_rights_0_no_rights_40_full_rights                                         0.62
## Freedom_of_expression_0_no_freedom_1_full_freedom                                   0.78
## Freedom_of_religion_0_no_freedom_4_full_freedom                                     0.89
## Access_to_justice_0_non_existent_1_observed                                         0.54
## Property_rights_for_women_0_no_rights_5_full_rights                                 0.54
## Vulnerable_employment_pct_of_employees                                              0.26
## Early_marriage_pct_of_women                                                         0.44
## Satisfied_demand_for_contraception_pct_of_women                                     0.57
## Corruption_0_high_100_low                                                           0.35
## Acceptance_of_gays_and_lesbians_0_low_100_high                                      0.60
## Discrimination_and_violence_against_minorities_1_low_10_high                        0.78
## Equality_of_political_power_by_gender_0_unequal_power_4_equal_power                 0.73
## Equality_of_political_power_by_socioeconomic_position_0_unequal_power_4_equal_power 0.76
## Equality_of_political_power_by_social_group_0_unequal_power_4_equal_power           0.78
## Years_of_tertiary_schooling                                                         0.37
## Women_s_average_years_in_school                                                     0.24
## Globally_ranked_universities_points                                                 0.92
## Percent_of_tertiary_students_enrolled_in_globally_ranked_universities               0.67
##                                                                                     com
## Undernourishment_pct_of_pop                                                           1
## Maternal_mortality_rate_deaths_100_000_live_births                                    1
## Child_mortality_rate_deaths_1_000_live_births                                         1
## Child_stunting_pct_of_children                                                        1
## Deaths_from_infectious_diseases_deaths_100_000_people                                 1
## Access_to_at_least_basic_drinking_water_pct_of_pop                                    1
## Access_to_piped_water_pct_of_pop                                                      1
## Access_to_at_least_basic_sanitation_facilities_pct_of_pop                             1
## Rural_open_defecation_pct_of_pop                                                      1
## Access_to_electricity_pct_of_pop                                                      1
## Quality_of_electricity_supply_1_low_7_high                                            1
## Household_air_pollution_attributable_deaths_deaths_100_000_people                     1
## Access_to_clean_fuels_and_technology_for_cooking_pct_of_pop                           1
## Homicide_rate_deaths_100_000_people                                                   1
## Perceived_criminality_1_low_5_high                                                    1
## Political_killings_and_torture_0_low_freedom_1_high_freedom                           1
## Traffic_deaths_deaths_100_000_people                                                  1
## Adult_literacy_rate_pct_of_pop_aged_15plus                                            1
## Primary_school_enrollment_pct_of_children                                             1
## Secondary_school_enrollment_pct_of_children                                           1
## Gender_parity_in_secondary_enrollment_distance_from_parity                            1
## Access_to_quality_education_0_unequal_4_equal                                         1
## Mobile_telephone_subscriptions_subscriptions_100_people                               1
## Internet_users_pct_of_pop                                                             1
## Access_to_online_governance_0_low_1_high                                              1
## Media_censorship_0_frequent_4_rare                                                    1
## Life_expectancy_at_60_years                                                           1
## Premature_deaths_from_non_communicable_diseases_deaths_100_000_people                 1
## Access_to_essential_services_0_none_100_full_coverage                                 1
## Access_to_quality_healthcare_0_unequal_4_equal                                        1
## Outdoor_air_pollution_attributable_deaths_deaths_100_000_people                       1
## Greenhouse_gas_emissions_CO2_equivalents_GDP                                          1
## Biome_protection                                                                      1
## Political_rights_0_no_rights_40_full_rights                                           1
## Freedom_of_expression_0_no_freedom_1_full_freedom                                     1
## Freedom_of_religion_0_no_freedom_4_full_freedom                                       1
## Access_to_justice_0_non_existent_1_observed                                           1
## Property_rights_for_women_0_no_rights_5_full_rights                                   1
## Vulnerable_employment_pct_of_employees                                                1
## Early_marriage_pct_of_women                                                           1
## Satisfied_demand_for_contraception_pct_of_women                                       1
## Corruption_0_high_100_low                                                             1
## Acceptance_of_gays_and_lesbians_0_low_100_high                                        1
## Discrimination_and_violence_against_minorities_1_low_10_high                          1
## Equality_of_political_power_by_gender_0_unequal_power_4_equal_power                   1
## Equality_of_political_power_by_socioeconomic_position_0_unequal_power_4_equal_power   1
## Equality_of_political_power_by_social_group_0_unequal_power_4_equal_power             1
## Years_of_tertiary_schooling                                                           1
## Women_s_average_years_in_school                                                       1
## Globally_ranked_universities_points                                                   1
## Percent_of_tertiary_students_enrolled_in_globally_ranked_universities                 1
## 
##                  MR1
## SS loadings    24.91
## Proportion Var  0.49
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## The degrees of freedom for the null model are  1275  and the objective function was  95.2 with Chi Square of  21727
## The degrees of freedom for the model are 1224  and the objective function was  58.6 
## 
## The root mean square of the residuals (RMSR) is  0.13 
## The df corrected root mean square of the residuals is  0.13 
## 
## The harmonic number of observations is  182 with the empirical chi square  7941  with prob <  0 
## The total number of observations was  247  with Likelihood Chi Square =  13334  with prob <  0 
## 
## Tucker Lewis Index of factoring reliability =  0.381
## RMSEA index =  0.2  and the 90 % confidence intervals are  0.197 0.204
## BIC =  6591
## Fit based upon off diagonal values = 0.93
#method variance
spi_fa_methods = fa_all_methods(spi_indicators_imp, messages = F)

#heatmap
spi_fa_methods$loadings %>% GG_heatmap()

spi_fa_methods$scores %>% GG_heatmap()

#save
d$SPI_g = spi_fa$scores[, 1] %>% standardize()

#put the imputed values back into main, reverse as needed
for (i in seq_along(spi_indicators_imp)) {
  #positive loading
  v = names(spi_indicators_imp)[i]
  if (spi_fa$loadings[i] > 0) {
    d[[v]] = spi_indicators_imp[[v]]
  } else {
    d[[v]] = spi_indicators_imp[[v]] * -1
  }
}

#correlations with other indexes
d %>% 
  select(SPI_g, Human_Development_Index_HDI, Social_Progress_Index) %>% 
  wtd.cors(weight = d$population2017 %>% sqrt())
##                             SPI_g Human_Development_Index_HDI
## SPI_g                       1.000                       0.965
## Human_Development_Index_HDI 0.965                       1.000
## Social_Progress_Index       0.991                       0.953
##                             Social_Progress_Index
## SPI_g                                       0.991
## Human_Development_Index_HDI                 0.953
## Social_Progress_Index                       1.000
#no weights
d %>% 
  select(SPI_g, Human_Development_Index_HDI, Social_Progress_Index) %>% 
  wtd.cors()
##                             SPI_g Human_Development_Index_HDI
## SPI_g                       1.000                       0.959
## Human_Development_Index_HDI 0.959                       1.000
## Social_Progress_Index       0.993                       0.950
##                             Social_Progress_Index
## SPI_g                                       0.993
## Human_Development_Index_HDI                 0.950
## Social_Progress_Index                       1.000
#variable lists
main_vars = c("SPI_g", "SAS_IQc", "x95pct_IQc", "x05pct_IQc", "IC_reg", "LC_reg", "IC_delta", "LC_delta")
main_vars2 = c(main_vars, names(spi_indicators_imp), econ_outcomes)

SPI & econ

Add in the economic variables for a more varied index

#collect data, use the original form
spi2_indicators = cbind(
  spi_indicators,
  d %>% select(!!econ_outcomes)
)
spi2_indicators %>% miss_amount()
## cases with missing data  vars with missing data cells with missing data 
##                   0.770                   1.000                   0.247
#impute
spi2_indicators_imp = spi2_indicators %>% miss_impute(max_na = 10)

#amount after imputing
spi2_indicators_imp %>% miss_by_case() %>% table2()
#factor analyze
spi2_fa = fa(spi2_indicators_imp)
## In smc, smcs > 1 were set to 1.0
## In smc, smcs < 0 were set to .0
## In smc, smcs > 1 were set to 1.0
## In smc, smcs < 0 were set to .0
## In smc, smcs > 1 were set to 1.0
## In smc, smcs < 0 were set to .0
## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was done
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
spi2_fa
## Factor Analysis using method =  minres
## Call: fa(r = spi2_indicators_imp)
## Standardized loadings (pattern matrix) based upon correlation matrix
##                                                                                       MR1
## Undernourishment_pct_of_pop                                                         -0.79
## Maternal_mortality_rate_deaths_100_000_live_births                                  -0.84
## Child_mortality_rate_deaths_1_000_live_births                                       -0.88
## Child_stunting_pct_of_children                                                      -0.88
## Deaths_from_infectious_diseases_deaths_100_000_people                               -0.80
## Access_to_at_least_basic_drinking_water_pct_of_pop                                   0.86
## Access_to_piped_water_pct_of_pop                                                     0.84
## Access_to_at_least_basic_sanitation_facilities_pct_of_pop                            0.85
## Rural_open_defecation_pct_of_pop                                                    -0.64
## Access_to_electricity_pct_of_pop                                                     0.81
## Quality_of_electricity_supply_1_low_7_high                                           0.87
## Household_air_pollution_attributable_deaths_deaths_100_000_people                   -0.84
## Access_to_clean_fuels_and_technology_for_cooking_pct_of_pop                          0.86
## Homicide_rate_deaths_100_000_people                                                 -0.14
## Perceived_criminality_1_low_5_high                                                  -0.56
## Political_killings_and_torture_0_low_freedom_1_high_freedom                          0.63
## Traffic_deaths_deaths_100_000_people                                                -0.68
## Adult_literacy_rate_pct_of_pop_aged_15plus                                           0.77
## Primary_school_enrollment_pct_of_children                                            0.62
## Secondary_school_enrollment_pct_of_children                                          0.90
## Gender_parity_in_secondary_enrollment_distance_from_parity                           0.51
## Access_to_quality_education_0_unequal_4_equal                                        0.76
## Mobile_telephone_subscriptions_subscriptions_100_people                              0.64
## Internet_users_pct_of_pop                                                            0.91
## Access_to_online_governance_0_low_1_high                                             0.75
## Media_censorship_0_frequent_4_rare                                                   0.44
## Life_expectancy_at_60_years                                                          0.84
## Premature_deaths_from_non_communicable_diseases_deaths_100_000_people               -0.59
## Access_to_essential_services_0_none_100_full_coverage                                0.95
## Access_to_quality_healthcare_0_unequal_4_equal                                       0.84
## Outdoor_air_pollution_attributable_deaths_deaths_100_000_people                     -0.18
## Greenhouse_gas_emissions_CO2_equivalents_GDP                                        -0.43
## Biome_protection                                                                     0.21
## Political_rights_0_no_rights_40_full_rights                                          0.59
## Freedom_of_expression_0_no_freedom_1_full_freedom                                    0.44
## Freedom_of_religion_0_no_freedom_4_full_freedom                                      0.30
## Access_to_justice_0_non_existent_1_observed                                          0.65
## Property_rights_for_women_0_no_rights_5_full_rights                                  0.67
## Vulnerable_employment_pct_of_employees                                              -0.88
## Early_marriage_pct_of_women                                                         -0.75
## Satisfied_demand_for_contraception_pct_of_women                                      0.65
## Corruption_0_high_100_low                                                            0.80
## Acceptance_of_gays_and_lesbians_0_low_100_high                                       0.61
## Discrimination_and_violence_against_minorities_1_low_10_high                        -0.45
## Equality_of_political_power_by_gender_0_unequal_power_4_equal_power                  0.50
## Equality_of_political_power_by_socioeconomic_position_0_unequal_power_4_equal_power  0.47
## Equality_of_political_power_by_social_group_0_unequal_power_4_equal_power            0.45
## Years_of_tertiary_schooling                                                          0.78
## Women_s_average_years_in_school                                                      0.87
## Globally_ranked_universities_points                                                  0.29
## Percent_of_tertiary_students_enrolled_in_globally_ranked_universities                0.58
## median_income                                                                        0.91
## median_income_hh                                                                     0.85
## GNI_2019                                                                             0.93
## GNI_growth                                                                           0.16
## GDP_2019                                                                             0.92
## GDP_growth                                                                           0.17
##                                                                                        h2
## Undernourishment_pct_of_pop                                                         0.617
## Maternal_mortality_rate_deaths_100_000_live_births                                  0.702
## Child_mortality_rate_deaths_1_000_live_births                                       0.771
## Child_stunting_pct_of_children                                                      0.775
## Deaths_from_infectious_diseases_deaths_100_000_people                               0.636
## Access_to_at_least_basic_drinking_water_pct_of_pop                                  0.739
## Access_to_piped_water_pct_of_pop                                                    0.713
## Access_to_at_least_basic_sanitation_facilities_pct_of_pop                           0.724
## Rural_open_defecation_pct_of_pop                                                    0.414
## Access_to_electricity_pct_of_pop                                                    0.651
## Quality_of_electricity_supply_1_low_7_high                                          0.751
## Household_air_pollution_attributable_deaths_deaths_100_000_people                   0.706
## Access_to_clean_fuels_and_technology_for_cooking_pct_of_pop                         0.748
## Homicide_rate_deaths_100_000_people                                                 0.019
## Perceived_criminality_1_low_5_high                                                  0.316
## Political_killings_and_torture_0_low_freedom_1_high_freedom                         0.400
## Traffic_deaths_deaths_100_000_people                                                0.457
## Adult_literacy_rate_pct_of_pop_aged_15plus                                          0.588
## Primary_school_enrollment_pct_of_children                                           0.384
## Secondary_school_enrollment_pct_of_children                                         0.807
## Gender_parity_in_secondary_enrollment_distance_from_parity                          0.255
## Access_to_quality_education_0_unequal_4_equal                                       0.577
## Mobile_telephone_subscriptions_subscriptions_100_people                             0.416
## Internet_users_pct_of_pop                                                           0.833
## Access_to_online_governance_0_low_1_high                                            0.562
## Media_censorship_0_frequent_4_rare                                                  0.191
## Life_expectancy_at_60_years                                                         0.709
## Premature_deaths_from_non_communicable_diseases_deaths_100_000_people               0.353
## Access_to_essential_services_0_none_100_full_coverage                               0.904
## Access_to_quality_healthcare_0_unequal_4_equal                                      0.711
## Outdoor_air_pollution_attributable_deaths_deaths_100_000_people                     0.033
## Greenhouse_gas_emissions_CO2_equivalents_GDP                                        0.186
## Biome_protection                                                                    0.046
## Political_rights_0_no_rights_40_full_rights                                         0.350
## Freedom_of_expression_0_no_freedom_1_full_freedom                                   0.198
## Freedom_of_religion_0_no_freedom_4_full_freedom                                     0.087
## Access_to_justice_0_non_existent_1_observed                                         0.427
## Property_rights_for_women_0_no_rights_5_full_rights                                 0.446
## Vulnerable_employment_pct_of_employees                                              0.768
## Early_marriage_pct_of_women                                                         0.565
## Satisfied_demand_for_contraception_pct_of_women                                     0.424
## Corruption_0_high_100_low                                                           0.641
## Acceptance_of_gays_and_lesbians_0_low_100_high                                      0.374
## Discrimination_and_violence_against_minorities_1_low_10_high                        0.204
## Equality_of_political_power_by_gender_0_unequal_power_4_equal_power                 0.246
## Equality_of_political_power_by_socioeconomic_position_0_unequal_power_4_equal_power 0.222
## Equality_of_political_power_by_social_group_0_unequal_power_4_equal_power           0.200
## Years_of_tertiary_schooling                                                         0.603
## Women_s_average_years_in_school                                                     0.761
## Globally_ranked_universities_points                                                 0.085
## Percent_of_tertiary_students_enrolled_in_globally_ranked_universities               0.339
## median_income                                                                       0.833
## median_income_hh                                                                    0.729
## GNI_2019                                                                            0.860
## GNI_growth                                                                          0.024
## GDP_2019                                                                            0.851
## GDP_growth                                                                          0.027
##                                                                                        u2
## Undernourishment_pct_of_pop                                                         0.383
## Maternal_mortality_rate_deaths_100_000_live_births                                  0.298
## Child_mortality_rate_deaths_1_000_live_births                                       0.229
## Child_stunting_pct_of_children                                                      0.225
## Deaths_from_infectious_diseases_deaths_100_000_people                               0.364
## Access_to_at_least_basic_drinking_water_pct_of_pop                                  0.261
## Access_to_piped_water_pct_of_pop                                                    0.287
## Access_to_at_least_basic_sanitation_facilities_pct_of_pop                           0.276
## Rural_open_defecation_pct_of_pop                                                    0.586
## Access_to_electricity_pct_of_pop                                                    0.349
## Quality_of_electricity_supply_1_low_7_high                                          0.249
## Household_air_pollution_attributable_deaths_deaths_100_000_people                   0.294
## Access_to_clean_fuels_and_technology_for_cooking_pct_of_pop                         0.252
## Homicide_rate_deaths_100_000_people                                                 0.981
## Perceived_criminality_1_low_5_high                                                  0.684
## Political_killings_and_torture_0_low_freedom_1_high_freedom                         0.600
## Traffic_deaths_deaths_100_000_people                                                0.543
## Adult_literacy_rate_pct_of_pop_aged_15plus                                          0.412
## Primary_school_enrollment_pct_of_children                                           0.616
## Secondary_school_enrollment_pct_of_children                                         0.193
## Gender_parity_in_secondary_enrollment_distance_from_parity                          0.745
## Access_to_quality_education_0_unequal_4_equal                                       0.423
## Mobile_telephone_subscriptions_subscriptions_100_people                             0.584
## Internet_users_pct_of_pop                                                           0.167
## Access_to_online_governance_0_low_1_high                                            0.438
## Media_censorship_0_frequent_4_rare                                                  0.809
## Life_expectancy_at_60_years                                                         0.291
## Premature_deaths_from_non_communicable_diseases_deaths_100_000_people               0.647
## Access_to_essential_services_0_none_100_full_coverage                               0.096
## Access_to_quality_healthcare_0_unequal_4_equal                                      0.289
## Outdoor_air_pollution_attributable_deaths_deaths_100_000_people                     0.967
## Greenhouse_gas_emissions_CO2_equivalents_GDP                                        0.814
## Biome_protection                                                                    0.954
## Political_rights_0_no_rights_40_full_rights                                         0.650
## Freedom_of_expression_0_no_freedom_1_full_freedom                                   0.802
## Freedom_of_religion_0_no_freedom_4_full_freedom                                     0.913
## Access_to_justice_0_non_existent_1_observed                                         0.573
## Property_rights_for_women_0_no_rights_5_full_rights                                 0.554
## Vulnerable_employment_pct_of_employees                                              0.232
## Early_marriage_pct_of_women                                                         0.435
## Satisfied_demand_for_contraception_pct_of_women                                     0.576
## Corruption_0_high_100_low                                                           0.359
## Acceptance_of_gays_and_lesbians_0_low_100_high                                      0.626
## Discrimination_and_violence_against_minorities_1_low_10_high                        0.796
## Equality_of_political_power_by_gender_0_unequal_power_4_equal_power                 0.754
## Equality_of_political_power_by_socioeconomic_position_0_unequal_power_4_equal_power 0.778
## Equality_of_political_power_by_social_group_0_unequal_power_4_equal_power           0.800
## Years_of_tertiary_schooling                                                         0.397
## Women_s_average_years_in_school                                                     0.239
## Globally_ranked_universities_points                                                 0.915
## Percent_of_tertiary_students_enrolled_in_globally_ranked_universities               0.661
## median_income                                                                       0.167
## median_income_hh                                                                    0.271
## GNI_2019                                                                            0.140
## GNI_growth                                                                          0.976
## GDP_2019                                                                            0.149
## GDP_growth                                                                          0.973
##                                                                                     com
## Undernourishment_pct_of_pop                                                           1
## Maternal_mortality_rate_deaths_100_000_live_births                                    1
## Child_mortality_rate_deaths_1_000_live_births                                         1
## Child_stunting_pct_of_children                                                        1
## Deaths_from_infectious_diseases_deaths_100_000_people                                 1
## Access_to_at_least_basic_drinking_water_pct_of_pop                                    1
## Access_to_piped_water_pct_of_pop                                                      1
## Access_to_at_least_basic_sanitation_facilities_pct_of_pop                             1
## Rural_open_defecation_pct_of_pop                                                      1
## Access_to_electricity_pct_of_pop                                                      1
## Quality_of_electricity_supply_1_low_7_high                                            1
## Household_air_pollution_attributable_deaths_deaths_100_000_people                     1
## Access_to_clean_fuels_and_technology_for_cooking_pct_of_pop                           1
## Homicide_rate_deaths_100_000_people                                                   1
## Perceived_criminality_1_low_5_high                                                    1
## Political_killings_and_torture_0_low_freedom_1_high_freedom                           1
## Traffic_deaths_deaths_100_000_people                                                  1
## Adult_literacy_rate_pct_of_pop_aged_15plus                                            1
## Primary_school_enrollment_pct_of_children                                             1
## Secondary_school_enrollment_pct_of_children                                           1
## Gender_parity_in_secondary_enrollment_distance_from_parity                            1
## Access_to_quality_education_0_unequal_4_equal                                         1
## Mobile_telephone_subscriptions_subscriptions_100_people                               1
## Internet_users_pct_of_pop                                                             1
## Access_to_online_governance_0_low_1_high                                              1
## Media_censorship_0_frequent_4_rare                                                    1
## Life_expectancy_at_60_years                                                           1
## Premature_deaths_from_non_communicable_diseases_deaths_100_000_people                 1
## Access_to_essential_services_0_none_100_full_coverage                                 1
## Access_to_quality_healthcare_0_unequal_4_equal                                        1
## Outdoor_air_pollution_attributable_deaths_deaths_100_000_people                       1
## Greenhouse_gas_emissions_CO2_equivalents_GDP                                          1
## Biome_protection                                                                      1
## Political_rights_0_no_rights_40_full_rights                                           1
## Freedom_of_expression_0_no_freedom_1_full_freedom                                     1
## Freedom_of_religion_0_no_freedom_4_full_freedom                                       1
## Access_to_justice_0_non_existent_1_observed                                           1
## Property_rights_for_women_0_no_rights_5_full_rights                                   1
## Vulnerable_employment_pct_of_employees                                                1
## Early_marriage_pct_of_women                                                           1
## Satisfied_demand_for_contraception_pct_of_women                                       1
## Corruption_0_high_100_low                                                             1
## Acceptance_of_gays_and_lesbians_0_low_100_high                                        1
## Discrimination_and_violence_against_minorities_1_low_10_high                          1
## Equality_of_political_power_by_gender_0_unequal_power_4_equal_power                   1
## Equality_of_political_power_by_socioeconomic_position_0_unequal_power_4_equal_power   1
## Equality_of_political_power_by_social_group_0_unequal_power_4_equal_power             1
## Years_of_tertiary_schooling                                                           1
## Women_s_average_years_in_school                                                       1
## Globally_ranked_universities_points                                                   1
## Percent_of_tertiary_students_enrolled_in_globally_ranked_universities                 1
## median_income                                                                         1
## median_income_hh                                                                      1
## GNI_2019                                                                              1
## GNI_growth                                                                            1
## GDP_2019                                                                              1
## GDP_growth                                                                            1
## 
##                  MR1
## SS loadings    27.96
## Proportion Var  0.49
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## The degrees of freedom for the null model are  1596  and the objective function was  150 with Chi Square of  33981
## The degrees of freedom for the model are 1539  and the objective function was  107 
## 
## The root mean square of the residuals (RMSR) is  0.12 
## The df corrected root mean square of the residuals is  0.13 
## 
## The harmonic number of observations is  179 with the empirical chi square  8881  with prob <  0 
## The total number of observations was  247  with Likelihood Chi Square =  24026  with prob <  0 
## 
## Tucker Lewis Index of factoring reliability =  0.278
## RMSEA index =  0.243  and the 90 % confidence intervals are  0.241 0.246
## BIC =  15547
## Fit based upon off diagonal values = 0.94
#method variance
spi2_fa_methods = fa_all_methods(spi2_indicators_imp, messages = F)

#heatmap
spi2_fa_methods$loadings %>% GG_heatmap()

spi2_fa_methods$scores %>% GG_heatmap()

#save
d$SPI2_g = spi2_fa$scores[, 1] %>% standardize()

#correlations with other indexes
d %>% 
  select(SPI_g, SPI2_g, Human_Development_Index_HDI, Social_Progress_Index) %>% 
  wtd.cors(weight = d$population2017 %>% sqrt())
##                             SPI_g SPI2_g Human_Development_Index_HDI
## SPI_g                       1.000  0.998                       0.965
## SPI2_g                      0.998  1.000                       0.972
## Human_Development_Index_HDI 0.965  0.972                       1.000
## Social_Progress_Index       0.991  0.987                       0.953
##                             Social_Progress_Index
## SPI_g                                       0.991
## SPI2_g                                      0.987
## Human_Development_Index_HDI                 0.953
## Social_Progress_Index                       1.000

Subset and range restriction

#keep only cases with data for mean and tails
#otherwise, the model fits are done on slightly different subsets
d2 = d %>% filter(!is.na(SAS_IQc), !is.na(x95pct_IQc))

#LV12 coverage
d$LV2012estimatedIQ %>% miss_count(reverse = T)
## [1] 195
d2$LV2012estimatedIQ %>% miss_count(reverse = T)
## [1] 97
#range restriction in mean ability
wtd_sd(d$LV2012estimatedIQ, d$population2017 %>% sqrt())
## [1] 10.9
wtd_sd(d2$LV2012estimatedIQ, d2$population2017 %>% sqrt())
## [1] 8.05
wtd_sd(d2$LV2012estimatedIQ, d2$population2017 %>% sqrt()) / wtd_sd(d$LV2012estimatedIQ, d$population2017 %>% sqrt())
## [1] 0.739
#range restriction in SPI g
wtd_sd(d$SPI_g, d$population2017 %>% sqrt())
## [1] 1
wtd_sd(d2$SPI_g, d2$population2017 %>% sqrt())
## [1] 0.625

Correlation matrices

Basic results.

#no weights
wtd.cors(d %>% select(LV2012estimatedIQ, SAS_IQc, x95pct_IQc, x05pct_IQc, IC_reg, LC_reg, IC_delta, LC_delta, SPI_g))
##                   LV2012estimatedIQ   SAS_IQc x95pct_IQc x05pct_IQc    IC_reg
## LV2012estimatedIQ            1.0000  9.09e-01      0.892     0.9151 -2.47e-02
## SAS_IQc                      0.9087  1.00e+00      0.980     0.9812 -4.84e-16
## x95pct_IQc                   0.8920  9.80e-01      1.000     0.9342  1.99e-01
## x05pct_IQc                   0.9151  9.81e-01      0.934     1.0000 -1.37e-01
## IC_reg                      -0.0247 -4.84e-16      0.199    -0.1368  1.00e+00
## LC_reg                       0.0494  5.98e-16     -0.141     0.1932 -7.08e-01
## IC_delta                    -0.6412 -6.68e-01     -0.506    -0.7568  7.45e-01
## LC_delta                    -0.1139 -1.75e-01     -0.311     0.0181 -6.97e-01
## SPI_g                        0.8034  8.10e-01      0.838     0.7700  2.51e-01
##                      LC_reg IC_delta LC_delta  SPI_g
## LV2012estimatedIQ  4.94e-02   -0.641  -0.1139  0.803
## SAS_IQc            5.98e-16   -0.668  -0.1754  0.810
## x95pct_IQc        -1.41e-01   -0.506  -0.3108  0.838
## x05pct_IQc         1.93e-01   -0.757   0.0181  0.770
## IC_reg            -7.08e-01    0.745  -0.6969  0.251
## LC_reg             1.00e+00   -0.527   0.9845 -0.155
## IC_delta          -5.27e-01    1.000  -0.4018 -0.352
## LC_delta           9.84e-01   -0.402   1.0000 -0.291
## SPI_g             -1.55e-01   -0.352  -0.2912  1.000
#weights
wtd.cors(d %>% select(LV2012estimatedIQ, SAS_IQc, x95pct_IQc, x05pct_IQc, IC_reg, LC_reg, IC_delta, LC_delta, SPI_g),
         weight = d$population2017 %>% sqrt())
##                   LV2012estimatedIQ SAS_IQc x95pct_IQc x05pct_IQc  IC_reg
## LV2012estimatedIQ             1.000  0.9127     0.8934     0.9235 -0.1542
## SAS_IQc                       0.913  1.0000     0.9776     0.9875 -0.1156
## x95pct_IQc                    0.893  0.9776     1.0000     0.9419  0.0959
## x05pct_IQc                    0.924  0.9875     0.9419     1.0000 -0.2248
## IC_reg                       -0.154 -0.1156     0.0959    -0.2248  1.0000
## LC_reg                        0.142  0.0792    -0.0707     0.2354 -0.7088
## IC_delta                     -0.682 -0.7018    -0.5362    -0.7724  0.7887
## LC_delta                     -0.057 -0.1362    -0.2804     0.0218 -0.6795
## SPI_g                         0.783  0.7575     0.8043     0.7229  0.2013
##                    LC_reg IC_delta LC_delta  SPI_g
## LV2012estimatedIQ  0.1425   -0.682  -0.0570  0.783
## SAS_IQc            0.0792   -0.702  -0.1362  0.758
## x95pct_IQc        -0.0707   -0.536  -0.2804  0.804
## x05pct_IQc         0.2354   -0.772   0.0218  0.723
## IC_reg            -0.7088    0.789  -0.6795  0.201
## LC_reg             1.0000   -0.557   0.9768 -0.108
## IC_delta          -0.5573    1.000  -0.4031 -0.322
## LC_delta           0.9768   -0.403   1.0000 -0.270
## SPI_g             -0.1084   -0.322  -0.2703  1.000
#'complete cases'
wtd.cors(d2 %>% select(LV2012estimatedIQ, SAS_IQc, x95pct_IQc, x05pct_IQc, IC_reg, LC_reg, IC_delta, LC_delta, SPI_g))
##                   LV2012estimatedIQ   SAS_IQc x95pct_IQc x05pct_IQc    IC_reg
## LV2012estimatedIQ            1.0000  9.19e-01      0.892     0.9151 -2.47e-02
## SAS_IQc                      0.9190  1.00e+00      0.980     0.9812 -4.84e-16
## x95pct_IQc                   0.8920  9.80e-01      1.000     0.9342  1.99e-01
## x05pct_IQc                   0.9151  9.81e-01      0.934     1.0000 -1.37e-01
## IC_reg                      -0.0247 -4.84e-16      0.199    -0.1368  1.00e+00
## LC_reg                       0.0494  5.98e-16     -0.141     0.1932 -7.08e-01
## IC_delta                    -0.6412 -6.68e-01     -0.506    -0.7568  7.45e-01
## LC_delta                    -0.1139 -1.75e-01     -0.311     0.0181 -6.97e-01
## SPI_g                        0.7284  8.11e-01      0.838     0.7700  2.51e-01
##                      LC_reg IC_delta LC_delta  SPI_g
## LV2012estimatedIQ  4.94e-02   -0.641  -0.1139  0.728
## SAS_IQc            5.98e-16   -0.668  -0.1754  0.811
## x95pct_IQc        -1.41e-01   -0.506  -0.3108  0.838
## x05pct_IQc         1.93e-01   -0.757   0.0181  0.770
## IC_reg            -7.08e-01    0.745  -0.6969  0.251
## LC_reg             1.00e+00   -0.527   0.9845 -0.155
## IC_delta          -5.27e-01    1.000  -0.4018 -0.352
## LC_delta           9.84e-01   -0.402   1.0000 -0.291
## SPI_g             -1.55e-01   -0.352  -0.2912  1.000
#weights
wtd.cors(d2 %>% select(LV2012estimatedIQ, SAS_IQc, x95pct_IQc, x05pct_IQc, IC_reg, LC_reg, IC_delta, LC_delta, SPI_g),
         weight = d2$population2017 %>% sqrt())
##                   LV2012estimatedIQ SAS_IQc x95pct_IQc x05pct_IQc  IC_reg
## LV2012estimatedIQ             1.000  0.9241     0.8934     0.9235 -0.1542
## SAS_IQc                       0.924  1.0000     0.9776     0.9875 -0.1156
## x95pct_IQc                    0.893  0.9776     1.0000     0.9419  0.0959
## x05pct_IQc                    0.924  0.9875     0.9419     1.0000 -0.2248
## IC_reg                       -0.154 -0.1156     0.0959    -0.2248  1.0000
## LC_reg                        0.142  0.0792    -0.0707     0.2354 -0.7088
## IC_delta                     -0.682 -0.7018    -0.5362    -0.7724  0.7887
## LC_delta                     -0.057 -0.1362    -0.2804     0.0218 -0.6795
## SPI_g                         0.619  0.7589     0.8043     0.7229  0.2013
##                    LC_reg IC_delta LC_delta  SPI_g
## LV2012estimatedIQ  0.1425   -0.682  -0.0570  0.619
## SAS_IQc            0.0792   -0.702  -0.1362  0.759
## x95pct_IQc        -0.0707   -0.536  -0.2804  0.804
## x05pct_IQc         0.2354   -0.772   0.0218  0.723
## IC_reg            -0.7088    0.789  -0.6795  0.201
## LC_reg             1.0000   -0.557   0.9768 -0.108
## IC_delta          -0.5573    1.000  -0.4031 -0.322
## LC_delta           0.9768   -0.403   1.0000 -0.270
## SPI_g             -0.1084   -0.322  -0.2703  1.000
#with distributions
d %>% select(LV2012estimatedIQ, SAS_IQc, x95pct_IQc, x05pct_IQc, IC_reg, LC_reg, IC_delta, LC_delta, SPI_g) %>% ggpairs(columnLabels = c("Lynn 2012", "Mean ability", "95th ability", "5th ability", "95th ability resid", "5th ability resid", "95th ability subt.", "5th ability subt.", "General SPI"))
## Warning: Removed 52 rows containing non-finite values (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 142 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 150 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 150 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 150 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 150 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 150 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 150 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 77 rows containing missing values
## Warning: Removed 142 rows containing missing values (`geom_point()`).
## Warning: Removed 140 rows containing non-finite values (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 148 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 148 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 148 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 148 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 148 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 148 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 146 rows containing missing values
## Warning: Removed 150 rows containing missing values (`geom_point()`).
## Warning: Removed 148 rows containing missing values (`geom_point()`).
## Warning: Removed 148 rows containing non-finite values (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 148 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 148 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 148 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 148 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 148 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 154 rows containing missing values
## Warning: Removed 150 rows containing missing values (`geom_point()`).
## Warning: Removed 148 rows containing missing values (`geom_point()`).
## Removed 148 rows containing missing values (`geom_point()`).
## Warning: Removed 148 rows containing non-finite values (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 148 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 148 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 148 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 148 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 154 rows containing missing values
## Warning: Removed 150 rows containing missing values (`geom_point()`).
## Warning: Removed 148 rows containing missing values (`geom_point()`).
## Removed 148 rows containing missing values (`geom_point()`).
## Removed 148 rows containing missing values (`geom_point()`).
## Warning: Removed 148 rows containing non-finite values (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 148 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 148 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 148 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 154 rows containing missing values
## Warning: Removed 150 rows containing missing values (`geom_point()`).
## Warning: Removed 148 rows containing missing values (`geom_point()`).
## Removed 148 rows containing missing values (`geom_point()`).
## Removed 148 rows containing missing values (`geom_point()`).
## Removed 148 rows containing missing values (`geom_point()`).
## Warning: Removed 148 rows containing non-finite values (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 148 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 148 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 154 rows containing missing values
## Warning: Removed 150 rows containing missing values (`geom_point()`).
## Warning: Removed 148 rows containing missing values (`geom_point()`).
## Removed 148 rows containing missing values (`geom_point()`).
## Removed 148 rows containing missing values (`geom_point()`).
## Removed 148 rows containing missing values (`geom_point()`).
## Removed 148 rows containing missing values (`geom_point()`).
## Warning: Removed 148 rows containing non-finite values (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 148 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 154 rows containing missing values
## Warning: Removed 150 rows containing missing values (`geom_point()`).
## Warning: Removed 148 rows containing missing values (`geom_point()`).
## Removed 148 rows containing missing values (`geom_point()`).
## Removed 148 rows containing missing values (`geom_point()`).
## Removed 148 rows containing missing values (`geom_point()`).
## Removed 148 rows containing missing values (`geom_point()`).
## Removed 148 rows containing missing values (`geom_point()`).
## Warning: Removed 148 rows containing non-finite values (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 154 rows containing missing values
## Warning: Removed 77 rows containing missing values (`geom_point()`).
## Warning: Removed 146 rows containing missing values (`geom_point()`).
## Warning: Removed 154 rows containing missing values (`geom_point()`).
## Removed 154 rows containing missing values (`geom_point()`).
## Removed 154 rows containing missing values (`geom_point()`).
## Removed 154 rows containing missing values (`geom_point()`).
## Removed 154 rows containing missing values (`geom_point()`).
## Removed 154 rows containing missing values (`geom_point()`).
## Warning: Removed 74 rows containing non-finite values (`stat_density()`).

GG_save("figs/ggpairs.png")
## Warning: Removed 52 rows containing non-finite values (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 142 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 150 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 150 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 150 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 150 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 150 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 150 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 77 rows containing missing values
## Warning: Removed 142 rows containing missing values (`geom_point()`).
## Warning: Removed 140 rows containing non-finite values (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 148 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 148 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 148 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 148 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 148 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 148 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 146 rows containing missing values
## Warning: Removed 150 rows containing missing values (`geom_point()`).
## Warning: Removed 148 rows containing missing values (`geom_point()`).
## Warning: Removed 148 rows containing non-finite values (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 148 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 148 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 148 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 148 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 148 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 154 rows containing missing values
## Warning: Removed 150 rows containing missing values (`geom_point()`).
## Warning: Removed 148 rows containing missing values (`geom_point()`).
## Removed 148 rows containing missing values (`geom_point()`).
## Warning: Removed 148 rows containing non-finite values (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 148 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 148 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 148 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 148 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 154 rows containing missing values
## Warning: Removed 150 rows containing missing values (`geom_point()`).
## Warning: Removed 148 rows containing missing values (`geom_point()`).
## Removed 148 rows containing missing values (`geom_point()`).
## Removed 148 rows containing missing values (`geom_point()`).
## Warning: Removed 148 rows containing non-finite values (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 148 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 148 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 148 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 154 rows containing missing values
## Warning: Removed 150 rows containing missing values (`geom_point()`).
## Warning: Removed 148 rows containing missing values (`geom_point()`).
## Removed 148 rows containing missing values (`geom_point()`).
## Removed 148 rows containing missing values (`geom_point()`).
## Removed 148 rows containing missing values (`geom_point()`).
## Warning: Removed 148 rows containing non-finite values (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 148 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 148 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 154 rows containing missing values
## Warning: Removed 150 rows containing missing values (`geom_point()`).
## Warning: Removed 148 rows containing missing values (`geom_point()`).
## Removed 148 rows containing missing values (`geom_point()`).
## Removed 148 rows containing missing values (`geom_point()`).
## Removed 148 rows containing missing values (`geom_point()`).
## Removed 148 rows containing missing values (`geom_point()`).
## Warning: Removed 148 rows containing non-finite values (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 148 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 154 rows containing missing values
## Warning: Removed 150 rows containing missing values (`geom_point()`).
## Warning: Removed 148 rows containing missing values (`geom_point()`).
## Removed 148 rows containing missing values (`geom_point()`).
## Removed 148 rows containing missing values (`geom_point()`).
## Removed 148 rows containing missing values (`geom_point()`).
## Removed 148 rows containing missing values (`geom_point()`).
## Removed 148 rows containing missing values (`geom_point()`).
## Warning: Removed 148 rows containing non-finite values (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 154 rows containing missing values
## Warning: Removed 77 rows containing missing values (`geom_point()`).
## Warning: Removed 146 rows containing missing values (`geom_point()`).
## Warning: Removed 154 rows containing missing values (`geom_point()`).
## Removed 154 rows containing missing values (`geom_point()`).
## Removed 154 rows containing missing values (`geom_point()`).
## Removed 154 rows containing missing values (`geom_point()`).
## Removed 154 rows containing missing values (`geom_point()`).
## Removed 154 rows containing missing values (`geom_point()`).
## Warning: Removed 74 rows containing non-finite values (`stat_density()`).

Bivariate approach

Same as Rindermann.

#calculate correlation with mean and top and bottom for each outcome
bivar_results = map_df(names(spi_indicators), function(n) {
  #correlations
  cors = cbind(
    spi = d2[[n]], 
    mean = d2$SAS_IQc,
    upper = d2$x95pct_IQc,
    upper_reg = d2$IC_reg,
    lower = d2$x05pct_IQc,
    lower_reg = d2$LC_reg
    ) %>% 
    wtd.cors()
  
  cors_wt = cbind(
    spi = d2[[n]], 
    mean = d2$SAS_IQc,
    upper = d2$x95pct_IQc,
    upper_reg = d2$IC_reg,
    lower = d2$x05pct_IQc,
    lower_reg = d2$LC_reg
    ) %>% 
    wtd.cors(weight = d2$population2017 %>% sqrt())
  
  #results
  tibble(
    indicator = n,
    #no weights
    mean = cors[1, 2],
    IC = cors[1, 3],
    IC_reg = cors[1, 4],
    LC = cors[1, 5],
    LC_reg = cors[1, 6],
    
    #weights
    mean_wt = cors_wt[1, 2],
    IC_wt = cors_wt[1, 3],
    IC_reg_wt = cors_wt[1, 4],
    LC_wt = cors_wt[1, 5],
    LC_reg_wt = cors_wt[1, 6],
  )
})

#summary stats
bivar_results[-1] %>% 
  #mean by column
  describe() %>% 
  as.data.frame() %>% 
  mutate(predictor = names(bivar_results[-1])) %>% 
  select(predictor, everything()) %>% 
  select(mean, median, sd, mad, min, max)
#p values
t.test(bivar_results$mean, bivar_results$IC, paired = T)
## 
##  Paired t-test
## 
## data:  bivar_results$mean and bivar_results$IC
## t = -4, df = 50, p-value = 2e-04
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.01885 -0.00629
## sample estimates:
## mean of the differences 
##                 -0.0126
t.test(bivar_results$mean_wt, bivar_results$IC_wt, paired = T)
## 
##  Paired t-test
## 
## data:  bivar_results$mean_wt and bivar_results$IC_wt
## t = -5, df = 50, p-value = 1e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.0324 -0.0135
## sample estimates:
## mean of the differences 
##                 -0.0229

Regression approach

Regress each variable on mean and residualized tails.

#regression function
get_reg_results = function(x, spatial_lag = F, data = d2) {
  map_df(x, function(n) {
    #reg forms
    if (spatial_lag) {
      form1 = as.formula(str_glue("{n} ~ SAS_IQc + {n}_lag"))
      form2 = as.formula(str_glue("{n} ~ SAS_IQc + IC_reg + {n}_lag"))
      form3 = as.formula(str_glue("{n} ~ SAS_IQc + LC_reg + {n}_lag"))
      form4 = as.formula(str_glue("{n} ~ SAS_IQc + IC_reg + LC_reg + {n}_lag"))
    } else {
      form1 = as.formula(str_glue("{n} ~ SAS_IQc"))
      form2 = as.formula(str_glue("{n} ~ SAS_IQc + IC_reg"))
      form3 = as.formula(str_glue("{n} ~ SAS_IQc + LC_reg"))
      form4 = as.formula(str_glue("{n} ~ SAS_IQc + IC_reg + LC_reg"))
    }

    #fits
    fit1 = ols(form1, data = data)
    fit2 = ols(form2, data = data)
    fit3 = ols(form3, data = data)
    fit4 = ols(form4, data = data)
    
    # + weights
    fit1_wt = ols(form1, data = data, weights = data$population2017 %>% sqrt())
    fit2_wt = ols(form2, data = data, weights = data$population2017 %>% sqrt())
    fit3_wt = ols(form3, data = data, weights = data$population2017 %>% sqrt())
    fit4_wt = ols(form4, data = data, weights = data$population2017 %>% sqrt())
  
    #formal tests
    lr1 = lrtest(fit1, fit2)
    lr2 = lrtest(fit1, fit3)
    lr3 = lrtest(fit2, fit4)
    lr1_wt = lrtest(fit1_wt, fit2_wt)
    lr2_wt = lrtest(fit1_wt, fit3_wt)
    lr3_wt = lrtest(fit2_wt, fit4_wt)
    
    #results
    tibble(
      indicator = n,
      #no weights
      fit1_r2 = fit1 %>% r2get(),
      fit2_r2 = fit2 %>% r2get(),
      fit3_r2 = fit3 %>% r2get(),
      fit4_r2 = fit4 %>% r2get(),
      
      #weights
      fit1_wt_r2 = fit1_wt %>% r2get(),
      fit2_wt_r2 = fit2_wt %>% r2get(),
      fit3_wt_r2 = fit3_wt %>% r2get(),
      fit4_wt_r2 = fit4_wt %>% r2get(),
      
      #lr tests
      LR1_p = lr1$stats[3],
      LR2_p = lr2$stats[3],
      LR3_p = lr3$stats[3],
      LR1_wt_p = lr1_wt$stats[3],
      LR2_wt_p = lr2_wt$stats[3],
      LR3_wt_p = lr3_wt$stats[3]
    )
  })
}


#summarize R2s
reg_results = get_reg_results(names(spi_indicators))
reg_results %>% select(fit1_r2:fit4_wt_r2) %>% colMeans()
##    fit1_r2    fit2_r2    fit3_r2    fit4_r2 fit1_wt_r2 fit2_wt_r2 fit3_wt_r2 
##      0.285      0.304      0.294      0.311      0.272      0.313      0.285 
## fit4_wt_r2 
##      0.316
#formal -- how many model tests are better?
reg_results %>% 
  select(LR1_p:LR3_wt_p) %>% 
  {
    plyr::adply(., 1, function(r) {
      tibble(
        LR1 = r$LR1_p < .01,
        LR2 = r$LR2_p < .01,
        LR3 = r$LR3_p < .01,
        LR1_wt = r$LR1_wt_p < .01,
        LR2_wt = r$LR2_wt_p < .01,
        LR3_wt = r$LR3_wt_p < .01
      )
    })
  } %>% 
  select(LR1:LR3_wt) %>% 
  colMeans()
##    LR1    LR2    LR3 LR1_wt LR2_wt LR3_wt 
## 0.1961 0.0784 0.0980 0.3922 0.0784 0.0588
#t tests
t.test(reg_results$fit1_wt_r2, reg_results$fit2_wt_r2, paired = T) %>% tidy()
t.test(reg_results$fit1_wt_r2, reg_results$fit3_wt_r2, paired = T) %>% tidy()
t.test(reg_results$fit1_wt_r2, reg_results$fit4_wt_r2, paired = T) %>% tidy()
t.test(reg_results$fit2_wt_r2, reg_results$fit4_wt_r2, paired = T) %>% tidy()

Economic outcomes

#summarize R2s
reg_results_econ = get_reg_results(econ_outcomes)
reg_results_econ %>% select(fit1_r2:fit4_wt_r2) %>% colMeans()
##    fit1_r2    fit2_r2    fit3_r2    fit4_r2 fit1_wt_r2 fit2_wt_r2 fit3_wt_r2 
##      0.373      0.406      0.379      0.401      0.370      0.440      0.394 
## fit4_wt_r2 
##      0.434
#formal -- how many model tests are better?
reg_results_econ %>% 
  select(LR1_p:LR3_wt_p) %>% 
  {
    plyr::adply(., 1, function(r) {
      tibble(
        LR1 = r$LR1_p < .01,
        LR2 = r$LR2_p < .01,
        LR3 = r$LR3_p < .01,
        LR1_wt = r$LR1_wt_p < .01,
        LR2_wt = r$LR2_wt_p < .01,
        LR3_wt = r$LR3_wt_p < .01
      )
    })
  } %>% 
  select(LR1:LR3_wt) %>% 
  colMeans()
##    LR1    LR2    LR3 LR1_wt LR2_wt LR3_wt 
##  0.333  0.167  0.000  0.833  0.167  0.000
#t tests
t.test(reg_results_econ$fit1_wt_r2, reg_results_econ$fit2_wt_r2, paired = T) %>% tidy()
t.test(reg_results_econ$fit1_wt_r2, reg_results_econ$fit3_wt_r2, paired = T) %>% tidy()
t.test(reg_results_econ$fit1_wt_r2, reg_results_econ$fit4_wt_r2, paired = T) %>% tidy()
t.test(reg_results_econ$fit2_wt_r2, reg_results_econ$fit4_wt_r2, paired = T) %>% tidy()

eSports and SPI S

Suggested by prior study.

#SPI
#standardize data before so we get standardized betas
SPI_std = d2 %>% select(SPI_g, SAS_IQc, IC_reg, LC_reg) %>% df_standardize()

#no weights
list(
  SPI_model1 = ols(SPI_g ~ SAS_IQc, data = SPI_std),
  SPI_model2 = ols(SPI_g ~ SAS_IQc + IC_reg, data = SPI_std),
  SPI_model3 = ols(SPI_g ~ SAS_IQc + LC_reg, data = SPI_std),
  SPI_model4 = ols(SPI_g ~ SAS_IQc + IC_reg + LC_reg, data = SPI_std)
) -> SPI_models

SPI_models %>% summarize_models()
#weights
list(
  SPI_model1_wt = ols(SPI_g ~ SAS_IQc, data = SPI_std, weights = d2$population2017 %>% sqrt()),
  SPI_model2_wt = ols(SPI_g ~ SAS_IQc + IC_reg, data = SPI_std, weights = d2$population2017 %>% sqrt()),
  SPI_model3_wt = ols(SPI_g ~ SAS_IQc + LC_reg, data = SPI_std, weights = d2$population2017 %>% sqrt()),
  SPI_model4_wt = ols(SPI_g ~ SAS_IQc + IC_reg + LC_reg, data = SPI_std, weights = d2$population2017 %>% sqrt())
) -> SPI_models_wt

SPI_models_wt %>% summarize_models()
#formal tests
lrtest(SPI_models[[1]], SPI_models[[2]])
## 
## Model 1: SPI_g ~ SAS_IQc
## Model 2: SPI_g ~ SAS_IQc + IC_reg
## 
## L.R. Chisq       d.f.          P 
##   13.92937    1.00000    0.00019
lrtest(SPI_models[[1]], SPI_models[[3]])
## 
## Model 1: SPI_g ~ SAS_IQc
## Model 2: SPI_g ~ SAS_IQc + LC_reg
## 
## L.R. Chisq       d.f.          P 
##     4.4426     1.0000     0.0351
lrtest(SPI_models[[2]], SPI_models[[4]])
## 
## Model 1: SPI_g ~ SAS_IQc + IC_reg
## Model 2: SPI_g ~ SAS_IQc + IC_reg + LC_reg
## 
## L.R. Chisq       d.f.          P 
##      0.384      1.000      0.535
lrtest(SPI_models_wt[[1]], SPI_models_wt[[2]])
## 
## Model 1: SPI_g ~ SAS_IQc
## Model 2: SPI_g ~ SAS_IQc + IC_reg
## 
## L.R. Chisq       d.f.          P 
##   2.15e+01   1.00e+00   3.53e-06
lrtest(SPI_models_wt[[1]], SPI_models_wt[[3]])
## 
## Model 1: SPI_g ~ SAS_IQc
## Model 2: SPI_g ~ SAS_IQc + LC_reg
## 
## L.R. Chisq       d.f.          P 
##      6.309      1.000      0.012
lrtest(SPI_models_wt[[2]], SPI_models_wt[[4]])
## 
## Model 1: SPI_g ~ SAS_IQc + IC_reg
## Model 2: SPI_g ~ SAS_IQc + IC_reg + LC_reg
## 
## L.R. Chisq       d.f.          P 
##      0.981      1.000      0.322

Alternative IC LC variables

#SPI
#standardize data before so we get standardized betas
SPI_std = d2 %>% select(SPI_g, SAS_IQc, IC_delta, LC_delta) %>% df_standardize()

#no weights
list(
  SPI_model1 = ols(SPI_g ~ SAS_IQc, data = SPI_std),
  SPI_model2 = ols(SPI_g ~ SAS_IQc + IC_delta, data = SPI_std),
  SPI_model3 = ols(SPI_g ~ SAS_IQc + LC_delta, data = SPI_std),
  SPI_model4 = ols(SPI_g ~ SAS_IQc + IC_delta + LC_delta, data = SPI_std)
) -> SPI_models

SPI_models %>% summarize_models()
#weights
list(
  SPI_model1_wt = ols(SPI_g ~ SAS_IQc, data = SPI_std, weights = d2$population2017 %>% sqrt()),
  SPI_model2_wt = ols(SPI_g ~ SAS_IQc + IC_delta, data = SPI_std, weights = d2$population2017 %>% sqrt()),
  SPI_model3_wt = ols(SPI_g ~ SAS_IQc + LC_delta, data = SPI_std, weights = d2$population2017 %>% sqrt()),
  SPI_model4_wt = ols(SPI_g ~ SAS_IQc + IC_delta + LC_delta, data = SPI_std, weights = d2$population2017 %>% sqrt())
) -> SPI_models_wt

SPI_models_wt %>% summarize_models()
#formal tests
lrtest(SPI_models[[1]], SPI_models[[2]])
## 
## Model 1: SPI_g ~ SAS_IQc
## Model 2: SPI_g ~ SAS_IQc + IC_delta
## 
## L.R. Chisq       d.f.          P 
##   13.92937    1.00000    0.00019
lrtest(SPI_models[[1]], SPI_models[[3]])
## 
## Model 1: SPI_g ~ SAS_IQc
## Model 2: SPI_g ~ SAS_IQc + LC_delta
## 
## L.R. Chisq       d.f.          P 
##     4.4426     1.0000     0.0351
lrtest(SPI_models[[2]], SPI_models[[4]])
## 
## Model 1: SPI_g ~ SAS_IQc + IC_delta
## Model 2: SPI_g ~ SAS_IQc + IC_delta + LC_delta
## 
## L.R. Chisq       d.f.          P 
##      0.384      1.000      0.535
lrtest(SPI_models_wt[[1]], SPI_models_wt[[2]])
## 
## Model 1: SPI_g ~ SAS_IQc
## Model 2: SPI_g ~ SAS_IQc + IC_delta
## 
## L.R. Chisq       d.f.          P 
##   2.15e+01   1.00e+00   3.53e-06
lrtest(SPI_models_wt[[1]], SPI_models_wt[[3]])
## 
## Model 1: SPI_g ~ SAS_IQc
## Model 2: SPI_g ~ SAS_IQc + LC_delta
## 
## L.R. Chisq       d.f.          P 
##      6.309      1.000      0.012
lrtest(SPI_models_wt[[2]], SPI_models_wt[[4]])
## 
## Model 1: SPI_g ~ SAS_IQc + IC_delta
## Model 2: SPI_g ~ SAS_IQc + IC_delta + LC_delta
## 
## L.R. Chisq       d.f.          P 
##      0.981      1.000      0.322

Gaming + Rindermann book chapter 8 + IMO

Replicate a few of his analyses with better approach.

#esports models
#standardize data before so we get standardized betas
esports_std = d2 %>% 
  select(general_mental_sports, SAS_IQc, IC_reg, LC_reg) %>% 
  df_standardize()

(esports1_wt = ols(general_mental_sports ~ SAS_IQc, data = esports_std, weights = d2$population2017 %>% sqrt()))
## Frequencies of Missing Values Due to Each Variable
## general_mental_sports               SAS_IQc             (weights) 
##                     2                     0                     2 
## 
## Linear Regression Model
##  
##  ols(formula = general_mental_sports ~ SAS_IQc, data = esports_std, 
##      weights = d2$population2017 %>% sqrt())
##  
##  
##                   Model Likelihood    Discrimination    
##                         Ratio Test           Indexes    
##  Obs       97    LR chi2     83.87    R2       0.579    
##  sigma52.3993    d.f.            1    R2 adj   0.574    
##  d.f.      95    Pr(> chi2) 0.0000    g        0.906    
##  
##  Residuals
##  
##      Min      1Q  Median      3Q     Max 
##  -1.7269 -0.8182 -0.3462  0.3079  1.6560 
##  
##  
##            Coef   S.E.   t     Pr(>|t|)
##  Intercept 0.3109 0.0746  4.16 <0.0001 
##  SAS_IQc   0.8130 0.0712 11.43 <0.0001 
## 
(esports2_wt = ols(general_mental_sports ~ SAS_IQc + IC_reg, data = esports_std, weights = d2$population2017 %>% sqrt()))
## Frequencies of Missing Values Due to Each Variable
## general_mental_sports               SAS_IQc                IC_reg 
##                     2                     0                     0 
##             (weights) 
##                     2 
## 
## Linear Regression Model
##  
##  ols(formula = general_mental_sports ~ SAS_IQc + IC_reg, data = esports_std, 
##      weights = d2$population2017 %>% sqrt())
##  
##  
##                   Model Likelihood    Discrimination    
##                         Ratio Test           Indexes    
##  Obs       97    LR chi2     88.62    R2       0.599    
##  sigma51.4050    d.f.            2    R2 adj   0.590    
##  d.f.      94    Pr(> chi2) 0.0000    g        0.950    
##  
##  Residuals
##  
##      Min      1Q  Median      3Q     Max 
##  -1.8023 -0.9060 -0.3210  0.2978  1.3017 
##  
##  
##            Coef   S.E.   t     Pr(>|t|)
##  Intercept 0.3182 0.0733  4.34 <0.0001 
##  SAS_IQc   0.8306 0.0703 11.82 <0.0001 
##  IC_reg    0.1468 0.0676  2.17 0.0325  
## 
#formal tests
lrtest(esports1_wt, esports2_wt)
## 
## Model 1: general_mental_sports ~ SAS_IQc
## Model 2: general_mental_sports ~ SAS_IQc + IC_reg
## 
## L.R. Chisq       d.f.          P 
##     4.7431     1.0000     0.0294
#dataset
chap8_std = d2 %>% 
  select(!!main_vars, airline_safety:tech_safety, IMO_rr) %>% 
  df_standardize(exclude_range_01 = F)

#airlines
(air_model1_wt = ols(airline_safety ~ SAS_IQc, data = chap8_std, weights = d2$population2017 %>% sqrt()))
## Frequencies of Missing Values Due to Each Variable
## airline_safety        SAS_IQc      (weights) 
##             63              0              2 
## 
## Linear Regression Model
##  
##  ols(formula = airline_safety ~ SAS_IQc, data = chap8_std, weights = d2$population2017 %>% 
##      sqrt())
##  
##  
##                   Model Likelihood    Discrimination    
##                         Ratio Test           Indexes    
##  Obs       35    LR chi2     12.76    R2       0.306    
##  sigma84.5452    d.f.            1    R2 adj   0.285    
##  d.f.      33    Pr(> chi2) 0.0004    g        0.573    
##  
##  Residuals
##  
##       Min       1Q   Median       3Q      Max 
##  -2.59278 -0.05492  0.37441  0.53806  1.52618 
##  
##  
##            Coef    S.E.   t     Pr(>|t|)
##  Intercept -0.3647 0.1583 -2.30 0.0277  
##  SAS_IQc    0.5657 0.1484  3.81 0.0006  
## 
(air_model2_wt = ols(airline_safety ~ SAS_IQc + IC_reg, data = chap8_std, weights = d2$population2017 %>% sqrt()))
## Frequencies of Missing Values Due to Each Variable
## airline_safety        SAS_IQc         IC_reg      (weights) 
##             63              0              0              2 
## 
## Linear Regression Model
##  
##  ols(formula = airline_safety ~ SAS_IQc + IC_reg, data = chap8_std, 
##      weights = d2$population2017 %>% sqrt())
##  
##  
##                   Model Likelihood    Discrimination    
##                         Ratio Test           Indexes    
##  Obs       35    LR chi2     21.29    R2       0.456    
##  sigma76.0065    d.f.            2    R2 adj   0.422    
##  d.f.      32    Pr(> chi2) 0.0000    g        0.777    
##  
##  Residuals
##  
##      Min      1Q  Median      3Q     Max 
##  -2.4405 -0.3236  0.2306  0.4726  1.0753 
##  
##  
##            Coef    S.E.   t     Pr(>|t|)
##  Intercept -0.4461 0.1449 -3.08 0.0043  
##  SAS_IQc    0.6778 0.1387  4.89 <0.0001 
##  IC_reg     0.4444 0.1495  2.97 0.0056  
## 
lrtest(air_model1_wt, air_model2_wt)
## 
## Model 1: airline_safety ~ SAS_IQc
## Model 2: airline_safety ~ SAS_IQc + IC_reg
## 
## L.R. Chisq       d.f.          P 
##    8.52972    1.00000    0.00349
#airlines
(road_model1_wt = ols(road_safety ~ SAS_IQc, data = chap8_std, weights = d2$population2017 %>% sqrt()))
## Frequencies of Missing Values Due to Each Variable
## road_safety     SAS_IQc   (weights) 
##           5           0           2 
## 
## Linear Regression Model
##  
##  ols(formula = road_safety ~ SAS_IQc, data = chap8_std, weights = d2$population2017 %>% 
##      sqrt())
##  
##  
##                   Model Likelihood    Discrimination    
##                         Ratio Test           Indexes    
##  Obs       94    LR chi2     25.24    R2       0.236    
##  sigma64.7808    d.f.            1    R2 adj   0.227    
##  d.f.      92    Pr(> chi2) 0.0000    g        0.527    
##  
##  Residuals
##  
##      Min      1Q  Median      3Q     Max 
##  -2.8884  0.0320  0.4099  0.6970  1.9414 
##  
##  
##            Coef    S.E.   t     Pr(>|t|)
##  Intercept -0.2051 0.0934 -2.20 0.0307  
##  SAS_IQc    0.4731 0.0889  5.32 <0.0001 
## 
(road_model2_wt = ols(road_safety ~ SAS_IQc + IC_reg, data = chap8_std, weights = d2$population2017 %>% sqrt()))
## Frequencies of Missing Values Due to Each Variable
## road_safety     SAS_IQc      IC_reg   (weights) 
##           5           0           0           2 
## 
## Linear Regression Model
##  
##  ols(formula = road_safety ~ SAS_IQc + IC_reg, data = chap8_std, 
##      weights = d2$population2017 %>% sqrt())
##  
##  
##                   Model Likelihood    Discrimination    
##                         Ratio Test           Indexes    
##  Obs       94    LR chi2     36.52    R2       0.322    
##  sigma61.3448    d.f.            2    R2 adj   0.307    
##  d.f.      91    Pr(> chi2) 0.0000    g        0.657    
##  
##  Residuals
##  
##       Min       1Q   Median       3Q      Max 
##  -2.63771 -0.04456  0.42493  0.69437  1.62877 
##  
##  
##            Coef    S.E.   t     Pr(>|t|)
##  Intercept -0.1952 0.0885 -2.21 0.0300  
##  SAS_IQc    0.5136 0.0850  6.04 <0.0001 
##  IC_reg     0.2819 0.0828  3.41 0.0010  
## 
lrtest(road_model1_wt, road_model2_wt)
## 
## Model 1: road_safety ~ SAS_IQc
## Model 2: road_safety ~ SAS_IQc + IC_reg
## 
## L.R. Chisq       d.f.          P 
##   1.13e+01   1.00e+00   7.86e-04
#airlines
(occu_model1_wt = ols(occu_safety ~ SAS_IQc, data = chap8_std, weights = d2$population2017 %>% sqrt()))
## Frequencies of Missing Values Due to Each Variable
## occu_safety     SAS_IQc   (weights) 
##           2           0           2 
## 
## Linear Regression Model
##  
##  ols(formula = occu_safety ~ SAS_IQc, data = chap8_std, weights = d2$population2017 %>% 
##      sqrt())
##  
##  
##                   Model Likelihood    Discrimination    
##                         Ratio Test           Indexes    
##  Obs       95    LR chi2     43.33    R2       0.366    
##  sigma58.5832    d.f.            1    R2 adj   0.359    
##  d.f.      93    Pr(> chi2) 0.0000    g        0.652    
##  
##  Residuals
##  
##       Min       1Q   Median       3Q      Max 
##  -2.46799 -0.32033  0.02525  0.67035  1.72501 
##  
##  
##            Coef    S.E.   t     Pr(>|t|)
##  Intercept -0.1009 0.0836 -1.21 0.2309  
##  SAS_IQc    0.5840 0.0797  7.33 <0.0001 
## 
(occu_model2_wt = ols(occu_safety ~ SAS_IQc + IC_reg, data = chap8_std, weights = d2$population2017 %>% sqrt()))
## Frequencies of Missing Values Due to Each Variable
## occu_safety     SAS_IQc      IC_reg   (weights) 
##           2           0           0           2 
## 
## Linear Regression Model
##  
##  ols(formula = occu_safety ~ SAS_IQc + IC_reg, data = chap8_std, 
##      weights = d2$population2017 %>% sqrt())
##  
##  
##                   Model Likelihood    Discrimination    
##                         Ratio Test           Indexes    
##  Obs       95    LR chi2     71.89    R2       0.531    
##  sigma50.6782    d.f.            2    R2 adj   0.521    
##  d.f.      92    Pr(> chi2) 0.0000    g        0.840    
##  
##  Residuals
##  
##       Min       1Q   Median       3Q      Max 
##  -1.47084 -0.32593  0.04875  0.56106  1.72604 
##  
##  
##            Coef    S.E.   t     Pr(>|t|)
##  Intercept -0.0801 0.0725 -1.11 0.2716  
##  SAS_IQc    0.6282 0.0694  9.06 <0.0001 
##  IC_reg     0.3798 0.0668  5.68 <0.0001 
## 
lrtest(occu_model1_wt, occu_model2_wt)
## 
## Model 1: occu_safety ~ SAS_IQc
## Model 2: occu_safety ~ SAS_IQc + IC_reg
## 
## L.R. Chisq       d.f.          P 
##   2.86e+01   1.00e+00   9.05e-08
#airlines
(tech_model1_wt = ols(tech_safety ~ SAS_IQc, data = chap8_std, weights = d2$population2017 %>% sqrt()))
## Frequencies of Missing Values Due to Each Variable
## tech_safety     SAS_IQc   (weights) 
##           1           0           2 
## 
## Linear Regression Model
##  
##  ols(formula = tech_safety ~ SAS_IQc, data = chap8_std, weights = d2$population2017 %>% 
##      sqrt())
##  
##  
##                   Model Likelihood    Discrimination    
##                         Ratio Test           Indexes    
##  Obs       96    LR chi2     49.00    R2       0.400    
##  sigma56.6203    d.f.            1    R2 adj   0.393    
##  d.f.      94    Pr(> chi2) 0.0000    g        0.679    
##  
##  Residuals
##  
##      Min      1Q  Median      3Q     Max 
##  -2.1876 -0.2232  0.2129  0.7289  2.2444 
##  
##  
##            Coef    S.E.   t     Pr(>|t|)
##  Intercept -0.1663 0.0807 -2.06 0.0420  
##  SAS_IQc    0.6085 0.0769  7.91 <0.0001 
## 
(tech_model2_wt = ols(tech_safety ~ SAS_IQc + IC_reg, data = chap8_std, weights = d2$population2017 %>% sqrt()))
## Frequencies of Missing Values Due to Each Variable
## tech_safety     SAS_IQc      IC_reg   (weights) 
##           1           0           0           2 
## 
## Linear Regression Model
##  
##  ols(formula = tech_safety ~ SAS_IQc + IC_reg, data = chap8_std, 
##      weights = d2$population2017 %>% sqrt())
##  
##  
##                   Model Likelihood    Discrimination    
##                         Ratio Test           Indexes    
##  Obs       96    LR chi2     81.60    R2       0.573    
##  sigma48.0344    d.f.            2    R2 adj   0.563    
##  d.f.      93    Pr(> chi2) 0.0000    g        0.866    
##  
##  Residuals
##  
##      Min      1Q  Median      3Q     Max 
##  -1.5867 -0.1953  0.1605  0.6222  1.8012 
##  
##  
##            Coef    S.E.   t     Pr(>|t|)
##  Intercept -0.1471 0.0685 -2.15 0.0344  
##  SAS_IQc    0.6550 0.0657  9.97 <0.0001 
##  IC_reg     0.3876 0.0632  6.13 <0.0001 
## 
lrtest(tech_model1_wt, tech_model2_wt)
## 
## Model 1: tech_safety ~ SAS_IQc
## Model 2: tech_safety ~ SAS_IQc + IC_reg
## 
## L.R. Chisq       d.f.          P 
##   3.26e+01   1.00e+00   1.13e-08
#IMO
(IMO_model1_wt = ols(IMO_rr ~ SAS_IQc, data = chap8_std, weights = d2$population2017 %>% sqrt()))
## Frequencies of Missing Values Due to Each Variable
##    IMO_rr   SAS_IQc (weights) 
##        12         0         2 
## 
## Linear Regression Model
##  
##  ols(formula = IMO_rr ~ SAS_IQc, data = chap8_std, weights = d2$population2017 %>% 
##      sqrt())
##  
##  
##                   Model Likelihood    Discrimination    
##                         Ratio Test           Indexes    
##  Obs       85    LR chi2     18.90    R2       0.199    
##  sigma69.7722    d.f.            1    R2 adj   0.190    
##  d.f.      83    Pr(> chi2) 0.0000    g        0.476    
##  
##  Residuals
##  
##      Min      1Q  Median      3Q     Max 
##  -2.3418 -1.3262 -0.6937  0.1735  1.3437 
##  
##  
##            Coef   S.E.   t    Pr(>|t|)
##  Intercept 0.4783 0.1031 4.64 <0.0001 
##  SAS_IQc   0.4802 0.1056 4.55 <0.0001 
## 
(IMO_model2_wt = ols(IMO_rr ~ SAS_IQc + IC_reg, data = chap8_std, weights = d2$population2017 %>% sqrt()))
## Frequencies of Missing Values Due to Each Variable
##    IMO_rr   SAS_IQc    IC_reg (weights) 
##        12         0         0         2 
## 
## Linear Regression Model
##  
##  ols(formula = IMO_rr ~ SAS_IQc + IC_reg, data = chap8_std, weights = d2$population2017 %>% 
##      sqrt())
##  
##  
##                   Model Likelihood    Discrimination    
##                         Ratio Test           Indexes    
##  Obs       85    LR chi2     20.03    R2       0.210    
##  sigma69.7324    d.f.            2    R2 adj   0.191    
##  d.f.      82    Pr(> chi2) 0.0000    g        0.512    
##  
##  Residuals
##  
##      Min      1Q  Median      3Q     Max 
##  -2.3078 -1.3099 -0.7010  0.1269  1.3098 
##  
##  
##            Coef   S.E.   t    Pr(>|t|)
##  Intercept 0.4886 0.1035 4.72 <0.0001 
##  SAS_IQc   0.4892 0.1059 4.62 <0.0001 
##  IC_reg    0.1022 0.0977 1.05 0.2985  
## 
lrtest(IMO_model1_wt, IMO_model2_wt)
## 
## Model 1: IMO_rr ~ SAS_IQc
## Model 2: IMO_rr ~ SAS_IQc + IC_reg
## 
## L.R. Chisq       d.f.          P 
##      1.127      1.000      0.288
#easy table output
list(
  esports1_wt,
  esports2_wt,
  IMO_model1_wt,
  IMO_model2_wt,
  air_model1_wt,
  air_model2_wt,
  road_model1_wt,
  road_model2_wt,
  occu_model1_wt,
  occu_model2_wt,
  tech_model1_wt,
  tech_model2_wt
) %>% 
  summarize_models()

Spatial data

Data

#merge into map data
d_map = right_join(world3, d, by = "ISO")

#calculate centroids, and they must lie within the unit
d_map %<>% 
  mutate(lon2 = map_dbl(geometry, ~st_point_on_surface(.x)[[1]]),
         lat2 = map_dbl(geometry, ~st_point_on_surface(.x)[[2]]))

#add missing values for Hong Kong and Macau
#capitals
d_map$lat[d_map$ISO == "HKG"] = 22.321751
d_map$lon[d_map$ISO == "HKG"] = 114.169663
d_map$lat[d_map$ISO == "MAC"] = 22.198667
d_map$lon[d_map$ISO == "MAC"] = 113.545605 
#centroids
d_map$lat2[d_map$ISO == "HKG"] = 22.397745
d_map$lon2[d_map$ISO == "HKG"] = 114.158591
d_map$lat2[d_map$ISO == "MAC"] = 22.198667
d_map$lon2[d_map$ISO == "MAC"] = 113.545605

#look good?
ggplot() +
  ggplot2::geom_sf(data = d_map) +
  # geom_label(data = d_map, aes(x = lon, y = lat, label = NAME))
  geom_point(data = d_map, aes(x = lon, y = lat), size = .5) +
  geom_point(data = d_map, aes(x = lon2, y = lat2), color = "red", size = .5)
## Warning: Removed 24 rows containing missing values (`geom_point()`).
## Warning: Removed 50 rows containing missing values (`geom_point()`).

#subset
d2_map = right_join(world3, d2, by = "ISO")

Basic maps

Nice for quick overview.

#no empty
d_map_noNA = d_map %>% filter(!st_is_empty(.))

#only run once to save time
if (F) {
  map_continents = tm_shape(d_map_noNA) +
    tm_fill("UN_continent", title = "Continents") +
    tm_borders()
  map_continents
  tmap_save(map_continents, "figs/continents map.png")
  
  map_macroregions = tm_shape(d_map_noNA) +
    tm_fill("UN_macroregion", title = "Macroregions") +
    tm_borders()
  map_macroregions
  tmap_save(map_macroregions, "figs/macroregions map.png")
   
  map_regions = tm_shape(d_map_noNA) +
    tm_fill("UN_region", title = "Regions") +
    tm_borders()
  map_regions
  tmap_save(map_regions, "figs/regions map.png")
}


#IQ map
IQ_map = tm_shape(d_map_noNA) +
  tm_fill("SAS_IQc", title = "IQ (Rindermann)") +
  tm_borders() +
  tm_layout(legend.position = c("left", "bottom"))
IQ_map

tmap_save(IQ_map, "figs/IQ_map.png")
## Map saved to /media/emil/8tb_ssd_3/projects/Smart fraction reanalysis/figs/IQ_map.png
## Resolution: 3332 by 1324 pixels
## Size: 11.1 by 4.41 inches (300 dpi)
#IQ, all countries
IQ_map2 = tm_shape(d_map_noNA) +
  tm_fill("CA_totc", title = "IQ (Rindermann)") +
  tm_borders() +
  tm_layout(legend.position = c("left", "bottom"))
IQ_map2

tmap_save(IQ_map2, "figs/IQ_map_all.png")
## Map saved to /media/emil/8tb_ssd_3/projects/Smart fraction reanalysis/figs/IQ_map_all.png
## Resolution: 3332 by 1324 pixels
## Size: 11.1 by 4.41 inches (300 dpi)
#IC map
IC_map = tm_shape(d_map_noNA) +
  tm_fill("IC_reg", title = "IQ intellectual class (regression)") +
  tm_borders() +
  tm_layout(legend.position = c("left", "bottom"))
IC_map
## Variable(s) "IC_reg" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

tmap_save(IC_map, "figs/IC_map.png")
## Variable(s) "IC_reg" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Map saved to /media/emil/8tb_ssd_3/projects/Smart fraction reanalysis/figs/IC_map.png
## Resolution: 3332 by 1324 pixels
## Size: 11.1 by 4.41 inches (300 dpi)
#SPI S map
SPI_map = tm_shape(d_map_noNA) +
  tm_fill("SPI_g", title = "Social progress index (general factor)") +
  tm_borders() +
  tm_layout(legend.position = c("left", "bottom"))
SPI_map
## Variable(s) "SPI_g" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

tmap_save(SPI_map, "figs/SPI_map.png")
## Variable(s) "SPI_g" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Map saved to /media/emil/8tb_ssd_3/projects/Smart fraction reanalysis/figs/SPI_map.png
## Resolution: 3332 by 1324 pixels
## Size: 11.1 by 4.41 inches (300 dpi)

Spatial stats

Add a covariate for spatial autocorrelation.

#calculate spatial lag variables
#version 1
d_map = kirkegaard::spatial_knn(
  d_map,
  vars = main_vars2,
  k = 3
  )

#version 2
d_map = kirkegaard::spatial_knn(
  d_map,
  vars = main_vars2,
  k = 3,
  lon = "lon2",
  lat = "lat2",
  suffix = "_lag2"
  )

#cors
autocors = d_map %>% select(!!!main_vars2,
                 !!!main_vars2 + "_lag",
                 !!!main_vars2 + "_lag2") %>% 
  st_drop_geometry() %>% 
  wtd.cors()

#summary stats
map_df(main_vars2, function(v) {
  tibble(
    var = v,
    lag_capital = autocors[v, v + "_lag"],
    lag_centroid = autocors[v, v + "_lag2"]
  )
}) %>% 
  print() %>% 
  {
    filter(., var %in% names(spi_indicators)) %>% 
      .[-1] %>% 
      colMeans()
  }
## # A tibble: 65 × 3
##    var                                                lag_capital lag_centroid
##    <chr>                                                    <dbl>        <dbl>
##  1 SPI_g                                                    0.787        0.809
##  2 SAS_IQc                                                  0.685        0.717
##  3 x95pct_IQc                                               0.613        0.653
##  4 x05pct_IQc                                               0.685        0.700
##  5 IC_reg                                                   0.253        0.205
##  6 LC_reg                                                   0.425        0.415
##  7 IC_delta                                                 0.516        0.479
##  8 LC_delta                                                 0.383        0.385
##  9 Undernourishment_pct_of_pop                              0.663        0.685
## 10 Maternal_mortality_rate_deaths_100_000_live_births       0.770        0.773
## # … with 55 more rows
##  lag_capital lag_centroid 
##        0.609        0.629
#example plot showing autocorrelation using the cognitive data
GG_scatter(d_map %>% st_drop_geometry(), "SAS_IQc_lag", "SAS_IQc", case_names = "Country.x") +
  scale_x_continuous("Mean IQ, spatial lag") +
  scale_y_continuous("Mean IQ")
## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/spatial_lag_example_iq.png")
## `geom_smooth()` using formula = 'y ~ x'

Indicator regressions

#join the lagged variables to d2
d2b = d_map %>% 
  st_drop_geometry() %>% 
  filter(!is.na(SAS_IQc), !is.na(x95pct_IQc))

#regression
reg_results_spatial = get_reg_results(names(spi_indicators), spatial_lag = T, data = d2b)

#summarize R2s
reg_results_spatial %>% select(fit1_r2:fit4_wt_r2) %>% colMeans()
##    fit1_r2    fit2_r2    fit3_r2    fit4_r2 fit1_wt_r2 fit2_wt_r2 fit3_wt_r2 
##      0.413      0.430      0.418      0.429      0.406      0.439      0.414 
## fit4_wt_r2 
##      0.439
#formal -- how many model tests are better?
reg_results_spatial %>% 
  select(LR1_p:LR3_wt_p) %>% 
  {
    plyr::adply(., 1, function(r) {
      tibble(
        LR1 = r$LR1_p < .01,
        LR2 = r$LR2_p < .01,
        LR3 = r$LR3_p < .01,
        LR1_wt = r$LR1_wt_p < .01,
        LR2_wt = r$LR2_wt_p < .01,
        LR3_wt = r$LR3_wt_p < .01
      )
    })
  } %>% 
  select(LR1:LR3_wt) %>% 
  colMeans()
##    LR1    LR2    LR3 LR1_wt LR2_wt LR3_wt 
## 0.2549 0.0588 0.0196 0.3922 0.0980 0.0196

SPI regression

#SPI
#standardize data before so we get standardized betas
#join in lags before that
SPI_std = d2 %>% 
  #join lags
  left_join(d_map %>% select(SPI_g_lag, "ISO"), by = "ISO") %>% 
  select(SPI_g, SAS_IQc, IC_reg, LC_reg, SPI_g_lag) %>% 
  df_standardize()

#fits
list(
  SPI_model1 = ols(SPI_g ~ SAS_IQc + SPI_g_lag, data = SPI_std),
  SPI_model2 = ols(SPI_g ~ SAS_IQc + IC_reg + SPI_g_lag, data = SPI_std),
  SPI_model3 = ols(SPI_g ~ SAS_IQc + LC_reg + SPI_g_lag, data = SPI_std),
  SPI_model4 = ols(SPI_g ~ SAS_IQc + IC_reg + LC_reg + SPI_g_lag, data = SPI_std)
) -> SPI_models_spatial

SPI_models_spatial %>% summarize_models()
#with weights
list(
  SPI_model1_wt = ols(SPI_g ~ SAS_IQc + SPI_g_lag, data = SPI_std, weights = d2$population2017 %>% sqrt()),
   SPI_model2_wt = ols(SPI_g ~ SAS_IQc + IC_reg + SPI_g_lag, data = SPI_std, weights = d2$population2017 %>% sqrt()),
  SPI_model3_wt = ols(SPI_g ~ SAS_IQc + LC_reg + SPI_g_lag, data = SPI_std, weights = d2$population2017 %>% sqrt()),
  SPI_model4_wt = ols(SPI_g ~ SAS_IQc + IC_reg + LC_reg + SPI_g_lag, data = SPI_std, weights = d2$population2017 %>% sqrt())
) -> SPI_models_wt_spatial

SPI_models_wt_spatial %>% summarize_models()
#formal tests
lrtest(SPI_models[[1]], SPI_models[[2]])
## 
## Model 1: SPI_g ~ SAS_IQc
## Model 2: SPI_g ~ SAS_IQc + IC_delta
## 
## L.R. Chisq       d.f.          P 
##   13.92937    1.00000    0.00019
lrtest(SPI_models[[1]], SPI_models[[3]])
## 
## Model 1: SPI_g ~ SAS_IQc
## Model 2: SPI_g ~ SAS_IQc + LC_delta
## 
## L.R. Chisq       d.f.          P 
##     4.4426     1.0000     0.0351
lrtest(SPI_models[[2]], SPI_models[[4]])
## 
## Model 1: SPI_g ~ SAS_IQc + IC_delta
## Model 2: SPI_g ~ SAS_IQc + IC_delta + LC_delta
## 
## L.R. Chisq       d.f.          P 
##      0.384      1.000      0.535
lrtest(SPI_models_wt[[1]], SPI_models_wt[[2]])
## 
## Model 1: SPI_g ~ SAS_IQc
## Model 2: SPI_g ~ SAS_IQc + IC_delta
## 
## L.R. Chisq       d.f.          P 
##   2.15e+01   1.00e+00   3.53e-06
lrtest(SPI_models_wt[[1]], SPI_models_wt[[3]])
## 
## Model 1: SPI_g ~ SAS_IQc
## Model 2: SPI_g ~ SAS_IQc + LC_delta
## 
## L.R. Chisq       d.f.          P 
##      6.309      1.000      0.012
lrtest(SPI_models_wt[[2]], SPI_models_wt[[4]])
## 
## Model 1: SPI_g ~ SAS_IQc + IC_delta
## Model 2: SPI_g ~ SAS_IQc + IC_delta + LC_delta
## 
## L.R. Chisq       d.f.          P 
##      0.981      1.000      0.322

Simulations

Simulate some populations that vary in 1) means, 2) standard deviations, 3) presence of subpopulations of low or high IQ.

#simulate a country of each combination
country_n = 10e3
high = 15
low = -15
sim_combos = expand_grid(
  mean = seq(70, 105, by = 5),
  sd = seq(14, 16, by = 1),
  high = c(T, F),
  low = c(T, F),
  replicate = 1:20
)

#sample some data
sim_combos = plyr::adply(sim_combos, .margins = 1, .fun = function(dd) {
  #simulate case data
  main_n = country_n - (dd$high * country_n * .05) - (dd$high * country_n * .05)
  x = tibble(
    score = rnorm(main_n, mean = dd$mean, sd = dd$sd),
    group = "main"
  )
  
  #subgroups
  if (dd$high) {
    x = bind_rows(x, tibble(
     score = rnorm(country_n * .05, mean = dd$mean + high, sd = 15),
     group = "high"
     ))
  }
  if (dd$low) {
     x = bind_rows(x, tibble(
       score = rnorm(country_n * .05, mean = dd$mean + low, sd = 15),
       group = "low"
     ))
  }
  
  #summary stats
  tibble(
    obs_mean = mean(x$score),
    obs_95 = quantile(x$score, probs = .95),
    obs_05 = quantile(x$score, probs = .05),
    obs_sd = sd(x$score),
    data = list(x)
  )
})

#resid tail measures
sim_combos %<>% mutate(
  IC_reg = ols(obs_95 ~ obs_mean, data = sim_combos) %>% resid() %>% standardize(),
  LC_reg = ols(obs_05 ~ obs_mean, data = sim_combos) %>% resid() %>% standardize(),
  IC_z = obs_95 - obs_mean,
  LC_z = obs_05 - obs_mean
)


#work as expected?
sim_combos %>% select(-data) %>% wtd.cors() %>% round(2)
##           mean    sd  high   low replicate obs_mean obs_95 obs_05 obs_sd IC_reg
## mean      1.00  0.00  0.00  0.00         0     1.00   0.99   0.99   0.00   0.00
## sd        0.00  1.00  0.00  0.00         0     0.00   0.11  -0.11   0.94   0.92
## high      0.00  0.00  1.00  0.00         0     0.03   0.07   0.01   0.23   0.28
## low       0.00  0.00  0.00  1.00         0    -0.03  -0.01  -0.06   0.22   0.15
## replicate 0.00  0.00  0.00  0.00         1     0.00   0.00   0.00   0.00   0.00
## obs_mean  1.00  0.00  0.03 -0.03         0     1.00   0.99   0.99   0.00   0.00
## obs_95    0.99  0.11  0.07 -0.01         0     0.99   1.00   0.97   0.11   0.12
## obs_05    0.99 -0.11  0.01 -0.06         0     0.99   0.97   1.00  -0.12  -0.11
## obs_sd    0.00  0.94  0.23  0.22         0     0.00   0.11  -0.12   1.00   0.98
## IC_reg    0.00  0.92  0.28  0.15         0     0.00   0.12  -0.11   0.98   1.00
## LC_reg    0.00 -0.92 -0.16 -0.27         0     0.00  -0.11   0.12  -0.98  -0.95
## IC_z      0.00  0.92  0.28  0.15         0     0.01   0.13  -0.10   0.98   1.00
## LC_z      0.00 -0.92 -0.16 -0.27         0     0.00  -0.11   0.12  -0.98  -0.95
##           LC_reg  IC_z  LC_z
## mean        0.00  0.00  0.00
## sd         -0.92  0.92 -0.92
## high       -0.16  0.28 -0.16
## low        -0.27  0.15 -0.27
## replicate   0.00  0.00  0.00
## obs_mean    0.00  0.01  0.00
## obs_95     -0.11  0.13 -0.11
## obs_05      0.12 -0.10  0.12
## obs_sd     -0.98  0.98 -0.98
## IC_reg     -0.95  1.00 -0.95
## LC_reg      1.00 -0.95  1.00
## IC_z       -0.95  1.00 -0.95
## LC_z        1.00 -0.95  1.00
ols(IC_reg ~ mean + sd + high + low, data = sim_combos)
## Linear Regression Model
##  
##  ols(formula = IC_reg ~ mean + sd + high + low, data = sim_combos)
##  
##                   Model Likelihood    Discrimination    
##                         Ratio Test           Indexes    
##  Obs    1920    LR chi2    5667.23    R2       0.948    
##  sigma0.2288    d.f.             4    R2 adj   0.948    
##  d.f.   1915    Pr(> chi2)  0.0000    g        1.121    
##  
##  Residuals
##  
##         Min         1Q     Median         3Q        Max 
##  -0.9245695 -0.1559471  0.0005589  0.1494833  0.7637447 
##  
##  
##            Coef     S.E.   t       Pr(>|t|)
##  Intercept -17.2821 0.1043 -165.71 <0.0001 
##  mean       -0.0004 0.0005   -0.79 0.4313  
##  sd          1.1252 0.0064  175.93 <0.0001 
##  high        0.5654 0.0104   54.13 <0.0001 
##  low         0.3049 0.0104   29.20 <0.0001 
## 
ols(IC_reg ~ obs_mean + obs_sd + high + low, data = sim_combos)
## Linear Regression Model
##  
##  ols(formula = IC_reg ~ obs_mean + obs_sd + high + low, data = sim_combos)
##  
##                   Model Likelihood    Discrimination    
##                         Ratio Test           Indexes    
##  Obs    1920    LR chi2    6626.84    R2       0.968    
##  sigma0.1782    d.f.             4    R2 adj   0.968    
##  d.f.   1915    Pr(> chi2)  0.0000    g        1.135    
##  
##  Residuals
##  
##        Min        1Q    Median        3Q       Max 
##  -0.610050 -0.113820 -0.002221  0.115865  0.645633 
##  
##  
##            Coef     S.E.   t       Pr(>|t|)
##  Intercept -18.6193 0.0858 -216.95 <0.0001 
##  obs_mean   -0.0001 0.0004   -0.20 0.8442  
##  obs_sd      1.2126 0.0053  228.61 <0.0001 
##  high        0.1080 0.0084   12.88 <0.0001 
##  low        -0.1221 0.0084  -14.62 <0.0001 
## 
ols(LC_reg ~ mean + sd + high + low, data = sim_combos)
## Linear Regression Model
##  
##  ols(formula = LC_reg ~ mean + sd + high + low, data = sim_combos)
##  
##                   Model Likelihood    Discrimination    
##                         Ratio Test           Indexes    
##  Obs    1920    LR chi2    5891.67    R2       0.954    
##  sigma0.2158    d.f.             4    R2 adj   0.953    
##  d.f.   1915    Pr(> chi2)  0.0000    g        1.124    
##  
##  Residuals
##  
##        Min        1Q    Median        3Q       Max 
##  -0.763303 -0.150697  0.004135  0.152390  0.797079 
##  
##  
##            Coef    S.E.   t       Pr(>|t|)
##  Intercept 17.4326 0.0984  177.21 <0.0001 
##  mean      -0.0003 0.0004   -0.77 0.4419  
##  sd        -1.1314 0.0060 -187.54 <0.0001 
##  high      -0.3255 0.0099  -33.04 <0.0001 
##  low       -0.5411 0.0099  -54.93 <0.0001 
## 
ols(LC_reg ~ obs_mean + obs_sd + high + low, data = sim_combos)
## Linear Regression Model
##  
##  ols(formula = LC_reg ~ obs_mean + obs_sd + high + low, data = sim_combos)
##  
##                   Model Likelihood    Discrimination    
##                         Ratio Test           Indexes    
##  Obs    1920    LR chi2    6957.32    R2       0.973    
##  sigma0.1635    d.f.             4    R2 adj   0.973    
##  d.f.   1915    Pr(> chi2)  0.0000    g        1.138    
##  
##  Residuals
##  
##        Min        1Q    Median        3Q       Max 
##  -0.509047 -0.110737  0.001407  0.107260  0.500626 
##  
##  
##            Coef    S.E.   t       Pr(>|t|)
##  Intercept 18.7661 0.0787  238.31 <0.0001 
##  obs_mean  -0.0006 0.0003   -1.88 0.0602  
##  obs_sd    -1.2185 0.0049 -250.37 <0.0001 
##  high       0.1346 0.0077   17.50 <0.0001 
##  low       -0.1125 0.0077  -14.68 <0.0001 
## 
#sampling error
centiles_sampling = map_df(1:1000, function(i) {
  #sample 500 persons, calculate the 1-99th centiles
  quantile(rnorm(500), probs = seq(.01, .99, .01)) %>% 
    as.list() %>% 
    as.data.frame()
}) %>% set_colnames("centile_" + seq(1, 99))

#SDs
centiles_sampling[c(1, 5, 10, 20, 30, 40, 50)] %>% 
  describe()
##            vars    n      mean     sd    median  trimmed    mad    min     max
## centile_1     1 1000 -2.283752 0.1645 -2.28e+00 -2.27951 0.1617 -2.866 -1.8373
## centile_5     2 1000 -1.634859 0.0974 -1.63e+00 -1.63466 0.0945 -1.929 -1.3018
## centile_10    3 1000 -1.274779 0.0778 -1.28e+00 -1.27486 0.0813 -1.535 -0.9912
## centile_20    4 1000 -0.840024 0.0647 -8.42e-01 -0.84098 0.0614 -1.028 -0.6397
## centile_30    5 1000 -0.524172 0.0591 -5.24e-01 -0.52391 0.0609 -0.703 -0.3733
## centile_40    6 1000 -0.254878 0.0557 -2.56e-01 -0.25482 0.0566 -0.470 -0.0423
## centile_50    7 1000 -0.000804 0.0563 -7.57e-05 -0.00026 0.0547 -0.233  0.2032
##            range     skew  kurtosis      se
## centile_1  1.029 -0.29390  0.241782 0.00520
## centile_5  0.627 -0.00264 -0.044301 0.00308
## centile_10 0.544  0.01239 -0.000936 0.00246
## centile_20 0.388  0.11815 -0.061934 0.00205
## centile_30 0.329 -0.06915 -0.169868 0.00187
## centile_40 0.428 -0.05609  0.139900 0.00176
## centile_50 0.437 -0.13837  0.281520 0.00178
centiles_sampling %>% 
  describe() %>% 
  as.data.frame() %>% 
  ggplot(aes(vars, sd)) +
  geom_line() +
  scale_x_continuous("Centile", breaks = seq(0, 100, 5)) +
  scale_y_continuous("SD of estimate", limits = c(0, NA))

GG_save("figs/SE_centiles.png")

Reviewer replies

Impossiblity of MR without pre-residualization

#z score data
d2_z = d2 %>% df_standardize(messages = F)

#no weights
list(
  SPI_model1 = ols(SPI_g ~ SAS_IQc, data = d2_z),
  SPI_model2 = ols(SPI_g ~ SAS_IQc + x95pct_IQc, data = d2_z),
  SPI_model3 = ols(SPI_g ~ SAS_IQc + x05pct_IQc, data = d2_z),
  SPI_model4 = ols(SPI_g ~ SAS_IQc + x95pct_IQc + x05pct_IQc, data = d2_z)
) -> SPI_models

SPI_models %>% summarize_models()
#weights
list(
  SPI_model1_wt = ols(SPI_g ~ SAS_IQc, data = d2_z, weights = d2$population2017 %>% sqrt()),
  SPI_model2_wt = ols(SPI_g ~ SAS_IQc + x95pct_IQc, data = d2_z, weights = d2$population2017 %>% sqrt()),
  SPI_model3_wt = ols(SPI_g ~ SAS_IQc + x05pct_IQc, data = d2_z, weights = d2$population2017 %>% sqrt()),
  SPI_model4_wt = ols(SPI_g ~ SAS_IQc + x95pct_IQc + x05pct_IQc, data = d2_z, weights = d2$population2017 %>% sqrt())
) -> SPI_models_wt

SPI_models_wt %>% summarize_models()
#formal tests
lrtest(SPI_models[[1]], SPI_models[[2]])
## 
## Model 1: SPI_g ~ SAS_IQc
## Model 2: SPI_g ~ SAS_IQc + x95pct_IQc
## 
## L.R. Chisq       d.f.          P 
##   13.92937    1.00000    0.00019
lrtest(SPI_models[[1]], SPI_models[[3]])
## 
## Model 1: SPI_g ~ SAS_IQc
## Model 2: SPI_g ~ SAS_IQc + x05pct_IQc
## 
## L.R. Chisq       d.f.          P 
##     4.4426     1.0000     0.0351
lrtest(SPI_models[[2]], SPI_models[[4]])
## 
## Model 1: SPI_g ~ SAS_IQc + x95pct_IQc
## Model 2: SPI_g ~ SAS_IQc + x95pct_IQc + x05pct_IQc
## 
## L.R. Chisq       d.f.          P 
##      0.384      1.000      0.535
lrtest(SPI_models_wt[[1]], SPI_models_wt[[2]])
## 
## Model 1: SPI_g ~ SAS_IQc
## Model 2: SPI_g ~ SAS_IQc + x95pct_IQc
## 
## L.R. Chisq       d.f.          P 
##   2.15e+01   1.00e+00   3.53e-06
lrtest(SPI_models_wt[[1]], SPI_models_wt[[3]])
## 
## Model 1: SPI_g ~ SAS_IQc
## Model 2: SPI_g ~ SAS_IQc + x05pct_IQc
## 
## L.R. Chisq       d.f.          P 
##      6.309      1.000      0.012
lrtest(SPI_models_wt[[2]], SPI_models_wt[[4]])
## 
## Model 1: SPI_g ~ SAS_IQc + x95pct_IQc
## Model 2: SPI_g ~ SAS_IQc + x95pct_IQc + x05pct_IQc
## 
## L.R. Chisq       d.f.          P 
##      0.981      1.000      0.322

Outlier effects

#main regression with standard metrics


#leave one out approach
loo_results = map_df(seq_along_rows(d2), function(idx) {
  #fit the models without this case
  i_reg_results = get_reg_results(names(spi_indicators), data = d2[-idx, ]) %>% mutate(case_removed = idx)
  
  #summarize R2s
  regs_r2s = i_reg_results %>% select(fit1_r2:fit4_wt_r2) %>% colMeans()
  
  #formal -- how many model tests are better?
  regs_lr_tests = i_reg_results %>% 
    select(LR1_p:LR3_wt_p) %>% 
    {
      plyr::adply(., 1, function(r) {
        tibble(
          LR1 = r$LR1_p < .01,
          LR2 = r$LR2_p < .01,
          LR3 = r$LR3_p < .01,
          LR1_wt = r$LR1_wt_p < .01,
          LR2_wt = r$LR2_wt_p < .01,
          LR3_wt = r$LR3_wt_p < .01
        )
      })
    } %>% 
    select(LR1:LR3_wt) %>% 
    colMeans()
  
  #make a data frame
  bind_cols(
    as_tibble(t(regs_r2s)),
    as_tibble(t(regs_lr_tests))
  ) %>% mutate(case_omitted = idx)
})

#plot effect of case removal
loo_results %>% 
  select(case_omitted, LR1:LR3_wt) %>% 
  pivot_longer(cols = LR1:LR3_wt) %>% 
  ggplot(aes(case_omitted, value, color = name)) +
  geom_line() +
  scale_x_continuous("Case omitted") +
  scale_y_continuous("% of tests showing signal (p < .01)", labels = scales::percent) +
  scale_color_discrete("Model comparison")

GG_save("figs/loo_results.png")

#what is case ?
d2$Country.x[17]
## [1] "China"
d2$Country.x[24]
## [1] "Egypt"
d2$Country.x[82]
## [1] "South Africa"
d2$Country.x[98]
## [1] "Vietnam"

Data output

#selected variables
d2 %>% 
  select(ISO, Country.x, SAS_IQc, x95pct_IQc, x05pct_IQc, IC_reg, LC_reg, SPI_g)
#save dataset
d %>% write_csv("data/data_out.csv.gz")
d %>% write_rds("data/data_out.rds")

Meta

Run manually

#versions
write_sessioninfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 21
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_DK.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_DK.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] GGally_2.1.2          broom_1.0.1           rms_6.3-0            
##  [4] SparseM_1.81          tmap_3.3-3            sf_1.0-9             
##  [7] readxl_1.4.1          kirkegaard_2022-11-08 psych_2.2.9          
## [10] assertthat_0.2.1      weights_1.0.4         Hmisc_4.7-1          
## [13] Formula_1.2-4         survival_3.2-13       lattice_0.20-45      
## [16] magrittr_2.0.3        forcats_0.5.2         stringr_1.4.1        
## [19] dplyr_1.0.10          purrr_0.3.5           readr_2.1.3          
## [22] tidyr_1.2.1           tibble_3.1.8          ggplot2_3.4.0        
## [25] tidyverse_1.3.2      
## 
## loaded via a namespace (and not attached):
##   [1] backports_1.4.1     systemfonts_1.0.4   lwgeom_0.2-9       
##   [4] plyr_1.8.7          sp_1.5-1            splines_4.1.2      
##   [7] crosstalk_1.2.0     leaflet_2.1.1       TH.data_1.1-1      
##  [10] digest_0.6.30       htmltools_0.5.3     gdata_2.18.0.1     
##  [13] fansi_1.0.3         checkmate_2.1.0     googlesheets4_1.0.1
##  [16] cluster_2.1.2       tzdb_0.3.0          modelr_0.1.9       
##  [19] vroom_1.6.0         sandwich_3.0-2      jpeg_0.1-9         
##  [22] colorspace_2.0-3    rvest_1.0.3         textshaping_0.3.6  
##  [25] haven_2.5.1         xfun_0.34           leafem_0.2.0       
##  [28] crayon_1.5.2        jsonlite_1.8.3      lme4_1.1-30        
##  [31] zoo_1.8-11          glue_1.6.2          stars_0.5-6        
##  [34] gtable_0.3.1        gargle_1.2.1        MatrixModels_0.5-1 
##  [37] car_3.1-1           DEoptimR_1.0-11     VIM_6.2.2          
##  [40] abind_1.4-5         scales_1.2.1        mvtnorm_1.1-3      
##  [43] DBI_1.1.3           Rcpp_1.0.9          laeken_0.5.2       
##  [46] viridisLite_0.4.1   htmlTable_2.4.1     units_0.8-0        
##  [49] bit_4.0.4           foreign_0.8-82      proxy_0.4-27       
##  [52] vcd_1.4-10          htmlwidgets_1.5.4   httr_1.4.4         
##  [55] RColorBrewer_1.1-3  wk_0.7.0            ellipsis_0.3.2     
##  [58] mice_3.14.0         farver_2.1.1        reshape_0.8.9      
##  [61] pkgconfig_2.0.3     XML_3.99-0.12       nnet_7.3-17        
##  [64] sass_0.4.2          dbplyr_2.2.1        deldir_1.0-6       
##  [67] utf8_1.2.2          labeling_0.4.2      tidyselect_1.2.0   
##  [70] rlang_1.0.6         tmaptools_3.1-1     munsell_0.5.0      
##  [73] cellranger_1.1.0    tools_4.1.2         cachem_1.0.6       
##  [76] cli_3.4.1           generics_0.1.3      ranger_0.14.1      
##  [79] evaluate_0.17       fastmap_1.1.0       ragg_1.2.3         
##  [82] yaml_2.3.6          bit64_4.0.5         leafsync_0.1.0     
##  [85] knitr_1.40          fs_1.5.2            robustbase_0.95-0  
##  [88] s2_1.1.0            nlme_3.1-155        quantreg_5.94      
##  [91] xml2_1.3.3          compiler_4.1.2      rstudioapi_0.14    
##  [94] png_0.1-7           e1071_1.7-12        reprex_2.0.2       
##  [97] bslib_0.4.0         stringi_1.7.8       highr_0.9          
## [100] Matrix_1.5-1        classInt_0.4-8      nloptr_2.0.3       
## [103] vctrs_0.5.0         stringdist_0.9.9    pillar_1.8.1       
## [106] lifecycle_1.0.3     lmtest_0.9-40       jquerylib_0.1.4    
## [109] data.table_1.14.4   raster_3.6-3        R6_2.5.1           
## [112] latticeExtra_0.6-30 KernSmooth_2.23-20  gridExtra_2.3      
## [115] codetools_0.2-18    polspline_1.1.20    dichromat_2.0-0.1  
## [118] boot_1.3-28         MASS_7.3-55         gtools_3.9.3       
## [121] withr_2.5.0         mnormt_2.1.1        multcomp_1.4-20    
## [124] mgcv_1.8-39         parallel_4.1.2      hms_1.1.2          
## [127] terra_1.6-17        grid_4.1.2          rpart_4.1.16       
## [130] Rcsdp_0.1.57.2      class_7.3-20        minqa_1.2.5        
## [133] rmarkdown_2.17      carData_3.0-5       googledrive_2.0.0  
## [136] lubridate_1.8.0     base64enc_0.1-3     interp_1.1-3
if (F) {
  library(osfr)
  
  #auth
  osf_auth(readr::read_lines("~/.config/osf_token"))
  
  #the project we will use
  osf_proj = osf_retrieve_node("https://osf.io/95wtc/")
  
  #upload files
  #overwrite existing (versioning)
  osf_upload(osf_proj, conflicts = "overwrite", 
             path = c(
               "data",
               "figs",
               "notebook.html",
               "notebook.Rmd",
               "simulation.R",
               "sessions_info.txt"
             ))
}