Rindermann et al. have used some poor methods to test their cognitive elite model of aggregate social performance. Here we apply better methods, using the 51 indicators of the Social Progress Index, a much more comprehensive analogue of the Human Development Index.
options(digits = 3)
library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0   ✔ purrr   0.3.5
## ✔ tibble  3.1.8   ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1   ✔ stringr 1.4.1
## ✔ readr   2.1.3   ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## Loading required package: magrittr (masks purrr::set_names, tidyr::extract)
## Loading required package: weights
## Loading required package: Hmisc, lattice, survival, Formula
##   (Hmisc masks dplyr::src, dplyr::summarize, base::format.pval, base::units)
## Loading required package: assertthat (masks tibble::has_name)
## Loading required package: psych (masks Hmisc::describe, ggplot2::%+%, ggplot2::alpha)
## Attaching package: 'kirkegaard'
##   (masks psych::rescale, assertthat::are_equal, purrr::is_logical, purrr::is_numeric, base::+)
load_packages(
  readxl,
  sf,
  tmap,
  psych,
  rms,
  broom,
  GGally
)
## Linking to GEOS 3.10.2, GDAL 3.4.1, PROJ 8.2.1; sf_use_s2() is TRUE
## Loading required package: SparseM
##
## Attaching package: 'SparseM'
##
## The following object is masked from 'package:base':
##
## backsolve
##
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
theme_set(theme_bw())
tmap_options(check.and.fix = TRUE)
#get adjusted R2 from a fitted model
r2get = function(x) {
  y = x %>%
    summary.lm() %>%
    glance()
  y$adj.r.squared
}
#wrapper: coerce psych::describe output to a plain matrix for cleaner printing
describe = function(...) {
  psych::describe(...) %>% as.matrix()
}
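A usage sketch (hypothetical call, not part of the original analysis): r2get takes a fitted lm object and returns its adjusted R².
#e.g., with the built-in mtcars data
r2get(lm(mpg ~ wt + hp, data = mtcars))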
#Rindermann
#Cognitive Capitalism cognitive data
cc = read_excel("data/cognitive_capitalism_appendix.xlsx") %>%
df_legalize_names() %>%
mutate(ISO = pu_translate(Country))
## No exact match: Antigua-Barbuda
## No exact match: Benin (Dahomey)
## No exact match: Central Afric R
## No exact match: Congo (Brazz)
## No exact match: Dominican Repub
## No exact match: Equat. Guinea
## No exact match: Korea-North
## No exact match: Korea-South
## No exact match: Nether Antilles
## No exact match: Papua N-Guinea
## No exact match: Sao Tome/Princi
## No exact match: St. Kitts & Nevis
## No exact match: St. Vincent/Gre
## No exact match: Trinidad Tobago
## No exact match: United Arab Emi
## Best fuzzy match found: Antigua-Barbuda -> Antigua & Barbuda with distance 3.00
## Best fuzzy match found: Benin (Dahomey) -> Benin (ex Dahomey) with distance 3.00
## Best fuzzy match found: Central Afric R -> Central Africa with distance 2.00
## Best fuzzy match found: Congo (Brazz) -> Congo (Brazz rep) with distance 4.00
## Best fuzzy match found: Dominican Repub -> Dominican Rep. with distance 2.00
## Best fuzzy match found: Equat. Guinea -> Equ. Guinea with distance 2.00
## Best fuzzy match found: Korea-North -> Korea North with distance 1.00
## Best fuzzy match found: Korea-South -> Korea South with distance 1.00
## Best fuzzy match found: Nether Antilles -> Neth. Antilles with distance 2.00
## Best fuzzy match found: Papua N-Guinea -> Papua New Guinea with distance 3.00
## Best fuzzy match found: Sao Tome/Princi -> Sao Tome & Principe with distance 5.00
## Best fuzzy match found: St. Kitts & Nevis -> St. Kitts en Nevis with distance 2.00
## Best fuzzy match found: St. Vincent/Gre -> St. Vincent with distance 4.00
## Best fuzzy match found: Trinidad Tobago -> Trinidad & Tobago with distance 2.00
## Best fuzzy match found: United Arab Emi -> United Arab Emirates with distance 5.00
#safety data
cc_chap8 = haven::read_sav("data/RindCogCapTable8-1.sav") %>%
  rename(
    ISO = ISOSKODE,
    airline_safety = FLUGSICH,
    road_safety = VTOTW10B,
    occu_safety = ARBUNFGB,
    tech_safety = TECHSICH
  )
## Failed to find HARMONY
## Failed to find EMBEDD
## Failed to find HIERARCH
## Failed to find MASTERY
## Failed to find AFF_AUTO
## Failed to find INT_AUTO
## Failed to find EGALIT
#Math Olympics data
IMO = read_excel("data/IMO_Rindermann2011.xlsx") %>%
  mutate(
    ISO = pu_translate(Country),
    IMO_rr = IMO_rr %>% as.numeric()
  )
## No exact match: Cote d'Ivore
## No exact match: Korea, North
## Best fuzzy match found: Cote d'Ivore -> Cote d'Ivoire with distance 1.00
## Best fuzzy match found: Korea, North -> Korea North with distance 1.00
## Warning in IMO_rr %>% as.numeric(): NAs introduced by coercion
#Social Progress Index
#https://www.socialprogress.org/download
spi = read_excel("data/2014-2019-SPI-Public.xlsx", sheet = 2) %>%
df_legalize_names() %>%
mutate(ISO = pu_translate(Country))
## No exact match: World
## No exact match: Congo, Democratic Republic of
## No exact match: Korea, Democratic Republic of
## No exact match: Korea, Republic of
## No exact match: Virgin Islands (U.S.)
## No exact match: North Cyprus
## No exact match: Svalbard and Jan Mayen Islands
## Best fuzzy match found: World -> world with distance 1.00
## Best fuzzy match found: Congo, Democratic Republic of -> Congo, The Democratic Republic of with distance 4.00
## Best fuzzy match found: Korea, Democratic Republic of -> Korea, Democratic Republic with distance 3.00
## Best fuzzy match found: Korea, Republic of -> Korea Republic of with distance 1.00
## Warning: There were multiple equally good matches for Virgin Islands (U.S.):
## Jarvis Island (USA) | Midway Islands (USA) | U.S. Virgin Islands (USA). All with
## distance 7.00
## Best fuzzy match found: North Cyprus -> Northern Cyprus with distance 3.00
## Best fuzzy match found: Svalbard and Jan Mayen Islands -> Svalbard and Jan Mayen (NOR) with distance 7.00
#UN regions
#not used for study
UN = read_rds("data/chess_kirkegaard2019.rds")
#HDI data
#http://hdr.undp.org/en/composite/HDI
HDI = read_excel("data/2018_Statistical_Annex_Table_1.xlsx", sheet = 2) %>%
  #fix names
  df_legalize_names() %>%
  #remove groups
  filter(!str_detect(Country, "DEVELOPMENT")) %>%
  #add ISO
  mutate(ISO = pu_translate(Country))
#world map spatial data
#http://thematicmapping.org/downloads/world_borders.php
#these data are now broken...
# world = st_read("data/TM_WORLD_BORDERS-0.3/TM_WORLD_BORDERS-0.3.shp") %>%
# rename(ISO = ISO3)
# world2 = st_read("data/TM_WORLD_BORDERS_SIMPL-0.3/TM_WORLD_BORDERS_SIMPL-0.3.shp") %>%
# rename(ISO = ISO3)
#but we use this one
#https://tapiquen-sig.jimdofree.com/english-version/free-downloads/world/
world3 = st_read("data/World_Countries/World_Countries.shp")
## Reading layer `World_Countries' from data source
## `/media/emil/8tb_ssd_3/projects/Smart fraction reanalysis/data/World_Countries/World_Countries.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 252 features and 1 field
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -180 ymin: -90 xmax: 180 ymax: 83.6
## Geodetic CRS: WGS 84
#need to filter the non-countries
#these are marked by parens
world3 = world3 %>%
  filter(!str_detect(COUNTRY, "\\(")) %>%
  mutate(
    ISO = pu_translate(COUNTRY)
  )
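As a quick coverage check (a sketch added here, not in the original analysis), we can ask what share of the countries in the cognitive dataset have a matching polygon to map onto:
#share of cognitive-data countries with a polygon in world3
mean(cc$ISO %in% world3$ISO)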
#brain drain emigration data
#https://www.iab.de/en/daten/iab-brain-drain-data.aspx
#not used for study
emi = read_excel("data/iabbd_8010_v1_emigration.xls", sheet = 3, col_names = F)
## New names:
## • `` -> `...1` … `` -> `...29` (29 unnamed columns auto-named)
#fix data
names(emi) = (emi[1, ] %>% unlist() %>% miss_locf()) + "_" + (emi[2, ] %>% unlist())
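#note: "+" works for string concatenation here because the kirkegaard package
#overrides base::+ (see the masking message in the package loading output above)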
emi %<>% df_legalize_names()
emi = emi[-c(1:3), ]
for (i in seq_along(emi)[-1]) emi[[i]] = as.numeric(emi[[i]])
emi$high_mean = emi %>% select(contains("High")) %>% rowMeans()
emi$med_mean = emi %>% select(contains("Medium")) %>% rowMeans()
emi$medhigh_mean = emi$med_mean + emi$high_mean
emi$low_mean = emi %>% select(contains("Low")) %>% rowMeans()
emi$total_mean = emi %>% select(contains("Total")) %>% rowMeans()
emi$ISO = pu_translate(emi[[1]])
## No exact match: World
## No exact match: Congo, Dem. Rep. of the
## No exact match: Congo, Rep. of the
## No exact match: Holy See (Vatican City)
## No exact match: Timor Leste
## Best fuzzy match found: World -> world with distance 1.00
## Best fuzzy match found: Congo, Dem. Rep. of the -> Congo, Dem. Rep. with distance 7.00
## Best fuzzy match found: Congo, Rep. of the -> Congo, Republic of the with distance 5.00
## Best fuzzy match found: Holy See (Vatican City) -> Holy See (Vatican City State) with distance 6.00
## Best fuzzy match found: Timor Leste -> Timor–Leste with distance 1.00
#economics data
#world bank data
#https://data.worldbank.org/indicator/NY.GNP.PCAP.PP.CD
#https://data.worldbank.org/indicator/NY.GDP.PCAP.PP.CD
#these have missing data, which we can impute
GNI = read_excel("data/API_NY.GNP.PCAP.PP.CD_DS2_en_excel_v2_1218005.xls", skip = 3)
GDP = read_excel("data/API_NY.GDP.PCAP.PP.CD_DS2_en_excel_v2_1217845.xls", skip = 3)
#save back, add growth
GNI %<>% mutate(
  GNI_growth = (`2019` / `1990`) %>% log10(),
  GNI_2019 = `2019` %>% log10(),
  ISO = `Country Code`
)
GDP %<>% mutate(
  GDP_growth = (`2019` / `1990`) %>% log10(),
  GDP_2019 = `2019` %>% log10(),
  ISO = `Country Code`
)
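To make the growth metric concrete (hypothetical values, just for illustration): a country whose GDP per capita tripled between 1990 and 2019 scores log10(3), about 0.48, while one that stagnated scores 0.
#worked example of the log10 growth metric (hypothetical values)
log10(30000 / 10000)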
#these are based on self-report data gathered by Gallup over the years
#https://news.gallup.com/poll/166211/worldwide-median-household-income-000.aspx
median_income = read_csv("data/median income gallup.csv") %>%
  mutate(
    median_income = medianPerCapitaIncome %>% log10(),
    median_income_hh = medianHouseholdIncome %>% log10(),
    ISO = pu_translate(country)
  )
## Rows: 131 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): country
## dbl (3): medianHouseholdIncome, medianPerCapitaIncome, pop2020
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#economic outcomes
econ_outcomes = c("median_income", "median_income_hh", "GNI_2019", "GNI_growth", "GDP_2019", "GDP_growth")
#no dup ISOs
assert_that(!any(duplicated(cc$ISO)))
## [1] TRUE
assert_that(!any(duplicated(spi$ISO)))
## [1] TRUE
assert_that(!any(duplicated(UN$ISO)))
## [1] TRUE
assert_that(!any(duplicated(HDI$ISO)))
## [1] TRUE
assert_that(!any(duplicated(world3$ISO)))
## [1] TRUE
assert_that(!any(duplicated(emi$ISO)))
## [1] TRUE
assert_that(!any(duplicated(cc_chap8$ISO)))
## [1] TRUE
assert_that(!any(duplicated(IMO$ISO)))
## [1] TRUE
#join
d = cc %>%
  full_join(spi, by = "ISO") %>%
  full_join(UN, by = "ISO") %>%
  full_join(emi %>% select(ISO, total_mean, high_mean, med_mean, low_mean, medhigh_mean), by = "ISO") %>%
  full_join(HDI, by = "ISO") %>%
  full_join(cc_chap8 %>% select(ISO, airline_safety:tech_safety), by = "ISO") %>%
  full_join(IMO, by = "ISO") %>%
  left_join(median_income %>% select(ISO, median_income, median_income_hh), by = "ISO") %>%
  left_join(GNI %>% select(ISO, GNI_2019, GNI_growth), by = "ISO") %>%
  left_join(GDP %>% select(ISO, GDP_2019, GDP_growth), by = "ISO")
#check
assert_that(!any(duplicated(d$ISO)))
## [1] TRUE
assert_that(!any(duplicated(names(d))))
## [1] TRUE
#recode cognitive class measures
#intellectual class
d$IC_reg = lm(x95pct_IQc ~ SAS_IQc, data = d, na.action = na.exclude) %>% resid() %>% standardize()
#dummy class
d$LC_reg = lm(x05pct_IQc ~ SAS_IQc, data = d, na.action = na.exclude) %>% resid() %>% standardize()
#direct versions of tails
d$IC_delta = d$x95pct_IQc - d$SAS_IQc
d$LC_delta = d$x05pct_IQc - d$SAS_IQc
#standardized versions of average
d$SAS_IQc_z = d$SAS_IQc %>% standardize()
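The two operationalizations differ in one important way: the residualized measures (IC_reg, LC_reg) are uncorrelated with the mean by construction, while the simple differences (IC_delta, LC_delta) need not be. A toy simulation (hypothetical numbers, not the real data) illustrates this:
#toy sketch: when the 95th percentile advantage shrinks as the mean rises,
#the raw difference correlates with the mean but the regression residual does not
set.seed(1)
toy_mean = rnorm(100, 90, 10)
toy_p95 = 0.9 * toy_mean + 30 + rnorm(100, 0, 2)
cor(toy_p95 - toy_mean, toy_mean)
cor(resid(lm(toy_p95 ~ toy_mean)), toy_mean)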
#advantage of IC and LC as function of mean
GG_scatter(d, "SAS_IQc", "IC_delta", case_names = "Country.x") +
scale_x_continuous("Mean IQ") +
scale_y_continuous("IQ advantage of 95th over the mean")
## `geom_smooth()` using formula = 'y ~ x'
GG_save("figs/direct_IC_mean.png")
## `geom_smooth()` using formula = 'y ~ x'
GG_scatter(d, "SAS_IQc", "LC_delta", case_names = "Country.x") +
scale_x_continuous("Mean IQ") +
scale_y_continuous("IQ advantage of 5th over the mean")
## `geom_smooth()` using formula = 'y ~ x'
GG_save("figs/direct_LC_mean.png")
## `geom_smooth()` using formula = 'y ~ x'
#count
d %>%
select(SAS_IQc, x95pct_IQc, x05pct_IQc, IC_reg, LC_reg) %>%
miss_by_var(reverse = T)
## SAS_IQc x95pct_IQc x05pct_IQc IC_reg LC_reg
## 107 99 99 99 99
We factor analyze the SPI indicators, following a prior study (Kirkegaard 2014).
#collect data
spi_indicators = d %>% select(Undernourishment_pct_of_pop:Percent_of_tertiary_students_enrolled_in_globally_ranked_universities)
spi_indicators %>% miss_amount()
## cases with missing data vars with missing data cells with missing data
## 0.680 1.000 0.231
#impute
spi_indicators_imp = spi_indicators %>% miss_impute(max_na = 10)
#amount after imputing
spi_indicators_imp %>% miss_by_case() %>% table2()
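As a robustness sketch (added here, not part of the original analysis), one can verify that imputation did not shift the indicator distributions much by comparing means before and after:
#compare pre- and post-imputation means for the first few indicators
cbind(
  pre = colMeans(spi_indicators[1:5], na.rm = T),
  post = colMeans(spi_indicators_imp[1:5], na.rm = T)
)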
#factor analyze
spi_fa = fa(spi_indicators_imp)
## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was done
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
spi_fa
## Factor Analysis using method = minres
## Call: fa(r = spi_indicators_imp)
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1
## Undernourishment_pct_of_pop -0.80
## Maternal_mortality_rate_deaths_100_000_live_births -0.83
## Child_mortality_rate_deaths_1_000_live_births -0.88
## Child_stunting_pct_of_children -0.88
## Deaths_from_infectious_diseases_deaths_100_000_people -0.79
## Access_to_at_least_basic_drinking_water_pct_of_pop 0.86
## Access_to_piped_water_pct_of_pop 0.84
## Access_to_at_least_basic_sanitation_facilities_pct_of_pop 0.84
## Rural_open_defecation_pct_of_pop -0.64
## Access_to_electricity_pct_of_pop 0.80
## Quality_of_electricity_supply_1_low_7_high 0.88
## Household_air_pollution_attributable_deaths_deaths_100_000_people -0.83
## Access_to_clean_fuels_and_technology_for_cooking_pct_of_pop 0.85
## Homicide_rate_deaths_100_000_people -0.14
## Perceived_criminality_1_low_5_high -0.60
## Political_killings_and_torture_0_low_freedom_1_high_freedom 0.65
## Traffic_deaths_deaths_100_000_people -0.68
## Adult_literacy_rate_pct_of_pop_aged_15plus 0.78
## Primary_school_enrollment_pct_of_children 0.64
## Secondary_school_enrollment_pct_of_children 0.90
## Gender_parity_in_secondary_enrollment_distance_from_parity 0.54
## Access_to_quality_education_0_unequal_4_equal 0.76
## Mobile_telephone_subscriptions_subscriptions_100_people 0.64
## Internet_users_pct_of_pop 0.90
## Access_to_online_governance_0_low_1_high 0.75
## Media_censorship_0_frequent_4_rare 0.46
## Life_expectancy_at_60_years 0.83
## Premature_deaths_from_non_communicable_diseases_deaths_100_000_people -0.59
## Access_to_essential_services_0_none_100_full_coverage 0.94
## Access_to_quality_healthcare_0_unequal_4_equal 0.84
## Outdoor_air_pollution_attributable_deaths_deaths_100_000_people -0.19
## Greenhouse_gas_emissions_CO2_equivalents_GDP -0.45
## Biome_protection 0.23
## Political_rights_0_no_rights_40_full_rights 0.62
## Freedom_of_expression_0_no_freedom_1_full_freedom 0.47
## Freedom_of_religion_0_no_freedom_4_full_freedom 0.33
## Access_to_justice_0_non_existent_1_observed 0.68
## Property_rights_for_women_0_no_rights_5_full_rights 0.68
## Vulnerable_employment_pct_of_employees -0.86
## Early_marriage_pct_of_women -0.75
## Satisfied_demand_for_contraception_pct_of_women 0.66
## Corruption_0_high_100_low 0.80
## Acceptance_of_gays_and_lesbians_0_low_100_high 0.63
## Discrimination_and_violence_against_minorities_1_low_10_high -0.47
## Equality_of_political_power_by_gender_0_unequal_power_4_equal_power 0.52
## Equality_of_political_power_by_socioeconomic_position_0_unequal_power_4_equal_power 0.49
## Equality_of_political_power_by_social_group_0_unequal_power_4_equal_power 0.47
## Years_of_tertiary_schooling 0.79
## Women_s_average_years_in_school 0.87
## Globally_ranked_universities_points 0.28
## Percent_of_tertiary_students_enrolled_in_globally_ranked_universities 0.57
## h2
## Undernourishment_pct_of_pop 0.644
## Maternal_mortality_rate_deaths_100_000_live_births 0.697
## Child_mortality_rate_deaths_1_000_live_births 0.770
## Child_stunting_pct_of_children 0.767
## Deaths_from_infectious_diseases_deaths_100_000_people 0.624
## Access_to_at_least_basic_drinking_water_pct_of_pop 0.734
## Access_to_piped_water_pct_of_pop 0.701
## Access_to_at_least_basic_sanitation_facilities_pct_of_pop 0.699
## Rural_open_defecation_pct_of_pop 0.411
## Access_to_electricity_pct_of_pop 0.635
## Quality_of_electricity_supply_1_low_7_high 0.767
## Household_air_pollution_attributable_deaths_deaths_100_000_people 0.693
## Access_to_clean_fuels_and_technology_for_cooking_pct_of_pop 0.715
## Homicide_rate_deaths_100_000_people 0.020
## Perceived_criminality_1_low_5_high 0.355
## Political_killings_and_torture_0_low_freedom_1_high_freedom 0.428
## Traffic_deaths_deaths_100_000_people 0.469
## Adult_literacy_rate_pct_of_pop_aged_15plus 0.602
## Primary_school_enrollment_pct_of_children 0.412
## Secondary_school_enrollment_pct_of_children 0.815
## Gender_parity_in_secondary_enrollment_distance_from_parity 0.290
## Access_to_quality_education_0_unequal_4_equal 0.578
## Mobile_telephone_subscriptions_subscriptions_100_people 0.413
## Internet_users_pct_of_pop 0.808
## Access_to_online_governance_0_low_1_high 0.560
## Media_censorship_0_frequent_4_rare 0.211
## Life_expectancy_at_60_years 0.696
## Premature_deaths_from_non_communicable_diseases_deaths_100_000_people 0.352
## Access_to_essential_services_0_none_100_full_coverage 0.890
## Access_to_quality_healthcare_0_unequal_4_equal 0.706
## Outdoor_air_pollution_attributable_deaths_deaths_100_000_people 0.036
## Greenhouse_gas_emissions_CO2_equivalents_GDP 0.202
## Biome_protection 0.054
## Political_rights_0_no_rights_40_full_rights 0.381
## Freedom_of_expression_0_no_freedom_1_full_freedom 0.224
## Freedom_of_religion_0_no_freedom_4_full_freedom 0.110
## Access_to_justice_0_non_existent_1_observed 0.457
## Property_rights_for_women_0_no_rights_5_full_rights 0.463
## Vulnerable_employment_pct_of_employees 0.744
## Early_marriage_pct_of_women 0.561
## Satisfied_demand_for_contraception_pct_of_women 0.435
## Corruption_0_high_100_low 0.647
## Acceptance_of_gays_and_lesbians_0_low_100_high 0.397
## Discrimination_and_violence_against_minorities_1_low_10_high 0.217
## Equality_of_political_power_by_gender_0_unequal_power_4_equal_power 0.268
## Equality_of_political_power_by_socioeconomic_position_0_unequal_power_4_equal_power 0.236
## Equality_of_political_power_by_social_group_0_unequal_power_4_equal_power 0.221
## Years_of_tertiary_schooling 0.629
## Women_s_average_years_in_school 0.761
## Globally_ranked_universities_points 0.079
## Percent_of_tertiary_students_enrolled_in_globally_ranked_universities 0.325
## u2
## Undernourishment_pct_of_pop 0.36
## Maternal_mortality_rate_deaths_100_000_live_births 0.30
## Child_mortality_rate_deaths_1_000_live_births 0.23
## Child_stunting_pct_of_children 0.23
## Deaths_from_infectious_diseases_deaths_100_000_people 0.38
## Access_to_at_least_basic_drinking_water_pct_of_pop 0.27
## Access_to_piped_water_pct_of_pop 0.30
## Access_to_at_least_basic_sanitation_facilities_pct_of_pop 0.30
## Rural_open_defecation_pct_of_pop 0.59
## Access_to_electricity_pct_of_pop 0.36
## Quality_of_electricity_supply_1_low_7_high 0.23
## Household_air_pollution_attributable_deaths_deaths_100_000_people 0.31
## Access_to_clean_fuels_and_technology_for_cooking_pct_of_pop 0.28
## Homicide_rate_deaths_100_000_people 0.98
## Perceived_criminality_1_low_5_high 0.64
## Political_killings_and_torture_0_low_freedom_1_high_freedom 0.57
## Traffic_deaths_deaths_100_000_people 0.53
## Adult_literacy_rate_pct_of_pop_aged_15plus 0.40
## Primary_school_enrollment_pct_of_children 0.59
## Secondary_school_enrollment_pct_of_children 0.19
## Gender_parity_in_secondary_enrollment_distance_from_parity 0.71
## Access_to_quality_education_0_unequal_4_equal 0.42
## Mobile_telephone_subscriptions_subscriptions_100_people 0.59
## Internet_users_pct_of_pop 0.19
## Access_to_online_governance_0_low_1_high 0.44
## Media_censorship_0_frequent_4_rare 0.79
## Life_expectancy_at_60_years 0.30
## Premature_deaths_from_non_communicable_diseases_deaths_100_000_people 0.65
## Access_to_essential_services_0_none_100_full_coverage 0.11
## Access_to_quality_healthcare_0_unequal_4_equal 0.29
## Outdoor_air_pollution_attributable_deaths_deaths_100_000_people 0.96
## Greenhouse_gas_emissions_CO2_equivalents_GDP 0.80
## Biome_protection 0.95
## Political_rights_0_no_rights_40_full_rights 0.62
## Freedom_of_expression_0_no_freedom_1_full_freedom 0.78
## Freedom_of_religion_0_no_freedom_4_full_freedom 0.89
## Access_to_justice_0_non_existent_1_observed 0.54
## Property_rights_for_women_0_no_rights_5_full_rights 0.54
## Vulnerable_employment_pct_of_employees 0.26
## Early_marriage_pct_of_women 0.44
## Satisfied_demand_for_contraception_pct_of_women 0.57
## Corruption_0_high_100_low 0.35
## Acceptance_of_gays_and_lesbians_0_low_100_high 0.60
## Discrimination_and_violence_against_minorities_1_low_10_high 0.78
## Equality_of_political_power_by_gender_0_unequal_power_4_equal_power 0.73
## Equality_of_political_power_by_socioeconomic_position_0_unequal_power_4_equal_power 0.76
## Equality_of_political_power_by_social_group_0_unequal_power_4_equal_power 0.78
## Years_of_tertiary_schooling 0.37
## Women_s_average_years_in_school 0.24
## Globally_ranked_universities_points 0.92
## Percent_of_tertiary_students_enrolled_in_globally_ranked_universities 0.67
## com
## Undernourishment_pct_of_pop 1
## Maternal_mortality_rate_deaths_100_000_live_births 1
## Child_mortality_rate_deaths_1_000_live_births 1
## Child_stunting_pct_of_children 1
## Deaths_from_infectious_diseases_deaths_100_000_people 1
## Access_to_at_least_basic_drinking_water_pct_of_pop 1
## Access_to_piped_water_pct_of_pop 1
## Access_to_at_least_basic_sanitation_facilities_pct_of_pop 1
## Rural_open_defecation_pct_of_pop 1
## Access_to_electricity_pct_of_pop 1
## Quality_of_electricity_supply_1_low_7_high 1
## Household_air_pollution_attributable_deaths_deaths_100_000_people 1
## Access_to_clean_fuels_and_technology_for_cooking_pct_of_pop 1
## Homicide_rate_deaths_100_000_people 1
## Perceived_criminality_1_low_5_high 1
## Political_killings_and_torture_0_low_freedom_1_high_freedom 1
## Traffic_deaths_deaths_100_000_people 1
## Adult_literacy_rate_pct_of_pop_aged_15plus 1
## Primary_school_enrollment_pct_of_children 1
## Secondary_school_enrollment_pct_of_children 1
## Gender_parity_in_secondary_enrollment_distance_from_parity 1
## Access_to_quality_education_0_unequal_4_equal 1
## Mobile_telephone_subscriptions_subscriptions_100_people 1
## Internet_users_pct_of_pop 1
## Access_to_online_governance_0_low_1_high 1
## Media_censorship_0_frequent_4_rare 1
## Life_expectancy_at_60_years 1
## Premature_deaths_from_non_communicable_diseases_deaths_100_000_people 1
## Access_to_essential_services_0_none_100_full_coverage 1
## Access_to_quality_healthcare_0_unequal_4_equal 1
## Outdoor_air_pollution_attributable_deaths_deaths_100_000_people 1
## Greenhouse_gas_emissions_CO2_equivalents_GDP 1
## Biome_protection 1
## Political_rights_0_no_rights_40_full_rights 1
## Freedom_of_expression_0_no_freedom_1_full_freedom 1
## Freedom_of_religion_0_no_freedom_4_full_freedom 1
## Access_to_justice_0_non_existent_1_observed 1
## Property_rights_for_women_0_no_rights_5_full_rights 1
## Vulnerable_employment_pct_of_employees 1
## Early_marriage_pct_of_women 1
## Satisfied_demand_for_contraception_pct_of_women 1
## Corruption_0_high_100_low 1
## Acceptance_of_gays_and_lesbians_0_low_100_high 1
## Discrimination_and_violence_against_minorities_1_low_10_high 1
## Equality_of_political_power_by_gender_0_unequal_power_4_equal_power 1
## Equality_of_political_power_by_socioeconomic_position_0_unequal_power_4_equal_power 1
## Equality_of_political_power_by_social_group_0_unequal_power_4_equal_power 1
## Years_of_tertiary_schooling 1
## Women_s_average_years_in_school 1
## Globally_ranked_universities_points 1
## Percent_of_tertiary_students_enrolled_in_globally_ranked_universities 1
##
## MR1
## SS loadings 24.91
## Proportion Var 0.49
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## The degrees of freedom for the null model are 1275 and the objective function was 95.2 with Chi Square of 21727
## The degrees of freedom for the model are 1224 and the objective function was 58.6
##
## The root mean square of the residuals (RMSR) is 0.13
## The df corrected root mean square of the residuals is 0.13
##
## The harmonic number of observations is 182 with the empirical chi square 7941 with prob < 0
## The total number of observations was 247 with Likelihood Chi Square = 13334 with prob < 0
##
## Tucker Lewis Index of factoring reliability = 0.381
## RMSEA index = 0.2 and the 90 % confidence intervals are 0.197 0.204
## BIC = 6591
## Fit based upon off diagonal values = 0.93
#method variance
spi_fa_methods = fa_all_methods(spi_indicators_imp, messages = F)
#heatmap
spi_fa_methods$loadings %>% GG_heatmap()
spi_fa_methods$scores %>% GG_heatmap()
#save
d$SPI_g = spi_fa$scores[, 1] %>% standardize()
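As another robustness sketch (not in the original analysis), the factor scores can be compared to a unit-weighted composite, i.e. the standardized indicators summed with +1/-1 weights according to the sign of their loadings; the two should correlate near 1 if the scores are not method-dependent:
#unit-weighted composite vs. factor scores
unit_scores = (scale(spi_indicators_imp) %*% sign(spi_fa$loadings[, 1]))[, 1]
cor(unit_scores, d$SPI_g, use = "pairwise.complete.obs")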
#put the imputed values back into main, reverse as needed
for (i in seq_along(spi_indicators_imp)) {
  v = names(spi_indicators_imp)[i]
  #keep the orientation consistent with the general factor:
  #positive loading kept as-is, negative loading reversed
  if (spi_fa$loadings[i] > 0) {
    d[[v]] = spi_indicators_imp[[v]]
  } else {
    d[[v]] = spi_indicators_imp[[v]] * -1
  }
}
#correlations with other indexes
d %>%
select(SPI_g, Human_Development_Index_HDI, Social_Progress_Index) %>%
wtd.cors(weight = d$population2017 %>% sqrt())
## SPI_g Human_Development_Index_HDI
## SPI_g 1.000 0.965
## Human_Development_Index_HDI 0.965 1.000
## Social_Progress_Index 0.991 0.953
## Social_Progress_Index
## SPI_g 0.991
## Human_Development_Index_HDI 0.953
## Social_Progress_Index 1.000
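The square root population weighting compresses the influence of the largest countries. With round, hypothetical population figures, China (about 1.4 billion) gets roughly 64 times the weight of Iceland (about 340,000), rather than roughly 4,000 times under raw population weighting:
#sqrt weighting vs. raw weighting (illustrative population figures)
sqrt(1.4e9) / sqrt(3.4e5)
1.4e9 / 3.4e5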
#no weights
d %>%
select(SPI_g, Human_Development_Index_HDI, Social_Progress_Index) %>%
wtd.cors()
## SPI_g Human_Development_Index_HDI
## SPI_g 1.000 0.959
## Human_Development_Index_HDI 0.959 1.000
## Social_Progress_Index 0.993 0.950
## Social_Progress_Index
## SPI_g 0.993
## Human_Development_Index_HDI 0.950
## Social_Progress_Index 1.000
#variable lists
main_vars = c("SPI_g", "SAS_IQc", "x95pct_IQc", "x05pct_IQc", "IC_reg", "LC_reg", "IC_delta", "LC_delta")
main_vars2 = c(main_vars, names(spi_indicators_imp), econ_outcomes)
Next, we add the economic variables to obtain a more varied index.
#collect data, use the original form
spi2_indicators = cbind(
  spi_indicators,
  d %>% select(!!econ_outcomes)
)
spi2_indicators %>% miss_amount()
## cases with missing data vars with missing data cells with missing data
## 0.770 1.000 0.247
#impute
spi2_indicators_imp = spi2_indicators %>% miss_impute(max_na = 10)
#amount after imputing
spi2_indicators_imp %>% miss_by_case() %>% table2()
#factor analyze
spi2_fa = fa(spi2_indicators_imp)
## In smc, smcs > 1 were set to 1.0
## In smc, smcs < 0 were set to .0
## In smc, smcs > 1 were set to 1.0
## In smc, smcs < 0 were set to .0
## In smc, smcs > 1 were set to 1.0
## In smc, smcs < 0 were set to .0
## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was done
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
spi2_fa
## Factor Analysis using method = minres
## Call: fa(r = spi2_indicators_imp)
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1
## Undernourishment_pct_of_pop -0.79
## Maternal_mortality_rate_deaths_100_000_live_births -0.84
## Child_mortality_rate_deaths_1_000_live_births -0.88
## Child_stunting_pct_of_children -0.88
## Deaths_from_infectious_diseases_deaths_100_000_people -0.80
## Access_to_at_least_basic_drinking_water_pct_of_pop 0.86
## Access_to_piped_water_pct_of_pop 0.84
## Access_to_at_least_basic_sanitation_facilities_pct_of_pop 0.85
## Rural_open_defecation_pct_of_pop -0.64
## Access_to_electricity_pct_of_pop 0.81
## Quality_of_electricity_supply_1_low_7_high 0.87
## Household_air_pollution_attributable_deaths_deaths_100_000_people -0.84
## Access_to_clean_fuels_and_technology_for_cooking_pct_of_pop 0.86
## Homicide_rate_deaths_100_000_people -0.14
## Perceived_criminality_1_low_5_high -0.56
## Political_killings_and_torture_0_low_freedom_1_high_freedom 0.63
## Traffic_deaths_deaths_100_000_people -0.68
## Adult_literacy_rate_pct_of_pop_aged_15plus 0.77
## Primary_school_enrollment_pct_of_children 0.62
## Secondary_school_enrollment_pct_of_children 0.90
## Gender_parity_in_secondary_enrollment_distance_from_parity 0.51
## Access_to_quality_education_0_unequal_4_equal 0.76
## Mobile_telephone_subscriptions_subscriptions_100_people 0.64
## Internet_users_pct_of_pop 0.91
## Access_to_online_governance_0_low_1_high 0.75
## Media_censorship_0_frequent_4_rare 0.44
## Life_expectancy_at_60_years 0.84
## Premature_deaths_from_non_communicable_diseases_deaths_100_000_people -0.59
## Access_to_essential_services_0_none_100_full_coverage 0.95
## Access_to_quality_healthcare_0_unequal_4_equal 0.84
## Outdoor_air_pollution_attributable_deaths_deaths_100_000_people -0.18
## Greenhouse_gas_emissions_CO2_equivalents_GDP -0.43
## Biome_protection 0.21
## Political_rights_0_no_rights_40_full_rights 0.59
## Freedom_of_expression_0_no_freedom_1_full_freedom 0.44
## Freedom_of_religion_0_no_freedom_4_full_freedom 0.30
## Access_to_justice_0_non_existent_1_observed 0.65
## Property_rights_for_women_0_no_rights_5_full_rights 0.67
## Vulnerable_employment_pct_of_employees -0.88
## Early_marriage_pct_of_women -0.75
## Satisfied_demand_for_contraception_pct_of_women 0.65
## Corruption_0_high_100_low 0.80
## Acceptance_of_gays_and_lesbians_0_low_100_high 0.61
## Discrimination_and_violence_against_minorities_1_low_10_high -0.45
## Equality_of_political_power_by_gender_0_unequal_power_4_equal_power 0.50
## Equality_of_political_power_by_socioeconomic_position_0_unequal_power_4_equal_power 0.47
## Equality_of_political_power_by_social_group_0_unequal_power_4_equal_power 0.45
## Years_of_tertiary_schooling 0.78
## Women_s_average_years_in_school 0.87
## Globally_ranked_universities_points 0.29
## Percent_of_tertiary_students_enrolled_in_globally_ranked_universities 0.58
## median_income 0.91
## median_income_hh 0.85
## GNI_2019 0.93
## GNI_growth 0.16
## GDP_2019 0.92
## GDP_growth 0.17
## h2
## Undernourishment_pct_of_pop 0.617
## Maternal_mortality_rate_deaths_100_000_live_births 0.702
## Child_mortality_rate_deaths_1_000_live_births 0.771
## Child_stunting_pct_of_children 0.775
## Deaths_from_infectious_diseases_deaths_100_000_people 0.636
## Access_to_at_least_basic_drinking_water_pct_of_pop 0.739
## Access_to_piped_water_pct_of_pop 0.713
## Access_to_at_least_basic_sanitation_facilities_pct_of_pop 0.724
## Rural_open_defecation_pct_of_pop 0.414
## Access_to_electricity_pct_of_pop 0.651
## Quality_of_electricity_supply_1_low_7_high 0.751
## Household_air_pollution_attributable_deaths_deaths_100_000_people 0.706
## Access_to_clean_fuels_and_technology_for_cooking_pct_of_pop 0.748
## Homicide_rate_deaths_100_000_people 0.019
## Perceived_criminality_1_low_5_high 0.316
## Political_killings_and_torture_0_low_freedom_1_high_freedom 0.400
## Traffic_deaths_deaths_100_000_people 0.457
## Adult_literacy_rate_pct_of_pop_aged_15plus 0.588
## Primary_school_enrollment_pct_of_children 0.384
## Secondary_school_enrollment_pct_of_children 0.807
## Gender_parity_in_secondary_enrollment_distance_from_parity 0.255
## Access_to_quality_education_0_unequal_4_equal 0.577
## Mobile_telephone_subscriptions_subscriptions_100_people 0.416
## Internet_users_pct_of_pop 0.833
## Access_to_online_governance_0_low_1_high 0.562
## Media_censorship_0_frequent_4_rare 0.191
## Life_expectancy_at_60_years 0.709
## Premature_deaths_from_non_communicable_diseases_deaths_100_000_people 0.353
## Access_to_essential_services_0_none_100_full_coverage 0.904
## Access_to_quality_healthcare_0_unequal_4_equal 0.711
## Outdoor_air_pollution_attributable_deaths_deaths_100_000_people 0.033
## Greenhouse_gas_emissions_CO2_equivalents_GDP 0.186
## Biome_protection 0.046
## Political_rights_0_no_rights_40_full_rights 0.350
## Freedom_of_expression_0_no_freedom_1_full_freedom 0.198
## Freedom_of_religion_0_no_freedom_4_full_freedom 0.087
## Access_to_justice_0_non_existent_1_observed 0.427
## Property_rights_for_women_0_no_rights_5_full_rights 0.446
## Vulnerable_employment_pct_of_employees 0.768
## Early_marriage_pct_of_women 0.565
## Satisfied_demand_for_contraception_pct_of_women 0.424
## Corruption_0_high_100_low 0.641
## Acceptance_of_gays_and_lesbians_0_low_100_high 0.374
## Discrimination_and_violence_against_minorities_1_low_10_high 0.204
## Equality_of_political_power_by_gender_0_unequal_power_4_equal_power 0.246
## Equality_of_political_power_by_socioeconomic_position_0_unequal_power_4_equal_power 0.222
## Equality_of_political_power_by_social_group_0_unequal_power_4_equal_power 0.200
## Years_of_tertiary_schooling 0.603
## Women_s_average_years_in_school 0.761
## Globally_ranked_universities_points 0.085
## Percent_of_tertiary_students_enrolled_in_globally_ranked_universities 0.339
## median_income 0.833
## median_income_hh 0.729
## GNI_2019 0.860
## GNI_growth 0.024
## GDP_2019 0.851
## GDP_growth 0.027
## u2
## Undernourishment_pct_of_pop 0.383
## Maternal_mortality_rate_deaths_100_000_live_births 0.298
## Child_mortality_rate_deaths_1_000_live_births 0.229
## Child_stunting_pct_of_children 0.225
## Deaths_from_infectious_diseases_deaths_100_000_people 0.364
## Access_to_at_least_basic_drinking_water_pct_of_pop 0.261
## Access_to_piped_water_pct_of_pop 0.287
## Access_to_at_least_basic_sanitation_facilities_pct_of_pop 0.276
## Rural_open_defecation_pct_of_pop 0.586
## Access_to_electricity_pct_of_pop 0.349
## Quality_of_electricity_supply_1_low_7_high 0.249
## Household_air_pollution_attributable_deaths_deaths_100_000_people 0.294
## Access_to_clean_fuels_and_technology_for_cooking_pct_of_pop 0.252
## Homicide_rate_deaths_100_000_people 0.981
## Perceived_criminality_1_low_5_high 0.684
## Political_killings_and_torture_0_low_freedom_1_high_freedom 0.600
## Traffic_deaths_deaths_100_000_people 0.543
## Adult_literacy_rate_pct_of_pop_aged_15plus 0.412
## Primary_school_enrollment_pct_of_children 0.616
## Secondary_school_enrollment_pct_of_children 0.193
## Gender_parity_in_secondary_enrollment_distance_from_parity 0.745
## Access_to_quality_education_0_unequal_4_equal 0.423
## Mobile_telephone_subscriptions_subscriptions_100_people 0.584
## Internet_users_pct_of_pop 0.167
## Access_to_online_governance_0_low_1_high 0.438
## Media_censorship_0_frequent_4_rare 0.809
## Life_expectancy_at_60_years 0.291
## Premature_deaths_from_non_communicable_diseases_deaths_100_000_people 0.647
## Access_to_essential_services_0_none_100_full_coverage 0.096
## Access_to_quality_healthcare_0_unequal_4_equal 0.289
## Outdoor_air_pollution_attributable_deaths_deaths_100_000_people 0.967
## Greenhouse_gas_emissions_CO2_equivalents_GDP 0.814
## Biome_protection 0.954
## Political_rights_0_no_rights_40_full_rights 0.650
## Freedom_of_expression_0_no_freedom_1_full_freedom 0.802
## Freedom_of_religion_0_no_freedom_4_full_freedom 0.913
## Access_to_justice_0_non_existent_1_observed 0.573
## Property_rights_for_women_0_no_rights_5_full_rights 0.554
## Vulnerable_employment_pct_of_employees 0.232
## Early_marriage_pct_of_women 0.435
## Satisfied_demand_for_contraception_pct_of_women 0.576
## Corruption_0_high_100_low 0.359
## Acceptance_of_gays_and_lesbians_0_low_100_high 0.626
## Discrimination_and_violence_against_minorities_1_low_10_high 0.796
## Equality_of_political_power_by_gender_0_unequal_power_4_equal_power 0.754
## Equality_of_political_power_by_socioeconomic_position_0_unequal_power_4_equal_power 0.778
## Equality_of_political_power_by_social_group_0_unequal_power_4_equal_power 0.800
## Years_of_tertiary_schooling 0.397
## Women_s_average_years_in_school 0.239
## Globally_ranked_universities_points 0.915
## Percent_of_tertiary_students_enrolled_in_globally_ranked_universities 0.661
## median_income 0.167
## median_income_hh 0.271
## GNI_2019 0.140
## GNI_growth 0.976
## GDP_2019 0.149
## GDP_growth 0.973
## com
## Undernourishment_pct_of_pop 1
## Maternal_mortality_rate_deaths_100_000_live_births 1
## Child_mortality_rate_deaths_1_000_live_births 1
## Child_stunting_pct_of_children 1
## Deaths_from_infectious_diseases_deaths_100_000_people 1
## Access_to_at_least_basic_drinking_water_pct_of_pop 1
## Access_to_piped_water_pct_of_pop 1
## Access_to_at_least_basic_sanitation_facilities_pct_of_pop 1
## Rural_open_defecation_pct_of_pop 1
## Access_to_electricity_pct_of_pop 1
## Quality_of_electricity_supply_1_low_7_high 1
## Household_air_pollution_attributable_deaths_deaths_100_000_people 1
## Access_to_clean_fuels_and_technology_for_cooking_pct_of_pop 1
## Homicide_rate_deaths_100_000_people 1
## Perceived_criminality_1_low_5_high 1
## Political_killings_and_torture_0_low_freedom_1_high_freedom 1
## Traffic_deaths_deaths_100_000_people 1
## Adult_literacy_rate_pct_of_pop_aged_15plus 1
## Primary_school_enrollment_pct_of_children 1
## Secondary_school_enrollment_pct_of_children 1
## Gender_parity_in_secondary_enrollment_distance_from_parity 1
## Access_to_quality_education_0_unequal_4_equal 1
## Mobile_telephone_subscriptions_subscriptions_100_people 1
## Internet_users_pct_of_pop 1
## Access_to_online_governance_0_low_1_high 1
## Media_censorship_0_frequent_4_rare 1
## Life_expectancy_at_60_years 1
## Premature_deaths_from_non_communicable_diseases_deaths_100_000_people 1
## Access_to_essential_services_0_none_100_full_coverage 1
## Access_to_quality_healthcare_0_unequal_4_equal 1
## Outdoor_air_pollution_attributable_deaths_deaths_100_000_people 1
## Greenhouse_gas_emissions_CO2_equivalents_GDP 1
## Biome_protection 1
## Political_rights_0_no_rights_40_full_rights 1
## Freedom_of_expression_0_no_freedom_1_full_freedom 1
## Freedom_of_religion_0_no_freedom_4_full_freedom 1
## Access_to_justice_0_non_existent_1_observed 1
## Property_rights_for_women_0_no_rights_5_full_rights 1
## Vulnerable_employment_pct_of_employees 1
## Early_marriage_pct_of_women 1
## Satisfied_demand_for_contraception_pct_of_women 1
## Corruption_0_high_100_low 1
## Acceptance_of_gays_and_lesbians_0_low_100_high 1
## Discrimination_and_violence_against_minorities_1_low_10_high 1
## Equality_of_political_power_by_gender_0_unequal_power_4_equal_power 1
## Equality_of_political_power_by_socioeconomic_position_0_unequal_power_4_equal_power 1
## Equality_of_political_power_by_social_group_0_unequal_power_4_equal_power 1
## Years_of_tertiary_schooling 1
## Women_s_average_years_in_school 1
## Globally_ranked_universities_points 1
## Percent_of_tertiary_students_enrolled_in_globally_ranked_universities 1
## median_income 1
## median_income_hh 1
## GNI_2019 1
## GNI_growth 1
## GDP_2019 1
## GDP_growth 1
##
## MR1
## SS loadings 27.96
## Proportion Var 0.49
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## The degrees of freedom for the null model are 1596 and the objective function was 150 with Chi Square of 33981
## The degrees of freedom for the model are 1539 and the objective function was 107
##
## The root mean square of the residuals (RMSR) is 0.12
## The df corrected root mean square of the residuals is 0.13
##
## The harmonic number of observations is 179 with the empirical chi square 8881 with prob < 0
## The total number of observations was 247 with Likelihood Chi Square = 24026 with prob < 0
##
## Tucker Lewis Index of factoring reliability = 0.278
## RMSEA index = 0.243 and the 90 % confidence intervals are 0.241 0.246
## BIC = 15547
## Fit based upon off diagonal values = 0.94
#method variance
spi2_fa_methods = fa_all_methods(spi2_indicators_imp, messages = F)
#heatmap
spi2_fa_methods$loadings %>% GG_heatmap()
spi2_fa_methods$scores %>% GG_heatmap()
#save
d$SPI2_g = spi2_fa$scores[, 1] %>% standardize()
#correlations with other indexes
d %>%
select(SPI_g, SPI2_g, Human_Development_Index_HDI, Social_Progress_Index) %>%
wtd.cors(weight = d$population2017 %>% sqrt())
## SPI_g SPI2_g Human_Development_Index_HDI
## SPI_g 1.000 0.998 0.965
## SPI2_g 0.998 1.000 0.972
## Human_Development_Index_HDI 0.965 0.972 1.000
## Social_Progress_Index 0.991 0.987 0.953
## Social_Progress_Index
## SPI_g 0.991
## SPI2_g 0.987
## Human_Development_Index_HDI 0.953
## Social_Progress_Index 1.000
#keep only cases with data for mean and tails
#otherwise, the model fits are done on slightly different subsets
d2 = d %>% filter(!is.na(SAS_IQc), !is.na(x95pct_IQc))
#LV12 coverage
d$LV2012estimatedIQ %>% miss_count(reverse = T)
## [1] 195
d2$LV2012estimatedIQ %>% miss_count(reverse = T)
## [1] 97
#range restriction in mean ability
wtd_sd(d$LV2012estimatedIQ, d$population2017 %>% sqrt())
## [1] 10.9
wtd_sd(d2$LV2012estimatedIQ, d2$population2017 %>% sqrt())
## [1] 8.05
wtd_sd(d2$LV2012estimatedIQ, d2$population2017 %>% sqrt()) / wtd_sd(d$LV2012estimatedIQ, d$population2017 %>% sqrt())
## [1] 0.739
#range restriction in SPI g
wtd_sd(d$SPI_g, d$population2017 %>% sqrt())
## [1] 1
wtd_sd(d2$SPI_g, d2$population2017 %>% sqrt())
## [1] 0.625
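These ratios could be plugged into the standard correction for direct range restriction (Thorndike case II); the sketch below is illustrative only and assumes direct selection on the restricted variable:
#range restriction correction sketch
#r = correlation in the restricted sample, u = restricted SD / unrestricted SD
rr_correct = function(r, u) {
  U = 1 / u
  (r * U) / sqrt(1 - r^2 + r^2 * U^2)
}
#e.g., a hypothetical restricted correlation of .60 with the u = 0.739 found above
rr_correct(.60, .739)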
Basic results: correlations among the cognitive measures and the SPI general factor, with and without population weighting.
#no weights
wtd.cors(d %>% select(LV2012estimatedIQ, SAS_IQc, x95pct_IQc, x05pct_IQc, IC_reg, LC_reg, IC_delta, LC_delta, SPI_g))
## LV2012estimatedIQ SAS_IQc x95pct_IQc x05pct_IQc IC_reg
## LV2012estimatedIQ 1.0000 9.09e-01 0.892 0.9151 -2.47e-02
## SAS_IQc 0.9087 1.00e+00 0.980 0.9812 -4.84e-16
## x95pct_IQc 0.8920 9.80e-01 1.000 0.9342 1.99e-01
## x05pct_IQc 0.9151 9.81e-01 0.934 1.0000 -1.37e-01
## IC_reg -0.0247 -4.84e-16 0.199 -0.1368 1.00e+00
## LC_reg 0.0494 5.98e-16 -0.141 0.1932 -7.08e-01
## IC_delta -0.6412 -6.68e-01 -0.506 -0.7568 7.45e-01
## LC_delta -0.1139 -1.75e-01 -0.311 0.0181 -6.97e-01
## SPI_g 0.8034 8.10e-01 0.838 0.7700 2.51e-01
## LC_reg IC_delta LC_delta SPI_g
## LV2012estimatedIQ 4.94e-02 -0.641 -0.1139 0.803
## SAS_IQc 5.98e-16 -0.668 -0.1754 0.810
## x95pct_IQc -1.41e-01 -0.506 -0.3108 0.838
## x05pct_IQc 1.93e-01 -0.757 0.0181 0.770
## IC_reg -7.08e-01 0.745 -0.6969 0.251
## LC_reg 1.00e+00 -0.527 0.9845 -0.155
## IC_delta -5.27e-01 1.000 -0.4018 -0.352
## LC_delta 9.84e-01 -0.402 1.0000 -0.291
## SPI_g -1.55e-01 -0.352 -0.2912 1.000
#weights
wtd.cors(d %>% select(LV2012estimatedIQ, SAS_IQc, x95pct_IQc, x05pct_IQc, IC_reg, LC_reg, IC_delta, LC_delta, SPI_g),
weight = d$population2017 %>% sqrt())
## LV2012estimatedIQ SAS_IQc x95pct_IQc x05pct_IQc IC_reg
## LV2012estimatedIQ 1.000 0.9127 0.8934 0.9235 -0.1542
## SAS_IQc 0.913 1.0000 0.9776 0.9875 -0.1156
## x95pct_IQc 0.893 0.9776 1.0000 0.9419 0.0959
## x05pct_IQc 0.924 0.9875 0.9419 1.0000 -0.2248
## IC_reg -0.154 -0.1156 0.0959 -0.2248 1.0000
## LC_reg 0.142 0.0792 -0.0707 0.2354 -0.7088
## IC_delta -0.682 -0.7018 -0.5362 -0.7724 0.7887
## LC_delta -0.057 -0.1362 -0.2804 0.0218 -0.6795
## SPI_g 0.783 0.7575 0.8043 0.7229 0.2013
## LC_reg IC_delta LC_delta SPI_g
## LV2012estimatedIQ 0.1425 -0.682 -0.0570 0.783
## SAS_IQc 0.0792 -0.702 -0.1362 0.758
## x95pct_IQc -0.0707 -0.536 -0.2804 0.804
## x05pct_IQc 0.2354 -0.772 0.0218 0.723
## IC_reg -0.7088 0.789 -0.6795 0.201
## LC_reg 1.0000 -0.557 0.9768 -0.108
## IC_delta -0.5573 1.000 -0.4031 -0.322
## LC_delta 0.9768 -0.403 1.0000 -0.270
## SPI_g -0.1084 -0.322 -0.2703 1.000
#'complete cases'
wtd.cors(d2 %>% select(LV2012estimatedIQ, SAS_IQc, x95pct_IQc, x05pct_IQc, IC_reg, LC_reg, IC_delta, LC_delta, SPI_g))
## LV2012estimatedIQ SAS_IQc x95pct_IQc x05pct_IQc IC_reg
## LV2012estimatedIQ 1.0000 9.19e-01 0.892 0.9151 -2.47e-02
## SAS_IQc 0.9190 1.00e+00 0.980 0.9812 -4.84e-16
## x95pct_IQc 0.8920 9.80e-01 1.000 0.9342 1.99e-01
## x05pct_IQc 0.9151 9.81e-01 0.934 1.0000 -1.37e-01
## IC_reg -0.0247 -4.84e-16 0.199 -0.1368 1.00e+00
## LC_reg 0.0494 5.98e-16 -0.141 0.1932 -7.08e-01
## IC_delta -0.6412 -6.68e-01 -0.506 -0.7568 7.45e-01
## LC_delta -0.1139 -1.75e-01 -0.311 0.0181 -6.97e-01
## SPI_g 0.7284 8.11e-01 0.838 0.7700 2.51e-01
## LC_reg IC_delta LC_delta SPI_g
## LV2012estimatedIQ 4.94e-02 -0.641 -0.1139 0.728
## SAS_IQc 5.98e-16 -0.668 -0.1754 0.811
## x95pct_IQc -1.41e-01 -0.506 -0.3108 0.838
## x05pct_IQc 1.93e-01 -0.757 0.0181 0.770
## IC_reg -7.08e-01 0.745 -0.6969 0.251
## LC_reg 1.00e+00 -0.527 0.9845 -0.155
## IC_delta -5.27e-01 1.000 -0.4018 -0.352
## LC_delta 9.84e-01 -0.402 1.0000 -0.291
## SPI_g -1.55e-01 -0.352 -0.2912 1.000
#weights
wtd.cors(d2 %>% select(LV2012estimatedIQ, SAS_IQc, x95pct_IQc, x05pct_IQc, IC_reg, LC_reg, IC_delta, LC_delta, SPI_g),
weight = d2$population2017 %>% sqrt())
## LV2012estimatedIQ SAS_IQc x95pct_IQc x05pct_IQc IC_reg
## LV2012estimatedIQ 1.000 0.9241 0.8934 0.9235 -0.1542
## SAS_IQc 0.924 1.0000 0.9776 0.9875 -0.1156
## x95pct_IQc 0.893 0.9776 1.0000 0.9419 0.0959
## x05pct_IQc 0.924 0.9875 0.9419 1.0000 -0.2248
## IC_reg -0.154 -0.1156 0.0959 -0.2248 1.0000
## LC_reg 0.142 0.0792 -0.0707 0.2354 -0.7088
## IC_delta -0.682 -0.7018 -0.5362 -0.7724 0.7887
## LC_delta -0.057 -0.1362 -0.2804 0.0218 -0.6795
## SPI_g 0.619 0.7589 0.8043 0.7229 0.2013
## LC_reg IC_delta LC_delta SPI_g
## LV2012estimatedIQ 0.1425 -0.682 -0.0570 0.619
## SAS_IQc 0.0792 -0.702 -0.1362 0.759
## x95pct_IQc -0.0707 -0.536 -0.2804 0.804
## x05pct_IQc 0.2354 -0.772 0.0218 0.723
## IC_reg -0.7088 0.789 -0.6795 0.201
## LC_reg 1.0000 -0.557 0.9768 -0.108
## IC_delta -0.5573 1.000 -0.4031 -0.322
## LC_delta 0.9768 -0.403 1.0000 -0.270
## SPI_g -0.1084 -0.322 -0.2703 1.000
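The key test of the cognitive elite model is whether the cognitive tails add predictive validity beyond the mean. A minimal sketch of such a comparison using the r2get helper defined above (the model specifications here are illustrative assumptions, not the study's final models):
#incremental validity sketch: does the 95th percentile residual add to the mean?
c(
  mean_only = lm(SPI_g ~ SAS_IQc, data = d2) %>% r2get(),
  mean_plus_IC = lm(SPI_g ~ SAS_IQc + IC_reg, data = d2) %>% r2get()
)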
#with distributions
d %>% select(LV2012estimatedIQ, SAS_IQc, x95pct_IQc, x05pct_IQc, IC_reg, LC_reg, IC_delta, LC_delta, SPI_g) %>% ggpairs(columnLabels = c("Lynn 2012", "Mean ability", "95th ability", "5th ability", "95th ability resid", "5th ability resid", "95th ability subt.", "5th ability subt.", "General SPI"))
## (ggpairs emitted repeated warnings: 52 to 154 rows removed per panel due to
## missing values, from stat_density, ggally_statistic, and geom_point)
GG_save("figs/ggpairs.png")
## (the same warnings repeat when the figure is saved; omitted)
Same as Rindermann.
#calculate correlation with mean and top and bottom for each outcome
bivar_results = map_df(names(spi_indicators), function(n) {
#correlations
cors = cbind(
spi = d2[[n]],
mean = d2$SAS_IQc,
upper = d2$x95pct_IQc,
upper_reg = d2$IC_reg,
lower = d2$x05pct_IQc,
lower_reg = d2$LC_reg
) %>%
wtd.cors()
cors_wt = cbind(
spi = d2[[n]],
mean = d2$SAS_IQc,
upper = d2$x95pct_IQc,
upper_reg = d2$IC_reg,
lower = d2$x05pct_IQc,
lower_reg = d2$LC_reg
) %>%
wtd.cors(weight = d2$population2017 %>% sqrt())
#results
tibble(
indicator = n,
#no weights
mean = cors[1, 2],
IC = cors[1, 3],
IC_reg = cors[1, 4],
LC = cors[1, 5],
LC_reg = cors[1, 6],
#weights
mean_wt = cors_wt[1, 2],
IC_wt = cors_wt[1, 3],
IC_reg_wt = cors_wt[1, 4],
LC_wt = cors_wt[1, 5],
LC_reg_wt = cors_wt[1, 6],
)
})
#summary stats
bivar_results[-1] %>%
#summary stats for each predictor column
describe() %>%
as.data.frame() %>%
mutate(predictor = names(bivar_results[-1])) %>%
select(predictor, mean, median, sd, mad, min, max)
#p values
t.test(bivar_results$mean, bivar_results$IC, paired = T)
##
## Paired t-test
##
## data: bivar_results$mean and bivar_results$IC
## t = -4, df = 50, p-value = 2e-04
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.01885 -0.00629
## sample estimates:
## mean of the differences
## -0.0126
t.test(bivar_results$mean_wt, bivar_results$IC_wt, paired = T)
##
## Paired t-test
##
## data: bivar_results$mean_wt and bivar_results$IC_wt
## t = -5, df = 50, p-value = 1e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.0324 -0.0135
## sample estimates:
## mean of the differences
## -0.0229
Regress each variable on mean and residualized tails.
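For reference, a minimal sketch (not part of the original pipeline) of how the residualized tail measures are presumably constructed: regress each tail IQ on the mean IQ and keep the standardized residuals, mirroring the construction used in the simulation section further below. By construction the residuals are uncorrelated with the mean, so they carry only the tail information not already captured by it. The `*_sketch` names are hypothetical, chosen to avoid clashing with the existing columns.
#sketch: residualized tail measures as standardized residuals of tail IQ on mean IQ
IC_reg_sketch = ols(x95pct_IQc ~ SAS_IQc, data = d2) %>% resid() %>% standardize()
LC_reg_sketch = ols(x05pct_IQc ~ SAS_IQc, data = d2) %>% resid() %>% standardize()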
#regression function
get_reg_results = function(x, spatial_lag = F, data = d2) {
map_df(x, function(n) {
#reg forms
if (spatial_lag) {
form1 = as.formula(str_glue("{n} ~ SAS_IQc + {n}_lag"))
form2 = as.formula(str_glue("{n} ~ SAS_IQc + IC_reg + {n}_lag"))
form3 = as.formula(str_glue("{n} ~ SAS_IQc + LC_reg + {n}_lag"))
form4 = as.formula(str_glue("{n} ~ SAS_IQc + IC_reg + LC_reg + {n}_lag"))
} else {
form1 = as.formula(str_glue("{n} ~ SAS_IQc"))
form2 = as.formula(str_glue("{n} ~ SAS_IQc + IC_reg"))
form3 = as.formula(str_glue("{n} ~ SAS_IQc + LC_reg"))
form4 = as.formula(str_glue("{n} ~ SAS_IQc + IC_reg + LC_reg"))
}
#fits
fit1 = ols(form1, data = data)
fit2 = ols(form2, data = data)
fit3 = ols(form3, data = data)
fit4 = ols(form4, data = data)
# + weights
fit1_wt = ols(form1, data = data, weights = data$population2017 %>% sqrt())
fit2_wt = ols(form2, data = data, weights = data$population2017 %>% sqrt())
fit3_wt = ols(form3, data = data, weights = data$population2017 %>% sqrt())
fit4_wt = ols(form4, data = data, weights = data$population2017 %>% sqrt())
#formal tests
lr1 = lrtest(fit1, fit2)
lr2 = lrtest(fit1, fit3)
lr3 = lrtest(fit2, fit4)
lr1_wt = lrtest(fit1_wt, fit2_wt)
lr2_wt = lrtest(fit1_wt, fit3_wt)
lr3_wt = lrtest(fit2_wt, fit4_wt)
#results
tibble(
indicator = n,
#no weights
fit1_r2 = fit1 %>% r2get(),
fit2_r2 = fit2 %>% r2get(),
fit3_r2 = fit3 %>% r2get(),
fit4_r2 = fit4 %>% r2get(),
#weights
fit1_wt_r2 = fit1_wt %>% r2get(),
fit2_wt_r2 = fit2_wt %>% r2get(),
fit3_wt_r2 = fit3_wt %>% r2get(),
fit4_wt_r2 = fit4_wt %>% r2get(),
#lr tests
LR1_p = lr1$stats[3],
LR2_p = lr2$stats[3],
LR3_p = lr3$stats[3],
LR1_wt_p = lr1_wt$stats[3],
LR2_wt_p = lr2_wt$stats[3],
LR3_wt_p = lr3_wt$stats[3]
)
})
}
#summarize R2s
reg_results = get_reg_results(names(spi_indicators))
reg_results %>% select(fit1_r2:fit4_wt_r2) %>% colMeans()
## fit1_r2 fit2_r2 fit3_r2 fit4_r2 fit1_wt_r2 fit2_wt_r2 fit3_wt_r2
## 0.285 0.304 0.294 0.311 0.272 0.313 0.285
## fit4_wt_r2
## 0.316
#formal -- how many model tests are better?
reg_results %>%
select(LR1_p:LR3_wt_p) %>%
{
plyr::adply(., 1, function(r) {
tibble(
LR1 = r$LR1_p < .01,
LR2 = r$LR2_p < .01,
LR3 = r$LR3_p < .01,
LR1_wt = r$LR1_wt_p < .01,
LR2_wt = r$LR2_wt_p < .01,
LR3_wt = r$LR3_wt_p < .01
)
})
} %>%
select(LR1:LR3_wt) %>%
colMeans()
## LR1 LR2 LR3 LR1_wt LR2_wt LR3_wt
## 0.1961 0.0784 0.0980 0.3922 0.0784 0.0588
#t tests
t.test(reg_results$fit1_wt_r2, reg_results$fit2_wt_r2, paired = T) %>% tidy()
t.test(reg_results$fit1_wt_r2, reg_results$fit3_wt_r2, paired = T) %>% tidy()
t.test(reg_results$fit1_wt_r2, reg_results$fit4_wt_r2, paired = T) %>% tidy()
t.test(reg_results$fit2_wt_r2, reg_results$fit4_wt_r2, paired = T) %>% tidy()
#summarize R2s
reg_results_econ = get_reg_results(econ_outcomes)
reg_results_econ %>% select(fit1_r2:fit4_wt_r2) %>% colMeans()
## fit1_r2 fit2_r2 fit3_r2 fit4_r2 fit1_wt_r2 fit2_wt_r2 fit3_wt_r2
## 0.373 0.406 0.379 0.401 0.370 0.440 0.394
## fit4_wt_r2
## 0.434
#formal -- how many model tests are better?
reg_results_econ %>%
select(LR1_p:LR3_wt_p) %>%
{
plyr::adply(., 1, function(r) {
tibble(
LR1 = r$LR1_p < .01,
LR2 = r$LR2_p < .01,
LR3 = r$LR3_p < .01,
LR1_wt = r$LR1_wt_p < .01,
LR2_wt = r$LR2_wt_p < .01,
LR3_wt = r$LR3_wt_p < .01
)
})
} %>%
select(LR1:LR3_wt) %>%
colMeans()
## LR1 LR2 LR3 LR1_wt LR2_wt LR3_wt
## 0.333 0.167 0.000 0.833 0.167 0.000
#t tests
t.test(reg_results_econ$fit1_wt_r2, reg_results_econ$fit2_wt_r2, paired = T) %>% tidy()
t.test(reg_results_econ$fit1_wt_r2, reg_results_econ$fit3_wt_r2, paired = T) %>% tidy()
t.test(reg_results_econ$fit1_wt_r2, reg_results_econ$fit4_wt_r2, paired = T) %>% tidy()
t.test(reg_results_econ$fit2_wt_r2, reg_results_econ$fit4_wt_r2, paired = T) %>% tidy()
Suggested by a prior study.
#SPI
#standardize data before so we get standardized betas
SPI_std = d2 %>% select(SPI_g, SAS_IQc, IC_reg, LC_reg) %>% df_standardize()
#no weights
list(
SPI_model1 = ols(SPI_g ~ SAS_IQc, data = SPI_std),
SPI_model2 = ols(SPI_g ~ SAS_IQc + IC_reg, data = SPI_std),
SPI_model3 = ols(SPI_g ~ SAS_IQc + LC_reg, data = SPI_std),
SPI_model4 = ols(SPI_g ~ SAS_IQc + IC_reg + LC_reg, data = SPI_std)
) -> SPI_models
SPI_models %>% summarize_models()
#weights
list(
SPI_model1_wt = ols(SPI_g ~ SAS_IQc, data = SPI_std, weights = d2$population2017 %>% sqrt()),
SPI_model2_wt = ols(SPI_g ~ SAS_IQc + IC_reg, data = SPI_std, weights = d2$population2017 %>% sqrt()),
SPI_model3_wt = ols(SPI_g ~ SAS_IQc + LC_reg, data = SPI_std, weights = d2$population2017 %>% sqrt()),
SPI_model4_wt = ols(SPI_g ~ SAS_IQc + IC_reg + LC_reg, data = SPI_std, weights = d2$population2017 %>% sqrt())
) -> SPI_models_wt
SPI_models_wt %>% summarize_models()
#formal tests
lrtest(SPI_models[[1]], SPI_models[[2]])
##
## Model 1: SPI_g ~ SAS_IQc
## Model 2: SPI_g ~ SAS_IQc + IC_reg
##
## L.R. Chisq d.f. P
## 13.92937 1.00000 0.00019
lrtest(SPI_models[[1]], SPI_models[[3]])
##
## Model 1: SPI_g ~ SAS_IQc
## Model 2: SPI_g ~ SAS_IQc + LC_reg
##
## L.R. Chisq d.f. P
## 4.4426 1.0000 0.0351
lrtest(SPI_models[[2]], SPI_models[[4]])
##
## Model 1: SPI_g ~ SAS_IQc + IC_reg
## Model 2: SPI_g ~ SAS_IQc + IC_reg + LC_reg
##
## L.R. Chisq d.f. P
## 0.384 1.000 0.535
lrtest(SPI_models_wt[[1]], SPI_models_wt[[2]])
##
## Model 1: SPI_g ~ SAS_IQc
## Model 2: SPI_g ~ SAS_IQc + IC_reg
##
## L.R. Chisq d.f. P
## 2.15e+01 1.00e+00 3.53e-06
lrtest(SPI_models_wt[[1]], SPI_models_wt[[3]])
##
## Model 1: SPI_g ~ SAS_IQc
## Model 2: SPI_g ~ SAS_IQc + LC_reg
##
## L.R. Chisq d.f. P
## 6.309 1.000 0.012
lrtest(SPI_models_wt[[2]], SPI_models_wt[[4]])
##
## Model 1: SPI_g ~ SAS_IQc + IC_reg
## Model 2: SPI_g ~ SAS_IQc + IC_reg + LC_reg
##
## L.R. Chisq d.f. P
## 0.981 1.000 0.322
#SPI
#standardize data before so we get standardized betas
SPI_std = d2 %>% select(SPI_g, SAS_IQc, IC_delta, LC_delta) %>% df_standardize()
#no weights
list(
SPI_model1 = ols(SPI_g ~ SAS_IQc, data = SPI_std),
SPI_model2 = ols(SPI_g ~ SAS_IQc + IC_delta, data = SPI_std),
SPI_model3 = ols(SPI_g ~ SAS_IQc + LC_delta, data = SPI_std),
SPI_model4 = ols(SPI_g ~ SAS_IQc + IC_delta + LC_delta, data = SPI_std)
) -> SPI_models
SPI_models %>% summarize_models()
#weights
list(
SPI_model1_wt = ols(SPI_g ~ SAS_IQc, data = SPI_std, weights = d2$population2017 %>% sqrt()),
SPI_model2_wt = ols(SPI_g ~ SAS_IQc + IC_delta, data = SPI_std, weights = d2$population2017 %>% sqrt()),
SPI_model3_wt = ols(SPI_g ~ SAS_IQc + LC_delta, data = SPI_std, weights = d2$population2017 %>% sqrt()),
SPI_model4_wt = ols(SPI_g ~ SAS_IQc + IC_delta + LC_delta, data = SPI_std, weights = d2$population2017 %>% sqrt())
) -> SPI_models_wt
SPI_models_wt %>% summarize_models()
#formal tests
lrtest(SPI_models[[1]], SPI_models[[2]])
##
## Model 1: SPI_g ~ SAS_IQc
## Model 2: SPI_g ~ SAS_IQc + IC_delta
##
## L.R. Chisq d.f. P
## 13.92937 1.00000 0.00019
lrtest(SPI_models[[1]], SPI_models[[3]])
##
## Model 1: SPI_g ~ SAS_IQc
## Model 2: SPI_g ~ SAS_IQc + LC_delta
##
## L.R. Chisq d.f. P
## 4.4426 1.0000 0.0351
lrtest(SPI_models[[2]], SPI_models[[4]])
##
## Model 1: SPI_g ~ SAS_IQc + IC_delta
## Model 2: SPI_g ~ SAS_IQc + IC_delta + LC_delta
##
## L.R. Chisq d.f. P
## 0.384 1.000 0.535
lrtest(SPI_models_wt[[1]], SPI_models_wt[[2]])
##
## Model 1: SPI_g ~ SAS_IQc
## Model 2: SPI_g ~ SAS_IQc + IC_delta
##
## L.R. Chisq d.f. P
## 2.15e+01 1.00e+00 3.53e-06
lrtest(SPI_models_wt[[1]], SPI_models_wt[[3]])
##
## Model 1: SPI_g ~ SAS_IQc
## Model 2: SPI_g ~ SAS_IQc + LC_delta
##
## L.R. Chisq d.f. P
## 6.309 1.000 0.012
lrtest(SPI_models_wt[[2]], SPI_models_wt[[4]])
##
## Model 1: SPI_g ~ SAS_IQc + IC_delta
## Model 2: SPI_g ~ SAS_IQc + IC_delta + LC_delta
##
## L.R. Chisq d.f. P
## 0.981 1.000 0.322
Replicate a few of his analyses with a better approach.
#esports models
#standardize data before so we get standardized betas
esports_std = d2 %>%
select(general_mental_sports, SAS_IQc, IC_reg, LC_reg) %>%
df_standardize()
(esports1_wt = ols(general_mental_sports ~ SAS_IQc, data = esports_std, weights = d2$population2017 %>% sqrt()))
## Frequencies of Missing Values Due to Each Variable
## general_mental_sports SAS_IQc (weights)
## 2 0 2
##
## Linear Regression Model
##
## ols(formula = general_mental_sports ~ SAS_IQc, data = esports_std,
## weights = d2$population2017 %>% sqrt())
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 97 LR chi2 83.87 R2 0.579
## sigma52.3993 d.f. 1 R2 adj 0.574
## d.f. 95 Pr(> chi2) 0.0000 g 0.906
##
## Residuals
##
## Min 1Q Median 3Q Max
## -1.7269 -0.8182 -0.3462 0.3079 1.6560
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.3109 0.0746 4.16 <0.0001
## SAS_IQc 0.8130 0.0712 11.43 <0.0001
##
(esports2_wt = ols(general_mental_sports ~ SAS_IQc + IC_reg, data = esports_std, weights = d2$population2017 %>% sqrt()))
## Frequencies of Missing Values Due to Each Variable
## general_mental_sports SAS_IQc IC_reg
## 2 0 0
## (weights)
## 2
##
## Linear Regression Model
##
## ols(formula = general_mental_sports ~ SAS_IQc + IC_reg, data = esports_std,
## weights = d2$population2017 %>% sqrt())
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 97 LR chi2 88.62 R2 0.599
## sigma51.4050 d.f. 2 R2 adj 0.590
## d.f. 94 Pr(> chi2) 0.0000 g 0.950
##
## Residuals
##
## Min 1Q Median 3Q Max
## -1.8023 -0.9060 -0.3210 0.2978 1.3017
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.3182 0.0733 4.34 <0.0001
## SAS_IQc 0.8306 0.0703 11.82 <0.0001
## IC_reg 0.1468 0.0676 2.17 0.0325
##
#formal tests
lrtest(esports1_wt, esports2_wt)
##
## Model 1: general_mental_sports ~ SAS_IQc
## Model 2: general_mental_sports ~ SAS_IQc + IC_reg
##
## L.R. Chisq d.f. P
## 4.7431 1.0000 0.0294
#dataset
chap8_std = d2 %>%
select(!!main_vars, airline_safety:tech_safety, IMO_rr) %>%
df_standardize(exclude_range_01 = F)
#airlines
(air_model1_wt = ols(airline_safety ~ SAS_IQc, data = chap8_std, weights = d2$population2017 %>% sqrt()))
## Frequencies of Missing Values Due to Each Variable
## airline_safety SAS_IQc (weights)
## 63 0 2
##
## Linear Regression Model
##
## ols(formula = airline_safety ~ SAS_IQc, data = chap8_std, weights = d2$population2017 %>%
## sqrt())
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 35 LR chi2 12.76 R2 0.306
## sigma84.5452 d.f. 1 R2 adj 0.285
## d.f. 33 Pr(> chi2) 0.0004 g 0.573
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.59278 -0.05492 0.37441 0.53806 1.52618
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -0.3647 0.1583 -2.30 0.0277
## SAS_IQc 0.5657 0.1484 3.81 0.0006
##
(air_model2_wt = ols(airline_safety ~ SAS_IQc + IC_reg, data = chap8_std, weights = d2$population2017 %>% sqrt()))
## Frequencies of Missing Values Due to Each Variable
## airline_safety SAS_IQc IC_reg (weights)
## 63 0 0 2
##
## Linear Regression Model
##
## ols(formula = airline_safety ~ SAS_IQc + IC_reg, data = chap8_std,
## weights = d2$population2017 %>% sqrt())
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 35 LR chi2 21.29 R2 0.456
## sigma76.0065 d.f. 2 R2 adj 0.422
## d.f. 32 Pr(> chi2) 0.0000 g 0.777
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.4405 -0.3236 0.2306 0.4726 1.0753
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -0.4461 0.1449 -3.08 0.0043
## SAS_IQc 0.6778 0.1387 4.89 <0.0001
## IC_reg 0.4444 0.1495 2.97 0.0056
##
lrtest(air_model1_wt, air_model2_wt)
##
## Model 1: airline_safety ~ SAS_IQc
## Model 2: airline_safety ~ SAS_IQc + IC_reg
##
## L.R. Chisq d.f. P
## 8.52972 1.00000 0.00349
#road safety
(road_model1_wt = ols(road_safety ~ SAS_IQc, data = chap8_std, weights = d2$population2017 %>% sqrt()))
## Frequencies of Missing Values Due to Each Variable
## road_safety SAS_IQc (weights)
## 5 0 2
##
## Linear Regression Model
##
## ols(formula = road_safety ~ SAS_IQc, data = chap8_std, weights = d2$population2017 %>%
## sqrt())
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 94 LR chi2 25.24 R2 0.236
## sigma64.7808 d.f. 1 R2 adj 0.227
## d.f. 92 Pr(> chi2) 0.0000 g 0.527
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.8884 0.0320 0.4099 0.6970 1.9414
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -0.2051 0.0934 -2.20 0.0307
## SAS_IQc 0.4731 0.0889 5.32 <0.0001
##
(road_model2_wt = ols(road_safety ~ SAS_IQc + IC_reg, data = chap8_std, weights = d2$population2017 %>% sqrt()))
## Frequencies of Missing Values Due to Each Variable
## road_safety SAS_IQc IC_reg (weights)
## 5 0 0 2
##
## Linear Regression Model
##
## ols(formula = road_safety ~ SAS_IQc + IC_reg, data = chap8_std,
## weights = d2$population2017 %>% sqrt())
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 94 LR chi2 36.52 R2 0.322
## sigma61.3448 d.f. 2 R2 adj 0.307
## d.f. 91 Pr(> chi2) 0.0000 g 0.657
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.63771 -0.04456 0.42493 0.69437 1.62877
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -0.1952 0.0885 -2.21 0.0300
## SAS_IQc 0.5136 0.0850 6.04 <0.0001
## IC_reg 0.2819 0.0828 3.41 0.0010
##
lrtest(road_model1_wt, road_model2_wt)
##
## Model 1: road_safety ~ SAS_IQc
## Model 2: road_safety ~ SAS_IQc + IC_reg
##
## L.R. Chisq d.f. P
## 1.13e+01 1.00e+00 7.86e-04
#occupational safety
(occu_model1_wt = ols(occu_safety ~ SAS_IQc, data = chap8_std, weights = d2$population2017 %>% sqrt()))
## Frequencies of Missing Values Due to Each Variable
## occu_safety SAS_IQc (weights)
## 2 0 2
##
## Linear Regression Model
##
## ols(formula = occu_safety ~ SAS_IQc, data = chap8_std, weights = d2$population2017 %>%
## sqrt())
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 95 LR chi2 43.33 R2 0.366
## sigma58.5832 d.f. 1 R2 adj 0.359
## d.f. 93 Pr(> chi2) 0.0000 g 0.652
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.46799 -0.32033 0.02525 0.67035 1.72501
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -0.1009 0.0836 -1.21 0.2309
## SAS_IQc 0.5840 0.0797 7.33 <0.0001
##
(occu_model2_wt = ols(occu_safety ~ SAS_IQc + IC_reg, data = chap8_std, weights = d2$population2017 %>% sqrt()))
## Frequencies of Missing Values Due to Each Variable
## occu_safety SAS_IQc IC_reg (weights)
## 2 0 0 2
##
## Linear Regression Model
##
## ols(formula = occu_safety ~ SAS_IQc + IC_reg, data = chap8_std,
## weights = d2$population2017 %>% sqrt())
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 95 LR chi2 71.89 R2 0.531
## sigma50.6782 d.f. 2 R2 adj 0.521
## d.f. 92 Pr(> chi2) 0.0000 g 0.840
##
## Residuals
##
## Min 1Q Median 3Q Max
## -1.47084 -0.32593 0.04875 0.56106 1.72604
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -0.0801 0.0725 -1.11 0.2716
## SAS_IQc 0.6282 0.0694 9.06 <0.0001
## IC_reg 0.3798 0.0668 5.68 <0.0001
##
lrtest(occu_model1_wt, occu_model2_wt)
##
## Model 1: occu_safety ~ SAS_IQc
## Model 2: occu_safety ~ SAS_IQc + IC_reg
##
## L.R. Chisq d.f. P
## 2.86e+01 1.00e+00 9.05e-08
#technological safety
(tech_model1_wt = ols(tech_safety ~ SAS_IQc, data = chap8_std, weights = d2$population2017 %>% sqrt()))
## Frequencies of Missing Values Due to Each Variable
## tech_safety SAS_IQc (weights)
## 1 0 2
##
## Linear Regression Model
##
## ols(formula = tech_safety ~ SAS_IQc, data = chap8_std, weights = d2$population2017 %>%
## sqrt())
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 96 LR chi2 49.00 R2 0.400
## sigma56.6203 d.f. 1 R2 adj 0.393
## d.f. 94 Pr(> chi2) 0.0000 g 0.679
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.1876 -0.2232 0.2129 0.7289 2.2444
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -0.1663 0.0807 -2.06 0.0420
## SAS_IQc 0.6085 0.0769 7.91 <0.0001
##
(tech_model2_wt = ols(tech_safety ~ SAS_IQc + IC_reg, data = chap8_std, weights = d2$population2017 %>% sqrt()))
## Frequencies of Missing Values Due to Each Variable
## tech_safety SAS_IQc IC_reg (weights)
## 1 0 0 2
##
## Linear Regression Model
##
## ols(formula = tech_safety ~ SAS_IQc + IC_reg, data = chap8_std,
## weights = d2$population2017 %>% sqrt())
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 96 LR chi2 81.60 R2 0.573
## sigma48.0344 d.f. 2 R2 adj 0.563
## d.f. 93 Pr(> chi2) 0.0000 g 0.866
##
## Residuals
##
## Min 1Q Median 3Q Max
## -1.5867 -0.1953 0.1605 0.6222 1.8012
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -0.1471 0.0685 -2.15 0.0344
## SAS_IQc 0.6550 0.0657 9.97 <0.0001
## IC_reg 0.3876 0.0632 6.13 <0.0001
##
lrtest(tech_model1_wt, tech_model2_wt)
##
## Model 1: tech_safety ~ SAS_IQc
## Model 2: tech_safety ~ SAS_IQc + IC_reg
##
## L.R. Chisq d.f. P
## 3.26e+01 1.00e+00 1.13e-08
#IMO
(IMO_model1_wt = ols(IMO_rr ~ SAS_IQc, data = chap8_std, weights = d2$population2017 %>% sqrt()))
## Frequencies of Missing Values Due to Each Variable
## IMO_rr SAS_IQc (weights)
## 12 0 2
##
## Linear Regression Model
##
## ols(formula = IMO_rr ~ SAS_IQc, data = chap8_std, weights = d2$population2017 %>%
## sqrt())
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 85 LR chi2 18.90 R2 0.199
## sigma69.7722 d.f. 1 R2 adj 0.190
## d.f. 83 Pr(> chi2) 0.0000 g 0.476
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.3418 -1.3262 -0.6937 0.1735 1.3437
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.4783 0.1031 4.64 <0.0001
## SAS_IQc 0.4802 0.1056 4.55 <0.0001
##
(IMO_model2_wt = ols(IMO_rr ~ SAS_IQc + IC_reg, data = chap8_std, weights = d2$population2017 %>% sqrt()))
## Frequencies of Missing Values Due to Each Variable
## IMO_rr SAS_IQc IC_reg (weights)
## 12 0 0 2
##
## Linear Regression Model
##
## ols(formula = IMO_rr ~ SAS_IQc + IC_reg, data = chap8_std, weights = d2$population2017 %>%
## sqrt())
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 85 LR chi2 20.03 R2 0.210
## sigma69.7324 d.f. 2 R2 adj 0.191
## d.f. 82 Pr(> chi2) 0.0000 g 0.512
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.3078 -1.3099 -0.7010 0.1269 1.3098
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.4886 0.1035 4.72 <0.0001
## SAS_IQc 0.4892 0.1059 4.62 <0.0001
## IC_reg 0.1022 0.0977 1.05 0.2985
##
lrtest(IMO_model1_wt, IMO_model2_wt)
##
## Model 1: IMO_rr ~ SAS_IQc
## Model 2: IMO_rr ~ SAS_IQc + IC_reg
##
## L.R. Chisq d.f. P
## 1.127 1.000 0.288
#easy table output
list(
esports1_wt,
esports2_wt,
IMO_model1_wt,
IMO_model2_wt,
air_model1_wt,
air_model2_wt,
road_model1_wt,
road_model2_wt,
occu_model1_wt,
occu_model2_wt,
tech_model1_wt,
tech_model2_wt
) %>%
summarize_models()
#merge into map data
d_map = right_join(world3, d, by = "ISO")
#calculate centroids, constrained to lie within each unit (st_point_on_surface)
d_map %<>%
mutate(lon2 = map_dbl(geometry, ~st_point_on_surface(.x)[[1]]),
lat2 = map_dbl(geometry, ~st_point_on_surface(.x)[[2]]))
#add missing values for Hong Kong and Macau
#capitals
d_map$lat[d_map$ISO == "HKG"] = 22.321751
d_map$lon[d_map$ISO == "HKG"] = 114.169663
d_map$lat[d_map$ISO == "MAC"] = 22.198667
d_map$lon[d_map$ISO == "MAC"] = 113.545605
#centroids
d_map$lat2[d_map$ISO == "HKG"] = 22.397745
d_map$lon2[d_map$ISO == "HKG"] = 114.158591
d_map$lat2[d_map$ISO == "MAC"] = 22.198667
d_map$lon2[d_map$ISO == "MAC"] = 113.545605
#look good?
ggplot() +
ggplot2::geom_sf(data = d_map) +
# geom_label(data = d_map, aes(x = lon, y = lat, label = NAME))
geom_point(data = d_map, aes(x = lon, y = lat), size = .5) +
geom_point(data = d_map, aes(x = lon2, y = lat2), color = "red", size = .5)
## Warning: Removed 24 rows containing missing values (`geom_point()`).
## Warning: Removed 50 rows containing missing values (`geom_point()`).
#subset
d2_map = right_join(world3, d2, by = "ISO")
Nice for a quick overview.
#no empty
d_map_noNA = d_map %>% filter(!st_is_empty(.))
#only run once to save time
if (F) {
map_continents = tm_shape(d_map_noNA) +
tm_fill("UN_continent", title = "Continents") +
tm_borders()
map_continents
tmap_save(map_continents, "figs/continents map.png")
map_macroregions = tm_shape(d_map_noNA) +
tm_fill("UN_macroregion", title = "Macroregions") +
tm_borders()
map_macroregions
tmap_save(map_macroregions, "figs/macroregions map.png")
map_regions = tm_shape(d_map_noNA) +
tm_fill("UN_region", title = "Regions") +
tm_borders()
map_regions
tmap_save(map_regions, "figs/regions map.png")
}
#IQ map
IQ_map = tm_shape(d_map_noNA) +
tm_fill("SAS_IQc", title = "IQ (Rindermann)") +
tm_borders() +
tm_layout(legend.position = c("left", "bottom"))
IQ_map
tmap_save(IQ_map, "figs/IQ_map.png")
## Map saved to /media/emil/8tb_ssd_3/projects/Smart fraction reanalysis/figs/IQ_map.png
## Resolution: 3332 by 1324 pixels
## Size: 11.1 by 4.41 inches (300 dpi)
#IQ, all countries
IQ_map2 = tm_shape(d_map_noNA) +
tm_fill("CA_totc", title = "IQ (Rindermann)") +
tm_borders() +
tm_layout(legend.position = c("left", "bottom"))
IQ_map2
tmap_save(IQ_map2, "figs/IQ_map_all.png")
## Map saved to /media/emil/8tb_ssd_3/projects/Smart fraction reanalysis/figs/IQ_map_all.png
## Resolution: 3332 by 1324 pixels
## Size: 11.1 by 4.41 inches (300 dpi)
#IC map
IC_map = tm_shape(d_map_noNA) +
tm_fill("IC_reg", title = "IQ intellectual class (regression)") +
tm_borders() +
tm_layout(legend.position = c("left", "bottom"))
IC_map
## Variable(s) "IC_reg" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
tmap_save(IC_map, "figs/IC_map.png")
## Variable(s) "IC_reg" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Map saved to /media/emil/8tb_ssd_3/projects/Smart fraction reanalysis/figs/IC_map.png
## Resolution: 3332 by 1324 pixels
## Size: 11.1 by 4.41 inches (300 dpi)
#SPI S map
SPI_map = tm_shape(d_map_noNA) +
tm_fill("SPI_g", title = "Social progress index (general factor)") +
tm_borders() +
tm_layout(legend.position = c("left", "bottom"))
SPI_map
## Variable(s) "SPI_g" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
tmap_save(SPI_map, "figs/SPI_map.png")
## Variable(s) "SPI_g" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
## Map saved to /media/emil/8tb_ssd_3/projects/Smart fraction reanalysis/figs/SPI_map.png
## Resolution: 3332 by 1324 pixels
## Size: 11.1 by 4.41 inches (300 dpi)
Add a covariate for spatial autocorrelation.
#calculate spatial lag variables
#version 1
d_map = kirkegaard::spatial_knn(
d_map,
vars = main_vars2,
k = 3
)
#version 2
d_map = kirkegaard::spatial_knn(
d_map,
vars = main_vars2,
k = 3,
lon = "lon2",
lat = "lat2",
suffix = "_lag2"
)
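For readers unfamiliar with the approach, here is a minimal self-contained sketch of what a kNN spatial lag computes, assuming the lag is simply the unweighted mean of the k nearest neighbors' values by great-circle distance (kirkegaard::spatial_knn may differ in details such as distance weighting); the `knn_lag_sketch` name is hypothetical.
#sketch of a kNN spatial lag: for each unit, average the variable over its k nearest neighbors
knn_lag_sketch = function(lon, lat, x, k = 3) {
  #haversine great-circle distance (km) from one point to all points
  hav = function(lon1, lat1, lon2, lat2) {
    r = pi / 180
    a = sin((lat2 - lat1) * r / 2)^2 +
      cos(lat1 * r) * cos(lat2 * r) * sin((lon2 - lon1) * r / 2)^2
    6371 * 2 * asin(pmin(1, sqrt(a)))
  }
  sapply(seq_along(x), function(i) {
    d = hav(lon[i], lat[i], lon, lat)
    d[i] = Inf                    #exclude the unit itself
    nn = order(d)[1:k]            #indices of the k nearest neighbors
    mean(x[nn], na.rm = TRUE)     #lag = mean of the neighbors' values
  })
}
#e.g. knn_lag_sketch(d_map$lon, d_map$lat, d_map$SAS_IQc, k = 3)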
#cors
autocors = d_map %>% select(!!!main_vars2,
!!!main_vars2 + "_lag",
!!!main_vars2 + "_lag2") %>%
st_drop_geometry() %>%
wtd.cors()
#summary stats
map_df(main_vars2, function(v) {
tibble(
var = v,
lag_capital = autocors[v, v + "_lag"],
lag_centroid = autocors[v, v + "_lag2"]
)
}) %>%
print() %>%
{
filter(., var %in% names(spi_indicators)) %>%
.[-1] %>%
colMeans()
}
## # A tibble: 65 × 3
## var lag_capital lag_centroid
## <chr> <dbl> <dbl>
## 1 SPI_g 0.787 0.809
## 2 SAS_IQc 0.685 0.717
## 3 x95pct_IQc 0.613 0.653
## 4 x05pct_IQc 0.685 0.700
## 5 IC_reg 0.253 0.205
## 6 LC_reg 0.425 0.415
## 7 IC_delta 0.516 0.479
## 8 LC_delta 0.383 0.385
## 9 Undernourishment_pct_of_pop 0.663 0.685
## 10 Maternal_mortality_rate_deaths_100_000_live_births 0.770 0.773
## # … with 55 more rows
## lag_capital lag_centroid
## 0.609 0.629
#example plot showing autocorrelation using the cognitive data
GG_scatter(d_map %>% st_drop_geometry(), "SAS_IQc_lag", "SAS_IQc", case_names = "Country.x") +
scale_x_continuous("Mean IQ, spatial lag") +
scale_y_continuous("Mean IQ")
## `geom_smooth()` using formula = 'y ~ x'
GG_save("figs/spatial_lag_example_iq.png")
## `geom_smooth()` using formula = 'y ~ x'
#build d2b: the map data (which carries the lagged variables), restricted to the d2 cases
d2b = d_map %>%
st_drop_geometry() %>%
filter(!is.na(SAS_IQc), !is.na(x95pct_IQc))
#regression
reg_results_spatial = get_reg_results(names(spi_indicators), spatial_lag = T, data = d2b)
#summarize R2s
reg_results_spatial %>% select(fit1_r2:fit4_wt_r2) %>% colMeans()
## fit1_r2 fit2_r2 fit3_r2 fit4_r2 fit1_wt_r2 fit2_wt_r2 fit3_wt_r2
## 0.413 0.430 0.418 0.429 0.406 0.439 0.414
## fit4_wt_r2
## 0.439
#formal -- how many model tests are better?
reg_results_spatial %>%
select(LR1_p:LR3_wt_p) %>%
{
plyr::adply(., 1, function(r) {
tibble(
LR1 = r$LR1_p < .01,
LR2 = r$LR2_p < .01,
LR3 = r$LR3_p < .01,
LR1_wt = r$LR1_wt_p < .01,
LR2_wt = r$LR2_wt_p < .01,
LR3_wt = r$LR3_wt_p < .01
)
})
} %>%
select(LR1:LR3_wt) %>%
colMeans()
## LR1 LR2 LR3 LR1_wt LR2_wt LR3_wt
## 0.2549 0.0588 0.0196 0.3922 0.0980 0.0196
#SPI
#standardize data before so we get standardized betas
#join in lags before that
SPI_std = d2 %>%
#join lags
left_join(d_map %>% select(SPI_g_lag, "ISO"), by = "ISO") %>%
select(SPI_g, SAS_IQc, IC_reg, LC_reg, SPI_g_lag) %>%
df_standardize()
#fits
list(
SPI_model1 = ols(SPI_g ~ SAS_IQc + SPI_g_lag, data = SPI_std),
SPI_model2 = ols(SPI_g ~ SAS_IQc + IC_reg + SPI_g_lag, data = SPI_std),
SPI_model3 = ols(SPI_g ~ SAS_IQc + LC_reg + SPI_g_lag, data = SPI_std),
SPI_model4 = ols(SPI_g ~ SAS_IQc + IC_reg + LC_reg + SPI_g_lag, data = SPI_std)
) -> SPI_models_spatial
SPI_models_spatial %>% summarize_models()
#with weights
list(
SPI_model1_wt = ols(SPI_g ~ SAS_IQc + SPI_g_lag, data = SPI_std, weights = d2$population2017 %>% sqrt()),
SPI_model2_wt = ols(SPI_g ~ SAS_IQc + IC_reg + SPI_g_lag, data = SPI_std, weights = d2$population2017 %>% sqrt()),
SPI_model3_wt = ols(SPI_g ~ SAS_IQc + LC_reg + SPI_g_lag, data = SPI_std, weights = d2$population2017 %>% sqrt()),
SPI_model4_wt = ols(SPI_g ~ SAS_IQc + IC_reg + LC_reg + SPI_g_lag, data = SPI_std, weights = d2$population2017 %>% sqrt())
) -> SPI_models_wt_spatial
SPI_models_wt_spatial %>% summarize_models()
#formal tests
#NB: these lrtest calls reuse the earlier SPI_models / SPI_models_wt objects (the delta models above), not the spatial-lag fits in SPI_models_spatial / SPI_models_wt_spatial, as the printed formulas show
lrtest(SPI_models[[1]], SPI_models[[2]])
##
## Model 1: SPI_g ~ SAS_IQc
## Model 2: SPI_g ~ SAS_IQc + IC_delta
##
## L.R. Chisq d.f. P
## 13.92937 1.00000 0.00019
lrtest(SPI_models[[1]], SPI_models[[3]])
##
## Model 1: SPI_g ~ SAS_IQc
## Model 2: SPI_g ~ SAS_IQc + LC_delta
##
## L.R. Chisq d.f. P
## 4.4426 1.0000 0.0351
lrtest(SPI_models[[2]], SPI_models[[4]])
##
## Model 1: SPI_g ~ SAS_IQc + IC_delta
## Model 2: SPI_g ~ SAS_IQc + IC_delta + LC_delta
##
## L.R. Chisq d.f. P
## 0.384 1.000 0.535
lrtest(SPI_models_wt[[1]], SPI_models_wt[[2]])
##
## Model 1: SPI_g ~ SAS_IQc
## Model 2: SPI_g ~ SAS_IQc + IC_delta
##
## L.R. Chisq d.f. P
## 2.15e+01 1.00e+00 3.53e-06
lrtest(SPI_models_wt[[1]], SPI_models_wt[[3]])
##
## Model 1: SPI_g ~ SAS_IQc
## Model 2: SPI_g ~ SAS_IQc + LC_delta
##
## L.R. Chisq d.f. P
## 6.309 1.000 0.012
lrtest(SPI_models_wt[[2]], SPI_models_wt[[4]])
##
## Model 1: SPI_g ~ SAS_IQc + IC_delta
## Model 2: SPI_g ~ SAS_IQc + IC_delta + LC_delta
##
## L.R. Chisq d.f. P
## 0.981 1.000 0.322
Simulate some populations that vary in 1) means, 2) standard deviations, 3) presence of subpopulations of low or high IQ.
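Before the full grid, a quick single-combination illustration (assuming, as in the design below, a main population with mean 100 and SD 15 plus a 5% subgroup shifted +15 IQ): in expectation the mean rises by about 0.75 points while the 95th centile rises by roughly 1.5 points, so the subgroup moves the tail about twice as much as the mean.
#single-combination sketch: a 5% high subgroup (+15 IQ) shifts the 95th centile about twice as much as the mean
set.seed(1)
x_main = rnorm(9500, mean = 100, sd = 15)
x_high = rnorm(500, mean = 115, sd = 15)
x_mix = c(x_main, x_high)
c(mean_shift = mean(x_mix) - mean(x_main),
  p95_shift = unname(quantile(x_mix, .95) - quantile(x_main, .95)))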
#simulate a country of each combination
country_n = 10e3
high = 15
low = -15
sim_combos = expand_grid(
mean = seq(70, 105, by = 5),
sd = seq(14, 16, by = 1),
high = c(T, F),
low = c(T, F),
replicate = 1:20
)
#sample some data
sim_combos = plyr::adply(sim_combos, .margins = 1, .fun = function(dd) {
#simulate case data
main_n = country_n - (dd$high * country_n * .05) - (dd$low * country_n * .05)
x = tibble(
score = rnorm(main_n, mean = dd$mean, sd = dd$sd),
group = "main"
)
#subgroups
if (dd$high) {
x = bind_rows(x, tibble(
score = rnorm(country_n * .05, mean = dd$mean + high, sd = 15),
group = "high"
))
}
if (dd$low) {
x = bind_rows(x, tibble(
score = rnorm(country_n * .05, mean = dd$mean + low, sd = 15),
group = "low"
))
}
#summary stats
tibble(
obs_mean = mean(x$score),
obs_95 = quantile(x$score, probs = .95),
obs_05 = quantile(x$score, probs = .05),
obs_sd = sd(x$score),
data = list(x)
)
})
#resid tail measures
sim_combos %<>% mutate(
IC_reg = ols(obs_95 ~ obs_mean, data = sim_combos) %>% resid() %>% standardize(),
LC_reg = ols(obs_05 ~ obs_mean, data = sim_combos) %>% resid() %>% standardize(),
IC_z = obs_95 - obs_mean,
LC_z = obs_05 - obs_mean
)
#work as expected?
sim_combos %>% select(-data) %>% wtd.cors() %>% round(2)
## mean sd high low replicate obs_mean obs_95 obs_05 obs_sd IC_reg
## mean 1.00 0.00 0.00 0.00 0 1.00 0.99 0.99 0.00 0.00
## sd 0.00 1.00 0.00 0.00 0 0.00 0.11 -0.11 0.94 0.92
## high 0.00 0.00 1.00 0.00 0 0.03 0.07 0.01 0.23 0.28
## low 0.00 0.00 0.00 1.00 0 -0.03 -0.01 -0.06 0.22 0.15
## replicate 0.00 0.00 0.00 0.00 1 0.00 0.00 0.00 0.00 0.00
## obs_mean 1.00 0.00 0.03 -0.03 0 1.00 0.99 0.99 0.00 0.00
## obs_95 0.99 0.11 0.07 -0.01 0 0.99 1.00 0.97 0.11 0.12
## obs_05 0.99 -0.11 0.01 -0.06 0 0.99 0.97 1.00 -0.12 -0.11
## obs_sd 0.00 0.94 0.23 0.22 0 0.00 0.11 -0.12 1.00 0.98
## IC_reg 0.00 0.92 0.28 0.15 0 0.00 0.12 -0.11 0.98 1.00
## LC_reg 0.00 -0.92 -0.16 -0.27 0 0.00 -0.11 0.12 -0.98 -0.95
## IC_z 0.00 0.92 0.28 0.15 0 0.01 0.13 -0.10 0.98 1.00
## LC_z 0.00 -0.92 -0.16 -0.27 0 0.00 -0.11 0.12 -0.98 -0.95
## LC_reg IC_z LC_z
## mean 0.00 0.00 0.00
## sd -0.92 0.92 -0.92
## high -0.16 0.28 -0.16
## low -0.27 0.15 -0.27
## replicate 0.00 0.00 0.00
## obs_mean 0.00 0.01 0.00
## obs_95 -0.11 0.13 -0.11
## obs_05 0.12 -0.10 0.12
## obs_sd -0.98 0.98 -0.98
## IC_reg -0.95 1.00 -0.95
## LC_reg 1.00 -0.95 1.00
## IC_z -0.95 1.00 -0.95
## LC_z 1.00 -0.95 1.00
ols(IC_reg ~ mean + sd + high + low, data = sim_combos)
## Linear Regression Model
##
## ols(formula = IC_reg ~ mean + sd + high + low, data = sim_combos)
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 1920 LR chi2 5667.23 R2 0.948
## sigma0.2288 d.f. 4 R2 adj 0.948
## d.f. 1915 Pr(> chi2) 0.0000 g 1.121
##
## Residuals
##
## Min 1Q Median 3Q Max
## -0.9245695 -0.1559471 0.0005589 0.1494833 0.7637447
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -17.2821 0.1043 -165.71 <0.0001
## mean -0.0004 0.0005 -0.79 0.4313
## sd 1.1252 0.0064 175.93 <0.0001
## high 0.5654 0.0104 54.13 <0.0001
## low 0.3049 0.0104 29.20 <0.0001
##
ols(IC_reg ~ obs_mean + obs_sd + high + low, data = sim_combos)
## Linear Regression Model
##
## ols(formula = IC_reg ~ obs_mean + obs_sd + high + low, data = sim_combos)
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 1920 LR chi2 6626.84 R2 0.968
## sigma0.1782 d.f. 4 R2 adj 0.968
## d.f. 1915 Pr(> chi2) 0.0000 g 1.135
##
## Residuals
##
## Min 1Q Median 3Q Max
## -0.610050 -0.113820 -0.002221 0.115865 0.645633
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -18.6193 0.0858 -216.95 <0.0001
## obs_mean -0.0001 0.0004 -0.20 0.8442
## obs_sd 1.2126 0.0053 228.61 <0.0001
## high 0.1080 0.0084 12.88 <0.0001
## low -0.1221 0.0084 -14.62 <0.0001
##
ols(LC_reg ~ mean + sd + high + low, data = sim_combos)
## Linear Regression Model
##
## ols(formula = LC_reg ~ mean + sd + high + low, data = sim_combos)
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 1920 LR chi2 5891.67 R2 0.954
## sigma0.2158 d.f. 4 R2 adj 0.953
## d.f. 1915 Pr(> chi2) 0.0000 g 1.124
##
## Residuals
##
## Min 1Q Median 3Q Max
## -0.763303 -0.150697 0.004135 0.152390 0.797079
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 17.4326 0.0984 177.21 <0.0001
## mean -0.0003 0.0004 -0.77 0.4419
## sd -1.1314 0.0060 -187.54 <0.0001
## high -0.3255 0.0099 -33.04 <0.0001
## low -0.5411 0.0099 -54.93 <0.0001
##
ols(LC_reg ~ obs_mean + obs_sd + high + low, data = sim_combos)
## Linear Regression Model
##
## ols(formula = LC_reg ~ obs_mean + obs_sd + high + low, data = sim_combos)
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 1920 LR chi2 6957.32 R2 0.973
## sigma0.1635 d.f. 4 R2 adj 0.973
## d.f. 1915 Pr(> chi2) 0.0000 g 1.138
##
## Residuals
##
## Min 1Q Median 3Q Max
## -0.509047 -0.110737 0.001407 0.107260 0.500626
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 18.7661 0.0787 238.31 <0.0001
## obs_mean -0.0006 0.0003 -1.88 0.0602
## obs_sd -1.2185 0.0049 -250.37 <0.0001
## high 0.1346 0.0077 17.50 <0.0001
## low -0.1125 0.0077 -14.68 <0.0001
##
#sampling error
centiles_sampling = map_df(1:1000, function(i) {
#sample 500 persons, calculate the 1-99th centiles
quantile(rnorm(500), probs = seq(.01, .99, .01)) %>%
as.list() %>%
as.data.frame()
}) %>% set_colnames("centile_" + seq(1, 99))
#SDs
centiles_sampling[c(1, 5, 10, 20, 30, 40, 50)] %>%
describe()
## vars n mean sd median trimmed mad min max
## centile_1 1 1000 -2.283752 0.1645 -2.28e+00 -2.27951 0.1617 -2.866 -1.8373
## centile_5 2 1000 -1.634859 0.0974 -1.63e+00 -1.63466 0.0945 -1.929 -1.3018
## centile_10 3 1000 -1.274779 0.0778 -1.28e+00 -1.27486 0.0813 -1.535 -0.9912
## centile_20 4 1000 -0.840024 0.0647 -8.42e-01 -0.84098 0.0614 -1.028 -0.6397
## centile_30 5 1000 -0.524172 0.0591 -5.24e-01 -0.52391 0.0609 -0.703 -0.3733
## centile_40 6 1000 -0.254878 0.0557 -2.56e-01 -0.25482 0.0566 -0.470 -0.0423
## centile_50 7 1000 -0.000804 0.0563 -7.57e-05 -0.00026 0.0547 -0.233 0.2032
## range skew kurtosis se
## centile_1 1.029 -0.29390 0.241782 0.00520
## centile_5 0.627 -0.00264 -0.044301 0.00308
## centile_10 0.544 0.01239 -0.000936 0.00246
## centile_20 0.388 0.11815 -0.061934 0.00205
## centile_30 0.329 -0.06915 -0.169868 0.00187
## centile_40 0.428 -0.05609 0.139900 0.00176
## centile_50 0.437 -0.13837 0.281520 0.00178
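As an analytic cross-check (not in the original code), the asymptotic standard error of the p-th sample quantile of a standard normal, sqrt(p * (1 - p) / n) / dnorm(qnorm(p)), closely reproduces the simulated SDs above for n = 500.
#asymptotic SE of selected sample centiles for N(0, 1), n = 500
probs = c(.01, .05, .10, .20, .30, .40, .50)
round(sqrt(probs * (1 - probs) / 500) / dnorm(qnorm(probs)), 3)
#roughly 0.167, 0.095, 0.076, 0.064, 0.059, 0.057, 0.056 -- close to the SDs in the table above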
centiles_sampling %>%
describe() %>%
as.data.frame() %>%
ggplot(aes(vars, sd)) +
geom_line() +
scale_x_continuous("Centile", breaks = seq(0, 100, 5)) +
scale_y_continuous("SD of estimate", limits = c(0, NA))
GG_save("figs/SE_centiles.png")
#z score data
d2_z = d2 %>% df_standardize(messages = F)
#no weights
list(
SPI_model1 = ols(SPI_g ~ SAS_IQc, data = d2_z),
SPI_model2 = ols(SPI_g ~ SAS_IQc + x95pct_IQc, data = d2_z),
SPI_model3 = ols(SPI_g ~ SAS_IQc + x05pct_IQc, data = d2_z),
SPI_model4 = ols(SPI_g ~ SAS_IQc + x95pct_IQc + x05pct_IQc, data = d2_z)
) -> SPI_models
SPI_models %>% summarize_models()
#weights
list(
SPI_model1_wt = ols(SPI_g ~ SAS_IQc, data = d2_z, weights = d2$population2017 %>% sqrt()),
SPI_model2_wt = ols(SPI_g ~ SAS_IQc + x95pct_IQc, data = d2_z, weights = d2$population2017 %>% sqrt()),
SPI_model3_wt = ols(SPI_g ~ SAS_IQc + x05pct_IQc, data = d2_z, weights = d2$population2017 %>% sqrt()),
SPI_model4_wt = ols(SPI_g ~ SAS_IQc + x95pct_IQc + x05pct_IQc, data = d2_z, weights = d2$population2017 %>% sqrt())
) -> SPI_models_wt
SPI_models_wt %>% summarize_models()
#formal tests
lrtest(SPI_models[[1]], SPI_models[[2]])
##
## Model 1: SPI_g ~ SAS_IQc
## Model 2: SPI_g ~ SAS_IQc + x95pct_IQc
##
## L.R. Chisq d.f. P
## 13.92937 1.00000 0.00019
lrtest(SPI_models[[1]], SPI_models[[3]])
##
## Model 1: SPI_g ~ SAS_IQc
## Model 2: SPI_g ~ SAS_IQc + x05pct_IQc
##
## L.R. Chisq d.f. P
## 4.4426 1.0000 0.0351
lrtest(SPI_models[[2]], SPI_models[[4]])
##
## Model 1: SPI_g ~ SAS_IQc + x95pct_IQc
## Model 2: SPI_g ~ SAS_IQc + x95pct_IQc + x05pct_IQc
##
## L.R. Chisq d.f. P
## 0.384 1.000 0.535
lrtest(SPI_models_wt[[1]], SPI_models_wt[[2]])
##
## Model 1: SPI_g ~ SAS_IQc
## Model 2: SPI_g ~ SAS_IQc + x95pct_IQc
##
## L.R. Chisq d.f. P
## 2.15e+01 1.00e+00 3.53e-06
lrtest(SPI_models_wt[[1]], SPI_models_wt[[3]])
##
## Model 1: SPI_g ~ SAS_IQc
## Model 2: SPI_g ~ SAS_IQc + x05pct_IQc
##
## L.R. Chisq d.f. P
## 6.309 1.000 0.012
lrtest(SPI_models_wt[[2]], SPI_models_wt[[4]])
##
## Model 1: SPI_g ~ SAS_IQc + x95pct_IQc
## Model 2: SPI_g ~ SAS_IQc + x95pct_IQc + x05pct_IQc
##
## L.R. Chisq d.f. P
## 0.981 1.000 0.322
#main regression with standard metrics
#leave one out approach
loo_results = map_df(seq_along_rows(d2), function(idx) {
#fit the models without this case
i_reg_results = get_reg_results(names(spi_indicators), data = d2[-idx, ]) %>% mutate(case_removed = idx)
#summarize R2s
regs_r2s = i_reg_results %>% select(fit1_r2:fit4_wt_r2) %>% colMeans()
#formal -- how many model tests are better?
regs_lr_tests = i_reg_results %>%
select(LR1_p:LR3_wt_p) %>%
{
plyr::adply(., 1, function(r) {
tibble(
LR1 = r$LR1_p < .01,
LR2 = r$LR2_p < .01,
LR3 = r$LR3_p < .01,
LR1_wt = r$LR1_wt_p < .01,
LR2_wt = r$LR2_wt_p < .01,
LR3_wt = r$LR3_wt_p < .01
)
})
} %>%
select(LR1:LR3_wt) %>%
colMeans()
#make a data frame
bind_cols(
as_tibble(t(regs_r2s)),
as_tibble(t(regs_lr_tests))
) %>% mutate(case_omitted = idx)
})
#plot effect of case removal
loo_results %>%
select(case_omitted, LR1:LR3_wt) %>%
pivot_longer(cols = LR1:LR3_wt) %>%
ggplot(aes(case_omitted, value, color = name)) +
geom_line() +
scale_x_continuous("Case omitted") +
scale_y_continuous("% of tests showing signal (p < .01)", labels = scales::percent) +
scale_color_discrete("Model comparison")
GG_save("figs/loo_results.png")
#which countries are these cases?
d2$Country.x[17]
## [1] "China"
d2$Country.x[24]
## [1] "Egypt"
d2$Country.x[82]
## [1] "South Africa"
d2$Country.x[98]
## [1] "Vietnam"
#selected variables
d2 %>%
select(ISO, Country.x, SAS_IQc, x95pct_IQc, x05pct_IQc, IC_reg, LC_reg, SPI_g)
#save dataset
d %>% write_csv("data/data_out.csv.gz")
d %>% write_rds("data/data_out.rds")
Run manually.
#versions
write_sessioninfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Linux Mint 21
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_DK.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_DK.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] GGally_2.1.2 broom_1.0.1 rms_6.3-0
## [4] SparseM_1.81 tmap_3.3-3 sf_1.0-9
## [7] readxl_1.4.1 kirkegaard_2022-11-08 psych_2.2.9
## [10] assertthat_0.2.1 weights_1.0.4 Hmisc_4.7-1
## [13] Formula_1.2-4 survival_3.2-13 lattice_0.20-45
## [16] magrittr_2.0.3 forcats_0.5.2 stringr_1.4.1
## [19] dplyr_1.0.10 purrr_0.3.5 readr_2.1.3
## [22] tidyr_1.2.1 tibble_3.1.8 ggplot2_3.4.0
## [25] tidyverse_1.3.2
##
## loaded via a namespace (and not attached):
## [1] backports_1.4.1 systemfonts_1.0.4 lwgeom_0.2-9
## [4] plyr_1.8.7 sp_1.5-1 splines_4.1.2
## [7] crosstalk_1.2.0 leaflet_2.1.1 TH.data_1.1-1
## [10] digest_0.6.30 htmltools_0.5.3 gdata_2.18.0.1
## [13] fansi_1.0.3 checkmate_2.1.0 googlesheets4_1.0.1
## [16] cluster_2.1.2 tzdb_0.3.0 modelr_0.1.9
## [19] vroom_1.6.0 sandwich_3.0-2 jpeg_0.1-9
## [22] colorspace_2.0-3 rvest_1.0.3 textshaping_0.3.6
## [25] haven_2.5.1 xfun_0.34 leafem_0.2.0
## [28] crayon_1.5.2 jsonlite_1.8.3 lme4_1.1-30
## [31] zoo_1.8-11 glue_1.6.2 stars_0.5-6
## [34] gtable_0.3.1 gargle_1.2.1 MatrixModels_0.5-1
## [37] car_3.1-1 DEoptimR_1.0-11 VIM_6.2.2
## [40] abind_1.4-5 scales_1.2.1 mvtnorm_1.1-3
## [43] DBI_1.1.3 Rcpp_1.0.9 laeken_0.5.2
## [46] viridisLite_0.4.1 htmlTable_2.4.1 units_0.8-0
## [49] bit_4.0.4 foreign_0.8-82 proxy_0.4-27
## [52] vcd_1.4-10 htmlwidgets_1.5.4 httr_1.4.4
## [55] RColorBrewer_1.1-3 wk_0.7.0 ellipsis_0.3.2
## [58] mice_3.14.0 farver_2.1.1 reshape_0.8.9
## [61] pkgconfig_2.0.3 XML_3.99-0.12 nnet_7.3-17
## [64] sass_0.4.2 dbplyr_2.2.1 deldir_1.0-6
## [67] utf8_1.2.2 labeling_0.4.2 tidyselect_1.2.0
## [70] rlang_1.0.6 tmaptools_3.1-1 munsell_0.5.0
## [73] cellranger_1.1.0 tools_4.1.2 cachem_1.0.6
## [76] cli_3.4.1 generics_0.1.3 ranger_0.14.1
## [79] evaluate_0.17 fastmap_1.1.0 ragg_1.2.3
## [82] yaml_2.3.6 bit64_4.0.5 leafsync_0.1.0
## [85] knitr_1.40 fs_1.5.2 robustbase_0.95-0
## [88] s2_1.1.0 nlme_3.1-155 quantreg_5.94
## [91] xml2_1.3.3 compiler_4.1.2 rstudioapi_0.14
## [94] png_0.1-7 e1071_1.7-12 reprex_2.0.2
## [97] bslib_0.4.0 stringi_1.7.8 highr_0.9
## [100] Matrix_1.5-1 classInt_0.4-8 nloptr_2.0.3
## [103] vctrs_0.5.0 stringdist_0.9.9 pillar_1.8.1
## [106] lifecycle_1.0.3 lmtest_0.9-40 jquerylib_0.1.4
## [109] data.table_1.14.4 raster_3.6-3 R6_2.5.1
## [112] latticeExtra_0.6-30 KernSmooth_2.23-20 gridExtra_2.3
## [115] codetools_0.2-18 polspline_1.1.20 dichromat_2.0-0.1
## [118] boot_1.3-28 MASS_7.3-55 gtools_3.9.3
## [121] withr_2.5.0 mnormt_2.1.1 multcomp_1.4-20
## [124] mgcv_1.8-39 parallel_4.1.2 hms_1.1.2
## [127] terra_1.6-17 grid_4.1.2 rpart_4.1.16
## [130] Rcsdp_0.1.57.2 class_7.3-20 minqa_1.2.5
## [133] rmarkdown_2.17 carData_3.0-5 googledrive_2.0.0
## [136] lubridate_1.8.0 base64enc_0.1-3 interp_1.1-3
if (F) {
library(osfr)
#auth
osf_auth(readr::read_lines("~/.config/osf_token"))
#the project we will use
osf_proj = osf_retrieve_node("https://osf.io/95wtc/")
#upload files
#overwrite existing (versioning)
osf_upload(osf_proj, conflicts = "overwrite",
path = c(
"data",
"figs",
"notebook.html",
"notebook.Rmd",
"simulation.R",
"sessions_info.txt"
))
}