library(pacman)
p_load(kirkegaard, readxl, lubridate, dplyr, readr)
options(digits = 3)
#world
mega = read_csv("data/Megadataset_v2.0p.csv")
## Parsed with column specification:
## cols(
## .default = col_double(),
## iso = col_character(),
## Name = col_character(),
## LVnotes = col_character(),
## Capital = col_character(),
## se_Land = col_character(),
## se_abbrev = col_character(),
## de_Country = col_character(),
## de_abbrev = col_character(),
## de_Country_EN = col_character(),
## de_Western_neighbor = col_logical(),
## de_NW_European = col_logical(),
## dk_Origin = col_character(),
## Country_EN = col_character()
## )
## See spec(...) for full column specifications.
mega$IQ = mega$LV2012estimatedIQ
#world population data
worldpop = read_csv("data/population data.csv")
## Parsed with column specification:
## cols(
## Country = col_character(),
## Population_2017 = col_double(),
## Yearly_change_2017 = col_character(),
## Density_2017 = col_double(),
## Land_area_2017 = col_double(),
## Migrants_net_2017 = col_double(),
## Total_fertility_rate_2017 = col_character(),
## Median_age_2017 = col_character(),
## Urban_2017 = col_character()
## )
worldpop$sqrt_pop = worldpop$Population_2017 %>% sqrt
worldpop$iso = pu_translate(worldpop$Country)
mega = dplyr::full_join(mega, worldpop, by = "iso")
#US counties
us_counties = read_csv("data/US_counties.csv")
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
## .default = col_double(),
## County = col_character(),
## State = col_character(),
## temp_bins = col_character(),
## lat_bins = col_character(),
## lon_bins = col_character(),
## precip_bins = col_character(),
## elevation_bins = col_character()
## )
## See spec(...) for full column specifications.
us_counties$sqrt_pop = us_counties$Total.Population %>% sqrt
us_counties$White = us_counties$White / 100
#Americas
americas = read_csv("data/Admixture_In_Americas_Datafile.csv")
## Parsed with column specification:
## cols(
## .default = col_double(),
## Nation = col_character(),
## Notesforfilledindata = col_character(),
## CIAFactbookNotes = col_character(),
## NotesaboutMeisenbergOCT2014ACH = col_character(),
## ACHaverag_notes = col_character(),
## S_comments = col_character()
## )
## See spec(...) for full column specifications.
americas$sqrt_pop = americas$Population %>% sqrt
americas$group = plyr::aaply(americas, .margins = 1, function(x) {
#administrative divisions
if (x$US.State) return("US division")
if (x$Brazil.State) return("Brazilian division")
if (x$Mexico.State) return("Mexican division")
if (x$Colombia.State) return("Colombian division")
"Sovereign nation/other"
}, .expand = F)
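The row-wise plyr call above can also be written vectorized; a minimal sketch using dplyr::case_when, assuming the *.State columns are 0/1 or logical indicators:
#vectorized alternative (sketch only; assumes the indicator columns are 0/1 or logical)
americas$group = dplyr::case_when(
americas$US.State == 1 ~ "US division",
americas$Brazil.State == 1 ~ "Brazilian division",
americas$Mexico.State == 1 ~ "Mexican division",
americas$Colombia.State == 1 ~ "Colombian division",
TRUE ~ "Sovereign nation/other"
)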
#Scandinavian employment
scand = read_csv("data/Scandinavian_employment_rates.csv")
## Parsed with column specification:
## cols(
## Afsenderland = col_character(),
## Modtagerland = col_character(),
## Beskæftigelse_mænd = col_double(),
## Beskæftigelse_kvinder = col_double(),
## Beskæftigelse_samlet = col_double()
## )
scand$iso = pu_translate(scand$Afsenderland)
sweden = scand %>% filter(Modtagerland == "Sverige")
mega = full_join(mega, sweden, by = "iso")
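All of the joins above key on iso, so it is worth checking early that pu_translate matched every country name. A quick sanity check, not part of the original pipeline, assuming pu_translate returns NA for names it cannot match:
#sanity check: country names that failed to translate to ISO codes
worldpop$Country[is.na(worldpop$iso)]
scand$Afsenderland[is.na(scand$iso)]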
#world
GG_scatter(mega, "IQ", "International.S.Factor.Kirkegaard2014", case_names = "Name", weights = mega$sqrt_pop) +
xlab("National IQ") +
ylab("S factor")
GG_save("figures/world_iq_S.png")
#Americas
americas_divisions = americas %>%
filter(No.states == 1, !IsCapital)
#correlation string
americas_str = "r = %s. n=%s\nWeighted by sqrt population" %>% sprintf(
cor_matrix(americas_divisions[c("ACHaverage", "S_rescaled")], CI = .95, weights = americas_divisions$sqrt_pop) %>%
`[`(1, 2),
nrow(americas_divisions)
)
americas_divisions %>%
ggplot(aes(ACHaverage, S_rescaled, color = group, size = sqrt_pop)) +
geom_point(alpha = .5) +
geom_smooth(se = F, method = lm, aes(weight = sqrt_pop)) +
geom_smooth(se = F, method = lm, aes(color = NULL), color = "black") +
scale_size_continuous(guide = F) +
theme_bw() +
GG_text(americas_str) +
xlab("IQ/achievement") +
ylab("S factor")
## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Warning: Removed 4 rows containing missing values (geom_point).
GG_save("figures/americas_iq_S.png")
## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Warning: Removed 4 rows containing missing values (geom_point).
#US counties
GG_scatter(us_counties, "CA", "S", case_names = F, weights = "sqrt_pop") +
xlab("County IQ") +
ylab("S factor")
GG_save("figures/us_counties_iq_S.png")
#correlation string
americas_str = "r = %s. n=%s\nWeighted by sqrt population" %>% sprintf(
cor_matrix(americas_divisions[c("Eugenomefilledratio_2016jan", "ACHQIQaverage")], CI = .95, weights = americas_divisions$sqrt_pop) %>%
`[`(1, 2),
nrow(americas_divisions)
)
#americas
americas_divisions %>%
ggplot(aes(Eugenomefilledratio_2016jan, ACHQIQaverage, color = group, size = sqrt_pop)) +
geom_point(alpha = .5) +
geom_smooth(se = F, method = lm, aes(weight = sqrt_pop)) +
geom_smooth(se = F, method = lm, aes(color = NULL), color = "black") +
scale_size_continuous(guide = F) +
theme_bw() +
GG_text(americas_str) +
ylab("IQ/achievement") +
scale_x_continuous("European ancestry (mostly based on genomic data)", labels = scales::percent)
## Warning: Removed 13 rows containing non-finite values (stat_smooth).
## Warning: Removed 13 rows containing non-finite values (stat_smooth).
## Warning: Removed 13 rows containing missing values (geom_point).
GG_save("figures/americas_euro_iq.png")
## Warning: Removed 13 rows containing non-finite values (stat_smooth).
## Warning: Removed 13 rows containing non-finite values (stat_smooth).
## Warning: Removed 13 rows containing missing values (geom_point).
#US counties
GG_scatter(us_counties, "White", "CA", weights = "sqrt_pop") +
scale_x_continuous("White%", label = scales::percent) +
ylab("County IQ")
GG_save("figures/us_counties_white_iq.png")
#Danish correlation matrix
#get all danish data, keep only cases with data, impute missing values
mega$Denmark.crime = mega$dk_struc_adj_crime_rate
silence({
dk_immi = mega %>% df_subset_by_pattern(pattern = "^Denmark.") %>%
cbind(iso = mega$iso) %>%
miss_filter(missing = 10) %>%
miss_impute(max_na = Inf)
})
#factor score the dimensions
dk_immi$benefits_use = dk_immi %>% df_subset_by_pattern("benefits") %>% fa %>% `[[`("scores") %>% as.vector
dk_immi$income = dk_immi %>% df_subset_by_pattern("Income") %>% fa %>% `[[`("scores") %>% as.vector
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs
## = np.obs, : The estimated weights for the factor scores are probably
## incorrect. Try a different factor extraction method.
dk_immi$edu = dk_immi %>% df_subset_by_pattern("school|tert") %>% fa %>% `[[`("scores") %>% as.vector %>% `*`(-1)
dk_immi$crime = dk_immi$Denmark.crime
#higher order S
dk_immi$S = dk_immi[c("benefits_use", "income", "edu", "crime")] %>% fa %>% `[[`("scores") %>% as.vector %>% `*`(-1)
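The scores alone hide how the indicators load; a quick look at the loadings (a sketch, assuming fa here is psych::fa as loaded via kirkegaard) makes it easy to verify the orientation of the general factor before reflecting it:
#sketch: inspect the loadings behind the S score
#note the score above was reflected (* -1), so the signs here are mirrored
fa(dk_immi[c("benefits_use", "income", "edu", "crime")])$loadings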
#matrix
wtd.cors(dk_immi[c("benefits_use", "income", "edu", "crime", "S")])
## benefits_use income edu crime S
## benefits_use 1.000 -0.825 -0.761 0.800 -0.986
## income -0.825 1.000 0.730 -0.684 0.892
## edu -0.761 0.730 1.000 -0.629 0.824
## crime 0.800 -0.684 -0.629 1.000 -0.823
## S -0.986 0.892 0.824 -0.823 1.000
#full names
dk_immi$DK_edu = dk_immi$edu
dk_immi$DK_unemployment = dk_immi$benefits_use
dk_immi$DK_income = dk_immi$income
dk_immi$DK_crime = dk_immi$crime
dk_immi$DK_S = dk_immi$S
#add to mega
mega = full_join(mega, dk_immi[c("DK_edu", "DK_unemployment", "DK_income", "DK_crime", "DK_S", "iso")])
## Joining, by = "iso"
## Warning: Column `iso` joining character vector and factor, coercing into
## character vector
#S: DK and NO
GG_scatter(mega, "S.factor.in.Denmark.Kirkegaard2014", "S.factor.in.Norway.Kirkegaard2014", case_names = "Name") +
xlab("S factor in Danmark, cirka 2012") +
ylab("S factor in Norge, cirka 2012")
GG_save("figures/S_dk_no.png")
#crime: DK and old SE
GG_scatter(mega, "dk_sg_crime_rate_rr", "se_Crime_rate_rr", case_names = "Name") +
xlab("Relative crime rate in Denmark, 2000-2015\nAge and sex-adjusted.") +
ylab("Crime rate in Sverige, 1985-1989.\nAge and sex-adjusted.")
GG_save("figures/crime_dk_se.png")
This is a deliberately simple approach: the country datasets cover different origin groups, outcomes, and time periods, which makes them hard to meta-analyze formally.
#Norway
silence({
no_immi = mega %>% df_subset_by_pattern("^Norway") %>%
cbind(iso = mega$iso) %>%
miss_filter(missing = 11) %>%
miss_impute(max_na = Inf)
})
#dimensions
no_immi$crime = no_immi[c("NorwayViolentCrimeAdjustedOddsRatioSkardhamar2014", "NorwayLarcenyAdjustedOddsRatioSkardhamar2014")] %>% fa %>% `[[`("scores") %>% as.vector
no_immi$edu = no_immi$Norway.tertiary.edu.att.bigsamples.2013
no_immi$unemployment = no_immi %>% df_subset_by_pattern("OutOfWork") %>% fa %>% `[[`("scores") %>% as.vector
no_immi$income = no_immi %>% df_subset_by_pattern("Income") %>% fa %>% `[[`("scores") %>% as.vector
#higher order S
no_immi$S = no_immi[c("edu", "unemployment", "income", "crime")] %>% fa %>% `[[`("scores") %>% as.vector
#cors
wtd.cors(no_immi[c("edu", "unemployment", "income", "crime", "S")])
## edu unemployment income crime S
## edu 1.000 -0.615 0.723 -0.654 0.774
## unemployment -0.615 1.000 -0.854 0.676 -0.869
## income 0.723 -0.854 1.000 -0.761 0.991
## crime -0.654 0.676 -0.761 1.000 -0.827
## S 0.774 -0.869 0.991 -0.827 1.000
#full names
no_immi$NO_edu = no_immi$edu
no_immi$NO_unemployment = no_immi$unemployment
no_immi$NO_income = no_immi$income
no_immi$NO_crime = no_immi$crime
no_immi$NO_S = no_immi$S
#add to mega
mega = full_join(mega, no_immi[c("NO_edu", "NO_unemployment", "NO_income", "NO_crime", "NO_S", "iso")])
## Joining, by = "iso"
## Warning: Column `iso` joining character vector and factor, coercing into
## character vector
#Sweden
silence({
se_immi = mega[c("se_Crime_rate_rr", "Beskæftigelse_kvinder", "Beskæftigelse_mænd", "iso")] %>%
miss_filter(missing = 2) %>%
miss_impute(max_na = Inf)
})
#dimensions
se_immi$unemployment = se_immi[c("Beskæftigelse_kvinder", "Beskæftigelse_mænd")] %>%
fa %>%
`[[`("scores") %>%
as.vector %>%
`*`(-1)
se_immi$crime = se_immi$se_Crime_rate_rr
se_immi$S = se_immi[c("crime", "unemployment")] %>%
fa %>%
`[[`("scores") %>%
as.vector %>%
`*`(-1)
wtd.cors(se_immi[c("crime", "unemployment", "S")])
## crime unemployment S
## crime 1.000 0.210 -0.778
## unemployment 0.210 1.000 -0.778
## S -0.778 -0.778 1.000
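The identical |r| of 0.778 for both indicators is no accident: with only two standardized indicators the factor score is in effect their average, so each indicator correlates sqrt((1 + r)/2) with it, which reproduces the value from r = 0.210:
#with two indicators the factor score is effectively their mean, so each
#indicator correlates sqrt((1 + r) / 2) with it; here r = 0.210
sqrt((1 + 0.210) / 2)
## [1] 0.778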
#full names
se_immi$SE_unemployment = se_immi$unemployment
se_immi$SE_crime = se_immi$crime
se_immi$SE_S = se_immi$S
#add to mega
mega = full_join(mega, se_immi[c("SE_unemployment", "SE_crime", "SE_S", "iso")])
## Joining, by = "iso"
## Warning: Column `iso` joining character vector and factor, coercing into
## character vector
#Finland
mega$FI_crime = mega %>%
df_subset_by_pattern("^Finland") %>%
fa %>%
`[[`("scores") %>%
as.vector %>%
`*`(-1)
#Netherlands
mega$NL_crime = mega %>%
df_subset_by_pattern("^DutchCrime\\d\\d_") %>%
miss_impute() %>%
fa %>%
`[[`("scores") %>%
as.vector %>%
`*`(-1)
#Germany
mega$DE_crime = mega$de_crime_rate_adj
#collect all variables
meta = mega %>%
df_subset_by_pattern(pattern = "^(NO|DK|DE|SE|FI|NL)") %>%
cbind(iso = mega$iso,
IQ = mega$IQ,
Islam = mega$IslamPewResearch2010
)
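The next step indexes the correlation matrix by position (rows 1:16 for the outcomes, columns 18:19 for IQ and Islam), which silently breaks if the column order of meta changes; a small name-based check (not in the original) guards against that:
#sketch: confirm that iso, IQ and Islam really sit in columns 17-19
match(c("iso", "IQ", "Islam"), names(meta))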
#cors with predictors
meta_pred_cors = wtd.cors(meta)[1:16, 18:19] %>%
as.data.frame %>%
rownames_to_column("full_var") %>%
dplyr::mutate(
country = str_match(full_var, "(\\w+)_") %>% `[`(, 2) %>% as.vector %>% as.factor,
outcome = str_match(full_var, "_(\\w+)") %>% `[`(, 2) %>% as.vector %>% as.factor,
IQ_abs = abs(IQ),
Islam_abs = abs(Islam)
)
## Warning in wtd.cors(meta): NAs introduced by coercion
## Warning in wtd.cors(meta): NAs introduced by coercion
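The regex extraction of country and outcome above could also be done with tidyr::separate on the first underscore (an alternative sketch, assuming tidyr is installed):
#alternative sketch: split full_var on the first underscore
meta_pred_cors %>%
tidyr::separate(full_var, into = c("country", "outcome"), sep = "_", extra = "merge", remove = FALSE)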
#change order of outcomes
meta_pred_cors$outcome %<>% fct_relevel("S")
#plots!
silence({
GG_group_means(meta_pred_cors, "IQ_abs", groupvar = "outcome", subgroupvar = "country") +
ylab("Validity of IQ")
})
## Warning: Removed 16 rows containing missing values (geom_errorbar).
GG_save("figures/meta_IQ.png")
## Warning: Removed 16 rows containing missing values (geom_errorbar).
silence({
GG_group_means(meta_pred_cors, "Islam_abs", groupvar = "outcome", subgroupvar = "country", CI = F) +
ylab("Validity of Islam%")
})
## Warning: Removed 16 rows containing missing values (geom_errorbar).
meta_pred_cors$Islam_abs %>% describe()
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 16 0.55 0.15 0.58 0.56 0.11 0.18 0.76 0.58 -0.82 0
## se
## X1 0.04
GG_save("figures/meta_Islam.png")
ggplot(meta_pred_cors, aes(outcome, IQ_abs, fill = country)) +
geom_bar(stat = "identity", position = "dodge", width = .5) +
theme_bw()
meta_pred_cors$IQ_abs %>% describe()
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 16 0.51 0.06 0.53 0.51 0.05 0.4 0.62 0.22 -0.27 -0.96
## se
## X1 0.02
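Averaging raw absolute correlations is a crude summary; a slightly more formal alternative is to average on the Fisher-z scale (a sketch only, with equal weights, since per-country sample sizes are not carried along in meta_pred_cors):
#sketch: summarize IQ validities on the Fisher-z scale rather than as mean |r|
mean(atanh(meta_pred_cors$IQ_abs)) %>% tanh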
The correlation table is printed below, and the combined per-country variables are saved to a CSV for convenient reuse.
#output data table
meta_pred_cors
## full_var IQ Islam country outcome IQ_abs Islam_abs
## 1 DK_edu 0.507 -0.577 DK edu 0.507 0.577
## 2 DK_unemployment -0.495 0.704 DK unemployment 0.495 0.704
## 3 DK_income 0.535 -0.616 DK income 0.535 0.616
## 4 DK_crime -0.469 0.764 DK crime 0.469 0.764
## 5 DK_S 0.535 -0.722 DK S 0.535 0.722
## 6 NO_edu 0.449 -0.396 NO edu 0.449 0.396
## 7 NO_unemployment -0.535 0.617 NO unemployment 0.535 0.617
## 8 NO_income 0.560 -0.568 NO income 0.560 0.568
## 9 NO_crime -0.410 0.496 NO crime 0.410 0.496
## 10 NO_S 0.561 -0.580 NO S 0.561 0.580
## 11 SE_unemployment -0.400 0.427 SE unemployment 0.400 0.427
## 12 SE_crime -0.569 0.578 SE crime 0.569 0.578
## 13 SE_S 0.622 -0.643 SE S 0.622 0.643
## 14 FI_crime 0.521 -0.607 FI crime 0.521 0.607
## 15 NL_crime 0.566 -0.184 NL crime 0.566 0.184
## 16 DE_crime -0.455 0.352 DE crime 0.455 0.352
#Z scores outcomes, all countries
meta %>% write_csv("data/meta-analysis-data.csv", na = "")
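In a later session the saved table can be read straight back in without re-running the pipeline above:
#later reuse: read the exported table back in
meta = read_csv("data/meta-analysis-data.csv")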