#Start
options(digits = 3)
library(pacman)
p_load(kirkegaard, rvest, rms, ISOcodes, metafor, broom)
theme_set(theme_bw())
source("R/functions.R")
#Data
I downlaoded these from https://2700chess.com/all-fide-players using a curl command fetched using Firefox’s great networking tool (F12). The site’s UI only allows one to select the top 100 players. I figured that this was just a UI limitation, so I modified the url request POST param to fetch more. This can be done with R’s tools (e.g. curl where one has to set a billion headers), but it is bothersome compared to CLI curl
.
#call curl
#only works on unix
if (!file.exists("data/fide.html")) {
system("curl 'https://2700chess.com/all-fide-players' -H 'Host: 2700chess.com' -H 'User-Agent: Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:57.0) Gecko/20100101 Firefox/57.0' -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' -H 'Accept-Language: en-US,en;q=0.5' --compressed -H 'Referer: https://2700chess.com/all-fide-players' -H 'Content-Type: application/x-www-form-urlencoded' -H 'Cookie: PHPSESSID=0n3dtmlqujcdd11r3uqnmtuu66; _csrf=a799b5e90a10a176eabfa682e6534e52a954b4ced1bcd561977c2378d2a3e5fba%3A2%3A%7Bi%3A0%3Bs%3A5%3A%22_csrf%22%3Bi%3A1%3Bs%3A32%3A%22BD9ImGubxcUFZZXQt8fTtGDI7p-HZ_Lk%22%3B%7D' -H 'Connection: keep-alive' -H 'Upgrade-Insecure-Requests: 1' -H 'DNT: 1' --data '_csrf=ejFody1IVGw4dVE.QA8hDgJSPTF3Egw9DgkOI1kPECVNQUU%2FdxcYBw%3D%3D&official_search=1&name=&country=&title=&rating_from=&rating_to=&format=standard&birth_from=1901&birth_to=2017&activity=1&sex=&count=5000' > data/fide.html")
fide = read_html("data/fide.html") %>% html_node("#official-ratings-table") %>% html_table()
} else {
fide = read_html("data/fide.html") %>% html_node("#official-ratings-table") %>% html_table()
}
#better colnames
names(fide) = c("rank", "title", "name", "country", "classic", "rapid", "blitz", "age")
#clean names
fide$name %<>% str_replace("\n.*", "")
#duplicates?
fide$name[which(duplicated(fide$name))]
## [1] "Nikolic, Nebojsa"
#remove
fide %<>% filter(!duplicated(name))
#recode a few problem cases
fide$country %<>% subs_to_mother_name()
#count by country
(fide_country = table2(fide$country, include_NA = F) %>%
select(-Percent) %>%
rename(
country = Group,
chess_count = Count
)
)
#merge data
#countries to ISO
fide_country$ISO = pu_translate(fide_country$country, superunit = "world")
## No exact match: Iran, Islamic Republic of
## No exact match: Macedonia, the Former Yugoslav Republic of
## No exact match: Korea, Republic of
## Best fuzzy match found: Iran, Islamic Republic of -> Iran Islamic Republic of with distance 1.00
## Best fuzzy match found: Macedonia, the Former Yugoslav Republic of -> Macedonia, the former Yugoslav Republic of with distance 1.00
## Best fuzzy match found: Korea, Republic of -> Korea Republic of with distance 1.00
#download in R
if (!file.exists("data/WESPA.rds")) {
WESPA = read_html("http://www.wespa.org/aardvark/cgi-bin/rating.cgi") %>% html_node(".table") %>% html_table() %>% df_legalize_names()
write_rds(WESPA, "data/WESPA.rds")
} else {
WESPA = read_rds("data/WESPA.rds")
}
#duplicates?
length(WESPA$Name[which(duplicated(WESPA$Name))])
## [1] 0
#remove
WESPA %<>% filter(!duplicated(Name))
#recode a few problem cases
WESPA$Country %<>% subs_to_mother_name()
#count by country
(scrabble_country = table2(WESPA$Country, include_NA = F) %>%
select(-Percent) %>%
rename(
country = Group,
scrabble_count = Count
)
)
#merge data
#countries to ISO
scrabble_country$ISO = pu_translate(scrabble_country$country, superunit = "world")
#join
assert_that(!any(duplicated(scrabble_country$ISO)))
## [1] TRUE
#download in R
if (!file.exists("data/go.rds")) {
go_html = read_html("https://www.goratings.org/en/")
#get table
go_table_html = go_html %>% html_nodes("table") %>% .[[3]]
#parse
go = go_table_html %>% html_table() %>% df_legalize_names()
#get countries from flags
go$Flag = str_match_all(go_table_html, '(..)\\.svg') %>% .[[1]] %>% .[, 2]
#rename
names(go) = c("rank", "name", "sex", "country", "rating")
write_rds(go, "data/go.rds")
} else {
go = read_rds("data/go.rds")
}
#duplicates?
length(go$name[which(duplicated(go$name))])
## [1] 0
#remove
go %<>% filter(!duplicated(name))
#recode to ISO3 from ISO2
go$ISO = plyr::mapvalues(go$country, ISO_3166_1$Alpha_2 %>% str_to_lower(),
ISO_3166_1$Alpha_3,
warn_missing = F)
#subs to mother
go$ISO %<>% subs_to_mother_ISO()
#count by country
go_country = table2(go$ISO, include_NA = F) %>%
rename(
ISO = Group,
go_count = Count
)
#join
assert_that(!any(duplicated(go_country$ISO)))
## [1] TRUE
#load from disk if possible
if (!file.exists("data/poker.rds")) {
#annoying, needs a few manual urls and then merge
poker_urls = c("http://www.globalpokerindex.com/rankings/300/",
"http://www.globalpokerindex.com/rankings/301-600/",
"http://www.globalpokerindex.com/rankings/601-900/",
"http://www.globalpokerindex.com/rankings/901-1200/",
"http://www.globalpokerindex.com/rankings/1201-1500/",
"http://www.globalpokerindex.com/rankings/1501-1800/")
#download and extract
poker = map_df(poker_urls, function(x) {
this = read_html(x)
t_table_html = this %>% html_nodes("table") %>% .[[2]]
t_table = t_table_html %>% html_table()
#get flag
t_table$ISO2 = t_table_html %>% as.character() %>% str_match_all('gpiflag-(\\w\\w)') %>% .[[1]] %>% .[, 2]
t_table
})
#ISO3
poker$ISO = plyr::mapvalues(poker$ISO2,
ISO_3166_1$Alpha_2 %>% str_to_lower(),
ISO_3166_1$Alpha_3,
warn_missing = F)
#join some countries
poker$ISO %<>% subs_to_mother_ISO()
#count
poker_country = table2(poker$ISO, include_NA = F) %>%
rename(
poker_count = Count,
ISO = Group
)
poker_country$country = pu_translate(poker_country$ISO, reverse = T)
#to full country
#to ensure that they are all countries, not subdivisions
poker_country$country = pu_translate(poker_country$ISO, superunit = "world", reverse = T)
#save
write_rds(poker, "data/poker.rds")
write_rds(poker_country, "data/poker_country.rds")
} else {
#read
poker = read_rds("data/poker.rds")
poker_country = read_rds("data/poker_country.rds")
}
#get games
ese_games = list(
sc2 = ese_get("sc2", "https://www.esportsearnings.com/games/151-starcraft-ii/countries"),
lol = ese_get("lol", "https://www.esportsearnings.com/games/164-league-of-legends/countries"),
dota2 = ese_get("dota2", "https://www.esportsearnings.com/games/231-dota-2/countries"),
csgo = ese_get("csgo", "https://www.esportsearnings.com/games/245-counter-strike-global-offensive/countries"),
cs = ese_get("cs", "https://www.esportsearnings.com/games/162-counter-strike/countries"),
hs = ese_get("hs", "https://www.esportsearnings.com/games/328-hearthstone-heroes-of-warcraft/countries"),
ow = ese_get("ow", "https://www.esportsearnings.com/games/426-overwatch/countries"),
ssbm = ese_get("ssbm", "https://www.esportsearnings.com/games/204-super-smash-bros-melee/countries")
)
#merge subnational units
ese_games = map(ese_games, ese_merge_subs)
## Warning: `data_frame()` is deprecated, use `tibble()`.
## This warning is displayed once per session.
## No exact match: Korea, Republic of
## Best fuzzy match found: Korea, Republic of -> Korea Republic of with distance 1.00
## No exact match: Korea, Republic of
## No exact match: Macedonia, The Former Yugoslav Republic of
## Best fuzzy match found: Korea, Republic of -> Korea Republic of with distance 1.00
## Best fuzzy match found: Macedonia, The Former Yugoslav Republic of -> Macedonia, the former Yugoslav Republic of with distance 2.00
## No exact match: Iran, Islamic Republic of
## No exact match: Korea, Republic of
## No exact match: Macedonia, The Former Yugoslav Republic of
## Best fuzzy match found: Iran, Islamic Republic of -> Iran Islamic Republic of with distance 1.00
## Best fuzzy match found: Korea, Republic of -> Korea Republic of with distance 1.00
## Best fuzzy match found: Macedonia, The Former Yugoslav Republic of -> Macedonia, the former Yugoslav Republic of with distance 2.00
## No exact match: Iran, Islamic Republic of
## No exact match: Korea, Republic of
## No exact match: Macedonia, The Former Yugoslav Republic of
## Best fuzzy match found: Iran, Islamic Republic of -> Iran Islamic Republic of with distance 1.00
## Best fuzzy match found: Korea, Republic of -> Korea Republic of with distance 1.00
## Best fuzzy match found: Macedonia, The Former Yugoslav Republic of -> Macedonia, the former Yugoslav Republic of with distance 2.00
## No exact match: Korea, Republic of
## No exact match: Macedonia, The Former Yugoslav Republic of
## Best fuzzy match found: Korea, Republic of -> Korea Republic of with distance 1.00
## Best fuzzy match found: Macedonia, The Former Yugoslav Republic of -> Macedonia, the former Yugoslav Republic of with distance 2.00
## No exact match: Korea, Republic of
## Best fuzzy match found: Korea, Republic of -> Korea Republic of with distance 1.00
## No exact match: Korea, Republic of
## Best fuzzy match found: Korea, Republic of -> Korea Republic of with distance 1.00
#national data collection
natdata = silence(read_csv("data/Megadataset_v2.0p.csv"))
names(natdata)[1] = "ISO"
natdata$IQ = natdata$LV2012estimatedIQ
#population data
worldpop = read_html("data/wiki_worldpop.html") %>% html_table() %>% .[[2]] %>% df_legalize_names()
worldpop %<>% mutate(
population2017 = Population_1_July_2017_2 %>% str_replace_all(",", "") %>% as.integer(),
pop = population2017,
log_pop = log10(population2017),
country = str_replace(Country_or_area, "\\[.*", ""),
ISO = pu_translate(country)
) %>% na.omit()
## Warning in function_list[[k]](value): NAs introduced by coercion to integer
## range
## No exact match: World
## No exact match: Guernsey and  Jersey
## Best fuzzy match found: World -> world with distance 1.00
## Best fuzzy match found: Guernsey and  Jersey -> Guernsey (GBR) with distance 11.00
#merge, keep only countries
natdata = right_join(natdata, worldpop, by = "ISO")
#internet users
internetusers = read_csv("data/internet users 2016.csv")
## Parsed with column specification:
## cols(
## region = col_character(),
## country = col_character(),
## iso = col_character(),
## pop2010 = col_double(),
## pop2016 = col_double(),
## i99h2010 = col_double(),
## i99h2016 = col_double(),
## i2010 = col_double(),
## i2016 = col_double()
## )
internetusers$ISO = internetusers$country %>% pu_translate()
## No exact match: Central African Rep.
## No exact match: Congo (Dem. Rep.)
## No exact match: Congo (Rep.)
## No exact match: S. Tome & Principe
## No exact match: Hong Kong, China
## No exact match: Korea (Rep.)
## No exact match: Lao P.D.R.
## No exact match: Macao, China
## Best fuzzy match found: Central African Rep. -> Central African Republic with distance 5.00
## Best fuzzy match found: Congo (Dem. Rep.) -> Congo, Dem. Rep. with distance 3.00
## Best fuzzy match found: Congo (Rep.) -> Congo, Rep. with distance 3.00
## Best fuzzy match found: S. Tome & Principe -> Sao Tome & Principe with distance 2.00
## Best fuzzy match found: Hong Kong, China -> Hong Kong-China with distance 2.00
## Best fuzzy match found: Korea (Rep.) -> Korea Rep. with distance 2.00
## Best fuzzy match found: Lao P.D.R. -> Lao PDR with distance 3.00
## Best fuzzy match found: Macao, China -> Macao-China with distance 2.00
internetusers$internet_users_pct = internetusers$i99h2016
#merge
natdata = left_join(natdata, internetusers %>% select(ISO, internet_users_pct), by = "ISO")
#recode
natdata %<>% mutate(
UN_continent = UN_continental_region_1 %>% fct_relevel("Europe"),
UN_region = UN_statistical_region_1 %>% fct_relevel("Western Europe")
)
#merge stat areas to
natdata %<>% mutate(
UN_macroregion = plyr::revalue(UN_region, c("Australia and New Zealand" = "N & W Europe + offshoots",
"Europe" = "N & W Europe + offshoots",
"Northern Europe" = "N & W Europe + offshoots",
"Eastern Africa" = "Africa",
"Middle Africa" = "Africa",
"Western Africa" = "Africa",
"Southern Africa" = "Africa",
"Europe" = "North & Western Europe",
"Northern America" = "N & W Europe + offshoots",
"South America" = "Latin America",
"Central America" = "Latin America",
"Western Asia" = "MENA",
"Northern Africa" = "MENA",
"Western Europe" = "N & W Europe + offshoots"
), warn_missing = F)
)
#exclude some quasi subunits
natdata %<>% filter(!ISO %in% sub_names_ISO)
#games
egames_names = names(ese_games)
all_games = c(ese_games,
list(
chess = fide_country,
go = go_country,
scrabble = scrabble_country,
poker = poker_country
)
)
all_names = names(all_games)
#check for quasi-countries that we want to merge into motherland
map_df(all_games, function(x) {
x %>%
filter(ISO %in% c("HKG", "MAC", "GGY", "IMN", "Greenland", "USA_PRI", "FRO", "WSM"))
})
#join all
for (g in all_names) {
#extract cols of interest, then join to main
count_var = g + "_count"
this_df = all_games[[g]][c("ISO", count_var)]
#join
natdata = full_join(natdata, this_df, by = "ISO")
#NA to 0
natdata[[count_var]] %<>% plyr::mapvalues(NA, 0, warn_missing = F)
#mutate
natdata[["log_" + count_var]] = log10(natdata[[count_var]] + 1)
natdata[[count_var + "_per_million"]] = natdata[[count_var]] / natdata$population2017
}
Plot data with IQ in informative ways.
We do more analyses on this as this game had the most data.
#game stats
map2_df(all_games, names(all_games), function(x, g) {
data_frame(
game = g,
esport = (g %in% egames_names) %>% plyr::mapvalues(c(T, F), c("yes", "no"), warn_missing = F),
top_players = sum(x[[g + "_count"]]) %>% as.integer(),
n_unique_countries = nrow(x)
)
}) %>% write_clipboard()
## Game Esport Top players N unique countries
## 1 sc2 yes 2179 64
## 2 lol yes 5072 80
## 3 dota2 yes 2219 71
## 4 csgo yes 9076 92
## 5 cs yes 2624 58
## 6 hs yes 1744 66
## 7 ow yes 2104 63
## 8 ssbm yes 2107 29
## 9 chess no 4942 103
## 10 go no 946 14
## 11 scrabble no 1321 48
## 12 poker no 1800 66
#regions
table2(natdata$UN_continent, include_NA = F)
table2(natdata$UN_region, include_NA = F)
table2(natdata$UN_macroregion, include_NA = F)
Iceland example.
#simple
set.seed(1)
rbinom(1e6, 3.3e5, prob = 1/1e6) %>%
table2() %>%
rename(
event_count = Group,
percent = Percent
) %>% write_clipboard()
## Event count Count Percent
## 1 0 719259.00 71.93
## 2 1 237116.00 23.71
## 3 2 39105.00 3.91
## 4 3 4157.00 0.42
## 5 4 335.00 0.03
## 6 5 28.00 0.00
## 7 0.00 0.00
Realistic data.
#params
world_pops = natdata$population2017
rate_min = 1/1e9
rate_max = 1/1e6
#simulate world
sim_world_data = function(pop_factor = 1, world_size = 200, cause_effect_r = .7) {
#two normals for cause and effect
d_ = MASS::mvrnorm(n = world_size, mu = c(0, 0), Sigma = matrix(c(1, cause_effect_r, cause_effect_r, 1), nrow = 2))
#data frame
d_ = as_data_frame(d_) %>%
set_colnames(c("person_rate", "cause"))
#adjust pop sizes?
world_pops_local = as.numeric(world_pops) * pop_factor
#mutate
d_ %<>% mutate(
#pop sizes
pop = world_pops_local %>% sample(size = world_size, replace = T),
#rescale person rate
person_rate = rescale(person_rate, new_min = rate_min, new_max = rate_max),
#sample for each country
count = map2_int(person_rate, pop, function(rate, pop) {
rbinom(n = 1, size = pop, prob = rate)
}),
#logs
log_pop = log10(pop),
log_count = log10(count + 1),
#per capita.
count_per_million = (count / pop) * 1e6,
#resids
count_resid = resid(lm(count ~ pop, data = d_)),
log_count_resid = resid(lm(log_count ~ log_pop, data = d_))
)
d_
}
sim_world_reg = function(pop_factor = 1, world_size = 200, cause_effect_r = .7) {
d_ = sim_world_data(pop_factor = pop_factor, world_size = world_size, cause_effect_r = cause_effect_r)
#models
fit1 = lm(count ~ cause + pop, data = d_)
fit2 = lm(log_count ~ cause + log_pop, data = d_)
fit3 = lm(count_per_million ~ cause, data = d_)
#save
#don't bother with R2's here because they wont be comparable. the count approaches includes population size as a predictor, and population size explains most of the variance!
data_frame(
count_beta = fit1 %>% summary() %>% .$coef %>% .[2, 1],
count_p = fit1 %>% summary() %>% .$coef %>% .[2, 4],
log_count_beta = fit2 %>% summary() %>% .$coef %>% .[2, 1],
log_count_p = fit2 %>% summary() %>% .$coef %>% .[2, 4],
per_capita_beta = fit3 %>% summary() %>% .$coef %>% .[2, 1],
per_capita_p = fit3 %>% summary() %>% .$coef %>% .[2, 4]
)
}
#simulate
set.seed(1)
sim_results = map_df(rep(1, 1000), sim_world_reg)
## Warning: `as_data_frame()` is deprecated, use `as_tibble()` (but mind the new semantics).
## This warning is displayed once per session.
#stats
sim_result_desc = describe(sim_results)
#coef of variation
sim_result_desc %>%
rownames_to_column("variable") %>%
mutate(cv = sd / mean) %>% select(-n, -trimmed, -range, -vars, -se) %>%
write_clipboard()
## Variable Mean Sd Median Mad Min Max Skew Kurtosis Cv
## 1 count_beta 3.66 2.07 3.17 1.30 0.19 18.17 2.25 7.85 0.56
## 2 count_p 0.03 0.08 0.00 0.00 0.00 0.94 5.60 40.52 3.08
## 3 log_count_beta 0.08 0.03 0.08 0.03 -0.01 0.17 -0.17 0.12 0.34
## 4 log_count_p 0.03 0.10 0.00 0.00 0.00 1.00 5.49 34.76 3.25
## 5 per_capita_beta 0.12 0.14 0.11 0.06 -2.02 1.26 -3.97 82.87 1.19
## 6 per_capita_p 0.17 0.25 0.04 0.06 0.00 1.00 1.63 1.63 1.46
#p cutoffs
sim_results %>%
select(contains("_p")) %>%
map_df(~data_frame("<.05" = mean(. < .05), "<.01" = mean(. < .01), "<.005" = mean(.< .005), "<.001" = mean(.<.001))) %>%
mutate(
method = sim_results %>%
select(contains("_p")) %>% names()
)
sim_results %>%
select(contains("_p")) %>%
gather(method, p_value, count_p, log_count_p, per_capita_p) %>%
mutate(method = str_replace(method, "_p", "")) %>%
GG_denhist("p_value", group = "method", vline = NULL) +
scale_x_log10("p value") +
geom_vline(xintercept = c(.05, .01, .005, .001), linetype = "dashed") +
scale_fill_discrete(labels = function(x) str_replace(x, "_", " "))
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
GG_save("figs/sim_p_dist.png")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#very large pops to determine conversion function with certainty
#pars
runs_large = 1000
set.seed(1)
#simulate
sim_large_results = map2_df(rep(1, runs_large), runif(runs_large, .2, .9), .f = ~sim_world_reg(cause_effect_r = .y, pop_factor = 1e4))
#describe/examine
describe(sim_large_results)
#cors
wtd.cors(sim_large_results[c("count_beta", "log_count_beta", "per_capita_beta")])
## count_beta log_count_beta per_capita_beta
## count_beta 1.000 0.464 0.483
## log_count_beta 0.464 1.000 0.950
## per_capita_beta 0.483 0.950 1.000
#plots
GG_scatter(sim_large_results, "count_beta", "per_capita_beta")
GG_scatter(sim_large_results, "log_count_beta", "per_capita_beta")
#fit model
rms::ols(per_capita_beta ~ log_count_beta, data = sim_large_results)
## Linear Regression Model
##
## rms::ols(formula = per_capita_beta ~ log_count_beta, data = sim_large_results)
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 1000 LR chi2 2324.57 R2 0.902
## sigma0.0124 d.f. 1 R2 adj 0.902
## d.f. 998 Pr(> chi2) 0.0000 g 0.043
##
## Residuals
##
## Min 1Q Median 3Q Max
## -0.0397028 -0.0074611 0.0003148 0.0081496 0.0420562
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.0117 0.0010 11.69 <0.0001
## log_count_beta 0.7467 0.0078 95.94 <0.0001
##
#one very large world
large_world = sim_world_data(pop_factor = 1e4, world_size = 1e4)
#cors
wtd.cors(large_world)
## person_rate cause pop count log_pop log_count
## person_rate 1.00000 0.70047 1.22e-02 0.0794 -3.23e-03 0.10672
## cause 0.70047 1.00000 1.18e-02 0.0583 -1.66e-03 0.07518
## pop 0.01215 0.01180 1.00e+00 0.9598 3.76e-01 0.37531
## count 0.07935 0.05833 9.60e-01 1.0000 3.54e-01 0.36082
## log_pop -0.00323 -0.00166 3.76e-01 0.3543 1.00e+00 0.99312
## log_count 0.10672 0.07518 3.75e-01 0.3608 9.93e-01 1.00000
## count_per_million 0.97937 0.68515 1.26e-02 0.0784 3.04e-03 0.11444
## count_resid 0.24113 0.16745 3.84e-17 0.2807 -2.40e-02 0.00215
## log_count_resid 0.93876 0.65607 1.45e-02 0.0761 2.34e-16 0.11711
## count_per_million count_resid log_count_resid
## person_rate 0.97937 2.41e-01 9.39e-01
## cause 0.68515 1.67e-01 6.56e-01
## pop 0.01257 3.84e-17 1.45e-02
## count 0.07837 2.81e-01 7.61e-02
## log_pop 0.00304 -2.40e-02 2.34e-16
## log_count 0.11444 2.15e-03 1.17e-01
## count_per_million 1.00000 2.36e-01 9.51e-01
## count_resid 0.23617 1.00e+00 2.22e-01
## log_count_resid 0.95145 2.22e-01 1.00e+00
#some plots
GG_scatter(large_world, "log_count", "count_per_million")
GG_scatter(large_world, "count_resid", "count_per_million")
ggplot(large_world, aes_string("log_count_resid", "count_per_million")) +
geom_point() +
geom_smooth(se = F) +
# geom_line(data = data_frame(x = large_world$log_count_resid, pred = predict(line_fit)), mapping = aes(x = x, y = pred)) +
theme_bw()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#fit model
rms::ols(count_per_million ~ rcs(log_count, 3), data = large_world)
## Linear Regression Model
##
## rms::ols(formula = count_per_million ~ rcs(log_count, 3), data = large_world)
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 10000 LR chi2 160.22 R2 0.016
## sigma0.1428 d.f. 2 R2 adj 0.016
## d.f. 9997 Pr(> chi2) 0.0000 g 0.020
##
## Residuals
##
## Min 1Q Median 3Q Max
## -0.500823 -0.095570 -0.001154 0.096204 0.638505
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.4952 0.0079 62.41 <0.0001
## log_count 0.0026 0.0024 1.07 0.2825
## log_count' 0.0139 0.0026 5.33 <0.0001
##
Fit model without IQ and save residuals. They should correlate and reflect GCA.
#cors
wtd.cors(natdata[c("IQ", "population2017")])
## IQ population2017
## IQ 1.000 0.118
## population2017 0.118 1.000
#using the 0 imputed data
models_popsize = map_df(all_names, function(game) {
#form
t_form = as.formula(sprintf("log_%s_count ~ log_pop", game))
#fit
t_fit = rms::ols(t_form, data = natdata)
#save resids
data_frame(
game = game,
country = natdata$country,
ISO = natdata$ISO,
resid = resid(t_fit)
)
})
#aggregate
models_popsize_countries = models_popsize %>% spread(game, resid)
#factor analysis
(game_cors = wtd.cors(models_popsize_countries %>% select(-country, -ISO)))
## chess cs csgo dota2 go hs lol ow poker sc2 scrabble
## chess 1.0000 0.645 0.754 0.581 0.0534 0.660 0.661 0.519 0.669 0.566 0.0650
## cs 0.6449 1.000 0.813 0.699 0.3639 0.832 0.785 0.784 0.777 0.833 0.2035
## csgo 0.7536 0.813 1.000 0.765 0.2792 0.864 0.857 0.813 0.775 0.823 0.2476
## dota2 0.5808 0.699 0.765 1.000 0.2241 0.736 0.707 0.638 0.623 0.711 0.2921
## go 0.0534 0.364 0.279 0.224 1.0000 0.434 0.376 0.485 0.329 0.486 0.0586
## hs 0.6604 0.832 0.864 0.736 0.4343 1.000 0.911 0.876 0.829 0.909 0.2125
## lol 0.6613 0.785 0.857 0.707 0.3761 0.911 1.000 0.861 0.760 0.864 0.2274
## ow 0.5191 0.784 0.813 0.638 0.4849 0.876 0.861 1.000 0.746 0.876 0.3066
## poker 0.6689 0.777 0.775 0.623 0.3292 0.829 0.760 0.746 1.000 0.795 0.2420
## sc2 0.5661 0.833 0.823 0.711 0.4861 0.909 0.864 0.876 0.795 1.000 0.2744
## scrabble 0.0650 0.203 0.248 0.292 0.0586 0.212 0.227 0.307 0.242 0.274 1.0000
## ssbm 0.4227 0.623 0.524 0.356 0.1913 0.584 0.521 0.579 0.675 0.642 0.2587
## ssbm
## chess 0.423
## cs 0.623
## csgo 0.524
## dota2 0.356
## go 0.191
## hs 0.584
## lol 0.521
## ow 0.579
## poker 0.675
## sc2 0.642
## scrabble 0.259
## ssbm 1.000
describe(game_cors %>% {diag(.) = NA; .})
game_cors %>% MAT_half() %>% describe()
(gms_factor = models_popsize_countries %>% .[all_names] %>% fa())
## Factor Analysis using method = minres
## Call: fa(r = .)
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 h2 u2 com
## sc2 0.94 0.885 0.115 1
## lol 0.92 0.845 0.155 1
## dota2 0.76 0.573 0.427 1
## csgo 0.92 0.841 0.159 1
## cs 0.89 0.794 0.206 1
## hs 0.96 0.924 0.076 1
## ow 0.90 0.807 0.193 1
## ssbm 0.63 0.398 0.602 1
## chess 0.69 0.471 0.529 1
## go 0.39 0.151 0.849 1
## scrabble 0.27 0.073 0.927 1
## poker 0.87 0.753 0.247 1
##
## MR1
## SS loadings 7.52
## Proportion Var 0.63
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## The degrees of freedom for the null model are 66 and the objective function was 13.1 with Chi Square of 2888
## The degrees of freedom for the model are 54 and the objective function was 1.64
##
## The root mean square of the residuals (RMSR) is 0.06
## The df corrected root mean square of the residuals is 0.07
##
## The harmonic number of observations is 227 with the empirical chi square 111 with prob < 8.8e-06
## The total number of observations was 227 with Likelihood Chi Square = 361 with prob < 5.1e-47
##
## Tucker Lewis Index of factoring reliability = 0.867
## RMSEA index = 0.161 and the 90 % confidence intervals are 0.143 0.174
## BIC = 68.2
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy
## MR1
## Correlation of (regression) scores with factors 0.99
## Multiple R square of scores with factors 0.98
## Minimum correlation of possible factor scores 0.97
#save scores
models_popsize_countries$general_mental_sports = gms_factor$scores %>% as.vector()
#merge to main
natdata = full_join(natdata,
models_popsize_countries %>% select(-country),
by = "ISO")
#cors
wtd.cors(natdata[c("IQ",
"general_mental_sports"
)],
weight = sqrt(natdata$population2017))
## IQ general_mental_sports
## IQ 1.000 0.792
## general_mental_sports 0.792 1.000
#plots
GG_scatter(natdata, "IQ", "general_mental_sports",
weights = sqrt(natdata$population2017),
case_names = "country"
) +
geom_smooth(se = F, span = 1) +
scale_y_continuous("General mental gaming skill")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
GG_save("figs/gmgs_IQ.png")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
#Jensen's method
fa_Jensens_method(gms_factor, natdata, "IQ") +
scale_y_continuous("Correlation with national IQ")
## Using Pearson correlations for the criterion-indicators relationships.
GG_save("figs/Jensens_method.png")
#models
#base
(model_1a = rms::ols(general_mental_sports ~ IQ, data = natdata, weights = sqrt(natdata$population2017)))
## Frequencies of Missing Values Due to Each Variable
## general_mental_sports IQ (weights)
## 0 32 0
##
## Linear Regression Model
##
## rms::ols(formula = general_mental_sports ~ IQ, data = natdata,
## weights = sqrt(natdata$population2017))
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 195 LR chi2 192.68 R2 0.628
## sigma53.2267 d.f. 1 R2 adj 0.626
## d.f. 193 Pr(> chi2) 0.0000 g 1.186
##
## Residuals
##
## Min 1Q Median 3Q Max
## -3.0638 -0.5878 -0.1587 0.3940 1.9984
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -8.0283 0.4595 -17.47 <0.0001
## IQ 0.0958 0.0053 18.04 <0.0001
##
#nonlinear
(model_1b = rms::ols(general_mental_sports ~ rcs(IQ, 3), data = natdata, weights = sqrt(natdata$population2017)))
## Frequencies of Missing Values Due to Each Variable
## general_mental_sports IQ (weights)
## 0 32 0
##
## Linear Regression Model
##
## rms::ols(formula = general_mental_sports ~ rcs(IQ, 3), data = natdata,
## weights = sqrt(natdata$population2017))
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 195 LR chi2 221.62 R2 0.679
## sigma49.5484 d.f. 2 R2 adj 0.676
## d.f. 192 Pr(> chi2) 0.0000 g 1.099
##
## Residuals
##
## Min 1Q Median 3Q Max
## -3.53472 -0.42170 -0.04563 0.34492 2.02523
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -3.1654 0.9761 -3.24 0.0014
## IQ 0.0309 0.0127 2.43 0.0161
## IQ' 0.0765 0.0138 5.54 <0.0001
##
#real increase?
lrtest(model_1a,
model_1b)
##
## Model 1: general_mental_sports ~ IQ
## Model 2: general_mental_sports ~ rcs(IQ, 3)
##
## L.R. Chisq d.f. P
## 2.89e+01 1.00e+00 7.46e-08
#add regional controls
(model_2a = rms::ols(general_mental_sports ~ rcs(IQ, 3) + UN_region, data = natdata, weights = sqrt(natdata$population2017)))
## Frequencies of Missing Values Due to Each Variable
## general_mental_sports IQ UN_region
## 0 32 0
## (weights)
## 0
##
## Linear Regression Model
##
## rms::ols(formula = general_mental_sports ~ rcs(IQ, 3) + UN_region,
## data = natdata, weights = sqrt(natdata$population2017))
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 195 LR chi2 330.20 R2 0.816
## sigma39.8613 d.f. 24 R2 adj 0.790
## d.f. 170 Pr(> chi2) 0.0000 g 1.246
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.73752 -0.38474 -0.01435 0.29236 1.37945
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.0828 1.7583 0.05 0.9625
## IQ -0.0025 0.0219 -0.11 0.9095
## IQ' 0.1006 0.0253 3.97 0.0001
## UN_region=Australia and New Zealand -0.2965 0.5233 -0.57 0.5717
## UN_region=Caribbean -0.7295 0.5045 -1.45 0.1500
## UN_region=Central America -0.6606 0.4369 -1.51 0.1324
## UN_region=Central Asia -0.5331 0.4931 -1.08 0.2812
## UN_region=Eastern Africa -0.9817 0.4575 -2.15 0.0333
## UN_region=Eastern Asia -1.3095 0.3140 -4.17 <0.0001
## UN_region=Eastern Europe 0.0237 0.3044 0.08 0.9380
## UN_region=Europe -1.0336 1.2425 -0.83 0.4066
## UN_region=Melanesia -0.8173 0.6502 -1.26 0.2105
## UN_region=Micronesia -0.2036 1.2435 -0.16 0.8701
## UN_region=Middle Africa -0.9388 0.5257 -1.79 0.0759
## UN_region=Northern Africa -0.9145 0.4387 -2.08 0.0386
## UN_region=Northern America 1.0949 0.3402 3.22 0.0015
## UN_region=Northern Europe -0.0172 0.3373 -0.05 0.9593
## UN_region=Polynesia -0.5803 1.3690 -0.42 0.6722
## UN_region=South America 0.1381 0.3845 0.36 0.7198
## UN_region=South-Eastern Asia -0.7036 0.3447 -2.04 0.0428
## UN_region=Southern Africa 0.2218 0.5581 0.40 0.6916
## UN_region=Southern Asia -0.9917 0.4091 -2.42 0.0164
## UN_region=Southern Europe -0.4129 0.3311 -1.25 0.2141
## UN_region=Western Africa -0.9411 0.4818 -1.95 0.0524
## UN_region=Western Asia -0.7471 0.3880 -1.93 0.0559
##
(model_2b = rms::ols(general_mental_sports ~ rcs(IQ, 3) + UN_macroregion, data = natdata, weights = sqrt(natdata$population2017)))
## Frequencies of Missing Values Due to Each Variable
## general_mental_sports IQ UN_macroregion
## 0 32 0
## (weights)
## 0
##
## Linear Regression Model
##
## rms::ols(formula = general_mental_sports ~ rcs(IQ, 3) + UN_macroregion,
## data = natdata, weights = sqrt(natdata$population2017))
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 195 LR chi2 294.25 R2 0.779
## sigma42.5975 d.f. 15 R2 adj 0.760
## d.f. 179 Pr(> chi2) 0.0000 g 1.248
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.7355 -0.3812 -0.0395 0.2730 1.6502
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -0.3981 1.7446 -0.23 0.8198
## IQ 0.0056 0.0218 0.26 0.7979
## IQ' 0.0981 0.0258 3.80 0.0002
## UN_macroregion=Caribbean -0.8678 0.4869 -1.78 0.0764
## UN_macroregion=Latin America -0.3229 0.3367 -0.96 0.3388
## UN_macroregion=Central Asia -0.6974 0.4737 -1.47 0.1427
## UN_macroregion=Africa -0.9587 0.4226 -2.27 0.0245
## UN_macroregion=Eastern Asia -1.5971 0.2806 -5.69 <0.0001
## UN_macroregion=Eastern Europe -0.2231 0.2559 -0.87 0.3845
## UN_macroregion=Melanesia -1.0046 0.6562 -1.53 0.1275
## UN_macroregion=Micronesia -0.3899 1.3091 -0.30 0.7662
## UN_macroregion=MENA -0.9987 0.3446 -2.90 0.0042
## UN_macroregion=Polynesia -0.7886 1.4461 -0.55 0.5862
## UN_macroregion=South-Eastern Asia -0.9207 0.2984 -3.09 0.0024
## UN_macroregion=Southern Asia -1.1646 0.3717 -3.13 0.0020
## UN_macroregion=Southern Europe -0.6531 0.2892 -2.26 0.0252
##
(model_2c = rms::ols(general_mental_sports ~ rcs(IQ, 3) + UN_continent, data = natdata, weights = sqrt(natdata$population2017)))
## Frequencies of Missing Values Due to Each Variable
## general_mental_sports IQ UN_continent
## 0 32 0
## (weights)
## 0
##
## Linear Regression Model
##
## rms::ols(formula = general_mental_sports ~ rcs(IQ, 3) + UN_continent,
## data = natdata, weights = sqrt(natdata$population2017))
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 195 LR chi2 273.56 R2 0.754
## sigma43.8292 d.f. 6 R2 adj 0.746
## d.f. 188 Pr(> chi2) 0.0000 g 1.208
##
## Residuals
##
## Min 1Q Median 3Q Max
## -3.07332 -0.51577 -0.02894 0.27535 1.67390
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -1.3971 1.3218 -1.06 0.2919
## IQ 0.0148 0.0162 0.92 0.3613
## IQ' 0.0850 0.0144 5.89 <0.0001
## UN_continent=Africa -0.6435 0.2355 -2.73 0.0069
## UN_continent=Americas 0.1360 0.1826 0.75 0.4572
## UN_continent=Asia -0.8145 0.1500 -5.43 <0.0001
## UN_continent=Oceania -0.4043 0.3806 -1.06 0.2896
##
#final, with linear
(model_2d = rms::ols(general_mental_sports ~ IQ + UN_macroregion, data = natdata, weights = sqrt(natdata$population2017)))
## Frequencies of Missing Values Due to Each Variable
## general_mental_sports IQ UN_macroregion
## 0 32 0
## (weights)
## 0
##
## Linear Regression Model
##
## rms::ols(formula = general_mental_sports ~ IQ + UN_macroregion,
## data = natdata, weights = sqrt(natdata$population2017))
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 195 LR chi2 279.11 R2 0.761
## sigma44.1596 d.f. 14 R2 adj 0.742
## d.f. 180 Pr(> chi2) 0.0000 g 1.286
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.77716 -0.51179 -0.06577 0.30740 1.58381
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -4.6415 1.3897 -3.34 0.0010
## IQ 0.0704 0.0140 5.01 <0.0001
## UN_macroregion=Caribbean -1.4389 0.4801 -3.00 0.0031
## UN_macroregion=Latin America -1.0968 0.2779 -3.95 0.0001
## UN_macroregion=Central Asia -1.4872 0.4413 -3.37 0.0009
## UN_macroregion=Africa -1.2835 0.4291 -2.99 0.0032
## UN_macroregion=Eastern Asia -1.0161 0.2439 -4.17 <0.0001
## UN_macroregion=Eastern Europe -0.4636 0.2570 -1.80 0.0729
## UN_macroregion=Melanesia -1.8600 0.6390 -2.91 0.0041
## UN_macroregion=Micronesia -1.2389 1.3372 -0.93 0.3554
## UN_macroregion=MENA -1.7861 0.2854 -6.26 <0.0001
## UN_macroregion=Polynesia -1.5579 1.4843 -1.05 0.2953
## UN_macroregion=South-Eastern Asia -1.5479 0.2577 -6.01 <0.0001
## UN_macroregion=Southern Asia -1.9865 0.3134 -6.34 <0.0001
## UN_macroregion=Southern Europe -0.9746 0.2867 -3.40 0.0008
##
#compare
lrtest(model_2d, model_2b)
##
## Model 1: general_mental_sports ~ IQ + UN_macroregion
## Model 2: general_mental_sports ~ rcs(IQ, 3) + UN_macroregion
##
## L.R. Chisq d.f. P
## 15.1317 1.0000 0.0001
#output
broom::tidy(summary.lm(model_2b), conf.int = T) %>% cbind(model_2b %>% confint()) %>% write_clipboard(digits = 3)
## Term Estimate
## Intercept Intercept -0.398
## IQ IQ 0.006
## IQ' IQ' 0.098
## UN macroregion=Caribbean UN_macroregion=Caribbean -0.868
## UN macroregion=Latin America UN_macroregion=Latin America -0.323
## UN macroregion=Central Asia UN_macroregion=Central Asia -0.697
## UN macroregion=Africa UN_macroregion=Africa -0.959
## UN macroregion=Eastern Asia UN_macroregion=Eastern Asia -1.597
## UN macroregion=Eastern Europe UN_macroregion=Eastern Europe -0.223
## UN macroregion=Melanesia UN_macroregion=Melanesia -1.005
## UN macroregion=Micronesia UN_macroregion=Micronesia -0.390
## UN macroregion=MENA UN_macroregion=MENA -0.999
## UN macroregion=Polynesia UN_macroregion=Polynesia -0.789
## UN macroregion=South-Eastern Asia UN_macroregion=South-Eastern Asia -0.921
## UN macroregion=Southern Asia UN_macroregion=Southern Asia -1.165
## UN macroregion=Southern Europe UN_macroregion=Southern Europe -0.653
## Std error Statistic P value 2 5 % 97 5 %
## Intercept 1.745 -0.228 0.820 -3.841 3.045
## IQ 0.022 0.256 0.798 -0.037 0.049
## IQ' 0.026 3.800 0.000 0.047 0.149
## UN macroregion=Caribbean 0.487 -1.782 0.076 -1.829 0.093
## UN macroregion=Latin America 0.337 -0.959 0.339 -0.987 0.341
## UN macroregion=Central Asia 0.474 -1.472 0.143 -1.632 0.237
## UN macroregion=Africa 0.423 -2.268 0.025 -1.793 -0.125
## UN macroregion=Eastern Asia 0.281 -5.692 0.000 -2.151 -1.043
## UN macroregion=Eastern Europe 0.256 -0.872 0.384 -0.728 0.282
## UN macroregion=Melanesia 0.656 -1.531 0.128 -2.299 0.290
## UN macroregion=Micronesia 1.309 -0.298 0.766 -2.973 2.193
## UN macroregion=MENA 0.345 -2.899 0.004 -1.679 -0.319
## UN macroregion=Polynesia 1.446 -0.545 0.586 -3.642 2.065
## UN macroregion=South-Eastern Asia 0.298 -3.085 0.002 -1.510 -0.332
## UN macroregion=Southern Asia 0.372 -3.133 0.002 -1.898 -0.431
## UN macroregion=Southern Europe 0.289 -2.258 0.025 -1.224 -0.082
#relative effect sizes
model_2d$coefficients[2] / model_1a$coefficients[2]
## IQ
## 0.735
#regions alone
(model_3a = rms::ols(general_mental_sports ~ UN_continent, data = natdata, weights = sqrt(natdata$population2017)))
## Linear Regression Model
##
## rms::ols(formula = general_mental_sports ~ UN_continent, data = natdata,
## weights = sqrt(natdata$population2017))
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 227 LR chi2 114.77 R2 0.397
## sigma63.2950 d.f. 4 R2 adj 0.386
## d.f. 222 Pr(> chi2) 0.0000 g 0.995
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.1068 -0.9033 -0.3923 0.1609 2.6257
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 1.6167 0.1697 9.53 <0.0001
## UN_continent=Africa -2.5270 0.2174 -11.62 <0.0001
## UN_continent=Americas -0.9285 0.2443 -3.80 0.0002
## UN_continent=Asia -1.5522 0.2051 -7.57 <0.0001
## UN_continent=Oceania -1.0239 0.5208 -1.97 0.0505
##
(model_3b = rms::ols(general_mental_sports ~ UN_macroregion, data = natdata, weights = sqrt(natdata$population2017)))
## Linear Regression Model
##
## rms::ols(formula = general_mental_sports ~ UN_macroregion, data = natdata,
## weights = sqrt(natdata$population2017))
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 227 LR chi2 292.19 R2 0.724
## sigma43.7156 d.f. 13 R2 adj 0.707
## d.f. 213 Pr(> chi2) 0.0000 g 1.190
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.81097 -0.43876 -0.04281 0.36487 1.78954
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 2.2622 0.1453 15.57 <0.0001
## UN_macroregion=Caribbean -2.8168 0.3495 -8.06 <0.0001
## UN_macroregion=Latin America -1.9819 0.2081 -9.53 <0.0001
## UN_macroregion=Central Asia -2.7227 0.3588 -7.59 <0.0001
## UN_macroregion=Africa -3.2036 0.1779 -18.01 <0.0001
## UN_macroregion=Eastern Asia -0.5191 0.2222 -2.34 0.0204
## UN_macroregion=Eastern Europe -0.6420 0.2508 -2.56 0.0112
## UN_macroregion=Melanesia -2.8604 0.5991 -4.77 <0.0001
## UN_macroregion=Micronesia -2.2355 1.0433 -2.14 0.0333
## UN_macroregion=MENA -2.7462 0.2039 -13.47 <0.0001
## UN_macroregion=Polynesia -2.2499 0.9975 -2.26 0.0251
## UN_macroregion=South-Eastern Asia -2.1596 0.2216 -9.75 <0.0001
## UN_macroregion=Southern Asia -3.1356 0.2058 -15.24 <0.0001
## UN_macroregion=Southern Europe -1.2572 0.2761 -4.55 <0.0001
##
(model_3c = rms::ols(general_mental_sports ~ UN_region, data = natdata, weights = sqrt(natdata$population2017)))
## Linear Regression Model
##
## rms::ols(formula = general_mental_sports ~ UN_region, data = natdata,
## weights = sqrt(natdata$population2017))
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 227 LR chi2 332.36 R2 0.769
## sigma40.8862 d.f. 22 R2 adj 0.744
## d.f. 204 Pr(> chi2) 0.0000 g 1.200
##
## Residuals
##
## Min 1Q Median 3Q Max
## -3.00009 -0.43755 0.01496 0.39681 1.78954
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 2.1025 0.2298 9.15 <0.0001
## UN_region=Australia and New Zealand -0.2601 0.5365 -0.48 0.6284
## UN_region=Caribbean -2.6570 0.3758 -7.07 <0.0001
## UN_region=Central America -2.4827 0.3342 -7.43 <0.0001
## UN_region=Central Asia -2.5630 0.3833 -6.69 <0.0001
## UN_region=Eastern Africa -3.1275 0.2737 -11.43 <0.0001
## UN_region=Eastern Asia -0.3594 0.2784 -1.29 0.1983
## UN_region=Eastern Europe -0.4823 0.2990 -1.61 0.1082
## UN_region=Europe -2.0736 1.2618 -1.64 0.1018
## UN_region=Melanesia -2.7007 0.5902 -4.58 <0.0001
## UN_region=Micronesia -2.0758 0.9932 -2.09 0.0379
## UN_region=Middle Africa -3.1266 0.3275 -9.55 <0.0001
## UN_region=Northern Africa -2.8533 0.3165 -9.02 <0.0001
## UN_region=Northern America 0.9667 0.3474 2.78 0.0059
## UN_region=Northern Europe -0.2016 0.3421 -0.59 0.5562
## UN_region=Polynesia -2.0901 0.9512 -2.20 0.0291
## UN_region=South America -1.4976 0.2859 -5.24 <0.0001
## UN_region=South-Eastern Asia -1.9998 0.2780 -7.19 <0.0001
## UN_region=Southern Africa -1.9685 0.4225 -4.66 <0.0001
## UN_region=Southern Asia -2.9759 0.2672 -11.14 <0.0001
## UN_region=Southern Europe -1.0975 0.3178 -3.45 0.0007
## UN_region=Western Africa -3.1318 0.2826 -11.08 <0.0001
## UN_region=Western Asia -2.4243 0.2856 -8.49 <0.0001
##
#predict IQ from regions
(model_4a = rms::ols(IQ ~ UN_continent, data = natdata, weights = sqrt(natdata$population2017)))
## Frequencies of Missing Values Due to Each Variable
## IQ UN_continent (weights)
## 32 0 0
##
## Linear Regression Model
##
## rms::ols(formula = IQ ~ UN_continent, data = natdata, weights = sqrt(natdata$population2017))
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 195 LR chi2 161.52 R2 0.563
## sigma480.3925 d.f. 4 R2 adj 0.554
## d.f. 190 Pr(> chi2) 0.0000 g 9.445
##
## Residuals
##
## Min 1Q Median 3Q Max
## -24.902 -5.889 -2.204 2.161 17.760
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 96.3769 1.2937 74.49 <0.0001
## UN_continent=Africa -23.7726 1.6638 -14.29 <0.0001
## UN_continent=Americas -9.4749 1.8714 -5.06 <0.0001
## UN_continent=Asia -7.0365 1.5616 -4.51 <0.0001
## UN_continent=Oceania -5.0029 4.1545 -1.20 0.2300
##
(model_4b = rms::ols(IQ ~ UN_macroregion, data = natdata, weights = sqrt(natdata$population2017)))
## Frequencies of Missing Values Due to Each Variable
## IQ UN_macroregion (weights)
## 32 0 0
##
## Linear Regression Model
##
## rms::ols(formula = IQ ~ UN_macroregion, data = natdata, weights = sqrt(natdata$population2017))
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 195 LR chi2 452.06 R2 0.902
## sigma233.6685 d.f. 13 R2 adj 0.894
## d.f. 181 Pr(> chi2) 0.0000 g 11.492
##
## Residuals
##
## Min 1Q Median 3Q Max
## -15.3156 -2.6951 0.1014 2.1771 17.7614
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 98.3697 0.7809 125.97 <0.0001
## UN_macroregion=Caribbean -21.0541 2.0010 -10.52 <0.0001
## UN_macroregion=Latin America -12.8711 1.1170 -11.52 <0.0001
## UN_macroregion=Central Asia -17.8884 1.9194 -9.32 <0.0001
## UN_macroregion=Africa -27.6866 0.9589 -28.87 <0.0001
## UN_macroregion=Eastern Asia 6.7104 1.1904 5.64 <0.0001
## UN_macroregion=Eastern Europe -2.8796 1.3431 -2.14 0.0334
## UN_macroregion=Melanesia -14.5514 3.2034 -4.54 <0.0001
## UN_macroregion=Micronesia -14.6908 6.9908 -2.10 0.0370
## UN_macroregion=MENA -13.9946 1.0950 -12.78 <0.0001
## UN_macroregion=Polynesia -10.9511 7.8121 -1.40 0.1627
## UN_macroregion=South-Eastern Asia -9.0310 1.1871 -7.61 <0.0001
## UN_macroregion=Southern Asia -16.6625 1.1030 -15.11 <0.0001
## UN_macroregion=Southern Europe -4.2314 1.4842 -2.85 0.0049
##
(model_4c = rms::ols(IQ ~ UN_region, data = natdata, weights = sqrt(natdata$population2017)))
## Frequencies of Missing Values Due to Each Variable
## IQ UN_region (weights)
## 32 0 0
##
## Linear Regression Model
##
## rms::ols(formula = IQ ~ UN_region, data = natdata, weights = sqrt(natdata$population2017))
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 195 LR chi2 472.11 R2 0.911
## sigma227.6908 d.f. 22 R2 adj 0.900
## d.f. 172 Pr(> chi2) 0.0000 g 11.668
##
## Residuals
##
## Min 1Q Median 3Q Max
## -15.315574 -2.809845 -0.002582 1.879562 17.761353
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 98.9497 1.2838 77.08 <0.0001
## UN_region=Australia and New Zealand 0.1588 2.9892 0.05 0.9577
## UN_region=Caribbean -21.6341 2.2070 -9.80 <0.0001
## UN_region=Central America -15.2926 1.8639 -8.20 <0.0001
## UN_region=Central Asia -18.4684 2.1371 -8.64 <0.0001
## UN_region=Eastern Africa -26.4858 1.5434 -17.16 <0.0001
## UN_region=Eastern Asia 6.1304 1.5539 3.95 0.0001
## UN_region=Eastern Europe -3.4596 1.6679 -2.07 0.0395
## UN_region=Europe -7.1497 7.0273 -1.02 0.3104
## UN_region=Melanesia -15.1314 3.2882 -4.60 <0.0001
## UN_region=Micronesia -15.2708 6.8900 -2.22 0.0280
## UN_region=Middle Africa -31.1463 1.8268 -17.05 <0.0001
## UN_region=Northern Africa -16.5471 1.7742 -9.33 <0.0001
## UN_region=Northern America -0.8734 1.9390 -0.45 0.6530
## UN_region=Northern Europe -0.9418 1.9221 -0.49 0.6248
## UN_region=Polynesia -11.5312 7.6821 -1.50 0.1352
## UN_region=South America -12.5371 1.5982 -7.84 <0.0001
## UN_region=South-Eastern Asia -9.6110 1.5515 -6.19 <0.0001
## UN_region=Southern Africa -27.1290 2.3550 -11.52 <0.0001
## UN_region=Southern Asia -17.2425 1.4914 -11.56 <0.0001
## UN_region=Southern Europe -4.8114 1.7778 -2.71 0.0075
## UN_region=Western Africa -29.1176 1.5775 -18.46 <0.0001
## UN_region=Western Asia -13.4011 1.5938 -8.41 <0.0001
##
#summary stats
list(model_1a, model_1b, model_2a, model_2b, model_2c, model_2d, model_3a, model_3b, model_3c) %>% map_df(function(.) {
data_frame(
R2_adj = glance(.) %>% pull(adj.r.squared)
)
})
#internet users
(model_4a = rms::ols(general_mental_sports ~ IQ + UN_region + internet_users_pct, data = natdata, weights = sqrt(natdata$population2017)))
## Frequencies of Missing Values Due to Each Variable
## general_mental_sports IQ UN_region
## 0 32 0
## internet_users_pct (weights)
## 37 0
##
## Linear Regression Model
##
## rms::ols(formula = general_mental_sports ~ IQ + UN_region + internet_users_pct,
## data = natdata, weights = sqrt(natdata$population2017))
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 187 LR chi2 320.90 R2 0.820
## sigma40.1955 d.f. 24 R2 adj 0.794
## d.f. 162 Pr(> chi2) 0.0000 g 1.270
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.29531 -0.51771 -0.04888 0.31471 1.25504
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -3.5328 1.3683 -2.58 0.0107
## IQ 0.0443 0.0143 3.09 0.0024
## UN_region=Australia and New Zealand -0.2827 0.5277 -0.54 0.5929
## UN_region=Caribbean -1.2029 0.4901 -2.45 0.0152
## UN_region=Central America -1.2245 0.3938 -3.11 0.0022
## UN_region=Central Asia -1.1172 0.4563 -2.45 0.0154
## UN_region=Eastern Africa -0.9520 0.4636 -2.05 0.0416
## UN_region=Eastern Asia -0.2410 0.3151 -0.76 0.4455
## UN_region=Eastern Europe -0.0658 0.3019 -0.22 0.8277
## UN_region=Europe -1.5947 1.2443 -1.28 0.2018
## UN_region=Melanesia -1.0733 0.6588 -1.63 0.1052
## UN_region=Micronesia -0.5713 1.3827 -0.41 0.6800
## UN_region=Middle Africa -0.6844 0.5395 -1.27 0.2065
## UN_region=Northern Africa -1.4710 0.3923 -3.75 0.0002
## UN_region=Northern America 1.1763 0.3453 3.41 0.0008
## UN_region=Northern Europe -0.1551 0.3398 -0.46 0.6486
## UN_region=Polynesia -0.9175 1.4782 -0.62 0.5357
## UN_region=South America -0.5145 0.3315 -1.55 0.1226
## UN_region=South-Eastern Asia -0.9346 0.3206 -2.92 0.0041
## UN_region=Southern Africa -0.1520 0.5545 -0.27 0.7844
## UN_region=Southern Asia -1.3441 0.3719 -3.61 0.0004
## UN_region=Southern Europe -0.6240 0.3228 -1.93 0.0549
## UN_region=Western Africa -0.8633 0.4905 -1.76 0.0803
## UN_region=Western Asia -1.4259 0.3362 -4.24 <0.0001
## internet_users_pct 0.0143 0.0033 4.32 <0.0001
##
(model_4b = rms::ols(general_mental_sports ~ IQ + UN_macroregion + internet_users_pct, data = natdata, weights = sqrt(natdata$population2017)))
## Frequencies of Missing Values Due to Each Variable
## general_mental_sports IQ UN_macroregion
## 0 32 0
## internet_users_pct (weights)
## 37 0
##
## Linear Regression Model
##
## rms::ols(formula = general_mental_sports ~ IQ + UN_macroregion +
## internet_users_pct, data = natdata, weights = sqrt(natdata$population2017))
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 187 LR chi2 288.41 R2 0.786
## sigma42.6738 d.f. 15 R2 adj 0.767
## d.f. 171 Pr(> chi2) 0.0000 g 1.307
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.53274 -0.55993 -0.07711 0.30538 1.48396
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -3.5431 1.3663 -2.59 0.0103
## IQ 0.0459 0.0146 3.15 0.0019
## UN_macroregion=Caribbean -1.3622 0.4676 -2.91 0.0041
## UN_macroregion=Latin America -0.9299 0.2727 -3.41 0.0008
## UN_macroregion=Central Asia -1.2787 0.4304 -2.97 0.0034
## UN_macroregion=Africa -0.9359 0.4247 -2.20 0.0289
## UN_macroregion=Eastern Asia -0.4579 0.2705 -1.69 0.0922
## UN_macroregion=Eastern Europe -0.2759 0.2525 -1.09 0.2760
## UN_macroregion=Melanesia -1.2157 0.6624 -1.84 0.0682
## UN_macroregion=Micronesia -0.7213 1.4512 -0.50 0.6198
## UN_macroregion=MENA -1.6191 0.2799 -5.78 <0.0001
## UN_macroregion=Polynesia -1.0806 1.5547 -0.70 0.4880
## UN_macroregion=South-Eastern Asia -1.1102 0.2688 -4.13 <0.0001
## UN_macroregion=Southern Asia -1.4915 0.3245 -4.60 <0.0001
## UN_macroregion=Southern Europe -0.8328 0.2795 -2.98 0.0033
## internet_users_pct 0.0153 0.0033 4.57 <0.0001
##
(model_4c = rms::ols(general_mental_sports ~ IQ + UN_continent + internet_users_pct, data = natdata, weights = sqrt(natdata$population2017)))
## Frequencies of Missing Values Due to Each Variable
## general_mental_sports IQ UN_continent
## 0 32 0
## internet_users_pct (weights)
## 37 0
##
## Linear Regression Model
##
## rms::ols(formula = general_mental_sports ~ IQ + UN_continent +
## internet_users_pct, data = natdata, weights = sqrt(natdata$population2017))
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 187 LR chi2 249.57 R2 0.737
## sigma46.1461 d.f. 6 R2 adj 0.728
## d.f. 180 Pr(> chi2) 0.0000 g 1.326
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.0891 -0.7170 -0.1079 0.3268 1.4773
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -7.3187 0.7168 -10.21 <0.0001
## IQ 0.0812 0.0084 9.69 <0.0001
## UN_continent=Africa 0.1827 0.2421 0.75 0.4515
## UN_continent=Americas 0.1330 0.1931 0.69 0.4917
## UN_continent=Asia -0.5275 0.1746 -3.02 0.0029
## UN_continent=Oceania -0.2379 0.4148 -0.57 0.5671
## internet_users_pct 0.0145 0.0032 4.53 <0.0001
##
(model_4d = rms::ols(general_mental_sports ~ rcs(IQ, 3) + UN_region + internet_users_pct, data = natdata, weights = sqrt(natdata$population2017)))
## Frequencies of Missing Values Due to Each Variable
## general_mental_sports IQ UN_region
## 0 32 0
## internet_users_pct (weights)
## 37 0
##
## Linear Regression Model
##
## rms::ols(formula = general_mental_sports ~ rcs(IQ, 3) + UN_region +
## internet_users_pct, data = natdata, weights = sqrt(natdata$population2017))
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 187 LR chi2 335.03 R2 0.833
## sigma38.8257 d.f. 25 R2 adj 0.807
## d.f. 161 Pr(> chi2) 0.0000 g 1.259
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.25314 -0.42977 -0.02946 0.31131 1.31611
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.3568 1.7160 0.21 0.8356
## IQ -0.0143 0.0215 -0.66 0.5085
## IQ' 0.0884 0.0249 3.55 0.0005
## UN_region=Australia and New Zealand -0.2948 0.5097 -0.58 0.5639
## UN_region=Caribbean -0.6933 0.4946 -1.40 0.1629
## UN_region=Central America -0.5190 0.4291 -1.21 0.2282
## UN_region=Central Asia -0.4117 0.4834 -0.85 0.3957
## UN_region=Eastern Africa -0.6108 0.4580 -1.33 0.1843
## UN_region=Eastern Asia -0.7545 0.3369 -2.24 0.0265
## UN_region=Eastern Europe 0.1738 0.2993 0.58 0.5622
## UN_region=Europe -1.0857 1.2104 -0.90 0.3711
## UN_region=Melanesia -0.3355 0.6694 -0.50 0.6169
## UN_region=Micronesia 0.1704 1.3518 0.13 0.8998
## UN_region=Middle Africa -0.5883 0.5219 -1.13 0.2613
## UN_region=Northern Africa -0.7318 0.4322 -1.69 0.0924
## UN_region=Northern America 1.2232 0.3338 3.67 0.0003
## UN_region=Northern Europe -0.0799 0.3289 -0.24 0.8085
## UN_region=Polynesia -0.2300 1.4409 -0.16 0.8734
## UN_region=South America 0.1881 0.3763 0.50 0.6179
## UN_region=South-Eastern Asia -0.3786 0.3469 -1.09 0.2768
## UN_region=Southern Africa 0.2074 0.5451 0.38 0.7041
## UN_region=Southern Asia -0.6282 0.4119 -1.53 0.1292
## UN_region=Southern Europe -0.3105 0.3240 -0.96 0.3393
## UN_region=Western Africa -0.6425 0.4778 -1.34 0.1806
## UN_region=Western Asia -0.7276 0.3795 -1.92 0.0570
## internet_users_pct 0.0132 0.0032 4.11 <0.0001
##
(model_4e = rms::ols(general_mental_sports ~ rcs(IQ, 3) + UN_macroregion + internet_users_pct, data = natdata, weights = sqrt(natdata$population2017)))
## Frequencies of Missing Values Due to Each Variable
## general_mental_sports IQ UN_macroregion
## 0 32 0
## internet_users_pct (weights)
## 37 0
##
## Linear Regression Model
##
## rms::ols(formula = general_mental_sports ~ rcs(IQ, 3) + UN_macroregion +
## internet_users_pct, data = natdata, weights = sqrt(natdata$population2017))
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 187 LR chi2 300.61 R2 0.800
## sigma41.4265 d.f. 16 R2 adj 0.781
## d.f. 170 Pr(> chi2) 0.0000 g 1.287
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.5379 -0.4451 -0.0728 0.2606 1.5423
##
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.0638 1.7016 0.04 0.9701
## IQ -0.0088 0.0215 -0.41 0.6834
## IQ' 0.0858 0.0253 3.38 0.0009
## UN_macroregion=Caribbean -0.8632 0.4773 -1.81 0.0723
## UN_macroregion=Latin America -0.2599 0.3306 -0.79 0.4328
## UN_macroregion=Central Asia -0.5966 0.4639 -1.29 0.2002
## UN_macroregion=Africa -0.6685 0.4198 -1.59 0.1132
## UN_macroregion=Eastern Asia -1.0094 0.3090 -3.27 0.0013
## UN_macroregion=Eastern Europe -0.0763 0.2521 -0.30 0.7625
## UN_macroregion=Melanesia -0.5102 0.6759 -0.75 0.4514
## UN_macroregion=Micronesia -0.0132 1.4242 -0.01 0.9926
## UN_macroregion=MENA -0.9371 0.3383 -2.77 0.0062
## UN_macroregion=Polynesia -0.4305 1.5214 -0.28 0.7776
## UN_macroregion=South-Eastern Asia -0.5920 0.3025 -1.96 0.0520
## UN_macroregion=Southern Asia -0.8026 0.3751 -2.14 0.0338
## UN_macroregion=Southern Europe -0.5586 0.2832 -1.97 0.0501
## internet_users_pct 0.0142 0.0033 4.34 <0.0001
##
(model_4f = rms::ols(general_mental_sports ~ rcs(IQ, 3) + UN_continent + internet_users_pct, data = natdata, weights = sqrt(natdata$population2017)))
## Frequencies of Missing Values Due to Each Variable
## general_mental_sports IQ UN_continent
## 0 32 0
## internet_users_pct (weights)
## 37 0
##
## Linear Regression Model
##
## rms::ols(formula = general_mental_sports ~ rcs(IQ, 3) + UN_continent +
## internet_users_pct, data = natdata, weights = sqrt(natdata$population2017))
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 187 LR chi2 287.38 R2 0.785
## sigma41.8249 d.f. 7 R2 adj 0.777
## d.f. 179 Pr(> chi2) 0.0000 g 1.266
##
## Residuals
##
## Min 1Q Median 3Q Max
## -2.2665 -0.5647 -0.1021 0.2758 1.3893
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -0.3489 1.2779 -0.27 0.7851
## IQ -0.0088 0.0161 -0.54 0.5873
## IQ' 0.0878 0.0139 6.33 <0.0001
## UN_continent=Africa -0.3204 0.2334 -1.37 0.1715
## UN_continent=Americas 0.2390 0.1758 1.36 0.1757
## UN_continent=Asia -0.4662 0.1585 -2.94 0.0037
## UN_continent=Oceania -0.1704 0.3761 -0.45 0.6511
## internet_users_pct 0.0151 0.0029 5.20 <0.0001
##
#test LR
#linear variants
lrtest(model_2d, model_4b)
##
## Model 1: general_mental_sports ~ IQ + UN_macroregion
## Model 2: general_mental_sports ~ IQ + UN_macroregion + internet_users_pct
##
## L.R. Chisq d.f. P
## 9.29986 1.00000 0.00229
1 - (model_4b$coefficients["IQ"] / model_2d$coefficients["IQ"])
## IQ
## 0.348
#spline variants
lrtest(model_2b, model_4e)
##
## Model 1: general_mental_sports ~ rcs(IQ, 3) + UN_macroregion
## Model 2: general_mental_sports ~ rcs(IQ, 3) + UN_macroregion + internet_users_pct
##
## L.R. Chisq d.f. P
## 6.3590 1.0000 0.0117
1 - (model_4e$coefficients["IQ'"] / model_2b$coefficients["IQ'"])
## IQ'
## 0.125
#average
((1 - (model_4b$coefficients["IQ"] / model_2d$coefficients["IQ"])) + (1 - (model_4e$coefficients["IQ'"] / model_2b$coefficients["IQ'"]))) / 2
## IQ
## 0.237
#Jensen's method
fa_Jensens_method(gms_factor, natdata, "internet_users_pct") +
scale_y_continuous("Correlation with internet users")
## Using Pearson correlations for the criterion-indicators relationships.
#simple spline
model_1b
## Frequencies of Missing Values Due to Each Variable
## general_mental_sports IQ (weights)
## 0 32 0
##
## Linear Regression Model
##
## rms::ols(formula = general_mental_sports ~ rcs(IQ, 3), data = natdata,
## weights = sqrt(natdata$population2017))
##
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 195 LR chi2 221.62 R2 0.679
## sigma49.5484 d.f. 2 R2 adj 0.676
## d.f. 192 Pr(> chi2) 0.0000 g 1.099
##
## Residuals
##
## Min 1Q Median 3Q Max
## -3.53472 -0.42170 -0.04563 0.34492 2.02523
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -3.1654 0.9761 -3.24 0.0014
## IQ 0.0309 0.0127 2.43 0.0161
## IQ' 0.0765 0.0138 5.54 <0.0001
##
#visualize model nonlinear IQ relationship
model_vis = tibble(
IQ = seq(min(natdata$IQ, na.rm=T), max(natdata$IQ, na.rm=T), by = 1),
UN_macroregion = natdata$UN_macroregion[3]
)
model_vis$fit = predict(model_1b, newdata = model_vis)
ggplot(model_vis, aes(IQ, fit)) +
geom_line()
#knot IQs
model_1b$Design$parms$IQ
## [1] 69.6 84.6 98.7
#predictions
knot_preds = predict(model_1b, newdata = tibble(IQ = model_1b$Design$parms$IQ))
knot_preds
## 1 2 3
## -1.016 -0.247 1.590
#slopes
(knot_preds[2] - knot_preds[1]) / (model_1b$Design$parms$IQ[2] - model_1b$Design$parms$IQ[1])
## 2
## 0.0512
(knot_preds[3] - knot_preds[2]) / (model_1b$Design$parms$IQ[3] - model_1b$Design$parms$IQ[2])
## 3
## 0.13
#maybe it is slope at the midpoint of the interval
knot_midpoints = c(
model_1b$Design$parms$IQ[2] - ((model_1b$Design$parms$IQ[2] - model_1b$Design$parms$IQ[1])/2),
model_1b$Design$parms$IQ[2] + ((model_1b$Design$parms$IQ[3] - model_1b$Design$parms$IQ[2])/2)
)
knot_midpoints
## [1] 77.1 91.7
knot_midpoints_preds = predict(model_1b, newdata = tibble(IQ = c(knot_midpoints - .1, knot_midpoints + .1)))
knot_midpoints_preds
## 1 2 3 4
## -0.751 0.557 -0.741 0.584
#slopes
(knot_midpoints_preds[3] - knot_midpoints_preds[1]) / .2
## 3
## 0.0461
(knot_midpoints_preds[4] - knot_midpoints_preds[2]) / .2
## 4
## 0.135
#residuals
natdata %>% filter(Country_EN == "Nigeria") %>% select(!!all_names)
natdata %>% filter(Country_EN == "Nigeria") %>% select(general_mental_sports)
natdata$model_1a_resid = model_1a %>% resid() %>% standardize()
natdata %>% filter(Country_EN == "Nigeria") %>% select(model_1a_resid)
#game resids
game_resids = natdata[c("country", all_names)]
#dispersion by country
games_sum_stats = natdata %>% plyr::adply(.margins = 1, .expand = F, .id = NULL, .fun = function(.) {
nums = .[all_names] %>% unlist()
data_frame(
country = .$country,
factor_score = .$general_mental_sports,
mean = mean(nums),
median = median(nums),
sd = sd(nums),
mad = mad(nums),
)
})
wtd.cors(games_sum_stats[-1])
## factor_score mean median sd mad
## factor_score 1.000 0.986 0.990 0.496 0.498
## mean 0.986 1.000 0.985 0.518 0.488
## median 0.990 0.985 1.000 0.488 0.484
## sd 0.496 0.518 0.488 1.000 0.868
## mad 0.498 0.488 0.484 0.868 1.000
#predict Nigerian IQ
natdata$pred_IQ = ols(IQ ~ rcs(general_mental_sports, 3), data = natdata, weights = sqrt(natdata$population2017)) %>% predict()
natdata %>% filter(country == "Nigeria") %>% pull(pred_IQ)
## [1] 73.1
#models to fit
models = expand.grid(
complexity = c("basic", "full"),
game = all_names,
zeros = c(T, F)
)
#fit
models$fit = plyr::alply(models, 1, function(r) {
#make model
y_var = sprintf("%s_count", r$game)
if (r$complexity == "basic") {
this_model = as.formula(sprintf("%s ~ pop * IQ", y_var))
} else {
this_model = as.formula(sprintf("%s ~ pop * IQ + UN_macroregion", y_var))
}
#fit
if (r$zeros) {
y = rms::ols(this_model, data = natdata)
} else {
y = rms::ols(this_model, data = natdata %>% filter(.[[y_var]] != 0))
}
y
})
#add metrics
models$R2a = NA
models$n = NA
models$IQ_beta = NA
models$IQ_p = NA
models$IQ_se = NA
for (i in 1:nrow(models)) {
#fit
this_fit = models$fit[[i]]
#hack solution
mod_print = capture.output(print(this_fit))
#meta
models$n[i] = this_fit$stats[1]
models$R2a[i] = mod_print %>% str_subset("R2 adj") %>% str_match("\\d+\\.\\d+") %>% as.numeric()
models$IQ_beta[i] = this_fit$coefficients[3]
#line with IQ
IQ_line = mod_print[which(str_detect(mod_print, "^ IQ"))]
IQ_nums = str_match_all(IQ_line, "\\d+\\.\\d+") %>% .[[1]] %>% as.vector() %>% as.numeric()
models$IQ_p[i] = IQ_nums[4]
models$IQ_se[i] = IQ_nums[2]
}
#meta-analyze
#simple
full_imp = models %>% filter(complexity == "full", zeros)
print(describe(full_imp$IQ_beta), digits = 4)
## vars n mean sd median trimmed mad min max range skew
## X1 1 12 0.7619 0.7279 0.5696 0.6847 0.4569 0.01963 2.276 2.257 0.9055
## kurtosis se
## X1 -0.7137 0.2101
#RE
(RE_fit = rma(full_imp$IQ_beta,
sei = full_imp$IQ_se))
##
## Random-Effects Model (k = 12; tau^2 estimator: REML)
##
## tau^2 (estimated amount of total heterogeneity): 0 (SE = 0.0745)
## tau (square root of estimated tau^2 value): 0
## I^2 (total heterogeneity / total variability): 0.00%
## H^2 (total variability / sampling variability): 1.00
##
## Test for Heterogeneity:
## Q(df = 11) = 7.6702, p-val = 0.7425
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## 0.4583 0.1362 3.3654 0.0008 0.1914 0.7253 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#forest
GG_forest(RE_fit, .names = full_imp$game)
GG_save("figs/RE_forest.png")
#no Go
full_imp_noGo = models %>% filter(complexity == "full", zeros, game != "go")
print(describe(full_imp_noGo$IQ_beta), digits = 4)
## vars n mean sd median trimmed mad min max range skew
## X1 1 11 0.8294 0.723 0.5736 0.7493 0.3022 0.1036 2.276 2.173 0.8511
## kurtosis se
## X1 -0.9037 0.218
(RE_fit = rma(full_imp_noGo$IQ_beta,
sei = full_imp_noGo$IQ_se))
##
## Random-Effects Model (k = 11; tau^2 estimator: REML)
##
## tau^2 (estimated amount of total heterogeneity): 0 (SE = 0.0855)
## tau (square root of estimated tau^2 value): 0
## I^2 (total heterogeneity / total variability): 0.00%
## H^2 (total variability / sampling variability): 1.00
##
## Test for Heterogeneity:
## Q(df = 10) = 5.6659, p-val = 0.8425
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## 0.5431 0.1488 3.6506 0.0003 0.2515 0.8347 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#other designs
#no regions, but with 0's
basic_Imp = models %>% filter(complexity == "basic", zeros)
print(describe(basic_Imp$IQ_beta), digits = 4)
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 12 1.365 1.131 0.9811 1.182 0.5956 0.3116 4.252 3.941 1.325 0.7882
## se
## X1 0.3265
(RE_fit = rma(basic_Imp$IQ_beta,
sei = basic_Imp$IQ_se))
##
## Random-Effects Model (k = 12; tau^2 estimator: REML)
##
## tau^2 (estimated amount of total heterogeneity): 0.7625 (SE = 0.3791)
## tau (square root of estimated tau^2 value): 0.8732
## I^2 (total heterogeneity / total variability): 93.56%
## H^2 (total variability / sampling variability): 15.54
##
## Test for Heterogeneity:
## Q(df = 11) = 74.6685, p-val < .0001
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## 1.2493 0.2735 4.5675 <.0001 0.7132 1.7855 ***
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#regions and no 0's
full_noZeros = models %>% filter(complexity == "full", !zeros)
print(describe(full_noZeros$IQ_beta), digits = 4)
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 12 0.4097 4.434 2.017 1.351 1.62 -12.23 3.633 15.86 -1.901 2.621
## se
## X1 1.28
(RE_fit = rma(full_noZeros$IQ_beta,
sei = full_noZeros$IQ_se))
##
## Random-Effects Model (k = 12; tau^2 estimator: REML)
##
## tau^2 (estimated amount of total heterogeneity): 0 (SE = 1.2051)
## tau (square root of estimated tau^2 value): 0
## I^2 (total heterogeneity / total variability): 0.00%
## H^2 (total variability / sampling variability): 1.00
##
## Test for Heterogeneity:
## Q(df = 11) = 5.8896, p-val = 0.8806
##
## Model Results:
##
## estimate se zval pval ci.lb ci.ub
## 1.6735 0.5574 3.0022 0.0027 0.5810 2.7661 **
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Carried out later in 2019 for talk on Youtube
#models
natdata$IQ_z = natdata$IQ %>% standardize()
natdata$internet_users_pct2 = natdata$internet_users_pct/100
natdata$internet_users_pct_z = natdata$internet_users_pct %>% standardize()
regs = list(
simple = rms::ols(general_mental_sports ~ IQ_z, data = natdata, weights = sqrt(natdata$population2017)),
simple_nowt = rms::ols(general_mental_sports ~ IQ_z, data = natdata),
nonlinear = rms::ols(general_mental_sports ~ rcs(IQ_z, 3), data = natdata, weights = sqrt(natdata$population2017)),
continents = rms::ols(general_mental_sports ~ IQ_z + UN_continent, data = natdata, weights = sqrt(natdata$population2017)),
macroregions = rms::ols(general_mental_sports ~ IQ_z + UN_macroregion, data = natdata, weights = sqrt(natdata$population2017)),
internet = rms::ols(general_mental_sports ~ IQ_z + internet_users_pct2, data = natdata, weights = sqrt(natdata$population2017)),
internet_z = rms::ols(general_mental_sports ~ IQ_z + internet_users_pct_z, data = natdata, weights = sqrt(natdata$population2017)),
full = rms::ols(general_mental_sports ~ IQ_z + internet_users_pct_z + UN_macroregion, data = natdata, weights = sqrt(natdata$population2017))
)
#table
regs %>%
summarize_models()
#main data
write_csv(cbind(country = natdata$country, factor_score = natdata$general_mental_sports, games_sum_stats[-1], game_resids[-1], natdata %>% select(ends_with("_count"), -starts_with("log_"))), "data/out/main_data.csv", na = "")
#all data
write_rds(natdata, "data/out/all_data.rds", compress = "xz")
#chess low level data
write_rds(fide, "data/out/fide.rds")