Init

library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4          ✔ readr     2.1.5     
## ✔ forcats   1.0.1          ✔ stringr   1.6.0     
## ✔ ggplot2   4.0.1.9000     ✔ tibble    3.3.0     
## ✔ lubridate 1.9.4          ✔ tidyr     1.3.1     
## ✔ purrr     1.2.0          
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
## 
## 
## Attaching package: 'magrittr'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
## 
## 
## Loading required package: weights
## 
## Loading required package: assertthat
## 
## 
## Attaching package: 'assertthat'
## 
## 
## The following object is masked from 'package:tibble':
## 
##     has_name
## 
## 
## Loading required package: psych
## 
## 
## Attaching package: 'psych'
## 
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## 
## 
## Loading required package: robustbase
## 
## 
## Attaching package: 'kirkegaard'
## 
## 
## The following object is masked from 'package:psych':
## 
##     rescale
## 
## 
## The following object is masked from 'package:assertthat':
## 
##     are_equal
## 
## 
## The following object is masked from 'package:purrr':
## 
##     is_logical
## 
## 
## The following object is masked from 'package:base':
## 
##     +
load_packages(
  readr,
  readxl,
  dplyr,
  tidyr,
  stringr,
  purrr,
  ggplot2,
  ggrepel,
  countrycode,
  psych,
  ebpm,
  rms
)
## Loading required package: Hmisc
## 
## Attaching package: 'Hmisc'
## 
## The following object is masked from 'package:psych':
## 
##     describe
## 
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, units
theme_set(theme_bw())

options(
    digits = 3
)

Data

Load scraped data

d_raw <- read_rds("data/games_by_country.rds")

# ISO3 codes
d_raw$iso3 <- countrycode(d_raw$country, "country.name", "iso3c", warn = FALSE)
d_raw$iso3[d_raw$iso2 == "xk"] <- "XKX"  # Kosovo
d_raw$iso3[d_raw$iso2 == "mf"] <- "NLD"  # Saint Martin
d_raw$iso3[d_raw$iso2 == "an"] <- "NLD"  # Netherlands Antilles

# Subnational -> parent
sub_to_parent <- c(
  "HKG" = "CHN", "MAC" = "CHN",  # Hong Kong, Macao -> China
  "PRI" = "USA",                   # Puerto Rico -> USA
  "ASM" = "USA",                   # American Samoa -> USA
  "VIR" = "USA",                   # US Virgin Islands -> USA
  "ABW" = "NLD",                   # Aruba -> Netherlands
  "FRO" = "DNK",                   # Faroe Islands -> Denmark
  "NCL" = "FRA",                   # New Caledonia -> France
  "BMU" = "GBR",                   # Bermuda -> UK
  "GIB" = "GBR",                   # Gibraltar -> UK
  "GGY" = "GBR",                   # Guernsey -> UK
  "JEY" = "GBR"                    # Jersey -> UK
)
d_raw$iso3 <- ifelse(d_raw$iso3 %in% names(sub_to_parent), sub_to_parent[d_raw$iso3], d_raw$iso3)

# Drop undefined
d_raw <- d_raw %>% filter(!is.na(iso3))

# Aggregate within country x game (after subnational merge)
d_game <- d_raw %>%
  group_by(game, iso3) %>%
  dplyr::summarize(
    players = sum(players, na.rm = TRUE),
    prize_money = sum(prize_money, na.rm = TRUE),
    .groups = "drop"
  )

# Game summary
d_game %>%
  group_by(game) %>%
  dplyr::summarize(
    n_countries = n(),
    total_players = sum(players),
    total_prize = sum(prize_money)
  ) %>%
  arrange(desc(total_prize))

Population data (OWID)

pop_owid <- read_csv("data/owid_population.csv", show_col_types = FALSE)

# Add Kosovo
pop_owid <- bind_rows(pop_owid, tibble(iso3 = "XKX", country = "Kosovo", year = 2024, population = 1800000))

National IQ data

# Jensen & Kirkegaard 2024/2025 NIQ composite (Seb2024)
niq <- read_excel("data/National IQ datasets.xlsx", sheet = "Seb2024") %>%
  filter(!ISO %in% c("HKG", "MAC")) %>%  # subnational, merged into CHN
  select(iso3 = ISO, NIQ = NIQ, SE_NIQ = SE_NIQ)

Build country-level dataset

# Also merge extra territories into parents
extra_subs <- c("GRL" = "DNK", "PYF" = "FRA", "ALA" = "FIN", "REU" = "FRA")
d_game$iso3 <- ifelse(d_game$iso3 %in% names(extra_subs), extra_subs[d_game$iso3], d_game$iso3)
d_game <- d_game %>%
  group_by(game, iso3) %>%
  dplyr::summarize(players = sum(players), prize_money = sum(prize_money), .groups = "drop")

game_names <- d_game %>% distinct(game) %>% pull(game) %>% sort()

# Pivot to wide: one row per country, columns for each game's players and prize_money
d_players_wide <- d_game %>%
  select(game, iso3, players) %>%
  pivot_wider(names_from = game, values_from = players, values_fill = 0, names_prefix = "players_")

d_prize_wide <- d_game %>%
  select(game, iso3, prize_money) %>%
  pivot_wider(names_from = game, values_from = prize_money, values_fill = 0, names_prefix = "prize_")

d_country <- d_players_wide %>%
  left_join(d_prize_wide, by = "iso3") %>%
  left_join(pop_owid %>% select(iso3, pop = population), by = "iso3") %>%
  left_join(niq, by = "iso3")

# Add country names
d_country$country <- countrycode(d_country$iso3, "iso3c", "country.name")
## Warning: Some values were not matched unambiguously: XKX
d_country$country[d_country$iso3 == "XKX"] <- "Kosovo"

nrow(d_country)
## [1] 145

Analysis

Per-capita rates

# Per-capita rates per game (per million)
for (g in game_names) {
  d_country[[paste0("rate_players_", g)]] <- d_country[[paste0("players_", g)]] / (d_country$pop / 1e6)
  d_country[[paste0("rate_prize_", g)]] <- d_country[[paste0("prize_", g)]] / (d_country$pop / 1e6)
}

d_country$log_pop <- log10(d_country$pop)

Factor analysis: raw per-capita rates

rate_player_cols <- paste0("rate_players_", game_names)
rate_prize_cols <- paste0("rate_prize_", game_names)

# Player count rates
fa_players_raw <- d_country[rate_player_cols] %>%
  fa(nfactors = 1, fm = "ml")
fa_players_raw
## Factor Analysis using method =  ml
## Call: fa(r = ., nfactors = 1, fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
##                             ML1      h2   u2 com
## rate_players_aov           0.25 6.2e-02 0.94   1
## rate_players_apex          0.74 5.5e-01 0.45   1
## rate_players_brawl_stars   0.28 7.9e-02 0.92   1
## rate_players_chess         0.12 1.4e-02 0.99   1
## rate_players_clash_royale  0.29 8.5e-02 0.92   1
## rate_players_cod_mobile    0.26 6.6e-02 0.93   1
## rate_players_cod_warzone   0.23 5.5e-02 0.95   1
## rate_players_crossfire     0.17 3.0e-02 0.97   1
## rate_players_cs            0.76 5.8e-01 0.42   1
## rate_players_cs2           0.61 3.7e-01 0.63   1
## rate_players_csgo          0.74 5.5e-01 0.45   1
## rate_players_dota2         0.63 3.9e-01 0.61   1
## rate_players_fortnite      0.50 2.5e-01 0.75   1
## rate_players_freefire      0.01 9.5e-05 1.00   1
## rate_players_hots          0.72 5.2e-01 0.48   1
## rate_players_hs            0.88 7.7e-01 0.23   1
## rate_players_lol           0.54 2.9e-01 0.71   1
## rate_players_lol_wr        0.36 1.3e-01 0.87   1
## rate_players_mlbb          0.20 4.0e-02 0.96   1
## rate_players_mtg_arena     0.58 3.3e-01 0.67   1
## rate_players_ow            0.71 5.1e-01 0.49   1
## rate_players_ow2           0.66 4.4e-01 0.56   1
## rate_players_pubg          0.78 6.1e-01 0.39   1
## rate_players_pubg_mobile   0.05 2.4e-03 1.00   1
## rate_players_r6            0.70 4.9e-01 0.51   1
## rate_players_rocket_league 0.56 3.1e-01 0.69   1
## rate_players_sc2           0.73 5.3e-01 0.47   1
## rate_players_smite         0.54 2.9e-01 0.71   1
## rate_players_tft           0.75 5.7e-01 0.43   1
## rate_players_valorant      0.52 2.7e-01 0.73   1
## 
##                 ML1
## SS loadings    9.19
## Proportion Var 0.31
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## df null model =  435  with the objective function =  33.7 with Chi Square =  4491
## df of  the model are 405  and the objective function was  24 
## 
## The root mean square of the residuals (RMSR) is  0.17 
## The df corrected root mean square of the residuals is  0.17 
## 
## The harmonic n.obs is  145 with the empirical chi square  3453  with prob <  0 
## The total n.obs was  145  with Likelihood Chi Square =  3175  with prob <  0 
## 
## Tucker Lewis Index of factoring reliability =  0.262
## RMSEA index =  0.217  and the 90 % confidence intervals are  0.211 0.225
## BIC =  1159
## Fit based upon off diagonal values = 0.77
## Measures of factor score adequacy             
##                                                    ML1
## Correlation of (regression) scores with factors   0.98
## Multiple R square of scores with factors          0.95
## Minimum correlation of possible factor scores     0.90
d_country$fa_players_raw <- fa_players_raw$scores[,1]

# Prize money rates
fa_prize_raw <- d_country[rate_prize_cols] %>%
  fa(nfactors = 1, fm = "ml")
fa_prize_raw
## Factor Analysis using method =  ml
## Call: fa(r = ., nfactors = 1, fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
##                            ML1      h2   u2 com
## rate_prize_aov            0.13 1.6e-02 0.98   1
## rate_prize_apex           0.66 4.4e-01 0.56   1
## rate_prize_brawl_stars    0.19 3.5e-02 0.96   1
## rate_prize_chess          0.04 1.7e-03 1.00   1
## rate_prize_clash_royale   0.24 5.8e-02 0.94   1
## rate_prize_cod_mobile     0.12 1.5e-02 0.99   1
## rate_prize_cod_warzone   -0.03 1.1e-03 1.00   1
## rate_prize_crossfire      0.04 1.5e-03 1.00   1
## rate_prize_cs             0.72 5.1e-01 0.49   1
## rate_prize_cs2            0.41 1.7e-01 0.83   1
## rate_prize_csgo           0.68 4.7e-01 0.53   1
## rate_prize_dota2          0.60 3.6e-01 0.64   1
## rate_prize_fortnite       0.62 3.8e-01 0.62   1
## rate_prize_freefire      -0.02 2.7e-04 1.00   1
## rate_prize_hots           0.76 5.8e-01 0.42   1
## rate_prize_hs             0.79 6.2e-01 0.38   1
## rate_prize_lol            0.80 6.4e-01 0.36   1
## rate_prize_lol_wr         0.33 1.1e-01 0.89   1
## rate_prize_mlbb           0.03 6.7e-04 1.00   1
## rate_prize_mtg_arena      0.38 1.5e-01 0.85   1
## rate_prize_ow             0.67 4.4e-01 0.56   1
## rate_prize_ow2            0.69 4.8e-01 0.52   1
## rate_prize_pubg           0.75 5.6e-01 0.44   1
## rate_prize_pubg_mobile    0.00 1.4e-05 1.00   1
## rate_prize_r6             0.68 4.7e-01 0.53   1
## rate_prize_rocket_league  0.45 2.0e-01 0.80   1
## rate_prize_sc2            0.68 4.6e-01 0.54   1
## rate_prize_smite          0.68 4.7e-01 0.53   1
## rate_prize_tft            0.60 3.6e-01 0.64   1
## rate_prize_valorant       0.50 2.5e-01 0.75   1
## 
##                 ML1
## SS loadings    8.26
## Proportion Var 0.28
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## df null model =  435  with the objective function =  29.6 with Chi Square =  3935
## df of  the model are 405  and the objective function was  21.2 
## 
## The root mean square of the residuals (RMSR) is  0.14 
## The df corrected root mean square of the residuals is  0.15 
## 
## The harmonic n.obs is  145 with the empirical chi square  2598  with prob <  1e-315 
## The total n.obs was  145  with Likelihood Chi Square =  2803  with prob <  0 
## 
## Tucker Lewis Index of factoring reliability =  0.26
## RMSEA index =  0.202  and the 90 % confidence intervals are  0.196 0.21
## BIC =  788
## Fit based upon off diagonal values = 0.78
## Measures of factor score adequacy             
##                                                    ML1
## Correlation of (regression) scores with factors   0.97
## Multiple R square of scores with factors          0.94
## Minimum correlation of possible factor scores     0.88
d_country$fa_prize_raw <- fa_prize_raw$scores[,1]

# Correlation between player-based and prize-based composite
cor(d_country$fa_players_raw, d_country$fa_prize_raw, use = "complete.obs")
## [1] 0.829

Factor analysis: EB shrunk estimates (ebpm)

# Empirical Bayes via ebpm_gamma (Poisson means with Gamma prior)
for (g in game_names) {
  fit <- ebpm_gamma(d_country[[paste0("players_", g)]], d_country$pop)
  d_country[[paste0("eb_players_", g)]] <- fit$posterior$mean
}

eb_player_cols <- paste0("eb_players_", game_names)
fa_eb <- d_country[eb_player_cols] %>% fa(nfactors = 1, fm = "ml")
fa_eb
## Factor Analysis using method =  ml
## Call: fa(r = ., nfactors = 1, fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
##                           ML1      h2   u2 com
## eb_players_aov           0.25 6.2e-02 0.94   1
## eb_players_apex          0.79 6.3e-01 0.37   1
## eb_players_brawl_stars   0.30 9.2e-02 0.91   1
## eb_players_chess         0.24 5.8e-02 0.94   1
## eb_players_clash_royale  0.34 1.2e-01 0.88   1
## eb_players_cod_mobile    0.30 8.9e-02 0.91   1
## eb_players_cod_warzone   0.39 1.5e-01 0.85   1
## eb_players_crossfire     0.17 3.0e-02 0.97   1
## eb_players_cs            0.75 5.6e-01 0.44   1
## eb_players_cs2           0.57 3.3e-01 0.67   1
## eb_players_csgo          0.71 5.1e-01 0.49   1
## eb_players_dota2         0.61 3.8e-01 0.62   1
## eb_players_fortnite      0.73 5.3e-01 0.47   1
## eb_players_freefire      0.00 6.2e-07 1.00   1
## eb_players_hots          0.71 5.1e-01 0.49   1
## eb_players_hs            0.86 7.4e-01 0.26   1
## eb_players_lol           0.57 3.2e-01 0.68   1
## eb_players_lol_wr        0.36 1.3e-01 0.87   1
## eb_players_mlbb          0.19 3.7e-02 0.96   1
## eb_players_mtg_arena     0.60 3.6e-01 0.64   1
## eb_players_ow            0.74 5.5e-01 0.45   1
## eb_players_ow2           0.75 5.7e-01 0.43   1
## eb_players_pubg          0.78 6.1e-01 0.39   1
## eb_players_pubg_mobile   0.05 2.3e-03 1.00   1
## eb_players_r6            0.74 5.4e-01 0.46   1
## eb_players_rocket_league 0.55 3.1e-01 0.69   1
## eb_players_sc2           0.78 6.1e-01 0.39   1
## eb_players_smite         0.57 3.3e-01 0.67   1
## eb_players_tft           0.70 4.8e-01 0.52   1
## eb_players_valorant      0.53 2.8e-01 0.72   1
## 
##                 ML1
## SS loadings    9.91
## Proportion Var 0.33
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## df null model =  435  with the objective function =  35.5 with Chi Square =  4727
## df of  the model are 405  and the objective function was  24.7 
## 
## The root mean square of the residuals (RMSR) is  0.16 
## The df corrected root mean square of the residuals is  0.17 
## 
## The harmonic n.obs is  145 with the empirical chi square  3404  with prob <  0 
## The total n.obs was  145  with Likelihood Chi Square =  3272  with prob <  0 
## 
## Tucker Lewis Index of factoring reliability =  0.279
## RMSEA index =  0.221  and the 90 % confidence intervals are  0.215 0.229
## BIC =  1257
## Fit based upon off diagonal values = 0.8
## Measures of factor score adequacy             
##                                                    ML1
## Correlation of (regression) scores with factors   0.98
## Multiple R square of scores with factors          0.95
## Minimum correlation of possible factor scores     0.91
d_country$fa_eb <- fa_eb$scores[,1]

Factor analysis: old method (log-count residuals)

# Replication of 2019 approach: log(count+1) ~ log(pop), FA on residuals
for (g in game_names) {
  y <- log10(d_country[[paste0("players_", g)]] + 1)
  fit <- lm(y ~ d_country$log_pop)
  d_country[[paste0("old_resid_", g)]] <- resid(fit)
}

old_resid_cols <- paste0("old_resid_", game_names)
fa_old <- d_country[old_resid_cols] %>% fa(nfactors = 1, fm = "ml")
fa_old
## Factor Analysis using method =  ml
## Call: fa(r = ., nfactors = 1, fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
##                          ML1    h2   u2 com
## old_resid_aov           0.56 0.318 0.68   1
## old_resid_apex          0.91 0.837 0.16   1
## old_resid_brawl_stars   0.68 0.467 0.53   1
## old_resid_chess         0.63 0.395 0.60   1
## old_resid_clash_royale  0.74 0.552 0.45   1
## old_resid_cod_mobile    0.53 0.286 0.71   1
## old_resid_cod_warzone   0.70 0.491 0.51   1
## old_resid_crossfire     0.62 0.389 0.61   1
## old_resid_cs            0.83 0.696 0.30   1
## old_resid_cs2           0.73 0.532 0.47   1
## old_resid_csgo          0.81 0.663 0.34   1
## old_resid_dota2         0.66 0.432 0.57   1
## old_resid_fortnite      0.85 0.715 0.29   1
## old_resid_freefire      0.14 0.021 0.98   1
## old_resid_hots          0.88 0.781 0.22   1
## old_resid_hs            0.93 0.862 0.14   1
## old_resid_lol           0.89 0.789 0.21   1
## old_resid_lol_wr        0.61 0.373 0.63   1
## old_resid_mlbb          0.24 0.056 0.94   1
## old_resid_mtg_arena     0.74 0.554 0.45   1
## old_resid_ow            0.90 0.813 0.19   1
## old_resid_ow2           0.85 0.726 0.27   1
## old_resid_pubg          0.89 0.788 0.21   1
## old_resid_pubg_mobile   0.31 0.097 0.90   1
## old_resid_r6            0.85 0.719 0.28   1
## old_resid_rocket_league 0.75 0.565 0.44   1
## old_resid_sc2           0.91 0.832 0.17   1
## old_resid_smite         0.73 0.527 0.47   1
## old_resid_tft           0.88 0.783 0.22   1
## old_resid_valorant      0.82 0.677 0.32   1
## 
##                  ML1
## SS loadings    16.73
## Proportion Var  0.56
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## df null model =  435  with the objective function =  38.2 with Chi Square =  5091
## df of  the model are 405  and the objective function was  13.5 
## 
## The root mean square of the residuals (RMSR) is  0.11 
## The df corrected root mean square of the residuals is  0.11 
## 
## The harmonic n.obs is  145 with the empirical chi square  1552  with prob <  1.2e-133 
## The total n.obs was  145  with Likelihood Chi Square =  1794  with prob <  1.3e-173 
## 
## Tucker Lewis Index of factoring reliability =  0.678
## RMSEA index =  0.154  and the 90 % confidence intervals are  0.147 0.162
## BIC =  -221
## Fit based upon off diagonal values = 0.96
## Measures of factor score adequacy             
##                                                    ML1
## Correlation of (regression) scores with factors   0.99
## Multiple R square of scores with factors          0.98
## Minimum correlation of possible factor scores     0.97
d_country$fa_old_method <- fa_old$scores[,1]

Game-level correlation matrix

# Correlation matrix of raw per-capita rates across games
cor_raw <- cor(d_country[rate_player_cols], use = "complete.obs")
colnames(cor_raw) <- rownames(cor_raw) <- game_names

# EB per-capita correlations
cor_eb <- cor(d_country[eb_player_cols], use = "complete.obs")
colnames(cor_eb) <- rownames(cor_eb) <- game_names

# Combined matrix: upper triangle = raw per-capita, lower triangle = EB per-capita
cor_combined <- cor_raw
cor_combined[lower.tri(cor_combined)] <- cor_eb[lower.tri(cor_eb)]
diag(cor_combined) <- NA

round(cor_combined, 2)
##                 aov  apex brawl_stars chess clash_royale cod_mobile cod_warzone
## aov              NA  0.32        0.56 -0.06         0.10       0.65       -0.04
## apex           0.33    NA        0.16 -0.01         0.29       0.24        0.24
## brawl_stars    0.54  0.17          NA -0.03         0.11       0.65       -0.02
## chess         -0.08  0.10        0.01    NA        -0.05      -0.07       -0.01
## clash_royale   0.09  0.30        0.15  0.05           NA       0.12        0.01
## cod_mobile     0.70  0.29        0.64 -0.04         0.20         NA        0.00
## cod_warzone   -0.04  0.32        0.01  0.11         0.13       0.09          NA
## crossfire      0.03  0.12       -0.04  0.03         0.15       0.00        0.03
## cs             0.00  0.64        0.04  0.27         0.17       0.01        0.24
## cs2           -0.08  0.34       -0.03  0.40         0.06      -0.01        0.17
## csgo          -0.02  0.42        0.03  0.33         0.08       0.03        0.45
## dota2          0.10  0.47        0.20  0.18         0.15       0.16        0.12
## fortnite      -0.05  0.63        0.06  0.18         0.13       0.06        0.65
## freefire       0.55  0.06        0.12 -0.13        -0.05       0.40       -0.10
## hots           0.58  0.65        0.67  0.11         0.27       0.54        0.13
## hs             0.22  0.68        0.28  0.21         0.38       0.26        0.19
## lol            0.07  0.36        0.09  0.17         0.14       0.09        0.82
## lol_wr         0.79  0.31        0.75 -0.07         0.12       0.68       -0.03
## mlbb           0.56  0.06        0.85 -0.07        -0.05       0.63       -0.05
## mtg_arena     -0.07  0.56        0.02  0.10         0.37       0.07        0.24
## ow             0.10  0.51        0.15  0.16         0.17       0.12        0.64
## ow2            0.18  0.49        0.26  0.21         0.24       0.23        0.19
## pubg           0.26  0.71        0.11  0.19         0.28       0.18        0.22
## pubg_mobile    0.01 -0.03        0.00 -0.02        -0.01       0.02       -0.02
## r6             0.22  0.70        0.34  0.05         0.30       0.31        0.19
## rocket_league -0.04  0.31        0.06  0.23         0.04       0.05        0.42
## sc2            0.28  0.66        0.32  0.14         0.37       0.27        0.18
## smite         -0.06  0.61        0.02  0.16         0.07       0.07        0.27
## tft            0.15  0.53        0.26  0.16         0.49       0.23        0.24
## valorant       0.27  0.29        0.35  0.16         0.14       0.29        0.06
##               crossfire    cs   cs2  csgo dota2 fortnite freefire  hots    hs
## aov                0.03  0.00 -0.09 -0.03  0.10    -0.06     0.54  0.58  0.22
## apex               0.12  0.63  0.32  0.41  0.46     0.44     0.07  0.61  0.65
## brawl_stars       -0.03  0.03 -0.04  0.00  0.18     0.03     0.13  0.64  0.26
## chess             -0.01  0.13  0.21  0.18  0.05     0.03    -0.10  0.06  0.09
## clash_royale       0.16  0.17  0.03  0.03  0.16     0.03    -0.01  0.23  0.36
## cod_mobile        -0.01  0.01  0.01  0.00  0.18    -0.01     0.30  0.53  0.23
## cod_warzone        0.00  0.12  0.07  0.36  0.08     0.40    -0.09  0.05  0.07
## crossfire            NA  0.17  0.08  0.07  0.03     0.03     0.29  0.11  0.21
## cs                 0.18    NA  0.71  0.72  0.57     0.43    -0.06  0.46  0.66
## cs2                0.09  0.71    NA  0.79  0.53     0.31    -0.09  0.29  0.50
## csgo               0.08  0.72  0.79    NA  0.55     0.49    -0.09  0.39  0.55
## dota2              0.03  0.58  0.54  0.54    NA     0.23    -0.03  0.39  0.54
## fortnite           0.06  0.62  0.46  0.68  0.35       NA    -0.10  0.28  0.35
## freefire           0.27 -0.06 -0.08 -0.07 -0.03    -0.10       NA  0.09 -0.02
## hots               0.11  0.44  0.24  0.34  0.40     0.41     0.08    NA  0.64
## hs                 0.22  0.65  0.47  0.51  0.55     0.52    -0.03  0.62    NA
## lol                0.12  0.38  0.37  0.66  0.29     0.72    -0.03  0.28  0.40
## lol_wr            -0.01  0.02  0.00  0.06  0.21     0.07     0.38  0.69  0.34
## mlbb              -0.04 -0.01 -0.05  0.03  0.27    -0.05     0.20  0.56  0.13
## mtg_arena          0.45  0.48  0.39  0.37  0.42     0.42    -0.03  0.24  0.63
## ow                 0.10  0.48  0.42  0.73  0.44     0.71    -0.02  0.41  0.55
## ow2                0.04  0.52  0.44  0.65  0.36     0.55    -0.04  0.61  0.62
## pubg               0.06  0.64  0.43  0.46  0.46     0.54     0.09  0.49  0.74
## pubg_mobile       -0.01 -0.03 -0.02  0.24  0.21    -0.01     0.00 -0.02 -0.04
## r6                 0.09  0.42  0.27  0.30  0.51     0.45    -0.01  0.60  0.72
## rocket_league      0.06  0.36  0.39  0.68  0.17     0.66    -0.07  0.28  0.40
## sc2                0.07  0.57  0.31  0.41  0.50     0.44    -0.04  0.67  0.71
## smite              0.10  0.54  0.40  0.36  0.29     0.67    -0.03  0.40  0.47
## tft                0.31  0.51  0.33  0.39  0.37     0.44    -0.03  0.61  0.76
## valorant           0.14  0.32  0.40  0.64  0.47     0.30     0.06  0.45  0.41
##                 lol lol_wr  mlbb mtg_arena    ow   ow2  pubg pubg_mobile    r6
## aov            0.05   0.79  0.58     -0.06  0.09  0.10  0.25       -0.01  0.23
## apex           0.38   0.29  0.07      0.53  0.50  0.28  0.70       -0.05  0.69
## brawl_stars    0.06   0.74  0.83      0.00  0.12  0.15  0.10       -0.02  0.34
## chess          0.09  -0.07 -0.06      0.01  0.09  0.18  0.08        0.03 -0.02
## clash_royale   0.08   0.11 -0.03      0.31  0.11  0.10  0.28       -0.05  0.28
## cod_mobile     0.05   0.64  0.66      0.04  0.08  0.12  0.14        0.00  0.29
## cod_warzone    0.76  -0.04 -0.05      0.09  0.61  0.09  0.13       -0.03  0.08
## crossfire      0.10  -0.01 -0.04      0.43  0.08  0.01  0.06       -0.03  0.10
## cs             0.36   0.02 -0.01      0.49  0.47  0.46  0.65       -0.04  0.42
## cs2            0.35  -0.01 -0.05      0.43  0.42  0.48  0.45       -0.03  0.26
## csgo           0.65   0.07  0.03      0.37  0.74  0.70  0.49        0.24  0.28
## dota2          0.28   0.24  0.29      0.43  0.42  0.28  0.46        0.26  0.50
## fortnite       0.53   0.03 -0.05      0.27  0.53  0.34  0.38       -0.02  0.29
## freefire      -0.04   0.38  0.21      0.00 -0.03 -0.04  0.09       -0.02  0.01
## hots           0.27   0.66  0.56      0.23  0.41  0.53  0.50       -0.03  0.58
## hs             0.38   0.32  0.13      0.60  0.54  0.57  0.74       -0.06  0.70
## lol              NA   0.09  0.02      0.27  0.85  0.35  0.36       -0.04  0.22
## lol_wr         0.11     NA  0.73      0.08  0.13  0.15  0.30        0.19  0.32
## mlbb           0.03   0.72    NA     -0.09  0.07  0.10  0.03        0.15  0.23
## mtg_arena      0.29   0.07 -0.09        NA  0.37  0.17  0.45       -0.05  0.60
## ow             0.83   0.16  0.09      0.41    NA  0.66  0.53       -0.03  0.48
## ow2            0.39   0.26  0.18      0.29  0.70    NA  0.46       -0.03  0.32
## pubg           0.36   0.32  0.03      0.44  0.55  0.57    NA       -0.04  0.57
## pubg_mobile   -0.03   0.10  0.11     -0.02 -0.03 -0.02 -0.03          NA -0.04
## r6             0.24   0.32  0.22      0.60  0.54  0.54  0.57       -0.03    NA
## rocket_league  0.52   0.04 -0.02      0.21  0.66  0.74  0.33       -0.02  0.33
## sc2            0.30   0.36  0.24      0.39  0.50  0.63  0.79        0.23  0.66
## smite          0.25   0.11 -0.06      0.36  0.29  0.38  0.44       -0.02  0.47
## tft            0.33   0.27  0.10      0.55  0.39  0.49  0.47       -0.02  0.55
## valorant       0.22   0.40  0.37      0.26  0.34  0.48  0.31        0.73  0.30
##               rocket_league   sc2 smite   tft valorant
## aov                   -0.05  0.28 -0.04  0.15     0.22
## apex                   0.27  0.62  0.58  0.44     0.22
## brawl_stars            0.06  0.30  0.01  0.21     0.28
## chess                  0.30  0.02  0.04  0.12     0.08
## clash_royale          -0.02  0.32  0.07  0.34     0.07
## cod_mobile             0.00  0.25  0.05  0.18     0.22
## cod_warzone            0.33  0.08  0.12  0.07    -0.01
## crossfire              0.04  0.07  0.11  0.27     0.11
## cs                     0.36  0.55  0.55  0.56     0.31
## cs2                    0.41  0.29  0.41  0.47     0.39
## csgo                   0.68  0.41  0.35  0.57     0.64
## dota2                  0.17  0.52  0.28  0.37     0.49
## fortnite               0.46  0.29  0.45  0.31     0.18
## freefire              -0.09 -0.03  0.00  0.00     0.04
## hots                   0.33  0.62  0.36  0.63     0.43
## hs                     0.44  0.66  0.47  0.79     0.39
## lol                    0.51  0.26  0.23  0.33     0.20
## lol_wr                 0.02  0.39  0.12  0.25     0.42
## mlbb                  -0.03  0.26 -0.06  0.11     0.36
## mtg_arena              0.15  0.38  0.37  0.46     0.21
## ow                     0.68  0.43  0.25  0.48     0.32
## ow2                    0.81  0.36  0.22  0.68     0.48
## pubg                   0.35  0.76  0.45  0.49     0.29
## pubg_mobile           -0.01  0.33 -0.04 -0.05     0.75
## r6                     0.28  0.64  0.46  0.48     0.24
## rocket_league            NA  0.18  0.28  0.52     0.39
## sc2                    0.22    NA  0.40  0.44     0.52
## smite                  0.35  0.41    NA  0.34     0.13
## tft                    0.31  0.54  0.37    NA     0.43
## valorant               0.39  0.49  0.17  0.36       NA
# Summary stats
raw_upper <- cor_raw[upper.tri(cor_raw)]
eb_lower <- cor_eb[lower.tri(cor_eb)]

tibble(
  method = c("raw_pc (upper)", "EB_pc (lower)"),
  mean = c(mean(raw_upper), mean(eb_lower)),
  median = c(median(raw_upper), median(eb_lower)),
  sd = c(sd(raw_upper), sd(eb_lower)),
  min = c(min(raw_upper), min(eb_lower)),
  max = c(max(raw_upper), max(eb_lower))
)
# How similar are the two sets of inter-game correlations?
cor(raw_upper, eb_lower)
## [1] 0.0824
# Full game name mapping
game_labels <- c(
  aov = "Arena of Valor", apex = "Apex Legends", brawl_stars = "Brawl Stars",
  chess = "Chess.com", clash_royale = "Clash Royale", cod_mobile = "CoD Mobile",
  cod_warzone = "CoD Warzone", crossfire = "CrossFire",
  cs = "Counter-Strike", cs2 = "Counter-Strike 2", csgo = "CS:GO",
  dota2 = "Dota 2", fortnite = "Fortnite", freefire = "Free Fire",
  hots = "Heroes of Storm", hs = "Hearthstone", lol = "League of Legends",
  lol_wr = "LoL Wild Rift", mlbb = "Mobile Legends",
  mtg_arena = "MTG Arena", ow = "Overwatch", ow2 = "Overwatch 2",
  pubg = "PUBG", pubg_mobile = "PUBG Mobile", r6 = "Rainbow Six",
  rocket_league = "Rocket League", sc2 = "StarCraft II", smite = "SMITE",
  tft = "Teamfight Tactics", valorant = "VALORANT"
)

# Order games by hierarchical clustering
dist_mat <- as.dist(1 - cor_raw)
hc <- hclust(dist_mat, method = "ward.D2")
game_order <- hc$labels[hc$order]

# Heatmap
cor_combined_long <- cor_combined %>%
  as.data.frame() %>%
  mutate(game1 = rownames(.)) %>%
  pivot_longer(-game1, names_to = "game2", values_to = "r") %>%
  mutate(
    game1_full = factor(game_labels[game1], levels = game_labels[game_order]),
    game2_full = factor(game_labels[game2], levels = rev(game_labels[game_order]))
  )

ggplot(cor_combined_long, aes(game1_full, game2_full, fill = r)) +
  geom_tile() +
  geom_text(aes(label = ifelse(is.na(r), "", sprintf("%.2f", r))), size = 1.8) +
  scale_fill_gradient2(low = "blue", mid = "white", high = "red", midpoint = 0, na.value = "grey90") +
  labs(title = "Game correlation matrix (upper = raw pc, lower = EB pc)",
       x = NULL, y = NULL, fill = "r") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 7),
        axis.text.y = element_text(size = 7))

GG_save("figs/game_cor_matrix.png")

Unit-weighted composites

# Unit-weighted: standardize per-capita rates, average across games
rate_z <- scale(d_country[rate_player_cols])
d_country$unit_weighted <- rowMeans(rate_z, na.rm = TRUE)

prize_z <- scale(d_country[rate_prize_cols])
d_country$unit_weighted_prize <- rowMeans(prize_z, na.rm = TRUE)

Hierarchical FA

# Parallel analysis for number of factors
pa <- fa.parallel(d_country[rate_player_cols], fm = "ml", fa = "fa", plot = FALSE)
## Parallel analysis suggests that the number of factors =  5  and the number of components =  NA
pa$nfact
## [1] 5
# Oblimin 1st order
fa6 <- fa(d_country[rate_player_cols], nfactors = pa$nfact, fm = "ml", rotate = "oblimin")
## Loading required namespace: GPArotation
print(fa6$loadings, cutoff = 0.3, sort = TRUE)
## 
## Loadings:
##                            ML5    ML3    ML2    ML4    ML1   
## rate_players_apex           0.802                            
## rate_players_hs             0.709                            
## rate_players_mtg_arena      0.628                            
## rate_players_pubg           0.775                            
## rate_players_r6             0.787                            
## rate_players_sc2            0.807                       0.326
## rate_players_smite          0.571                            
## rate_players_aov                   0.756                     
## rate_players_brawl_stars           0.894                     
## rate_players_cod_mobile            0.749                     
## rate_players_hots           0.391  0.628                     
## rate_players_lol_wr                0.823                     
## rate_players_mlbb                  0.889                     
## rate_players_cs             0.490         0.569              
## rate_players_cs2                          0.877              
## rate_players_csgo                         0.760  0.337       
## rate_players_ow2                          0.687              
## rate_players_rocket_league                0.532  0.361       
## rate_players_cod_warzone                         0.910       
## rate_players_lol                                 0.906       
## rate_players_ow                                  0.723       
## rate_players_pubg_mobile                                1.015
## rate_players_valorant                     0.393         0.740
## rate_players_chess                                           
## rate_players_clash_royale   0.487                            
## rate_players_crossfire                                       
## rate_players_dota2          0.379                            
## rate_players_fortnite                            0.423       
## rate_players_freefire              0.318                     
## rate_players_tft            0.384         0.460              
## 
##                  ML5   ML3   ML2   ML4   ML1
## SS loadings    4.875 4.185 3.282 2.643 1.879
## Proportion Var 0.162 0.139 0.109 0.088 0.063
## Cumulative Var 0.162 0.302 0.411 0.500 0.562
fa6$Phi %>% round(2)
##      ML5  ML3  ML2  ML4  ML1
## ML5 1.00 0.29 0.46 0.29 0.08
## ML3 0.29 1.00 0.08 0.04 0.15
## ML2 0.46 0.08 1.00 0.36 0.14
## ML4 0.29 0.04 0.36 1.00 0.04
## ML1 0.08 0.15 0.14 0.04 1.00
# Omega hierarchical (Schmid-Leiman)
om <- omega(d_country[rate_player_cols], nfactors = pa$nfact, fm = "ml", plot = FALSE)
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
om$omega_h
## [1] 0.617
om$omega.tot
## [1] 0.954
# Hierarchical g scores
g_loadings <- om$schmid$sl[, 1]
d_country$fa_hierarchical <- as.vector(scale(d_country[rate_player_cols]) %*% g_loadings / sum(g_loadings))

Compare composites

d_country %>%
  select(fa_players_raw, fa_eb, unit_weighted, fa_hierarchical, fa_old_method, pop) %>%
  cor(use = "complete.obs") %>%
  round(3)
##                 fa_players_raw  fa_eb unit_weighted fa_hierarchical
## fa_players_raw           1.000  0.989         0.963           0.995
## fa_eb                    0.989  1.000         0.957           0.984
## unit_weighted            0.963  0.957         1.000           0.976
## fa_hierarchical          0.995  0.984         0.976           1.000
## fa_old_method            0.658  0.696         0.679           0.651
## pop                     -0.092 -0.104        -0.093          -0.096
##                 fa_old_method    pop
## fa_players_raw          0.658 -0.092
## fa_eb                   0.696 -0.104
## unit_weighted           0.679 -0.093
## fa_hierarchical         0.651 -0.096
## fa_old_method           1.000  0.073
## pop                     0.073  1.000

Country rankings

d_country %>%
  select(country, iso3, pop, unit_weighted, unit_weighted_prize, fa_players_raw, fa_old_method) %>%
  arrange(desc(unit_weighted)) %>%
  head(30)

Comparison with top 1000 global players

# Scrape top 1000 players by earnings
top1000_urls <- c(
  "https://www.esportsearnings.com/players",
  sprintf("https://www.esportsearnings.com/players/highest-earnings-top-%d", seq(200, 1000, 100))
)

top1000 <- map_dfr(top1000_urls, function(url) {
  Sys.sleep(1.5)
  page <- rvest::read_html(url)
  tbl_node <- page %>% rvest::html_nodes(".detail_list_table") %>% .[[2]]

  flags <- tbl_node %>% rvest::html_nodes("img") %>% rvest::html_attr("src") %>%
    str_extract("[a-z]{2}(?=\\.png)")
  games <- tbl_node %>% rvest::html_nodes("td.detail_list_game a") %>% rvest::html_text()
  tbl_data <- tbl_node %>% rvest::html_table()

  tibble(
    iso2 = flags,
    game = games,
    earnings = tbl_data[["Total (Overall)"]] %>% str_remove_all("[\\$,]") %>% as.numeric()
  )
})

# Apply subnational merges
top1000$iso3 <- countrycode(top1000$iso2, "iso2c", "iso3c", warn = FALSE)
sub_map_iso2 <- c("hk" = "CHN", "mo" = "CHN", "pr" = "USA", "aw" = "NLD",
                   "fo" = "DNK", "nc" = "FRA", "bm" = "GBR", "gi" = "GBR",
                   "gl" = "DNK", "pf" = "FRA", "re" = "FRA")
for (code in names(sub_map_iso2)) {
  top1000$iso3[top1000$iso2 == code] <- sub_map_iso2[code]
}

top1000_country <- top1000 %>%
  count(iso3, name = "top1000_count") %>%
  filter(!is.na(iso3))

d_country <- d_country %>%
  left_join(top1000_country, by = "iso3") %>%
  mutate(top1000_count = replace_na(top1000_count, 0),
         top1000_per_million = top1000_count / (pop / 1e6))

# Correlations: top 1000 per capita vs FA composites and IQ
d_country %>%
  select(top1000_per_million, fa_players_raw, fa_prize_raw, fa_eb, fa_old_method, NIQ, log_pop) %>%
  cor(use = "complete.obs")
##                     top1000_per_million fa_players_raw fa_prize_raw  fa_eb
## top1000_per_million              1.0000          0.648      0.86392  0.670
## fa_players_raw                   0.6476          1.000      0.82852  0.990
## fa_prize_raw                     0.8639          0.829      1.00000  0.852
## fa_eb                            0.6704          0.990      0.85219  1.000
## fa_old_method                    0.4219          0.667      0.58363  0.699
## NIQ                              0.3895          0.657      0.51808  0.665
## log_pop                         -0.0754         -0.121      0.00178 -0.153
##                     fa_old_method     NIQ  log_pop
## top1000_per_million        0.4219  0.3895 -0.07545
## fa_players_raw             0.6675  0.6574 -0.12055
## fa_prize_raw               0.5836  0.5181  0.00178
## fa_eb                      0.6994  0.6652 -0.15276
## fa_old_method              1.0000  0.8082  0.04210
## NIQ                        0.8082  1.0000 -0.02818
## log_pop                    0.0421 -0.0282  1.00000
# Top countries by top 1000 per capita
d_country %>%
  select(country, pop, top1000_count, top1000_per_million, fa_players_raw, fa_prize_raw) %>%
  arrange(desc(top1000_per_million)) %>%
  head(20)

Correlations with NIQ

# Total players and prize (raw sums, not per capita)
d_country$total_players <- rowSums(d_country[paste0("players_", game_names)])
d_country$total_prize <- rowSums(d_country[paste0("prize_", game_names)])

# Main composites vs NIQ
cor_niq <- d_country %>%
  select(
    NIQ,
    `Players per capita (unit-weighted)` = unit_weighted,
    `Prize per capita (unit-weighted)` = unit_weighted_prize,
    `Top 1000 pc` = top1000_per_million,
    `Total players` = total_players,
    `Total prize` = total_prize,
    pop
  ) %>%
  cor(use = "pairwise.complete.obs")

round(cor_niq, 3)
##                                      NIQ Players per capita (unit-weighted)
## NIQ                                1.000                              0.683
## Players per capita (unit-weighted) 0.683                              1.000
## Prize per capita (unit-weighted)   0.617                              0.844
## Top 1000 pc                        0.390                              0.574
## Total players                      0.411                              0.279
## Total prize                        0.356                              0.213
## pop                                0.002                             -0.093
##                                    Prize per capita (unit-weighted) Top 1000 pc
## NIQ                                                           0.617       0.390
## Players per capita (unit-weighted)                            0.844       0.574
## Prize per capita (unit-weighted)                              1.000       0.790
## Top 1000 pc                                                   0.790       1.000
## Total players                                                 0.394       0.213
## Total prize                                                   0.393       0.274
## pop                                                          -0.014      -0.054
##                                    Total players Total prize    pop
## NIQ                                        0.411       0.356  0.002
## Players per capita (unit-weighted)         0.279       0.213 -0.093
## Prize per capita (unit-weighted)           0.394       0.393 -0.014
## Top 1000 pc                                0.213       0.274 -0.054
## Total players                              1.000       0.823  0.413
## Total prize                                0.823       1.000  0.601
## pop                                        0.413       0.601  1.000
# Heatmap
cor_niq %>%
  as.data.frame() %>%
  mutate(var1 = rownames(.)) %>%
  pivot_longer(-var1, names_to = "var2", values_to = "r") %>%
  mutate(
    var1 = factor(var1, levels = colnames(cor_niq)),
    var2 = factor(var2, levels = rev(colnames(cor_niq)))
  ) %>%
  ggplot(aes(var1, var2, fill = r)) +
  geom_tile() +
  geom_text(aes(label = sprintf("%.2f", r)), size = 3.5) +
  scale_fill_gradient2(low = "blue", mid = "white", high = "red", midpoint = 0, na.value = "grey90",
                       limits = c(-0.2, 1)) +
  labs(title = "Correlations: NIQ and esports composites", x = NULL, y = NULL, fill = "r") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 9),
        axis.text.y = element_text(size = 9))

GG_save("figs/NIQ_esports_cor_matrix.png")

FA method comparison

# Comparison of FA variants for player per-capita rates
cor_fa <- d_country %>%
  select(
    NIQ,
    `Per capita 1-factor FA` = fa_players_raw,
    `Per capita EB FA` = fa_eb,
    `Per capita unit-weighted` = unit_weighted,
    `Per capita hierarchical g` = fa_hierarchical,
    `Log-residual FA` = fa_old_method,
    pop
  ) %>%
  cor(use = "pairwise.complete.obs")

cor_fa %>%
  as.data.frame() %>%
  mutate(var1 = rownames(.)) %>%
  pivot_longer(-var1, names_to = "var2", values_to = "r") %>%
  mutate(
    var1 = factor(var1, levels = colnames(cor_fa)),
    var2 = factor(var2, levels = rev(colnames(cor_fa)))
  ) %>%
  ggplot(aes(var1, var2, fill = r)) +
  geom_tile() +
  geom_text(aes(label = sprintf("%.2f", r)), size = 4) +
  scale_fill_gradient2(low = "blue", mid = "white", high = "red", midpoint = 0, na.value = "grey90",
                       limits = c(-0.2, 1)) +
  labs(title = "FA method comparison: players per capita", x = NULL, y = NULL, fill = "r") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10),
        axis.text.y = element_text(size = 10))

GG_save("figs/FA_variants_cor_matrix.png")

Disattenuation for NIQ measurement error

# NIQ reliability from SEs
niq_in_sample <- d_country %>% filter(!is.na(NIQ))
var_niq <- var(niq_in_sample$NIQ, na.rm = TRUE)
mean_se2 <- mean(niq_in_sample$SE_NIQ^2, na.rm = TRUE)
reliability_niq <- 1 - mean_se2 / var_niq

cat(sprintf("NIQ reliability: %.3f (var=%.1f, mean SE²=%.2f)\n\n", reliability_niq, var_niq, mean_se2))
## NIQ reliability: 0.928 (var=96.3, mean SE²=6.97)
# Disattenuated correlations (correcting NIQ side only)
composites_list <- c(
  "Players per capita (unit-weighted)" = "unit_weighted",
  "Prize per capita (unit-weighted)" = "unit_weighted_prize",
  "Players per capita 1-factor FA" = "fa_players_raw",
  "Players per capita EB FA" = "fa_eb",
  "Players log-residual FA" = "fa_old_method",
  "Top 1000 pc" = "top1000_per_million"
)

tibble(
  composite = names(composites_list),
  r_observed = map_dbl(composites_list, ~cor(d_country$NIQ, d_country[[.x]], use = "complete.obs")),
  r_disattenuated = r_observed / sqrt(reliability_niq)
)

NIQ scatter plots

d_plot <- d_country %>% filter(!is.na(NIQ))
r_val <- cor(d_plot$NIQ, d_plot$unit_weighted)
n_val <- nrow(d_plot)
p_val <- cor.test(d_plot$NIQ, d_plot$unit_weighted)$p.value

ggplot(d_plot, aes(NIQ, unit_weighted, label = country)) +
  geom_point(aes(size = log10(pop)), alpha = 0.6) +
  geom_smooth(method = "lm", se = TRUE) +
  geom_smooth(method = "loess", se = FALSE, linetype = "dashed", color = "red") +
  geom_text_repel(size = 2, max.overlaps = 20) +
  scale_size_continuous(guide = "none") +
  annotate("text", x = min(d_plot$NIQ) + 1, y = max(d_plot$unit_weighted) * 0.95,
           label = sprintf("r = %.2f, n = %d, p = %.1e", r_val, n_val, p_val),
           hjust = 0, size = 4) +
  labs(y = "Esports composite (unit-weighted per capita)", x = "NIQ")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
## Warning: ggrepel: 65 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

GG_save("figs/NIQ_vs_esports_uw.png")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
## Warning: ggrepel: 40 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Regional means
# Macro-regions based on UN sub-regions
d_country$un_subregion <- countrycode(d_country$iso3, "iso3c", "un.regionsub.name")
## Warning: Some values were not matched unambiguously: TWN, XKX
d_country$un_subregion[d_country$iso3 == "XKX"] <- "Southern Europe"

d_country$macroregion <- dplyr::recode(d_country$un_subregion,
  "Northern Europe" = "N+W Europe + offshoots",
  "Western Europe" = "N+W Europe + offshoots",
  "Northern America" = "N+W Europe + offshoots",
  "Australia and New Zealand" = "N+W Europe + offshoots",
  "Southern Europe" = "Southern Europe",
  "Eastern Europe" = "Eastern Europe",
  "Central Asia" = "Central Asia",
  "Eastern Asia" = "Eastern Asia",
  "South-eastern Asia" = "South-eastern Asia",
  "Southern Asia" = "Southern Asia",
  "Western Asia" = "MENA",
  "Northern Africa" = "MENA",
  "Sub-Saharan Africa" = "Sub-Saharan Africa",
  "Latin America and the Caribbean" = "Latin America",
  "Polynesia" = "Pacific Islands",
  "Melanesia" = "Pacific Islands"
)
d_country$macroregion[d_country$iso3 == "TWN"] <- "Eastern Asia"

macro_order <- d_country %>%
  dplyr::group_by(macroregion) %>%
  dplyr::summarize(med = median(unit_weighted), .groups = "drop") %>%
  dplyr::arrange(desc(med)) %>%
  dplyr::pull(macroregion)

ggplot(d_country, aes(x = factor(macroregion, levels = macro_order), y = unit_weighted)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(width = 0.2, alpha = 0.5, size = 2) +
  geom_text_repel(aes(label = country), size = 2.2, max.overlaps = 20, seed = 3,
                  min.segment.length = 0.1, force = 2, max.time = 5) +
  labs(x = NULL, y = "Esports composite (unit-weighted per capita)",
       title = "Esports performance by macro-region") +
  theme(axis.text.x = element_text(angle = 35, hjust = 1, size = 9),
        plot.margin = margin(5, 5, 5, 15, "mm"))
## Warning: ggrepel: 62 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

GG_save("figs/esports_by_macroregion.png", width = 16, height = 8)
## Warning: ggrepel: 36 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Regressions with controls

# Internet access data
inet <- read_csv("/media/emil/8tb_ssd_3/projects/finished/2019/Esports and national IQ/data/internet users 2016.csv", show_col_types = FALSE)
inet$iso3 <- pu_translate(inet$country, messages = FALSE)
inet$internet_pct <- inet$i99h2016 / 100

d_country <- d_country %>%
  left_join(inet %>% select(iso3, internet_pct), by = "iso3")

# Steam traffic per capita
steam <- read_csv("data/steam_traffic.csv", show_col_types = FALSE)
names(steam) <- c("iso3", "steam_gb_pc")
steam$iso3 <- ifelse(steam$iso3 %in% names(sub_to_parent), sub_to_parent[steam$iso3], steam$iso3)
steam <- steam %>% filter(!is.na(steam_gb_pc)) %>%
  dplyr::group_by(iso3) %>% dplyr::summarize(steam_gb_pc = max(steam_gb_pc), .groups = "drop")
d_country <- d_country %>% left_join(steam, by = "iso3")

d_mod <- d_country %>% filter(!is.na(NIQ))
d_mod$NIQ_z <- scale(d_mod$NIQ)[,1]
d_mod$internet_z <- scale(d_mod$internet_pct)[,1]
d_mod$steam_z <- scale(d_mod$steam_gb_pc)[,1]
d_mod$esport_z <- scale(d_mod$unit_weighted)[,1]

# Partial sqrt(epsilon^2) for effect sizes comparable across predictors
get_pe2 <- function(fit, term) {
  a <- as.data.frame(anova(fit))
  if (!term %in% rownames(a)) return(NA_real_)
  ss_eff <- a[term, "Partial SS"]
  df_eff <- a[term, "d.f."]
  ms_err <- a["ERROR", "MS"]
  ss_err <- a["ERROR", "Partial SS"]
  pepsilon2 <- (ss_eff - df_eff * ms_err) / (ss_eff + ss_err)
  sqrt(max(pepsilon2, 0))
}

# Linear NIQ models
ml1 <- rms::ols(esport_z ~ NIQ_z, data = d_mod)
ml2 <- rms::ols(esport_z ~ NIQ_z + internet_z, data = d_mod)
ml3 <- rms::ols(esport_z ~ NIQ_z + macroregion, data = d_mod)
ml4 <- rms::ols(esport_z ~ NIQ_z + steam_z, data = d_mod)
ml5 <- rms::ols(esport_z ~ NIQ_z + internet_z + macroregion + steam_z, data = d_mod)

# Spline NIQ models
ms1 <- rms::ols(esport_z ~ rcs(NIQ_z, 3), data = d_mod)
ms2 <- rms::ols(esport_z ~ rcs(NIQ_z, 3) + internet_z, data = d_mod)
ms3 <- rms::ols(esport_z ~ rcs(NIQ_z, 3) + macroregion, data = d_mod)
ms4 <- rms::ols(esport_z ~ rcs(NIQ_z, 3) + steam_z, data = d_mod)
ms5 <- rms::ols(esport_z ~ rcs(NIQ_z, 3) + internet_z + macroregion + steam_z, data = d_mod)

# Summary table: sqrt(partial epsilon^2) for each predictor
tibble(
  NIQ_form = c(rep("Linear", 5), rep("Spline", 5)),
  model = rep(c("NIQ", "+internet", "+region", "+steam", "+all"), 2),
  fits = list(ml1, ml2, ml3, ml4, ml5, ms1, ms2, ms3, ms4, ms5),
  NIQ = map_dbl(fits, ~get_pe2(.x, "NIQ_z")),
  Internet = map_dbl(fits, ~get_pe2(.x, "internet_z")),
  Region = map_dbl(fits, ~get_pe2(.x, "macroregion")),
  Steam = map_dbl(fits, ~get_pe2(.x, "steam_z")),
  R2_adj = map_dbl(fits, ~summary.lm(.x)$adj.r.squared)
) %>%
  select(-fits) %>%
  mutate(across(where(is.numeric), ~round(.x, 3)))
# Table as figure with significance stars
get_p <- function(fit, term) {
  a <- as.data.frame(anova(fit))
  if (!term %in% rownames(a)) return(NA_real_)
  a[term, "P"]
}

format_cell <- function(value, p) {
  if (is.na(value)) return("")
  star <- ifelse(!is.na(p) & p < 0.05, "*", "")
  sprintf("%.2f%s", value, star)
}

all_fits <- list(ml1, ml2, ml3, ml4, ml5, ms1, ms2, ms3, ms4, ms5)
fit_names <- c("L1","L2","L3","L4","L5","S1","S2","S3","S4","S5")
predictors <- c("NIQ", "Internet", "Region", "Steam", "R\u00b2 adj")
terms <- c("NIQ_z", "internet_z", "macroregion", "steam_z", NA)

vals <- matrix(NA, 5, 10)
labs <- matrix("", 5, 10)
for (j in 1:10) {
  fit <- all_fits[[j]]
  for (i in 1:4) {
    vals[i,j] <- get_pe2(fit, terms[i])
    labs[i,j] <- format_cell(vals[i,j], get_p(fit, terms[i]))
  }
  vals[5,j] <- summary.lm(fit)$adj.r.squared
  labs[5,j] <- sprintf("%.2f", vals[5,j])
}

model_subtitles <- c("NIQ", "+int", "+reg", "+stm", "+all", "NIQ", "+int", "+reg", "+stm", "+all")

expand.grid(
  row = factor(predictors, levels = rev(predictors)),
  col = factor(fit_names, levels = fit_names)
) %>%
  mutate(
    i = match(as.character(row), predictors),
    j = match(col, fit_names),
    value = map2_dbl(i, j, ~vals[.x, .y]),
    label = map2_chr(i, j, ~labs[.x, .y]),
    is_r2 = as.character(row) == "R\u00b2 adj"
  ) %>%
  ggplot(aes(x = col, y = row)) +
  geom_tile(aes(fill = ifelse(is.na(value) | is_r2, NA, value)), color = "grey80") +
  geom_text(aes(label = label), size = 4.5) +
  scale_fill_gradient(low = "white", high = "salmon", na.value = "grey95", guide = "none",
                      limits = c(0, 0.8)) +
  scale_x_discrete(labels = model_subtitles, position = "top") +
  labs(x = NULL, y = NULL) +
  theme_minimal() +
  theme(axis.text = element_text(size = 10), panel.grid = element_blank(),
        plot.title = element_text(hjust = 0.5, size = 13, face = "bold")) +
  annotate("text", x = 3, y = 6.2, label = "Linear NIQ", fontface = "bold", size = 4.5) +
  annotate("text", x = 8, y = 6.2, label = "Spline NIQ", fontface = "bold", size = 4.5) +
  annotate("text", x = 10.3, y = 0.3, label = sprintf("n = %d, * p < .05", nrow(d_mod)),
           size = 3.5, hjust = 1, color = "grey40") +
  coord_cartesian(ylim = c(0.4, 6.3), clip = "off") +
  ggtitle(expression("Regression results: " * sqrt("partial " * epsilon^2)))

GG_save("figs/regression_table.png")

Expanded sample: including 0-player countries

# Countries with NIQ but 0 esports players in any game
niq_all <- read_excel("data/National IQ datasets.xlsx", sheet = "Seb2024") %>%
  filter(!ISO %in% c("HKG", "MAC")) %>%
  select(iso3 = ISO, NIQ = NIQ)

merged_subs <- c("GRL", "BMU", "PRI", "GIB", "GGY", "JEY", "ASM", "VIR",
                 "ABW", "FRO", "NCL", "PYF", "ALA", "REU", "HKG", "MAC")

truly_missing <- niq_all %>%
  filter(!iso3 %in% d_country$iso3, !iso3 %in% merged_subs) %>%
  left_join(pop_owid %>% select(iso3, pop = population), by = "iso3")

# Their unit-weighted score: z-score of 0 rate in each game, then average
rate_means <- colMeans(d_country[rate_player_cols])
rate_sds <- apply(d_country[rate_player_cols], 2, sd)
uw_zero <- mean((0 - rate_means) / rate_sds)

d_expanded <- bind_rows(
  d_country %>% select(iso3, country, NIQ, unit_weighted, pop),
  truly_missing %>% mutate(
    country = countrycode(iso3, "iso3c", "country.name"),
    unit_weighted = uw_zero
  ) %>% select(iso3, country, NIQ, unit_weighted, pop)
)
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `country = countrycode(iso3, "iso3c", "country.name")`.
## Caused by warning:
## ! Some values were not matched unambiguously: KSV
d_exp_plot <- d_expanded %>% filter(!is.na(NIQ))
d_exp_plot$has_players <- d_exp_plot$iso3 %in% d_country$iso3

r_exp <- cor(d_exp_plot$NIQ, d_exp_plot$unit_weighted)
n_exp <- nrow(d_exp_plot)
p_exp <- cor.test(d_exp_plot$NIQ, d_exp_plot$unit_weighted)$p.value

ggplot(d_exp_plot, aes(NIQ, unit_weighted, label = country)) +
  geom_point(aes(size = log10(pop), shape = has_players), alpha = 0.6) +
  scale_shape_manual(values = c("TRUE" = 16, "FALSE" = 4), guide = "none") +
  geom_smooth(method = "lm", se = TRUE) +
  geom_smooth(method = "loess", se = FALSE, linetype = "dashed", color = "red") +
  geom_text_repel(size = 2, max.overlaps = 20) +
  scale_size_continuous(guide = "none") +
  annotate("text", x = min(d_exp_plot$NIQ, na.rm = TRUE) + 1,
           y = max(d_exp_plot$unit_weighted) * 0.95,
           label = sprintf("r = %.2f, n = %d, p = %.1e\n(x = 0 players in any game)", r_exp, n_exp, p_exp),
           hjust = 0, size = 3.5) +
  labs(y = "Esports composite (unit-weighted per capita)", x = "NIQ",
       title = "Including countries with 0 esports players")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: ggrepel: 122 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

GG_save("figs/NIQ_vs_esports_expanded.png")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: ggrepel: 103 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Nonlinearity test

# Test nonlinearity: linear vs restricted cubic spline
fit_linear <- rms::ols(unit_weighted ~ NIQ, data = d_plot)
fit_spline <- rms::ols(unit_weighted ~ rcs(NIQ, 3), data = d_plot)
fit_linear
## Linear Regression Model
## 
## rms::ols(formula = unit_weighted ~ NIQ, data = d_plot)
## 
##                 Model Likelihood    Discrimination    
##                       Ratio Test           Indexes    
## Obs     141    LR chi2     88.56    R2       0.466    
## sigma0.3922    d.f.            1    R2 adj   0.463    
## d.f.    139    Pr(> chi2) 0.0000    g        0.417    
## 
## Residuals
## 
##      Min       1Q   Median       3Q      Max 
## -0.77832 -0.19062 -0.07712  0.07682  1.71494 
## 
## 
##           Coef    S.E.   t      Pr(>|t|)
## Intercept -3.1990 0.2924 -10.94 <0.0001 
## NIQ        0.0372 0.0034  11.02 <0.0001
fit_spline
## Linear Regression Model
## 
## rms::ols(formula = unit_weighted ~ rcs(NIQ, 3), data = d_plot)
## 
##                 Model Likelihood    Discrimination    
##                       Ratio Test           Indexes    
## Obs     141    LR chi2    110.16    R2       0.542    
## sigma0.3646    d.f.            2    R2 adj   0.536    
## d.f.    138    Pr(> chi2) 0.0000    g        0.416    
## 
## Residuals
## 
##      Min       1Q   Median       3Q      Max 
## -0.96263 -0.11055 -0.02360  0.02648  1.58026 
## 
## 
##           Coef    S.E.   t     Pr(>|t|)
## Intercept -0.6305 0.6021 -1.05 0.2969  
## NIQ        0.0040 0.0076  0.52 0.6047  
## NIQ'       0.0486 0.0102  4.78 <0.0001
lrtest(fit_linear, fit_spline)
## 
## Model 1: unit_weighted ~ NIQ
## Model 2: unit_weighted ~ rcs(NIQ, 3)
## 
## L.R. Chisq       d.f.          P 
##   2.16e+01   1.00e+00   3.35e-06

Meta

#versions
write_sessioninfo()
## R version 4.5.3 (2026-03-11)
## Platform: x86_64-pc-linux-gnu
## Running under: Linux Mint 21.1
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0  LAPACK version 3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_DK.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_DK.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Brussels
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] rms_8.0-0             Hmisc_5.2-4           ebpm_0.0.1.3         
##  [4] countrycode_1.6.1     ggrepel_0.9.6         readxl_1.4.5         
##  [7] kirkegaard_2025-11-19 robustbase_0.99-6     psych_2.5.6          
## [10] assertthat_0.2.1      weights_1.1.2         magrittr_2.0.4       
## [13] lubridate_1.9.4       forcats_1.0.1         stringr_1.6.0        
## [16] dplyr_1.1.4           purrr_1.2.0           readr_2.1.5          
## [19] tidyr_1.3.1           tibble_3.3.0          ggplot2_4.0.1.9000   
## [22] tidyverse_2.0.0      
## 
## loaded via a namespace (and not attached):
##   [1] RColorBrewer_1.1-3   rstudioapi_0.17.1    jsonlite_2.0.0      
##   [4] shape_1.4.6.1        TH.data_1.1-4        jomo_2.7-6          
##   [7] farver_2.1.2         nloptr_2.2.1         rmarkdown_2.30      
##  [10] ragg_1.5.0           vctrs_0.6.5          minqa_1.2.8         
##  [13] base64enc_0.1-3      mixsqp_0.3-54        htmltools_0.5.8.1   
##  [16] polspline_1.1.25     curl_7.0.0           broom_1.0.11        
##  [19] cellranger_1.1.0     Formula_1.2-5        mitml_0.4-5         
##  [22] sass_0.4.10          bslib_0.9.0          htmlwidgets_1.6.4   
##  [25] sandwich_3.1-1       zoo_1.8-14           cachem_1.1.0        
##  [28] lifecycle_1.0.4      iterators_1.0.14     pkgconfig_2.0.3     
##  [31] Matrix_1.7-4         R6_2.6.1             fastmap_1.2.0       
##  [34] rbibutils_2.3        selectr_0.4-2        digest_0.6.39       
##  [37] colorspace_2.1-2     ps_1.9.1             irlba_2.3.5.1       
##  [40] textshaping_1.0.4    labeling_0.4.3       timechange_0.3.0    
##  [43] gdata_3.0.1          mgcv_1.9-3           httr_1.4.7          
##  [46] compiler_4.5.3       bit64_4.6.0-1        withr_3.0.2         
##  [49] htmlTable_2.4.3      S7_0.2.1             backports_1.5.0     
##  [52] pan_1.9              MASS_7.3-65          quantreg_6.1        
##  [55] GPArotation_2025.3-1 gtools_3.9.5         tools_4.5.3         
##  [58] chromote_0.5.1       foreign_0.8-91       nnet_7.3-20         
##  [61] glue_1.8.0           nlme_3.1-168         promises_1.3.3      
##  [64] stringdist_0.9.15    grid_4.5.3           checkmate_2.3.3     
##  [67] cluster_2.1.8.2      generics_0.1.4       gtable_0.3.6        
##  [70] tzdb_0.5.0           websocket_1.4.4      data.table_1.17.8   
##  [73] hms_1.1.3            xml2_1.4.0           foreach_1.5.2       
##  [76] pillar_1.11.1        vroom_1.6.6          later_1.4.4         
##  [79] splines_4.5.3        lattice_0.22-7       survival_3.8-6      
##  [82] bit_4.6.0            SparseM_1.84-2       tidyselect_1.2.1    
##  [85] knitr_1.50           reformulas_0.4.1     gridExtra_2.3       
##  [88] xfun_0.57            matrixStats_1.5.0    DEoptimR_1.1-4      
##  [91] stringi_1.8.7        yaml_2.3.10          boot_1.3-32         
##  [94] evaluate_1.0.5       codetools_0.2-20     archive_1.1.12      
##  [97] cli_3.6.5            rpart_4.1.24         systemfonts_1.3.1   
## [100] Rdpack_2.6.4         processx_3.8.6       jquerylib_0.1.4     
## [103] Rcpp_1.1.0           parallel_4.5.3       MatrixModels_0.5-4  
## [106] lme4_1.1-37          glmnet_4.1-10        mvtnorm_1.3-3       
## [109] scales_1.4.0         crayon_1.5.3         rlang_1.1.6.9000    
## [112] rvest_1.0.5          multcomp_1.4-28      mnormt_2.1.1        
## [115] mice_3.18.0
#write data to file for reuse
d_country %>% write_rds("data/data_for_reuse.rds")

#OSF
if (F) {
  library(osfr)
  
  #login
  osf_auth(readr::read_lines("~/.config/osf_token"))
  
  #the project we will use
  osf_proj = osf_retrieve_node("https://osf.io/XXX/")
  
  #upload all files in project
  #overwrite existing (versioning)
  osf_upload(
    osf_proj,
    path = c("data", "figures", "papers", "notebook.Rmd", "notebook.html", "sessions_info.txt"), 
    conflicts = "overwrite"
    )
}