Init

library(kirkegaard)

## Loading required package: tidyverse

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4          ✔ readr     2.1.5     
## ✔ forcats   1.0.1          ✔ stringr   1.6.0     
## ✔ ggplot2   4.0.1.9000     ✔ tibble    3.3.0     
## ✔ lubridate 1.9.4          ✔ tidyr     1.3.1     
## ✔ purrr     1.2.0          
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
## 
## 
## Attaching package: 'magrittr'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
## 
## 
## Loading required package: weights
## 
## Loading required package: assertthat
## 
## 
## Attaching package: 'assertthat'
## 
## 
## The following object is masked from 'package:tibble':
## 
##     has_name
## 
## 
## Loading required package: psych
## 
## 
## Attaching package: 'psych'
## 
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## 
## 
## Loading required package: robustbase
## 
## 
## Attaching package: 'kirkegaard'
## 
## 
## The following object is masked from 'package:psych':
## 
##     rescale
## 
## 
## The following object is masked from 'package:assertthat':
## 
##     are_equal
## 
## 
## The following object is masked from 'package:purrr':
## 
##     is_logical
## 
## 
## The following object is masked from 'package:base':
## 
##     +

load_packages(
  readr,
  readxl,
  dplyr,
  tidyr,
  stringr,
  purrr,
  ggplot2,
  ggrepel,
  countrycode,
  psych,
  ebpm,
  rms
)

## Loading required package: Hmisc
## 
## Attaching package: 'Hmisc'
## 
## The following object is masked from 'package:psych':
## 
##     describe
## 
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, units

theme_set(theme_bw())

options(
    digits = 3
)

Data

Load scraped data

d_raw <- read_rds("data/games_by_country.rds")

# ISO3 codes
d_raw$iso3 <- countrycode(d_raw$country, "country.name", "iso3c", warn = FALSE)
d_raw$iso3[d_raw$iso2 == "xk"] <- "XKX"  # Kosovo
d_raw$iso3[d_raw$iso2 == "mf"] <- "NLD"  # Saint Martin
d_raw$iso3[d_raw$iso2 == "an"] <- "NLD"  # Netherlands Antilles

# Subnational -> parent
sub_to_parent <- c(
  "HKG" = "CHN", "MAC" = "CHN",  # Hong Kong, Macao -> China
  "PRI" = "USA",                   # Puerto Rico -> USA
  "ASM" = "USA",                   # American Samoa -> USA
  "VIR" = "USA",                   # US Virgin Islands -> USA
  "ABW" = "NLD",                   # Aruba -> Netherlands
  "FRO" = "DNK",                   # Faroe Islands -> Denmark
  "NCL" = "FRA",                   # New Caledonia -> France
  "BMU" = "GBR",                   # Bermuda -> UK
  "GIB" = "GBR",                   # Gibraltar -> UK
  "GGY" = "GBR",                   # Guernsey -> UK
  "JEY" = "GBR"                    # Jersey -> UK
)
d_raw$iso3 <- ifelse(d_raw$iso3 %in% names(sub_to_parent), sub_to_parent[d_raw$iso3], d_raw$iso3)

# Drop undefined
d_raw <- d_raw %>% filter(!is.na(iso3))

# Aggregate within country x game (after subnational merge)
d_game <- d_raw %>%
  group_by(game, iso3) %>%
  dplyr::summarize(
    players = sum(players, na.rm = TRUE),
    prize_money = sum(prize_money, na.rm = TRUE),
    .groups = "drop"
  )

# Game summary
d_game %>%
  group_by(game) %>%
  dplyr::summarize(
    n_countries = n(),
    total_players = sum(players),
    total_prize = sum(prize_money)
  ) %>%
  arrange(desc(total_prize))

Population data (OWID)

pop_owid <- read_csv("data/owid_population.csv", show_col_types = FALSE)

# Add Kosovo
pop_owid <- bind_rows(pop_owid, tibble(iso3 = "XKX", country = "Kosovo", year = 2024, population = 1800000))

National IQ data

# Jensen & Kirkegaard 2024/2025 NIQ composite (Seb2024)
niq <- read_excel("data/National IQ datasets.xlsx", sheet = "Seb2024") %>%
  filter(!ISO %in% c("HKG", "MAC")) %>%  # subnational, merged into CHN
  select(iso3 = ISO, NIQ = NIQ, SE_NIQ = SE_NIQ)

Build country-level dataset

# Also merge extra territories into parents
extra_subs <- c("GRL" = "DNK", "PYF" = "FRA", "ALA" = "FIN", "REU" = "FRA")
d_game$iso3 <- ifelse(d_game$iso3 %in% names(extra_subs), extra_subs[d_game$iso3], d_game$iso3)
d_game <- d_game %>%
  group_by(game, iso3) %>%
  dplyr::summarize(players = sum(players), prize_money = sum(prize_money), .groups = "drop")

game_names <- d_game %>% distinct(game) %>% pull(game) %>% sort()

# Pivot to wide: one row per country, columns for each game's players and prize_money
d_players_wide <- d_game %>%
  select(game, iso3, players) %>%
  pivot_wider(names_from = game, values_from = players, values_fill = 0, names_prefix = "players_")

d_prize_wide <- d_game %>%
  select(game, iso3, prize_money) %>%
  pivot_wider(names_from = game, values_from = prize_money, values_fill = 0, names_prefix = "prize_")

d_country <- d_players_wide %>%
  left_join(d_prize_wide, by = "iso3") %>%
  left_join(pop_owid %>% select(iso3, pop = population), by = "iso3") %>%
  left_join(niq, by = "iso3")

# Add country names
d_country$country <- countrycode(d_country$iso3, "iso3c", "country.name")

## Warning: Some values were not matched unambiguously: XKX

d_country$country[d_country$iso3 == "XKX"] <- "Kosovo"

nrow(d_country)

## [1] 145

Analysis

Per-capita rates

# Per-capita rates per game (per million)
for (g in game_names) {
  d_country[[paste0("rate_players_", g)]] <- d_country[[paste0("players_", g)]] / (d_country$pop / 1e6)
  d_country[[paste0("rate_prize_", g)]] <- d_country[[paste0("prize_", g)]] / (d_country$pop / 1e6)
}

d_country$log_pop <- log10(d_country$pop)

Factor analysis: raw per-capita rates

rate_player_cols <- paste0("rate_players_", game_names)
rate_prize_cols <- paste0("rate_prize_", game_names)

# Player count rates
fa_players_raw <- d_country[rate_player_cols] %>%
  fa(nfactors = 1, fm = "ml")
fa_players_raw

## Factor Analysis using method =  ml
## Call: fa(r = ., nfactors = 1, fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
##                             ML1      h2   u2 com
## rate_players_aov           0.25 6.2e-02 0.94   1
## rate_players_apex          0.74 5.5e-01 0.45   1
## rate_players_brawl_stars   0.28 7.9e-02 0.92   1
## rate_players_chess         0.12 1.4e-02 0.99   1
## rate_players_clash_royale  0.29 8.5e-02 0.92   1
## rate_players_cod_mobile    0.26 6.6e-02 0.93   1
## rate_players_cod_warzone   0.23 5.5e-02 0.95   1
## rate_players_crossfire     0.17 3.0e-02 0.97   1
## rate_players_cs            0.76 5.8e-01 0.42   1
## rate_players_cs2           0.61 3.7e-01 0.63   1
## rate_players_csgo          0.74 5.5e-01 0.45   1
## rate_players_dota2         0.63 3.9e-01 0.61   1
## rate_players_fortnite      0.50 2.5e-01 0.75   1
## rate_players_freefire      0.01 9.5e-05 1.00   1
## rate_players_hots          0.72 5.2e-01 0.48   1
## rate_players_hs            0.88 7.7e-01 0.23   1
## rate_players_lol           0.54 2.9e-01 0.71   1
## rate_players_lol_wr        0.36 1.3e-01 0.87   1
## rate_players_mlbb          0.20 4.0e-02 0.96   1
## rate_players_mtg_arena     0.58 3.3e-01 0.67   1
## rate_players_ow            0.71 5.1e-01 0.49   1
## rate_players_ow2           0.66 4.4e-01 0.56   1
## rate_players_pubg          0.78 6.1e-01 0.39   1
## rate_players_pubg_mobile   0.05 2.4e-03 1.00   1
## rate_players_r6            0.70 4.9e-01 0.51   1
## rate_players_rocket_league 0.56 3.1e-01 0.69   1
## rate_players_sc2           0.73 5.3e-01 0.47   1
## rate_players_smite         0.54 2.9e-01 0.71   1
## rate_players_tft           0.75 5.7e-01 0.43   1
## rate_players_valorant      0.52 2.7e-01 0.73   1
## 
##                 ML1
## SS loadings    9.19
## Proportion Var 0.31
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## df null model =  435  with the objective function =  33.7 with Chi Square =  4491
## df of  the model are 405  and the objective function was  24 
## 
## The root mean square of the residuals (RMSR) is  0.17 
## The df corrected root mean square of the residuals is  0.17 
## 
## The harmonic n.obs is  145 with the empirical chi square  3453  with prob <  0 
## The total n.obs was  145  with Likelihood Chi Square =  3175  with prob <  0 
## 
## Tucker Lewis Index of factoring reliability =  0.262
## RMSEA index =  0.217  and the 90 % confidence intervals are  0.211 0.225
## BIC =  1159
## Fit based upon off diagonal values = 0.77
## Measures of factor score adequacy             
##                                                    ML1
## Correlation of (regression) scores with factors   0.98
## Multiple R square of scores with factors          0.95
## Minimum correlation of possible factor scores     0.90

d_country$fa_players_raw <- fa_players_raw$scores[,1]

# Prize money rates
fa_prize_raw <- d_country[rate_prize_cols] %>%
  fa(nfactors = 1, fm = "ml")
fa_prize_raw

## Factor Analysis using method =  ml
## Call: fa(r = ., nfactors = 1, fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
##                            ML1      h2   u2 com
## rate_prize_aov            0.13 1.6e-02 0.98   1
## rate_prize_apex           0.66 4.4e-01 0.56   1
## rate_prize_brawl_stars    0.19 3.5e-02 0.96   1
## rate_prize_chess          0.04 1.7e-03 1.00   1
## rate_prize_clash_royale   0.24 5.8e-02 0.94   1
## rate_prize_cod_mobile     0.12 1.5e-02 0.99   1
## rate_prize_cod_warzone   -0.03 1.1e-03 1.00   1
## rate_prize_crossfire      0.04 1.5e-03 1.00   1
## rate_prize_cs             0.72 5.1e-01 0.49   1
## rate_prize_cs2            0.41 1.7e-01 0.83   1
## rate_prize_csgo           0.68 4.7e-01 0.53   1
## rate_prize_dota2          0.60 3.6e-01 0.64   1
## rate_prize_fortnite       0.62 3.8e-01 0.62   1
## rate_prize_freefire      -0.02 2.7e-04 1.00   1
## rate_prize_hots           0.76 5.8e-01 0.42   1
## rate_prize_hs             0.79 6.2e-01 0.38   1
## rate_prize_lol            0.80 6.4e-01 0.36   1
## rate_prize_lol_wr         0.33 1.1e-01 0.89   1
## rate_prize_mlbb           0.03 6.7e-04 1.00   1
## rate_prize_mtg_arena      0.38 1.5e-01 0.85   1
## rate_prize_ow             0.67 4.4e-01 0.56   1
## rate_prize_ow2            0.69 4.8e-01 0.52   1
## rate_prize_pubg           0.75 5.6e-01 0.44   1
## rate_prize_pubg_mobile    0.00 1.4e-05 1.00   1
## rate_prize_r6             0.68 4.7e-01 0.53   1
## rate_prize_rocket_league  0.45 2.0e-01 0.80   1
## rate_prize_sc2            0.68 4.6e-01 0.54   1
## rate_prize_smite          0.68 4.7e-01 0.53   1
## rate_prize_tft            0.60 3.6e-01 0.64   1
## rate_prize_valorant       0.50 2.5e-01 0.75   1
## 
##                 ML1
## SS loadings    8.26
## Proportion Var 0.28
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## df null model =  435  with the objective function =  29.6 with Chi Square =  3935
## df of  the model are 405  and the objective function was  21.2 
## 
## The root mean square of the residuals (RMSR) is  0.14 
## The df corrected root mean square of the residuals is  0.15 
## 
## The harmonic n.obs is  145 with the empirical chi square  2598  with prob <  1e-315 
## The total n.obs was  145  with Likelihood Chi Square =  2803  with prob <  0 
## 
## Tucker Lewis Index of factoring reliability =  0.26
## RMSEA index =  0.202  and the 90 % confidence intervals are  0.196 0.21
## BIC =  788
## Fit based upon off diagonal values = 0.78
## Measures of factor score adequacy             
##                                                    ML1
## Correlation of (regression) scores with factors   0.97
## Multiple R square of scores with factors          0.94
## Minimum correlation of possible factor scores     0.88

d_country$fa_prize_raw <- fa_prize_raw$scores[,1]

# Correlation between player-based and prize-based composite
cor(d_country$fa_players_raw, d_country$fa_prize_raw, use = "complete.obs")

## [1] 0.829

Factor analysis: EB shrunk estimates (ebpm)

# Empirical Bayes via ebpm_gamma (Poisson means with Gamma prior)
for (g in game_names) {
  fit <- ebpm_gamma(d_country[[paste0("players_", g)]], d_country$pop)
  d_country[[paste0("eb_players_", g)]] <- fit$posterior$mean
}

eb_player_cols <- paste0("eb_players_", game_names)
fa_eb <- d_country[eb_player_cols] %>% fa(nfactors = 1, fm = "ml")
fa_eb

## Factor Analysis using method =  ml
## Call: fa(r = ., nfactors = 1, fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
##                           ML1      h2   u2 com
## eb_players_aov           0.25 6.2e-02 0.94   1
## eb_players_apex          0.79 6.3e-01 0.37   1
## eb_players_brawl_stars   0.30 9.2e-02 0.91   1
## eb_players_chess         0.24 5.8e-02 0.94   1
## eb_players_clash_royale  0.34 1.2e-01 0.88   1
## eb_players_cod_mobile    0.30 8.9e-02 0.91   1
## eb_players_cod_warzone   0.39 1.5e-01 0.85   1
## eb_players_crossfire     0.17 3.0e-02 0.97   1
## eb_players_cs            0.75 5.6e-01 0.44   1
## eb_players_cs2           0.57 3.3e-01 0.67   1
## eb_players_csgo          0.71 5.1e-01 0.49   1
## eb_players_dota2         0.61 3.8e-01 0.62   1
## eb_players_fortnite      0.73 5.3e-01 0.47   1
## eb_players_freefire      0.00 6.2e-07 1.00   1
## eb_players_hots          0.71 5.1e-01 0.49   1
## eb_players_hs            0.86 7.4e-01 0.26   1
## eb_players_lol           0.57 3.2e-01 0.68   1
## eb_players_lol_wr        0.36 1.3e-01 0.87   1
## eb_players_mlbb          0.19 3.7e-02 0.96   1
## eb_players_mtg_arena     0.60 3.6e-01 0.64   1
## eb_players_ow            0.74 5.5e-01 0.45   1
## eb_players_ow2           0.75 5.7e-01 0.43   1
## eb_players_pubg          0.78 6.1e-01 0.39   1
## eb_players_pubg_mobile   0.05 2.3e-03 1.00   1
## eb_players_r6            0.74 5.4e-01 0.46   1
## eb_players_rocket_league 0.55 3.1e-01 0.69   1
## eb_players_sc2           0.78 6.1e-01 0.39   1
## eb_players_smite         0.57 3.3e-01 0.67   1
## eb_players_tft           0.70 4.8e-01 0.52   1
## eb_players_valorant      0.53 2.8e-01 0.72   1
## 
##                 ML1
## SS loadings    9.91
## Proportion Var 0.33
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## df null model =  435  with the objective function =  35.5 with Chi Square =  4727
## df of  the model are 405  and the objective function was  24.7 
## 
## The root mean square of the residuals (RMSR) is  0.16 
## The df corrected root mean square of the residuals is  0.17 
## 
## The harmonic n.obs is  145 with the empirical chi square  3404  with prob <  0 
## The total n.obs was  145  with Likelihood Chi Square =  3272  with prob <  0 
## 
## Tucker Lewis Index of factoring reliability =  0.279
## RMSEA index =  0.221  and the 90 % confidence intervals are  0.215 0.229
## BIC =  1257
## Fit based upon off diagonal values = 0.8
## Measures of factor score adequacy             
##                                                    ML1
## Correlation of (regression) scores with factors   0.98
## Multiple R square of scores with factors          0.95
## Minimum correlation of possible factor scores     0.91

d_country$fa_eb <- fa_eb$scores[,1]

Factor analysis: old method (log-count residuals)

# Replication of 2019 approach: log(count+1) ~ log(pop), FA on residuals
for (g in game_names) {
  y <- log10(d_country[[paste0("players_", g)]] + 1)
  fit <- lm(y ~ d_country$log_pop)
  d_country[[paste0("old_resid_", g)]] <- resid(fit)
}

old_resid_cols <- paste0("old_resid_", game_names)
fa_old <- d_country[old_resid_cols] %>% fa(nfactors = 1, fm = "ml")
fa_old

## Factor Analysis using method =  ml
## Call: fa(r = ., nfactors = 1, fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
##                          ML1    h2   u2 com
## old_resid_aov           0.56 0.318 0.68   1
## old_resid_apex          0.91 0.837 0.16   1
## old_resid_brawl_stars   0.68 0.467 0.53   1
## old_resid_chess         0.63 0.395 0.60   1
## old_resid_clash_royale  0.74 0.552 0.45   1
## old_resid_cod_mobile    0.53 0.286 0.71   1
## old_resid_cod_warzone   0.70 0.491 0.51   1
## old_resid_crossfire     0.62 0.389 0.61   1
## old_resid_cs            0.83 0.696 0.30   1
## old_resid_cs2           0.73 0.532 0.47   1
## old_resid_csgo          0.81 0.663 0.34   1
## old_resid_dota2         0.66 0.432 0.57   1
## old_resid_fortnite      0.85 0.715 0.29   1
## old_resid_freefire      0.14 0.021 0.98   1
## old_resid_hots          0.88 0.781 0.22   1
## old_resid_hs            0.93 0.862 0.14   1
## old_resid_lol           0.89 0.789 0.21   1
## old_resid_lol_wr        0.61 0.373 0.63   1
## old_resid_mlbb          0.24 0.056 0.94   1
## old_resid_mtg_arena     0.74 0.554 0.45   1
## old_resid_ow            0.90 0.813 0.19   1
## old_resid_ow2           0.85 0.726 0.27   1
## old_resid_pubg          0.89 0.788 0.21   1
## old_resid_pubg_mobile   0.31 0.097 0.90   1
## old_resid_r6            0.85 0.719 0.28   1
## old_resid_rocket_league 0.75 0.565 0.44   1
## old_resid_sc2           0.91 0.832 0.17   1
## old_resid_smite         0.73 0.527 0.47   1
## old_resid_tft           0.88 0.783 0.22   1
## old_resid_valorant      0.82 0.677 0.32   1
## 
##                  ML1
## SS loadings    16.73
## Proportion Var  0.56
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## df null model =  435  with the objective function =  38.2 with Chi Square =  5091
## df of  the model are 405  and the objective function was  13.5 
## 
## The root mean square of the residuals (RMSR) is  0.11 
## The df corrected root mean square of the residuals is  0.11 
## 
## The harmonic n.obs is  145 with the empirical chi square  1552  with prob <  1.2e-133 
## The total n.obs was  145  with Likelihood Chi Square =  1794  with prob <  1.3e-173 
## 
## Tucker Lewis Index of factoring reliability =  0.678
## RMSEA index =  0.154  and the 90 % confidence intervals are  0.147 0.162
## BIC =  -221
## Fit based upon off diagonal values = 0.96
## Measures of factor score adequacy             
##                                                    ML1
## Correlation of (regression) scores with factors   0.99
## Multiple R square of scores with factors          0.98
## Minimum correlation of possible factor scores     0.97

d_country$fa_old_method <- fa_old$scores[,1]

Game-level correlation matrix

# Correlation matrix of raw per-capita rates across games
cor_raw <- cor(d_country[rate_player_cols], use = "complete.obs")
colnames(cor_raw) <- rownames(cor_raw) <- game_names

# EB per-capita correlations
cor_eb <- cor(d_country[eb_player_cols], use = "complete.obs")
colnames(cor_eb) <- rownames(cor_eb) <- game_names

# Combined matrix: upper triangle = raw per-capita, lower triangle = EB per-capita
cor_combined <- cor_raw
cor_combined[lower.tri(cor_combined)] <- cor_eb[lower.tri(cor_eb)]
diag(cor_combined) <- NA

round(cor_combined, 2)

##                 aov  apex brawl_stars chess clash_royale cod_mobile cod_warzone
## aov              NA  0.32        0.56 -0.06         0.10       0.65       -0.04
## apex           0.33    NA        0.16 -0.01         0.29       0.24        0.24
## brawl_stars    0.54  0.17          NA -0.03         0.11       0.65       -0.02
## chess         -0.08  0.10        0.01    NA        -0.05      -0.07       -0.01
## clash_royale   0.09  0.30        0.15  0.05           NA       0.12        0.01
## cod_mobile     0.70  0.29        0.64 -0.04         0.20         NA        0.00
## cod_warzone   -0.04  0.32        0.01  0.11         0.13       0.09          NA
## crossfire      0.03  0.12       -0.04  0.03         0.15       0.00        0.03
## cs             0.00  0.64        0.04  0.27         0.17       0.01        0.24
## cs2           -0.08  0.34       -0.03  0.40         0.06      -0.01        0.17
## csgo          -0.02  0.42        0.03  0.33         0.08       0.03        0.45
## dota2          0.10  0.47        0.20  0.18         0.15       0.16        0.12
## fortnite      -0.05  0.63        0.06  0.18         0.13       0.06        0.65
## freefire       0.55  0.06        0.12 -0.13        -0.05       0.40       -0.10
## hots           0.58  0.65        0.67  0.11         0.27       0.54        0.13
## hs             0.22  0.68        0.28  0.21         0.38       0.26        0.19
## lol            0.07  0.36        0.09  0.17         0.14       0.09        0.82
## lol_wr         0.79  0.31        0.75 -0.07         0.12       0.68       -0.03
## mlbb           0.56  0.06        0.85 -0.07        -0.05       0.63       -0.05
## mtg_arena     -0.07  0.56        0.02  0.10         0.37       0.07        0.24
## ow             0.10  0.51        0.15  0.16         0.17       0.12        0.64
## ow2            0.18  0.49        0.26  0.21         0.24       0.23        0.19
## pubg           0.26  0.71        0.11  0.19         0.28       0.18        0.22
## pubg_mobile    0.01 -0.03        0.00 -0.02        -0.01       0.02       -0.02
## r6             0.22  0.70        0.34  0.05         0.30       0.31        0.19
## rocket_league -0.04  0.31        0.06  0.23         0.04       0.05        0.42
## sc2            0.28  0.66        0.32  0.14         0.37       0.27        0.18
## smite         -0.06  0.61        0.02  0.16         0.07       0.07        0.27
## tft            0.15  0.53        0.26  0.16         0.49       0.23        0.24
## valorant       0.27  0.29        0.35  0.16         0.14       0.29        0.06
##               crossfire    cs   cs2  csgo dota2 fortnite freefire  hots    hs
## aov                0.03  0.00 -0.09 -0.03  0.10    -0.06     0.54  0.58  0.22
## apex               0.12  0.63  0.32  0.41  0.46     0.44     0.07  0.61  0.65
## brawl_stars       -0.03  0.03 -0.04  0.00  0.18     0.03     0.13  0.64  0.26
## chess             -0.01  0.13  0.21  0.18  0.05     0.03    -0.10  0.06  0.09
## clash_royale       0.16  0.17  0.03  0.03  0.16     0.03    -0.01  0.23  0.36
## cod_mobile        -0.01  0.01  0.01  0.00  0.18    -0.01     0.30  0.53  0.23
## cod_warzone        0.00  0.12  0.07  0.36  0.08     0.40    -0.09  0.05  0.07
## crossfire            NA  0.17  0.08  0.07  0.03     0.03     0.29  0.11  0.21
## cs                 0.18    NA  0.71  0.72  0.57     0.43    -0.06  0.46  0.66
## cs2                0.09  0.71    NA  0.79  0.53     0.31    -0.09  0.29  0.50
## csgo               0.08  0.72  0.79    NA  0.55     0.49    -0.09  0.39  0.55
## dota2              0.03  0.58  0.54  0.54    NA     0.23    -0.03  0.39  0.54
## fortnite           0.06  0.62  0.46  0.68  0.35       NA    -0.10  0.28  0.35
## freefire           0.27 -0.06 -0.08 -0.07 -0.03    -0.10       NA  0.09 -0.02
## hots               0.11  0.44  0.24  0.34  0.40     0.41     0.08    NA  0.64
## hs                 0.22  0.65  0.47  0.51  0.55     0.52    -0.03  0.62    NA
## lol                0.12  0.38  0.37  0.66  0.29     0.72    -0.03  0.28  0.40
## lol_wr            -0.01  0.02  0.00  0.06  0.21     0.07     0.38  0.69  0.34
## mlbb              -0.04 -0.01 -0.05  0.03  0.27    -0.05     0.20  0.56  0.13
## mtg_arena          0.45  0.48  0.39  0.37  0.42     0.42    -0.03  0.24  0.63
## ow                 0.10  0.48  0.42  0.73  0.44     0.71    -0.02  0.41  0.55
## ow2                0.04  0.52  0.44  0.65  0.36     0.55    -0.04  0.61  0.62
## pubg               0.06  0.64  0.43  0.46  0.46     0.54     0.09  0.49  0.74
## pubg_mobile       -0.01 -0.03 -0.02  0.24  0.21    -0.01     0.00 -0.02 -0.04
## r6                 0.09  0.42  0.27  0.30  0.51     0.45    -0.01  0.60  0.72
## rocket_league      0.06  0.36  0.39  0.68  0.17     0.66    -0.07  0.28  0.40
## sc2                0.07  0.57  0.31  0.41  0.50     0.44    -0.04  0.67  0.71
## smite              0.10  0.54  0.40  0.36  0.29     0.67    -0.03  0.40  0.47
## tft                0.31  0.51  0.33  0.39  0.37     0.44    -0.03  0.61  0.76
## valorant           0.14  0.32  0.40  0.64  0.47     0.30     0.06  0.45  0.41
##                 lol lol_wr  mlbb mtg_arena    ow   ow2  pubg pubg_mobile    r6
## aov            0.05   0.79  0.58     -0.06  0.09  0.10  0.25       -0.01  0.23
## apex           0.38   0.29  0.07      0.53  0.50  0.28  0.70       -0.05  0.69
## brawl_stars    0.06   0.74  0.83      0.00  0.12  0.15  0.10       -0.02  0.34
## chess          0.09  -0.07 -0.06      0.01  0.09  0.18  0.08        0.03 -0.02
## clash_royale   0.08   0.11 -0.03      0.31  0.11  0.10  0.28       -0.05  0.28
## cod_mobile     0.05   0.64  0.66      0.04  0.08  0.12  0.14        0.00  0.29
## cod_warzone    0.76  -0.04 -0.05      0.09  0.61  0.09  0.13       -0.03  0.08
## crossfire      0.10  -0.01 -0.04      0.43  0.08  0.01  0.06       -0.03  0.10
## cs             0.36   0.02 -0.01      0.49  0.47  0.46  0.65       -0.04  0.42
## cs2            0.35  -0.01 -0.05      0.43  0.42  0.48  0.45       -0.03  0.26
## csgo           0.65   0.07  0.03      0.37  0.74  0.70  0.49        0.24  0.28
## dota2          0.28   0.24  0.29      0.43  0.42  0.28  0.46        0.26  0.50
## fortnite       0.53   0.03 -0.05      0.27  0.53  0.34  0.38       -0.02  0.29
## freefire      -0.04   0.38  0.21      0.00 -0.03 -0.04  0.09       -0.02  0.01
## hots           0.27   0.66  0.56      0.23  0.41  0.53  0.50       -0.03  0.58
## hs             0.38   0.32  0.13      0.60  0.54  0.57  0.74       -0.06  0.70
## lol              NA   0.09  0.02      0.27  0.85  0.35  0.36       -0.04  0.22
## lol_wr         0.11     NA  0.73      0.08  0.13  0.15  0.30        0.19  0.32
## mlbb           0.03   0.72    NA     -0.09  0.07  0.10  0.03        0.15  0.23
## mtg_arena      0.29   0.07 -0.09        NA  0.37  0.17  0.45       -0.05  0.60
## ow             0.83   0.16  0.09      0.41    NA  0.66  0.53       -0.03  0.48
## ow2            0.39   0.26  0.18      0.29  0.70    NA  0.46       -0.03  0.32
## pubg           0.36   0.32  0.03      0.44  0.55  0.57    NA       -0.04  0.57
## pubg_mobile   -0.03   0.10  0.11     -0.02 -0.03 -0.02 -0.03          NA -0.04
## r6             0.24   0.32  0.22      0.60  0.54  0.54  0.57       -0.03    NA
## rocket_league  0.52   0.04 -0.02      0.21  0.66  0.74  0.33       -0.02  0.33
## sc2            0.30   0.36  0.24      0.39  0.50  0.63  0.79        0.23  0.66
## smite          0.25   0.11 -0.06      0.36  0.29  0.38  0.44       -0.02  0.47
## tft            0.33   0.27  0.10      0.55  0.39  0.49  0.47       -0.02  0.55
## valorant       0.22   0.40  0.37      0.26  0.34  0.48  0.31        0.73  0.30
##               rocket_league   sc2 smite   tft valorant
## aov                   -0.05  0.28 -0.04  0.15     0.22
## apex                   0.27  0.62  0.58  0.44     0.22
## brawl_stars            0.06  0.30  0.01  0.21     0.28
## chess                  0.30  0.02  0.04  0.12     0.08
## clash_royale          -0.02  0.32  0.07  0.34     0.07
## cod_mobile             0.00  0.25  0.05  0.18     0.22
## cod_warzone            0.33  0.08  0.12  0.07    -0.01
## crossfire              0.04  0.07  0.11  0.27     0.11
## cs                     0.36  0.55  0.55  0.56     0.31
## cs2                    0.41  0.29  0.41  0.47     0.39
## csgo                   0.68  0.41  0.35  0.57     0.64
## dota2                  0.17  0.52  0.28  0.37     0.49
## fortnite               0.46  0.29  0.45  0.31     0.18
## freefire              -0.09 -0.03  0.00  0.00     0.04
## hots                   0.33  0.62  0.36  0.63     0.43
## hs                     0.44  0.66  0.47  0.79     0.39
## lol                    0.51  0.26  0.23  0.33     0.20
## lol_wr                 0.02  0.39  0.12  0.25     0.42
## mlbb                  -0.03  0.26 -0.06  0.11     0.36
## mtg_arena              0.15  0.38  0.37  0.46     0.21
## ow                     0.68  0.43  0.25  0.48     0.32
## ow2                    0.81  0.36  0.22  0.68     0.48
## pubg                   0.35  0.76  0.45  0.49     0.29
## pubg_mobile           -0.01  0.33 -0.04 -0.05     0.75
## r6                     0.28  0.64  0.46  0.48     0.24
## rocket_league            NA  0.18  0.28  0.52     0.39
## sc2                    0.22    NA  0.40  0.44     0.52
## smite                  0.35  0.41    NA  0.34     0.13
## tft                    0.31  0.54  0.37    NA     0.43
## valorant               0.39  0.49  0.17  0.36       NA

# Summary stats
raw_upper <- cor_raw[upper.tri(cor_raw)]
eb_lower <- cor_eb[lower.tri(cor_eb)]

tibble(
  method = c("raw_pc (upper)", "EB_pc (lower)"),
  mean = c(mean(raw_upper), mean(eb_lower)),
  median = c(median(raw_upper), median(eb_lower)),
  sd = c(sd(raw_upper), sd(eb_lower)),
  min = c(min(raw_upper), min(eb_lower)),
  max = c(max(raw_upper), max(eb_lower))
)

# How similar are the two sets of inter-game correlations?
cor(raw_upper, eb_lower)

## [1] 0.0824

# Full game name mapping
game_labels <- c(
  aov = "Arena of Valor", apex = "Apex Legends", brawl_stars = "Brawl Stars",
  chess = "Chess.com", clash_royale = "Clash Royale", cod_mobile = "CoD Mobile",
  cod_warzone = "CoD Warzone", crossfire = "CrossFire",
  cs = "Counter-Strike", cs2 = "Counter-Strike 2", csgo = "CS:GO",
  dota2 = "Dota 2", fortnite = "Fortnite", freefire = "Free Fire",
  hots = "Heroes of Storm", hs = "Hearthstone", lol = "League of Legends",
  lol_wr = "LoL Wild Rift", mlbb = "Mobile Legends",
  mtg_arena = "MTG Arena", ow = "Overwatch", ow2 = "Overwatch 2",
  pubg = "PUBG", pubg_mobile = "PUBG Mobile", r6 = "Rainbow Six",
  rocket_league = "Rocket League", sc2 = "StarCraft II", smite = "SMITE",
  tft = "Teamfight Tactics", valorant = "VALORANT"
)

# Order games by hierarchical clustering
dist_mat <- as.dist(1 - cor_raw)
hc <- hclust(dist_mat, method = "ward.D2")
game_order <- hc$labels[hc$order]

# Heatmap
cor_combined_long <- cor_combined %>%
  as.data.frame() %>%
  mutate(game1 = rownames(.)) %>%
  pivot_longer(-game1, names_to = "game2", values_to = "r") %>%
  mutate(
    game1_full = factor(game_labels[game1], levels = game_labels[game_order]),
    game2_full = factor(game_labels[game2], levels = rev(game_labels[game_order]))
  )

ggplot(cor_combined_long, aes(game1_full, game2_full, fill = r)) +
  geom_tile() +
  geom_text(aes(label = ifelse(is.na(r), "", sprintf("%.2f", r))), size = 1.8) +
  scale_fill_gradient2(low = "blue", mid = "white", high = "red", midpoint = 0, na.value = "grey90") +
  labs(title = "Game correlation matrix (upper = raw pc, lower = EB pc)",
       x = NULL, y = NULL, fill = "r") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 7),
        axis.text.y = element_text(size = 7))

GG_save("figs/game_cor_matrix.png")

Unit-weighted composites

# Unit-weighted: standardize per-capita rates, average across games
rate_z <- scale(d_country[rate_player_cols])
d_country$unit_weighted <- rowMeans(rate_z, na.rm = TRUE)

prize_z <- scale(d_country[rate_prize_cols])
d_country$unit_weighted_prize <- rowMeans(prize_z, na.rm = TRUE)

Hierarchical FA

# Parallel analysis for number of factors
pa <- fa.parallel(d_country[rate_player_cols], fm = "ml", fa = "fa", plot = FALSE)

## Parallel analysis suggests that the number of factors =  5  and the number of components =  NA

pa$nfact

## [1] 5

# Oblimin 1st order
fa6 <- fa(d_country[rate_player_cols], nfactors = pa$nfact, fm = "ml", rotate = "oblimin")

## Loading required namespace: GPArotation

print(fa6$loadings, cutoff = 0.3, sort = TRUE)

## 
## Loadings:
##                            ML5    ML3    ML2    ML4    ML1   
## rate_players_apex           0.802                            
## rate_players_hs             0.709                            
## rate_players_mtg_arena      0.628                            
## rate_players_pubg           0.775                            
## rate_players_r6             0.787                            
## rate_players_sc2            0.807                       0.326
## rate_players_smite          0.571                            
## rate_players_aov                   0.756                     
## rate_players_brawl_stars           0.894                     
## rate_players_cod_mobile            0.749                     
## rate_players_hots           0.391  0.628                     
## rate_players_lol_wr                0.823                     
## rate_players_mlbb                  0.889                     
## rate_players_cs             0.490         0.569              
## rate_players_cs2                          0.877              
## rate_players_csgo                         0.760  0.337       
## rate_players_ow2                          0.687              
## rate_players_rocket_league                0.532  0.361       
## rate_players_cod_warzone                         0.910       
## rate_players_lol                                 0.906       
## rate_players_ow                                  0.723       
## rate_players_pubg_mobile                                1.015
## rate_players_valorant                     0.393         0.740
## rate_players_chess                                           
## rate_players_clash_royale   0.487                            
## rate_players_crossfire                                       
## rate_players_dota2          0.379                            
## rate_players_fortnite                            0.423       
## rate_players_freefire              0.318                     
## rate_players_tft            0.384         0.460              
## 
##                  ML5   ML3   ML2   ML4   ML1
## SS loadings    4.875 4.185 3.282 2.643 1.879
## Proportion Var 0.162 0.139 0.109 0.088 0.063
## Cumulative Var 0.162 0.302 0.411 0.500 0.562

fa6$Phi %>% round(2)

##      ML5  ML3  ML2  ML4  ML1
## ML5 1.00 0.29 0.46 0.29 0.08
## ML3 0.29 1.00 0.08 0.04 0.15
## ML2 0.46 0.08 1.00 0.36 0.14
## ML4 0.29 0.04 0.36 1.00 0.04
## ML1 0.08 0.15 0.14 0.04 1.00

# Omega hierarchical (Schmid-Leiman)
om <- omega(d_country[rate_player_cols], nfactors = pa$nfact, fm = "ml", plot = FALSE)

## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.

om$omega_h

## [1] 0.617

om$omega.tot

## [1] 0.954

# Hierarchical g scores
g_loadings <- om$schmid$sl[, 1]
d_country$fa_hierarchical <- as.vector(scale(d_country[rate_player_cols]) %*% g_loadings / sum(g_loadings))

Compare composites

d_country %>%
  select(fa_players_raw, fa_eb, unit_weighted, fa_hierarchical, fa_old_method, pop) %>%
  cor(use = "complete.obs") %>%
  round(3)

##                 fa_players_raw  fa_eb unit_weighted fa_hierarchical
## fa_players_raw           1.000  0.989         0.963           0.995
## fa_eb                    0.989  1.000         0.957           0.984
## unit_weighted            0.963  0.957         1.000           0.976
## fa_hierarchical          0.995  0.984         0.976           1.000
## fa_old_method            0.658  0.696         0.679           0.651
## pop                     -0.092 -0.104        -0.093          -0.096
##                 fa_old_method    pop
## fa_players_raw          0.658 -0.092
## fa_eb                   0.696 -0.104
## unit_weighted           0.679 -0.093
## fa_hierarchical         0.651 -0.096
## fa_old_method           1.000  0.073
## pop                     0.073  1.000

Country rankings

d_country %>%
  select(country, iso3, pop, unit_weighted, unit_weighted_prize, fa_players_raw, fa_old_method) %>%
  arrange(desc(unit_weighted)) %>%
  head(30)

Comparison with top 1000 global players

# Scrape top 1000 players by earnings
top1000_urls <- c(
  "https://www.esportsearnings.com/players",
  sprintf("https://www.esportsearnings.com/players/highest-earnings-top-%d", seq(200, 1000, 100))
)

top1000 <- map_dfr(top1000_urls, function(url) {
  Sys.sleep(1.5)
  page <- rvest::read_html(url)
  tbl_node <- page %>% rvest::html_nodes(".detail_list_table") %>% .[[2]]

  flags <- tbl_node %>% rvest::html_nodes("img") %>% rvest::html_attr("src") %>%
    str_extract("[a-z]{2}(?=\\.png)")
  games <- tbl_node %>% rvest::html_nodes("td.detail_list_game a") %>% rvest::html_text()
  tbl_data <- tbl_node %>% rvest::html_table()

  tibble(
    iso2 = flags,
    game = games,
    earnings = tbl_data[["Total (Overall)"]] %>% str_remove_all("[\\$,]") %>% as.numeric()
  )
})

# Apply subnational merges
top1000$iso3 <- countrycode(top1000$iso2, "iso2c", "iso3c", warn = FALSE)
sub_map_iso2 <- c("hk" = "CHN", "mo" = "CHN", "pr" = "USA", "aw" = "NLD",
                   "fo" = "DNK", "nc" = "FRA", "bm" = "GBR", "gi" = "GBR",
                   "gl" = "DNK", "pf" = "FRA", "re" = "FRA")
for (code in names(sub_map_iso2)) {
  top1000$iso3[top1000$iso2 == code] <- sub_map_iso2[code]
}

top1000_country <- top1000 %>%
  count(iso3, name = "top1000_count") %>%
  filter(!is.na(iso3))

d_country <- d_country %>%
  left_join(top1000_country, by = "iso3") %>%
  mutate(top1000_count = replace_na(top1000_count, 0),
         top1000_per_million = top1000_count / (pop / 1e6))

# Correlations: top 1000 per capita vs FA composites and IQ
d_country %>%
  select(top1000_per_million, fa_players_raw, fa_prize_raw, fa_eb, fa_old_method, NIQ, log_pop) %>%
  cor(use = "complete.obs")

##                     top1000_per_million fa_players_raw fa_prize_raw  fa_eb
## top1000_per_million              1.0000          0.648      0.86392  0.670
## fa_players_raw                   0.6476          1.000      0.82852  0.990
## fa_prize_raw                     0.8639          0.829      1.00000  0.852
## fa_eb                            0.6704          0.990      0.85219  1.000
## fa_old_method                    0.4219          0.667      0.58363  0.699
## NIQ                              0.3895          0.657      0.51808  0.665
## log_pop                         -0.0754         -0.121      0.00178 -0.153
##                     fa_old_method     NIQ  log_pop
## top1000_per_million        0.4219  0.3895 -0.07545
## fa_players_raw             0.6675  0.6574 -0.12055
## fa_prize_raw               0.5836  0.5181  0.00178
## fa_eb                      0.6994  0.6652 -0.15276
## fa_old_method              1.0000  0.8082  0.04210
## NIQ                        0.8082  1.0000 -0.02818
## log_pop                    0.0421 -0.0282  1.00000

# Top countries by top 1000 per capita
d_country %>%
  select(country, pop, top1000_count, top1000_per_million, fa_players_raw, fa_prize_raw) %>%
  arrange(desc(top1000_per_million)) %>%
  head(20)

Correlations with NIQ

# Total players and prize (raw sums, not per capita)
d_country$total_players <- rowSums(d_country[paste0("players_", game_names)])
d_country$total_prize <- rowSums(d_country[paste0("prize_", game_names)])

# Main composites vs NIQ
cor_niq <- d_country %>%
  select(
    NIQ,
    `Players per capita (unit-weighted)` = unit_weighted,
    `Prize per capita (unit-weighted)` = unit_weighted_prize,
    `Top 1000 pc` = top1000_per_million,
    `Total players` = total_players,
    `Total prize` = total_prize,
    pop
  ) %>%
  cor(use = "pairwise.complete.obs")

round(cor_niq, 3)

##                                      NIQ Players per capita (unit-weighted)
## NIQ                                1.000                              0.683
## Players per capita (unit-weighted) 0.683                              1.000
## Prize per capita (unit-weighted)   0.617                              0.844
## Top 1000 pc                        0.390                              0.574
## Total players                      0.411                              0.279
## Total prize                        0.356                              0.213
## pop                                0.002                             -0.093
##                                    Prize per capita (unit-weighted) Top 1000 pc
## NIQ                                                           0.617       0.390
## Players per capita (unit-weighted)                            0.844       0.574
## Prize per capita (unit-weighted)                              1.000       0.790
## Top 1000 pc                                                   0.790       1.000
## Total players                                                 0.394       0.213
## Total prize                                                   0.393       0.274
## pop                                                          -0.014      -0.054
##                                    Total players Total prize    pop
## NIQ                                        0.411       0.356  0.002
## Players per capita (unit-weighted)         0.279       0.213 -0.093
## Prize per capita (unit-weighted)           0.394       0.393 -0.014
## Top 1000 pc                                0.213       0.274 -0.054
## Total players                              1.000       0.823  0.413
## Total prize                                0.823       1.000  0.601
## pop                                        0.413       0.601  1.000

# Heatmap
cor_niq %>%
  as.data.frame() %>%
  mutate(var1 = rownames(.)) %>%
  pivot_longer(-var1, names_to = "var2", values_to = "r") %>%
  mutate(
    var1 = factor(var1, levels = colnames(cor_niq)),
    var2 = factor(var2, levels = rev(colnames(cor_niq)))
  ) %>%
  ggplot(aes(var1, var2, fill = r)) +
  geom_tile() +
  geom_text(aes(label = sprintf("%.2f", r)), size = 3.5) +
  scale_fill_gradient2(low = "blue", mid = "white", high = "red", midpoint = 0, na.value = "grey90",
                       limits = c(-0.2, 1)) +
  labs(title = "Correlations: NIQ and esports composites", x = NULL, y = NULL, fill = "r") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 9),
        axis.text.y = element_text(size = 9))

GG_save("figs/NIQ_esports_cor_matrix.png")

FA method comparison

# Comparison of FA variants for player per-capita rates
cor_fa <- d_country %>%
  select(
    NIQ,
    `Per capita 1-factor FA` = fa_players_raw,
    `Per capita EB FA` = fa_eb,
    `Per capita unit-weighted` = unit_weighted,
    `Per capita hierarchical g` = fa_hierarchical,
    `Log-residual FA` = fa_old_method,
    pop
  ) %>%
  cor(use = "pairwise.complete.obs")

cor_fa %>%
  as.data.frame() %>%
  mutate(var1 = rownames(.)) %>%
  pivot_longer(-var1, names_to = "var2", values_to = "r") %>%
  mutate(
    var1 = factor(var1, levels = colnames(cor_fa)),
    var2 = factor(var2, levels = rev(colnames(cor_fa)))
  ) %>%
  ggplot(aes(var1, var2, fill = r)) +
  geom_tile() +
  geom_text(aes(label = sprintf("%.2f", r)), size = 4) +
  scale_fill_gradient2(low = "blue", mid = "white", high = "red", midpoint = 0, na.value = "grey90",
                       limits = c(-0.2, 1)) +
  labs(title = "FA method comparison: players per capita", x = NULL, y = NULL, fill = "r") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10),
        axis.text.y = element_text(size = 10))

GG_save("figs/FA_variants_cor_matrix.png")

Disattenuation for NIQ measurement error

# NIQ reliability from SEs
niq_in_sample <- d_country %>% filter(!is.na(NIQ))
var_niq <- var(niq_in_sample$NIQ, na.rm = TRUE)
mean_se2 <- mean(niq_in_sample$SE_NIQ^2, na.rm = TRUE)
reliability_niq <- 1 - mean_se2 / var_niq

cat(sprintf("NIQ reliability: %.3f (var=%.1f, mean SE²=%.2f)\n\n", reliability_niq, var_niq, mean_se2))

## NIQ reliability: 0.928 (var=96.3, mean SE²=6.97)

# Disattenuated correlations (correcting NIQ side only)
composites_list <- c(
  "Players per capita (unit-weighted)" = "unit_weighted",
  "Prize per capita (unit-weighted)" = "unit_weighted_prize",
  "Players per capita 1-factor FA" = "fa_players_raw",
  "Players per capita EB FA" = "fa_eb",
  "Players log-residual FA" = "fa_old_method",
  "Top 1000 pc" = "top1000_per_million"
)

tibble(
  composite = names(composites_list),
  r_observed = map_dbl(composites_list, ~cor(d_country$NIQ, d_country[[.x]], use = "complete.obs")),
  r_disattenuated = r_observed / sqrt(reliability_niq)
)

NIQ scatter plots

d_plot <- d_country %>% filter(!is.na(NIQ))
r_val <- cor(d_plot$NIQ, d_plot$unit_weighted)
n_val <- nrow(d_plot)
p_val <- cor.test(d_plot$NIQ, d_plot$unit_weighted)$p.value

ggplot(d_plot, aes(NIQ, unit_weighted, label = country)) +
  geom_point(aes(size = log10(pop)), alpha = 0.6) +
  geom_smooth(method = "lm", se = TRUE) +
  geom_smooth(method = "loess", se = FALSE, linetype = "dashed", color = "red") +
  geom_text_repel(size = 2, max.overlaps = 20) +
  scale_size_continuous(guide = "none") +
  annotate("text", x = min(d_plot$NIQ) + 1, y = max(d_plot$unit_weighted) * 0.95,
           label = sprintf("r = %.2f, n = %d, p = %.1e", r_val, n_val, p_val),
           hjust = 0, size = 4) +
  labs(y = "Esports composite (unit-weighted per capita)", x = "NIQ")

## `geom_smooth()` using formula = 'y ~ x'

## Warning: The following aesthetics were dropped during statistical transformation: label.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

## `geom_smooth()` using formula = 'y ~ x'

## Warning: The following aesthetics were dropped during statistical transformation: label.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

## Warning: ggrepel: 65 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

GG_save("figs/NIQ_vs_esports_uw.png")

## `geom_smooth()` using formula = 'y ~ x'

## Warning: The following aesthetics were dropped during statistical transformation: label.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

## `geom_smooth()` using formula = 'y ~ x'

## Warning: The following aesthetics were dropped during statistical transformation: label.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

## Warning: ggrepel: 40 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## Regional means

# Macro-regions based on UN sub-regions
d_country$un_subregion <- countrycode(d_country$iso3, "iso3c", "un.regionsub.name")

## Warning: Some values were not matched unambiguously: TWN, XKX

d_country$un_subregion[d_country$iso3 == "XKX"] <- "Southern Europe"

d_country$macroregion <- dplyr::recode(d_country$un_subregion,
  "Northern Europe" = "N+W Europe + offshoots",
  "Western Europe" = "N+W Europe + offshoots",
  "Northern America" = "N+W Europe + offshoots",
  "Australia and New Zealand" = "N+W Europe + offshoots",
  "Southern Europe" = "Southern Europe",
  "Eastern Europe" = "Eastern Europe",
  "Central Asia" = "Central Asia",
  "Eastern Asia" = "Eastern Asia",
  "South-eastern Asia" = "South-eastern Asia",
  "Southern Asia" = "Southern Asia",
  "Western Asia" = "MENA",
  "Northern Africa" = "MENA",
  "Sub-Saharan Africa" = "Sub-Saharan Africa",
  "Latin America and the Caribbean" = "Latin America",
  "Polynesia" = "Pacific Islands",
  "Melanesia" = "Pacific Islands"
)
d_country$macroregion[d_country$iso3 == "TWN"] <- "Eastern Asia"

macro_order <- d_country %>%
  dplyr::group_by(macroregion) %>%
  dplyr::summarize(med = median(unit_weighted), .groups = "drop") %>%
  dplyr::arrange(desc(med)) %>%
  dplyr::pull(macroregion)

ggplot(d_country, aes(x = factor(macroregion, levels = macro_order), y = unit_weighted)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(width = 0.2, alpha = 0.5, size = 2) +
  geom_text_repel(aes(label = country), size = 2.2, max.overlaps = 20, seed = 3,
                  min.segment.length = 0.1, force = 2, max.time = 5) +
  labs(x = NULL, y = "Esports composite (unit-weighted per capita)",
       title = "Esports performance by macro-region") +
  theme(axis.text.x = element_text(angle = 35, hjust = 1, size = 9),
        plot.margin = margin(5, 5, 5, 15, "mm"))

## Warning: ggrepel: 62 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

GG_save("figs/esports_by_macroregion.png", width = 16, height = 8)

## Warning: ggrepel: 36 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Regressions with controls

# Internet access data
inet <- read_csv("/media/emil/8tb_ssd_3/projects/finished/2019/Esports and national IQ/data/internet users 2016.csv", show_col_types = FALSE)
inet$iso3 <- pu_translate(inet$country, messages = FALSE)
inet$internet_pct <- inet$i99h2016 / 100

d_country <- d_country %>%
  left_join(inet %>% select(iso3, internet_pct), by = "iso3")

# Steam traffic per capita
steam <- read_csv("data/steam_traffic.csv", show_col_types = FALSE)
names(steam) <- c("iso3", "steam_gb_pc")
steam$iso3 <- ifelse(steam$iso3 %in% names(sub_to_parent), sub_to_parent[steam$iso3], steam$iso3)
steam <- steam %>% filter(!is.na(steam_gb_pc)) %>%
  dplyr::group_by(iso3) %>% dplyr::summarize(steam_gb_pc = max(steam_gb_pc), .groups = "drop")
d_country <- d_country %>% left_join(steam, by = "iso3")

d_mod <- d_country %>% filter(!is.na(NIQ))
d_mod$NIQ_z <- scale(d_mod$NIQ)[,1]
d_mod$internet_z <- scale(d_mod$internet_pct)[,1]
d_mod$steam_z <- scale(d_mod$steam_gb_pc)[,1]
d_mod$esport_z <- scale(d_mod$unit_weighted)[,1]

# Partial sqrt(epsilon^2) for effect sizes comparable across predictors
get_pe2 <- function(fit, term) {
  a <- as.data.frame(anova(fit))
  if (!term %in% rownames(a)) return(NA_real_)
  ss_eff <- a[term, "Partial SS"]
  df_eff <- a[term, "d.f."]
  ms_err <- a["ERROR", "MS"]
  ss_err <- a["ERROR", "Partial SS"]
  pepsilon2 <- (ss_eff - df_eff * ms_err) / (ss_eff + ss_err)
  sqrt(max(pepsilon2, 0))
}

# Linear NIQ models
ml1 <- rms::ols(esport_z ~ NIQ_z, data = d_mod)
ml2 <- rms::ols(esport_z ~ NIQ_z + internet_z, data = d_mod)
ml3 <- rms::ols(esport_z ~ NIQ_z + macroregion, data = d_mod)
ml4 <- rms::ols(esport_z ~ NIQ_z + steam_z, data = d_mod)
ml5 <- rms::ols(esport_z ~ NIQ_z + internet_z + macroregion + steam_z, data = d_mod)

# Spline NIQ models
ms1 <- rms::ols(esport_z ~ rcs(NIQ_z, 3), data = d_mod)
ms2 <- rms::ols(esport_z ~ rcs(NIQ_z, 3) + internet_z, data = d_mod)
ms3 <- rms::ols(esport_z ~ rcs(NIQ_z, 3) + macroregion, data = d_mod)
ms4 <- rms::ols(esport_z ~ rcs(NIQ_z, 3) + steam_z, data = d_mod)
ms5 <- rms::ols(esport_z ~ rcs(NIQ_z, 3) + internet_z + macroregion + steam_z, data = d_mod)

# Summary table: sqrt(partial epsilon^2) for each predictor
tibble(
  NIQ_form = c(rep("Linear", 5), rep("Spline", 5)),
  model = rep(c("NIQ", "+internet", "+region", "+steam", "+all"), 2),
  fits = list(ml1, ml2, ml3, ml4, ml5, ms1, ms2, ms3, ms4, ms5),
  NIQ = map_dbl(fits, ~get_pe2(.x, "NIQ_z")),
  Internet = map_dbl(fits, ~get_pe2(.x, "internet_z")),
  Region = map_dbl(fits, ~get_pe2(.x, "macroregion")),
  Steam = map_dbl(fits, ~get_pe2(.x, "steam_z")),
  R2_adj = map_dbl(fits, ~summary.lm(.x)$adj.r.squared)
) %>%
  select(-fits) %>%
  mutate(across(where(is.numeric), ~round(.x, 3)))

# Table as figure with significance stars
get_p <- function(fit, term) {
  a <- as.data.frame(anova(fit))
  if (!term %in% rownames(a)) return(NA_real_)
  a[term, "P"]
}

format_cell <- function(value, p) {
  if (is.na(value)) return("")
  star <- ifelse(!is.na(p) & p < 0.05, "*", "")
  sprintf("%.2f%s", value, star)
}

all_fits <- list(ml1, ml2, ml3, ml4, ml5, ms1, ms2, ms3, ms4, ms5)
fit_names <- c("L1","L2","L3","L4","L5","S1","S2","S3","S4","S5")
predictors <- c("NIQ", "Internet", "Region", "Steam", "R\u00b2 adj")
terms <- c("NIQ_z", "internet_z", "macroregion", "steam_z", NA)

vals <- matrix(NA, 5, 10)
labs <- matrix("", 5, 10)
for (j in 1:10) {
  fit <- all_fits[[j]]
  for (i in 1:4) {
    vals[i,j] <- get_pe2(fit, terms[i])
    labs[i,j] <- format_cell(vals[i,j], get_p(fit, terms[i]))
  }
  vals[5,j] <- summary.lm(fit)$adj.r.squared
  labs[5,j] <- sprintf("%.2f", vals[5,j])
}

model_subtitles <- c("NIQ", "+int", "+reg", "+stm", "+all", "NIQ", "+int", "+reg", "+stm", "+all")

expand.grid(
  row = factor(predictors, levels = rev(predictors)),
  col = factor(fit_names, levels = fit_names)
) %>%
  mutate(
    i = match(as.character(row), predictors),
    j = match(col, fit_names),
    value = map2_dbl(i, j, ~vals[.x, .y]),
    label = map2_chr(i, j, ~labs[.x, .y]),
    is_r2 = as.character(row) == "R\u00b2 adj"
  ) %>%
  ggplot(aes(x = col, y = row)) +
  geom_tile(aes(fill = ifelse(is.na(value) | is_r2, NA, value)), color = "grey80") +
  geom_text(aes(label = label), size = 4.5) +
  scale_fill_gradient(low = "white", high = "salmon", na.value = "grey95", guide = "none",
                      limits = c(0, 0.8)) +
  scale_x_discrete(labels = model_subtitles, position = "top") +
  labs(x = NULL, y = NULL) +
  theme_minimal() +
  theme(axis.text = element_text(size = 10), panel.grid = element_blank(),
        plot.title = element_text(hjust = 0.5, size = 13, face = "bold")) +
  annotate("text", x = 3, y = 6.2, label = "Linear NIQ", fontface = "bold", size = 4.5) +
  annotate("text", x = 8, y = 6.2, label = "Spline NIQ", fontface = "bold", size = 4.5) +
  annotate("text", x = 10.3, y = 0.3, label = sprintf("n = %d, * p < .05", nrow(d_mod)),
           size = 3.5, hjust = 1, color = "grey40") +
  coord_cartesian(ylim = c(0.4, 6.3), clip = "off") +
  ggtitle(expression("Regression results: " * sqrt("partial " * epsilon^2)))

GG_save("figs/regression_table.png")

Expanded sample: including 0-player countries

# Countries with NIQ but 0 esports players in any game
niq_all <- read_excel("data/National IQ datasets.xlsx", sheet = "Seb2024") %>%
  filter(!ISO %in% c("HKG", "MAC")) %>%
  select(iso3 = ISO, NIQ = NIQ)

merged_subs <- c("GRL", "BMU", "PRI", "GIB", "GGY", "JEY", "ASM", "VIR",
                 "ABW", "FRO", "NCL", "PYF", "ALA", "REU", "HKG", "MAC")

truly_missing <- niq_all %>%
  filter(!iso3 %in% d_country$iso3, !iso3 %in% merged_subs) %>%
  left_join(pop_owid %>% select(iso3, pop = population), by = "iso3")

# Their unit-weighted score: z-score of 0 rate in each game, then average
rate_means <- colMeans(d_country[rate_player_cols])
rate_sds <- apply(d_country[rate_player_cols], 2, sd)
uw_zero <- mean((0 - rate_means) / rate_sds)

d_expanded <- bind_rows(
  d_country %>% select(iso3, country, NIQ, unit_weighted, pop),
  truly_missing %>% mutate(
    country = countrycode(iso3, "iso3c", "country.name"),
    unit_weighted = uw_zero
  ) %>% select(iso3, country, NIQ, unit_weighted, pop)
)

## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `country = countrycode(iso3, "iso3c", "country.name")`.
## Caused by warning:
## ! Some values were not matched unambiguously: KSV

d_exp_plot <- d_expanded %>% filter(!is.na(NIQ))
d_exp_plot$has_players <- d_exp_plot$iso3 %in% d_country$iso3

r_exp <- cor(d_exp_plot$NIQ, d_exp_plot$unit_weighted)
n_exp <- nrow(d_exp_plot)
p_exp <- cor.test(d_exp_plot$NIQ, d_exp_plot$unit_weighted)$p.value

ggplot(d_exp_plot, aes(NIQ, unit_weighted, label = country)) +
  geom_point(aes(size = log10(pop), shape = has_players), alpha = 0.6) +
  scale_shape_manual(values = c("TRUE" = 16, "FALSE" = 4), guide = "none") +
  geom_smooth(method = "lm", se = TRUE) +
  geom_smooth(method = "loess", se = FALSE, linetype = "dashed", color = "red") +
  geom_text_repel(size = 2, max.overlaps = 20) +
  scale_size_continuous(guide = "none") +
  annotate("text", x = min(d_exp_plot$NIQ, na.rm = TRUE) + 1,
           y = max(d_exp_plot$unit_weighted) * 0.95,
           label = sprintf("r = %.2f, n = %d, p = %.1e\n(x = 0 players in any game)", r_exp, n_exp, p_exp),
           hjust = 0, size = 3.5) +
  labs(y = "Esports composite (unit-weighted per capita)", x = "NIQ",
       title = "Including countries with 0 esports players")

## `geom_smooth()` using formula = 'y ~ x'

## Warning: The following aesthetics were dropped during statistical transformation: label.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

## `geom_smooth()` using formula = 'y ~ x'

## Warning: The following aesthetics were dropped during statistical transformation: label.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_text_repel()`).

## Warning: ggrepel: 122 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

GG_save("figs/NIQ_vs_esports_expanded.png")

## `geom_smooth()` using formula = 'y ~ x'

## Warning: The following aesthetics were dropped during statistical transformation: label.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

## `geom_smooth()` using formula = 'y ~ x'

## Warning: The following aesthetics were dropped during statistical transformation: label.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_text_repel()`).

## Warning: ggrepel: 103 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Nonlinearity test

# Test nonlinearity: linear vs restricted cubic spline
fit_linear <- rms::ols(unit_weighted ~ NIQ, data = d_plot)
fit_spline <- rms::ols(unit_weighted ~ rcs(NIQ, 3), data = d_plot)
fit_linear

## Linear Regression Model
## 
## rms::ols(formula = unit_weighted ~ NIQ, data = d_plot)
## 
##                 Model Likelihood    Discrimination    
##                       Ratio Test           Indexes    
## Obs     141    LR chi2     88.56    R2       0.466    
## sigma0.3922    d.f.            1    R2 adj   0.463    
## d.f.    139    Pr(> chi2) 0.0000    g        0.417    
## 
## Residuals
## 
##      Min       1Q   Median       3Q      Max 
## -0.77832 -0.19062 -0.07712  0.07682  1.71494 
## 
## 
##           Coef    S.E.   t      Pr(>|t|)
## Intercept -3.1990 0.2924 -10.94 <0.0001 
## NIQ        0.0372 0.0034  11.02 <0.0001

fit_spline

## Linear Regression Model
## 
## rms::ols(formula = unit_weighted ~ rcs(NIQ, 3), data = d_plot)
## 
##                 Model Likelihood    Discrimination    
##                       Ratio Test           Indexes    
## Obs     141    LR chi2    110.16    R2       0.542    
## sigma0.3646    d.f.            2    R2 adj   0.536    
## d.f.    138    Pr(> chi2) 0.0000    g        0.416    
## 
## Residuals
## 
##      Min       1Q   Median       3Q      Max 
## -0.96263 -0.11055 -0.02360  0.02648  1.58026 
## 
## 
##           Coef    S.E.   t     Pr(>|t|)
## Intercept -0.6305 0.6021 -1.05 0.2969  
## NIQ        0.0040 0.0076  0.52 0.6047  
## NIQ'       0.0486 0.0102  4.78 <0.0001

lrtest(fit_linear, fit_spline)

## 
## Model 1: unit_weighted ~ NIQ
## Model 2: unit_weighted ~ rcs(NIQ, 3)
## 
## L.R. Chisq       d.f.          P 
##   2.16e+01   1.00e+00   3.35e-06

Which countries win most at esports?

02 April, 2026

Init

Data

Load scraped data

Population data (OWID)

National IQ data

Build country-level dataset

Analysis

Per-capita rates

Factor analysis: raw per-capita rates

Factor analysis: EB shrunk estimates (ebpm)

Factor analysis: old method (log-count residuals)

Game-level correlation matrix

Unit-weighted composites

Hierarchical FA

Compare composites

Country rankings

Comparison with top 1000 global players

Correlations with NIQ

FA method comparison

Disattenuation for NIQ measurement error

NIQ scatter plots

Regressions with controls

Expanded sample: including 0-player countries

Nonlinearity test

Meta