Analysis
Per-capita rates
# Per-capita rates per game (per million)
for (g in game_names) {
d_country[[paste0("rate_players_", g)]] <- d_country[[paste0("players_", g)]] / (d_country$pop / 1e6)
d_country[[paste0("rate_prize_", g)]] <- d_country[[paste0("prize_", g)]] / (d_country$pop / 1e6)
}
d_country$log_pop <- log10(d_country$pop)
Factor analysis: raw per-capita rates
rate_player_cols <- paste0("rate_players_", game_names)
rate_prize_cols <- paste0("rate_prize_", game_names)
# Player count rates
fa_players_raw <- d_country[rate_player_cols] %>%
fa(nfactors = 1, fm = "ml")
fa_players_raw
## Factor Analysis using method = ml
## Call: fa(r = ., nfactors = 1, fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
## ML1 h2 u2 com
## rate_players_aov 0.25 6.2e-02 0.94 1
## rate_players_apex 0.74 5.5e-01 0.45 1
## rate_players_brawl_stars 0.28 7.9e-02 0.92 1
## rate_players_chess 0.12 1.4e-02 0.99 1
## rate_players_clash_royale 0.29 8.5e-02 0.92 1
## rate_players_cod_mobile 0.26 6.6e-02 0.93 1
## rate_players_cod_warzone 0.23 5.5e-02 0.95 1
## rate_players_crossfire 0.17 3.0e-02 0.97 1
## rate_players_cs 0.76 5.8e-01 0.42 1
## rate_players_cs2 0.61 3.7e-01 0.63 1
## rate_players_csgo 0.74 5.5e-01 0.45 1
## rate_players_dota2 0.63 3.9e-01 0.61 1
## rate_players_fortnite 0.50 2.5e-01 0.75 1
## rate_players_freefire 0.01 9.5e-05 1.00 1
## rate_players_hots 0.72 5.2e-01 0.48 1
## rate_players_hs 0.88 7.7e-01 0.23 1
## rate_players_lol 0.54 2.9e-01 0.71 1
## rate_players_lol_wr 0.36 1.3e-01 0.87 1
## rate_players_mlbb 0.20 4.0e-02 0.96 1
## rate_players_mtg_arena 0.58 3.3e-01 0.67 1
## rate_players_ow 0.71 5.1e-01 0.49 1
## rate_players_ow2 0.66 4.4e-01 0.56 1
## rate_players_pubg 0.78 6.1e-01 0.39 1
## rate_players_pubg_mobile 0.05 2.4e-03 1.00 1
## rate_players_r6 0.70 4.9e-01 0.51 1
## rate_players_rocket_league 0.56 3.1e-01 0.69 1
## rate_players_sc2 0.73 5.3e-01 0.47 1
## rate_players_smite 0.54 2.9e-01 0.71 1
## rate_players_tft 0.75 5.7e-01 0.43 1
## rate_players_valorant 0.52 2.7e-01 0.73 1
##
## ML1
## SS loadings 9.19
## Proportion Var 0.31
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## df null model = 435 with the objective function = 33.7 with Chi Square = 4491
## df of the model are 405 and the objective function was 24
##
## The root mean square of the residuals (RMSR) is 0.17
## The df corrected root mean square of the residuals is 0.17
##
## The harmonic n.obs is 145 with the empirical chi square 3453 with prob < 0
## The total n.obs was 145 with Likelihood Chi Square = 3175 with prob < 0
##
## Tucker Lewis Index of factoring reliability = 0.262
## RMSEA index = 0.217 and the 90 % confidence intervals are 0.211 0.225
## BIC = 1159
## Fit based upon off diagonal values = 0.77
## Measures of factor score adequacy
## ML1
## Correlation of (regression) scores with factors 0.98
## Multiple R square of scores with factors 0.95
## Minimum correlation of possible factor scores 0.90
d_country$fa_players_raw <- fa_players_raw$scores[,1]
# Prize money rates
fa_prize_raw <- d_country[rate_prize_cols] %>%
fa(nfactors = 1, fm = "ml")
fa_prize_raw
## Factor Analysis using method = ml
## Call: fa(r = ., nfactors = 1, fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
## ML1 h2 u2 com
## rate_prize_aov 0.13 1.6e-02 0.98 1
## rate_prize_apex 0.66 4.4e-01 0.56 1
## rate_prize_brawl_stars 0.19 3.5e-02 0.96 1
## rate_prize_chess 0.04 1.7e-03 1.00 1
## rate_prize_clash_royale 0.24 5.8e-02 0.94 1
## rate_prize_cod_mobile 0.12 1.5e-02 0.99 1
## rate_prize_cod_warzone -0.03 1.1e-03 1.00 1
## rate_prize_crossfire 0.04 1.5e-03 1.00 1
## rate_prize_cs 0.72 5.1e-01 0.49 1
## rate_prize_cs2 0.41 1.7e-01 0.83 1
## rate_prize_csgo 0.68 4.7e-01 0.53 1
## rate_prize_dota2 0.60 3.6e-01 0.64 1
## rate_prize_fortnite 0.62 3.8e-01 0.62 1
## rate_prize_freefire -0.02 2.7e-04 1.00 1
## rate_prize_hots 0.76 5.8e-01 0.42 1
## rate_prize_hs 0.79 6.2e-01 0.38 1
## rate_prize_lol 0.80 6.4e-01 0.36 1
## rate_prize_lol_wr 0.33 1.1e-01 0.89 1
## rate_prize_mlbb 0.03 6.7e-04 1.00 1
## rate_prize_mtg_arena 0.38 1.5e-01 0.85 1
## rate_prize_ow 0.67 4.4e-01 0.56 1
## rate_prize_ow2 0.69 4.8e-01 0.52 1
## rate_prize_pubg 0.75 5.6e-01 0.44 1
## rate_prize_pubg_mobile 0.00 1.4e-05 1.00 1
## rate_prize_r6 0.68 4.7e-01 0.53 1
## rate_prize_rocket_league 0.45 2.0e-01 0.80 1
## rate_prize_sc2 0.68 4.6e-01 0.54 1
## rate_prize_smite 0.68 4.7e-01 0.53 1
## rate_prize_tft 0.60 3.6e-01 0.64 1
## rate_prize_valorant 0.50 2.5e-01 0.75 1
##
## ML1
## SS loadings 8.26
## Proportion Var 0.28
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## df null model = 435 with the objective function = 29.6 with Chi Square = 3935
## df of the model are 405 and the objective function was 21.2
##
## The root mean square of the residuals (RMSR) is 0.14
## The df corrected root mean square of the residuals is 0.15
##
## The harmonic n.obs is 145 with the empirical chi square 2598 with prob < 1e-315
## The total n.obs was 145 with Likelihood Chi Square = 2803 with prob < 0
##
## Tucker Lewis Index of factoring reliability = 0.26
## RMSEA index = 0.202 and the 90 % confidence intervals are 0.196 0.21
## BIC = 788
## Fit based upon off diagonal values = 0.78
## Measures of factor score adequacy
## ML1
## Correlation of (regression) scores with factors 0.97
## Multiple R square of scores with factors 0.94
## Minimum correlation of possible factor scores 0.88
d_country$fa_prize_raw <- fa_prize_raw$scores[,1]
# Correlation between player-based and prize-based composite
cor(d_country$fa_players_raw, d_country$fa_prize_raw, use = "complete.obs")
## [1] 0.829
Factor analysis: EB shrunk estimates (ebpm)
# Empirical Bayes via ebpm_gamma (Poisson means with Gamma prior)
for (g in game_names) {
fit <- ebpm_gamma(d_country[[paste0("players_", g)]], d_country$pop)
d_country[[paste0("eb_players_", g)]] <- fit$posterior$mean
}
eb_player_cols <- paste0("eb_players_", game_names)
fa_eb <- d_country[eb_player_cols] %>% fa(nfactors = 1, fm = "ml")
fa_eb
## Factor Analysis using method = ml
## Call: fa(r = ., nfactors = 1, fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
## ML1 h2 u2 com
## eb_players_aov 0.25 6.2e-02 0.94 1
## eb_players_apex 0.79 6.3e-01 0.37 1
## eb_players_brawl_stars 0.30 9.2e-02 0.91 1
## eb_players_chess 0.24 5.8e-02 0.94 1
## eb_players_clash_royale 0.34 1.2e-01 0.88 1
## eb_players_cod_mobile 0.30 8.9e-02 0.91 1
## eb_players_cod_warzone 0.39 1.5e-01 0.85 1
## eb_players_crossfire 0.17 3.0e-02 0.97 1
## eb_players_cs 0.75 5.6e-01 0.44 1
## eb_players_cs2 0.57 3.3e-01 0.67 1
## eb_players_csgo 0.71 5.1e-01 0.49 1
## eb_players_dota2 0.61 3.8e-01 0.62 1
## eb_players_fortnite 0.73 5.3e-01 0.47 1
## eb_players_freefire 0.00 6.2e-07 1.00 1
## eb_players_hots 0.71 5.1e-01 0.49 1
## eb_players_hs 0.86 7.4e-01 0.26 1
## eb_players_lol 0.57 3.2e-01 0.68 1
## eb_players_lol_wr 0.36 1.3e-01 0.87 1
## eb_players_mlbb 0.19 3.7e-02 0.96 1
## eb_players_mtg_arena 0.60 3.6e-01 0.64 1
## eb_players_ow 0.74 5.5e-01 0.45 1
## eb_players_ow2 0.75 5.7e-01 0.43 1
## eb_players_pubg 0.78 6.1e-01 0.39 1
## eb_players_pubg_mobile 0.05 2.3e-03 1.00 1
## eb_players_r6 0.74 5.4e-01 0.46 1
## eb_players_rocket_league 0.55 3.1e-01 0.69 1
## eb_players_sc2 0.78 6.1e-01 0.39 1
## eb_players_smite 0.57 3.3e-01 0.67 1
## eb_players_tft 0.70 4.8e-01 0.52 1
## eb_players_valorant 0.53 2.8e-01 0.72 1
##
## ML1
## SS loadings 9.91
## Proportion Var 0.33
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## df null model = 435 with the objective function = 35.5 with Chi Square = 4727
## df of the model are 405 and the objective function was 24.7
##
## The root mean square of the residuals (RMSR) is 0.16
## The df corrected root mean square of the residuals is 0.17
##
## The harmonic n.obs is 145 with the empirical chi square 3404 with prob < 0
## The total n.obs was 145 with Likelihood Chi Square = 3272 with prob < 0
##
## Tucker Lewis Index of factoring reliability = 0.279
## RMSEA index = 0.221 and the 90 % confidence intervals are 0.215 0.229
## BIC = 1257
## Fit based upon off diagonal values = 0.8
## Measures of factor score adequacy
## ML1
## Correlation of (regression) scores with factors 0.98
## Multiple R square of scores with factors 0.95
## Minimum correlation of possible factor scores 0.91
d_country$fa_eb <- fa_eb$scores[,1]
Factor analysis: old method (log-count residuals)
# Replication of 2019 approach: log(count+1) ~ log(pop), FA on residuals
for (g in game_names) {
y <- log10(d_country[[paste0("players_", g)]] + 1)
fit <- lm(y ~ d_country$log_pop)
d_country[[paste0("old_resid_", g)]] <- resid(fit)
}
old_resid_cols <- paste0("old_resid_", game_names)
fa_old <- d_country[old_resid_cols] %>% fa(nfactors = 1, fm = "ml")
fa_old
## Factor Analysis using method = ml
## Call: fa(r = ., nfactors = 1, fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
## ML1 h2 u2 com
## old_resid_aov 0.56 0.318 0.68 1
## old_resid_apex 0.91 0.837 0.16 1
## old_resid_brawl_stars 0.68 0.467 0.53 1
## old_resid_chess 0.63 0.395 0.60 1
## old_resid_clash_royale 0.74 0.552 0.45 1
## old_resid_cod_mobile 0.53 0.286 0.71 1
## old_resid_cod_warzone 0.70 0.491 0.51 1
## old_resid_crossfire 0.62 0.389 0.61 1
## old_resid_cs 0.83 0.696 0.30 1
## old_resid_cs2 0.73 0.532 0.47 1
## old_resid_csgo 0.81 0.663 0.34 1
## old_resid_dota2 0.66 0.432 0.57 1
## old_resid_fortnite 0.85 0.715 0.29 1
## old_resid_freefire 0.14 0.021 0.98 1
## old_resid_hots 0.88 0.781 0.22 1
## old_resid_hs 0.93 0.862 0.14 1
## old_resid_lol 0.89 0.789 0.21 1
## old_resid_lol_wr 0.61 0.373 0.63 1
## old_resid_mlbb 0.24 0.056 0.94 1
## old_resid_mtg_arena 0.74 0.554 0.45 1
## old_resid_ow 0.90 0.813 0.19 1
## old_resid_ow2 0.85 0.726 0.27 1
## old_resid_pubg 0.89 0.788 0.21 1
## old_resid_pubg_mobile 0.31 0.097 0.90 1
## old_resid_r6 0.85 0.719 0.28 1
## old_resid_rocket_league 0.75 0.565 0.44 1
## old_resid_sc2 0.91 0.832 0.17 1
## old_resid_smite 0.73 0.527 0.47 1
## old_resid_tft 0.88 0.783 0.22 1
## old_resid_valorant 0.82 0.677 0.32 1
##
## ML1
## SS loadings 16.73
## Proportion Var 0.56
##
## Mean item complexity = 1
## Test of the hypothesis that 1 factor is sufficient.
##
## df null model = 435 with the objective function = 38.2 with Chi Square = 5091
## df of the model are 405 and the objective function was 13.5
##
## The root mean square of the residuals (RMSR) is 0.11
## The df corrected root mean square of the residuals is 0.11
##
## The harmonic n.obs is 145 with the empirical chi square 1552 with prob < 1.2e-133
## The total n.obs was 145 with Likelihood Chi Square = 1794 with prob < 1.3e-173
##
## Tucker Lewis Index of factoring reliability = 0.678
## RMSEA index = 0.154 and the 90 % confidence intervals are 0.147 0.162
## BIC = -221
## Fit based upon off diagonal values = 0.96
## Measures of factor score adequacy
## ML1
## Correlation of (regression) scores with factors 0.99
## Multiple R square of scores with factors 0.98
## Minimum correlation of possible factor scores 0.97
d_country$fa_old_method <- fa_old$scores[,1]
Game-level correlation matrix
# Correlation matrix of raw per-capita rates across games
cor_raw <- cor(d_country[rate_player_cols], use = "complete.obs")
colnames(cor_raw) <- rownames(cor_raw) <- game_names
# EB per-capita correlations
cor_eb <- cor(d_country[eb_player_cols], use = "complete.obs")
colnames(cor_eb) <- rownames(cor_eb) <- game_names
# Combined matrix: upper triangle = raw per-capita, lower triangle = EB per-capita
cor_combined <- cor_raw
cor_combined[lower.tri(cor_combined)] <- cor_eb[lower.tri(cor_eb)]
diag(cor_combined) <- NA
round(cor_combined, 2)
## aov apex brawl_stars chess clash_royale cod_mobile cod_warzone
## aov NA 0.32 0.56 -0.06 0.10 0.65 -0.04
## apex 0.33 NA 0.16 -0.01 0.29 0.24 0.24
## brawl_stars 0.54 0.17 NA -0.03 0.11 0.65 -0.02
## chess -0.08 0.10 0.01 NA -0.05 -0.07 -0.01
## clash_royale 0.09 0.30 0.15 0.05 NA 0.12 0.01
## cod_mobile 0.70 0.29 0.64 -0.04 0.20 NA 0.00
## cod_warzone -0.04 0.32 0.01 0.11 0.13 0.09 NA
## crossfire 0.03 0.12 -0.04 0.03 0.15 0.00 0.03
## cs 0.00 0.64 0.04 0.27 0.17 0.01 0.24
## cs2 -0.08 0.34 -0.03 0.40 0.06 -0.01 0.17
## csgo -0.02 0.42 0.03 0.33 0.08 0.03 0.45
## dota2 0.10 0.47 0.20 0.18 0.15 0.16 0.12
## fortnite -0.05 0.63 0.06 0.18 0.13 0.06 0.65
## freefire 0.55 0.06 0.12 -0.13 -0.05 0.40 -0.10
## hots 0.58 0.65 0.67 0.11 0.27 0.54 0.13
## hs 0.22 0.68 0.28 0.21 0.38 0.26 0.19
## lol 0.07 0.36 0.09 0.17 0.14 0.09 0.82
## lol_wr 0.79 0.31 0.75 -0.07 0.12 0.68 -0.03
## mlbb 0.56 0.06 0.85 -0.07 -0.05 0.63 -0.05
## mtg_arena -0.07 0.56 0.02 0.10 0.37 0.07 0.24
## ow 0.10 0.51 0.15 0.16 0.17 0.12 0.64
## ow2 0.18 0.49 0.26 0.21 0.24 0.23 0.19
## pubg 0.26 0.71 0.11 0.19 0.28 0.18 0.22
## pubg_mobile 0.01 -0.03 0.00 -0.02 -0.01 0.02 -0.02
## r6 0.22 0.70 0.34 0.05 0.30 0.31 0.19
## rocket_league -0.04 0.31 0.06 0.23 0.04 0.05 0.42
## sc2 0.28 0.66 0.32 0.14 0.37 0.27 0.18
## smite -0.06 0.61 0.02 0.16 0.07 0.07 0.27
## tft 0.15 0.53 0.26 0.16 0.49 0.23 0.24
## valorant 0.27 0.29 0.35 0.16 0.14 0.29 0.06
## crossfire cs cs2 csgo dota2 fortnite freefire hots hs
## aov 0.03 0.00 -0.09 -0.03 0.10 -0.06 0.54 0.58 0.22
## apex 0.12 0.63 0.32 0.41 0.46 0.44 0.07 0.61 0.65
## brawl_stars -0.03 0.03 -0.04 0.00 0.18 0.03 0.13 0.64 0.26
## chess -0.01 0.13 0.21 0.18 0.05 0.03 -0.10 0.06 0.09
## clash_royale 0.16 0.17 0.03 0.03 0.16 0.03 -0.01 0.23 0.36
## cod_mobile -0.01 0.01 0.01 0.00 0.18 -0.01 0.30 0.53 0.23
## cod_warzone 0.00 0.12 0.07 0.36 0.08 0.40 -0.09 0.05 0.07
## crossfire NA 0.17 0.08 0.07 0.03 0.03 0.29 0.11 0.21
## cs 0.18 NA 0.71 0.72 0.57 0.43 -0.06 0.46 0.66
## cs2 0.09 0.71 NA 0.79 0.53 0.31 -0.09 0.29 0.50
## csgo 0.08 0.72 0.79 NA 0.55 0.49 -0.09 0.39 0.55
## dota2 0.03 0.58 0.54 0.54 NA 0.23 -0.03 0.39 0.54
## fortnite 0.06 0.62 0.46 0.68 0.35 NA -0.10 0.28 0.35
## freefire 0.27 -0.06 -0.08 -0.07 -0.03 -0.10 NA 0.09 -0.02
## hots 0.11 0.44 0.24 0.34 0.40 0.41 0.08 NA 0.64
## hs 0.22 0.65 0.47 0.51 0.55 0.52 -0.03 0.62 NA
## lol 0.12 0.38 0.37 0.66 0.29 0.72 -0.03 0.28 0.40
## lol_wr -0.01 0.02 0.00 0.06 0.21 0.07 0.38 0.69 0.34
## mlbb -0.04 -0.01 -0.05 0.03 0.27 -0.05 0.20 0.56 0.13
## mtg_arena 0.45 0.48 0.39 0.37 0.42 0.42 -0.03 0.24 0.63
## ow 0.10 0.48 0.42 0.73 0.44 0.71 -0.02 0.41 0.55
## ow2 0.04 0.52 0.44 0.65 0.36 0.55 -0.04 0.61 0.62
## pubg 0.06 0.64 0.43 0.46 0.46 0.54 0.09 0.49 0.74
## pubg_mobile -0.01 -0.03 -0.02 0.24 0.21 -0.01 0.00 -0.02 -0.04
## r6 0.09 0.42 0.27 0.30 0.51 0.45 -0.01 0.60 0.72
## rocket_league 0.06 0.36 0.39 0.68 0.17 0.66 -0.07 0.28 0.40
## sc2 0.07 0.57 0.31 0.41 0.50 0.44 -0.04 0.67 0.71
## smite 0.10 0.54 0.40 0.36 0.29 0.67 -0.03 0.40 0.47
## tft 0.31 0.51 0.33 0.39 0.37 0.44 -0.03 0.61 0.76
## valorant 0.14 0.32 0.40 0.64 0.47 0.30 0.06 0.45 0.41
## lol lol_wr mlbb mtg_arena ow ow2 pubg pubg_mobile r6
## aov 0.05 0.79 0.58 -0.06 0.09 0.10 0.25 -0.01 0.23
## apex 0.38 0.29 0.07 0.53 0.50 0.28 0.70 -0.05 0.69
## brawl_stars 0.06 0.74 0.83 0.00 0.12 0.15 0.10 -0.02 0.34
## chess 0.09 -0.07 -0.06 0.01 0.09 0.18 0.08 0.03 -0.02
## clash_royale 0.08 0.11 -0.03 0.31 0.11 0.10 0.28 -0.05 0.28
## cod_mobile 0.05 0.64 0.66 0.04 0.08 0.12 0.14 0.00 0.29
## cod_warzone 0.76 -0.04 -0.05 0.09 0.61 0.09 0.13 -0.03 0.08
## crossfire 0.10 -0.01 -0.04 0.43 0.08 0.01 0.06 -0.03 0.10
## cs 0.36 0.02 -0.01 0.49 0.47 0.46 0.65 -0.04 0.42
## cs2 0.35 -0.01 -0.05 0.43 0.42 0.48 0.45 -0.03 0.26
## csgo 0.65 0.07 0.03 0.37 0.74 0.70 0.49 0.24 0.28
## dota2 0.28 0.24 0.29 0.43 0.42 0.28 0.46 0.26 0.50
## fortnite 0.53 0.03 -0.05 0.27 0.53 0.34 0.38 -0.02 0.29
## freefire -0.04 0.38 0.21 0.00 -0.03 -0.04 0.09 -0.02 0.01
## hots 0.27 0.66 0.56 0.23 0.41 0.53 0.50 -0.03 0.58
## hs 0.38 0.32 0.13 0.60 0.54 0.57 0.74 -0.06 0.70
## lol NA 0.09 0.02 0.27 0.85 0.35 0.36 -0.04 0.22
## lol_wr 0.11 NA 0.73 0.08 0.13 0.15 0.30 0.19 0.32
## mlbb 0.03 0.72 NA -0.09 0.07 0.10 0.03 0.15 0.23
## mtg_arena 0.29 0.07 -0.09 NA 0.37 0.17 0.45 -0.05 0.60
## ow 0.83 0.16 0.09 0.41 NA 0.66 0.53 -0.03 0.48
## ow2 0.39 0.26 0.18 0.29 0.70 NA 0.46 -0.03 0.32
## pubg 0.36 0.32 0.03 0.44 0.55 0.57 NA -0.04 0.57
## pubg_mobile -0.03 0.10 0.11 -0.02 -0.03 -0.02 -0.03 NA -0.04
## r6 0.24 0.32 0.22 0.60 0.54 0.54 0.57 -0.03 NA
## rocket_league 0.52 0.04 -0.02 0.21 0.66 0.74 0.33 -0.02 0.33
## sc2 0.30 0.36 0.24 0.39 0.50 0.63 0.79 0.23 0.66
## smite 0.25 0.11 -0.06 0.36 0.29 0.38 0.44 -0.02 0.47
## tft 0.33 0.27 0.10 0.55 0.39 0.49 0.47 -0.02 0.55
## valorant 0.22 0.40 0.37 0.26 0.34 0.48 0.31 0.73 0.30
## rocket_league sc2 smite tft valorant
## aov -0.05 0.28 -0.04 0.15 0.22
## apex 0.27 0.62 0.58 0.44 0.22
## brawl_stars 0.06 0.30 0.01 0.21 0.28
## chess 0.30 0.02 0.04 0.12 0.08
## clash_royale -0.02 0.32 0.07 0.34 0.07
## cod_mobile 0.00 0.25 0.05 0.18 0.22
## cod_warzone 0.33 0.08 0.12 0.07 -0.01
## crossfire 0.04 0.07 0.11 0.27 0.11
## cs 0.36 0.55 0.55 0.56 0.31
## cs2 0.41 0.29 0.41 0.47 0.39
## csgo 0.68 0.41 0.35 0.57 0.64
## dota2 0.17 0.52 0.28 0.37 0.49
## fortnite 0.46 0.29 0.45 0.31 0.18
## freefire -0.09 -0.03 0.00 0.00 0.04
## hots 0.33 0.62 0.36 0.63 0.43
## hs 0.44 0.66 0.47 0.79 0.39
## lol 0.51 0.26 0.23 0.33 0.20
## lol_wr 0.02 0.39 0.12 0.25 0.42
## mlbb -0.03 0.26 -0.06 0.11 0.36
## mtg_arena 0.15 0.38 0.37 0.46 0.21
## ow 0.68 0.43 0.25 0.48 0.32
## ow2 0.81 0.36 0.22 0.68 0.48
## pubg 0.35 0.76 0.45 0.49 0.29
## pubg_mobile -0.01 0.33 -0.04 -0.05 0.75
## r6 0.28 0.64 0.46 0.48 0.24
## rocket_league NA 0.18 0.28 0.52 0.39
## sc2 0.22 NA 0.40 0.44 0.52
## smite 0.35 0.41 NA 0.34 0.13
## tft 0.31 0.54 0.37 NA 0.43
## valorant 0.39 0.49 0.17 0.36 NA
# Summary stats
raw_upper <- cor_raw[upper.tri(cor_raw)]
eb_lower <- cor_eb[lower.tri(cor_eb)]
tibble(
method = c("raw_pc (upper)", "EB_pc (lower)"),
mean = c(mean(raw_upper), mean(eb_lower)),
median = c(median(raw_upper), median(eb_lower)),
sd = c(sd(raw_upper), sd(eb_lower)),
min = c(min(raw_upper), min(eb_lower)),
max = c(max(raw_upper), max(eb_lower))
)
# How similar are the two sets of inter-game correlations?
cor(raw_upper, eb_lower)
## [1] 0.0824
# Full game name mapping
game_labels <- c(
aov = "Arena of Valor", apex = "Apex Legends", brawl_stars = "Brawl Stars",
chess = "Chess.com", clash_royale = "Clash Royale", cod_mobile = "CoD Mobile",
cod_warzone = "CoD Warzone", crossfire = "CrossFire",
cs = "Counter-Strike", cs2 = "Counter-Strike 2", csgo = "CS:GO",
dota2 = "Dota 2", fortnite = "Fortnite", freefire = "Free Fire",
hots = "Heroes of Storm", hs = "Hearthstone", lol = "League of Legends",
lol_wr = "LoL Wild Rift", mlbb = "Mobile Legends",
mtg_arena = "MTG Arena", ow = "Overwatch", ow2 = "Overwatch 2",
pubg = "PUBG", pubg_mobile = "PUBG Mobile", r6 = "Rainbow Six",
rocket_league = "Rocket League", sc2 = "StarCraft II", smite = "SMITE",
tft = "Teamfight Tactics", valorant = "VALORANT"
)
# Order games by hierarchical clustering
dist_mat <- as.dist(1 - cor_raw)
hc <- hclust(dist_mat, method = "ward.D2")
game_order <- hc$labels[hc$order]
# Heatmap
cor_combined_long <- cor_combined %>%
as.data.frame() %>%
mutate(game1 = rownames(.)) %>%
pivot_longer(-game1, names_to = "game2", values_to = "r") %>%
mutate(
game1_full = factor(game_labels[game1], levels = game_labels[game_order]),
game2_full = factor(game_labels[game2], levels = rev(game_labels[game_order]))
)
ggplot(cor_combined_long, aes(game1_full, game2_full, fill = r)) +
geom_tile() +
geom_text(aes(label = ifelse(is.na(r), "", sprintf("%.2f", r))), size = 1.8) +
scale_fill_gradient2(low = "blue", mid = "white", high = "red", midpoint = 0, na.value = "grey90") +
labs(title = "Game correlation matrix (upper = raw pc, lower = EB pc)",
x = NULL, y = NULL, fill = "r") +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 7),
axis.text.y = element_text(size = 7))

GG_save("figs/game_cor_matrix.png")
Unit-weighted composites
# Unit-weighted: standardize per-capita rates, average across games
rate_z <- scale(d_country[rate_player_cols])
d_country$unit_weighted <- rowMeans(rate_z, na.rm = TRUE)
prize_z <- scale(d_country[rate_prize_cols])
d_country$unit_weighted_prize <- rowMeans(prize_z, na.rm = TRUE)
Hierarchical FA
# Parallel analysis for number of factors
pa <- fa.parallel(d_country[rate_player_cols], fm = "ml", fa = "fa", plot = FALSE)
## Parallel analysis suggests that the number of factors = 5 and the number of components = NA
pa$nfact
## [1] 5
# Oblimin 1st order
fa6 <- fa(d_country[rate_player_cols], nfactors = pa$nfact, fm = "ml", rotate = "oblimin")
## Loading required namespace: GPArotation
print(fa6$loadings, cutoff = 0.3, sort = TRUE)
##
## Loadings:
## ML5 ML3 ML2 ML4 ML1
## rate_players_apex 0.802
## rate_players_hs 0.709
## rate_players_mtg_arena 0.628
## rate_players_pubg 0.775
## rate_players_r6 0.787
## rate_players_sc2 0.807 0.326
## rate_players_smite 0.571
## rate_players_aov 0.756
## rate_players_brawl_stars 0.894
## rate_players_cod_mobile 0.749
## rate_players_hots 0.391 0.628
## rate_players_lol_wr 0.823
## rate_players_mlbb 0.889
## rate_players_cs 0.490 0.569
## rate_players_cs2 0.877
## rate_players_csgo 0.760 0.337
## rate_players_ow2 0.687
## rate_players_rocket_league 0.532 0.361
## rate_players_cod_warzone 0.910
## rate_players_lol 0.906
## rate_players_ow 0.723
## rate_players_pubg_mobile 1.015
## rate_players_valorant 0.393 0.740
## rate_players_chess
## rate_players_clash_royale 0.487
## rate_players_crossfire
## rate_players_dota2 0.379
## rate_players_fortnite 0.423
## rate_players_freefire 0.318
## rate_players_tft 0.384 0.460
##
## ML5 ML3 ML2 ML4 ML1
## SS loadings 4.875 4.185 3.282 2.643 1.879
## Proportion Var 0.162 0.139 0.109 0.088 0.063
## Cumulative Var 0.162 0.302 0.411 0.500 0.562
fa6$Phi %>% round(2)
## ML5 ML3 ML2 ML4 ML1
## ML5 1.00 0.29 0.46 0.29 0.08
## ML3 0.29 1.00 0.08 0.04 0.15
## ML2 0.46 0.08 1.00 0.36 0.14
## ML4 0.29 0.04 0.36 1.00 0.04
## ML1 0.08 0.15 0.14 0.04 1.00
# Omega hierarchical (Schmid-Leiman)
om <- omega(d_country[rate_player_cols], nfactors = pa$nfact, fm = "ml", plot = FALSE)
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
om$omega_h
## [1] 0.617
om$omega.tot
## [1] 0.954
# Hierarchical g scores
g_loadings <- om$schmid$sl[, 1]
d_country$fa_hierarchical <- as.vector(scale(d_country[rate_player_cols]) %*% g_loadings / sum(g_loadings))
Compare composites
d_country %>%
select(fa_players_raw, fa_eb, unit_weighted, fa_hierarchical, fa_old_method, pop) %>%
cor(use = "complete.obs") %>%
round(3)
## fa_players_raw fa_eb unit_weighted fa_hierarchical
## fa_players_raw 1.000 0.989 0.963 0.995
## fa_eb 0.989 1.000 0.957 0.984
## unit_weighted 0.963 0.957 1.000 0.976
## fa_hierarchical 0.995 0.984 0.976 1.000
## fa_old_method 0.658 0.696 0.679 0.651
## pop -0.092 -0.104 -0.093 -0.096
## fa_old_method pop
## fa_players_raw 0.658 -0.092
## fa_eb 0.696 -0.104
## unit_weighted 0.679 -0.093
## fa_hierarchical 0.651 -0.096
## fa_old_method 1.000 0.073
## pop 0.073 1.000
Country rankings
d_country %>%
select(country, iso3, pop, unit_weighted, unit_weighted_prize, fa_players_raw, fa_old_method) %>%
arrange(desc(unit_weighted)) %>%
head(30)
Comparison with top 1000 global players
# Scrape top 1000 players by earnings
top1000_urls <- c(
"https://www.esportsearnings.com/players",
sprintf("https://www.esportsearnings.com/players/highest-earnings-top-%d", seq(200, 1000, 100))
)
top1000 <- map_dfr(top1000_urls, function(url) {
Sys.sleep(1.5)
page <- rvest::read_html(url)
tbl_node <- page %>% rvest::html_nodes(".detail_list_table") %>% .[[2]]
flags <- tbl_node %>% rvest::html_nodes("img") %>% rvest::html_attr("src") %>%
str_extract("[a-z]{2}(?=\\.png)")
games <- tbl_node %>% rvest::html_nodes("td.detail_list_game a") %>% rvest::html_text()
tbl_data <- tbl_node %>% rvest::html_table()
tibble(
iso2 = flags,
game = games,
earnings = tbl_data[["Total (Overall)"]] %>% str_remove_all("[\\$,]") %>% as.numeric()
)
})
# Apply subnational merges
top1000$iso3 <- countrycode(top1000$iso2, "iso2c", "iso3c", warn = FALSE)
sub_map_iso2 <- c("hk" = "CHN", "mo" = "CHN", "pr" = "USA", "aw" = "NLD",
"fo" = "DNK", "nc" = "FRA", "bm" = "GBR", "gi" = "GBR",
"gl" = "DNK", "pf" = "FRA", "re" = "FRA")
for (code in names(sub_map_iso2)) {
top1000$iso3[top1000$iso2 == code] <- sub_map_iso2[code]
}
top1000_country <- top1000 %>%
count(iso3, name = "top1000_count") %>%
filter(!is.na(iso3))
d_country <- d_country %>%
left_join(top1000_country, by = "iso3") %>%
mutate(top1000_count = replace_na(top1000_count, 0),
top1000_per_million = top1000_count / (pop / 1e6))
# Correlations: top 1000 per capita vs FA composites and IQ
d_country %>%
select(top1000_per_million, fa_players_raw, fa_prize_raw, fa_eb, fa_old_method, NIQ, log_pop) %>%
cor(use = "complete.obs")
## top1000_per_million fa_players_raw fa_prize_raw fa_eb
## top1000_per_million 1.0000 0.648 0.86392 0.670
## fa_players_raw 0.6476 1.000 0.82852 0.990
## fa_prize_raw 0.8639 0.829 1.00000 0.852
## fa_eb 0.6704 0.990 0.85219 1.000
## fa_old_method 0.4219 0.667 0.58363 0.699
## NIQ 0.3895 0.657 0.51808 0.665
## log_pop -0.0754 -0.121 0.00178 -0.153
## fa_old_method NIQ log_pop
## top1000_per_million 0.4219 0.3895 -0.07545
## fa_players_raw 0.6675 0.6574 -0.12055
## fa_prize_raw 0.5836 0.5181 0.00178
## fa_eb 0.6994 0.6652 -0.15276
## fa_old_method 1.0000 0.8082 0.04210
## NIQ 0.8082 1.0000 -0.02818
## log_pop 0.0421 -0.0282 1.00000
# Top countries by top 1000 per capita
d_country %>%
select(country, pop, top1000_count, top1000_per_million, fa_players_raw, fa_prize_raw) %>%
arrange(desc(top1000_per_million)) %>%
head(20)
Correlations with NIQ
# Total players and prize (raw sums, not per capita)
d_country$total_players <- rowSums(d_country[paste0("players_", game_names)])
d_country$total_prize <- rowSums(d_country[paste0("prize_", game_names)])
# Main composites vs NIQ
cor_niq <- d_country %>%
select(
NIQ,
`Players per capita (unit-weighted)` = unit_weighted,
`Prize per capita (unit-weighted)` = unit_weighted_prize,
`Top 1000 pc` = top1000_per_million,
`Total players` = total_players,
`Total prize` = total_prize,
pop
) %>%
cor(use = "pairwise.complete.obs")
round(cor_niq, 3)
## NIQ Players per capita (unit-weighted)
## NIQ 1.000 0.683
## Players per capita (unit-weighted) 0.683 1.000
## Prize per capita (unit-weighted) 0.617 0.844
## Top 1000 pc 0.390 0.574
## Total players 0.411 0.279
## Total prize 0.356 0.213
## pop 0.002 -0.093
## Prize per capita (unit-weighted) Top 1000 pc
## NIQ 0.617 0.390
## Players per capita (unit-weighted) 0.844 0.574
## Prize per capita (unit-weighted) 1.000 0.790
## Top 1000 pc 0.790 1.000
## Total players 0.394 0.213
## Total prize 0.393 0.274
## pop -0.014 -0.054
## Total players Total prize pop
## NIQ 0.411 0.356 0.002
## Players per capita (unit-weighted) 0.279 0.213 -0.093
## Prize per capita (unit-weighted) 0.394 0.393 -0.014
## Top 1000 pc 0.213 0.274 -0.054
## Total players 1.000 0.823 0.413
## Total prize 0.823 1.000 0.601
## pop 0.413 0.601 1.000
# Heatmap
cor_niq %>%
as.data.frame() %>%
mutate(var1 = rownames(.)) %>%
pivot_longer(-var1, names_to = "var2", values_to = "r") %>%
mutate(
var1 = factor(var1, levels = colnames(cor_niq)),
var2 = factor(var2, levels = rev(colnames(cor_niq)))
) %>%
ggplot(aes(var1, var2, fill = r)) +
geom_tile() +
geom_text(aes(label = sprintf("%.2f", r)), size = 3.5) +
scale_fill_gradient2(low = "blue", mid = "white", high = "red", midpoint = 0, na.value = "grey90",
limits = c(-0.2, 1)) +
labs(title = "Correlations: NIQ and esports composites", x = NULL, y = NULL, fill = "r") +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 9),
axis.text.y = element_text(size = 9))

GG_save("figs/NIQ_esports_cor_matrix.png")
FA method comparison
# Comparison of FA variants for player per-capita rates
cor_fa <- d_country %>%
select(
NIQ,
`Per capita 1-factor FA` = fa_players_raw,
`Per capita EB FA` = fa_eb,
`Per capita unit-weighted` = unit_weighted,
`Per capita hierarchical g` = fa_hierarchical,
`Log-residual FA` = fa_old_method,
pop
) %>%
cor(use = "pairwise.complete.obs")
cor_fa %>%
as.data.frame() %>%
mutate(var1 = rownames(.)) %>%
pivot_longer(-var1, names_to = "var2", values_to = "r") %>%
mutate(
var1 = factor(var1, levels = colnames(cor_fa)),
var2 = factor(var2, levels = rev(colnames(cor_fa)))
) %>%
ggplot(aes(var1, var2, fill = r)) +
geom_tile() +
geom_text(aes(label = sprintf("%.2f", r)), size = 4) +
scale_fill_gradient2(low = "blue", mid = "white", high = "red", midpoint = 0, na.value = "grey90",
limits = c(-0.2, 1)) +
labs(title = "FA method comparison: players per capita", x = NULL, y = NULL, fill = "r") +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 10),
axis.text.y = element_text(size = 10))

GG_save("figs/FA_variants_cor_matrix.png")
Disattenuation for NIQ measurement error
# NIQ reliability from SEs
niq_in_sample <- d_country %>% filter(!is.na(NIQ))
var_niq <- var(niq_in_sample$NIQ, na.rm = TRUE)
mean_se2 <- mean(niq_in_sample$SE_NIQ^2, na.rm = TRUE)
reliability_niq <- 1 - mean_se2 / var_niq
cat(sprintf("NIQ reliability: %.3f (var=%.1f, mean SE²=%.2f)\n\n", reliability_niq, var_niq, mean_se2))
## NIQ reliability: 0.928 (var=96.3, mean SE²=6.97)
# Disattenuated correlations (correcting NIQ side only)
composites_list <- c(
"Players per capita (unit-weighted)" = "unit_weighted",
"Prize per capita (unit-weighted)" = "unit_weighted_prize",
"Players per capita 1-factor FA" = "fa_players_raw",
"Players per capita EB FA" = "fa_eb",
"Players log-residual FA" = "fa_old_method",
"Top 1000 pc" = "top1000_per_million"
)
tibble(
composite = names(composites_list),
r_observed = map_dbl(composites_list, ~cor(d_country$NIQ, d_country[[.x]], use = "complete.obs")),
r_disattenuated = r_observed / sqrt(reliability_niq)
)
NIQ scatter plots
d_plot <- d_country %>% filter(!is.na(NIQ))
r_val <- cor(d_plot$NIQ, d_plot$unit_weighted)
n_val <- nrow(d_plot)
p_val <- cor.test(d_plot$NIQ, d_plot$unit_weighted)$p.value
ggplot(d_plot, aes(NIQ, unit_weighted, label = country)) +
geom_point(aes(size = log10(pop)), alpha = 0.6) +
geom_smooth(method = "lm", se = TRUE) +
geom_smooth(method = "loess", se = FALSE, linetype = "dashed", color = "red") +
geom_text_repel(size = 2, max.overlaps = 20) +
scale_size_continuous(guide = "none") +
annotate("text", x = min(d_plot$NIQ) + 1, y = max(d_plot$unit_weighted) * 0.95,
label = sprintf("r = %.2f, n = %d, p = %.1e", r_val, n_val, p_val),
hjust = 0, size = 4) +
labs(y = "Esports composite (unit-weighted per capita)", x = "NIQ")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
## Warning: ggrepel: 65 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

GG_save("figs/NIQ_vs_esports_uw.png")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
## Warning: ggrepel: 40 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Regional means
# Macro-regions based on UN sub-regions
d_country$un_subregion <- countrycode(d_country$iso3, "iso3c", "un.regionsub.name")
## Warning: Some values were not matched unambiguously: TWN, XKX
d_country$un_subregion[d_country$iso3 == "XKX"] <- "Southern Europe"
d_country$macroregion <- dplyr::recode(d_country$un_subregion,
"Northern Europe" = "N+W Europe + offshoots",
"Western Europe" = "N+W Europe + offshoots",
"Northern America" = "N+W Europe + offshoots",
"Australia and New Zealand" = "N+W Europe + offshoots",
"Southern Europe" = "Southern Europe",
"Eastern Europe" = "Eastern Europe",
"Central Asia" = "Central Asia",
"Eastern Asia" = "Eastern Asia",
"South-eastern Asia" = "South-eastern Asia",
"Southern Asia" = "Southern Asia",
"Western Asia" = "MENA",
"Northern Africa" = "MENA",
"Sub-Saharan Africa" = "Sub-Saharan Africa",
"Latin America and the Caribbean" = "Latin America",
"Polynesia" = "Pacific Islands",
"Melanesia" = "Pacific Islands"
)
d_country$macroregion[d_country$iso3 == "TWN"] <- "Eastern Asia"
macro_order <- d_country %>%
dplyr::group_by(macroregion) %>%
dplyr::summarize(med = median(unit_weighted), .groups = "drop") %>%
dplyr::arrange(desc(med)) %>%
dplyr::pull(macroregion)
ggplot(d_country, aes(x = factor(macroregion, levels = macro_order), y = unit_weighted)) +
geom_boxplot(outlier.shape = NA) +
geom_jitter(width = 0.2, alpha = 0.5, size = 2) +
geom_text_repel(aes(label = country), size = 2.2, max.overlaps = 20, seed = 3,
min.segment.length = 0.1, force = 2, max.time = 5) +
labs(x = NULL, y = "Esports composite (unit-weighted per capita)",
title = "Esports performance by macro-region") +
theme(axis.text.x = element_text(angle = 35, hjust = 1, size = 9),
plot.margin = margin(5, 5, 5, 15, "mm"))
## Warning: ggrepel: 62 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

GG_save("figs/esports_by_macroregion.png", width = 16, height = 8)
## Warning: ggrepel: 36 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
Regressions with controls
# Internet access data
inet <- read_csv("/media/emil/8tb_ssd_3/projects/finished/2019/Esports and national IQ/data/internet users 2016.csv", show_col_types = FALSE)
inet$iso3 <- pu_translate(inet$country, messages = FALSE)
inet$internet_pct <- inet$i99h2016 / 100
d_country <- d_country %>%
left_join(inet %>% select(iso3, internet_pct), by = "iso3")
# Steam traffic per capita
steam <- read_csv("data/steam_traffic.csv", show_col_types = FALSE)
names(steam) <- c("iso3", "steam_gb_pc")
steam$iso3 <- ifelse(steam$iso3 %in% names(sub_to_parent), sub_to_parent[steam$iso3], steam$iso3)
steam <- steam %>% filter(!is.na(steam_gb_pc)) %>%
dplyr::group_by(iso3) %>% dplyr::summarize(steam_gb_pc = max(steam_gb_pc), .groups = "drop")
d_country <- d_country %>% left_join(steam, by = "iso3")
d_mod <- d_country %>% filter(!is.na(NIQ))
d_mod$NIQ_z <- scale(d_mod$NIQ)[,1]
d_mod$internet_z <- scale(d_mod$internet_pct)[,1]
d_mod$steam_z <- scale(d_mod$steam_gb_pc)[,1]
d_mod$esport_z <- scale(d_mod$unit_weighted)[,1]
# Partial sqrt(epsilon^2) for effect sizes comparable across predictors
get_pe2 <- function(fit, term) {
a <- as.data.frame(anova(fit))
if (!term %in% rownames(a)) return(NA_real_)
ss_eff <- a[term, "Partial SS"]
df_eff <- a[term, "d.f."]
ms_err <- a["ERROR", "MS"]
ss_err <- a["ERROR", "Partial SS"]
pepsilon2 <- (ss_eff - df_eff * ms_err) / (ss_eff + ss_err)
sqrt(max(pepsilon2, 0))
}
# Linear NIQ models
ml1 <- rms::ols(esport_z ~ NIQ_z, data = d_mod)
ml2 <- rms::ols(esport_z ~ NIQ_z + internet_z, data = d_mod)
ml3 <- rms::ols(esport_z ~ NIQ_z + macroregion, data = d_mod)
ml4 <- rms::ols(esport_z ~ NIQ_z + steam_z, data = d_mod)
ml5 <- rms::ols(esport_z ~ NIQ_z + internet_z + macroregion + steam_z, data = d_mod)
# Spline NIQ models
ms1 <- rms::ols(esport_z ~ rcs(NIQ_z, 3), data = d_mod)
ms2 <- rms::ols(esport_z ~ rcs(NIQ_z, 3) + internet_z, data = d_mod)
ms3 <- rms::ols(esport_z ~ rcs(NIQ_z, 3) + macroregion, data = d_mod)
ms4 <- rms::ols(esport_z ~ rcs(NIQ_z, 3) + steam_z, data = d_mod)
ms5 <- rms::ols(esport_z ~ rcs(NIQ_z, 3) + internet_z + macroregion + steam_z, data = d_mod)
# Summary table: sqrt(partial epsilon^2) for each predictor
tibble(
NIQ_form = c(rep("Linear", 5), rep("Spline", 5)),
model = rep(c("NIQ", "+internet", "+region", "+steam", "+all"), 2),
fits = list(ml1, ml2, ml3, ml4, ml5, ms1, ms2, ms3, ms4, ms5),
NIQ = map_dbl(fits, ~get_pe2(.x, "NIQ_z")),
Internet = map_dbl(fits, ~get_pe2(.x, "internet_z")),
Region = map_dbl(fits, ~get_pe2(.x, "macroregion")),
Steam = map_dbl(fits, ~get_pe2(.x, "steam_z")),
R2_adj = map_dbl(fits, ~summary.lm(.x)$adj.r.squared)
) %>%
select(-fits) %>%
mutate(across(where(is.numeric), ~round(.x, 3)))
# Table as figure with significance stars
get_p <- function(fit, term) {
a <- as.data.frame(anova(fit))
if (!term %in% rownames(a)) return(NA_real_)
a[term, "P"]
}
format_cell <- function(value, p) {
if (is.na(value)) return("")
star <- ifelse(!is.na(p) & p < 0.05, "*", "")
sprintf("%.2f%s", value, star)
}
all_fits <- list(ml1, ml2, ml3, ml4, ml5, ms1, ms2, ms3, ms4, ms5)
fit_names <- c("L1","L2","L3","L4","L5","S1","S2","S3","S4","S5")
predictors <- c("NIQ", "Internet", "Region", "Steam", "R\u00b2 adj")
terms <- c("NIQ_z", "internet_z", "macroregion", "steam_z", NA)
vals <- matrix(NA, 5, 10)
labs <- matrix("", 5, 10)
for (j in 1:10) {
fit <- all_fits[[j]]
for (i in 1:4) {
vals[i,j] <- get_pe2(fit, terms[i])
labs[i,j] <- format_cell(vals[i,j], get_p(fit, terms[i]))
}
vals[5,j] <- summary.lm(fit)$adj.r.squared
labs[5,j] <- sprintf("%.2f", vals[5,j])
}
model_subtitles <- c("NIQ", "+int", "+reg", "+stm", "+all", "NIQ", "+int", "+reg", "+stm", "+all")
expand.grid(
row = factor(predictors, levels = rev(predictors)),
col = factor(fit_names, levels = fit_names)
) %>%
mutate(
i = match(as.character(row), predictors),
j = match(col, fit_names),
value = map2_dbl(i, j, ~vals[.x, .y]),
label = map2_chr(i, j, ~labs[.x, .y]),
is_r2 = as.character(row) == "R\u00b2 adj"
) %>%
ggplot(aes(x = col, y = row)) +
geom_tile(aes(fill = ifelse(is.na(value) | is_r2, NA, value)), color = "grey80") +
geom_text(aes(label = label), size = 4.5) +
scale_fill_gradient(low = "white", high = "salmon", na.value = "grey95", guide = "none",
limits = c(0, 0.8)) +
scale_x_discrete(labels = model_subtitles, position = "top") +
labs(x = NULL, y = NULL) +
theme_minimal() +
theme(axis.text = element_text(size = 10), panel.grid = element_blank(),
plot.title = element_text(hjust = 0.5, size = 13, face = "bold")) +
annotate("text", x = 3, y = 6.2, label = "Linear NIQ", fontface = "bold", size = 4.5) +
annotate("text", x = 8, y = 6.2, label = "Spline NIQ", fontface = "bold", size = 4.5) +
annotate("text", x = 10.3, y = 0.3, label = sprintf("n = %d, * p < .05", nrow(d_mod)),
size = 3.5, hjust = 1, color = "grey40") +
coord_cartesian(ylim = c(0.4, 6.3), clip = "off") +
ggtitle(expression("Regression results: " * sqrt("partial " * epsilon^2)))

GG_save("figs/regression_table.png")
Expanded sample: including 0-player countries
# Countries with NIQ but 0 esports players in any game
niq_all <- read_excel("data/National IQ datasets.xlsx", sheet = "Seb2024") %>%
filter(!ISO %in% c("HKG", "MAC")) %>%
select(iso3 = ISO, NIQ = NIQ)
merged_subs <- c("GRL", "BMU", "PRI", "GIB", "GGY", "JEY", "ASM", "VIR",
"ABW", "FRO", "NCL", "PYF", "ALA", "REU", "HKG", "MAC")
truly_missing <- niq_all %>%
filter(!iso3 %in% d_country$iso3, !iso3 %in% merged_subs) %>%
left_join(pop_owid %>% select(iso3, pop = population), by = "iso3")
# Their unit-weighted score: z-score of 0 rate in each game, then average
rate_means <- colMeans(d_country[rate_player_cols])
rate_sds <- apply(d_country[rate_player_cols], 2, sd)
uw_zero <- mean((0 - rate_means) / rate_sds)
d_expanded <- bind_rows(
d_country %>% select(iso3, country, NIQ, unit_weighted, pop),
truly_missing %>% mutate(
country = countrycode(iso3, "iso3c", "country.name"),
unit_weighted = uw_zero
) %>% select(iso3, country, NIQ, unit_weighted, pop)
)
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `country = countrycode(iso3, "iso3c", "country.name")`.
## Caused by warning:
## ! Some values were not matched unambiguously: KSV
d_exp_plot <- d_expanded %>% filter(!is.na(NIQ))
d_exp_plot$has_players <- d_exp_plot$iso3 %in% d_country$iso3
r_exp <- cor(d_exp_plot$NIQ, d_exp_plot$unit_weighted)
n_exp <- nrow(d_exp_plot)
p_exp <- cor.test(d_exp_plot$NIQ, d_exp_plot$unit_weighted)$p.value
ggplot(d_exp_plot, aes(NIQ, unit_weighted, label = country)) +
geom_point(aes(size = log10(pop), shape = has_players), alpha = 0.6) +
scale_shape_manual(values = c("TRUE" = 16, "FALSE" = 4), guide = "none") +
geom_smooth(method = "lm", se = TRUE) +
geom_smooth(method = "loess", se = FALSE, linetype = "dashed", color = "red") +
geom_text_repel(size = 2, max.overlaps = 20) +
scale_size_continuous(guide = "none") +
annotate("text", x = min(d_exp_plot$NIQ, na.rm = TRUE) + 1,
y = max(d_exp_plot$unit_weighted) * 0.95,
label = sprintf("r = %.2f, n = %d, p = %.1e\n(x = 0 players in any game)", r_exp, n_exp, p_exp),
hjust = 0, size = 3.5) +
labs(y = "Esports composite (unit-weighted per capita)", x = "NIQ",
title = "Including countries with 0 esports players")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: ggrepel: 122 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

GG_save("figs/NIQ_vs_esports_expanded.png")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: ggrepel: 103 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
Nonlinearity test
# Test nonlinearity: linear vs restricted cubic spline
fit_linear <- rms::ols(unit_weighted ~ NIQ, data = d_plot)
fit_spline <- rms::ols(unit_weighted ~ rcs(NIQ, 3), data = d_plot)
fit_linear
## Linear Regression Model
##
## rms::ols(formula = unit_weighted ~ NIQ, data = d_plot)
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 141 LR chi2 88.56 R2 0.466
## sigma0.3922 d.f. 1 R2 adj 0.463
## d.f. 139 Pr(> chi2) 0.0000 g 0.417
##
## Residuals
##
## Min 1Q Median 3Q Max
## -0.77832 -0.19062 -0.07712 0.07682 1.71494
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -3.1990 0.2924 -10.94 <0.0001
## NIQ 0.0372 0.0034 11.02 <0.0001
fit_spline
## Linear Regression Model
##
## rms::ols(formula = unit_weighted ~ rcs(NIQ, 3), data = d_plot)
##
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 141 LR chi2 110.16 R2 0.542
## sigma0.3646 d.f. 2 R2 adj 0.536
## d.f. 138 Pr(> chi2) 0.0000 g 0.416
##
## Residuals
##
## Min 1Q Median 3Q Max
## -0.96263 -0.11055 -0.02360 0.02648 1.58026
##
##
## Coef S.E. t Pr(>|t|)
## Intercept -0.6305 0.6021 -1.05 0.2969
## NIQ 0.0040 0.0076 0.52 0.6047
## NIQ' 0.0486 0.0102 4.78 <0.0001
lrtest(fit_linear, fit_spline)
##
## Model 1: unit_weighted ~ NIQ
## Model 2: unit_weighted ~ rcs(NIQ, 3)
##
## L.R. Chisq d.f. P
## 2.16e+01 1.00e+00 3.35e-06