Init

library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4          ✔ readr     2.1.5     
## ✔ forcats   1.0.1          ✔ stringr   1.6.0     
## ✔ ggplot2   4.0.1.9000     ✔ tibble    3.3.0     
## ✔ lubridate 1.9.4          ✔ tidyr     1.3.1     
## ✔ purrr     1.2.0          
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
## 
## 
## Attaching package: 'magrittr'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
## 
## 
## Loading required package: weights
## 
## Loading required package: assertthat
## 
## 
## Attaching package: 'assertthat'
## 
## 
## The following object is masked from 'package:tibble':
## 
##     has_name
## 
## 
## Loading required package: psych
## 
## 
## Attaching package: 'psych'
## 
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## 
## 
## Loading required package: robustbase
## 
## 
## Attaching package: 'kirkegaard'
## 
## 
## The following object is masked from 'package:psych':
## 
##     rescale
## 
## 
## The following object is masked from 'package:assertthat':
## 
##     are_equal
## 
## 
## The following object is masked from 'package:purrr':
## 
##     is_logical
## 
## 
## The following object is masked from 'package:base':
## 
##     +
load_packages(
  MASS,
  ebpm,
  countrycode,
  rnaturalearth,
  sf,
  duckdb
)
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## Linking to GEOS 3.10.2, GDAL 3.4.1, PROJ 8.2.1; sf_use_s2() is TRUE
## Loading required package: DBI
library(flextable)
## 
## Attaching package: 'flextable'
## 
## The following object is masked from 'package:purrr':
## 
##     compose
select = dplyr::select
filter = dplyr::filter

theme_set(theme_bw())
options(digits = 3)

#claude
if (F) {
  library(btw)
  btw::btw_mcp_session()
}

Data

Movie data

#country of production from TMDb API (modal company country, fallback to first origin_country)
tmdb_movies = read_csv("data/tmdb_movies.csv", show_col_types = FALSE)
tmdb_companies = read_csv("data/tmdb_prod_companies.csv", show_col_types = FALSE)

#modal company country per movie
company_mode = tmdb_companies %>%
  filter(company_country != "") %>%
  count(tconst, company_country) %>%
  group_by(tconst) %>%
  slice_max(n, n = 1, with_ties = TRUE) %>%
  ungroup()

#for ties, pick the one matching first origin_country
primary = tmdb_movies %>%
  mutate(primary_country = str_extract(origin_country, "^[^|]+")) %>%
  select(tconst, primary_country)

movie_country = company_mode %>%
  left_join(primary, by = "tconst") %>%
  group_by(tconst) %>%
  mutate(rank = row_number(desc(company_country == primary_country))) %>%
  filter(rank == 1) %>%
  ungroup() %>%
  select(tconst, modal_country = company_country)

movie_country = primary %>%
  left_join(movie_country, by = "tconst") %>%
  mutate(country = coalesce(modal_country, primary_country)) %>%
  select(tconst, country)
#load IMDB ratings and titles
con = dbConnect(duckdb(), read_only = TRUE)
movies = dbGetQuery(con, "
  SELECT r.tconst, r.averageRating, r.numVotes, t.startYear, t.genres
  FROM read_parquet('data/ratings.parquet') r
  JOIN read_parquet('data/titles.parquet') t ON r.tconst = t.tconst
  WHERE t.titleType = 'movie'
    AND r.date = (SELECT MAX(date) FROM read_parquet('data/ratings.parquet'))
    AND t.startYear IS NOT NULL
    AND t.startYear <= 2022
")
titles = dbGetQuery(con, "SELECT tconst, primaryTitle FROM read_parquet('data/titles.parquet')")
dbDisconnect(con, shutdown = TRUE)

#remove documentaries, merge country
movies %<>%
  filter(!grepl("Documentary", genres, fixed = TRUE)) %>%
  left_join(movie_country, by = "tconst") %>%
  filter(!is.na(country))

#merge TMDb ratings
movies = movies %>%
  left_join(tmdb_movies %>% select(tconst, tmdb_vote = tmdb_vote_average), by = "tconst")
#countries with border changes — exclude from periods where borders are unstable
border_unstable = list(
  ussr = c("SU", "RU", "UA", "BY", "GE", "AM", "AZ", "KZ", "UZ", "TM", "KG", "TJ", "LV", "LT", "EE", "MD"),
  yugo = c("YU", "RS", "HR", "BA", "SI", "ME", "MK", "XK"),
  czsk = c("CS", "CZ", "SK"),
  germany = c("DE", "DD", "XG")
)
all_unstable = unlist(border_unstable)

#countries OK for 2000+ (borders stable by then)
ok_from_2000 = c("DE", "DD", "XG", "RU", "UA", "BY", "GE", "AM", "AZ", "KZ",
                  "LV", "LT", "EE", "MD", "CZ", "SK", "SI", "HR", "BA", "MK")
#always exclude (border changes 2006-2008)
always_exclude = c("RS", "ME", "XK")

GDP and population

if (!file.exists("data/gdp_maddison.csv"))
  download.file("https://ourworldindata.org/grapher/gdp-per-capita-maddison.csv",
                "data/gdp_maddison.csv", mode = "wb")
if (!file.exists("data/population.csv"))
  download.file("https://ourworldindata.org/grapher/population.csv",
                "data/population.csv", mode = "wb")

gdp_raw = read_csv("data/gdp_maddison.csv", show_col_types = FALSE) %>%
  select(entity = Entity, code = Code, year = Year, gdppc = `GDP per capita`) %>%
  filter(!is.na(code), !is.na(gdppc))
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
pop_raw = read_csv("data/population.csv", show_col_types = FALSE) %>%
  select(entity = Entity, code = Code, year = Year, population = Population) %>%
  filter(!is.na(code), !is.na(population))

periods = list("all" = c(1920, 2022), "1950" = c(1950, 2022),
               "1980" = c(1980, 2022), "2000" = c(2000, 2022))

gdp_avgs = lapply(names(periods), function(p) {
  yr = periods[[p]]
  gdp_raw %>% filter(year >= yr[1], year <= yr[2]) %>%
    group_by(code) %>% summarise(gdppc_mean = mean(gdppc), .groups = "drop") %>%
    mutate(start_year = p)
}) %>% bind_rows()

pop_avgs = lapply(names(periods), function(p) {
  yr = periods[[p]]
  pop_raw %>% filter(year >= yr[1], year <= yr[2]) %>%
    group_by(code) %>% summarise(pop_mean = mean(population), .groups = "drop") %>%
    mutate(start_year = p)
}) %>% bind_rows()

country_vars = inner_join(gdp_avgs, pop_avgs, by = c("code", "start_year"))

Movie production per capita

The cleanest cross-country measure: how many movies (with 1000+ IMDB votes) does each country produce per capita? This measures cultural output/visibility without relying on ratings, which suffer from national voting biases.

#preferred spec: 1000+ votes, 1950+, no docs, border-stable countries
m = movies %>%
  filter(numVotes >= 1000, startYear >= 1950, !country %in% all_unstable)

pop_1950 = pop_avgs %>% filter(start_year == "1950") %>% select(code, pop_mean)
gdp_1950 = gdp_avgs %>% filter(start_year == "1950") %>% select(code, gdppc_mean)

prod = m %>%
  count(country, name = "n_total") %>%
  mutate(iso3 = countrycode(country, "iso2c", "iso3c", warn = FALSE)) %>%
  filter(!is.na(iso3)) %>%
  left_join(pop_1950, by = c("iso3" = "code")) %>%
  left_join(gdp_1950, by = c("iso3" = "code")) %>%
  filter(!is.na(pop_mean), n_total >= 5)

prod$movies_pc = prod$n_total / prod$pop_mean * 1e6
prod$eb_movies_pc = ebpm_gamma(prod$n_total, prod$pop_mean)$posterior$mean * 1e6
prod$name = countrycode(prod$iso3, "iso3c", "country.name")
prod$log_gdppc = log(prod$gdppc_mean)
#top 20 all-time table
top20_alltime = prod %>%
  arrange(desc(eb_movies_pc)) %>%
  head(20) %>%
  mutate(Rank = row_number(), `Pop. (M)` = round(pop_mean / 1e6, 1)) %>%
  select(Rank, Country = name, ISO = iso3, Movies = n_total,
         `Pop. (M)`, `Raw /M` = movies_pc, `EB /M` = eb_movies_pc) %>%
  mutate(across(c(`Raw /M`, `EB /M`), ~round(., 1)))

ft_alltime = flextable(top20_alltime) %>%
  add_header_lines("Top 20 countries by movie production per capita") %>%
  add_footer_lines("Feature films 1950–2022 · ≥1,000 IMDB votes · ≥5 movies · ≥60 min · non-documentary\nCountry via TMDb modal-primary · EB = empirical Bayes · Population = OWID mean 1950–2022 · Rates per million\nExcludes border-unstable states (USSR, Yugoslavia, Czechoslovakia, DDR) and Serbia") %>%
  bold(i = 1, part = "header") %>%
  fontsize(i = 1, size = 16, part = "header") %>%
  fontsize(size = 11, part = "body") %>%
  fontsize(size = 9, part = "footer") %>%
  colformat_num(j = "Movies", big.mark = ",", digits = 0) %>%
  align(j = "Rank", align = "center") %>%
  align(j = "ISO", align = "center") %>%
  bg(bg = "white", part = "all") %>%
  autofit() %>%
  theme_vanilla()

save_as_image(ft_alltime, path = "figs/top20_movies_per_capita_alltime.png", zoom = 2)
## [1] "figs/top20_movies_per_capita_alltime.png"
ft_alltime

Top 20 countries by movie production per capita

Rank

Country

ISO

Movies

Pop. (M)

Raw /M

EB /M

1

Iceland

ISL

31

0.2

124.8

112.4

2

Hong Kong SAR China

HKG

479

5.2

92.2

91.7

3

United States

USA

16,986

247.2

68.7

68.7

4

Denmark

DNK

284

5.1

55.4

55.2

5

Sweden

SWE

434

8.5

50.9

50.8

6

France

FRA

2,572

55.4

46.5

46.4

7

United Kingdom

GBR

2,292

57.8

39.6

39.6

8

Ireland

IRL

141

3.6

39.0

38.8

9

Norway

NOR

165

4.2

38.8

38.7

10

Luxembourg

LUX

15

0.4

36.8

35.3

11

Canada

CAN

930

26.4

35.2

35.2

12

Finland

FIN

165

4.9

33.7

33.5

13

Australia

AUS

436

16.4

26.6

26.6

14

Italy

ITA

1,245

55.4

22.5

22.5

15

Belgium

BEL

225

10.0

22.5

22.4

16

Spain

ESP

739

38.3

19.3

19.3

17

Greece

GRC

179

9.7

18.4

18.4

18

Netherlands

NLD

264

14.4

18.3

18.3

19

New Zealand

NZL

62

3.4

18.2

18.2

20

South Korea

KOR

590

39.2

15.0

15.0

Feature films 1950–2022 · ≥1,000 IMDB votes · ≥5 movies · ≥60 min · non-documentary
Country via TMDb modal-primary · EB = empirical Bayes · Population = OWID mean 1950–2022 · Rates per million
Excludes border-unstable states (USSR, Yugoslavia, Czechoslovakia, DDR) and Serbia

#top 30 post-reunification (1990+, can include Germany, Czech, Russia etc.)
m_post90 = movies %>%
  filter(numVotes >= 1000, startYear >= 1990, !country %in% always_exclude) %>%
  filter(!country %in% c("SU", "YU", "CS", "XC", "XG", "DD"))

pop_1990 = pop_raw %>%
  filter(year >= 1990, year <= 2022) %>%
  group_by(code) %>%
  summarise(pop_mean = mean(population), .groups = "drop")

prod_post90 = m_post90 %>%
  count(country, name = "n_movies") %>%
  mutate(iso3 = countrycode(country, "iso2c", "iso3c", warn = FALSE)) %>%
  filter(!is.na(iso3)) %>%
  left_join(pop_1990, by = c("iso3" = "code")) %>%
  filter(!is.na(pop_mean), n_movies >= 5) %>%
  mutate(
    name = countrycode(iso3, "iso3c", "country.name"),
    movies_per_m = n_movies / pop_mean * 1e6
  )

fit_post90 = ebpm_gamma(prod_post90$n_movies, prod_post90$pop_mean / 1e6)
prod_post90$eb_per_m = fit_post90$posterior$mean

top30_post90 = prod_post90 %>%
  arrange(desc(eb_per_m)) %>%
  head(30) %>%
  mutate(Rank = row_number(), `Pop. (M)` = round(pop_mean / 1e6, 1)) %>%
  select(Rank, Country = name, ISO = iso3, Movies = n_movies,
         `Pop. (M)`, `Raw /M` = movies_per_m, `EB /M` = eb_per_m) %>%
  mutate(across(c(`Raw /M`, `EB /M`), ~round(., 1)))

ft_post90 = flextable(top30_post90) %>%
  add_header_lines("Top 30 countries by movie production per capita (post-reunification)") %>%
  add_footer_lines("Feature films 1990–2022 · ≥1,000 IMDB votes · ≥5 movies · ≥60 min · non-documentary\nCountry via TMDb modal-primary · EB = empirical Bayes · Population = OWID mean 1990–2022 · Rates per million\nExcludes Serbia (borders unstable through 2008)") %>%
  bold(i = 1, part = "header") %>%
  fontsize(i = 1, size = 16, part = "header") %>%
  fontsize(size = 11, part = "body") %>%
  fontsize(size = 9, part = "footer") %>%
  colformat_num(j = "Movies", big.mark = ",", digits = 0) %>%
  align(j = "Rank", align = "center") %>%
  align(j = "ISO", align = "center") %>%
  bg(bg = "white", part = "all") %>%
  autofit() %>%
  theme_vanilla()

save_as_image(ft_post90, path = "figs/top30_movies_per_capita_post1990.png", zoom = 2)
## [1] "figs/top30_movies_per_capita_post1990.png"
ft_post90

Top 30 countries by movie production per capita (post-reunification)

Rank

Country

ISO

Movies

Pop. (M)

Raw /M

EB /M

1

Iceland

ISL

31

0.3

101.3

87.4

2

Hong Kong SAR China

HKG

336

6.8

49.2

48.9

3

Denmark

DNK

248

5.5

45.3

44.9

4

United States

USA

12,112

299.3

40.5

40.5

5

Sweden

SWE

341

9.3

36.6

36.5

6

Ireland

IRL

136

4.2

32.1

31.8

7

Norway

NOR

150

4.8

31.4

31.2

8

France

FRA

1,905

61.8

30.8

30.8

9

Luxembourg

LUX

14

0.5

28.4

26.6

10

Finland

FIN

136

5.3

25.7

25.5

11

Canada

CAN

826

33.0

25.0

25.0

12

United Kingdom

GBR

1,544

61.8

25.0

25.0

13

Belgium

BEL

207

10.7

19.3

19.3

14

Australia

AUS

367

21.2

17.3

17.3

15

Estonia

EST

22

1.4

15.9

15.7

16

Netherlands

NLD

244

16.5

14.8

14.8

17

Spain

ESP

631

43.9

14.4

14.4

18

New Zealand

NZL

54

4.2

12.8

12.8

19

South Korea

KOR

588

48.2

12.2

12.2

20

Hungary

HUN

109

10.1

10.8

10.8

21

Greece

GRC

112

10.8

10.4

10.4

22

Italy

ITA

579

58.8

9.9

9.9

23

Germany

DEU

795

81.9

9.7

9.7

24

Croatia

HRV

40

4.3

9.2

9.2

25

Turkey

TUR

589

71.1

8.3

8.3

26

Japan

JPN

1,041

126.7

8.2

8.2

27

Austria

AUT

67

8.3

8.1

8.1

28

Israel

ISR

43

6.9

6.3

6.3

29

Poland

POL

227

38.2

5.9

5.9

30

Switzerland

CHE

45

7.6

5.9

5.9

Feature films 1990–2022 · ≥1,000 IMDB votes · ≥5 movies · ≥60 min · non-documentary
Country via TMDb modal-primary · EB = empirical Bayes · Population = OWID mean 1990–2022 · Rates per million
Excludes Serbia (borders unstable through 2008)

#world map of movie production per capita
world = ne_countries(scale = "medium", returnclass = "sf")

map_data = world %>%
  filter(iso_a3_eh != "ATA") %>%
  left_join(prod %>% select(iso3, value = eb_movies_pc), by = c("iso_a3_eh" = "iso3"))

ggplot(map_data) +
  geom_sf(aes(fill = value), color = "grey40", linewidth = 0.1) +
  scale_fill_viridis_c(name = "Per million", na.value = "grey90", trans = "pseudo_log") +
  coord_sf(ylim = c(-55, 85), expand = FALSE) +
  labs(title = "Movie production per capita (EB-adjusted)",
       subtitle = "Non-documentary films with 1000+ IMDB votes, 1950+") +
  theme_void() +
  theme(legend.position = "bottom", legend.key.width = unit(2, "cm"))

GG_save("figs/map_production_pc.png")

Decade stability

How stable are country rankings in movie production across decades?

pop_decade = pop_raw %>%
  mutate(decade = floor(year / 10) * 10) %>%
  group_by(code, decade) %>%
  summarise(pop_mean = mean(population), .groups = "drop")

#all country × decade combinations, filling zeros for countries with no movies
all_countries_iso3 = prod$iso3
grid = expand_grid(iso3 = all_countries_iso3, decade = seq(1950, 2010, 10))

counts_decade = m %>%
  mutate(decade = floor(startYear / 10) * 10,
         iso3 = countrycode(country, "iso2c", "iso3c", warn = FALSE)) %>%
  filter(!is.na(iso3), decade >= 1950, decade <= 2010) %>%
  count(iso3, decade, name = "n_movies")

counts_decade = grid %>%
  left_join(counts_decade, by = c("iso3", "decade")) %>%
  mutate(n_movies = replace_na(n_movies, 0L)) %>%
  left_join(pop_decade, by = c("iso3" = "code", "decade")) %>%
  filter(!is.na(pop_mean), pop_mean >= 1e6) %>%
  mutate(movies_pc = n_movies / pop_mean * 1e6)
wide_decade = counts_decade %>%
  select(iso3, decade, movies_pc) %>%
  pivot_wider(names_from = decade, values_from = movies_pc, names_prefix = "d")

decades = paste0("d", seq(1950, 2010, 10))
cor_mat = matrix(NA, 7, 7, dimnames = list(seq(1950,2010,10), seq(1950,2010,10)))
for (i in 1:7) for (j in 1:7) {
  x = wide_decade[[decades[i]]]; y = wide_decade[[decades[j]]]
  ok = !is.na(x) & !is.na(y)
  cor_mat[i, j] = cor(x[ok], y[ok], method = "spearman")
}

as.data.frame(as.table(cor_mat)) %>%
  rename(decade1 = Var1, decade2 = Var2, r = Freq) %>%
  ggplot(aes(decade1, decade2, fill = r)) +
  geom_tile() +
  geom_text(aes(label = round(r, 2)), size = 3.5) +
  scale_fill_viridis_c(limits = c(0.5, 1), name = "Spearman r") +
  labs(title = "Stability of movie production per capita across decades",
       subtitle = "Spearman rank correlations, countries with 1M+ population",
       x = NULL, y = NULL) +
  theme_minimal() +
  theme(panel.grid = element_blank())

GG_save("figs/decade_stability.png")

GDP predicts movie production

fit_nb = glm.nb(n_total ~ log_gdppc + offset(log(pop_mean)), data = prod)
summary(fit_nb)
## 
## Call:
## glm.nb(formula = n_total ~ log_gdppc + offset(log(pop_mean)), 
##     data = prod, init.theta = 1.167339443, link = log)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -26.219      1.265   -20.7   <2e-16 ***
## log_gdppc      1.545      0.134    11.5   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(1.17) family taken to be 1)
## 
##     Null deviance: 169.645  on 59  degrees of freedom
## Residual deviance:  67.909  on 58  degrees of freedom
## AIC: 730.5
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  1.167 
##           Std. Err.:  0.196 
## 
##  2 x log-likelihood:  -724.457
prod %>%
  ggplot(aes(gdppc_mean, movies_pc, label = iso3)) +
  geom_point(aes(size = n_total), alpha = 0.5) +
  geom_text(size = 2.5, vjust = -0.8, check_overlap = TRUE) +
  geom_smooth(method = "lm", se = TRUE, color = "blue") +
  scale_x_log10(labels = scales::comma) +
  scale_y_log10() +
  scale_size_continuous(range = c(1, 8), guide = "none") +
  labs(title = "GDP per capita predicts movie production",
       subtitle = "Negative binomial: movies ~ log(GDP/cap) + offset(log(pop))",
       x = "GDP per capita PPP (Maddison, log scale)",
       y = "Movies per million (log scale)")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

GG_save("figs/gdp_vs_production.png")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

Rating-based measures and voting bias

Using ratings to identify “top” movies introduces national voting bias: countries with ethnocentric IMDB user bases push domestic films to inflated ratings. We demonstrate this by comparing IMDB and TMDb ratings, and by contrasting rating-based rankings with production-based rankings.

IMDB vs TMDb ratings

m %>%
  filter(!is.na(tmdb_vote), tmdb_vote > 0) %>%
  ggplot(aes(averageRating, tmdb_vote)) +
  geom_point(alpha = 0.05, size = 0.5) +
  geom_abline(linetype = "dashed", color = "red") +
  geom_smooth(method = "lm", color = "blue", se = FALSE) +
  labs(title = "IMDB vs TMDb ratings",
       subtitle = sprintf("r = %.2f, n = %d movies",
         cor(m$averageRating[!is.na(m$tmdb_vote) & m$tmdb_vote > 0],
             m$tmdb_vote[!is.na(m$tmdb_vote) & m$tmdb_vote > 0]),
         sum(!is.na(m$tmdb_vote) & m$tmdb_vote > 0)),
       x = "IMDB rating", y = "TMDb rating")
## `geom_smooth()` using formula = 'y ~ x'

GG_save("figs/imdb_vs_tmdb.png")
## `geom_smooth()` using formula = 'y ~ x'
#mean IMDB-TMDb gap by country (countries with 50+ movies)
country_bias = m %>%
  filter(!is.na(tmdb_vote), tmdb_vote > 0) %>%
  group_by(country) %>%
  filter(n() >= 50) %>%
  summarise(
    n = n(),
    imdb_mean = mean(averageRating),
    tmdb_mean = mean(tmdb_vote),
    bias = mean(averageRating - tmdb_vote),
    .groups = "drop"
  ) %>%
  mutate(name = countrycode(country, "iso2c", "iso3c", warn = FALSE) %>%
           countrycode("iso3c", "country.name")) %>%
  arrange(desc(bias))

country_bias %>%
  ggplot(aes(bias, fct_reorder(name, bias))) +
  geom_col() +
  geom_vline(xintercept = 0, linetype = "dashed") +
  labs(title = "IMDB rating inflation by country",
       subtitle = "Mean (IMDB - TMDb) gap, countries with 50+ movies",
       x = "IMDB - TMDb rating gap", y = NULL)

GG_save("figs/country_rating_bias.png")

Outlier movies: biggest IMDB-TMDb divergence

#biggest IMDB >> TMDb (50k+ votes to filter out bot noise)
m %>%
  filter(!is.na(tmdb_vote), tmdb_vote > 0, numVotes >= 50000) %>%
  left_join(titles, by = "tconst") %>%
  mutate(diff = averageRating - tmdb_vote,
         name = countrycode(country, "iso2c", "iso3c", warn = FALSE) %>%
           countrycode("iso3c", "country.name")) %>%
  arrange(desc(diff)) %>%
  select(primaryTitle, name, startYear, averageRating, tmdb_vote, diff, numVotes) %>%
  head(20)

Rating-based top movies per capita

#top movies (>= 7.5) per country using IMDB and TMDb ratings
top_counts = m %>%
  group_by(country) %>%
  summarise(
    n_total = n(),
    n_top_imdb = sum(averageRating >= 7.5),
    n_top_tmdb = sum(tmdb_vote >= 7.5, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(iso3 = countrycode(country, "iso2c", "iso3c", warn = FALSE)) %>%
  filter(!is.na(iso3)) %>%
  left_join(pop_1950, by = c("iso3" = "code")) %>%
  filter(!is.na(pop_mean), n_total >= 5)

top_counts$eb_imdb_pc = ebpm_gamma(top_counts$n_top_imdb, top_counts$pop_mean)$posterior$mean * 1e6
top_counts$eb_tmdb_pc = ebpm_gamma(top_counts$n_top_tmdb, top_counts$pop_mean)$posterior$mean * 1e6
top_counts$eb_avg_pc = (top_counts$eb_imdb_pc + top_counts$eb_tmdb_pc) / 2
top_counts$movies_pc = top_counts$n_total / top_counts$pop_mean * 1e6
top_counts$name = countrycode(top_counts$iso3, "iso3c", "country.name")

Production vs rating rankings: identifying voting bias

Countries that rank much higher on ratings than on production are likely benefiting from ethnocentric voting. Countries that rank lower on ratings than production have less IMDB voting presence.

top_counts %<>% mutate(
  rank_output = rank(-movies_pc),
  rank_rating = rank(-eb_avg_pc),
  rank_diff = rank_rating - rank_output
)

top_counts %>%
  ggplot(aes(rank_output, rank_rating, label = iso3)) +
  geom_point(aes(size = n_total), alpha = 0.5) +
  geom_text(size = 2.5, vjust = -0.8, check_overlap = TRUE) +
  geom_abline(linetype = "dashed", color = "red") +
  labs(title = "Production rank vs rating rank",
       subtitle = "Above diagonal = overperforms on ratings (possible voting bias)\nBelow = underperforms on ratings",
       x = "Production per capita rank (1 = most)", y = "Top-rated per capita rank (1 = most)") +
  scale_size_continuous(range = c(1, 6), guide = "none")

GG_save("figs/production_vs_rating_rank.png")
#overperformers: ratings much better than production rank
top_counts %>%
  arrange(rank_diff) %>%
  select(name, n_total, n_top_imdb, n_top_tmdb, rank_output, rank_rating, rank_diff) %>%
  head(15)
#underperformers: produce many movies but few top-rated
top_counts %>%
  arrange(desc(rank_diff)) %>%
  select(name, n_total, n_top_imdb, n_top_tmdb, rank_output, rank_rating, rank_diff) %>%
  head(15)

Multiverse analysis

#generate counts across all specification combinations
min_votes_list = c(100, 1000, 10000)
start_years_spec = list("all" = 1920, "1950" = 1950, "1980" = 1980, "2000" = 2000)

results = list()
for (mv in min_votes_list) {
  for (sy_name in names(start_years_spec)) {
    sy = start_years_spec[[sy_name]]
    dat = movies %>%
      filter(numVotes >= mv, startYear >= sy, !country %in% always_exclude)

    if (sy_name %in% c("all", "1950", "1980")) {
      dat = dat %>% filter(!country %in% all_unstable)
    } else {
      dat = dat %>% filter(!country %in% c(setdiff(all_unstable, ok_from_2000), "SU", "YU", "CS"))
    }

    if (nrow(dat) == 0) next
    dat = dat %>% mutate(pctile = percent_rank(averageRating) * 100)
    country_totals = dat %>% count(country, name = "n_total")

    criteria = list(
      "abs_7.5" = dat$averageRating >= 7.5,
      "abs_8.0" = dat$averageRating >= 8.0,
      "top_10pct" = dat$pctile >= 90,
      "top_5pct" = dat$pctile >= 95,
      "top_1pct" = dat$pctile >= 99
    )

    for (cr_name in names(criteria)) {
      top = dat %>% filter(criteria[[cr_name]]) %>% count(country, name = "n_top")
      merged = country_totals %>%
        left_join(top, by = "country") %>%
        mutate(n_top = replace_na(n_top, 0L),
               rating_criterion = cr_name, min_votes = mv, start_year = sy_name)
      results[[length(results) + 1]] = merged
    }
  }
}

d_multi = bind_rows(results) %>%
  mutate(iso3 = countrycode(country, "iso2c", "iso3c", warn = FALSE)) %>%
  filter(!is.na(iso3)) %>%
  left_join(country_vars, by = c("iso3" = "code", "start_year")) %>%
  filter(!is.na(gdppc_mean), !is.na(pop_mean)) %>%
  mutate(log_gdppc = log(gdppc_mean), log_pop = log(pop_mean),
         movies_pc = n_total / pop_mean * 1e6, top_pc = n_top / pop_mean * 1e6)

cat(sprintf("Multiverse: %d rows, %d countries, %d specs\n",
    nrow(d_multi), n_distinct(d_multi$country),
    n_distinct(paste(d_multi$rating_criterion, d_multi$min_votes, d_multi$start_year))))
## Multiverse: 5415 rows, 131 countries, 60 specs

GDP effect across specifications

#negative binomial: n_total ~ log_gdppc + offset(log_pop) across specs
fits_prod = d_multi %>%
  group_by(min_votes, start_year) %>%
  do({
    m = tryCatch(glm.nb(n_total ~ log_gdppc + offset(log_pop), data = .), error = function(e) NULL)
    if (!is.null(m)) broom::tidy(m) %>% filter(term == "log_gdppc") else tibble()
  }) %>% ungroup() %>%
  mutate(outcome = "Total movies")

fits_top = d_multi %>%
  group_by(rating_criterion, min_votes, start_year) %>%
  do({
    m = tryCatch(glm.nb(n_top ~ log_gdppc + offset(log_pop), data = .), error = function(e) NULL)
    if (!is.null(m)) broom::tidy(m) %>% filter(term == "log_gdppc") else tibble()
  }) %>% ungroup() %>%
  mutate(outcome = "Top-rated movies")
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in glm.nb(n_top ~ log_gdppc + offset(log_pop), data = .): alternation
## limit reached
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in glm.nb(n_top ~ log_gdppc + offset(log_pop), data = .): alternation
## limit reached
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in glm.nb(n_top ~ log_gdppc + offset(log_pop), data = .): alternation
## limit reached
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in glm.nb(n_top ~ log_gdppc + offset(log_pop), data = .): alternation
## limit reached
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in glm.nb(n_top ~ log_gdppc + offset(log_pop), data = .): alternation
## limit reached
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in glm.nb(n_top ~ log_gdppc + offset(log_pop), data = .): alternation
## limit reached
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in glm.nb(n_top ~ log_gdppc + offset(log_pop), data = .): alternation
## limit reached
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in glm.nb(n_top ~ log_gdppc + offset(log_pop), data = .): alternation
## limit reached
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in glm.nb(n_top ~ log_gdppc + offset(log_pop), data = .): alternation
## limit reached
fits_all = bind_rows(fits_prod, fits_top)

fits_all %>%
  mutate(setting = paste(outcome, rating_criterion, min_votes, start_year, sep = "/")) %>%
  ggplot(aes(estimate, fct_reorder(setting, estimate), color = outcome)) +
  geom_point() +
  geom_errorbarh(aes(xmin = estimate - 1.96 * std.error,
                     xmax = estimate + 1.96 * std.error), height = 0.3) +
  geom_vline(xintercept = 0, linetype = "dashed") +
  labs(title = "Effect of log GDP per capita on movie counts (multiverse)",
       subtitle = "Negative binomial with population offset, 95% CIs",
       x = "Coefficient (log GDP per capita PPP)", y = NULL, color = NULL) +
  theme(legend.position = "top")
## Warning: `geom_errorbarh()` was deprecated in ggplot2 4.0.0.
## ℹ Please use the `orientation` argument of `geom_errorbar()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `height` was translated to `width`.

GG_save("figs/gdp_multiverse.png")
## `height` was translated to `width`.

External validation

Oscar nominations vs movie production

oscar_data = readRDS("data/oscar_data.rds")

oscar_merged = prod %>%
  left_join(oscar_data %>% select(ISO3, oscar_eb = nominations_post), by = c("iso3" = "ISO3")) %>%
  filter(!is.na(oscar_eb))

rs_oscar = cor(oscar_merged$eb_movies_pc, oscar_merged$oscar_eb, method = "spearman")

oscar_merged %>%
  ggplot(aes(oscar_eb, eb_movies_pc)) +
  geom_point(size = 2) +
  ggrepel::geom_text_repel(aes(label = iso3), size = 2.5, max.overlaps = 20) +
  geom_smooth(method = "lm", se = FALSE, linetype = "dashed") +
  annotate("text", x = max(oscar_merged$oscar_eb) * 0.7, y = max(oscar_merged$eb_movies_pc) * 0.95,
           label = sprintf("r[s] == %.2f", rs_oscar), parse = TRUE, size = 5) +
  labs(title = "Movie production vs. Oscar nominations per capita",
       subtitle = "Both EB-adjusted rates per million, countries with pop > 1M",
       x = "Oscar nominations per million (EB)", y = "Movies per million (EB)")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: ggrepel: 29 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

GG_save("figs/oscar_vs_production.png")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: ggrepel: 22 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Top-rated movies vs Oscar nominations

top_oscar = top_counts %>%
  left_join(oscar_data %>% select(ISO3, oscar_eb = nominations_post), by = c("iso3" = "ISO3")) %>%
  filter(!is.na(oscar_eb))

rs_top_oscar = cor(top_oscar$eb_avg_pc, top_oscar$oscar_eb, method = "spearman")

top_oscar %>%
  ggplot(aes(oscar_eb, eb_avg_pc)) +
  geom_point(size = 2) +
  ggrepel::geom_text_repel(aes(label = iso3), size = 2.5, max.overlaps = 20) +
  geom_smooth(method = "lm", se = FALSE, linetype = "dashed") +
  annotate("text", x = max(top_oscar$oscar_eb) * 0.7, y = max(top_oscar$eb_avg_pc) * 0.95,
           label = sprintf("r[s] == %.2f", rs_top_oscar), parse = TRUE, size = 5) +
  labs(title = "Top-rated movies vs. Oscar nominations per million",
       subtitle = "Both EB-adjusted rates, countries with pop > 1M",
       x = "Oscar nominations per million (EB)",
       y = "Top-rated movies (≥7.5) per million (EB)")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: ggrepel: 20 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

GG_save("figs/toprated_vs_oscar_eb.png")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: ggrepel: 14 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Correlation matrix of cultural and economic indicators

sci_pubs = read.csv("data/scientific_publications.csv") %>%
  select(code = Code, year = Year,
         sci_pm = Scientific.and.technical.journal.articles.per.million.people) %>%
  filter(!is.na(code), !is.na(sci_pm)) %>%
  group_by(code) %>%
  summarise(sci_pm = mean(sci_pm, na.rm = TRUE), .groups = "drop")

cor_data = prod %>%
  select(iso3, movies_pc = eb_movies_pc) %>%
  full_join(top_counts %>% select(iso3, eb_imdb_pc, eb_tmdb_pc), by = "iso3") %>%
  full_join(oscar_data %>% select(ISO3, oscar_eb = nominations_post) %>%
              mutate(iso3 = ISO3) %>% select(iso3, oscar_eb), by = "iso3") %>%
  full_join(sci_pubs %>% rename(iso3 = code), by = "iso3") %>%
  full_join(gdp_1950 %>% rename(iso3 = code), by = "iso3")

vars = c("Movies/pop (EB)" = "movies_pc", "Top IMDB/pop (EB)" = "eb_imdb_pc",
         "Top TMDb/pop (EB)" = "eb_tmdb_pc", "Oscar noms/pop (EB)" = "oscar_eb",
         "Sci pubs/pop" = "sci_pm", "GDP per capita" = "gdppc_mean")

n_vars = length(vars)
cor_mat2 = matrix(NA, n_vars, n_vars, dimnames = list(names(vars), names(vars)))
for (i in 1:n_vars) for (j in 1:n_vars) {
  x = cor_data[[vars[i]]]; y = cor_data[[vars[j]]]
  ok = !is.na(x) & !is.na(y)
  cor_mat2[i, j] = cor(x[ok], y[ok], method = "spearman")
}

as.data.frame(as.table(cor_mat2)) %>%
  rename(var1 = Var1, var2 = Var2, r = Freq) %>%
  ggplot(aes(var1, var2, fill = r)) +
  geom_tile() +
  geom_text(aes(label = round(r, 2)), size = 3.5) +
  scale_fill_gradient(low = "white", high = "red", limits = c(0, 1), name = "Spearman r") +
  labs(title = "Correlation matrix of country-level cultural and economic indicators",
       subtitle = "Spearman correlations, pairwise complete, full_join",
       x = NULL, y = NULL) +
  theme_minimal() +
  theme(panel.grid = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1))

GG_save("figs/correlation_matrix.png")

Meta

write_sessioninfo()
## R version 4.5.2 (2025-10-31)
## Platform: x86_64-pc-linux-gnu
## Running under: Linux Mint 21.1
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0  LAPACK version 3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_DK.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_DK.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Brussels
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] flextable_0.9.10      duckdb_1.4.4          DBI_1.2.3            
##  [4] sf_1.0-21             rnaturalearth_1.1.0   countrycode_1.6.1    
##  [7] ebpm_0.0.1.3          MASS_7.3-65           kirkegaard_2025-11-19
## [10] robustbase_0.99-6     psych_2.5.6           assertthat_0.2.1     
## [13] weights_1.1.2         magrittr_2.0.4        lubridate_1.9.4      
## [16] forcats_1.0.1         stringr_1.6.0         dplyr_1.1.4          
## [19] purrr_1.2.0           readr_2.1.5           tidyr_1.3.1          
## [22] tibble_3.3.0          ggplot2_4.0.1.9000    tidyverse_2.0.0      
## 
## loaded via a namespace (and not attached):
##   [1] RColorBrewer_1.1-3      rstudioapi_0.17.1       jsonlite_2.0.0         
##   [4] shape_1.4.6.1           jomo_2.7-6              farver_2.1.2           
##   [7] nloptr_2.2.1            rmarkdown_2.30          ragg_1.5.0             
##  [10] vctrs_0.6.5             minqa_1.2.8             askpass_1.2.1          
##  [13] base64enc_0.1-3         mixsqp_0.3-54           htmltools_0.5.8.1      
##  [16] broom_1.0.11            Formula_1.2-5           mitml_0.4-5            
##  [19] sass_0.4.10             KernSmooth_2.23-26      bslib_0.9.0            
##  [22] htmlwidgets_1.6.4       cachem_1.1.0            uuid_1.2-1             
##  [25] lifecycle_1.0.4         iterators_1.0.14        pkgconfig_2.0.3        
##  [28] Matrix_1.7-4            R6_2.6.1                fastmap_1.2.0          
##  [31] rbibutils_2.3           digest_0.6.39           colorspace_2.1-2       
##  [34] irlba_2.3.5.1           textshaping_1.0.4       Hmisc_5.2-4            
##  [37] labeling_0.4.3          timechange_0.3.0        gdata_3.0.1            
##  [40] mgcv_1.9-3              compiler_4.5.2          proxy_0.4-27           
##  [43] bit64_4.6.0-1           fontquiver_0.2.1        withr_3.0.2            
##  [46] htmlTable_2.4.3         S7_0.2.1                backports_1.5.0        
##  [49] pan_1.9                 openssl_2.3.4           classInt_0.4-11        
##  [52] gtools_3.9.5            tools_4.5.2             units_1.0-0            
##  [55] foreign_0.8-91          zip_2.3.3               nnet_7.3-20            
##  [58] glue_1.8.0              rnaturalearthdata_1.0.0 nlme_3.1-168           
##  [61] grid_4.5.2              checkmate_2.3.3         cluster_2.1.8.2        
##  [64] generics_0.1.4          gtable_0.3.6            tzdb_0.5.0             
##  [67] class_7.3-23            data.table_1.17.8       hms_1.1.3              
##  [70] xml2_1.4.0              ggrepel_0.9.6           foreach_1.5.2          
##  [73] pillar_1.11.1           vroom_1.6.6             splines_4.5.2          
##  [76] lattice_0.22-7          survival_3.8-6          bit_4.6.0              
##  [79] tidyselect_1.2.1        fontLiberation_0.1.0    knitr_1.50             
##  [82] fontBitstreamVera_0.1.1 reformulas_0.4.1        gridExtra_2.3          
##  [85] xfun_0.53               matrixStats_1.5.0       DEoptimR_1.1-4         
##  [88] stringi_1.8.7           yaml_2.3.10             boot_1.3-32            
##  [91] evaluate_1.0.5          codetools_0.2-20        officer_0.7.0          
##  [94] gdtools_0.4.4           archive_1.1.12          cli_3.6.5              
##  [97] rpart_4.1.24            systemfonts_1.3.1       Rdpack_2.6.4           
## [100] jquerylib_0.1.4         Rcpp_1.1.0              parallel_4.5.2         
## [103] lme4_1.1-37             glmnet_4.1-10           viridisLite_0.4.2      
## [106] scales_1.4.0            e1071_1.7-16            crayon_1.5.3           
## [109] rlang_1.1.6.9000        mnormt_2.1.1            mice_3.18.0