library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.1.9000 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.2.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
##
##
## Attaching package: 'magrittr'
##
##
## The following object is masked from 'package:purrr':
##
## set_names
##
##
## The following object is masked from 'package:tidyr':
##
## extract
##
##
## Loading required package: weights
##
## Loading required package: assertthat
##
##
## Attaching package: 'assertthat'
##
##
## The following object is masked from 'package:tibble':
##
## has_name
##
##
## Loading required package: psych
##
##
## Attaching package: 'psych'
##
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
##
##
## Loading required package: robustbase
##
##
## Attaching package: 'kirkegaard'
##
##
## The following object is masked from 'package:psych':
##
## rescale
##
##
## The following object is masked from 'package:assertthat':
##
## are_equal
##
##
## The following object is masked from 'package:purrr':
##
## is_logical
##
##
## The following object is masked from 'package:base':
##
## +
load_packages(
MASS,
ebpm,
countrycode,
rnaturalearth,
sf,
duckdb
)
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
##
## Linking to GEOS 3.10.2, GDAL 3.4.1, PROJ 8.2.1; sf_use_s2() is TRUE
## Loading required package: DBI
library(flextable)
##
## Attaching package: 'flextable'
##
## The following object is masked from 'package:purrr':
##
## compose
select = dplyr::select
filter = dplyr::filter
theme_set(theme_bw())
options(digits = 3)
#claude
if (F) {
library(btw)
btw::btw_mcp_session()
}
#country of production from TMDb API (modal company country, fallback to first origin_country)
tmdb_movies = read_csv("data/tmdb_movies.csv", show_col_types = FALSE)
tmdb_companies = read_csv("data/tmdb_prod_companies.csv", show_col_types = FALSE)
#modal company country per movie
company_mode = tmdb_companies %>%
filter(company_country != "") %>%
count(tconst, company_country) %>%
group_by(tconst) %>%
slice_max(n, n = 1, with_ties = TRUE) %>%
ungroup()
#for ties, pick the one matching first origin_country
primary = tmdb_movies %>%
mutate(primary_country = str_extract(origin_country, "^[^|]+")) %>%
select(tconst, primary_country)
movie_country = company_mode %>%
left_join(primary, by = "tconst") %>%
group_by(tconst) %>%
mutate(rank = row_number(desc(company_country == primary_country))) %>%
filter(rank == 1) %>%
ungroup() %>%
select(tconst, modal_country = company_country)
movie_country = primary %>%
left_join(movie_country, by = "tconst") %>%
mutate(country = coalesce(modal_country, primary_country)) %>%
select(tconst, country)
#load IMDB ratings and titles
con = dbConnect(duckdb(), read_only = TRUE)
movies = dbGetQuery(con, "
SELECT r.tconst, r.averageRating, r.numVotes, t.startYear, t.genres
FROM read_parquet('data/ratings.parquet') r
JOIN read_parquet('data/titles.parquet') t ON r.tconst = t.tconst
WHERE t.titleType = 'movie'
AND r.date = (SELECT MAX(date) FROM read_parquet('data/ratings.parquet'))
AND t.startYear IS NOT NULL
AND t.startYear <= 2022
")
titles = dbGetQuery(con, "SELECT tconst, primaryTitle FROM read_parquet('data/titles.parquet')")
dbDisconnect(con, shutdown = TRUE)
#remove documentaries, merge country
movies %<>%
filter(!grepl("Documentary", genres, fixed = TRUE)) %>%
left_join(movie_country, by = "tconst") %>%
filter(!is.na(country))
#merge TMDb ratings
movies = movies %>%
left_join(tmdb_movies %>% select(tconst, tmdb_vote = tmdb_vote_average), by = "tconst")
#countries with border changes — exclude from periods where borders are unstable
border_unstable = list(
ussr = c("SU", "RU", "UA", "BY", "GE", "AM", "AZ", "KZ", "UZ", "TM", "KG", "TJ", "LV", "LT", "EE", "MD"),
yugo = c("YU", "RS", "HR", "BA", "SI", "ME", "MK", "XK"),
czsk = c("CS", "CZ", "SK"),
germany = c("DE", "DD", "XG")
)
all_unstable = unlist(border_unstable)
#countries OK for 2000+ (borders stable by then)
ok_from_2000 = c("DE", "DD", "XG", "RU", "UA", "BY", "GE", "AM", "AZ", "KZ",
"LV", "LT", "EE", "MD", "CZ", "SK", "SI", "HR", "BA", "MK")
#always exclude (border changes 2006-2008)
always_exclude = c("RS", "ME", "XK")
if (!file.exists("data/gdp_maddison.csv"))
download.file("https://ourworldindata.org/grapher/gdp-per-capita-maddison.csv",
"data/gdp_maddison.csv", mode = "wb")
if (!file.exists("data/population.csv"))
download.file("https://ourworldindata.org/grapher/population.csv",
"data/population.csv", mode = "wb")
gdp_raw = read_csv("data/gdp_maddison.csv", show_col_types = FALSE) %>%
select(entity = Entity, code = Code, year = Year, gdppc = `GDP per capita`) %>%
filter(!is.na(code), !is.na(gdppc))
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
pop_raw = read_csv("data/population.csv", show_col_types = FALSE) %>%
select(entity = Entity, code = Code, year = Year, population = Population) %>%
filter(!is.na(code), !is.na(population))
periods = list("all" = c(1920, 2022), "1950" = c(1950, 2022),
"1980" = c(1980, 2022), "2000" = c(2000, 2022))
gdp_avgs = lapply(names(periods), function(p) {
yr = periods[[p]]
gdp_raw %>% filter(year >= yr[1], year <= yr[2]) %>%
group_by(code) %>% summarise(gdppc_mean = mean(gdppc), .groups = "drop") %>%
mutate(start_year = p)
}) %>% bind_rows()
pop_avgs = lapply(names(periods), function(p) {
yr = periods[[p]]
pop_raw %>% filter(year >= yr[1], year <= yr[2]) %>%
group_by(code) %>% summarise(pop_mean = mean(population), .groups = "drop") %>%
mutate(start_year = p)
}) %>% bind_rows()
country_vars = inner_join(gdp_avgs, pop_avgs, by = c("code", "start_year"))
The cleanest cross-country measure: how many movies (with 1000+ IMDB votes) does each country produce per capita? This measures cultural output/visibility without relying on ratings, which suffer from national voting biases.
#preferred spec: 1000+ votes, 1950+, no docs, border-stable countries
m = movies %>%
filter(numVotes >= 1000, startYear >= 1950, !country %in% all_unstable)
pop_1950 = pop_avgs %>% filter(start_year == "1950") %>% select(code, pop_mean)
gdp_1950 = gdp_avgs %>% filter(start_year == "1950") %>% select(code, gdppc_mean)
prod = m %>%
count(country, name = "n_total") %>%
mutate(iso3 = countrycode(country, "iso2c", "iso3c", warn = FALSE)) %>%
filter(!is.na(iso3)) %>%
left_join(pop_1950, by = c("iso3" = "code")) %>%
left_join(gdp_1950, by = c("iso3" = "code")) %>%
filter(!is.na(pop_mean), n_total >= 5)
prod$movies_pc = prod$n_total / prod$pop_mean * 1e6
prod$eb_movies_pc = ebpm_gamma(prod$n_total, prod$pop_mean)$posterior$mean * 1e6
prod$name = countrycode(prod$iso3, "iso3c", "country.name")
prod$log_gdppc = log(prod$gdppc_mean)
#top 20 all-time table
top20_alltime = prod %>%
arrange(desc(eb_movies_pc)) %>%
head(20) %>%
mutate(Rank = row_number(), `Pop. (M)` = round(pop_mean / 1e6, 1)) %>%
select(Rank, Country = name, ISO = iso3, Movies = n_total,
`Pop. (M)`, `Raw /M` = movies_pc, `EB /M` = eb_movies_pc) %>%
mutate(across(c(`Raw /M`, `EB /M`), ~round(., 1)))
ft_alltime = flextable(top20_alltime) %>%
add_header_lines("Top 20 countries by movie production per capita") %>%
add_footer_lines("Feature films 1950–2022 · ≥1,000 IMDB votes · ≥5 movies · ≥60 min · non-documentary\nCountry via TMDb modal-primary · EB = empirical Bayes · Population = OWID mean 1950–2022 · Rates per million\nExcludes border-unstable states (USSR, Yugoslavia, Czechoslovakia, DDR) and Serbia") %>%
bold(i = 1, part = "header") %>%
fontsize(i = 1, size = 16, part = "header") %>%
fontsize(size = 11, part = "body") %>%
fontsize(size = 9, part = "footer") %>%
colformat_num(j = "Movies", big.mark = ",", digits = 0) %>%
align(j = "Rank", align = "center") %>%
align(j = "ISO", align = "center") %>%
bg(bg = "white", part = "all") %>%
autofit() %>%
theme_vanilla()
save_as_image(ft_alltime, path = "figs/top20_movies_per_capita_alltime.png", zoom = 2)
## [1] "figs/top20_movies_per_capita_alltime.png"
ft_alltime
Top 20 countries by movie production per capita | ||||||
|---|---|---|---|---|---|---|
Rank | Country | ISO | Movies | Pop. (M) | Raw /M | EB /M |
1 | Iceland | ISL | 31 | 0.2 | 124.8 | 112.4 |
2 | Hong Kong SAR China | HKG | 479 | 5.2 | 92.2 | 91.7 |
3 | United States | USA | 16,986 | 247.2 | 68.7 | 68.7 |
4 | Denmark | DNK | 284 | 5.1 | 55.4 | 55.2 |
5 | Sweden | SWE | 434 | 8.5 | 50.9 | 50.8 |
6 | France | FRA | 2,572 | 55.4 | 46.5 | 46.4 |
7 | United Kingdom | GBR | 2,292 | 57.8 | 39.6 | 39.6 |
8 | Ireland | IRL | 141 | 3.6 | 39.0 | 38.8 |
9 | Norway | NOR | 165 | 4.2 | 38.8 | 38.7 |
10 | Luxembourg | LUX | 15 | 0.4 | 36.8 | 35.3 |
11 | Canada | CAN | 930 | 26.4 | 35.2 | 35.2 |
12 | Finland | FIN | 165 | 4.9 | 33.7 | 33.5 |
13 | Australia | AUS | 436 | 16.4 | 26.6 | 26.6 |
14 | Italy | ITA | 1,245 | 55.4 | 22.5 | 22.5 |
15 | Belgium | BEL | 225 | 10.0 | 22.5 | 22.4 |
16 | Spain | ESP | 739 | 38.3 | 19.3 | 19.3 |
17 | Greece | GRC | 179 | 9.7 | 18.4 | 18.4 |
18 | Netherlands | NLD | 264 | 14.4 | 18.3 | 18.3 |
19 | New Zealand | NZL | 62 | 3.4 | 18.2 | 18.2 |
20 | South Korea | KOR | 590 | 39.2 | 15.0 | 15.0 |
Feature films 1950–2022 · ≥1,000 IMDB votes · ≥5 movies · ≥60 min · non-documentary | ||||||
#top 30 post-reunification (1990+, can include Germany, Czech, Russia etc.)
m_post90 = movies %>%
filter(numVotes >= 1000, startYear >= 1990, !country %in% always_exclude) %>%
filter(!country %in% c("SU", "YU", "CS", "XC", "XG", "DD"))
pop_1990 = pop_raw %>%
filter(year >= 1990, year <= 2022) %>%
group_by(code) %>%
summarise(pop_mean = mean(population), .groups = "drop")
prod_post90 = m_post90 %>%
count(country, name = "n_movies") %>%
mutate(iso3 = countrycode(country, "iso2c", "iso3c", warn = FALSE)) %>%
filter(!is.na(iso3)) %>%
left_join(pop_1990, by = c("iso3" = "code")) %>%
filter(!is.na(pop_mean), n_movies >= 5) %>%
mutate(
name = countrycode(iso3, "iso3c", "country.name"),
movies_per_m = n_movies / pop_mean * 1e6
)
fit_post90 = ebpm_gamma(prod_post90$n_movies, prod_post90$pop_mean / 1e6)
prod_post90$eb_per_m = fit_post90$posterior$mean
top30_post90 = prod_post90 %>%
arrange(desc(eb_per_m)) %>%
head(30) %>%
mutate(Rank = row_number(), `Pop. (M)` = round(pop_mean / 1e6, 1)) %>%
select(Rank, Country = name, ISO = iso3, Movies = n_movies,
`Pop. (M)`, `Raw /M` = movies_per_m, `EB /M` = eb_per_m) %>%
mutate(across(c(`Raw /M`, `EB /M`), ~round(., 1)))
ft_post90 = flextable(top30_post90) %>%
add_header_lines("Top 30 countries by movie production per capita (post-reunification)") %>%
add_footer_lines("Feature films 1990–2022 · ≥1,000 IMDB votes · ≥5 movies · ≥60 min · non-documentary\nCountry via TMDb modal-primary · EB = empirical Bayes · Population = OWID mean 1990–2022 · Rates per million\nExcludes Serbia (borders unstable through 2008)") %>%
bold(i = 1, part = "header") %>%
fontsize(i = 1, size = 16, part = "header") %>%
fontsize(size = 11, part = "body") %>%
fontsize(size = 9, part = "footer") %>%
colformat_num(j = "Movies", big.mark = ",", digits = 0) %>%
align(j = "Rank", align = "center") %>%
align(j = "ISO", align = "center") %>%
bg(bg = "white", part = "all") %>%
autofit() %>%
theme_vanilla()
save_as_image(ft_post90, path = "figs/top30_movies_per_capita_post1990.png", zoom = 2)
## [1] "figs/top30_movies_per_capita_post1990.png"
ft_post90
Top 30 countries by movie production per capita (post-reunification) | ||||||
|---|---|---|---|---|---|---|
Rank | Country | ISO | Movies | Pop. (M) | Raw /M | EB /M |
1 | Iceland | ISL | 31 | 0.3 | 101.3 | 87.4 |
2 | Hong Kong SAR China | HKG | 336 | 6.8 | 49.2 | 48.9 |
3 | Denmark | DNK | 248 | 5.5 | 45.3 | 44.9 |
4 | United States | USA | 12,112 | 299.3 | 40.5 | 40.5 |
5 | Sweden | SWE | 341 | 9.3 | 36.6 | 36.5 |
6 | Ireland | IRL | 136 | 4.2 | 32.1 | 31.8 |
7 | Norway | NOR | 150 | 4.8 | 31.4 | 31.2 |
8 | France | FRA | 1,905 | 61.8 | 30.8 | 30.8 |
9 | Luxembourg | LUX | 14 | 0.5 | 28.4 | 26.6 |
10 | Finland | FIN | 136 | 5.3 | 25.7 | 25.5 |
11 | Canada | CAN | 826 | 33.0 | 25.0 | 25.0 |
12 | United Kingdom | GBR | 1,544 | 61.8 | 25.0 | 25.0 |
13 | Belgium | BEL | 207 | 10.7 | 19.3 | 19.3 |
14 | Australia | AUS | 367 | 21.2 | 17.3 | 17.3 |
15 | Estonia | EST | 22 | 1.4 | 15.9 | 15.7 |
16 | Netherlands | NLD | 244 | 16.5 | 14.8 | 14.8 |
17 | Spain | ESP | 631 | 43.9 | 14.4 | 14.4 |
18 | New Zealand | NZL | 54 | 4.2 | 12.8 | 12.8 |
19 | South Korea | KOR | 588 | 48.2 | 12.2 | 12.2 |
20 | Hungary | HUN | 109 | 10.1 | 10.8 | 10.8 |
21 | Greece | GRC | 112 | 10.8 | 10.4 | 10.4 |
22 | Italy | ITA | 579 | 58.8 | 9.9 | 9.9 |
23 | Germany | DEU | 795 | 81.9 | 9.7 | 9.7 |
24 | Croatia | HRV | 40 | 4.3 | 9.2 | 9.2 |
25 | Turkey | TUR | 589 | 71.1 | 8.3 | 8.3 |
26 | Japan | JPN | 1,041 | 126.7 | 8.2 | 8.2 |
27 | Austria | AUT | 67 | 8.3 | 8.1 | 8.1 |
28 | Israel | ISR | 43 | 6.9 | 6.3 | 6.3 |
29 | Poland | POL | 227 | 38.2 | 5.9 | 5.9 |
30 | Switzerland | CHE | 45 | 7.6 | 5.9 | 5.9 |
Feature films 1990–2022 · ≥1,000 IMDB votes · ≥5 movies · ≥60 min · non-documentary | ||||||
#world map of movie production per capita
world = ne_countries(scale = "medium", returnclass = "sf")
map_data = world %>%
filter(iso_a3_eh != "ATA") %>%
left_join(prod %>% select(iso3, value = eb_movies_pc), by = c("iso_a3_eh" = "iso3"))
ggplot(map_data) +
geom_sf(aes(fill = value), color = "grey40", linewidth = 0.1) +
scale_fill_viridis_c(name = "Per million", na.value = "grey90", trans = "pseudo_log") +
coord_sf(ylim = c(-55, 85), expand = FALSE) +
labs(title = "Movie production per capita (EB-adjusted)",
subtitle = "Non-documentary films with 1000+ IMDB votes, 1950+") +
theme_void() +
theme(legend.position = "bottom", legend.key.width = unit(2, "cm"))
GG_save("figs/map_production_pc.png")
How stable are country rankings in movie production across decades?
pop_decade = pop_raw %>%
mutate(decade = floor(year / 10) * 10) %>%
group_by(code, decade) %>%
summarise(pop_mean = mean(population), .groups = "drop")
#all country × decade combinations, filling zeros for countries with no movies
all_countries_iso3 = prod$iso3
grid = expand_grid(iso3 = all_countries_iso3, decade = seq(1950, 2010, 10))
counts_decade = m %>%
mutate(decade = floor(startYear / 10) * 10,
iso3 = countrycode(country, "iso2c", "iso3c", warn = FALSE)) %>%
filter(!is.na(iso3), decade >= 1950, decade <= 2010) %>%
count(iso3, decade, name = "n_movies")
counts_decade = grid %>%
left_join(counts_decade, by = c("iso3", "decade")) %>%
mutate(n_movies = replace_na(n_movies, 0L)) %>%
left_join(pop_decade, by = c("iso3" = "code", "decade")) %>%
filter(!is.na(pop_mean), pop_mean >= 1e6) %>%
mutate(movies_pc = n_movies / pop_mean * 1e6)
wide_decade = counts_decade %>%
select(iso3, decade, movies_pc) %>%
pivot_wider(names_from = decade, values_from = movies_pc, names_prefix = "d")
decades = paste0("d", seq(1950, 2010, 10))
cor_mat = matrix(NA, 7, 7, dimnames = list(seq(1950,2010,10), seq(1950,2010,10)))
for (i in 1:7) for (j in 1:7) {
x = wide_decade[[decades[i]]]; y = wide_decade[[decades[j]]]
ok = !is.na(x) & !is.na(y)
cor_mat[i, j] = cor(x[ok], y[ok], method = "spearman")
}
as.data.frame(as.table(cor_mat)) %>%
rename(decade1 = Var1, decade2 = Var2, r = Freq) %>%
ggplot(aes(decade1, decade2, fill = r)) +
geom_tile() +
geom_text(aes(label = round(r, 2)), size = 3.5) +
scale_fill_viridis_c(limits = c(0.5, 1), name = "Spearman r") +
labs(title = "Stability of movie production per capita across decades",
subtitle = "Spearman rank correlations, countries with 1M+ population",
x = NULL, y = NULL) +
theme_minimal() +
theme(panel.grid = element_blank())
GG_save("figs/decade_stability.png")
fit_nb = glm.nb(n_total ~ log_gdppc + offset(log(pop_mean)), data = prod)
summary(fit_nb)
##
## Call:
## glm.nb(formula = n_total ~ log_gdppc + offset(log(pop_mean)),
## data = prod, init.theta = 1.167339443, link = log)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -26.219 1.265 -20.7 <2e-16 ***
## log_gdppc 1.545 0.134 11.5 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(1.17) family taken to be 1)
##
## Null deviance: 169.645 on 59 degrees of freedom
## Residual deviance: 67.909 on 58 degrees of freedom
## AIC: 730.5
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 1.167
## Std. Err.: 0.196
##
## 2 x log-likelihood: -724.457
prod %>%
ggplot(aes(gdppc_mean, movies_pc, label = iso3)) +
geom_point(aes(size = n_total), alpha = 0.5) +
geom_text(size = 2.5, vjust = -0.8, check_overlap = TRUE) +
geom_smooth(method = "lm", se = TRUE, color = "blue") +
scale_x_log10(labels = scales::comma) +
scale_y_log10() +
scale_size_continuous(range = c(1, 8), guide = "none") +
labs(title = "GDP per capita predicts movie production",
subtitle = "Negative binomial: movies ~ log(GDP/cap) + offset(log(pop))",
x = "GDP per capita PPP (Maddison, log scale)",
y = "Movies per million (log scale)")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
GG_save("figs/gdp_vs_production.png")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
Using ratings to identify “top” movies introduces national voting bias: countries with ethnocentric IMDB user bases push domestic films to inflated ratings. We demonstrate this by comparing IMDB and TMDb ratings, and by contrasting rating-based rankings with production-based rankings.
m %>%
filter(!is.na(tmdb_vote), tmdb_vote > 0) %>%
ggplot(aes(averageRating, tmdb_vote)) +
geom_point(alpha = 0.05, size = 0.5) +
geom_abline(linetype = "dashed", color = "red") +
geom_smooth(method = "lm", color = "blue", se = FALSE) +
labs(title = "IMDB vs TMDb ratings",
subtitle = sprintf("r = %.2f, n = %d movies",
cor(m$averageRating[!is.na(m$tmdb_vote) & m$tmdb_vote > 0],
m$tmdb_vote[!is.na(m$tmdb_vote) & m$tmdb_vote > 0]),
sum(!is.na(m$tmdb_vote) & m$tmdb_vote > 0)),
x = "IMDB rating", y = "TMDb rating")
## `geom_smooth()` using formula = 'y ~ x'
GG_save("figs/imdb_vs_tmdb.png")
## `geom_smooth()` using formula = 'y ~ x'
#mean IMDB-TMDb gap by country (countries with 50+ movies)
country_bias = m %>%
filter(!is.na(tmdb_vote), tmdb_vote > 0) %>%
group_by(country) %>%
filter(n() >= 50) %>%
summarise(
n = n(),
imdb_mean = mean(averageRating),
tmdb_mean = mean(tmdb_vote),
bias = mean(averageRating - tmdb_vote),
.groups = "drop"
) %>%
mutate(name = countrycode(country, "iso2c", "iso3c", warn = FALSE) %>%
countrycode("iso3c", "country.name")) %>%
arrange(desc(bias))
country_bias %>%
ggplot(aes(bias, fct_reorder(name, bias))) +
geom_col() +
geom_vline(xintercept = 0, linetype = "dashed") +
labs(title = "IMDB rating inflation by country",
subtitle = "Mean (IMDB - TMDb) gap, countries with 50+ movies",
x = "IMDB - TMDb rating gap", y = NULL)
GG_save("figs/country_rating_bias.png")
#biggest IMDB >> TMDb (50k+ votes to filter out bot noise)
m %>%
filter(!is.na(tmdb_vote), tmdb_vote > 0, numVotes >= 50000) %>%
left_join(titles, by = "tconst") %>%
mutate(diff = averageRating - tmdb_vote,
name = countrycode(country, "iso2c", "iso3c", warn = FALSE) %>%
countrycode("iso3c", "country.name")) %>%
arrange(desc(diff)) %>%
select(primaryTitle, name, startYear, averageRating, tmdb_vote, diff, numVotes) %>%
head(20)
#top movies (>= 7.5) per country using IMDB and TMDb ratings
top_counts = m %>%
group_by(country) %>%
summarise(
n_total = n(),
n_top_imdb = sum(averageRating >= 7.5),
n_top_tmdb = sum(tmdb_vote >= 7.5, na.rm = TRUE),
.groups = "drop"
) %>%
mutate(iso3 = countrycode(country, "iso2c", "iso3c", warn = FALSE)) %>%
filter(!is.na(iso3)) %>%
left_join(pop_1950, by = c("iso3" = "code")) %>%
filter(!is.na(pop_mean), n_total >= 5)
top_counts$eb_imdb_pc = ebpm_gamma(top_counts$n_top_imdb, top_counts$pop_mean)$posterior$mean * 1e6
top_counts$eb_tmdb_pc = ebpm_gamma(top_counts$n_top_tmdb, top_counts$pop_mean)$posterior$mean * 1e6
top_counts$eb_avg_pc = (top_counts$eb_imdb_pc + top_counts$eb_tmdb_pc) / 2
top_counts$movies_pc = top_counts$n_total / top_counts$pop_mean * 1e6
top_counts$name = countrycode(top_counts$iso3, "iso3c", "country.name")
Countries that rank much higher on ratings than on production are likely benefiting from ethnocentric voting. Countries that rank lower on ratings than production have less IMDB voting presence.
top_counts %<>% mutate(
rank_output = rank(-movies_pc),
rank_rating = rank(-eb_avg_pc),
rank_diff = rank_rating - rank_output
)
top_counts %>%
ggplot(aes(rank_output, rank_rating, label = iso3)) +
geom_point(aes(size = n_total), alpha = 0.5) +
geom_text(size = 2.5, vjust = -0.8, check_overlap = TRUE) +
geom_abline(linetype = "dashed", color = "red") +
labs(title = "Production rank vs rating rank",
subtitle = "Above diagonal = overperforms on ratings (possible voting bias)\nBelow = underperforms on ratings",
x = "Production per capita rank (1 = most)", y = "Top-rated per capita rank (1 = most)") +
scale_size_continuous(range = c(1, 6), guide = "none")
GG_save("figs/production_vs_rating_rank.png")
#overperformers: ratings much better than production rank
top_counts %>%
arrange(rank_diff) %>%
select(name, n_total, n_top_imdb, n_top_tmdb, rank_output, rank_rating, rank_diff) %>%
head(15)
#underperformers: produce many movies but few top-rated
top_counts %>%
arrange(desc(rank_diff)) %>%
select(name, n_total, n_top_imdb, n_top_tmdb, rank_output, rank_rating, rank_diff) %>%
head(15)
#generate counts across all specification combinations
min_votes_list = c(100, 1000, 10000)
start_years_spec = list("all" = 1920, "1950" = 1950, "1980" = 1980, "2000" = 2000)
results = list()
for (mv in min_votes_list) {
for (sy_name in names(start_years_spec)) {
sy = start_years_spec[[sy_name]]
dat = movies %>%
filter(numVotes >= mv, startYear >= sy, !country %in% always_exclude)
if (sy_name %in% c("all", "1950", "1980")) {
dat = dat %>% filter(!country %in% all_unstable)
} else {
dat = dat %>% filter(!country %in% c(setdiff(all_unstable, ok_from_2000), "SU", "YU", "CS"))
}
if (nrow(dat) == 0) next
dat = dat %>% mutate(pctile = percent_rank(averageRating) * 100)
country_totals = dat %>% count(country, name = "n_total")
criteria = list(
"abs_7.5" = dat$averageRating >= 7.5,
"abs_8.0" = dat$averageRating >= 8.0,
"top_10pct" = dat$pctile >= 90,
"top_5pct" = dat$pctile >= 95,
"top_1pct" = dat$pctile >= 99
)
for (cr_name in names(criteria)) {
top = dat %>% filter(criteria[[cr_name]]) %>% count(country, name = "n_top")
merged = country_totals %>%
left_join(top, by = "country") %>%
mutate(n_top = replace_na(n_top, 0L),
rating_criterion = cr_name, min_votes = mv, start_year = sy_name)
results[[length(results) + 1]] = merged
}
}
}
d_multi = bind_rows(results) %>%
mutate(iso3 = countrycode(country, "iso2c", "iso3c", warn = FALSE)) %>%
filter(!is.na(iso3)) %>%
left_join(country_vars, by = c("iso3" = "code", "start_year")) %>%
filter(!is.na(gdppc_mean), !is.na(pop_mean)) %>%
mutate(log_gdppc = log(gdppc_mean), log_pop = log(pop_mean),
movies_pc = n_total / pop_mean * 1e6, top_pc = n_top / pop_mean * 1e6)
cat(sprintf("Multiverse: %d rows, %d countries, %d specs\n",
nrow(d_multi), n_distinct(d_multi$country),
n_distinct(paste(d_multi$rating_criterion, d_multi$min_votes, d_multi$start_year))))
## Multiverse: 5415 rows, 131 countries, 60 specs
#negative binomial: n_total ~ log_gdppc + offset(log_pop) across specs
fits_prod = d_multi %>%
group_by(min_votes, start_year) %>%
do({
m = tryCatch(glm.nb(n_total ~ log_gdppc + offset(log_pop), data = .), error = function(e) NULL)
if (!is.null(m)) broom::tidy(m) %>% filter(term == "log_gdppc") else tibble()
}) %>% ungroup() %>%
mutate(outcome = "Total movies")
fits_top = d_multi %>%
group_by(rating_criterion, min_votes, start_year) %>%
do({
m = tryCatch(glm.nb(n_top ~ log_gdppc + offset(log_pop), data = .), error = function(e) NULL)
if (!is.null(m)) broom::tidy(m) %>% filter(term == "log_gdppc") else tibble()
}) %>% ungroup() %>%
mutate(outcome = "Top-rated movies")
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in glm.nb(n_top ~ log_gdppc + offset(log_pop), data = .): alternation
## limit reached
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in glm.nb(n_top ~ log_gdppc + offset(log_pop), data = .): alternation
## limit reached
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in glm.nb(n_top ~ log_gdppc + offset(log_pop), data = .): alternation
## limit reached
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in glm.nb(n_top ~ log_gdppc + offset(log_pop), data = .): alternation
## limit reached
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in glm.nb(n_top ~ log_gdppc + offset(log_pop), data = .): alternation
## limit reached
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in glm.nb(n_top ~ log_gdppc + offset(log_pop), data = .): alternation
## limit reached
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in glm.nb(n_top ~ log_gdppc + offset(log_pop), data = .): alternation
## limit reached
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in glm.nb(n_top ~ log_gdppc + offset(log_pop), data = .): alternation
## limit reached
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in sqrt(1/i): NaNs produced
## Warning in glm.nb(n_top ~ log_gdppc + offset(log_pop), data = .): alternation
## limit reached
fits_all = bind_rows(fits_prod, fits_top)
fits_all %>%
mutate(setting = paste(outcome, rating_criterion, min_votes, start_year, sep = "/")) %>%
ggplot(aes(estimate, fct_reorder(setting, estimate), color = outcome)) +
geom_point() +
geom_errorbarh(aes(xmin = estimate - 1.96 * std.error,
xmax = estimate + 1.96 * std.error), height = 0.3) +
geom_vline(xintercept = 0, linetype = "dashed") +
labs(title = "Effect of log GDP per capita on movie counts (multiverse)",
subtitle = "Negative binomial with population offset, 95% CIs",
x = "Coefficient (log GDP per capita PPP)", y = NULL, color = NULL) +
theme(legend.position = "top")
## Warning: `geom_errorbarh()` was deprecated in ggplot2 4.0.0.
## ℹ Please use the `orientation` argument of `geom_errorbar()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `height` was translated to `width`.
GG_save("figs/gdp_multiverse.png")
## `height` was translated to `width`.
oscar_data = readRDS("data/oscar_data.rds")
oscar_merged = prod %>%
left_join(oscar_data %>% select(ISO3, oscar_eb = nominations_post), by = c("iso3" = "ISO3")) %>%
filter(!is.na(oscar_eb))
rs_oscar = cor(oscar_merged$eb_movies_pc, oscar_merged$oscar_eb, method = "spearman")
oscar_merged %>%
ggplot(aes(oscar_eb, eb_movies_pc)) +
geom_point(size = 2) +
ggrepel::geom_text_repel(aes(label = iso3), size = 2.5, max.overlaps = 20) +
geom_smooth(method = "lm", se = FALSE, linetype = "dashed") +
annotate("text", x = max(oscar_merged$oscar_eb) * 0.7, y = max(oscar_merged$eb_movies_pc) * 0.95,
label = sprintf("r[s] == %.2f", rs_oscar), parse = TRUE, size = 5) +
labs(title = "Movie production vs. Oscar nominations per capita",
subtitle = "Both EB-adjusted rates per million, countries with pop > 1M",
x = "Oscar nominations per million (EB)", y = "Movies per million (EB)")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: ggrepel: 29 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
GG_save("figs/oscar_vs_production.png")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: ggrepel: 22 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
top_oscar = top_counts %>%
left_join(oscar_data %>% select(ISO3, oscar_eb = nominations_post), by = c("iso3" = "ISO3")) %>%
filter(!is.na(oscar_eb))
rs_top_oscar = cor(top_oscar$eb_avg_pc, top_oscar$oscar_eb, method = "spearman")
top_oscar %>%
ggplot(aes(oscar_eb, eb_avg_pc)) +
geom_point(size = 2) +
ggrepel::geom_text_repel(aes(label = iso3), size = 2.5, max.overlaps = 20) +
geom_smooth(method = "lm", se = FALSE, linetype = "dashed") +
annotate("text", x = max(top_oscar$oscar_eb) * 0.7, y = max(top_oscar$eb_avg_pc) * 0.95,
label = sprintf("r[s] == %.2f", rs_top_oscar), parse = TRUE, size = 5) +
labs(title = "Top-rated movies vs. Oscar nominations per million",
subtitle = "Both EB-adjusted rates, countries with pop > 1M",
x = "Oscar nominations per million (EB)",
y = "Top-rated movies (≥7.5) per million (EB)")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: ggrepel: 20 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
GG_save("figs/toprated_vs_oscar_eb.png")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: ggrepel: 14 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
sci_pubs = read.csv("data/scientific_publications.csv") %>%
select(code = Code, year = Year,
sci_pm = Scientific.and.technical.journal.articles.per.million.people) %>%
filter(!is.na(code), !is.na(sci_pm)) %>%
group_by(code) %>%
summarise(sci_pm = mean(sci_pm, na.rm = TRUE), .groups = "drop")
cor_data = prod %>%
select(iso3, movies_pc = eb_movies_pc) %>%
full_join(top_counts %>% select(iso3, eb_imdb_pc, eb_tmdb_pc), by = "iso3") %>%
full_join(oscar_data %>% select(ISO3, oscar_eb = nominations_post) %>%
mutate(iso3 = ISO3) %>% select(iso3, oscar_eb), by = "iso3") %>%
full_join(sci_pubs %>% rename(iso3 = code), by = "iso3") %>%
full_join(gdp_1950 %>% rename(iso3 = code), by = "iso3")
vars = c("Movies/pop (EB)" = "movies_pc", "Top IMDB/pop (EB)" = "eb_imdb_pc",
"Top TMDb/pop (EB)" = "eb_tmdb_pc", "Oscar noms/pop (EB)" = "oscar_eb",
"Sci pubs/pop" = "sci_pm", "GDP per capita" = "gdppc_mean")
n_vars = length(vars)
cor_mat2 = matrix(NA, n_vars, n_vars, dimnames = list(names(vars), names(vars)))
for (i in 1:n_vars) for (j in 1:n_vars) {
x = cor_data[[vars[i]]]; y = cor_data[[vars[j]]]
ok = !is.na(x) & !is.na(y)
cor_mat2[i, j] = cor(x[ok], y[ok], method = "spearman")
}
as.data.frame(as.table(cor_mat2)) %>%
rename(var1 = Var1, var2 = Var2, r = Freq) %>%
ggplot(aes(var1, var2, fill = r)) +
geom_tile() +
geom_text(aes(label = round(r, 2)), size = 3.5) +
scale_fill_gradient(low = "white", high = "red", limits = c(0, 1), name = "Spearman r") +
labs(title = "Correlation matrix of country-level cultural and economic indicators",
subtitle = "Spearman correlations, pairwise complete, full_join",
x = NULL, y = NULL) +
theme_minimal() +
theme(panel.grid = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1))
GG_save("figs/correlation_matrix.png")
write_sessioninfo()
## R version 4.5.2 (2025-10-31)
## Platform: x86_64-pc-linux-gnu
## Running under: Linux Mint 21.1
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0 LAPACK version 3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_DK.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_DK.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Europe/Brussels
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] flextable_0.9.10 duckdb_1.4.4 DBI_1.2.3
## [4] sf_1.0-21 rnaturalearth_1.1.0 countrycode_1.6.1
## [7] ebpm_0.0.1.3 MASS_7.3-65 kirkegaard_2025-11-19
## [10] robustbase_0.99-6 psych_2.5.6 assertthat_0.2.1
## [13] weights_1.1.2 magrittr_2.0.4 lubridate_1.9.4
## [16] forcats_1.0.1 stringr_1.6.0 dplyr_1.1.4
## [19] purrr_1.2.0 readr_2.1.5 tidyr_1.3.1
## [22] tibble_3.3.0 ggplot2_4.0.1.9000 tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] RColorBrewer_1.1-3 rstudioapi_0.17.1 jsonlite_2.0.0
## [4] shape_1.4.6.1 jomo_2.7-6 farver_2.1.2
## [7] nloptr_2.2.1 rmarkdown_2.30 ragg_1.5.0
## [10] vctrs_0.6.5 minqa_1.2.8 askpass_1.2.1
## [13] base64enc_0.1-3 mixsqp_0.3-54 htmltools_0.5.8.1
## [16] broom_1.0.11 Formula_1.2-5 mitml_0.4-5
## [19] sass_0.4.10 KernSmooth_2.23-26 bslib_0.9.0
## [22] htmlwidgets_1.6.4 cachem_1.1.0 uuid_1.2-1
## [25] lifecycle_1.0.4 iterators_1.0.14 pkgconfig_2.0.3
## [28] Matrix_1.7-4 R6_2.6.1 fastmap_1.2.0
## [31] rbibutils_2.3 digest_0.6.39 colorspace_2.1-2
## [34] irlba_2.3.5.1 textshaping_1.0.4 Hmisc_5.2-4
## [37] labeling_0.4.3 timechange_0.3.0 gdata_3.0.1
## [40] mgcv_1.9-3 compiler_4.5.2 proxy_0.4-27
## [43] bit64_4.6.0-1 fontquiver_0.2.1 withr_3.0.2
## [46] htmlTable_2.4.3 S7_0.2.1 backports_1.5.0
## [49] pan_1.9 openssl_2.3.4 classInt_0.4-11
## [52] gtools_3.9.5 tools_4.5.2 units_1.0-0
## [55] foreign_0.8-91 zip_2.3.3 nnet_7.3-20
## [58] glue_1.8.0 rnaturalearthdata_1.0.0 nlme_3.1-168
## [61] grid_4.5.2 checkmate_2.3.3 cluster_2.1.8.2
## [64] generics_0.1.4 gtable_0.3.6 tzdb_0.5.0
## [67] class_7.3-23 data.table_1.17.8 hms_1.1.3
## [70] xml2_1.4.0 ggrepel_0.9.6 foreach_1.5.2
## [73] pillar_1.11.1 vroom_1.6.6 splines_4.5.2
## [76] lattice_0.22-7 survival_3.8-6 bit_4.6.0
## [79] tidyselect_1.2.1 fontLiberation_0.1.0 knitr_1.50
## [82] fontBitstreamVera_0.1.1 reformulas_0.4.1 gridExtra_2.3
## [85] xfun_0.53 matrixStats_1.5.0 DEoptimR_1.1-4
## [88] stringi_1.8.7 yaml_2.3.10 boot_1.3-32
## [91] evaluate_1.0.5 codetools_0.2-20 officer_0.7.0
## [94] gdtools_0.4.4 archive_1.1.12 cli_3.6.5
## [97] rpart_4.1.24 systemfonts_1.3.1 Rdpack_2.6.4
## [100] jquerylib_0.1.4 Rcpp_1.1.0 parallel_4.5.2
## [103] lme4_1.1-37 glmnet_4.1-10 viridisLite_0.4.2
## [106] scales_1.4.0 e1071_1.7-16 crayon_1.5.3
## [109] rlang_1.1.6.9000 mnormt_2.1.1 mice_3.18.0