Data Preparation

# Read the audience workbook and convert the result to a data.table in place
fifa <- setDT(read_excel("C:/Users/User/Downloads/week11_fifa_audience.xlsx"))
## New names:
## • `` -> `...1`

Filtering and Aggregation

# Rows whose population_share is greater than 1
countries_share_above1 <- fifa[population_share > 1, ]

# One row per confederation with the mean of each of the three share columns
confederation_stats <- fifa[,
  list(
    avg_pop = mean(population_share),
    avg_tv = mean(tv_audience_share),
    avg_gdp = mean(gdp_weighted_share)
  ),
  by = "confederation"
]

Top 5 Datasets

# Build the three "top 5" tables, one per share metric.
# Each table is `fifa` sorted by the metric in descending order and cut to its
# first five rows. head() is used instead of `[1:5]` because indexing past the
# last row of a data.table yields all-NA rows, which would then be plotted;
# head() simply returns fewer rows when fewer than five are available.
top_pop <- head(fifa[order(-population_share)], 5L)
top_tv <- head(fifa[order(-tv_audience_share)], 5L)
top_gdp <- head(fifa[order(-gdp_weighted_share)], 5L)

Visualizations

Top Population Share

# Horizontal bars of population_share for the rows in top_pop, with countries
# ordered by that value and each bar filled by country
ggplot(
  top_pop,
  aes(
    x = reorder(country, population_share),
    y = population_share,
    fill = country
  )
) +
  geom_col() +
  coord_flip() +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "Top 5 Countries by Population Share",
    x = "Country",
    y = "Population Share (%)"
  ) +
  theme_minimal()

Top TV Audience

# Vertical bars of tv_audience_share for the rows in top_tv, with countries
# ordered by that value and bars filled by confederation
ggplot(
  top_tv,
  aes(
    x = reorder(country, tv_audience_share),
    y = tv_audience_share,
    fill = confederation
  )
) +
  geom_col() +
  scale_fill_brewer(palette = "Dark2") +
  labs(
    title = "Top 5 TV Audiences",
    x = "Country",
    y = "TV Audience Share (%)"
  ) +
  theme_classic()

GDP vs Population

# Scatter of gdp_weighted_share against population_share for every row of
# fifa, coloured by confederation. Because colour is mapped in the top-level
# aes(), geom_smooth() fits a separate linear trend line per confederation.
ggplot(
  fifa,
  aes(
    x = population_share,
    y = gdp_weighted_share,
    color = confederation
  )
) +
  geom_point(size = 3) +
  geom_smooth(method = "lm", se = FALSE) +
  scale_color_brewer(palette = "Accent") +
  labs(
    title = "GDP Weighted vs Population Share",
    x = "Population Share",
    y = "GDP Weighted Share"
  ) +
  theme_bw()
## `geom_smooth()` using formula = 'y ~ x'

Confederation TV Audience

# Box plot of tv_audience_share within each confederation; the x-axis labels
# are rotated 45 degrees and right-aligned
ggplot(
  fifa,
  aes(
    x = confederation,
    y = tv_audience_share,
    fill = confederation
  )
) +
  geom_boxplot() +
  scale_fill_brewer(palette = "Pastel1") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "TV Audience Distribution by Confederation",
    x = "Confederation",
    y = "TV Audience Share"
  )

Top GDP Weighted

# Lollipop chart for the rows in top_gdp: a grey stem drawn from 0 to each
# country's gdp_weighted_share, capped by a coloured point, shown horizontally
ggplot(
  top_gdp,
  aes(
    x = reorder(country, gdp_weighted_share),
    y = gdp_weighted_share
  )
) +
  # Stems are added first so the points are drawn on top of them
  geom_segment(aes(xend = country, yend = 0), color = "grey50") +
  geom_point(size = 4, color = "#E69F00") +
  labs(
    title = "Top 5 GDP Weighted Shares",
    x = "Country",
    y = "GDP Weighted Share"
  ) +
  theme_minimal() +
  coord_flip()

Regional Distribution

# Lookup table pairing each confederation code with a region label
regions <- data.table(
  confederation = c(
    "UEFA", "CONMEBOL", "CONCACAF",
    "AFC", "CAF", "OFC"
  ),
  region = c(
    "Europe", "South America", "North America",
    "Asia", "Africa", "Oceania"
  )
)

# Attach the region label to every fifa row that has a matching confederation
merged_data <- merge(x = fifa, y = regions, by = "confederation")

# Overlaid, semi-transparent density curves of population_share, one per region
ggplot(merged_data, aes(x = population_share, fill = region)) +
  geom_density(alpha = 0.5) +
  scale_fill_brewer(palette = "Set3") +
  labs(
    title = "Population Share Distribution by Region",
    x = "Population Share",
    y = "Density"
  )

Country Comparison

# Display labels for the three share columns; the names double as the list of
# columns to reshape and as the factor level order on the x axis
metric_labels <- c(
  "population_share" = "Population Share",
  "tv_audience_share" = "TV Audience Share",
  "gdp_weighted_share" = "GDP Weighted Share"
)

# Rows for the three countries being compared
target_countries <- fifa[country %in% c("United States", "Brazil", "Turkey")]

# Long format: one row per country/metric pair, with the number in `value`
melted_data <- melt(
  target_countries,
  id.vars = "country",
  measure.vars = names(metric_labels),
  variable.name = "metric"
)

# One panel per country, one labelled bar per metric
ggplot(
  melted_data,
  aes(
    x = factor(metric, levels = names(metric_labels)),
    y = value,
    fill = metric
  )
) +
  geom_col(width = 0.7) +
  geom_text(aes(label = round(value, 1)), vjust = -0.5, size = 3.5) +
  facet_wrap(vars(country), nrow = 1) +
  scale_x_discrete(labels = metric_labels) +
  scale_fill_brewer(
    palette = "Set1",
    labels = metric_labels,
    guide = guide_legend(title = NULL)
  ) +
  # Upper limit is 10% above the largest value, leaving room for the
  # labels that geom_text() places above each bar
  ylim(0, max(melted_data$value) * 1.1) +
  labs(
    title = "Comparison: USA vs Brazil vs Turkey",
    subtitle = "FIFA Statistics",
    x = "",
    y = "Percentage Share",
    caption = "Source: FIFA"
  ) +
  # theme() must follow theme_minimal() so its overrides are not reset
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 16, hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5, margin = margin(b = 20)),
    axis.text.x = element_blank(),
    strip.text = element_text(face = "bold", size = 12),
    panel.spacing = unit(2, "lines"),
    legend.position = "bottom",
    panel.grid.major.x = element_blank()
  )

Conclusion

So, what did we get out of this?

First off, population size doesn’t always mean a bigger football audience. The biggest countries by population share aren’t necessarily the ones watching the most. TV audience share is way more concentrated in certain countries, meaning a few nations dominate viewership despite not having the largest populations.

GDP-weighted viewership gives another angle—some countries contribute more to FIFA’s commercial value because of their economic weight, not just their number of viewers. UEFA and CONMEBOL stand out in both audience numbers and economic impact, while Africa and Asia have large populations but lower GDP-weighted shares.

Confederations show different audience trends—UEFA dominates, but CONMEBOL has an outsized impact for its size. Meanwhile, Asia and Africa are growing markets with big potential.

Comparing the USA, Brazil, and Turkey (I am Azerbaijani, so of course I support the Turkish national team like our own) was interesting—Brazil dominates TV share, the USA leads in economic influence, and Turkey is a balanced mix of both.

Bottom line? FIFA’s audience isn’t just about numbers—it’s about money, culture, and media reach. Europe and South America still rule, but emerging markets have serious growth potential.