Global GDP Trends and Drivers
Research Questions:
- How has global GDP (aggregate) changed from 1990 to 2024?
- Which countries have had the most or the fastest GDP growth?
- What is the relation between GDP growth and population growth?
Reading and Extracting the Relevant GDP Dataset
library(jsonlite)
library(tidyverse)
library(dplyr)
library(ggplot2)
Reading
# Download nominal GDP in current US$ (World Bank indicator NY.GDP.MKTP.CD)
# for every country and aggregate. per_page is presumably set high so the
# full series arrives in a single response page.
gdp_url <- "https://api.worldbank.org/v2/country/all/indicator/NY.GDP.MKTP.CD?format=json&per_page=20000"
gdp <- fromJSON(gdp_url, flatten = TRUE)

# Build the tidy GDP table that the cleaning steps below start from. The
# observation rows live in the second list element; column names follow the
# flattened World Bank response and mirror how the population table is built.
gdp_raw <- gdp[[2]]
gdp_df <- data.frame(
  country_iso3 = gdp_raw$countryiso3code,
  country = gdp_raw$country.value,
  year = as.integer(gdp_raw$date),
  gdp_current_usd = as.numeric(gdp_raw$value),
  stringsAsFactors = FALSE
)

length(gdp) # MetaData is [1] and actual rows is [2]
## [1] 2
Necessary data cleaning for analyzing research questions
- Filtering years for 1990-2024
- Convert GDP from USD to billions of USD
- Drop records that have no ISO3 country code (aggregates such as High Income or Low
Income are removed later, using the Countries dataset)
# Keep the 1990-2024 study window, drop rows that carry no ISO3 code, and
# express GDP in billions of USD.
gdp_df1 <- gdp_df %>%
  filter(
    year >= 1990 & year <= 2024,
    !is.na(country_iso3),
    country_iso3 != ""
  ) %>%
  mutate(gdp_billions_usd = gdp_current_usd / 1e9)
- Mutate to calculate year-over-year GDP Growth
# Year-over-year GDP growth (%), computed within each country after sorting
# its rows chronologically; the first row of every country has no predecessor
# and therefore gets NA.
gdp_df2 <- gdp_df1 %>%
  arrange(country_iso3, year) %>%
  group_by(country_iso3) %>%
  mutate(
    gdp_growth_pct =
      (gdp_billions_usd - lag(gdp_billions_usd)) /
        lag(gdp_billions_usd) * 100
  ) %>%
  ungroup()
Reading and Extracting the Relevant Population Dataset
# Read total population (World Bank indicator SP.POP.TOTL) and keep the three
# fields needed downstream, restricted to 1990-2024.
pop_url <- "https://api.worldbank.org/v2/country/all/indicator/SP.POP.TOTL?format=json&per_page=20000"
pop_response <- fromJSON(pop_url, flatten = TRUE)
raw_pop <- pop_response[[2]] # second element holds the observation rows
pop_df <- with(
  raw_pop,
  data.frame(
    country_iso3 = countryiso3code,
    year = as.integer(date),
    population = as.numeric(value),
    stringsAsFactors = FALSE
  )
)
pop_df <- filter(pop_df, year >= 1990, year <= 2024)
# Attach population to each country-year GDP row, then derive GDP per capita
# (current USD per person). Rows without a population match keep NA.
gdp_pop_df <- left_join(gdp_df2, pop_df, by = c("country_iso3", "year"))
gdp_pop_df <- mutate(
  gdp_pop_df,
  gdp_per_capita_usd = gdp_current_usd / population
)
Reading and Extracting the Relevant Countries Dataset
# Country metadata: used to tell real countries apart from World Bank
# aggregates (rows whose region is "Aggregates").
countries_url <- "https://api.worldbank.org/v2/country?format=json&per_page=400"
countries <- fromJSON(countries_url, flatten = TRUE)[[2]]
countries_df <- data.frame(
  country_iso3 = countries[["id"]],
  region_value = countries[["region.value"]],
  stringsAsFactors = FALSE
)

# ISO3 codes to retain: every non-aggregate entry, plus the "WLD" world total.
is_real_country <- !is.na(countries_df$country_iso3) &
  countries_df$region_value != "Aggregates"
keep_iso3 <- c("WLD", countries_df$country_iso3[is_real_country])
# Analysis copies of the GDP and population tables, limited to the ISO3 codes
# in keep_iso3 (individual countries plus the "WLD" world row).
gdp_df2_countries <- filter(gdp_df2, country_iso3 %in% keep_iso3)
pop_df_countries <- filter(pop_df, country_iso3 %in% keep_iso3)
Question 1: How has global GDP (aggregate) changed from 1990 to
2024?
# World GDP per year, obtained by summing the individual countries.
# keep_iso3 deliberately retains the "WLD" world aggregate, so that row must
# be excluded here; otherwise the world total is added on top of the sum of
# its own members and every yearly figure comes out roughly doubled.
global_gdp <- gdp_df2_countries %>%
  filter(
    country_iso3 != "WLD",
    !is.na(gdp_current_usd),
    year >= 1990, year <= 2024
  ) %>%
  group_by(year) %>%
  summarize(world_gdp_usd = sum(gdp_current_usd, na.rm = TRUE), .groups = "drop") %>%
  mutate(world_gdp_trillions = world_gdp_usd / 1e12)

# Preview the first years of the series.
global_gdp %>%
  arrange(year) %>%
  head()
## # A tibble: 6 × 3
## year world_gdp_usd world_gdp_trillions
## <int> <dbl> <dbl>
## 1 1990 4.56e13 45.6
## 2 1991 4.76e13 47.6
## 3 1992 5.08e13 50.8
## 4 1993 5.16e13 51.6
## 5 1994 5.56e13 55.6
## 6 1995 6.20e13 62.0
# Line chart of world GDP over time.
# `linewidth` replaces `size` for line geoms (ggplot2 >= 3.4.0); using `size`
# here triggers a deprecation warning.
ggplot(global_gdp, aes(x = year, y = world_gdp_trillions)) +
  geom_line(linewidth = 1, color = "red") +
  labs(
    title = "World GDP (Nominal), 1990–2024",
    x = "Year",
    y = "USD Trillions"
  )

Question 2: Which countries have had the most or the fastest GDP
growth?
# One row per country: first/last year with a GDP observation inside
# 1990-2024, the GDP at those two endpoints, and the number of years between.
# Missing GDP values are removed up front, so first()/last() on the sorted
# rows already pick the earliest and latest available observation.
country_span <- gdp_df2_countries %>%
  filter(
    country_iso3 != "WLD", # exclude World here
    !is.na(gdp_current_usd),
    year >= 1990 & year <= 2024
  ) %>%
  arrange(country_iso3, country, year) %>%
  group_by(country_iso3, country) %>%
  summarize(
    first_year = first(year),
    last_year = last(year),
    gdp_first = first(gdp_current_usd),
    gdp_last = last(gdp_current_usd),
    n_years = last_year - first_year,
    .groups = "drop"
  ) %>%
  # Keep only countries with a positive starting GDP and at least one year
  # between the endpoints.
  filter(!is.na(gdp_first), !is.na(gdp_last), gdp_first > 0, n_years >= 1)
# Growth measures per country: absolute change (USD and USD trillions) and the
# compound annual growth rate. CAGR is left as NA for spans shorter than five
# years.
growth_tbl <- mutate(
  country_span,
  abs_change_usd = gdp_last - gdp_first,
  abs_change_trn = abs_change_usd / 1e12,
  cagr = if_else(
    n_years < 5,
    NA_real_,
    (gdp_last / gdp_first)^(1 / n_years) - 1
  )
)
# Most growth: the 15 countries with the largest absolute GDP increase,
# formatted for display.
top_grow <- growth_tbl %>%
  arrange(desc(abs_change_usd)) %>%
  head(15) %>%
  mutate(
    Span = paste(first_year, last_year, sep = "–"),
    `Change (USD Trn)` = round(abs_change_trn, 2)
  ) %>%
  select(Country = country, Span, `Change (USD Trn)`)

print(top_grow, n = 15)
## # A tibble: 15 × 3
## Country Span `Change (USD Trn)`
## <chr> <chr> <dbl>
## 1 United States 1990–2024 23.2
## 2 China 1990–2024 18.4
## 3 India 1990–2024 3.59
## 4 Germany 1990–2024 2.88
## 5 United Kingdom 1990–2024 2.55
## 6 France 1990–2024 1.9
## 7 Brazil 1990–2024 1.79
## 8 Russian Federation 1990–2024 1.66
## 9 Canada 1990–2024 1.65
## 10 Mexico 1990–2024 1.59
## 11 Australia 1990–2024 1.44
## 12 Korea, Rep. 1990–2023 1.43
## 13 Indonesia 1990–2024 1.29
## 14 Italy 1990–2024 1.19
## 15 Spain 1990–2024 1.19
# Horizontal bar chart of the absolute GDP increase, largest at the top.
ggplot(
  data = top_grow,
  mapping = aes(
    x = reorder(Country, `Change (USD Trn)`),
    y = `Change (USD Trn)`
  )
) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Largest Absolute GDP Increase (1990–2024 span)",
    x = "",
    y = "Change (USD Trillions)"
  )

# Fastest growth: the 15 countries with the highest CAGR (only countries for
# which a CAGR was computed), formatted for display.
top_speed <- growth_tbl %>%
  filter(!is.na(cagr)) %>%
  arrange(desc(cagr)) %>%
  head(15) %>%
  mutate(
    Span = paste(first_year, last_year, sep = "–"),
    `CAGR (%)` = round(100 * cagr, 2)
  ) %>%
  select(Country = country, Span, `CAGR (%)`)

print(top_speed, n = 15)
## # A tibble: 15 × 3
## Country Span `CAGR (%)`
## <chr> <chr> <dbl>
## 1 Equatorial Guinea 1990–2024 14.9
## 2 Viet Nam 1990–2024 13.5
## 3 Guyana 1990–2024 12.9
## 4 China 1990–2024 12.3
## 5 Myanmar 1990–2024 11.0
## 6 Cambodia 1990–2024 10.8
## 7 Maldives 1990–2024 10.8
## 8 Qatar 1990–2024 10.5
## 9 Venezuela, RB 1990–2014 10.0
## 10 Turkmenistan 1990–2024 9.23
## 11 Nicaragua 1990–2024 9.13
## 12 Lao PDR 1990–2024 9.06
## 13 Dominican Republic 1990–2024 8.8
## 14 Montenegro 1997–2024 8.75
## 15 Costa Rica 1990–2024 8.63
# Horizontal bar chart of CAGR, fastest-growing country at the top.
ggplot(
  data = top_speed,
  mapping = aes(x = reorder(Country, `CAGR (%)`), y = `CAGR (%)`)
) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Fastest GDP Growth (CAGR, span ≥ 5 years)",
    x = "",
    y = "CAGR (%)"
  )

Question 3: What is the relation between GDP growth and population growth?
# Year-over-year population growth (%), computed within each country on
# chronologically sorted rows; each country's first row is NA.
pop_growth <- pop_df_countries %>%
  arrange(country_iso3, year) %>%
  group_by(country_iso3) %>%
  mutate(
    pop_growth_pct =
      (population - lag(population)) / lag(population) * 100
  ) %>%
  ungroup()
# Country-year table pairing GDP growth with population growth.
# Start from the country-filtered GDP table and drop the "WLD" world row:
# the population table keeps WLD, so without this filter the world aggregate
# would be joined in and treated as one more "country" in the regression.
growth_join <- gdp_df2_countries %>%
  filter(country_iso3 != "WLD") %>%
  select(country_iso3, country, year, gdp_growth_pct) %>%
  left_join(
    pop_growth %>% select(country_iso3, year, pop_growth_pct),
    by = c("country_iso3", "year")
  ) %>%
  # Growth rates start in 1991 (1990 has no prior year). is.finite() drops NA
  # as well as any Inf/NaN produced by a zero denominator, which lm() rejects.
  filter(
    year >= 1991, year <= 2024,
    is.finite(gdp_growth_pct), is.finite(pop_growth_pct)
  )
# Pearson correlation and simple linear regression of GDP growth on
# population growth; pull out the slope, its p-value, and R-squared.
cor_overall <- with(
  growth_join,
  cor(gdp_growth_pct, pop_growth_pct, use = "complete.obs")
)
fit <- lm(gdp_growth_pct ~ pop_growth_pct, data = growth_join)
s <- summary(fit)
coef_tab <- s$coefficients
pop_row <- coef_tab["pop_growth_pct", ]
slope <- pop_row[["Estimate"]]
pval <- pop_row[["Pr(>|t|)"]]
r2 <- s[["r.squared"]]
# Number of country-year rows that entered the model.
cat("Observations:", dim(growth_join)[1], "\n")
## Observations: 7002
# Overall correlation, rounded to three decimals.
cat(
  "Correlation (GDP YoY% vs Pop YoY%):",
  round(cor_overall, digits = 3),
  "\n"
)
## Correlation (GDP YoY% vs Pop YoY%): 0.118
# Regression summary on one line: slope, p-value, and R-squared.
cat(
  "Linear model slope (ΔGDP% per 1pp Pop%):", round(slope, digits = 3),
  "| p-value:", signif(pval, digits = 3),
  "| R²:", round(r2, digits = 3),
  "\n"
)
## Linear model slope (ΔGDP% per 1pp Pop%): 1.094 | p-value: 5.24e-23 | R²: 0.014
# Scatter of country-year observations with a fitted linear trend and its
# confidence band.
ggplot(
  data = growth_join,
  mapping = aes(x = pop_growth_pct, y = gdp_growth_pct)
) +
  geom_point(alpha = 0.25) +
  geom_smooth(method = "lm", se = TRUE) +
  labs(
    title = "Relationship: GDP Growth vs Population Growth (YoY, 1991–2024)",
    x = "Population Growth (%)",
    y = "GDP Growth (%)"
  )
## `geom_smooth()` using formula = 'y ~ x'
