I used the World Development Indicators (WDI) from the World Bank, a comprehensive global dataset with more than 1,500 economic, social, and environmental indicators for over 200 countries, covering several decades.
# NOTE(review): setwd() changes the working directory for the rest of the
# session and ties the script to one machine's folder layout; the relative
# path given to read.csv() below is resolved against this directory.
setwd("~/Documents/Rproject")
# Packages used below: dplyr (%>% and the data verbs), tidyr (pivot_longer),
# ggplot2 (plotting).
# NOTE(review): readr is attached, but no readr function is called in the
# visible code -- confirm whether it is still needed.
library(readr)
library(dplyr)
library(tidyr)
library(ggplot2)
# Load the WDI CSV with base read.csv(). Later code refers to columns such as
# Indicator.Code and X2018, i.e. it relies on the syntactic names that
# read.csv() presumably produced via its default check.names = TRUE.
wdi <- read.csv("WDICSV.csv")
# Count how many distinct indicator codes the dataset contains
# (1,516 in total): keep one row per Indicator.Code, then count the rows.
wdi %>%
  distinct(Indicator.Code) %>%
  nrow()
## [1] 1516
# To examine population growth, keep only the rows for the SP.POP.GROW
# indicator and, in the same step, only the identifier columns plus the seven
# most recent year columns (X2018 ... X2024).
# Rows whose Indicator.Code is NA are excluded explicitly, matching what
# subset() does with an NA condition.
pop_growth <- wdi[
  !is.na(wdi$Indicator.Code) & wdi$Indicator.Code == "SP.POP.GROW",
  c(
    "Country.Name", "Country.Code", "Indicator.Name", "Indicator.Code",
    paste0("X", 2018:2024)
  )
]
# Count the distinct country codes in the population-growth subset.
# The result is 266; note that this figure also includes aggregate entries
# such as the European Union (code EUU), not only individual countries.
pop_growth %>%
  distinct(Country.Code) %>%
  nrow() # 266 country codes
## [1] 266
# For convenience in visualization and comparison, restrict the data to the
# G20 members: 19 countries plus the European Union (EUU).
g20_code <- c(
  "ARG", "AUS", "BRA", "CAN", "CHN", "FRA", "DEU", "IND", "IDN", "ITA",
  "JPN", "MEX", "KOR", "RUS", "SAU", "ZAF", "TUR", "GBR", "USA", "EUU"
)
g20_pop_growth <- filter(pop_growth, Country.Code %in% g20_code)
# Quick per-column overview of the G20 subset
summary(g20_pop_growth)
## Country.Name Country.Code Indicator.Name Indicator.Code
## Length:20 Length:20 Length:20 Length:20
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## X2018 X2019 X2020 X2021
## Min. :-2.5538 Min. :-0.7000 Min. :-0.4871 Min. :-2.4646
## 1st Qu.: 0.2863 1st Qu.: 0.2016 1st Qu.: 0.1238 1st Qu.:-0.1326
## Median : 0.5374 Median : 0.5149 Median : 0.4465 Median : 0.2116
## Mean : 0.5491 Mean : 0.5743 Mean : 0.6939 Mean : 0.1490
## 3rd Qu.: 1.0016 3rd Qu.: 0.9726 3rd Qu.: 0.9711 3rd Qu.: 0.5823
## Max. : 1.6824 Max. : 1.6496 Max. : 4.8331 Max. : 1.5406
## X2022 X2023 X2024
## Min. :-0.4439 Min. :-0.4874 Min. :-0.46708
## 1st Qu.: 0.1548 1st Qu.: 0.1123 1st Qu.: 0.05258
## Median : 0.6483 Median : 0.4012 Median : 0.37565
## Mean : 0.7215 Mean : 0.8560 Mean : 0.79069
## 3rd Qu.: 0.9371 3rd Qu.: 0.9887 3rd Qu.: 0.99881
## Max. : 4.4189 Max. : 4.6382 Max. : 4.63120
# Calculate the average G20 growth rate for each year: a column-wise mean over
# the seven year columns, ignoring missing values.
# mean() is wrapped in an explicit function because forwarding extra arguments
# through across()'s `...` (i.e. `across(cols, mean, na.rm = TRUE)`) is
# deprecated since dplyr 1.1.0 and emits a warning.
yrs_mean <- g20_pop_growth %>%
  summarise(
    across(
      c("X2018", "X2019", "X2020", "X2021", "X2022", "X2023", "X2024"),
      function(x) mean(x, na.rm = TRUE)
    )
  )
print(yrs_mean)
## X2018 X2019 X2020 X2021 X2022 X2023 X2024
## 1 0.5490911 0.5742647 0.6938951 0.1489854 0.7215022 0.856019 0.7906855
# Calculate each G20 member's average population growth rate over the seven
# years 2018-2024 (row-wise mean, ignoring missing values).
row_mean <- rowMeans(g20_pop_growth[paste0("X", 2018:2024)], na.rm = TRUE)
# Pair every member with its average and rank from highest to lowest.
count_mean <- arrange(
  data.frame(
    Country.Name = g20_pop_growth$Country.Name,
    row_mean = row_mean
  ),
  desc(row_mean)
)
print(count_mean)
## Country.Name row_mean
## 1 Saudi Arabia 1.8662063
## 2 Canada 1.7434283
## 3 South Africa 1.4980363
## 4 Australia 1.4421237
## 5 India 0.9282281
## 6 Turkiye 0.8972410
## 7 Mexico 0.8386310
## 8 Indonesia 0.8374770
## 9 United Kingdom 0.6891008
## 10 United States 0.5787066
## 11 Brazil 0.5002460
## 12 Argentina 0.4468614
## 13 France 0.3372744
## 14 European Union 0.1551998
## 15 Germany 0.1468317
## 16 China 0.1299641
## 17 Korea, Rep. 0.1078306
## 18 Russian Federation -0.1740469
## 19 Italy -0.2440225
## 20 Japan -0.3411952
# Split the G20 members into two groups -- at/above versus below the 2023 mean
# growth rate -- then compare the maximum and minimum 2023 growth rate and the
# number of members in each group.
grouped <- g20_pop_growth %>%
  mutate(
    pop_group = ifelse(
      X2023 >= mean(X2023, na.rm = TRUE),
      "Above Mean",
      "Below Mean"
    )
  ) %>%
  group_by(pop_group) %>%
  summarise(
    max = max(X2023, na.rm = TRUE),
    min = min(X2023, na.rm = TRUE),
    n = n()
  )
print(grouped)
## # A tibble: 2 × 4
## pop_group max min n
## <chr> <dbl> <dbl> <int>
## 1 Above Mean 4.64 0.872 7
## 2 Below Mean 0.843 -0.487 13
# Reshape the G20 table to long format for plotting: one row per
# (country, year) with the growth rate in `value`.
# The leading "X" is stripped from the year column names and the remainder is
# converted to a number during the pivot itself.
df_long <- g20_pop_growth %>%
  select(-c(Country.Code, Indicator.Name, Indicator.Code)) %>%
  pivot_longer(
    cols = c("X2018", "X2019", "X2020", "X2021", "X2022", "X2023", "X2024"),
    names_to = "year",
    names_prefix = "X",
    names_transform = list(year = as.numeric),
    values_to = "value"
  )
# Line chart of the yearly population growth rate, one coloured line per G20
# member.
ggplot(df_long, aes(x = year, y = value, color = Country.Name)) +
  # `linewidth` replaces `size` for line geoms: using `size` for lines is
  # deprecated since ggplot2 3.4.0 and triggers a warning.
  geom_line(linewidth = 1) +
  labs(
    title = "Population Growth Rate of G20",
    x = "year",
    y = "value",
    color = "countries"
  ) +
  theme(
    # strip.text styles facet labels; this plot has no facets, so the setting
    # has no visible effect here.
    strip.text = element_text(size = 12, face = "bold"),
    # Tilt the x-axis tick labels so they do not overlap.
    axis.text.x = element_text(angle = 45, hjust = 1)
  )