Here’s my analysis:

Step 1: Load Packages & Data

library(tidyverse)
library(readr)
df<-read_csv("WDICSV.csv")
country_list<-read_csv("WDICountry.csv")

Step 2: Check the data

head(df)
colnames(df)
head(country_list)
colnames(country_list)

Step 3: Clean the data (excluding non-countries & only evaluate data from 2014-2024)

# filter real countries' code 
valid_countries <- country_list %>%
  filter(!is.na(Region) & Region != "",
         `Income Group` != "Aggregates",
         !grepl("Rep\\.", `Table Name`)) %>%
  pull(`Country Code`)
head(valid_countries)

## [1] "ABW" "AFG" "AGO" "ALB" "AND" "ARE"

length(valid_countries)

## [1] 208

# Keep only real countries & 2014-2024 data in df 
df_filtered <- df %>% filter(`Country Code` %in% valid_countries) %>% select(`Country Name`, `Country Code`, `Indicator Name`, `Indicator Code`, `2014`:`2024`) 
head(df_filtered)

## # A tibble: 6 × 15
##   `Country Name` `Country Code` `Indicator Name`  `Indicator Code` `2014` `2015`
##   <chr>          <chr>          <chr>             <chr>             <dbl>  <dbl>
## 1 Afghanistan    AFG            Access to clean … EG.CFT.ACCS.ZS     26.1   27.6
## 2 Afghanistan    AFG            Access to clean … EG.CFT.ACCS.RU.…   10.2   11.4
## 3 Afghanistan    AFG            Access to clean … EG.CFT.ACCS.UR.…   78     79.5
## 4 Afghanistan    AFG            Access to electr… EG.ELC.ACCS.ZS     89.5   71.5
## 5 Afghanistan    AFG            Access to electr… EG.ELC.ACCS.RU.…   86.5   64.6
## 6 Afghanistan    AFG            Access to electr… EG.ELC.ACCS.UR.…   98.7   92.5
## # ℹ 9 more variables: `2016` <dbl>, `2017` <dbl>, `2018` <dbl>, `2019` <dbl>,
## #   `2020` <dbl>, `2021` <dbl>, `2022` <dbl>, `2023` <dbl>, `2024` <dbl>

dim(df_filtered)

## [1] 315328     15

Step 4: Narrow down the high-potential countries which GDP>100B & Population>10M

# Filter: GDP > 100B USD in 2024
valid_gdp_countries <- df_filtered %>%
  filter(`Indicator Name` == "GDP (current US$)" & `2024` > 100000000000) %>%
  pull(`Country Code`)

# Filter: Population > 10M in 2024
valid_pop_countries <- df_filtered %>%
  filter(`Indicator Name` == "Population, total" & `2024` > 10000000) %>%
  pull(`Country Code`)

# Final valid countries = intersection
valid_countries_final <- intersect(valid_gdp_countries, valid_pop_countries)

# Keep only those countries
df_filtered_final <- df_filtered %>%
  filter(`Country Code` %in% valid_countries_final)

dim(df_filtered_final)

## [1] 72768    15

Step 5: Filter key indicators - GDP, GDP Growth, Population

df_potential <- df_filtered_final %>%
  filter(`Indicator Name` %in% c("GDP (current US$)",
                                 "GDP growth (annual %)",
                                 "Population, total"))
head(df_potential)

## # A tibble: 6 × 15
##   `Country Name` `Country Code` `Indicator Name`      `Indicator Code`    `2014`
##   <chr>          <chr>          <chr>                 <chr>                <dbl>
## 1 Algeria        DZA            GDP (current US$)     NY.GDP.MKTP.CD     2.39e11
## 2 Algeria        DZA            GDP growth (annual %) NY.GDP.MKTP.KD.ZG  4.10e 0
## 3 Algeria        DZA            Population, total     SP.POP.TOTL        3.92e 7
## 4 Argentina      ARG            GDP (current US$)     NY.GDP.MKTP.CD     5.26e11
## 5 Argentina      ARG            GDP growth (annual %) NY.GDP.MKTP.KD.ZG -2.51e 0
## 6 Argentina      ARG            Population, total     SP.POP.TOTL        4.30e 7
## # ℹ 10 more variables: `2015` <dbl>, `2016` <dbl>, `2017` <dbl>, `2018` <dbl>,
## #   `2019` <dbl>, `2020` <dbl>, `2021` <dbl>, `2022` <dbl>, `2023` <dbl>,
## #   `2024` <dbl>

Step 6: Calculate average GDP growth for the past 10 years

df_growth <- df_potential %>%
  filter(`Indicator Name` == "GDP growth (annual %)") %>%
  pivot_longer(cols = `2014`:`2024`, names_to = "Year", values_to = "GDP_Growth") %>%
  group_by(`Country Name`) %>%
  summarize(
    Avg_GDP_Growth = mean(GDP_Growth, na.rm = TRUE)
  ) %>%
  ungroup()
head(df_growth)

## # A tibble: 6 × 2
##   `Country Name` Avg_GDP_Growth
##   <chr>                   <dbl>
## 1 Algeria                 2.25 
## 2 Argentina              -0.107
## 3 Australia               2.36 
## 4 Bangladesh              6.27 
## 5 Belgium                 1.65 
## 6 Brazil                  0.832

Step 7: Check the most high-potential market with 2024 data

df_2024 <- df_potential %>%
  filter(`Indicator Name` %in% c("GDP (current US$)", "Population, total")) %>%
  select(`Country Name`, `Indicator Name`, `2024`) %>%
  pivot_wider(names_from = `Indicator Name`, values_from = `2024`)

Step 8: Combine indicators and filter high-potential markets

df_final <- df_2024 %>%
  left_join(df_growth, by = "Country Name") %>%
  filter(`GDP (current US$)` > 100000000000, 
         `Population, total` > 10000000, 
         Avg_GDP_Growth > 3) %>%
  arrange(desc(Avg_GDP_Growth))
df_final %>% count()

## # A tibble: 1 × 1
##       n
##   <int>
## 1    18

print(df_final)

## # A tibble: 18 × 4
##    `Country Name`       `GDP (current US$)` `Population, total` Avg_GDP_Growth
##    <chr>                              <dbl>               <dbl>          <dbl>
##  1 Bangladesh                       4.50e11           173562364           6.27
##  2 India                            3.91e12          1450935791           6.18
##  3 Viet Nam                         4.76e11           100987686           6.18
##  4 China                            1.87e13          1408975000           5.94
##  5 Uzbekistan                       1.15e11            36361859           5.92
##  6 Dominican Republic               1.24e11            11427557           5.02
##  7 Philippines                      4.62e11           115843670           4.93
##  8 Turkiye                          1.32e12            85518661           4.80
##  9 Kenya                            1.24e11            56432944           4.64
## 10 Indonesia                        1.40e12           283487931           4.29
## 11 Malaysia                         4.22e11            35557673           4.18
## 12 Pakistan                         3.73e11           251269164           3.75
## 13 Poland                           9.15e11            36554707           3.70
## 14 Guatemala                        1.13e11            18406359           3.58
## 15 Romania                          3.83e11            19069340           3.40
## 16 Kazakhstan                       2.88e11            20592571           3.10
## 17 United Arab Emirates             5.37e11            10876981           3.09
## 18 Saudi Arabia                     1.24e12            35300280           3.05

High-Potential Countries includes:

Bangladesh,India,Vietnam,China,Uzbekistan,Dominican Republic,Philippines,Turkiye,Kenya,Indonesia, Malaysia,Pakistan,Poland,Guatemala, Romania,Kazakhstan,United Arab Emirates and Saudi Arabia.

Step 9: Visualize the high-potential countries

ggplot(df_final, aes(x = `GDP (current US$)`/1000000000000, 
                     y = Avg_GDP_Growth, 
                     size = `Population, total`/1000000000,
                     label = `Country Name`)) +
  geom_point(color = "steelblue", alpha = 0.7) +
  geom_text(vjust = -0.5, size = 2.8) +
  scale_x_continuous(labels = scales::number_format(accuracy = 0.1)) +
  scale_size_continuous(labels = scales::number_format(accuracy = 0.1)) +
  labs(title = "Potential Growth Markets (2024)", 
       x = "GDP (Trillion USD)", 
       y = "Average GDP Growth (2014-2024)",
       size = "Population in Billion") +
  theme_minimal()

Identify the High-Protential Countries

Yuting Wu

2025-09-21

Which countries with a substantial economic base have shown the most robust economic growth in the recent past, and therefore represent the most attractive investment opportunities?

Here’s my analysis:

Step 1: Load Packages & Data

Step 2: Check the data

Step 3: Clean the data (excluding non-countries & only evaluate data from 2014-2024)

Step 4: Narrow down the high-potential countries which GDP>100B & Population>10M

Step 5: Filter key indicators - GDP, GDP Growth, Population

Step 6: Calculate average GDP growth for the past 10 years

Step 7: Check the most high-potential market with 2024 data

Step 8: Combine indicators and filter high-potential markets

High-Potential Countries includes:

Step 9: Visualize the high-potential countries

Analysis Conclusions: