Create a CSV file

# Build the wide-format dataset: one row per country, with one population
# column and one GDP column for each of the years 2000, 2005 and 2010.
#
# The backticked column names begin with a digit, so they are not syntactic
# R names. With check.names = TRUE (the data.frame() default, spelled out
# here) they are rewritten with an "X" prefix: X2000_Population, X2000_GDP, ...

gdp_data <- data.frame(
  Country = c("USA", "China", "India"),
  `2000_Population` = c(282162411, 1262645000, 1053050912),
  `2000_GDP` = c(10285, 1198, 476),
  `2005_Population` = c(295516599, 1307560000, 1139964932),
  `2005_GDP` = c(13094, 2286, 834),
  `2010_Population` = c(309327143, 1340910000, 1224614327),
  `2010_GDP` = c(14964, 6087, 1708),
  check.names = TRUE
)

# Show the dataset on the console
print(gdp_data)
##   Country X2000_Population X2000_GDP X2005_Population X2005_GDP
## 1     USA        282162411     10285        295516599     13094
## 2   China       1262645000      1198       1307560000      2286
## 3   India       1053050912       476       1139964932       834
##   X2010_Population X2010_GDP
## 1        309327143     14964
## 2       1340910000      6087
## 3       1224614327      1708
# Write the dataset to disk as CSV; row.names = FALSE omits the row-name column
write.csv(
  x = gdp_data,
  file = "country_population_gdp.csv",
  row.names = FALSE
)
# Load required packages
library(tidyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Load the dataset back from the CSV file; the first line holds the column names

gdp_data <- read.csv(
  file = "country_population_gdp.csv",
  header = TRUE
)
gdp_data
# Reshape to tidy (long) format to better analyze our dataset: one row per
# Country/Year pair, with Population and GDP as separate columns.
#
# Every column except Country is named <Year>_<Variable> (e.g. X2000_GDP).
# Splitting the name on "_" sends the first piece to the Year column; the
# ".value" sentinel makes the second piece (Population / GDP) the name of an
# output column, so a single pivot_longer() replaces the former
# pivot_longer() + pivot_wider() round trip.

Long_GDPdata <- gdp_data %>%
  pivot_longer(
    cols = -Country,
    names_to = c("Year", ".value"),
    names_sep = "_"
  )

print(Long_GDPdata)
## # A tibble: 9 × 4
##   Country Year  Population   GDP
##   <chr>   <chr>      <int> <int>
## 1 USA     X2000  282162411 10285
## 2 USA     X2005  295516599 13094
## 3 USA     X2010  309327143 14964
## 4 China   X2000 1262645000  1198
## 5 China   X2005 1307560000  2286
## 6 China   X2010 1340910000  6087
## 7 India   X2000 1053050912   476
## 8 India   X2005 1139964932   834
## 9 India   X2010 1224614327  1708

ANALYSIS

# Load required packages

library(ggplot2)
library(readr)
library(stringr)

# Turn Year labels such as "X2000" into numbers: pull out the first run of
# four digits, then convert that string to numeric

Long_GDPdata <- Long_GDPdata %>%
  mutate(Year = as.numeric(str_extract(Year, "\\d{4}")))

# Derive growth rates and per-capita GDP for each country

analysis_data <- Long_GDPdata %>%
  arrange(Year) %>%      # chronological order, so lag() returns the prior period
  group_by(Country) %>%  # keep lag() from crossing from one country to another
  mutate(
    # Percentage change relative to the same country's previous observation
    # (NA for each country's first observation)
    GDP_growth = (GDP - lag(GDP)) / lag(GDP) * 100,
    Population_growth = (Population - lag(Population)) / lag(Population) * 100,
    # GDP per person, expressed in the units of the GDP column (not rescaled)
    GDP_per_capita = GDP / Population
  ) %>%
  ungroup()

print(analysis_data)
## # A tibble: 9 × 7
##   Country  Year Population   GDP GDP_growth Population_growth GDP_per_capita
##   <chr>   <dbl>      <int> <int>      <dbl>             <dbl>          <dbl>
## 1 USA      2000  282162411 10285       NA               NA       0.0000365  
## 2 China    2000 1262645000  1198       NA               NA       0.000000949
## 3 India    2000 1053050912   476       NA               NA       0.000000452
## 4 USA      2005  295516599 13094       27.3              4.73    0.0000443  
## 5 China    2005 1307560000  2286       90.8              3.56    0.00000175 
## 6 India    2005 1139964932   834       75.2              8.25    0.000000732
## 7 USA      2010  309327143 14964       14.3              4.67    0.0000484  
## 8 China    2010 1340910000  6087      166.               2.55    0.00000454 
## 9 India    2010 1224614327  1708      105.               7.43    0.00000139

VISUALIZATION

1. GDP over time by country

# Line chart of GDP against Year, one coloured line (with points) per country.
# Line thickness is set with `linewidth`; using `size` for lines has been
# deprecated since ggplot2 3.4.0.
ggplot(
  analysis_data,
  aes(x = Year, y = GDP, color = Country, group = Country)
) +
  geom_line(linewidth = 1.2) +
  geom_point(size = 3) +
  labs(
    title = "GDP Over Time by Country",
    x = "Year",
    y = "GDP (billions USD)"
  ) +
  theme_minimal(base_size = 14)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

2. Population growth over time by country

# Line chart of Population against Year, one coloured line (with points) per
# country. Line thickness is set with `linewidth`; using `size` for lines has
# been deprecated since ggplot2 3.4.0.
ggplot(
  analysis_data,
  aes(x = Year, y = Population, color = Country, group = Country)
) +
  geom_line(linewidth = 1.2) +
  geom_point(size = 3) +
  labs(
    title = "Population Growth Over Time by Country",
    x = "Year",
    y = "Population"
  ) +
  theme_minimal(base_size = 14)

3. GDP per capita over time by country

# Line chart of GDP per capita against Year, one coloured line (with points)
# per country.
#
# GDP_per_capita is computed as GDP / Population, and the GDP axis of the GDP
# chart is labelled in billions of USD, so the stored values are billions of
# USD per person. Multiply by 1e9 so the plotted values are in USD per person,
# as the y-axis label states.
#
# Line thickness is set with `linewidth`; using `size` for lines has been
# deprecated since ggplot2 3.4.0.
ggplot(
  analysis_data,
  aes(x = Year, y = GDP_per_capita * 1e9, color = Country, group = Country)
) +
  geom_line(linewidth = 1.2) +
  geom_point(size = 3) +
  labs(
    title = "GDP per Capita Over Time by Country",
    x = "Year",
    y = "GDP per Capita (USD)"
  ) +
  theme_minimal(base_size = 14)

INTERPRETATION

1) China shows the fastest economic growth, especially after 2005. Combined with its low population growth, this means its GDP per capita is rising sharply.

2) The USA shows consistent economic growth with low population growth. As a result, it has a high GDP per capita and stable economic conditions.

3) India, on the other hand, has the highest population growth with moderate GDP growth. Hence, its GDP per capita is increasing but remains lower than that of China and the USA.