# Build the wide-format table: one row per country, with one Population
# column and one GDP column for each of the years 2000, 2005 and 2010.
gdp_data <- data.frame(
  Country = c("USA", "China", "India"),
  `2000_Population` = c(282162411, 1262645000, 1053050912),
  `2000_GDP` = c(10285, 1198, 476),
  `2005_Population` = c(295516599, 1307560000, 1139964932),
  `2005_GDP` = c(13094, 2286, 834),
  `2010_Population` = c(309327143, 1340910000, 1224614327),
  `2010_GDP` = c(14964, 6087, 1708),
  # Default behaviour, spelled out: names that start with a digit are not
  # syntactically valid, so data.frame() prefixes them with "X"
  # (e.g. `2000_GDP` becomes X2000_GDP).
  check.names = TRUE
)

# Show the freshly built table
print(gdp_data)
## Country X2000_Population X2000_GDP X2005_Population X2005_GDP
## 1 USA 282162411 10285 295516599 13094
## 2 China 1262645000 1198 1307560000 2286
## 3 India 1053050912 476 1139964932 834
## X2010_Population X2010_GDP
## 1 309327143 14964
## 2 1340910000 6087
## 3 1224614327 1708
# Persist the wide table as a CSV file, leaving out the row-name column
write.csv(
  x = gdp_data,
  file = "country_population_gdp.csv",
  row.names = FALSE
)
# Load required packages
library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the table back from the CSV file; its first line holds the column names
gdp_data <- read.csv(file = "country_population_gdp.csv", header = TRUE)
# Auto-print the reloaded table
gdp_data
# Reshape to tidy format: one row per country and year, with Population and
# GDP as separate columns.
#   1. pivot_longer() stacks every column except Country and splits each
#      column name at "_" into Year (e.g. "X2000") and Variable
#      ("Population" or "GDP").
#   2. pivot_wider() spreads Variable back out into one column per measure.
Long_GDPdata <- pivot_wider(
  pivot_longer(
    gdp_data,
    cols = -Country,
    names_to = c("Year", "Variable"),
    names_sep = "_",
    values_to = "Value"
  ),
  names_from = Variable,
  values_from = Value
)
print(Long_GDPdata)
## # A tibble: 9 × 4
## Country Year Population GDP
## <chr> <chr> <int> <int>
## 1 USA X2000 282162411 10285
## 2 USA X2005 295516599 13094
## 3 USA X2010 309327143 14964
## 4 China X2000 1262645000 1198
## 5 China X2005 1307560000 2286
## 6 China X2010 1340910000 6087
## 7 India X2000 1053050912 476
## 8 India X2005 1139964932 834
## 9 India X2010 1224614327 1708
# Load required packages
library(ggplot2)
library(readr)
library(stringr)
# Turn Year into a number: keep the first run of exactly four digits (which
# drops the "X" prefix of values such as "X2000") and convert it to numeric.
Long_GDPdata <- mutate(
  Long_GDPdata,
  Year = as.numeric(str_extract(Year, "\\d{4}"))
)
# Calculate additional metrics per country.
# Rows are ordered by Year so that lag() inside each Country group refers to
# that country's previous observation (the data has one row every 5 years).
analysis_data <- Long_GDPdata %>%
  group_by(Country) %>%
  arrange(Year) %>%
  mutate(
    # % change versus the previous observation; NA for each country's first row
    GDP_growth = (GDP - lag(GDP)) / lag(GDP) * 100,
    Population_growth = (Population - lag(Population)) / lag(Population) * 100,
    # GDP is expressed in billions of USD (see the GDP plot's axis label), so
    # scale it to USD before dividing; the result is USD per person.
    GDP_per_capita = GDP * 1e9 / Population
  ) %>%
  ungroup()
print(analysis_data)
## # A tibble: 9 × 7
## Country Year Population GDP GDP_growth Population_growth GDP_per_capita
## <chr> <dbl> <int> <int> <dbl> <dbl> <dbl>
## 1 USA 2000 282162411 10285 NA NA 36451.
## 2 China 2000 1262645000 1198 NA NA 949.
## 3 India 2000 1053050912 476 NA NA 452.
## 4 USA 2005 295516599 13094 27.3 4.73 44309.
## 5 China 2005 1307560000 2286 90.8 3.56 1748.
## 6 India 2005 1139964932 834 75.2 8.25 732.
## 7 USA 2010 309327143 14964 14.3 4.67 48376.
## 8 China 2010 1340910000 6087 166. 2.55 4539.
## 9 India 2010 1224614327 1708 105. 7.43 1395.
# Line charts of each metric over time, one coloured line per country.
# Line thickness is set with `linewidth`: using `size` for lines has been
# deprecated since ggplot2 3.4.0 and triggers a warning. `size` is still the
# correct argument for the point markers.

# GDP over time
ggplot(analysis_data, aes(x = Year, y = GDP, color = Country, group = Country)) +
  geom_line(linewidth = 1.2) +
  geom_point(size = 3) +
  labs(title = "GDP Over Time by Country",
       x = "Year", y = "GDP (billions USD)") +
  theme_minimal(base_size = 14)

# Population over time
ggplot(analysis_data, aes(x = Year, y = Population, color = Country, group = Country)) +
  geom_line(linewidth = 1.2) +
  geom_point(size = 3) +
  labs(title = "Population Growth Over Time by Country",
       x = "Year", y = "Population") +
  theme_minimal(base_size = 14)

# GDP per capita over time
ggplot(analysis_data, aes(x = Year, y = GDP_per_capita, color = Country, group = Country)) +
  geom_line(linewidth = 1.2) +
  geom_point(size = 3) +
  labs(title = "GDP per Capita Over Time by Country",
       x = "Year", y = "GDP per Capita (USD)") +
  theme_minimal(base_size = 14)
1) China shows the fastest economic growth, especially after 2005, combined with low population growth, so its GDP per capita is rising sharply.
2) The USA has consistent economic growth with low population growth. As a result, it has a high GDP per capita and stable economic conditions.
3) India, on the other hand, has the highest population growth with moderate GDP growth. Hence, its GDP per capita is increasing but remains lower than that of China and the USA.