The aim of this analysis is to explore global GDP trends and investment patterns across countries from 2000 onwards. The study uses the Penn World Table dataset to analyze GDP per capita, population, and investment shares to derive insights into economic performance and investment behaviors.
The Penn World Table dataset provides economic indicators for various countries and years. The key variables used are:

- `rgdpna`: Real GDP (measured in millions of USD)
- `pop`: Population (in millions)
- `csh_i`: Investment share of GDP (stored as a fraction, e.g. 0.25 means 25% of GDP)
library(readxl)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.2
library(leaflet)
## Warning: package 'leaflet' was built under R version 4.4.2
# Read the "Data" worksheet of the Penn World Table workbook.
# NOTE: the path below is specific to one machine; adjust it when running
# the analysis elsewhere.
file_path <- "C:/Users/siddh/Downloads/pwt100 (1) (1).xlsx"
pwt_data <- read_excel(path = file_path, sheet = "Data")

# Print a compact overview of every column (name, type, first values).
glimpse(pwt_data)
## Rows: 12,810
## Columns: 52
## $ countrycode <chr> "ABW", "ABW", "ABW", "ABW", "ABW", "ABW", "ABW", "ABW", …
## $ country <chr> "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", "A…
## $ currency_unit <chr> "Aruban Guilder", "Aruban Guilder", "Aruban Guilder", "A…
## $ year <dbl> 1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 19…
## $ rgdpe <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ rgdpo <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ pop <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ emp <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ avh <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ hc <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ ccon <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ cda <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ cgdpe <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ cgdpo <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ cn <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ ck <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ ctfp <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ cwtfp <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ rgdpna <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ rconna <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ rdana <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ rnna <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ rkna <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ rtfpna <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ rwtfpna <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ labsh <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ irr <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ delta <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ xr <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ pl_con <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ pl_da <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ pl_gdpo <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ i_cig <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ i_xm <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ i_xr <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ i_outlier <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ i_irr <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ cor_exp <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ statcap <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ csh_c <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ csh_i <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ csh_g <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ csh_x <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ csh_m <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ csh_r <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ pl_c <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ pl_i <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ pl_g <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ pl_x <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ pl_m <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ pl_n <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
## $ pl_k <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
We select relevant columns and filter data from the year 2000 onwards. Additionally, GDP per capita is calculated.
# Restrict the panel to 2000 and later, keep only the columns the analysis
# uses, and derive GDP per capita as real GDP divided by population.
processed_data <- pwt_data %>%
  filter(year >= 2000) %>%
  select(country, year, rgdpna, pop, csh_i) %>%
  mutate(gdp_per_capita = rgdpna / pop)

# Keep only rows where GDP, population and investment share are all present
# (comma-separated conditions in filter() are combined with AND).
processed_data_clean <- processed_data %>%
  filter(!is.na(rgdpna), !is.na(pop), !is.na(csh_i))
The following table summarizes the top 10 countries by average GDP per capita from 2000 onwards.
# Average GDP per capita per country over the retained years, ranked from
# highest to lowest; only the ten highest-ranked countries are kept.
gdp_summary <- processed_data_clean %>%
  group_by(country) %>%
  summarise(avg_gdp_per_capita = mean(gdp_per_capita, na.rm = TRUE)) %>%
  arrange(desc(avg_gdp_per_capita)) %>%
  slice_head(n = 10)

# Display the resulting table.
gdp_summary
## # A tibble: 10 × 2
## country avg_gdp_per_capita
## <chr> <dbl>
## 1 Qatar 113068.
## 2 Luxembourg 86051.
## 3 China, Macao SAR 77203.
## 4 United Arab Emirates 73968.
## 5 Brunei Darussalam 70953.
## 6 Cayman Islands 70706.
## 7 Switzerland 69477.
## 8 Norway 67253.
## 9 Kuwait 67080.
## 10 Ireland 66875.
We visualize GDP per capita trends for the top 5 countries identified earlier.
# Take the five highest-ranked countries from the summary table. head() is
# used instead of [1:5] so that a summary with fewer than five rows does not
# get padded with NA entries.
top_countries <- head(gdp_summary$country, 5)
filtered_data <- processed_data_clean %>%
  filter(country %in% top_countries)

# Build the title from the years actually present in the plotted data so the
# label cannot disagree with the x-axis.
year_span <- range(filtered_data$year)
plot_title <- paste0(
  "GDP Per Capita Trends (", year_span[1], "-", year_span[2], ")"
)

# One line per country. `linewidth` replaces `size`, which is deprecated for
# lines since ggplot2 3.4.0.
ggplot(filtered_data, aes(x = year, y = gdp_per_capita, color = country)) +
  geom_line(linewidth = 1) +
  labs(
    title = plot_title,
    x = "Year",
    y = "GDP Per Capita"
  ) +
  theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
We create an interactive map using the Leaflet library to visualize average GDP per capita and average investment share for a small set of countries whose coordinates are entered by hand.
# Hand-entered coordinates (decimal degrees) for the countries shown on the
# map; entries are positionally aligned across the three vectors.
country_coords <- data.frame(
  country   = c("Luxembourg", "Singapore", "Ireland", "United States", "Switzerland"),
  latitude  = c(49.8153, 1.3521, 53.4129, 37.0902, 46.8182),
  longitude = c(6.1296, 103.8198, -8.2439, -95.7129, 8.2275)
)

# Per-country averages of GDP per capita and investment share.
country_averages <- processed_data_clean %>%
  group_by(country) %>%
  summarise(
    avg_gdp_per_capita = mean(gdp_per_capita, na.rm = TRUE),
    avg_investment_share = mean(csh_i, na.rm = TRUE)
  )

# Keep only the countries that have coordinates; the inner join drops every
# country without a match in country_coords.
leaflet_data <- inner_join(country_averages, country_coords, by = "country")
# Interactive map: one circle marker per country in leaflet_data, with a
# popup showing the country's average GDP per capita and investment share.
leaflet(leaflet_data) %>%
  addTiles() %>%
  addCircleMarkers(
    lng = ~longitude, lat = ~latitude,
    # Circle-marker radius is in pixels. The previous divisor of 500 gave
    # radii below one pixel, so the GDP scaling was not visible. Dividing the
    # square root by 20 yields radii of roughly 10-17 px for average GDP per
    # capita between about 40,000 and 115,000.
    radius = ~sqrt(avg_gdp_per_capita) / 20,
    color = "blue",
    popup = ~paste(
      "<b>Country:</b>", country, "<br>",
      "<b>Avg GDP Per Capita:</b>", round(avg_gdp_per_capita, 2), "<br>",
      "<b>Avg Investment Share:</b>", round(avg_investment_share, 2)
    )
  ) %>%
  # Start zoomed out far enough to see all continents.
  setView(lng = 0, lat = 20, zoom = 2)
This analysis highlights significant disparities in economic performance globally and emphasizes the role of investment and population dynamics in shaping GDP per capita trends.