Load Libraries & Set Working Directory
Load & Preprocess Data
# Load the indicator table and build standardized 2010 / 2020 subsets.
# The CSV is addressed through its full path instead of calling setwd(), so
# loading the data does not change the session's working directory.
locationWD <- "/Users/burakberkerbasergun/Desktop/UW"
econ_csv <- file.path(locationWD, "usl", "Global_Economy_Indicators.csv")
if (!file.exists(econ_csv)) {
  stop("Input file not found: ", econ_csv, call. = FALSE)
}
econ_data <- read.csv(econ_csv, header = TRUE, sep = ",")
# Keep only the rows that have no missing value in any column.
econ_data <- econ_data[complete.cases(econ_data), ]

# Indicator columns used by the PCA and MDS sections.
target_columns <- c(
  "Population",
  "Per.capita.GNI",
  "Gross.National.Income.GNI..in.USD",
  "Gross.Domestic.Product..GDP.",
  "Final.consumption.expenditure",
  "Gross.fixed.capital.formation..including.Acquisitions.less.disposals.of.valuables.",
  "Imports.of.goods.and.services",
  "Manufacturing..ISIC.D.",
  "X.Transport..storage.and.communication..ISIC.I..",
  "X.Wholesale..retail.trade..restaurants.and.hotels..ISIC.G.H..",
  "Total.Value.Added"
)
# Fail early, with the offending names, if the file layout has changed.
missing_columns <- setdiff(target_columns, names(econ_data))
if (length(missing_columns) > 0) {
  stop(
    "Columns missing from the data: ",
    paste(missing_columns, collapse = ", "),
    call. = FALSE
  )
}

# One subset per year of interest.
econ_2010 <- subset(econ_data, Year == 2010)
econ_2020 <- subset(econ_data, Year == 2020)
# Standardize the target columns separately within each year.
econ_2010[target_columns] <- scale(econ_2010[target_columns])
econ_2020[target_columns] <- scale(econ_2020[target_columns])
The data are filtered to the years 2010 and 2020 so that the two years can be compared.
Principal Component Analysis (PCA)
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Fit one PCA per year on the target indicator columns.
pca_2010 <- prcomp(econ_2010[target_columns], center = TRUE, scale. = TRUE)
pca_2020 <- prcomp(econ_2020[target_columns], center = TRUE, scale. = TRUE)

# Scores on the first three principal components.
econ_2010_pca <- as.data.frame(pca_2010$x[, c("PC1", "PC2", "PC3")])
econ_2020_pca <- as.data.frame(pca_2020$x[, c("PC1", "PC2", "PC3")])

# Axis titles shared by both 3D scatter plots.
pca_scene <- list(
  xaxis = list(title = "PC1"),
  yaxis = list(title = "PC2"),
  zaxis = list(title = "PC3")
)

# 2010: points are coloured by their PC1 score.
fig_pca_2010 <- plot_ly(
  x = econ_2010_pca$PC1,
  y = econ_2010_pca$PC2,
  z = econ_2010_pca$PC3,
  type = "scatter3d",
  mode = "markers",
  marker = list(color = econ_2010_pca$PC1, colorscale = "Viridis", size = 4)
)
fig_pca_2010 <- layout(
  fig_pca_2010,
  title = "PCA 3D Plot in 2010",
  scene = pca_scene
)
fig_pca_2010

# 2020: same layout, different colour scale.
fig_pca_2020 <- plot_ly(
  x = econ_2020_pca$PC1,
  y = econ_2020_pca$PC2,
  z = econ_2020_pca$PC3,
  type = "scatter3d",
  mode = "markers",
  marker = list(color = econ_2020_pca$PC1, colorscale = "Plasma", size = 4)
) %>%
  layout(title = "PCA 3D Plot in 2020", scene = pca_scene)
fig_pca_2020
-GDP
-Total Value Added
-Gross National Income (GNI)
These variables consistently dominate economic variance over
time.
This suggests that GDP growth and total value-added remain the
strongest indicators of economic performance, both in 2010 and
2020.
It also implies that economies continued to be primarily shaped
by production output and national income.
Multidimensional Scaling (MDS)
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:plotly':
##
## select
library(ggplot2)
# Pairwise Euclidean distances between the rows of each year's target columns.
dist_2010 <- stats::dist(
  as.matrix(econ_2010[target_columns]),
  method = "euclidean"
)
dist_2020 <- stats::dist(
  as.matrix(econ_2020[target_columns]),
  method = "euclidean"
)
# Two-dimensional non-metric MDS configuration for 2010.
mds_2010 <- MASS::isoMDS(d = dist_2010, k = 2)
## initial value 3.525088
## iter 5 value 1.227536
## iter 10 value 1.103700
## iter 15 value 1.064002
## final value 1.055703
## converged
# Two-dimensional non-metric MDS configuration for 2020.
mds_2020 <- MASS::isoMDS(d = dist_2020, k = 2)
## initial value 2.748659
## iter 5 value 1.203335
## iter 10 value 1.027020
## final value 0.929103
## converged
# Put the fitted MDS configurations into data frames with named axes.
mds_axes <- c("MDS1", "MDS2")
mds_2010_df <- setNames(as.data.frame(mds_2010$points), mds_axes)
mds_2020_df <- setNames(as.data.frame(mds_2020$points), mds_axes)

# One scatter plot per year (blue = 2010, red = 2020).
p1 <- ggplot(mds_2010_df, aes(x = MDS1, y = MDS2)) +
  geom_point(color = "blue", size = 3) +
  labs(title = "MDS Plot (2010)")
p2 <- ggplot(mds_2020_df, aes(x = MDS1, y = MDS2)) +
  geom_point(color = "red", size = 3) +
  labs(title = "MDS Plot (2020)")
print(p1)

print(p2)

The MDS plots are built from the Euclidean distance matrices computed
above, and can be interpreted as follows.
In the 2010 MDS plot there are some outliers. These
outliers could represent developing economies, oil-rich nations, or
countries with high economic disparity.
In 2020 some outliers have moved positions, indicating that certain
countries have diverged or changed significantly in economic
structure.
While the core remains dense, the spread of points is slightly
larger, suggesting that economic divergence has increased for some
countries.
The COVID-19 pandemic (2020) could be one of the possible causes, as it
had uneven economic effects across countries.
library(Rtsne)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:MASS':
##
## select
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(plotly)
# Prepare the per-year numeric feature tables used by the t-SNE section.
# Every numeric column except the Year identifier is treated as a feature.
is_numeric_col <- vapply(econ_data, is.numeric, logical(1))
numeric_columns <- setdiff(names(econ_data)[is_numeric_col], "Year")

# NOTE(review): these subsets are taken from econ_data again, so econ_2010 and
# econ_2020 are replaced by versions whose columns have not been passed through
# scale(). Confirm that running t-SNE on unstandardized columns is intended.
econ_2010 <- subset(econ_data, Year == 2010)
econ_2020 <- subset(econ_data, Year == 2020)
econ_2010_numeric <- econ_2010[numeric_columns]
econ_2020_numeric <- econ_2020[numeric_columns]

# Variant 1: keep only the rows without missing values.
econ_2010_clean <- econ_2010_numeric[complete.cases(econ_2010_numeric), ]
econ_2020_clean <- econ_2020_numeric[complete.cases(econ_2020_numeric), ]

# Variant 2: fill each missing value with the mean of its own column.
# The previous form assigned the vector of column means positionally over all
# NA cells, which does not pair a cell with the mean of its own column.
impute_column_means <- function(df) {
  df[] <- lapply(df, function(column) {
    # Columns without NAs are returned untouched (type included).
    if (anyNA(column)) {
      column[is.na(column)] <- mean(column, na.rm = TRUE)
    }
    column
  })
  df
}
econ_2010_imputed <- impute_column_means(econ_2010_numeric)
econ_2020_imputed <- impute_column_means(econ_2020_numeric)
# Seed the RNG before the t-SNE fits.
set.seed(123)
# Three-dimensional t-SNE embedding of the complete 2010 rows.
tsne_2010 <- Rtsne::Rtsne(
  econ_2010_clean,
  dims = 3,
  perplexity = 30,
  verbose = TRUE
)
## Performing PCA
## Read the 181 x 23 data matrix successfully!
## Using no_dims = 3, perplexity = 30.000000, and theta = 0.500000
## Computing input similarities...
## Building tree...
## Done in 0.01 seconds (sparsity = 0.682580)!
## Learning embedding...
## Iteration 50: error is 45.117982 (50 iterations in 0.02 seconds)
## Iteration 100: error is 43.865666 (50 iterations in 0.02 seconds)
## Iteration 150: error is 43.510006 (50 iterations in 0.02 seconds)
## Iteration 200: error is 43.498213 (50 iterations in 0.02 seconds)
## Iteration 250: error is 43.990855 (50 iterations in 0.01 seconds)
## Iteration 300: error is 0.079517 (50 iterations in 0.01 seconds)
## Iteration 350: error is 0.061739 (50 iterations in 0.01 seconds)
## Iteration 400: error is 0.055353 (50 iterations in 0.01 seconds)
## Iteration 450: error is 0.055338 (50 iterations in 0.01 seconds)
## Iteration 500: error is 0.054602 (50 iterations in 0.01 seconds)
## Iteration 550: error is 0.053406 (50 iterations in 0.01 seconds)
## Iteration 600: error is 0.052431 (50 iterations in 0.01 seconds)
## Iteration 650: error is 0.051796 (50 iterations in 0.01 seconds)
## Iteration 700: error is 0.053856 (50 iterations in 0.01 seconds)
## Iteration 750: error is 0.051165 (50 iterations in 0.01 seconds)
## Iteration 800: error is 0.049688 (50 iterations in 0.01 seconds)
## Iteration 850: error is 0.055139 (50 iterations in 0.01 seconds)
## Iteration 900: error is 0.057650 (50 iterations in 0.01 seconds)
## Iteration 950: error is 0.058052 (50 iterations in 0.01 seconds)
## Iteration 1000: error is 0.054693 (50 iterations in 0.01 seconds)
## Fitting performed in 0.29 seconds.
# Three-dimensional t-SNE embedding of the complete 2020 rows.
tsne_2020 <- Rtsne::Rtsne(
  econ_2020_clean,
  dims = 3,
  perplexity = 30,
  verbose = TRUE
)
## Performing PCA
## Read the 167 x 23 data matrix successfully!
## Using no_dims = 3, perplexity = 30.000000, and theta = 0.500000
## Computing input similarities...
## Building tree...
## Done in 0.01 seconds (sparsity = 0.729965)!
## Learning embedding...
## Iteration 50: error is 45.444238 (50 iterations in 0.02 seconds)
## Iteration 100: error is 44.406953 (50 iterations in 0.02 seconds)
## Iteration 150: error is 44.140056 (50 iterations in 0.02 seconds)
## Iteration 200: error is 43.724966 (50 iterations in 0.02 seconds)
## Iteration 250: error is 44.383975 (50 iterations in 0.02 seconds)
## Iteration 300: error is 0.266229 (50 iterations in 0.02 seconds)
## Iteration 350: error is 0.072134 (50 iterations in 0.02 seconds)
## Iteration 400: error is 0.060402 (50 iterations in 0.01 seconds)
## Iteration 450: error is 0.059135 (50 iterations in 0.01 seconds)
## Iteration 500: error is 0.058442 (50 iterations in 0.01 seconds)
## Iteration 550: error is 0.059721 (50 iterations in 0.01 seconds)
## Iteration 600: error is 0.060630 (50 iterations in 0.01 seconds)
## Iteration 650: error is 0.062969 (50 iterations in 0.01 seconds)
## Iteration 700: error is 0.062590 (50 iterations in 0.01 seconds)
## Iteration 750: error is 0.058777 (50 iterations in 0.01 seconds)
## Iteration 800: error is 0.059147 (50 iterations in 0.01 seconds)
## Iteration 850: error is 0.061021 (50 iterations in 0.01 seconds)
## Iteration 900: error is 0.060226 (50 iterations in 0.01 seconds)
## Iteration 950: error is 0.058559 (50 iterations in 0.01 seconds)
## Iteration 1000: error is 0.056511 (50 iterations in 0.01 seconds)
## Fitting performed in 0.29 seconds.
# Embedding coordinates as data frames with named dimensions.
tsne_dims <- c("Dim1", "Dim2", "Dim3")
econ_2010_tsne <- setNames(as.data.frame(tsne_2010$Y), tsne_dims)
econ_2020_tsne <- setNames(as.data.frame(tsne_2020$Y), tsne_dims)

# Axis titles shared by both 3D scatter plots.
tsne_scene <- list(
  xaxis = list(title = "Dim1"),
  yaxis = list(title = "Dim2"),
  zaxis = list(title = "Dim3")
)

# 2010 embedding, points coloured by their first coordinate.
fig_tsne_2010 <- plot_ly(
  x = econ_2010_tsne$Dim1,
  y = econ_2010_tsne$Dim2,
  z = econ_2010_tsne$Dim3,
  type = "scatter3d",
  mode = "markers",
  marker = list(color = econ_2010_tsne$Dim1, colorscale = "Cividis", size = 4)
)
fig_tsne_2010 <- layout(
  fig_tsne_2010,
  title = "t-SNE 3D Plot (2010)",
  scene = tsne_scene
)

# 2020 embedding, same styling.
fig_tsne_2020 <- plot_ly(
  x = econ_2020_tsne$Dim1,
  y = econ_2020_tsne$Dim2,
  z = econ_2020_tsne$Dim3,
  type = "scatter3d",
  mode = "markers",
  marker = list(color = econ_2020_tsne$Dim1, colorscale = "Cividis", size = 4)
)
fig_tsne_2020 <- layout(
  fig_tsne_2020,
  title = "t-SNE 3D Plot (2020)",
  scene = tsne_scene
)

fig_tsne_2010
fig_tsne_2020
2010 had a more structured and gradual economic landscape, meaning
economies were more aligned.
2020 shows increased economic variability, with countries forming
more distinct economic paths.
In 2010 there is less variability in clustering, meaning countries
had more structured economic relationships.
In 2020 the range of values is larger (wider spread along all three
dimensions), suggesting greater variation in economic indicators.
Clustering Analysis (Hierarchical & K-Means)
library(ggplot2)
# Hierarchical (Ward) and k-means clustering of the 2-D MDS configurations.
# Both methods use only the two MDS coordinates. Selecting them explicitly
# keeps the distance matrices independent of the cluster-label columns that
# this section adds, so re-running it gives the same clusters.
mds_coords <- c("MDS1", "MDS2")

hc_2010 <- hclust(dist(mds_2010_df[, mds_coords]), method = "ward.D2")
mds_2010_df$HC_Cluster <- cutree(hc_2010, k = 3)
hc_2020 <- hclust(dist(mds_2020_df[, mds_coords]), method = "ward.D2")
mds_2020_df$HC_Cluster <- cutree(hc_2020, k = 3)

# Hierarchical cluster membership shown in MDS space, one plot per year.
ggplot(mds_2010_df, aes(x = MDS1, y = MDS2, color = as.factor(HC_Cluster))) +
  geom_point(size = 3) +
  labs(title = "Hierarchical Clustering in MDS Space (2010)")

ggplot(mds_2020_df, aes(x = MDS1, y = MDS2, color = as.factor(HC_Cluster))) +
  geom_point(size = 3) +
  labs(title = "Hierarchical Clustering in MDS Space (2020)")

# k-means with three centers; the seed is set once, before both fits.
set.seed(123)
kmeans_2010 <- kmeans(mds_2010_df[, mds_coords], centers = 3)
mds_2010_df$KMeans_Cluster <- as.factor(kmeans_2010$cluster)
kmeans_2020 <- kmeans(mds_2020_df[, mds_coords], centers = 3)
mds_2020_df$KMeans_Cluster <- as.factor(kmeans_2020$cluster)
The hierarchical clustering results based on MDS data show how
economies are grouped based on their similarity in macroeconomic
indicators.
For Cluster 1, these are likely developed and emerging economies
with relatively stable and comparable growth patterns in 2010.
A small number of economies significantly differ from the main
cluster, which is Cluster 2.
A few economies are positioned far from the center, indicating that
they had highly distinct macroeconomic patterns in 2010.
Unlike in 2010, the green cluster has expanded in 2020, showing that some
economies have further diverged from the norm.
The increase in outliers and the spread of Cluster 2 suggest
greater economic differentiation due to pandemic shocks, policy
changes, or technological advancements benefiting some countries more
than others.
Economic shifts and clustering changes between 2010 and 2020
indicate increasing economic divergence among some countries, while
others remain stable. PCA, MDS, and clustering highlight these shifts
effectively.