01. Load the CSV file
# Install readr only when it is not already available, so that re-running
# the script does not trigger a download and reinstall every time.
if (!requireNamespace("readr", quietly = TRUE)) {
  install.packages("readr")
}
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.3'
## (as 'lib' is unspecified)
library(readr)
# Read the CSV and force the per-capita consumption column to be parsed as
# double (the column name is spelled exactly as it appears in the file).
dataframe <- read_csv(
  "Dataframe.csv",
  col_types = cols(private_comsuption_perCapita = col_double())
)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
# Preview the first rows of the loaded data frame
head(dataframe)
## # A tibble: 6 × 13
## country_list nominal_gdp gdp_per_capita population monthly_earnings
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Brunei NA 37453. 445000 NA
## 2 Cambodia 29598906000 1765. 16592000 141.
## 3 Indonesia 1318710000000 4783. 274859000 192.
## 4 Laos 15362500000 2595 7338000 208
## 5 Malaysia 406439000000 12450. 32652000 771.
## 6 Myanmar 66719587000 1446. 54410000 366.
## # ℹ 8 more variables: unempolyed_rate <dbl>,
## # labour_force_participation_rate <dbl>, labour_force_population <dbl>,
## # private_comsuption <dbl>, private_comsuption_perCapita <dbl>,
## # total_exports <dbl>, total_import <dbl>, fdi <dbl>
# Install tidyverse only when it is not already available, so that re-running
# the script does not trigger a download and reinstall every time.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.3'
## (as 'lib' is unspecified)
library(tidyverse) # attaches dplyr and the other core packages; needed for the %>% pipe
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ purrr 1.0.1
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Correct the misspelled column names that come from the CSV header
dataframe <- rename(
  dataframe,
  private_consumption = private_comsuption,
  private_consumption_perCapita = private_comsuption_perCapita,
  total_imports = total_import
)
02. Check the data frame
dim(dataframe) # number of rows and columns
## [1] 31 13
# Install psych only when it is not already available, so that re-running
# the script does not trigger a download and reinstall every time.
if (!requireNamespace("psych", quietly = TRUE)) {
  install.packages("psych")
}
library(psych)
# Descriptive statistics (n, mean, sd, median, ...) for every column
psych::describe(dataframe)
## vars n mean sd median
## country_list* 1 31 1.600000e+01 9.090000e+00 1.600000e+01
## nominal_gdp 2 30 1.111938e+12 3.329484e+12 1.500773e+11
## gdp_per_capita 3 26 1.522152e+04 1.974654e+04 4.635090e+03
## population 4 31 1.345638e+08 3.424951e+08 1.939800e+07
## monthly_earnings 5 25 9.483300e+02 1.150020e+03 3.658100e+02
## unempolyed_rate 6 31 3.000000e-02 2.000000e-02 4.000000e-02
## labour_force_participation_rate 7 25 6.100000e-01 1.000000e-01 6.000000e-01
## labour_force_population 8 31 7.658987e+07 1.926028e+08 1.334582e+07
## private_consumption 9 31 4.979924e+11 1.292139e+12 4.832208e+10
## private_consumption_perCapita 10 26 6.538220e+03 8.651470e+03 2.401640e+03
## total_exports 11 31 2.807285e+11 6.571770e+11 2.537791e+10
## total_imports 12 31 2.674009e+11 5.243813e+11 3.802222e+10
## fdi 13 31 2.107664e+10 4.416265e+10 2.531343e+09
## trimmed mad min
## country_list* 1.600000e+01 1.186000e+01 1.000000e+00
## nominal_gdp 3.235328e+11 2.225046e+11 0.000000e+00
## gdp_per_capita 1.188528e+04 4.901780e+03 1.131180e+03
## population 4.463472e+07 2.875947e+07 0.000000e+00
## monthly_earnings 7.410600e+02 2.671000e+02 7.473000e+01
## unempolyed_rate 3.000000e-02 3.000000e-02 0.000000e+00
## labour_force_participation_rate 6.100000e-01 1.100000e-01 4.100000e-01
## labour_force_population 2.703169e+07 1.978652e+07 0.000000e+00
## private_consumption 1.690310e+11 7.164232e+10 0.000000e+00
## private_consumption_perCapita 5.088480e+03 2.548440e+03 0.000000e+00
## total_exports 1.472592e+11 3.762528e+10 0.000000e+00
## total_imports 1.576327e+11 5.637175e+10 0.000000e+00
## fdi 8.521878e+09 3.752969e+09 -1.720965e+09
## max range skew kurtosis
## country_list* 3.100000e+01 3.000000e+01 0.00 -1.32
## nominal_gdp 1.795440e+13 1.795440e+13 4.33 18.87
## gdp_per_capita 8.279400e+04 8.166282e+04 1.76 2.86
## population 1.411750e+09 1.411750e+09 3.15 8.69
## monthly_earnings 4.773530e+03 4.698800e+03 1.77 2.67
## unempolyed_rate 7.000000e-02 7.000000e-02 -0.06 -1.32
## labour_force_participation_rate 8.700000e-01 4.600000e-01 0.03 0.44
## labour_force_population 9.444608e+08 9.444608e+08 3.45 11.65
## private_consumption 6.791060e+12 6.791060e+12 3.85 15.37
## private_consumption_perCapita 3.277029e+04 3.277029e+04 1.57 1.42
## total_exports 3.590000e+12 3.590000e+12 4.10 17.77
## total_imports 2.720000e+12 2.720000e+12 3.35 12.58
## fdi 1.801670e+11 1.818880e+11 2.48 5.06
## se
## country_list* 1.630000e+00
## nominal_gdp 6.078779e+11
## gdp_per_capita 3.872620e+03
## population 6.151393e+07
## monthly_earnings 2.300000e+02
## unempolyed_rate 0.000000e+00
## labour_force_participation_rate 2.000000e-02
## labour_force_population 3.459248e+07
## private_consumption 2.320750e+11
## private_consumption_perCapita 1.696690e+03
## total_exports 1.180325e+11
## total_imports 9.418166e+10
## fdi 7.931846e+09
03. Clean the data: remove the rows (observations) that contain missing (NA) values
## [1] 23
## [1] 0
## vars n mean sd median
## country_list* 1 25 1.300000e+01 7.360000e+00 1.300000e+01
## nominal_gdp 2 25 1.334326e+12 3.617435e+12 3.772860e+11
## gdp_per_capita 3 25 1.433227e+04 1.961520e+04 4.486910e+03
## population 4 25 1.668413e+08 3.755282e+08 3.265200e+07
## monthly_earnings 5 25 9.483300e+02 1.150020e+03 3.658100e+02
## unempolyed_rate 6 25 4.000000e-02 2.000000e-02 4.000000e-02
## labour_force_participation_rate 7 25 6.100000e-01 1.000000e-01 6.000000e-01
## labour_force_population 8 25 9.497143e+07 2.110718e+08 2.033251e+07
## private_consumption 9 25 6.175106e+11 1.417797e+12 2.245890e+11
## private_consumption_perCapita 10 25 6.799750e+03 8.724340e+03 2.545230e+03
## total_exports 11 25 3.481033e+11 7.179285e+11 7.897755e+10
## total_imports 12 25 3.315771e+11 5.670570e+11 8.084198e+10
## fdi 13 25 2.613504e+10 4.796056e+10 6.407894e+09
## trimmed mad min
## country_list* 1.300000e+01 8.900000e+00 1.000000e+00
## nominal_gdp 5.297572e+11 4.604458e+11 1.069489e+10
## gdp_per_capita 1.066777e+04 4.508410e+03 1.131180e+03
## population 6.600719e+07 3.915843e+07 6.730000e+05
## monthly_earnings 7.410600e+02 2.671000e+02 7.473000e+01
## unempolyed_rate 4.000000e-02 2.000000e-02 1.000000e-02
## labour_force_participation_rate 6.100000e-01 1.100000e-01 4.100000e-01
## labour_force_population 4.104590e+07 2.429479e+07 4.610050e+05
## private_consumption 2.990006e+11 2.872654e+11 4.070000e+09
## private_consumption_perCapita 5.300580e+03 2.290340e+03 5.666400e+02
## total_exports 2.076784e+11 1.139165e+11 1.676519e+09
## total_imports 2.221076e+11 1.127862e+11 3.159606e+09
## fdi 1.588938e+10 8.557674e+09 -1.720965e+09
## max range skew kurtosis
## country_list* 2.500000e+01 2.400000e+01 0.00 -1.34
## nominal_gdp 1.795440e+13 1.794371e+13 3.88 14.85
## gdp_per_capita 8.279400e+04 8.166282e+04 1.94 3.53
## population 1.411750e+09 1.411077e+09 2.73 6.05
## monthly_earnings 4.773530e+03 4.698800e+03 1.77 2.67
## unempolyed_rate 7.000000e-02 6.000000e-02 -0.02 -1.30
## labour_force_participation_rate 8.700000e-01 4.600000e-01 0.03 0.44
## labour_force_population 9.444608e+08 9.439997e+08 3.01 8.50
## private_consumption 6.791060e+12 6.786990e+12 3.38 11.53
## private_consumption_perCapita 3.277029e+04 3.220365e+04 1.52 1.25
## total_exports 3.590000e+12 3.588323e+12 3.64 13.66
## total_imports 2.720000e+12 2.716840e+12 2.98 9.70
## fdi 1.801670e+11 1.818880e+11 2.11 3.22
## se
## country_list* 1.470000e+00
## nominal_gdp 7.234870e+11
## gdp_per_capita 3.923040e+03
## population 7.510565e+07
## monthly_earnings 2.300000e+02
## unempolyed_rate 0.000000e+00
## labour_force_participation_rate 2.000000e-02
## labour_force_population 4.221437e+07
## private_consumption 2.835595e+11
## private_consumption_perCapita 1.744870e+03
## total_exports 1.435857e+11
## total_imports 1.134114e+11
## fdi 9.592111e+09
04. Trim the df data frame: delete columns 6 and 7, then summarize the data
## vars n mean sd median
## country_list* 1 25 1.300000e+01 7.360000e+00 1.300000e+01
## nominal_gdp 2 25 1.334326e+12 3.617435e+12 3.772860e+11
## gdp_per_capita 3 25 1.433227e+04 1.961520e+04 4.486910e+03
## population 4 25 1.668413e+08 3.755282e+08 3.265200e+07
## monthly_earnings 5 25 9.483300e+02 1.150020e+03 3.658100e+02
## labour_force_population 6 25 9.497143e+07 2.110718e+08 2.033251e+07
## private_consumption 7 25 6.175106e+11 1.417797e+12 2.245890e+11
## private_consumption_perCapita 8 25 6.799750e+03 8.724340e+03 2.545230e+03
## total_exports 9 25 3.481033e+11 7.179285e+11 7.897755e+10
## total_imports 10 25 3.315771e+11 5.670570e+11 8.084198e+10
## fdi 11 25 2.613504e+10 4.796056e+10 6.407894e+09
## trimmed mad min
## country_list* 1.300000e+01 8.900000e+00 1.000000e+00
## nominal_gdp 5.297572e+11 4.604458e+11 1.069489e+10
## gdp_per_capita 1.066777e+04 4.508410e+03 1.131180e+03
## population 6.600719e+07 3.915843e+07 6.730000e+05
## monthly_earnings 7.410600e+02 2.671000e+02 7.473000e+01
## labour_force_population 4.104590e+07 2.429479e+07 4.610050e+05
## private_consumption 2.990006e+11 2.872654e+11 4.070000e+09
## private_consumption_perCapita 5.300580e+03 2.290340e+03 5.666400e+02
## total_exports 2.076784e+11 1.139165e+11 1.676519e+09
## total_imports 2.221076e+11 1.127862e+11 3.159606e+09
## fdi 1.588938e+10 8.557674e+09 -1.720965e+09
## max range skew kurtosis
## country_list* 2.500000e+01 2.400000e+01 0.00 -1.34
## nominal_gdp 1.795440e+13 1.794371e+13 3.88 14.85
## gdp_per_capita 8.279400e+04 8.166282e+04 1.94 3.53
## population 1.411750e+09 1.411077e+09 2.73 6.05
## monthly_earnings 4.773530e+03 4.698800e+03 1.77 2.67
## labour_force_population 9.444608e+08 9.439997e+08 3.01 8.50
## private_consumption 6.791060e+12 6.786990e+12 3.38 11.53
## private_consumption_perCapita 3.277029e+04 3.220365e+04 1.52 1.25
## total_exports 3.590000e+12 3.588323e+12 3.64 13.66
## total_imports 2.720000e+12 2.716840e+12 2.98 9.70
## fdi 1.801670e+11 1.818880e+11 2.11 3.22
## se
## country_list* 1.470000e+00
## nominal_gdp 7.234870e+11
## gdp_per_capita 3.923040e+03
## population 7.510565e+07
## monthly_earnings 2.300000e+02
## labour_force_population 4.221437e+07
## private_consumption 2.835595e+11
## private_consumption_perCapita 1.744870e+03
## total_exports 1.435857e+11
## total_imports 1.134114e+11
## fdi 9.592111e+09
## Rows: 25
## Columns: 11
## $ country_list <chr> "Cambodia", "Indonesia", "Laos", "Malays…
## $ nominal_gdp <dbl> 2.959891e+10, 1.318710e+12, 1.536250e+10…
## $ gdp_per_capita <dbl> 1765.488, 4783.269, 2595.000, 12449.664,…
## $ population <dbl> 16592000, 274859000, 7338000, 32652000, …
## $ monthly_earnings <dbl> 140.595, 191.774, 208.000, 771.472, 365.…
## $ labour_force_population <dbl> 14501408, 188635732, 4318633, 22791096, …
## $ private_consumption <dbl> 1.770588e+10, 6.996630e+11, 1.252456e+10…
## $ private_consumption_perCapita <dbl> 1067.13, 2545.53, 1706.81, 7170.13, 566.…
## $ total_exports <dbl> 2.537791e+10, 2.919790e+11, 7.892754e+09…
## $ total_imports <dbl> 3.802222e+10, 2.375240e+11, 7.244703e+09…
## $ fdi <dbl> 3590931000, 21652074000, 635822000, 1512…
05. Correlation heatmap
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.3'
## (as 'lib' is unspecified)

06. Factor Analysis

Explanatory power of each factor
# Eigenvalues of the matrix stored in `cor`
# NOTE(review): presumably the correlation matrix of the variables - confirm
EV <- eigen(cor)$values
# Share attributed to each factor (eigenvalue / number of eigenvalues);
# the first factor accounts for about 61.5% of the variance
EV / length(EV)
## [1] 6.150855e-01 3.110499e-01 3.429919e-02 1.904959e-02 1.428797e-02
## [6] 3.314684e-03 1.572783e-03 1.117533e-03 1.979955e-04 2.479151e-05
Determine the number of factors to retain (scree plot)
# Draw the scree plot of the principal-component eigenvalues only.
# The `factors` argument is spelled out in full rather than relying on
# partial argument matching, and both switches are passed as logicals.
psych::scree(cor, factors = FALSE, pc = TRUE)

Shares for the cumulative variance explained

07. Principal Components Analysis Tables
## Principal Components Analysis
## Call: principal(r = r, nfactors = nfactors, residuals = residuals,
## rotate = rotate, n.obs = n.obs, covar = covar, scores = scores,
## missing = missing, impute = impute, oblique.scores = oblique.scores,
## method = method, use = use, cor = cor, correct = 0.5, weight = NULL)
## Standardized loadings (pattern matrix) based upon correlation matrix
## item PC1 PC2 h2 u2 com
## total_imports 9 0.982 0.966 0.0344 1.00
## total_exports 8 0.968 0.938 0.0618 1.00
## private_consumption 6 0.958 0.965 0.0354 1.10
## nominal_gdp 1 0.956 0.958 0.0423 1.10
## labour_force_population 5 0.893 0.956 0.0443 1.38
## fdi 10 0.840 0.852 0.1481 1.40
## population 3 0.813 0.827 0.1728 1.48
## gdp_per_capita 2 0.953 0.974 0.0263 1.15
## private_consumption_perCapita 7 0.921 0.905 0.0952 1.13
## monthly_earnings 4 0.889 0.922 0.0781 1.32
##
## PC1 PC2
## SS loadings 6.151 3.110
## Proportion Var 0.615 0.311
## Cumulative Var 0.615 0.926
## Proportion Explained 0.664 0.336
## Cumulative Proportion 0.664 1.000
##
## Mean item complexity = 1.2
## Test of the hypothesis that 2 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.033
## with the empirical chi square 2.455 with prob < 1
##
## Fit based upon off diagonal values = 0.997
Plot the Factor Scores
## memb4
## 1 2 3 4
## 17 6 1 1
Plot the factor scores
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.3'
## (as 'lib' is unspecified)
## Warning: ggrepel: 11 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## Warning: ggrepel: 11 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
08. Clustering by similarity analysis and plot the first simple
version

Plot the dendrogram (upper view) when k = 3
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.3'
## (as 'lib' is unspecified)
##
## ---------------------
## Welcome to dendextend version 1.17.1
## Type citation('dendextend') for how to cite the package.
##
## Type browseVignettes(package = 'dendextend') for the package vignette.
## The github page is: https://github.com/talgalili/dendextend/
##
## Suggestions and bug-reports can be submitted at: https://github.com/talgalili/dendextend/issues
## You may ask questions at stackoverflow, use the r and dendextend tags:
## https://stackoverflow.com/questions/tagged/dendextend
##
## To suppress this message use: suppressPackageStartupMessages(library(dendextend))
## ---------------------
##
## Attaching package: 'dendextend'
## The following object is masked from 'package:stats':
##
## cutree
## Warning in dist(df_std, method = "euclidean"): NAs introduced by coercion
## [1] 22 23 25 10 11 7 24 19 16 12 14 5 17 3 21 1 15 8 9 4 13 2 20 6 18
## [1] "Cambodia" "Indonesia" "Laos" "Malaysia" "Myanmar"
## [6] "Philippines" "Singapore" "Thailand" "Vietnam" "Japan"
## [11] "South_Korea" "Mongolia" "Kazakhstan" "Kyrgystan" "Tajikistan"
## [16] "Turkmenistan" "Uzbekistan" "Bangladesh" "India" "Pakistan"
## [21] "Sri_Lanka" "China" "Taiwan" "Hongkong" "Macau"
## [1] "China" "Taiwan" "Macau" "Japan" "South_Korea"
## [6] "Singapore" "Hongkong" "India" "Turkmenistan" "Mongolia"
## [11] "Kyrgystan" "Myanmar" "Uzbekistan" "Laos" "Sri_Lanka"
## [16] "Cambodia" "Tajikistan" "Thailand" "Vietnam" "Malaysia"
## [21] "Kazakhstan" "Indonesia" "Pakistan" "Philippines" "Bangladesh"

Dendrogram when k = 4

09. K-means clustering methodology: 4 clusters
Iterate the centers 02/02
# Seed the random number generator so that the run is repeatable
set.seed(1)
# Run k-means starting from the centres in `cent`, with at most 10 iterations
df_means <- kmeans(df_std_n1c, centers = cent, iter.max = 10)
df_means
## K-means clustering with 4 clusters of sizes 17, 6, 1, 1
##
## Cluster means:
## nominal_gdp gdp_per_capita population monthly_earnings
## 1 -0.29608788 -0.4908707 -0.2599312 -0.5345763
## 2 -0.02115825 1.5066108 -0.3495839 1.5050332
## 3 0.56600717 -0.6133079 3.2012472 -0.3289736
## 4 4.59443624 -0.0815552 3.3150868 0.3865717
## labour_force_population private_consumption private_consumption_perCapita
## 1 -0.2460802 -0.32426659 -0.5152189
## 2 -0.3450048 0.02303979 1.5988944
## 3 2.2287462 1.01996905 -0.6066202
## 4 4.0246453 4.35432430 -0.2280255
## total_exports total_imports fdi
## 1 -0.3495485 -0.4117207 -0.4193252
## 2 0.2133582 0.3497582 0.5701762
## 3 0.1465490 0.6887400 0.4958338
## 4 4.5156258 4.2119625 3.2116383
##
## Clustering vector:
## [1] 1 1 1 1 1 1 2 1 1 2 2 1 1 1 1 1 1 1 3 1 1 4 2 2 2
##
## Within cluster sum of squares by cluster:
## [1] 5.562367 26.526066 0.000000 0.000000
## (between_SS / total_SS = 86.6 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
9.2. Kmeans Clustering and means extraction
9.3 Visualize the means
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.3'
## (as 'lib' is unspecified)

10. Present the R version
# Record the R version, platform and package versions used for this run
sessionInfo()
## R version 4.3.1 (2023-06-16)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.6 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/atlas/libblas.so.3.10.3
## LAPACK: /usr/lib/x86_64-linux-gnu/atlas/liblapack.so.3.10.3; LAPACK version 3.9.0
##
## locale:
## [1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
## [4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
## [7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
## [10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
##
## time zone: UTC
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] dendextend_1.17.1 ggrepel_0.9.4 ggcorrplot_0.1.4.1 psych_2.3.9
## [5] lubridate_1.9.2 forcats_1.0.0 stringr_1.5.0 dplyr_1.1.2
## [9] purrr_1.0.1 tidyr_1.3.0 tibble_3.2.1 ggplot2_3.4.4
## [13] tidyverse_2.0.0 readr_2.1.4
##
## loaded via a namespace (and not attached):
## [1] gtable_0.3.3 xfun_0.39 bslib_0.5.0 lattice_0.21-8
## [5] tzdb_0.4.0 vctrs_0.6.3 tools_4.3.1 generics_0.1.3
## [9] parallel_4.3.1 fansi_1.0.4 highr_0.10 pkgconfig_2.0.3
## [13] lifecycle_1.0.3 compiler_4.3.1 farver_2.1.1 textshaping_0.3.6
## [17] munsell_0.5.0 mnormt_2.1.1 htmltools_0.5.5 sass_0.4.6
## [21] yaml_2.3.7 pillar_1.9.0 crayon_1.5.2 jquerylib_0.1.4
## [25] cachem_1.0.8 viridis_0.6.3 nlme_3.1-162 tidyselect_1.2.0
## [29] digest_0.6.32 stringi_1.7.12 reshape2_1.4.4 labeling_0.4.2
## [33] fastmap_1.1.1 grid_4.3.1 colorspace_2.1-0 cli_3.6.1
## [37] magrittr_2.0.3 utf8_1.2.3 withr_2.5.0 scales_1.2.1
## [41] bit64_4.0.5 timechange_0.2.0 rmarkdown_2.23 bit_4.0.5
## [45] gridExtra_2.3 ragg_1.2.5 hms_1.1.3 evaluate_0.21
## [49] knitr_1.43 viridisLite_0.4.2 rlang_1.1.1 Rcpp_1.0.10
## [53] glue_1.6.2 rstudioapi_0.14 vroom_1.6.3 jsonlite_1.8.7
## [57] R6_2.5.1 plyr_1.8.8 systemfonts_1.0.4