title: “WJS International Classifying countries” author: “jan.hovden@uib.no” output: pdf_document: toc: yes html_notebook: default html_document: number_sections: yes theme: journal toc: yes —

load required packages and set working directory

require(foreign)
require(FactoMineR)
require(sjmisc)
require(factoextra)
require(dplyr)
require(ggplot2)
require(ggthemes)
require(psych)
library(rworldmap)
library("NbClust")
library(ape)
library(paran)
#library(ggraph)

Import WJSfinalaggregated

setwd("/Users/janfredrikhovden/Dropbox/DBXPAGAANDEARBEID/Statistikk/Rworkdir/WJS")
wjs<-read.spss("WJS full agg for final conclusions.sav", to.data.frame=TRUE)
wjs$ID<-as.numeric(rownames(wjs)) # assign row number as ID

#description of some of the methods for cluster validiation: http://www.sthda.com/english/articles/29-cluster-validation-essentials/97-cluster-validation-statistics-must-know-methods/#internal-measures-for-cluster-validation

Cluster, ala Folker

clu<-dplyr::select(wjs, INFL_POL_mean:INFL_ECO_mean,AUTONOMY_mean:ROLE_ACO_mean)
rownames(clu)<-wjs[,1]
# drop Tanzania, Singapore and Quatar
clu<-clu[-57,] # Tanzania
clu<-clu[-56,] # Singapore
clu<-clu[-55,] # Quatar

#test for gap statistic, silhouette  coefficientcluster.stats(d = NULL, clustering, al.clustering = NULL)
fviz_nbclust(clu, hcut, method = "gap_stat") +labs(subtitle = "Gap statistic, hierarchical cluster") # 3 klynger

fviz_nbclust(clu, hcut, method = "silhouette") +labs(subtitle = "Gap statistic, hierarchical cluster") # 2 klynger

nb <- NbClust(clu, distance = "euclidean", min.nc = 2,
        max.nc = 10, method = "average")

## *** : The Hubert index is a graphical method of determining the number of clusters.
##                 In the plot of Hubert index, we seek a significant knee that corresponds to a 
##                 significant increase of the value of the measure i.e the significant peak in Hubert
##                 index second differences plot. 
##

## *** : The D index is a graphical method of determining the number of clusters. 
##                 In the plot of D index, we seek a significant knee (the significant peak in Dindex
##                 second differences plot) that corresponds to a significant increase of the value of
##                 the measure. 
##  
## ******************************************************************* 
## * Among all indices:                                                
## * 10 proposed 2 as the best number of clusters 
## * 11 proposed 3 as the best number of clusters 
## * 3 proposed 10 as the best number of clusters 
## 
##                    ***** Conclusion *****                            
##  
## * According to the majority rule, the best number of clusters is  3 
##  
##  
## *******************************************************************

fviz_nbclust(nb)

## Among all indices: 
## ===================
## * 2 proposed  0 as the best number of clusters
## * 10 proposed  2 as the best number of clusters
## * 11 proposed  3 as the best number of clusters
## * 3 proposed  10 as the best number of clusters
## 
## Conclusion
## =========================
## * According to the majority rule, the best number of clusters is  3 .

km.res <- hcut(clu, 3, nstart = 25, hc_method="average")
fviz_cluster(km.res, data = clu, ellipse.type = "convex", repel=TRUE)+theme_minimal()

plot((km.res), cex = 0.7)

#PCA and clustering on PCA factors
paran(clu) # 1 factor

## 
## Using eigendecomposition of correlation matrix.
## Computing: 10%  20%  30%  40%  50%  60%  70%  80%  90%  100%
## 
## 
## Results of Horn's Parallel Analysis for component retention
## 270 iterations, using the mean estimate
## 
## -------------------------------------------------- 
## Component   Adjusted    Unadjusted    Estimated 
##             Eigenvalue  Eigenvalue    Bias 
## -------------------------------------------------- 
## 1           4.666855    5.282354      0.615499
## -------------------------------------------------- 
## 
## Adjusted eigenvalues > 1 indicate dimensions to retain.
## (1 components retained)

res.pca<-PCA(clu , scale.unit=FALSE, ncp=2, graph = TRUE)

fviz_pca_biplot(res.pca, geom = "text",  title="PCA", axes =c(1,2), labelsize=2.5, repel=TRUE) + theme_minimal()
fviz_screeplot(res.pca, ncp=10)
fviz_contrib(res.pca, choice = "var", axes = 1)
fviz_contrib(res.pca, choice = "var", axes = 2)
individ <- get_pca_ind(res.pca)
O4ind<-individ$coord

res.pca.hcpc<-HCPC(res.pca, nb.clust=-1) # 3 clusters

fviz_cluster(res.pca.hcpc, data = x, ellipse.type = "convex", repel=TRUE)+ theme_tufte() + labs(title = "Clusters (after PCA)") + theme(axis.text=element_text(size=12), axis.title=element_text(size=14,face="bold"))

res.pca.hcpc$desc.var

## $quanti.var
##                    Eta2      P-value
## ROLE_COL_mean 0.8687570 1.262768e-27
## ROLE_INT_mean 0.7605915 1.158133e-19
## INFL_ECO_mean 0.7108045 3.684203e-17
## INFL_ORG_mean 0.6946657 1.930445e-16
## INFL_POL_mean 0.6684038 2.391074e-15
## ROLE_ACO_mean 0.5084688 3.911193e-10
## ROLE_MON_mean 0.2294135 3.532258e-04
## AUTONOMY_mean 0.2092923 7.753306e-04
## INFL_PRO_mean 0.1393100 1.029975e-02
## 
## $quanti
## $quanti$`1`
##                  v.test Mean in category Overall mean sd in category
## AUTONOMY_mean  2.724567         3.988839     3.848377      0.1908396
## ROLE_MON_mean -3.040360         3.316848     3.525736      0.3385180
## ROLE_ACO_mean -4.101474         3.029114     3.336409      0.3244097
## INFL_POL_mean -4.723196         1.783694     2.233791      0.2238205
## ROLE_COL_mean -5.509767         1.378106     2.125116      0.1433477
## INFL_ECO_mean -5.673338         2.290905     2.745668      0.2241332
## INFL_ORG_mean -5.705389         2.879758     3.298806      0.2380333
## ROLE_INT_mean -6.684721         2.643814     3.334939      0.3085856
##               Overall sd      p.value
## AUTONOMY_mean  0.2859607 6.438593e-03
## ROLE_MON_mean  0.3810969 2.362956e-03
## ROLE_ACO_mean  0.4155854 4.105273e-05
## INFL_POL_mean  0.5285857 2.321666e-06
## ROLE_COL_mean  0.7520362 3.593086e-08
## INFL_ECO_mean  0.4446230 1.400418e-08
## INFL_ORG_mean  0.4074038 1.160776e-08
## ROLE_INT_mean  0.5734810 2.313648e-11
## 
## $quanti$`2`
## NULL
## 
## $quanti$`3`
##                  v.test Mean in category Overall mean sd in category
## ROLE_COL_mean  6.999034         3.074040     2.125116      0.3842621
## INFL_POL_mean  6.189744         2.823642     2.233791      0.3611328
## INFL_ECO_mean  5.867501         3.215994     2.745668      0.2480732
## INFL_ORG_mean  5.708375         3.718075     3.298806      0.1895218
## ROLE_ACO_mean  5.406485         3.741478     3.336409      0.2220436
## ROLE_INT_mean  4.833378         3.834656     3.334939      0.2801354
## ROLE_MON_mean  3.476514         3.764591     3.525736      0.3087070
## INFL_PRO_mean  2.916073         3.902630     3.765382      0.2369239
## AUTONOMY_mean -3.425319         3.671789     3.848377      0.2635534
##               Overall sd      p.value
## ROLE_COL_mean  0.7520362 2.577325e-12
## INFL_POL_mean  0.5285857 6.026210e-10
## INFL_ECO_mean  0.4446230 4.424120e-09
## INFL_ORG_mean  0.4074038 1.140597e-08
## ROLE_ACO_mean  0.4155854 6.427344e-08
## ROLE_INT_mean  0.5734810 1.342356e-06
## ROLE_MON_mean  0.3810969 5.079786e-04
## INFL_PRO_mean  0.2610680 3.544676e-03
## AUTONOMY_mean  0.2859607 6.140787e-04
## 
## 
## attr(,"class")
## [1] "catdes" "list "

res.pca.hcpc$desc.ind

## $para
## Cluster: 1
## Czech Republic        Finland        Austria         Norway    Switzerland 
##      0.1502448      0.1685557      0.2414245      0.2649192      0.2655605 
## -------------------------------------------------------- 
## Cluster: 2
##   Moldova Argentina    Brazil   Romania    Turkey 
## 0.1283322 0.1357232 0.2054645 0.2317151 0.2771268 
## -------------------------------------------------------- 
## Cluster: 3
##      Kenya    Ecuador      India   Botswana     Bhutan 
## 0.06385787 0.23854297 0.29167327 0.30830181 0.36219318 
## 
## $dist
## Cluster: 1
##  Iceland   France    Italy   Canada  Belgium 
## 2.281316 1.667664 1.593554 1.508972 1.466847 
## -------------------------------------------------------- 
## Cluster: 2
##   Croatia     Spain   Hungary    Serbia Argentina 
##  1.717003  1.538365  1.412283  1.382766  1.369206 
## -------------------------------------------------------- 
## Cluster: 3
## Thailand      UAE    Sudan   Malawi Ethiopia 
## 2.701182 2.629423 2.279124 2.130951 2.062982

#MDS
library(ggrepel)
d<-dist(clu) 
fit <- isoMDS(d, k=2)

## initial  value 10.985607 
## iter   5 value 8.516952
## final  value 8.391742 
## converged

fit

## $points
##                        [,1]         [,2]
## Argentina       0.004957139  0.227409114
## Bulgaria       -0.184528651  0.404221261
## Croatia        -0.029458965  0.856746246
## Cyprus         -0.367349937  1.032196136
## Estonia        -0.613458614 -0.076580643
## Greece         -0.686132052  0.189119200
## Israel         -0.548766799  0.524019027
## Japan          -0.445665462  0.383306351
## Latvia         -0.891467652  0.454328584
## Portugal       -0.997968089  0.362843559
## Serbia          0.203989245  0.223836609
## South Africa    0.006248435 -0.144506603
## South Korea     0.504865961  0.803229176
## Spain           0.064274438  0.380346730
## Australia      -0.816819880 -0.322669514
## Austria        -1.504614991 -0.491295350
## Belgium        -1.481209417 -0.132557933
## Canada         -1.645610442  0.204056008
## Czech Republic -1.141464901 -0.151329006
## Denmark        -1.463629617  0.482721910
## Finland        -1.154444781 -0.035861969
## France         -1.786897133  0.045396984
## Germany        -1.360432794 -0.712365637
## Iceland        -2.316096151 -0.466065977
## Ireland        -0.996691692 -0.117443975
## Italy          -1.603530764 -0.242818687
## Netherlands    -1.311490163 -0.856557877
## New Zealand    -0.995805123 -0.009707051
## Norway         -1.483769149 -0.200320725
## Sweden         -1.568317450  0.391460639
## Switzerland    -1.458552107 -0.313892466
## UK             -1.023120555 -0.331125750
## USA            -1.257402329  0.639566707
## Bangladesh      0.733969135 -0.086092119
## Bhutan          1.311250275 -0.386595379
## Botswana        1.464279966  0.274255632
## China           1.440242172 -0.921669982
## Colombia        1.139308234  0.393836215
## Ecuador         1.196861354 -0.023335382
## Egypt           1.165888743  0.186238300
## El Salvador     1.335503427  0.234207887
## Ethiopia        1.844805351 -0.421281262
## India           1.138213449 -0.062375882
## Indonesia       1.006713154 -0.418882914
## Kenya           1.385847633 -0.096926494
## Malawi          1.980175584  0.247901583
## Malaysia        1.735543103 -0.517240991
## Mexico          1.123691030  0.435898098
## Oman            1.584042816 -0.550346705
## Philippines     0.785643339  0.333258083
## Sierra Leone    1.685739333  0.180049113
## Sudan           2.196442203  0.578363646
## Thailand        2.603663619 -0.149836927
## UAE             2.203620849 -1.079862881
## Albania        -0.097599018 -0.501745726
## Brazil         -0.245749153 -0.013013883
## Chile           0.318016121  0.061084861
## Hong Kong      -0.537344803 -0.741702883
## Hungary         0.070787216 -0.431831780
## Kosovo          0.282399888 -0.092941596
## Moldova        -0.255540006  0.085713513
## Romania        -0.364124884 -0.008822511
## Russia         -0.084236695 -0.400568701
## Turkey          0.202307006  0.894561987
## 
## $stress
## [1] 8.391742

fit.sh<-Shepard(d, fit$points)
plot(fit.sh, pch = ".")
lines(fit.sh$x, fit.sh$yf, type = "S")
x <- fit$points[,1]
y <- fit$points[,2]
# plot(x, y, xlab="Coordinate 1", ylab="Coordinate 2",
# main="NonMetric   MDS",   type="n")
text(x, y, labels = row.names(clu), cex=.7)

space<-as.data.frame(fit$points)
qplot(V1, V2, data=space) + geom_text_repel(aes(label=row.names(fit$points))) + ggtitle("Non-parametric MDS, centered means") + theme_minimal()

Cluster, ala Thomas

clu<-dplyr::select(wjs, INFL_POL_mean, INFL_ECO_mean, INFL_ORG_mean,AUTONOMY_mean:ROLE_ACO_mean, C13A_mean:C13D_mean)
rownames(clu)<-wjs[,1]
# drop Tanzania, Singapore and Quatar
clu<-clu[-57,] # Tanzania
clu<-clu[-56,] # Singapore
clu<-clu[-55,] # Quatar

fviz_nbclust(clu, hcut, method = "gap_stat") +labs(subtitle = "Gap statistic, hierarchical cluster") # 5 clusters

nb <- NbClust(clu, distance = "euclidean", min.nc = 2,
        max.nc = 10, method = "average")

## *** : The Hubert index is a graphical method of determining the number of clusters.
##                 In the plot of Hubert index, we seek a significant knee that corresponds to a 
##                 significant increase of the value of the measure i.e the significant peak in Hubert
##                 index second differences plot. 
##

## *** : The D index is a graphical method of determining the number of clusters. 
##                 In the plot of D index, we seek a significant knee (the significant peak in Dindex
##                 second differences plot) that corresponds to a significant increase of the value of
##                 the measure. 
##  
## ******************************************************************* 
## * Among all indices:                                                
## * 9 proposed 2 as the best number of clusters 
## * 2 proposed 3 as the best number of clusters 
## * 5 proposed 4 as the best number of clusters 
## * 5 proposed 5 as the best number of clusters 
## * 1 proposed 9 as the best number of clusters 
## * 1 proposed 10 as the best number of clusters 
## 
##                    ***** Conclusion *****                            
##  
## * According to the majority rule, the best number of clusters is  2 
##  
##  
## *******************************************************************

fviz_nbclust(nb)

## Among all indices: 
## ===================
## * 2 proposed  0 as the best number of clusters
## * 1 proposed  1 as the best number of clusters
## * 9 proposed  2 as the best number of clusters
## * 2 proposed  3 as the best number of clusters
## * 5 proposed  4 as the best number of clusters
## * 5 proposed  5 as the best number of clusters
## * 1 proposed  9 as the best number of clusters
## * 1 proposed  10 as the best number of clusters
## 
## Conclusion
## =========================
## * According to the majority rule, the best number of clusters is  2 .

km.res <- hcut(clu, 3, nstart = 25, hc_method="average")
fviz_cluster(km.res, data = clu, ellipse.type = "convex", repel=TRUE)+theme_minimal()

plot((km.res), cex = 0.7)

#PCA and clustering on PCA factors
paran(clu) # 2 factors

## 
## Using eigendecomposition of correlation matrix.
## Computing: 10%  20%  30%  40%  50%  60%  70%  80%  90%  100%
## 
## 
## Results of Horn's Parallel Analysis for component retention
## 360 iterations, using the mean estimate
## 
## -------------------------------------------------- 
## Component   Adjusted    Unadjusted    Estimated 
##             Eigenvalue  Eigenvalue    Bias 
## -------------------------------------------------- 
## 1           5.084843    5.870280      0.785436
## 2           1.285554    1.837295      0.551741
## -------------------------------------------------- 
## 
## Adjusted eigenvalues > 1 indicate dimensions to retain.
## (2 components retained)

res.pca<-PCA(clu , scale.unit=FALSE, ncp=2, graph = TRUE)

fviz_pca_biplot(res.pca, geom = "text",  title="PCA", axes =c(1,2), labelsize=2.5, repel=TRUE) + theme_minimal()
fviz_screeplot(res.pca, ncp=10)
fviz_contrib(res.pca, choice = "var", axes = 1)
fviz_contrib(res.pca, choice = "var", axes = 2)
individ <- get_pca_ind(res.pca)
O4ind<-individ$coord

res.pca.hcpc<-HCPC(res.pca, nb.clust=-1) # 3 clusters

fviz_cluster(res.pca.hcpc, data = x, ellipse.type = "convex", repel=TRUE)+ theme_tufte() + labs(title = "Clusters (after PCA)") + theme(axis.text=element_text(size=12), axis.title=element_text(size=14,face="bold"))

res.pca.hcpc$desc.var

## $quanti.var
##                    Eta2      P-value
## ROLE_COL_mean 0.8036543 2.736235e-22
## ROLE_INT_mean 0.7364032 2.181085e-18
## INFL_ECO_mean 0.7209731 1.236526e-17
## INFL_POL_mean 0.7068639 5.566912e-17
## INFL_ORG_mean 0.6986866 1.288429e-16
## ROLE_ACO_mean 0.4626419 5.929874e-09
## C13C_mean     0.3566491 1.437676e-06
## C13D_mean     0.2806846 4.325550e-05
## C13B_mean     0.2685871 7.193813e-05
## AUTONOMY_mean 0.2099072 7.571508e-04
## ROLE_MON_mean 0.2081772 8.093837e-04
## 
## $quanti
## $quanti$`1`
##                  v.test Mean in category Overall mean sd in category
## AUTONOMY_mean  2.486059         3.964008     3.848377      0.2543949
## C13C_mean     -2.351268         2.449425     2.626701      0.3340672
## ROLE_MON_mean -2.981168         3.340947     3.525736      0.3291280
## ROLE_ACO_mean -4.288720         3.046513     3.336409      0.3316884
## INFL_POL_mean -4.975250         1.806046     2.233791      0.2308858
## ROLE_COL_mean -5.792834         1.416544     2.125116      0.1725187
## INFL_ORG_mean -5.857020         2.910695     3.298806      0.2442105
## INFL_ECO_mean -5.947081         2.315587     2.745668      0.2244806
## ROLE_INT_mean -6.690187         2.710901     3.334939      0.3461815
##               Overall sd      p.value
## AUTONOMY_mean  0.2859607 1.291665e-02
## C13C_mean      0.4635468 1.870957e-02
## ROLE_MON_mean  0.3810969 2.871516e-03
## ROLE_ACO_mean  0.4155854 1.797054e-05
## INFL_POL_mean  0.5285857 6.516362e-07
## ROLE_COL_mean  0.7520362 6.920837e-09
## INFL_ORG_mean  0.4074038 4.712468e-09
## INFL_ECO_mean  0.4446230 2.729665e-09
## ROLE_INT_mean  0.5734810 2.228852e-11
## 
## $quanti$`2`
##                  v.test Mean in category Overall mean sd in category
## ROLE_INT_mean  2.691824         3.603665     3.334939      0.2328624
## C13C_mean     -2.090120         2.458043     2.626701      0.2341273
## C13B_mean     -2.885862         2.906586     3.116362      0.3682844
## C13D_mean     -2.977895         2.463160     2.698359      0.3057142
##               Overall sd     p.value
## ROLE_INT_mean  0.5734810 0.007106247
## C13C_mean      0.4635468 0.036606983
## C13B_mean      0.4175787 0.003903435
## C13D_mean      0.4537142 0.002902358
## 
## $quanti$`3`
##                  v.test Mean in category Overall mean sd in category
## ROLE_COL_mean  6.421405         3.097732     2.125116      0.4014658
## INFL_POL_mean  6.301160         2.904614     2.233791      0.3186110
## INFL_ECO_mean  5.655272         3.252096     2.745668      0.2481291
## INFL_ORG_mean  5.564194         3.755368     3.298806      0.1689015
## ROLE_ACO_mean  4.946631         3.750448     3.336409      0.2365875
## C13C_mean      4.739725         3.069207     2.626701      0.5250937
## ROLE_INT_mean  4.360158         3.838549     3.334939      0.2866597
## C13D_mean      3.995538         3.063474     2.698359      0.4164012
## C13B_mean      3.920251         3.446066     3.116362      0.3995027
## ROLE_MON_mean  3.243169         3.774666     3.525736      0.3254019
## AUTONOMY_mean -3.526227         3.645287     3.848377      0.2505697
##               Overall sd      p.value
## ROLE_COL_mean  0.7520362 1.350225e-10
## INFL_POL_mean  0.5285857 2.954264e-10
## INFL_ECO_mean  0.4446230 1.555997e-08
## INFL_ORG_mean  0.4074038 2.633663e-08
## ROLE_ACO_mean  0.4155854 7.550906e-07
## C13C_mean      0.4635468 2.140089e-06
## ROLE_INT_mean  0.5734810 1.299683e-05
## C13D_mean      0.4537142 6.454754e-05
## C13B_mean      0.4175787 8.845672e-05
## ROLE_MON_mean  0.3810969 1.182082e-03
## AUTONOMY_mean  0.2859607 4.215250e-04
## 
## 
## attr(,"class")
## [1] "catdes" "list "

res.pca.hcpc$desc.ind

## $para
## Cluster: 1
##     Denmark Switzerland      Norway New Zealand     Belgium 
##   0.1944659   0.2547730   0.2793222   0.2841225   0.3345835 
## -------------------------------------------------------- 
## Cluster: 2
## South Africa    Argentina       Turkey        Chile       Kosovo 
##    0.1360379    0.1815498    0.2540435    0.3083126    0.3766436 
## -------------------------------------------------------- 
## Cluster: 3
##        Kenya     Ethiopia Sierra Leone        India     Malaysia 
##    0.1967874    0.2237366    0.3084464    0.4141100    0.4600797 
## 
## $dist
## Cluster: 1
##     Iceland      France Netherlands      Sweden       Italy 
##    2.403658    1.963222    1.896259    1.847717    1.781640 
## -------------------------------------------------------- 
## Cluster: 2
## South Korea      Serbia       Chile     Croatia       Spain 
##    1.845821    1.752903    1.660118    1.593968    1.578279 
## -------------------------------------------------------- 
## Cluster: 3
## Thailand    Sudan     Oman      UAE   Bhutan 
## 3.364798 3.122849 2.325242 2.307191 2.229570

library(ggrepel)
d<-dist(clu) 
fit <- isoMDS(d, k=2)

## initial  value 13.241866 
## iter   5 value 9.958288
## iter  10 value 9.829607
## final  value 9.812304 
## converged

fit

## $points
##                       [,1]          [,2]
## Argentina       0.12472990 -0.3915386249
## Bulgaria        0.47334496 -0.8318403147
## Croatia         0.27958451 -0.9996513491
## Cyprus          0.64862464 -0.9426741997
## Estonia         0.68710914 -0.0872645397
## Greece          0.75199453 -0.1861421278
## Israel          0.55316028 -0.0445030705
## Japan           0.27392974  0.4478924330
## Latvia          0.76401027  0.2306769538
## Portugal        1.13911851 -0.4645544283
## Serbia          0.05004004 -0.7077966525
## South Africa    0.14703166 -0.2217132561
## South Korea    -0.11055266 -1.2373414072
## Spain           0.23071769 -0.6825880288
## Australia       0.91997090 -0.0311979431
## Austria         1.64495824 -0.1602622083
## Belgium         1.34760025  0.3652891494
## Canada          1.59255902  0.2862187919
## Czech Republic  0.95423923  0.9072565008
## Denmark         1.32080681  0.7552068969
## Finland         1.31309572 -0.2766871847
## France          1.60480002  0.6343096128
## Germany         1.65945540 -0.2786287378
## Iceland         2.40612486  0.2981076243
## Ireland         0.88315601  0.3688277291
## Italy           1.80708277 -0.0974582059
## Netherlands     0.90442186  1.1434665782
## New Zealand     0.92584292  0.2254214178
## Norway          1.29952321  0.4470905137
## Sweden          1.28340362  1.1627510752
## Switzerland     1.43144307  0.1178526416
## UK              0.84397258  0.5912194584
## USA             1.43671541 -0.6537809376
## Bangladesh     -0.69679604 -0.1664601552
## Bhutan         -1.64977274  0.9258257589
## Botswana       -1.15165887 -0.4904746428
## China          -1.32159920 -0.9862100410
## Colombia       -0.85425555 -0.9523725161
## Ecuador        -1.09546569 -0.2876129722
## Egypt          -1.15119492 -0.2074910597
## El Salvador    -1.19431222 -0.3444668227
## Ethiopia       -2.45462293  0.0070815393
## India          -1.20421002  0.3470058295
## Indonesia      -0.79150977 -0.7513493521
## Kenya          -1.37050828  0.1778387154
## Malawi         -1.75468594 -0.4758181607
## Malaysia       -1.87142996  0.4369730769
## Mexico         -1.14560075 -0.0006162433
## Oman           -1.92908080  0.6888197957
## Philippines    -0.76138129 -0.0186640380
## Sierra Leone   -1.87080665  0.2780333871
## Sudan          -2.75146527  1.5638442568
## Thailand       -3.08897301  0.9434758094
## UAE            -2.45789966 -0.4654321787
## Albania        -0.07096753  0.5641725733
## Brazil          0.40316414 -0.3463075996
## Chile          -0.12339478 -0.3872603634
## Hong Kong       0.30299170  0.7463467423
## Hungary        -0.08881484  0.2731918363
## Kosovo         -0.21571218 -0.0697476267
## Moldova         0.28473239 -0.0057839078
## Romania         0.47084370 -0.1389795599
## Russia         -0.02301994  0.3717687613
## Turkey          0.03539174 -0.9152950027
## 
## $stress
## [1] 9.812304

fit.sh<-Shepard(d, fit$points)
plot(fit.sh, pch = ".")
lines(fit.sh$x, fit.sh$yf, type = "S")
x <- fit$points[,1]
y <- fit$points[,2]
# plot(x, y, xlab="Coordinate 1", ylab="Coordinate 2",
# main="NonMetric   MDS",   type="n")
text(x, y, labels = row.names(clu), cex=.7)

space<-as.data.frame(fit$points)
qplot(V1, V2, data=space) + geom_text_repel(aes(label=row.names(fit$points))) + ggtitle("Non-parametric MDS, centered means") + theme_minimal()

Alternative to Thomas, only C13A (Always follow rules)

clu<-dplyr::select(wjs, INFL_POL_mean, INFL_ECO_mean, INFL_ORG_mean,AUTONOMY_mean:ROLE_ACO_mean, C13A_mean)
rownames(clu)<-wjs[,1]
# drop Tanzania, Singapore and Quatar
clu<-clu[-57,] # Tanzania
clu<-clu[-56,] # Singapore
clu<-clu[-55,] # Quatar

fviz_nbclust(clu, hcut, method = "gap_stat") +labs(subtitle = "Gap statistic, hierarchical cluster") # 3 clusters

nb <- NbClust(clu, distance = "euclidean", min.nc = 2,
        max.nc = 10, method = "average")

## *** : The Hubert index is a graphical method of determining the number of clusters.
##                 In the plot of Hubert index, we seek a significant knee that corresponds to a 
##                 significant increase of the value of the measure i.e the significant peak in Hubert
##                 index second differences plot. 
##

## *** : The D index is a graphical method of determining the number of clusters. 
##                 In the plot of D index, we seek a significant knee (the significant peak in Dindex
##                 second differences plot) that corresponds to a significant increase of the value of
##                 the measure. 
##  
## ******************************************************************* 
## * Among all indices:                                                
## * 8 proposed 2 as the best number of clusters 
## * 7 proposed 3 as the best number of clusters 
## * 6 proposed 4 as the best number of clusters 
## * 1 proposed 7 as the best number of clusters 
## * 1 proposed 10 as the best number of clusters 
## 
##                    ***** Conclusion *****                            
##  
## * According to the majority rule, the best number of clusters is  2 
##  
##  
## *******************************************************************

fviz_nbclust(nb)

## Among all indices: 
## ===================
## * 2 proposed  0 as the best number of clusters
## * 1 proposed  1 as the best number of clusters
## * 8 proposed  2 as the best number of clusters
## * 7 proposed  3 as the best number of clusters
## * 6 proposed  4 as the best number of clusters
## * 1 proposed  7 as the best number of clusters
## * 1 proposed  10 as the best number of clusters
## 
## Conclusion
## =========================
## * According to the majority rule, the best number of clusters is  2 .

km.res <- hcut(clu, 3, nstart = 25, hc_method="average")
fviz_cluster(km.res, data = clu, ellipse.type = "convex", repel=TRUE)+theme_minimal()

plot((km.res), cex = 0.7)

#PCA and clustering on PCA factors
paran(clu) # 1 factor

## 
## Using eigendecomposition of correlation matrix.
## Computing: 10%  20%  30%  40%  50%  60%  70%  80%  90%  100%
## 
## 
## Results of Horn's Parallel Analysis for component retention
## 270 iterations, using the mean estimate
## 
## -------------------------------------------------- 
## Component   Adjusted    Unadjusted    Estimated 
##             Eigenvalue  Eigenvalue    Bias 
## -------------------------------------------------- 
## 1           4.463941    5.079440      0.615499
## -------------------------------------------------- 
## 
## Adjusted eigenvalues > 1 indicate dimensions to retain.
## (1 components retained)

res.pca<-PCA(clu , scale.unit=FALSE, ncp=2, graph = TRUE)

fviz_pca_biplot(res.pca, geom = "text",  title="PCA", axes =c(1,2), labelsize=2.5, repel=TRUE) + theme_minimal()
fviz_screeplot(res.pca, ncp=10)
fviz_contrib(res.pca, choice = "var", axes = 1)
fviz_contrib(res.pca, choice = "var", axes = 2)
individ <- get_pca_ind(res.pca)
O4ind<-individ$coord

res.pca.hcpc<-HCPC(res.pca, nb.clust=-1) # 3 clusters

fviz_cluster(res.pca.hcpc, data = x, ellipse.type = "convex", repel=TRUE)+ theme_tufte() + labs(title = "Clusters (after PCA)") + theme(axis.text=element_text(size=12), axis.title=element_text(size=14,face="bold"))

res.pca.hcpc$desc.var

## $quanti.var
##                    Eta2      P-value
## ROLE_COL_mean 0.8687570 1.262768e-27
## ROLE_INT_mean 0.7605915 1.158133e-19
## INFL_ECO_mean 0.7108045 3.684203e-17
## INFL_ORG_mean 0.6946657 1.930445e-16
## INFL_POL_mean 0.6684038 2.391074e-15
## ROLE_ACO_mean 0.5084688 3.911193e-10
## ROLE_MON_mean 0.2294135 3.532258e-04
## AUTONOMY_mean 0.2092923 7.753306e-04
## 
## $quanti
## $quanti$`1`
##                  v.test Mean in category Overall mean sd in category
## AUTONOMY_mean  2.724567         3.988839     3.848377      0.1908396
## ROLE_MON_mean -3.040360         3.316848     3.525736      0.3385180
## ROLE_ACO_mean -4.101474         3.029114     3.336409      0.3244097
## INFL_POL_mean -4.723196         1.783694     2.233791      0.2238205
## ROLE_COL_mean -5.509767         1.378106     2.125116      0.1433477
## INFL_ECO_mean -5.673338         2.290905     2.745668      0.2241332
## INFL_ORG_mean -5.705389         2.879758     3.298806      0.2380333
## ROLE_INT_mean -6.684721         2.643814     3.334939      0.3085856
##               Overall sd      p.value
## AUTONOMY_mean  0.2859607 6.438593e-03
## ROLE_MON_mean  0.3810969 2.362956e-03
## ROLE_ACO_mean  0.4155854 4.105273e-05
## INFL_POL_mean  0.5285857 2.321666e-06
## ROLE_COL_mean  0.7520362 3.593086e-08
## INFL_ECO_mean  0.4446230 1.400418e-08
## INFL_ORG_mean  0.4074038 1.160776e-08
## ROLE_INT_mean  0.5734810 2.313648e-11
## 
## $quanti$`2`
## NULL
## 
## $quanti$`3`
##                  v.test Mean in category Overall mean sd in category
## ROLE_COL_mean  6.999034         3.074040     2.125116      0.3842621
## INFL_POL_mean  6.189744         2.823642     2.233791      0.3611328
## INFL_ECO_mean  5.867501         3.215994     2.745668      0.2480732
## INFL_ORG_mean  5.708375         3.718075     3.298806      0.1895218
## ROLE_ACO_mean  5.406485         3.741478     3.336409      0.2220436
## ROLE_INT_mean  4.833378         3.834656     3.334939      0.2801354
## ROLE_MON_mean  3.476514         3.764591     3.525736      0.3087070
## AUTONOMY_mean -3.425319         3.671789     3.848377      0.2635534
##               Overall sd      p.value
## ROLE_COL_mean  0.7520362 2.577325e-12
## INFL_POL_mean  0.5285857 6.026210e-10
## INFL_ECO_mean  0.4446230 4.424120e-09
## INFL_ORG_mean  0.4074038 1.140597e-08
## ROLE_ACO_mean  0.4155854 6.427344e-08
## ROLE_INT_mean  0.5734810 1.342356e-06
## ROLE_MON_mean  0.3810969 5.079786e-04
## AUTONOMY_mean  0.2859607 6.140787e-04
## 
## 
## attr(,"class")
## [1] "catdes" "list "

res.pca.hcpc$desc.ind

## $para
## Cluster: 1
## Czech Republic        Finland        Austria         Norway    Switzerland 
##      0.1424641      0.1626042      0.1857968      0.2078174      0.2134246 
## -------------------------------------------------------- 
## Cluster: 2
##   Moldova Argentina    Brazil   Romania    Turkey 
## 0.1408246 0.1501629 0.2164506 0.2679056 0.3145764 
## -------------------------------------------------------- 
## Cluster: 3
##      Kenya    Ecuador   Botswana     Bhutan      India 
## 0.08203261 0.25453232 0.28846353 0.29090536 0.31420036 
## 
## $dist
## Cluster: 1
##  Iceland   France    Italy   Canada  Belgium 
## 2.311207 1.660255 1.557999 1.528583 1.454146 
## -------------------------------------------------------- 
## Cluster: 2
##   Croatia     Spain   Hungary    Serbia Argentina 
##  1.721791  1.521311  1.413168  1.384273  1.382217 
## -------------------------------------------------------- 
## Cluster: 3
## Thailand      UAE    Sudan Ethiopia   Malawi 
## 2.688992 2.598120 2.248661 2.131303 2.092762

library(ggrepel)
d<-dist(clu) 
fit <- isoMDS(d, k=2)

## initial  value 11.632373 
## iter   5 value 8.705250
## iter  10 value 8.396585
## final  value 8.383972 
## converged

fit

## $points
##                        [,1]        [,2]
## Argentina      -0.009102956  0.21657839
## Bulgaria        0.198865616  0.39381900
## Croatia         0.038947129  0.87518528
## Cyprus          0.379206450  1.03846267
## Estonia         0.612082065 -0.13113019
## Greece          0.686586157  0.17780214
## Israel          0.568646820  0.52573507
## Japan           0.426369298  0.38332480
## Latvia          0.873082975  0.43710744
## Portugal        0.999624989  0.35618385
## Serbia         -0.214527230  0.24753800
## South Africa    0.030640592 -0.11163516
## South Korea    -0.477565345  0.82326718
## Spain          -0.045760112  0.37716115
## Australia       0.828969338 -0.31484675
## Austria         1.471999854 -0.50075437
## Belgium         1.461744569 -0.13965494
## Canada          1.654354194  0.16962438
## Czech Republic  1.150207283 -0.20166114
## Denmark         1.496678459  0.60394690
## Finland         1.163979643 -0.06629030
## France          1.784140778  0.01344487
## Germany         1.341349127 -0.74588449
## Iceland         2.317057915 -0.49114707
## Ireland         0.999806381 -0.13837331
## Italy           1.603262092 -0.22140581
## Netherlands     1.237277462 -0.66243853
## New Zealand     1.006891950 -0.03794178
## Norway          1.472774524 -0.20002503
## Sweden          1.556562530  0.36515438
## Switzerland     1.417695075 -0.28908956
## UK              1.028996495 -0.37053147
## USA             1.277838927  0.49255406
## Bangladesh     -0.756407504 -0.03813752
## Bhutan         -1.359580666 -0.23320521
## Botswana       -1.365447512  0.21789746
## China          -1.475745891 -0.82351382
## Colombia       -1.144874179  0.39057252
## Ecuador        -1.207679413  0.07029216
## Egypt          -1.151153866  0.24838452
## El Salvador    -1.347306687  0.32708842
## Ethiopia       -1.816449300 -1.51364365
## India          -1.111120571 -0.16203970
## Indonesia      -1.032168525 -0.37565282
## Kenya          -1.365376068 -0.03993135
## Malawi         -1.911119286  0.27711452
## Malaysia       -1.749444427 -0.46340790
## Mexico         -1.141615874  0.48652569
## Oman           -1.629606575 -0.50504743
## Philippines    -0.743434643  0.33843312
## Sierra Leone   -1.639488231  0.24368947
## Sudan          -2.143624905  0.64228069
## Thailand       -2.614783695  0.02298138
## UAE            -2.310678811 -0.88114835
## Albania         0.106868274 -0.50542411
## Brazil          0.240436063 -0.01292588
## Chile          -0.316016635  0.07587488
## Hong Kong       0.513609852 -0.58660247
## Hungary        -0.069697310 -0.44840345
## Kosovo         -0.292047282 -0.10545570
## Moldova         0.268020513  0.08650424
## Romania         0.379968053 -0.03754831
## Russia          0.050381090 -0.41302525
## Turkey         -0.203099033  0.84339415
## 
## $stress
## [1] 8.383972

fit.sh<-Shepard(d, fit$points)
plot(fit.sh, pch = ".")
lines(fit.sh$x, fit.sh$yf, type = "S")
x <- fit$points[,1]
y <- fit$points[,2]
# plot(x, y, xlab="Coordinate 1", ylab="Coordinate 2",
# main="NonMetric   MDS",   type="n")
text(x, y, labels = row.names(clu), cex=.7)

space<-as.data.frame(fit$points)
qplot(V1, V2, data=space) + geom_text_repel(aes(label=row.names(fit$points))) + ggtitle("Non-parametric MDS, centered means") + theme_minimal()

Cluster, all roles and influences

clu<-dplyr::select(wjs, INFL_POL_mean:INFL_PER_mean,AUTONOMY_mean:ROLE_ACO_mean)
rownames(clu)<-wjs[,1]
# dropping outlier countries
clu<-clu[-57,] # Tanzania
clu<-clu[-56,] # Singapore
clu<-clu[-55,] # Quatar
clu<-clu[-53,] # Thailand

clu<-clu[,-6] # dropping autonomy

fviz_nbclust(clu, hcut, method = "gap_stat") +labs(subtitle = "Gap statistic, hierarchical cluster") # 3 clusters

nb <- NbClust(clu, distance = "euclidean", min.nc = 2,
        max.nc = 10, method = "average")

## *** : The Hubert index is a graphical method of determining the number of clusters.
##                 In the plot of Hubert index, we seek a significant knee that corresponds to a 
##                 significant increase of the value of the measure i.e the significant peak in Hubert
##                 index second differences plot. 
##

## *** : The D index is a graphical method of determining the number of clusters. 
##                 In the plot of D index, we seek a significant knee (the significant peak in Dindex
##                 second differences plot) that corresponds to a significant increase of the value of
##                 the measure. 
##  
## ******************************************************************* 
## * Among all indices:                                                
## * 8 proposed 2 as the best number of clusters 
## * 10 proposed 3 as the best number of clusters 
## * 1 proposed 5 as the best number of clusters 
## * 2 proposed 6 as the best number of clusters 
## * 1 proposed 7 as the best number of clusters 
## * 2 proposed 10 as the best number of clusters 
## 
##                    ***** Conclusion *****                            
##  
## * According to the majority rule, the best number of clusters is  3 
##  
##  
## *******************************************************************

fviz_nbclust(nb) # 2 or 3 clusters

## Among all indices: 
## ===================
## * 2 proposed  0 as the best number of clusters
## * 8 proposed  2 as the best number of clusters
## * 10 proposed  3 as the best number of clusters
## * 1 proposed  5 as the best number of clusters
## * 2 proposed  6 as the best number of clusters
## * 1 proposed  7 as the best number of clusters
## * 2 proposed  10 as the best number of clusters
## 
## Conclusion
## =========================
## * According to the majority rule, the best number of clusters is  3 .

km.res <- hcut(clu, 2, nstart = 25, hc_method="average")
plot((km.res), cex = 0.7)
fviz_cluster(km.res, data = clu, ellipse.type = "convex", repel=TRUE)+theme_minimal()

km.res <- hcut(clu, 5, nstart = 25, hc_method="average")
fviz_cluster(km.res, data = clu, ellipse.type = "convex", repel=TRUE)+theme_minimal()

#PCA and clustering on PCA factors
paran(clu) # 1 factor

## 
## Using eigendecomposition of correlation matrix.
## Computing: 10%  20%  30%  40%  50%  60%  70%  80%  90%  100%
## 
## 
## Results of Horn's Parallel Analysis for component retention
## 270 iterations, using the mean estimate
## 
## -------------------------------------------------- 
## Component   Adjusted    Unadjusted    Estimated 
##             Eigenvalue  Eigenvalue    Bias 
## -------------------------------------------------- 
## 1           4.708862    5.332078      0.623215
## -------------------------------------------------- 
## 
## Adjusted eigenvalues > 1 indicate dimensions to retain.
## (1 components retained)

res.pca<-PCA(clu , scale.unit=FALSE, ncp=2, graph = TRUE)

fviz_pca_biplot(res.pca, geom = "text",  title="PCA", axes =c(1,2), labelsize=2.5, repel=TRUE) + theme_minimal()
fviz_screeplot(res.pca, ncp=10)
fviz_contrib(res.pca, choice = "var", axes = 1)
fviz_contrib(res.pca, choice = "var", axes = 2)
individ <- get_pca_ind(res.pca)
O4ind<-individ$coord

res.pca.hcpc<-HCPC(res.pca, nb.clust=-1) # 3 clusters

fviz_cluster(res.pca.hcpc, data = x, ellipse.type = "convex", repel=TRUE)+ theme_tufte() + labs(title = "Clusters (after PCA)") + theme(axis.text=element_text(size=12), axis.title=element_text(size=14,face="bold"))

res.pca.hcpc$desc.var

## $quanti.var
##                    Eta2      P-value
## ROLE_COL_mean 0.8698247 2.728071e-27
## ROLE_INT_mean 0.7569803 3.708884e-19
## INFL_ECO_mean 0.7115447 6.343915e-17
## INFL_ORG_mean 0.6874036 7.071621e-16
## INFL_POL_mean 0.6751915 2.232663e-15
## ROLE_ACO_mean 0.4960828 1.176999e-09
## ROLE_MON_mean 0.2179153 6.274996e-04
## INFL_PER_mean 0.2017545 1.159027e-03
## INFL_PRO_mean 0.1323245 1.414880e-02
## 
## $quanti
## $quanti$`1`
##                  v.test Mean in category Overall mean sd in category
## INFL_PER_mean -2.281400         2.494925     2.605831      0.1513998
## ROLE_MON_mean -2.959544         3.316848     3.519004      0.3385180
## ROLE_ACO_mean -4.024930         3.029114     3.327417      0.3244097
## INFL_POL_mean -4.768423         1.783694     2.210085      0.2238205
## ROLE_COL_mean -5.491243         1.378106     2.100955      0.1433477
## INFL_ORG_mean -5.659653         2.879758     3.288270      0.2380333
## INFL_ECO_mean -5.690201         2.290905     2.729564      0.2241332
## ROLE_INT_mean -6.622533         2.643814     3.325769      0.3085856
##               Overall sd      p.value
## INFL_PER_mean  0.2706660 2.252478e-02
## ROLE_MON_mean  0.3803150 3.080946e-03
## ROLE_ACO_mean  0.4126486 5.699229e-05
## INFL_POL_mean  0.4978676 1.856737e-06
## ROLE_COL_mean  0.7329211 3.991142e-08
## INFL_ORG_mean  0.4018802 1.516797e-08
## INFL_ECO_mean  0.4292201 1.268902e-08
## ROLE_INT_mean  0.5733401 3.530947e-11
## 
## $quanti$`2`
## NULL
## 
## $quanti$`3`
##                 v.test Mean in category Overall mean sd in category
## ROLE_COL_mean 6.919672         3.045377     2.100955      0.3711973
## INFL_POL_mean 6.130539         2.778461     2.210085      0.3067097
## INFL_ECO_mean 5.745352         3.188783     2.729564      0.2215127
## INFL_ORG_mean 5.579797         3.705849     3.288270      0.1859451
## ROLE_ACO_mean 5.283404         3.733409     3.327417      0.2245024
## ROLE_INT_mean 4.729819         3.830756     3.325769      0.2864965
## INFL_PER_mean 3.467760         2.780617     2.605831      0.3130260
## ROLE_MON_mean 3.336870         3.755327     3.519004      0.3134690
## INFL_PRO_mean 2.816550         3.899640     3.762255      0.2423878
##               Overall sd      p.value
## ROLE_COL_mean  0.7329211 4.526896e-12
## INFL_POL_mean  0.4978676 8.758201e-10
## INFL_ECO_mean  0.4292201 9.173013e-09
## INFL_ORG_mean  0.4018802 2.407995e-08
## ROLE_ACO_mean  0.4126486 1.268055e-07
## ROLE_INT_mean  0.5733401 2.247203e-06
## INFL_PER_mean  0.2706660 5.248165e-04
## ROLE_MON_mean  0.3803150 8.472758e-04
## INFL_PRO_mean  0.2619394 4.854241e-03
## 
## 
## attr(,"class")
## [1] "catdes" "list "

res.pca.hcpc$desc.ind

## $para
## Cluster: 1
##        Finland         Norway Czech Republic        Belgium    Switzerland 
##      0.1715150      0.2159136      0.2208365      0.2485077      0.2524128 
## -------------------------------------------------------- 
## Cluster: 2
##   Moldova    Brazil Argentina  Bulgaria   Romania 
## 0.1199772 0.1611993 0.1685898 0.2240243 0.2585468 
## -------------------------------------------------------- 
## Cluster: 3
##      Kenya    Ecuador      India   Botswana     Bhutan 
## 0.09194348 0.14919733 0.22755625 0.32096119 0.34235942 
## 
## $dist
## Cluster: 1
##     Iceland      France       Italy      Canada Netherlands 
##    2.278888    1.678364    1.606557    1.520939    1.516498 
## -------------------------------------------------------- 
## Cluster: 2
##   Croatia     Spain Argentina   Hungary    Turkey 
##  1.716906  1.514450  1.397554  1.374816  1.360918 
## -------------------------------------------------------- 
## Cluster: 3
##      UAE    Sudan   Malawi Malaysia Ethiopia 
## 2.643907 2.288352 2.193083 2.042658 1.971425

Cluster, all roles and influences + thrust

clu<-dplyr::select(wjs, COUNTRY, INFL_POL_mean:INFL_PER_mean,AUTONOMY_mean:ROLE_ACO_mean, O4A_mean, O4E_mean)
rownames(clu)<-clu[,1]
# dropping outlier countries
clu<-clu[-57,] # Tanzania
clu<-clu[-56,] # Singapore
clu<-clu[-55,] # Quatar
clu<-clu[-53,] # Thailand

#dropping countries missing Thrust (O4)
clu<-clu %>% filter(complete.cases(.))
rownames(clu)<-clu[,1]

#dropping variables
clu<-clu[,-7] # dropping autonomy
clu<-clu[,-1] # dropping COUNTRY

#test for clusters
fviz_nbclust(clu, hcut, method = "gap_stat") +labs(subtitle = "Gap statistic, hierarchical cluster") # 3 clusters

nb <- NbClust(clu, distance = "euclidean", min.nc = 2,
        max.nc = 10, method = "average")

## *** : The Hubert index is a graphical method of determining the number of clusters.
##                 In the plot of Hubert index, we seek a significant knee that corresponds to a 
##                 significant increase of the value of the measure i.e the significant peak in Hubert
##                 index second differences plot. 
##

## *** : The D index is a graphical method of determining the number of clusters. 
##                 In the plot of D index, we seek a significant knee (the significant peak in Dindex
##                 second differences plot) that corresponds to a significant increase of the value of
##                 the measure. 
##  
## ******************************************************************* 
## * Among all indices:                                                
## * 9 proposed 2 as the best number of clusters 
## * 5 proposed 3 as the best number of clusters 
## * 6 proposed 4 as the best number of clusters 
## * 1 proposed 9 as the best number of clusters 
## * 2 proposed 10 as the best number of clusters 
## 
##                    ***** Conclusion *****                            
##  
## * According to the majority rule, the best number of clusters is  2 
##  
##  
## *******************************************************************

fviz_nbclust(nb) # 2, 3 or 4 clusters

## Among all indices: 
## ===================
## * 2 proposed  0 as the best number of clusters
## * 1 proposed  1 as the best number of clusters
## * 9 proposed  2 as the best number of clusters
## * 5 proposed  3 as the best number of clusters
## * 6 proposed  4 as the best number of clusters
## * 1 proposed  9 as the best number of clusters
## * 2 proposed  10 as the best number of clusters
## 
## Conclusion
## =========================
## * According to the majority rule, the best number of clusters is  2 .

#PCA and clustering on PCA factors
paran(clu) # 2 factos

## 
## Using eigendecomposition of correlation matrix.
## Computing: 10%  20%  30%  40%  50%  60%  70%  80%  90%  100%
## 
## 
## Results of Horn's Parallel Analysis for component retention
## 330 iterations, using the mean estimate
## 
## -------------------------------------------------- 
## Component   Adjusted    Unadjusted    Estimated 
##             Eigenvalue  Eigenvalue    Bias 
## -------------------------------------------------- 
## 1           4.414199    5.255210      0.841010
## 2           1.338764    1.909134      0.570370
## -------------------------------------------------- 
## 
## Adjusted eigenvalues > 1 indicate dimensions to retain.
## (2 components retained)

res.pca<-PCA(clu , scale.unit=FALSE, ncp=2, graph = TRUE)

fviz_pca_biplot(res.pca, geom = "text",  title="PCA", axes =c(1,2), labelsize=2.5, repel=TRUE) + theme_minimal()

fviz_screeplot(res.pca, ncp=10)

fviz_contrib(res.pca, choice = "var", axes = 1)

fviz_contrib(res.pca, choice = "var", axes = 2)

individ <- get_pca_ind(res.pca)
O4ind<-individ$coord

res.pca.hcpc<-HCPC(res.pca, nb.clust=-1) # 3 clusters

fviz_cluster(res.pca.hcpc, data = x, ellipse.type = "convex", repel=TRUE)+ theme_tufte() + labs(title = "Clusters (after PCA)") + theme(axis.text=element_text(size=12), axis.title=element_text(size=14,face="bold"))

res.pca.hcpc$desc.var

## $quanti.var
##                    Eta2      P-value
## ROLE_COL_mean 0.8025593 2.771804e-17
## INFL_ECO_mean 0.7094773 2.425102e-13
## ROLE_INT_mean 0.6755091 3.260323e-12
## INFL_POL_mean 0.6670387 5.973826e-12
## INFL_ORG_mean 0.6589617 1.049236e-11
## O4A_mean      0.5743337 1.919232e-09
## O4E_mean      0.5641107 3.352288e-09
## ROLE_ACO_mean 0.4146953 3.415675e-06
## INFL_PER_mean 0.2429513 1.443478e-03
## ROLE_MON_mean 0.1432996 2.639251e-02
## INFL_PRO_mean 0.1259826 4.223978e-02
## 
## $quanti
## $quanti$`1`
##                  v.test Mean in category Overall mean sd in category
## O4E_mean       3.935359         3.431809     3.001985      0.2511501
## INFL_PER_mean -2.088249         2.507447     2.622006      0.1469778
## ROLE_MON_mean -2.210295         3.337851     3.495063      0.3581715
## ROLE_ACO_mean -3.253518         3.115344     3.365385      0.3699414
## INFL_POL_mean -4.269143         1.797571     2.221161      0.2035667
## INFL_ORG_mean -4.824985         2.928132     3.305765      0.2417741
## ROLE_COL_mean -4.915271         1.409205     2.133319      0.1640174
## INFL_ECO_mean -5.274583         2.307773     2.745249      0.2356943
## ROLE_INT_mean -5.662792         2.701558     3.328452      0.4191497
##               Overall sd      p.value
## O4E_mean       0.5487478 8.307242e-05
## INFL_PER_mean  0.2756193 3.677535e-02
## ROLE_MON_mean  0.3573567 2.708470e-02
## ROLE_ACO_mean  0.3861215 1.139856e-03
## INFL_POL_mean  0.4985065 1.962255e-05
## INFL_ORG_mean  0.3932233 1.400135e-06
## ROLE_COL_mean  0.7401595 8.865965e-07
## INFL_ECO_mean  0.4167074 1.330585e-07
## ROLE_INT_mean  0.5561975 1.489296e-08
## 
## $quanti$`2`
##                  v.test Mean in category Overall mean sd in category
## ROLE_INT_mean  2.495140         3.571265     3.328452      0.2581864
## O4A_mean      -4.894601         2.297231     2.759496      0.2589080
## O4E_mean      -5.131164         2.509337     3.001985      0.3457560
##               Overall sd      p.value
## ROLE_INT_mean  0.5561975 1.259073e-02
## O4A_mean       0.5397923 9.850536e-07
## O4E_mean       0.5487478 2.879559e-07
## 
## $quanti$`3`
##                 v.test Mean in category Overall mean sd in category
## ROLE_COL_mean 5.615436         3.135025     2.133319      0.3707426
## INFL_POL_mean 5.257513         2.852819     2.221161      0.2960142
## INFL_ORG_mean 4.769005         3.757723     3.305765      0.1898614
## INFL_ECO_mean 4.628648         3.210103     2.745249      0.2062529
## ROLE_ACO_mean 4.207777         3.756953     3.365385      0.1873502
## O4A_mean      4.159102         3.300571     2.759496      0.4671153
## INFL_PER_mean 3.374064         2.846133     2.622006      0.3561175
## ROLE_INT_mean 3.328856         3.774678     3.328452      0.2319286
## INFL_PRO_mean 2.389320         3.930626     3.780198      0.2439050
## ROLE_MON_mean 2.263077         3.689973     3.495063      0.2736994
##               Overall sd      p.value
## ROLE_COL_mean  0.7401595 1.960676e-08
## INFL_POL_mean  0.4985065 1.460168e-07
## INFL_ORG_mean  0.3932233 1.851385e-06
## INFL_ECO_mean  0.4167074 3.680601e-06
## ROLE_ACO_mean  0.3861215 2.578946e-05
## O4A_mean       0.5397923 3.195006e-05
## INFL_PER_mean  0.2756193 7.406725e-04
## ROLE_INT_mean  0.5561975 8.720358e-04
## INFL_PRO_mean  0.2612297 1.687961e-02
## ROLE_MON_mean  0.3573567 2.363094e-02
## 
## 
## attr(,"class")
## [1] "catdes" "list "

res.pca.hcpc$desc.ind

## $para
## Cluster: 1
##   Denmark        UK   Ireland   Austria   Belgium 
## 0.1981423 0.2258593 0.2476614 0.2604674 0.2975330 
## -------------------------------------------------------- 
## Cluster: 2
##    Russia    Turkey    Kosovo     Chile   Hungary 
## 0.1849268 0.2155454 0.2293731 0.2304697 0.2867215 
## -------------------------------------------------------- 
## Cluster: 3
##  Botswana      Oman  Ethiopia     Kenya  Malaysia 
## 0.1602248 0.2106291 0.3400777 0.4301602 0.4796833 
## 
## $dist
## Cluster: 1
##     Iceland      Canada      Sweden Switzerland     Germany 
##    2.518254    2.200253    2.185024    2.170086    2.122698 
## -------------------------------------------------------- 
## Cluster: 2
##    Serbia  Bulgaria Argentina    Turkey   Albania 
##  2.128997  2.042885  1.973737  1.952760  1.910355 
## -------------------------------------------------------- 
## Cluster: 3
##      UAE    India Malaysia   Malawi   Bhutan 
## 3.419589 2.441322 2.375087 2.340228 2.205273

km.res <- hcut(clu, 2, nstart = 25, hc_method="average")
plot((km.res), cex = 0.7)

fviz_cluster(km.res, data = clu, ellipse.type = "convex", repel=TRUE)+theme_minimal()

km.res <- hcut(clu, 3, nstart = 25, hc_method="average")
fviz_cluster(km.res, data = clu, ellipse.type = "convex", repel=TRUE)+theme_minimal()

km.res <- hcut(clu, 4, nstart = 25, hc_method="average")
fviz_cluster(km.res, data = clu, ellipse.type = "convex", repel=TRUE)+theme_minimal()