#' ---
#' title: "WJS International Classifying countries"
#' author: "jan.hovden@uib.no"
#' output:
#'   pdf_document:
#'     toc: yes
#'   html_notebook: default
#'   html_document:
#'     number_sections: yes
#'     theme: journal
#'     toc: yes
#' ---
require(foreign)
require(FactoMineR)
require(sjmisc)
require(factoextra)
require(dplyr)
require(ggplot2)
require(ggthemes)
require(psych)
library(rworldmap)
library("NbClust")
library(ape)
library(paran)
#library(ggraph)
# Load the aggregated WJS data from SPSS format into a data frame.
# NOTE(review): the working directory is a machine-specific absolute path; the
# relative .sav file name below is resolved against it.
setwd("/Users/janfredrikhovden/Dropbox/DBXPAGAANDEARBEID/Statistikk/Rworkdir/WJS")
wjs <- read.spss(
  "WJS full agg for final conclusions.sav",
  to.data.frame = TRUE
)
# Use the row names, converted to numeric, as an ID column (row number as ID).
wjs[["ID"]] <- as.numeric(row.names(wjs))
# Description of some of the methods for cluster validation: http://www.sthda.com/english/articles/29-cluster-validation-essentials/97-cluster-validation-statistics-must-know-methods/#internal-measures-for-cluster-validation
# Clustering variables: the column ranges INFL_POL_mean..INFL_ECO_mean and
# AUTONOMY_mean..ROLE_ACO_mean of wjs.
clu <- dplyr::select(
  wjs,
  INFL_POL_mean:INFL_ECO_mean,
  AUTONOMY_mean:ROLE_ACO_mean
)
# Label the rows with the first column of wjs (presumably the country name,
# judging by the printed results further down).
rownames(clu) <- wjs[, 1]
# Drop Tanzania, Singapore and Qatar. Rows are removed by position, so this
# relies on the row order of the input file.
clu <- clu[-c(55, 56, 57), ] # 55 = Qatar, 56 = Singapore, 57 = Tanzania
# Test for the number of clusters: gap statistic and silhouette coefficient.
# See also cluster.stats(d = NULL, clustering, alt.clustering = NULL).
fviz_nbclust(clu, hcut, method = "gap_stat") +
  labs(subtitle = "Gap statistic, hierarchical cluster") # 3 clusters
# The subtitle used to repeat "Gap statistic" here although this plot shows
# the silhouette method.
fviz_nbclust(clu, hcut, method = "silhouette") +
  labs(subtitle = "Silhouette method, hierarchical cluster") # 2 clusters
# Compute the NbClust indices for 2 to 10 clusters (Euclidean distance,
# average linkage).
nb <- NbClust(
  clu,
  distance = "euclidean",
  min.nc = 2,
  max.nc = 10,
  method = "average"
)
## *** : The Hubert index is a graphical method of determining the number of clusters.
## In the plot of Hubert index, we seek a significant knee that corresponds to a
## significant increase of the value of the measure i.e the significant peak in Hubert
## index second differences plot.
##
## *** : The D index is a graphical method of determining the number of clusters.
## In the plot of D index, we seek a significant knee (the significant peak in Dindex
## second differences plot) that corresponds to a significant increase of the value of
## the measure.
##
## *******************************************************************
## * Among all indices:
## * 10 proposed 2 as the best number of clusters
## * 11 proposed 3 as the best number of clusters
## * 3 proposed 10 as the best number of clusters
##
## ***** Conclusion *****
##
## * According to the majority rule, the best number of clusters is 3
##
##
## *******************************************************************
# Plot how many NbClust indices proposed each number of clusters.
fviz_nbclust(nb)
## Among all indices:
## ===================
## * 2 proposed 0 as the best number of clusters
## * 10 proposed 2 as the best number of clusters
## * 11 proposed 3 as the best number of clusters
## * 3 proposed 10 as the best number of clusters
##
## Conclusion
## =========================
## * According to the majority rule, the best number of clusters is 3 .
# Hierarchical clustering (average linkage) cut into three clusters.
# NOTE(review): `nstart` looks like a kmeans argument; presumably hcut()
# ignores it - confirm.
km.res <- hcut(clu, k = 3, nstart = 25, hc_method = "average")
fviz_cluster(
  km.res,
  data = clu,
  ellipse.type = "convex",
  repel = TRUE
) +
  theme_minimal()
# Dendrogram of the same clustering.
plot(km.res, cex = 0.7)
# PCA and clustering on PCA factors
# Horn's parallel analysis to decide how many components to retain.
paran(clu) # 1 factor
##
## Using eigendecomposition of correlation matrix.
## Computing: 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%
##
##
## Results of Horn's Parallel Analysis for component retention
## 270 iterations, using the mean estimate
##
## --------------------------------------------------
## Component Adjusted Unadjusted Estimated
## Eigenvalue Eigenvalue Bias
## --------------------------------------------------
## 1 4.666855 5.282354 0.615499
## --------------------------------------------------
##
## Adjusted eigenvalues > 1 indicate dimensions to retain.
## (1 components retained)
# PCA on the unscaled variables (scale.unit = FALSE), keeping two components.
res.pca <- PCA(clu, scale.unit = FALSE, ncp = 2, graph = TRUE)
fviz_pca_biplot(
  res.pca,
  geom = "text", title = "PCA", axes = c(1, 2), labelsize = 2.5, repel = TRUE
) +
  theme_minimal()
fviz_screeplot(res.pca, ncp = 10)
# Variable contributions to the first and to the second component.
fviz_contrib(res.pca, choice = "var", axes = 1)
fviz_contrib(res.pca, choice = "var", axes = 2)
# Coordinates of the individuals (rows of clu) from the PCA result.
# NOTE(review): individ / O4ind are not used in the visible part of the script.
individ <- get_pca_ind(res.pca)
O4ind <- individ$coord
# Hierarchical clustering on the PCA result; nb.clust = -1 leaves the choice
# of the number of clusters to HCPC().
res.pca.hcpc <- HCPC(res.pca, nb.clust = -1) # 3 clusters
# No `data` argument: fviz_cluster() only requires it for kmeans/dbscan
# objects, and the `x` that used to be passed here is not the clustered data
# (it is undefined on a fresh run up to this point).
fviz_cluster(res.pca.hcpc, ellipse.type = "convex", repel = TRUE) +
  theme_tufte() +
  labs(title = "Clusters (after PCA)") +
  theme(
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14, face = "bold")
  )
# Description of the clusters by the variables.
res.pca.hcpc$desc.var
## $quanti.var
## Eta2 P-value
## ROLE_COL_mean 0.8687570 1.262768e-27
## ROLE_INT_mean 0.7605915 1.158133e-19
## INFL_ECO_mean 0.7108045 3.684203e-17
## INFL_ORG_mean 0.6946657 1.930445e-16
## INFL_POL_mean 0.6684038 2.391074e-15
## ROLE_ACO_mean 0.5084688 3.911193e-10
## ROLE_MON_mean 0.2294135 3.532258e-04
## AUTONOMY_mean 0.2092923 7.753306e-04
## INFL_PRO_mean 0.1393100 1.029975e-02
##
## $quanti
## $quanti$`1`
## v.test Mean in category Overall mean sd in category
## AUTONOMY_mean 2.724567 3.988839 3.848377 0.1908396
## ROLE_MON_mean -3.040360 3.316848 3.525736 0.3385180
## ROLE_ACO_mean -4.101474 3.029114 3.336409 0.3244097
## INFL_POL_mean -4.723196 1.783694 2.233791 0.2238205
## ROLE_COL_mean -5.509767 1.378106 2.125116 0.1433477
## INFL_ECO_mean -5.673338 2.290905 2.745668 0.2241332
## INFL_ORG_mean -5.705389 2.879758 3.298806 0.2380333
## ROLE_INT_mean -6.684721 2.643814 3.334939 0.3085856
## Overall sd p.value
## AUTONOMY_mean 0.2859607 6.438593e-03
## ROLE_MON_mean 0.3810969 2.362956e-03
## ROLE_ACO_mean 0.4155854 4.105273e-05
## INFL_POL_mean 0.5285857 2.321666e-06
## ROLE_COL_mean 0.7520362 3.593086e-08
## INFL_ECO_mean 0.4446230 1.400418e-08
## INFL_ORG_mean 0.4074038 1.160776e-08
## ROLE_INT_mean 0.5734810 2.313648e-11
##
## $quanti$`2`
## NULL
##
## $quanti$`3`
## v.test Mean in category Overall mean sd in category
## ROLE_COL_mean 6.999034 3.074040 2.125116 0.3842621
## INFL_POL_mean 6.189744 2.823642 2.233791 0.3611328
## INFL_ECO_mean 5.867501 3.215994 2.745668 0.2480732
## INFL_ORG_mean 5.708375 3.718075 3.298806 0.1895218
## ROLE_ACO_mean 5.406485 3.741478 3.336409 0.2220436
## ROLE_INT_mean 4.833378 3.834656 3.334939 0.2801354
## ROLE_MON_mean 3.476514 3.764591 3.525736 0.3087070
## INFL_PRO_mean 2.916073 3.902630 3.765382 0.2369239
## AUTONOMY_mean -3.425319 3.671789 3.848377 0.2635534
## Overall sd p.value
## ROLE_COL_mean 0.7520362 2.577325e-12
## INFL_POL_mean 0.5285857 6.026210e-10
## INFL_ECO_mean 0.4446230 4.424120e-09
## INFL_ORG_mean 0.4074038 1.140597e-08
## ROLE_ACO_mean 0.4155854 6.427344e-08
## ROLE_INT_mean 0.5734810 1.342356e-06
## ROLE_MON_mean 0.3810969 5.079786e-04
## INFL_PRO_mean 0.2610680 3.544676e-03
## AUTONOMY_mean 0.2859607 6.140787e-04
##
##
## attr(,"class")
## [1] "catdes" "list "
# Print the per-cluster listings of individuals stored in the HCPC result
# (the $para and $dist components shown below).
res.pca.hcpc[["desc.ind"]]
## $para
## Cluster: 1
## Czech Republic Finland Austria Norway Switzerland
## 0.1502448 0.1685557 0.2414245 0.2649192 0.2655605
## --------------------------------------------------------
## Cluster: 2
## Moldova Argentina Brazil Romania Turkey
## 0.1283322 0.1357232 0.2054645 0.2317151 0.2771268
## --------------------------------------------------------
## Cluster: 3
## Kenya Ecuador India Botswana Bhutan
## 0.06385787 0.23854297 0.29167327 0.30830181 0.36219318
##
## $dist
## Cluster: 1
## Iceland France Italy Canada Belgium
## 2.281316 1.667664 1.593554 1.508972 1.466847
## --------------------------------------------------------
## Cluster: 2
## Croatia Spain Hungary Serbia Argentina
## 1.717003 1.538365 1.412283 1.382766 1.369206
## --------------------------------------------------------
## Cluster: 3
## Thailand UAE Sudan Malawi Ethiopia
## 2.701182 2.629423 2.279124 2.130951 2.062982
# MDS
library(ggrepel)
# Distances between the rows of clu (dist() default method).
d <- dist(clu)
# Non-metric MDS in two dimensions. isoMDS() lives in MASS, which this script
# never loads explicitly, so it is called through its namespace.
fit <- MASS::isoMDS(d, k = 2)
## initial value 10.985607
## iter 5 value 8.516952
## final value 8.391742
## converged
# Print the configuration ($points) and the final stress ($stress).
fit
## $points
## [,1] [,2]
## Argentina 0.004957139 0.227409114
## Bulgaria -0.184528651 0.404221261
## Croatia -0.029458965 0.856746246
## Cyprus -0.367349937 1.032196136
## Estonia -0.613458614 -0.076580643
## Greece -0.686132052 0.189119200
## Israel -0.548766799 0.524019027
## Japan -0.445665462 0.383306351
## Latvia -0.891467652 0.454328584
## Portugal -0.997968089 0.362843559
## Serbia 0.203989245 0.223836609
## South Africa 0.006248435 -0.144506603
## South Korea 0.504865961 0.803229176
## Spain 0.064274438 0.380346730
## Australia -0.816819880 -0.322669514
## Austria -1.504614991 -0.491295350
## Belgium -1.481209417 -0.132557933
## Canada -1.645610442 0.204056008
## Czech Republic -1.141464901 -0.151329006
## Denmark -1.463629617 0.482721910
## Finland -1.154444781 -0.035861969
## France -1.786897133 0.045396984
## Germany -1.360432794 -0.712365637
## Iceland -2.316096151 -0.466065977
## Ireland -0.996691692 -0.117443975
## Italy -1.603530764 -0.242818687
## Netherlands -1.311490163 -0.856557877
## New Zealand -0.995805123 -0.009707051
## Norway -1.483769149 -0.200320725
## Sweden -1.568317450 0.391460639
## Switzerland -1.458552107 -0.313892466
## UK -1.023120555 -0.331125750
## USA -1.257402329 0.639566707
## Bangladesh 0.733969135 -0.086092119
## Bhutan 1.311250275 -0.386595379
## Botswana 1.464279966 0.274255632
## China 1.440242172 -0.921669982
## Colombia 1.139308234 0.393836215
## Ecuador 1.196861354 -0.023335382
## Egypt 1.165888743 0.186238300
## El Salvador 1.335503427 0.234207887
## Ethiopia 1.844805351 -0.421281262
## India 1.138213449 -0.062375882
## Indonesia 1.006713154 -0.418882914
## Kenya 1.385847633 -0.096926494
## Malawi 1.980175584 0.247901583
## Malaysia 1.735543103 -0.517240991
## Mexico 1.123691030 0.435898098
## Oman 1.584042816 -0.550346705
## Philippines 0.785643339 0.333258083
## Sierra Leone 1.685739333 0.180049113
## Sudan 2.196442203 0.578363646
## Thailand 2.603663619 -0.149836927
## UAE 2.203620849 -1.079862881
## Albania -0.097599018 -0.501745726
## Brazil -0.245749153 -0.013013883
## Chile 0.318016121 0.061084861
## Hong Kong -0.537344803 -0.741702883
## Hungary 0.070787216 -0.431831780
## Kosovo 0.282399888 -0.092941596
## Moldova -0.255540006 0.085713513
## Romania -0.364124884 -0.008822511
## Russia -0.084236695 -0.400568701
## Turkey 0.202307006 0.894561987
##
## $stress
## [1] 8.391742
# Shepard diagram for the MDS fit: points from Shepard(), with the fitted
# values (yf) overlaid as a step line.
fit.sh <- MASS::Shepard(d, fit$points)
plot(fit.sh, pch = ".")
lines(fit.sh$x, fit.sh$yf, type = "S")
# Base-graphics map of the MDS configuration. The empty plot() call sets up
# the coordinate system for text(); without it the labels were drawn on top
# of the Shepard diagram above.
x <- fit$points[, 1]
y <- fit$points[, 2]
plot(x, y,
  xlab = "Coordinate 1", ylab = "Coordinate 2",
  main = "NonMetric MDS", type = "n"
)
text(x, y, labels = row.names(clu), cex = .7)
# ggplot2 version of the same map with non-overlapping labels.
# ggplot() + geom_point() replaces the deprecated qplot().
space <- as.data.frame(fit$points)
ggplot(space, aes(x = V1, y = V2)) +
  geom_point() +
  geom_text_repel(aes(label = row.names(fit$points))) +
  ggtitle("Non-parametric MDS, centered means") +
  theme_minimal()
# Second variable set: three INFL_* means, the AUTONOMY_mean..ROLE_ACO_mean
# range and the C13A_mean..C13D_mean range.
clu <- dplyr::select(
  wjs,
  INFL_POL_mean, INFL_ECO_mean, INFL_ORG_mean,
  AUTONOMY_mean:ROLE_ACO_mean,
  C13A_mean:C13D_mean
)
# Row labels come from the first column of wjs.
row.names(clu) <- wjs[[1]]
# Drop Tanzania, Singapore and Qatar (by row position).
clu <- clu[-c(55, 56, 57), ] # 55 = Qatar, 56 = Singapore, 57 = Tanzania
fviz_nbclust(clu, FUNcluster = hcut, method = "gap_stat") +
  labs(subtitle = "Gap statistic, hierarchical cluster") # 5 clusters
# NbClust indices for 2 to 10 clusters (Euclidean distance, average linkage).
nb <- NbClust(
  clu,
  distance = "euclidean",
  min.nc = 2,
  max.nc = 10,
  method = "average"
)
## *** : The Hubert index is a graphical method of determining the number of clusters.
## In the plot of Hubert index, we seek a significant knee that corresponds to a
## significant increase of the value of the measure i.e the significant peak in Hubert
## index second differences plot.
##
## *** : The D index is a graphical method of determining the number of clusters.
## In the plot of D index, we seek a significant knee (the significant peak in Dindex
## second differences plot) that corresponds to a significant increase of the value of
## the measure.
##
## *******************************************************************
## * Among all indices:
## * 9 proposed 2 as the best number of clusters
## * 2 proposed 3 as the best number of clusters
## * 5 proposed 4 as the best number of clusters
## * 5 proposed 5 as the best number of clusters
## * 1 proposed 9 as the best number of clusters
## * 1 proposed 10 as the best number of clusters
##
## ***** Conclusion *****
##
## * According to the majority rule, the best number of clusters is 2
##
##
## *******************************************************************
# Plot how many NbClust indices proposed each number of clusters.
fviz_nbclust(nb)
## Among all indices:
## ===================
## * 2 proposed 0 as the best number of clusters
## * 1 proposed 1 as the best number of clusters
## * 9 proposed 2 as the best number of clusters
## * 2 proposed 3 as the best number of clusters
## * 5 proposed 4 as the best number of clusters
## * 5 proposed 5 as the best number of clusters
## * 1 proposed 9 as the best number of clusters
## * 1 proposed 10 as the best number of clusters
##
## Conclusion
## =========================
## * According to the majority rule, the best number of clusters is 2 .
# Hierarchical clustering (average linkage) cut into three clusters.
# NOTE(review): `nstart` looks like a kmeans argument; presumably hcut()
# ignores it - confirm.
km.res <- hcut(clu, k = 3, nstart = 25, hc_method = "average")
fviz_cluster(
  km.res,
  data = clu,
  ellipse.type = "convex",
  repel = TRUE
) +
  theme_minimal()
# Dendrogram of the same clustering.
plot(km.res, cex = 0.7)
# PCA and clustering on PCA factors
# Horn's parallel analysis to decide how many components to retain.
paran(clu) # 2 factors
##
## Using eigendecomposition of correlation matrix.
## Computing: 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%
##
##
## Results of Horn's Parallel Analysis for component retention
## 360 iterations, using the mean estimate
##
## --------------------------------------------------
## Component Adjusted Unadjusted Estimated
## Eigenvalue Eigenvalue Bias
## --------------------------------------------------
## 1 5.084843 5.870280 0.785436
## 2 1.285554 1.837295 0.551741
## --------------------------------------------------
##
## Adjusted eigenvalues > 1 indicate dimensions to retain.
## (2 components retained)
# PCA on the unscaled variables (scale.unit = FALSE), keeping two components.
res.pca <- PCA(clu, scale.unit = FALSE, ncp = 2, graph = TRUE)
fviz_pca_biplot(
  res.pca,
  geom = "text", title = "PCA", axes = c(1, 2), labelsize = 2.5, repel = TRUE
) +
  theme_minimal()
fviz_screeplot(res.pca, ncp = 10)
# Variable contributions to the first and to the second component.
fviz_contrib(res.pca, choice = "var", axes = 1)
fviz_contrib(res.pca, choice = "var", axes = 2)
# Coordinates of the individuals (rows of clu) from the PCA result.
# NOTE(review): individ / O4ind are not used in the visible part of the script.
individ <- get_pca_ind(res.pca)
O4ind <- individ$coord
# Hierarchical clustering on the PCA result; nb.clust = -1 leaves the choice
# of the number of clusters to HCPC().
res.pca.hcpc <- HCPC(res.pca, nb.clust = -1) # 3 clusters
# No `data` argument: fviz_cluster() only requires it for kmeans/dbscan
# objects, and the `x` that used to be passed here holds MDS coordinates from
# an earlier section, not the clustered data.
fviz_cluster(res.pca.hcpc, ellipse.type = "convex", repel = TRUE) +
  theme_tufte() +
  labs(title = "Clusters (after PCA)") +
  theme(
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14, face = "bold")
  )
# Description of the clusters by the variables.
res.pca.hcpc$desc.var
## $quanti.var
## Eta2 P-value
## ROLE_COL_mean 0.8036543 2.736235e-22
## ROLE_INT_mean 0.7364032 2.181085e-18
## INFL_ECO_mean 0.7209731 1.236526e-17
## INFL_POL_mean 0.7068639 5.566912e-17
## INFL_ORG_mean 0.6986866 1.288429e-16
## ROLE_ACO_mean 0.4626419 5.929874e-09
## C13C_mean 0.3566491 1.437676e-06
## C13D_mean 0.2806846 4.325550e-05
## C13B_mean 0.2685871 7.193813e-05
## AUTONOMY_mean 0.2099072 7.571508e-04
## ROLE_MON_mean 0.2081772 8.093837e-04
##
## $quanti
## $quanti$`1`
## v.test Mean in category Overall mean sd in category
## AUTONOMY_mean 2.486059 3.964008 3.848377 0.2543949
## C13C_mean -2.351268 2.449425 2.626701 0.3340672
## ROLE_MON_mean -2.981168 3.340947 3.525736 0.3291280
## ROLE_ACO_mean -4.288720 3.046513 3.336409 0.3316884
## INFL_POL_mean -4.975250 1.806046 2.233791 0.2308858
## ROLE_COL_mean -5.792834 1.416544 2.125116 0.1725187
## INFL_ORG_mean -5.857020 2.910695 3.298806 0.2442105
## INFL_ECO_mean -5.947081 2.315587 2.745668 0.2244806
## ROLE_INT_mean -6.690187 2.710901 3.334939 0.3461815
## Overall sd p.value
## AUTONOMY_mean 0.2859607 1.291665e-02
## C13C_mean 0.4635468 1.870957e-02
## ROLE_MON_mean 0.3810969 2.871516e-03
## ROLE_ACO_mean 0.4155854 1.797054e-05
## INFL_POL_mean 0.5285857 6.516362e-07
## ROLE_COL_mean 0.7520362 6.920837e-09
## INFL_ORG_mean 0.4074038 4.712468e-09
## INFL_ECO_mean 0.4446230 2.729665e-09
## ROLE_INT_mean 0.5734810 2.228852e-11
##
## $quanti$`2`
## v.test Mean in category Overall mean sd in category
## ROLE_INT_mean 2.691824 3.603665 3.334939 0.2328624
## C13C_mean -2.090120 2.458043 2.626701 0.2341273
## C13B_mean -2.885862 2.906586 3.116362 0.3682844
## C13D_mean -2.977895 2.463160 2.698359 0.3057142
## Overall sd p.value
## ROLE_INT_mean 0.5734810 0.007106247
## C13C_mean 0.4635468 0.036606983
## C13B_mean 0.4175787 0.003903435
## C13D_mean 0.4537142 0.002902358
##
## $quanti$`3`
## v.test Mean in category Overall mean sd in category
## ROLE_COL_mean 6.421405 3.097732 2.125116 0.4014658
## INFL_POL_mean 6.301160 2.904614 2.233791 0.3186110
## INFL_ECO_mean 5.655272 3.252096 2.745668 0.2481291
## INFL_ORG_mean 5.564194 3.755368 3.298806 0.1689015
## ROLE_ACO_mean 4.946631 3.750448 3.336409 0.2365875
## C13C_mean 4.739725 3.069207 2.626701 0.5250937
## ROLE_INT_mean 4.360158 3.838549 3.334939 0.2866597
## C13D_mean 3.995538 3.063474 2.698359 0.4164012
## C13B_mean 3.920251 3.446066 3.116362 0.3995027
## ROLE_MON_mean 3.243169 3.774666 3.525736 0.3254019
## AUTONOMY_mean -3.526227 3.645287 3.848377 0.2505697
## Overall sd p.value
## ROLE_COL_mean 0.7520362 1.350225e-10
## INFL_POL_mean 0.5285857 2.954264e-10
## INFL_ECO_mean 0.4446230 1.555997e-08
## INFL_ORG_mean 0.4074038 2.633663e-08
## ROLE_ACO_mean 0.4155854 7.550906e-07
## C13C_mean 0.4635468 2.140089e-06
## ROLE_INT_mean 0.5734810 1.299683e-05
## C13D_mean 0.4537142 6.454754e-05
## C13B_mean 0.4175787 8.845672e-05
## ROLE_MON_mean 0.3810969 1.182082e-03
## AUTONOMY_mean 0.2859607 4.215250e-04
##
##
## attr(,"class")
## [1] "catdes" "list "
# Print the per-cluster listings of individuals stored in the HCPC result
# (the $para and $dist components shown below).
res.pca.hcpc[["desc.ind"]]
## $para
## Cluster: 1
## Denmark Switzerland Norway New Zealand Belgium
## 0.1944659 0.2547730 0.2793222 0.2841225 0.3345835
## --------------------------------------------------------
## Cluster: 2
## South Africa Argentina Turkey Chile Kosovo
## 0.1360379 0.1815498 0.2540435 0.3083126 0.3766436
## --------------------------------------------------------
## Cluster: 3
## Kenya Ethiopia Sierra Leone India Malaysia
## 0.1967874 0.2237366 0.3084464 0.4141100 0.4600797
##
## $dist
## Cluster: 1
## Iceland France Netherlands Sweden Italy
## 2.403658 1.963222 1.896259 1.847717 1.781640
## --------------------------------------------------------
## Cluster: 2
## South Korea Serbia Chile Croatia Spain
## 1.845821 1.752903 1.660118 1.593968 1.578279
## --------------------------------------------------------
## Cluster: 3
## Thailand Sudan Oman UAE Bhutan
## 3.364798 3.122849 2.325242 2.307191 2.229570
library(ggrepel)
# Distances between the rows of clu (dist() default method).
d <- dist(clu)
# Non-metric MDS in two dimensions. isoMDS() lives in MASS, which this script
# never loads explicitly, so it is called through its namespace.
fit <- MASS::isoMDS(d, k = 2)
## initial value 13.241866
## iter 5 value 9.958288
## iter 10 value 9.829607
## final value 9.812304
## converged
# Print the configuration ($points) and the final stress ($stress).
fit
## $points
## [,1] [,2]
## Argentina 0.12472990 -0.3915386249
## Bulgaria 0.47334496 -0.8318403147
## Croatia 0.27958451 -0.9996513491
## Cyprus 0.64862464 -0.9426741997
## Estonia 0.68710914 -0.0872645397
## Greece 0.75199453 -0.1861421278
## Israel 0.55316028 -0.0445030705
## Japan 0.27392974 0.4478924330
## Latvia 0.76401027 0.2306769538
## Portugal 1.13911851 -0.4645544283
## Serbia 0.05004004 -0.7077966525
## South Africa 0.14703166 -0.2217132561
## South Korea -0.11055266 -1.2373414072
## Spain 0.23071769 -0.6825880288
## Australia 0.91997090 -0.0311979431
## Austria 1.64495824 -0.1602622083
## Belgium 1.34760025 0.3652891494
## Canada 1.59255902 0.2862187919
## Czech Republic 0.95423923 0.9072565008
## Denmark 1.32080681 0.7552068969
## Finland 1.31309572 -0.2766871847
## France 1.60480002 0.6343096128
## Germany 1.65945540 -0.2786287378
## Iceland 2.40612486 0.2981076243
## Ireland 0.88315601 0.3688277291
## Italy 1.80708277 -0.0974582059
## Netherlands 0.90442186 1.1434665782
## New Zealand 0.92584292 0.2254214178
## Norway 1.29952321 0.4470905137
## Sweden 1.28340362 1.1627510752
## Switzerland 1.43144307 0.1178526416
## UK 0.84397258 0.5912194584
## USA 1.43671541 -0.6537809376
## Bangladesh -0.69679604 -0.1664601552
## Bhutan -1.64977274 0.9258257589
## Botswana -1.15165887 -0.4904746428
## China -1.32159920 -0.9862100410
## Colombia -0.85425555 -0.9523725161
## Ecuador -1.09546569 -0.2876129722
## Egypt -1.15119492 -0.2074910597
## El Salvador -1.19431222 -0.3444668227
## Ethiopia -2.45462293 0.0070815393
## India -1.20421002 0.3470058295
## Indonesia -0.79150977 -0.7513493521
## Kenya -1.37050828 0.1778387154
## Malawi -1.75468594 -0.4758181607
## Malaysia -1.87142996 0.4369730769
## Mexico -1.14560075 -0.0006162433
## Oman -1.92908080 0.6888197957
## Philippines -0.76138129 -0.0186640380
## Sierra Leone -1.87080665 0.2780333871
## Sudan -2.75146527 1.5638442568
## Thailand -3.08897301 0.9434758094
## UAE -2.45789966 -0.4654321787
## Albania -0.07096753 0.5641725733
## Brazil 0.40316414 -0.3463075996
## Chile -0.12339478 -0.3872603634
## Hong Kong 0.30299170 0.7463467423
## Hungary -0.08881484 0.2731918363
## Kosovo -0.21571218 -0.0697476267
## Moldova 0.28473239 -0.0057839078
## Romania 0.47084370 -0.1389795599
## Russia -0.02301994 0.3717687613
## Turkey 0.03539174 -0.9152950027
##
## $stress
## [1] 9.812304
# Shepard diagram for the MDS fit: points from Shepard(), with the fitted
# values (yf) overlaid as a step line.
fit.sh <- MASS::Shepard(d, fit$points)
plot(fit.sh, pch = ".")
lines(fit.sh$x, fit.sh$yf, type = "S")
# Base-graphics map of the MDS configuration. The empty plot() call sets up
# the coordinate system for text(); without it the labels were drawn on top
# of the Shepard diagram above.
x <- fit$points[, 1]
y <- fit$points[, 2]
plot(x, y,
  xlab = "Coordinate 1", ylab = "Coordinate 2",
  main = "NonMetric MDS", type = "n"
)
text(x, y, labels = row.names(clu), cex = .7)
# ggplot2 version of the same map with non-overlapping labels.
# ggplot() + geom_point() replaces the deprecated qplot().
space <- as.data.frame(fit$points)
ggplot(space, aes(x = V1, y = V2)) +
  geom_point() +
  geom_text_repel(aes(label = row.names(fit$points))) +
  ggtitle("Non-parametric MDS, centered means") +
  theme_minimal()
# Third variable set: three INFL_* means, the AUTONOMY_mean..ROLE_ACO_mean
# range and C13A_mean.
clu <- dplyr::select(
  wjs,
  INFL_POL_mean, INFL_ECO_mean, INFL_ORG_mean,
  AUTONOMY_mean:ROLE_ACO_mean,
  C13A_mean
)
# Row labels come from the first column of wjs.
row.names(clu) <- wjs[[1]]
# Drop Tanzania, Singapore and Qatar (by row position).
clu <- clu[-c(55, 56, 57), ] # 55 = Qatar, 56 = Singapore, 57 = Tanzania
fviz_nbclust(clu, FUNcluster = hcut, method = "gap_stat") +
  labs(subtitle = "Gap statistic, hierarchical cluster") # 3 clusters
# NbClust indices for 2 to 10 clusters (Euclidean distance, average linkage).
nb <- NbClust(
  clu,
  distance = "euclidean",
  min.nc = 2,
  max.nc = 10,
  method = "average"
)
## *** : The Hubert index is a graphical method of determining the number of clusters.
## In the plot of Hubert index, we seek a significant knee that corresponds to a
## significant increase of the value of the measure i.e the significant peak in Hubert
## index second differences plot.
##
## *** : The D index is a graphical method of determining the number of clusters.
## In the plot of D index, we seek a significant knee (the significant peak in Dindex
## second differences plot) that corresponds to a significant increase of the value of
## the measure.
##
## *******************************************************************
## * Among all indices:
## * 8 proposed 2 as the best number of clusters
## * 7 proposed 3 as the best number of clusters
## * 6 proposed 4 as the best number of clusters
## * 1 proposed 7 as the best number of clusters
## * 1 proposed 10 as the best number of clusters
##
## ***** Conclusion *****
##
## * According to the majority rule, the best number of clusters is 2
##
##
## *******************************************************************
# Plot how many NbClust indices proposed each number of clusters.
fviz_nbclust(nb)
## Among all indices:
## ===================
## * 2 proposed 0 as the best number of clusters
## * 1 proposed 1 as the best number of clusters
## * 8 proposed 2 as the best number of clusters
## * 7 proposed 3 as the best number of clusters
## * 6 proposed 4 as the best number of clusters
## * 1 proposed 7 as the best number of clusters
## * 1 proposed 10 as the best number of clusters
##
## Conclusion
## =========================
## * According to the majority rule, the best number of clusters is 2 .
# Hierarchical clustering (average linkage) cut into three clusters.
# NOTE(review): `nstart` looks like a kmeans argument; presumably hcut()
# ignores it - confirm.
km.res <- hcut(clu, k = 3, nstart = 25, hc_method = "average")
fviz_cluster(
  km.res,
  data = clu,
  ellipse.type = "convex",
  repel = TRUE
) +
  theme_minimal()
# Dendrogram of the same clustering.
plot(km.res, cex = 0.7)
# PCA and clustering on PCA factors
# Horn's parallel analysis to decide how many components to retain.
paran(clu) # 1 factor
##
## Using eigendecomposition of correlation matrix.
## Computing: 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%
##
##
## Results of Horn's Parallel Analysis for component retention
## 270 iterations, using the mean estimate
##
## --------------------------------------------------
## Component Adjusted Unadjusted Estimated
## Eigenvalue Eigenvalue Bias
## --------------------------------------------------
## 1 4.463941 5.079440 0.615499
## --------------------------------------------------
##
## Adjusted eigenvalues > 1 indicate dimensions to retain.
## (1 components retained)
# PCA on the unscaled variables (scale.unit = FALSE), keeping two components.
res.pca <- PCA(clu, scale.unit = FALSE, ncp = 2, graph = TRUE)
fviz_pca_biplot(
  res.pca,
  geom = "text", title = "PCA", axes = c(1, 2), labelsize = 2.5, repel = TRUE
) +
  theme_minimal()
fviz_screeplot(res.pca, ncp = 10)
# Variable contributions to the first and to the second component.
fviz_contrib(res.pca, choice = "var", axes = 1)
fviz_contrib(res.pca, choice = "var", axes = 2)
# Coordinates of the individuals (rows of clu) from the PCA result.
# NOTE(review): individ / O4ind are not used in the visible part of the script.
individ <- get_pca_ind(res.pca)
O4ind <- individ$coord
# Hierarchical clustering on the PCA result; nb.clust = -1 leaves the choice
# of the number of clusters to HCPC().
res.pca.hcpc <- HCPC(res.pca, nb.clust = -1) # 3 clusters
# No `data` argument: fviz_cluster() only requires it for kmeans/dbscan
# objects, and the `x` that used to be passed here holds MDS coordinates from
# an earlier section, not the clustered data.
fviz_cluster(res.pca.hcpc, ellipse.type = "convex", repel = TRUE) +
  theme_tufte() +
  labs(title = "Clusters (after PCA)") +
  theme(
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14, face = "bold")
  )
# Description of the clusters by the variables.
res.pca.hcpc$desc.var
## $quanti.var
## Eta2 P-value
## ROLE_COL_mean 0.8687570 1.262768e-27
## ROLE_INT_mean 0.7605915 1.158133e-19
## INFL_ECO_mean 0.7108045 3.684203e-17
## INFL_ORG_mean 0.6946657 1.930445e-16
## INFL_POL_mean 0.6684038 2.391074e-15
## ROLE_ACO_mean 0.5084688 3.911193e-10
## ROLE_MON_mean 0.2294135 3.532258e-04
## AUTONOMY_mean 0.2092923 7.753306e-04
##
## $quanti
## $quanti$`1`
## v.test Mean in category Overall mean sd in category
## AUTONOMY_mean 2.724567 3.988839 3.848377 0.1908396
## ROLE_MON_mean -3.040360 3.316848 3.525736 0.3385180
## ROLE_ACO_mean -4.101474 3.029114 3.336409 0.3244097
## INFL_POL_mean -4.723196 1.783694 2.233791 0.2238205
## ROLE_COL_mean -5.509767 1.378106 2.125116 0.1433477
## INFL_ECO_mean -5.673338 2.290905 2.745668 0.2241332
## INFL_ORG_mean -5.705389 2.879758 3.298806 0.2380333
## ROLE_INT_mean -6.684721 2.643814 3.334939 0.3085856
## Overall sd p.value
## AUTONOMY_mean 0.2859607 6.438593e-03
## ROLE_MON_mean 0.3810969 2.362956e-03
## ROLE_ACO_mean 0.4155854 4.105273e-05
## INFL_POL_mean 0.5285857 2.321666e-06
## ROLE_COL_mean 0.7520362 3.593086e-08
## INFL_ECO_mean 0.4446230 1.400418e-08
## INFL_ORG_mean 0.4074038 1.160776e-08
## ROLE_INT_mean 0.5734810 2.313648e-11
##
## $quanti$`2`
## NULL
##
## $quanti$`3`
## v.test Mean in category Overall mean sd in category
## ROLE_COL_mean 6.999034 3.074040 2.125116 0.3842621
## INFL_POL_mean 6.189744 2.823642 2.233791 0.3611328
## INFL_ECO_mean 5.867501 3.215994 2.745668 0.2480732
## INFL_ORG_mean 5.708375 3.718075 3.298806 0.1895218
## ROLE_ACO_mean 5.406485 3.741478 3.336409 0.2220436
## ROLE_INT_mean 4.833378 3.834656 3.334939 0.2801354
## ROLE_MON_mean 3.476514 3.764591 3.525736 0.3087070
## AUTONOMY_mean -3.425319 3.671789 3.848377 0.2635534
## Overall sd p.value
## ROLE_COL_mean 0.7520362 2.577325e-12
## INFL_POL_mean 0.5285857 6.026210e-10
## INFL_ECO_mean 0.4446230 4.424120e-09
## INFL_ORG_mean 0.4074038 1.140597e-08
## ROLE_ACO_mean 0.4155854 6.427344e-08
## ROLE_INT_mean 0.5734810 1.342356e-06
## ROLE_MON_mean 0.3810969 5.079786e-04
## AUTONOMY_mean 0.2859607 6.140787e-04
##
##
## attr(,"class")
## [1] "catdes" "list "
# Print the per-cluster listings of individuals stored in the HCPC result
# (the $para and $dist components shown below).
res.pca.hcpc[["desc.ind"]]
## $para
## Cluster: 1
## Czech Republic Finland Austria Norway Switzerland
## 0.1424641 0.1626042 0.1857968 0.2078174 0.2134246
## --------------------------------------------------------
## Cluster: 2
## Moldova Argentina Brazil Romania Turkey
## 0.1408246 0.1501629 0.2164506 0.2679056 0.3145764
## --------------------------------------------------------
## Cluster: 3
## Kenya Ecuador Botswana Bhutan India
## 0.08203261 0.25453232 0.28846353 0.29090536 0.31420036
##
## $dist
## Cluster: 1
## Iceland France Italy Canada Belgium
## 2.311207 1.660255 1.557999 1.528583 1.454146
## --------------------------------------------------------
## Cluster: 2
## Croatia Spain Hungary Serbia Argentina
## 1.721791 1.521311 1.413168 1.384273 1.382217
## --------------------------------------------------------
## Cluster: 3
## Thailand UAE Sudan Ethiopia Malawi
## 2.688992 2.598120 2.248661 2.131303 2.092762
library(ggrepel)
# Distances between the rows of clu (dist() default method).
d <- dist(clu)
# Non-metric MDS in two dimensions. isoMDS() lives in MASS, which this script
# never loads explicitly, so it is called through its namespace.
fit <- MASS::isoMDS(d, k = 2)
## initial value 11.632373
## iter 5 value 8.705250
## iter 10 value 8.396585
## final value 8.383972
## converged
# Print the configuration ($points) and the final stress ($stress).
fit
## $points
## [,1] [,2]
## Argentina -0.009102956 0.21657839
## Bulgaria 0.198865616 0.39381900
## Croatia 0.038947129 0.87518528
## Cyprus 0.379206450 1.03846267
## Estonia 0.612082065 -0.13113019
## Greece 0.686586157 0.17780214
## Israel 0.568646820 0.52573507
## Japan 0.426369298 0.38332480
## Latvia 0.873082975 0.43710744
## Portugal 0.999624989 0.35618385
## Serbia -0.214527230 0.24753800
## South Africa 0.030640592 -0.11163516
## South Korea -0.477565345 0.82326718
## Spain -0.045760112 0.37716115
## Australia 0.828969338 -0.31484675
## Austria 1.471999854 -0.50075437
## Belgium 1.461744569 -0.13965494
## Canada 1.654354194 0.16962438
## Czech Republic 1.150207283 -0.20166114
## Denmark 1.496678459 0.60394690
## Finland 1.163979643 -0.06629030
## France 1.784140778 0.01344487
## Germany 1.341349127 -0.74588449
## Iceland 2.317057915 -0.49114707
## Ireland 0.999806381 -0.13837331
## Italy 1.603262092 -0.22140581
## Netherlands 1.237277462 -0.66243853
## New Zealand 1.006891950 -0.03794178
## Norway 1.472774524 -0.20002503
## Sweden 1.556562530 0.36515438
## Switzerland 1.417695075 -0.28908956
## UK 1.028996495 -0.37053147
## USA 1.277838927 0.49255406
## Bangladesh -0.756407504 -0.03813752
## Bhutan -1.359580666 -0.23320521
## Botswana -1.365447512 0.21789746
## China -1.475745891 -0.82351382
## Colombia -1.144874179 0.39057252
## Ecuador -1.207679413 0.07029216
## Egypt -1.151153866 0.24838452
## El Salvador -1.347306687 0.32708842
## Ethiopia -1.816449300 -1.51364365
## India -1.111120571 -0.16203970
## Indonesia -1.032168525 -0.37565282
## Kenya -1.365376068 -0.03993135
## Malawi -1.911119286 0.27711452
## Malaysia -1.749444427 -0.46340790
## Mexico -1.141615874 0.48652569
## Oman -1.629606575 -0.50504743
## Philippines -0.743434643 0.33843312
## Sierra Leone -1.639488231 0.24368947
## Sudan -2.143624905 0.64228069
## Thailand -2.614783695 0.02298138
## UAE -2.310678811 -0.88114835
## Albania 0.106868274 -0.50542411
## Brazil 0.240436063 -0.01292588
## Chile -0.316016635 0.07587488
## Hong Kong 0.513609852 -0.58660247
## Hungary -0.069697310 -0.44840345
## Kosovo -0.292047282 -0.10545570
## Moldova 0.268020513 0.08650424
## Romania 0.379968053 -0.03754831
## Russia 0.050381090 -0.41302525
## Turkey -0.203099033 0.84339415
##
## $stress
## [1] 8.383972
# Shepard diagram for the MDS fit: points from Shepard(), with the fitted
# values (yf) overlaid as a step line.
fit.sh <- MASS::Shepard(d, fit$points)
plot(fit.sh, pch = ".")
lines(fit.sh$x, fit.sh$yf, type = "S")
# Base-graphics map of the MDS configuration. The empty plot() call sets up
# the coordinate system for text(); without it the labels were drawn on top
# of the Shepard diagram above.
x <- fit$points[, 1]
y <- fit$points[, 2]
plot(x, y,
  xlab = "Coordinate 1", ylab = "Coordinate 2",
  main = "NonMetric MDS", type = "n"
)
text(x, y, labels = row.names(clu), cex = .7)
# ggplot2 version of the same map with non-overlapping labels.
# ggplot() + geom_point() replaces the deprecated qplot().
space <- as.data.frame(fit$points)
ggplot(space, aes(x = V1, y = V2)) +
  geom_point() +
  geom_text_repel(aes(label = row.names(fit$points))) +
  ggtitle("Non-parametric MDS, centered means") +
  theme_minimal()
# Fourth variable set: the INFL_POL_mean..INFL_PER_mean and
# AUTONOMY_mean..ROLE_ACO_mean column ranges.
clu <- dplyr::select(
  wjs,
  INFL_POL_mean:INFL_PER_mean,
  AUTONOMY_mean:ROLE_ACO_mean
)
# Row labels come from the first column of wjs.
row.names(clu) <- wjs[[1]]
# Dropping outlier countries (by row position) and, in the same step, the
# sixth column (autonomy).
# Rows: 53 = Thailand, 55 = Qatar, 56 = Singapore, 57 = Tanzania.
clu <- clu[-c(53, 55, 56, 57), -6]
fviz_nbclust(clu, FUNcluster = hcut, method = "gap_stat") +
  labs(subtitle = "Gap statistic, hierarchical cluster") # 3 clusters
# NbClust indices for 2 to 10 clusters (Euclidean distance, average linkage).
nb <- NbClust(
  clu,
  distance = "euclidean",
  min.nc = 2,
  max.nc = 10,
  method = "average"
)
## *** : The Hubert index is a graphical method of determining the number of clusters.
## In the plot of Hubert index, we seek a significant knee that corresponds to a
## significant increase of the value of the measure i.e the significant peak in Hubert
## index second differences plot.
##
## *** : The D index is a graphical method of determining the number of clusters.
## In the plot of D index, we seek a significant knee (the significant peak in Dindex
## second differences plot) that corresponds to a significant increase of the value of
## the measure.
##
## *******************************************************************
## * Among all indices:
## * 8 proposed 2 as the best number of clusters
## * 10 proposed 3 as the best number of clusters
## * 1 proposed 5 as the best number of clusters
## * 2 proposed 6 as the best number of clusters
## * 1 proposed 7 as the best number of clusters
## * 2 proposed 10 as the best number of clusters
##
## ***** Conclusion *****
##
## * According to the majority rule, the best number of clusters is 3
##
##
## *******************************************************************
fviz_nbclust(nb) # 2 or 3 clusters
## Among all indices:
## ===================
## * 2 proposed 0 as the best number of clusters
## * 8 proposed 2 as the best number of clusters
## * 10 proposed 3 as the best number of clusters
## * 1 proposed 5 as the best number of clusters
## * 2 proposed 6 as the best number of clusters
## * 1 proposed 7 as the best number of clusters
## * 2 proposed 10 as the best number of clusters
##
## Conclusion
## =========================
## * According to the majority rule, the best number of clusters is 3 .
km.res <- hcut(clu, 2, nstart = 25, hc_method="average")
plot((km.res), cex = 0.7)
fviz_cluster(km.res, data = clu, ellipse.type = "convex", repel=TRUE)+theme_minimal()
km.res <- hcut(clu, 5, nstart = 25, hc_method="average")
fviz_cluster(km.res, data = clu, ellipse.type = "convex", repel=TRUE)+theme_minimal()
# PCA and clustering on the PCA factors.
# Horn's parallel analysis decides how many components to retain
# (result for this data: 1 factor).
paran(x = clu)
##
## Using eigendecomposition of correlation matrix.
## Computing: 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%
##
##
## Results of Horn's Parallel Analysis for component retention
## 270 iterations, using the mean estimate
##
## --------------------------------------------------
## Component Adjusted Unadjusted Estimated
## Eigenvalue Eigenvalue Bias
## --------------------------------------------------
## 1 4.708862 5.332078 0.623215
## --------------------------------------------------
##
## Adjusted eigenvalues > 1 indicate dimensions to retain.
## (1 components retained)
# PCA on the indicators without scaling them to unit variance, keeping two
# components.
res.pca <- PCA(clu, scale.unit = FALSE, ncp = 2, graph = TRUE)

# Diagnostic plots: biplot of axes 1-2, scree plot, and the variable
# contributions to each of the two axes.
fviz_pca_biplot(
  res.pca,
  geom = "text", title = "PCA", axes = c(1, 2), labelsize = 2.5, repel = TRUE
) +
  theme_minimal()
fviz_screeplot(res.pca, ncp = 10)
fviz_contrib(res.pca, choice = "var", axes = 1)
fviz_contrib(res.pca, choice = "var", axes = 2)

# Keep the coordinates of the individuals (countries) on the retained axes.
individ <- get_pca_ind(res.pca)
O4ind <- individ$coord

# Hierarchical clustering on the principal components; nb.clust = -1 lets
# HCPC cut the tree at its suggested level.
res.pca.hcpc <- HCPC(res.pca, nb.clust = -1) # 3 clusters

# An HCPC result carries its own data, so `data` is not passed. The previous
# call passed `data = x`, but `x` is not defined in the visible part of this
# script and only worked because the argument was never evaluated.
fviz_cluster(res.pca.hcpc, ellipse.type = "convex", repel = TRUE) +
  theme_tufte() +
  labs(title = "Clusters (after PCA)") +
  theme(
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14, face = "bold")
  )

# Description of the clusters by the variables.
res.pca.hcpc$desc.var
## $quanti.var
## Eta2 P-value
## ROLE_COL_mean 0.8698247 2.728071e-27
## ROLE_INT_mean 0.7569803 3.708884e-19
## INFL_ECO_mean 0.7115447 6.343915e-17
## INFL_ORG_mean 0.6874036 7.071621e-16
## INFL_POL_mean 0.6751915 2.232663e-15
## ROLE_ACO_mean 0.4960828 1.176999e-09
## ROLE_MON_mean 0.2179153 6.274996e-04
## INFL_PER_mean 0.2017545 1.159027e-03
## INFL_PRO_mean 0.1323245 1.414880e-02
##
## $quanti
## $quanti$`1`
## v.test Mean in category Overall mean sd in category
## INFL_PER_mean -2.281400 2.494925 2.605831 0.1513998
## ROLE_MON_mean -2.959544 3.316848 3.519004 0.3385180
## ROLE_ACO_mean -4.024930 3.029114 3.327417 0.3244097
## INFL_POL_mean -4.768423 1.783694 2.210085 0.2238205
## ROLE_COL_mean -5.491243 1.378106 2.100955 0.1433477
## INFL_ORG_mean -5.659653 2.879758 3.288270 0.2380333
## INFL_ECO_mean -5.690201 2.290905 2.729564 0.2241332
## ROLE_INT_mean -6.622533 2.643814 3.325769 0.3085856
## Overall sd p.value
## INFL_PER_mean 0.2706660 2.252478e-02
## ROLE_MON_mean 0.3803150 3.080946e-03
## ROLE_ACO_mean 0.4126486 5.699229e-05
## INFL_POL_mean 0.4978676 1.856737e-06
## ROLE_COL_mean 0.7329211 3.991142e-08
## INFL_ORG_mean 0.4018802 1.516797e-08
## INFL_ECO_mean 0.4292201 1.268902e-08
## ROLE_INT_mean 0.5733401 3.530947e-11
##
## $quanti$`2`
## NULL
##
## $quanti$`3`
## v.test Mean in category Overall mean sd in category
## ROLE_COL_mean 6.919672 3.045377 2.100955 0.3711973
## INFL_POL_mean 6.130539 2.778461 2.210085 0.3067097
## INFL_ECO_mean 5.745352 3.188783 2.729564 0.2215127
## INFL_ORG_mean 5.579797 3.705849 3.288270 0.1859451
## ROLE_ACO_mean 5.283404 3.733409 3.327417 0.2245024
## ROLE_INT_mean 4.729819 3.830756 3.325769 0.2864965
## INFL_PER_mean 3.467760 2.780617 2.605831 0.3130260
## ROLE_MON_mean 3.336870 3.755327 3.519004 0.3134690
## INFL_PRO_mean 2.816550 3.899640 3.762255 0.2423878
## Overall sd p.value
## ROLE_COL_mean 0.7329211 4.526896e-12
## INFL_POL_mean 0.4978676 8.758201e-10
## INFL_ECO_mean 0.4292201 9.173013e-09
## INFL_ORG_mean 0.4018802 2.407995e-08
## ROLE_ACO_mean 0.4126486 1.268055e-07
## ROLE_INT_mean 0.5733401 2.247203e-06
## INFL_PER_mean 0.2706660 5.248165e-04
## ROLE_MON_mean 0.3803150 8.472758e-04
## INFL_PRO_mean 0.2619394 4.854241e-03
##
##
## attr(,"class")
## [1] "catdes" "list "
# Description of the clusters by individual countries (the `$para` and
# `$dist` listings per cluster).
res.pca.hcpc[["desc.ind"]]
## $para
## Cluster: 1
## Finland Norway Czech Republic Belgium Switzerland
## 0.1715150 0.2159136 0.2208365 0.2485077 0.2524128
## --------------------------------------------------------
## Cluster: 2
## Moldova Brazil Argentina Bulgaria Romania
## 0.1199772 0.1611993 0.1685898 0.2240243 0.2585468
## --------------------------------------------------------
## Cluster: 3
## Kenya Ecuador India Botswana Bhutan
## 0.09194348 0.14919733 0.22755625 0.32096119 0.34235942
##
## $dist
## Cluster: 1
## Iceland France Italy Canada Netherlands
## 2.278888 1.678364 1.606557 1.520939 1.516498
## --------------------------------------------------------
## Cluster: 2
## Croatia Spain Argentina Hungary Turkey
## 1.716906 1.514450 1.397554 1.374816 1.360918
## --------------------------------------------------------
## Cluster: 3
## UAE Sudan Malawi Malaysia Ethiopia
## 2.643907 2.288352 2.193083 2.042658 1.971425
# Rebuild the clustering table, this time including the two O4 items.
clu <- dplyr::select(
  wjs,
  COUNTRY,
  INFL_POL_mean:INFL_PER_mean,
  AUTONOMY_mean:ROLE_ACO_mean,
  O4A_mean,
  O4E_mean
)
rownames(clu) <- as.character(clu$COUNTRY)

# Drop outlier countries by row position:
# 53 = Thailand, 55 = Qatar, 56 = Singapore, 57 = Tanzania.
# NOTE(review): the positions assume the row order of the source data file;
# confirm them if the file changes.
clu <- clu[-c(53, 55, 56, 57), ]

# Drop countries with any missing value (per the original note: those missing
# Thrust, O4). Base subsetting keeps the country row names, so they do not
# have to be reassigned afterwards.
clu <- clu[complete.cases(clu), , drop = FALSE]

# Drop autonomy and the COUNTRY label column by name rather than by position,
# so a change in column layout cannot remove the wrong variable.
clu <- clu[, setdiff(names(clu), c("COUNTRY", "AUTONOMY_mean")), drop = FALSE]
# Test for the number of clusters: gap statistic for hierarchical clustering,
# then the NbClust index vote (Euclidean distance, average linkage, 2 to 10
# clusters).
fviz_nbclust(clu, hcut, method = "gap_stat") +
  labs(subtitle = "Gap statistic, hierarchical cluster") # 3 clusters
nb <- NbClust(
  clu,
  distance = "euclidean",
  min.nc = 2,
  max.nc = 10,
  method = "average"
)
## *** : The Hubert index is a graphical method of determining the number of clusters.
## In the plot of Hubert index, we seek a significant knee that corresponds to a
## significant increase of the value of the measure i.e the significant peak in Hubert
## index second differences plot.
##
## *** : The D index is a graphical method of determining the number of clusters.
## In the plot of D index, we seek a significant knee (the significant peak in Dindex
## second differences plot) that corresponds to a significant increase of the value of
## the measure.
##
## *******************************************************************
## * Among all indices:
## * 9 proposed 2 as the best number of clusters
## * 5 proposed 3 as the best number of clusters
## * 6 proposed 4 as the best number of clusters
## * 1 proposed 9 as the best number of clusters
## * 2 proposed 10 as the best number of clusters
##
## ***** Conclusion *****
##
## * According to the majority rule, the best number of clusters is 2
##
##
## *******************************************************************
# Summarise how the NbClust indices stored in `nb` voted on the number of
# clusters (reading: 2, 3 or 4 clusters).
fviz_nbclust(x = nb)
## Among all indices:
## ===================
## * 2 proposed 0 as the best number of clusters
## * 1 proposed 1 as the best number of clusters
## * 9 proposed 2 as the best number of clusters
## * 5 proposed 3 as the best number of clusters
## * 6 proposed 4 as the best number of clusters
## * 1 proposed 9 as the best number of clusters
## * 2 proposed 10 as the best number of clusters
##
## Conclusion
## =========================
## * According to the majority rule, the best number of clusters is 2 .
# PCA and clustering on the PCA factors.
# Horn's parallel analysis decides how many components to retain
# (result for this data: 2 factors).
paran(x = clu)
##
## Using eigendecomposition of correlation matrix.
## Computing: 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%
##
##
## Results of Horn's Parallel Analysis for component retention
## 330 iterations, using the mean estimate
##
## --------------------------------------------------
## Component Adjusted Unadjusted Estimated
## Eigenvalue Eigenvalue Bias
## --------------------------------------------------
## 1 4.414199 5.255210 0.841010
## 2 1.338764 1.909134 0.570370
## --------------------------------------------------
##
## Adjusted eigenvalues > 1 indicate dimensions to retain.
## (2 components retained)
# PCA on the indicators without scaling them to unit variance, keeping two
# components.
res.pca <- PCA(clu, scale.unit = FALSE, ncp = 2, graph = TRUE)

# Diagnostic plots: biplot of axes 1-2, scree plot, and the variable
# contributions to each of the two axes.
fviz_pca_biplot(
  res.pca,
  geom = "text", title = "PCA", axes = c(1, 2), labelsize = 2.5, repel = TRUE
) +
  theme_minimal()
fviz_screeplot(res.pca, ncp = 10)
fviz_contrib(res.pca, choice = "var", axes = 1)
fviz_contrib(res.pca, choice = "var", axes = 2)

# Keep the coordinates of the individuals (countries) on the retained axes.
individ <- get_pca_ind(res.pca)
O4ind <- individ$coord

# Hierarchical clustering on the principal components; nb.clust = -1 lets
# HCPC cut the tree at its suggested level.
res.pca.hcpc <- HCPC(res.pca, nb.clust = -1) # 3 clusters

# An HCPC result carries its own data, so `data` is not passed. The previous
# call passed `data = x`, but `x` is not defined in the visible part of this
# script and only worked because the argument was never evaluated.
fviz_cluster(res.pca.hcpc, ellipse.type = "convex", repel = TRUE) +
  theme_tufte() +
  labs(title = "Clusters (after PCA)") +
  theme(
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14, face = "bold")
  )

# Description of the clusters by the variables.
res.pca.hcpc$desc.var
## $quanti.var
## Eta2 P-value
## ROLE_COL_mean 0.8025593 2.771804e-17
## INFL_ECO_mean 0.7094773 2.425102e-13
## ROLE_INT_mean 0.6755091 3.260323e-12
## INFL_POL_mean 0.6670387 5.973826e-12
## INFL_ORG_mean 0.6589617 1.049236e-11
## O4A_mean 0.5743337 1.919232e-09
## O4E_mean 0.5641107 3.352288e-09
## ROLE_ACO_mean 0.4146953 3.415675e-06
## INFL_PER_mean 0.2429513 1.443478e-03
## ROLE_MON_mean 0.1432996 2.639251e-02
## INFL_PRO_mean 0.1259826 4.223978e-02
##
## $quanti
## $quanti$`1`
## v.test Mean in category Overall mean sd in category
## O4E_mean 3.935359 3.431809 3.001985 0.2511501
## INFL_PER_mean -2.088249 2.507447 2.622006 0.1469778
## ROLE_MON_mean -2.210295 3.337851 3.495063 0.3581715
## ROLE_ACO_mean -3.253518 3.115344 3.365385 0.3699414
## INFL_POL_mean -4.269143 1.797571 2.221161 0.2035667
## INFL_ORG_mean -4.824985 2.928132 3.305765 0.2417741
## ROLE_COL_mean -4.915271 1.409205 2.133319 0.1640174
## INFL_ECO_mean -5.274583 2.307773 2.745249 0.2356943
## ROLE_INT_mean -5.662792 2.701558 3.328452 0.4191497
## Overall sd p.value
## O4E_mean 0.5487478 8.307242e-05
## INFL_PER_mean 0.2756193 3.677535e-02
## ROLE_MON_mean 0.3573567 2.708470e-02
## ROLE_ACO_mean 0.3861215 1.139856e-03
## INFL_POL_mean 0.4985065 1.962255e-05
## INFL_ORG_mean 0.3932233 1.400135e-06
## ROLE_COL_mean 0.7401595 8.865965e-07
## INFL_ECO_mean 0.4167074 1.330585e-07
## ROLE_INT_mean 0.5561975 1.489296e-08
##
## $quanti$`2`
## v.test Mean in category Overall mean sd in category
## ROLE_INT_mean 2.495140 3.571265 3.328452 0.2581864
## O4A_mean -4.894601 2.297231 2.759496 0.2589080
## O4E_mean -5.131164 2.509337 3.001985 0.3457560
## Overall sd p.value
## ROLE_INT_mean 0.5561975 1.259073e-02
## O4A_mean 0.5397923 9.850536e-07
## O4E_mean 0.5487478 2.879559e-07
##
## $quanti$`3`
## v.test Mean in category Overall mean sd in category
## ROLE_COL_mean 5.615436 3.135025 2.133319 0.3707426
## INFL_POL_mean 5.257513 2.852819 2.221161 0.2960142
## INFL_ORG_mean 4.769005 3.757723 3.305765 0.1898614
## INFL_ECO_mean 4.628648 3.210103 2.745249 0.2062529
## ROLE_ACO_mean 4.207777 3.756953 3.365385 0.1873502
## O4A_mean 4.159102 3.300571 2.759496 0.4671153
## INFL_PER_mean 3.374064 2.846133 2.622006 0.3561175
## ROLE_INT_mean 3.328856 3.774678 3.328452 0.2319286
## INFL_PRO_mean 2.389320 3.930626 3.780198 0.2439050
## ROLE_MON_mean 2.263077 3.689973 3.495063 0.2736994
## Overall sd p.value
## ROLE_COL_mean 0.7401595 1.960676e-08
## INFL_POL_mean 0.4985065 1.460168e-07
## INFL_ORG_mean 0.3932233 1.851385e-06
## INFL_ECO_mean 0.4167074 3.680601e-06
## ROLE_ACO_mean 0.3861215 2.578946e-05
## O4A_mean 0.5397923 3.195006e-05
## INFL_PER_mean 0.2756193 7.406725e-04
## ROLE_INT_mean 0.5561975 8.720358e-04
## INFL_PRO_mean 0.2612297 1.687961e-02
## ROLE_MON_mean 0.3573567 2.363094e-02
##
##
## attr(,"class")
## [1] "catdes" "list "
# Description of the clusters by individual countries (the `$para` and
# `$dist` listings per cluster).
res.pca.hcpc[["desc.ind"]]
## $para
## Cluster: 1
## Denmark UK Ireland Austria Belgium
## 0.1981423 0.2258593 0.2476614 0.2604674 0.2975330
## --------------------------------------------------------
## Cluster: 2
## Russia Turkey Kosovo Chile Hungary
## 0.1849268 0.2155454 0.2293731 0.2304697 0.2867215
## --------------------------------------------------------
## Cluster: 3
## Botswana Oman Ethiopia Kenya Malaysia
## 0.1602248 0.2106291 0.3400777 0.4301602 0.4796833
##
## $dist
## Cluster: 1
## Iceland Canada Sweden Switzerland Germany
## 2.518254 2.200253 2.185024 2.170086 2.122698
## --------------------------------------------------------
## Cluster: 2
## Serbia Bulgaria Argentina Turkey Albania
## 2.128997 2.042885 1.973737 1.952760 1.910355
## --------------------------------------------------------
## Cluster: 3
## UAE India Malaysia Malawi Bhutan
## 3.419589 2.441322 2.375087 2.340228 2.205273
# Average-linkage hierarchical clustering of `clu`, cut into 2 clusters:
# plot the fitted tree, then the cluster plot with convex hulls.
# `nstart` is a kmeans() argument; hcut() accepts it through `...` but does
# not use it, so it is no longer passed.
km.res <- hcut(clu, k = 2, hc_method = "average")
plot(km.res, cex = 0.7)
fviz_cluster(km.res, data = clu, ellipse.type = "convex", repel = TRUE) +
  theme_minimal()

# Same clustering cut into 3 clusters (cluster plot only).
km.res <- hcut(clu, k = 3, hc_method = "average")
fviz_cluster(km.res, data = clu, ellipse.type = "convex", repel = TRUE) +
  theme_minimal()

# Same clustering cut into 4 clusters (cluster plot only).
km.res <- hcut(clu, k = 4, hc_method = "average")
fviz_cluster(km.res, data = clu, ellipse.type = "convex", repel = TRUE) +
  theme_minimal()