#install.packages("ggplot2")
library(ggplot2)
#install.packages("dplyr")
library(dplyr)
#install.packages("Hmisc")
library(Hmisc)
#install.packages("factoextra")
library(factoextra)
#install.packages("cluster")
library(cluster)
#install.packages("magrittr")
library(magrittr)
#install.packages("NbClust")
library(NbClust)
#install.packages("tidyr")
library(tidyr)
# Read the country-level data set from a comma-separated file whose first row
# holds the column names; "." is the decimal mark.
data <- read.table(
  file = "./worlddata2023.csv",
  sep = ",",
  dec = ".",
  header = TRUE
)
# Preview the first rows to check that the import worked.
head(data)
## Country Population Urban Fertility Expectancy Density Land
## 1 Afghanistan 38.0418 9.7973 4.47 64.5 60 652.230
## 2 Albania 2.8542 1.7476 1.62 78.5 105 28.748
## 3 Algeria 43.0531 31.5101 3.02 76.7 18 2381.741
## 4 Angola 31.8253 21.0610 5.52 60.8 26 1246.700
## 5 Antigua and Barbuda 0.0971 0.0238 1.99 76.9 223 0.443
## 6 Argentina 44.9387 41.3396 2.26 76.5 17 2780.400
## Co2 GDP
## 1 8.672 19101.35
## 2 4.536 15278.08
## 3 150.006 169988.24
## 4 34.693 94635.42
## 5 0.557 1727.76
## 6 201.348 449663.45
This data set contains 186 observations (countries) and 8 numeric variables, plus a column with the country name.
# Drop rows 187 to 195 and columns 10 and 11 by position, then preview the
# cleaned data frame.
data <- data[-(187:195), -c(10, 11)]
head(data)
## Country Population Urban Fertility Expectancy Density Land
## 1 Afghanistan 38.0418 9.7973 4.47 64.5 60 652.230
## 2 Albania 2.8542 1.7476 1.62 78.5 105 28.748
## 3 Algeria 43.0531 31.5101 3.02 76.7 18 2381.741
## 4 Angola 31.8253 21.0610 5.52 60.8 26 1246.700
## 5 Antigua and Barbuda 0.0971 0.0238 1.99 76.9 223 0.443
## 6 Argentina 44.9387 41.3396 2.26 76.5 17 2780.400
## Co2 GDP
## 1 8.672 19101.35
## 2 4.536 15278.08
## 3 150.006 169988.24
## 4 34.693 94635.42
## 5 0.557 1727.76
## 6 201.348 449663.45
I removed the last 2 columns and the last 9 rows, which did not contain any data.
# Descriptive statistics for every column except the first one (Country).
summary(object = data[, -1])
## Population Urban Fertility Expectancy
## Min. : 0.0182 Min. : 0.0055 Min. :0.980 Min. :52.80
## 1st Qu.: 2.5353 1st Qu.: 1.3422 1st Qu.:1.710 1st Qu.:66.95
## Median : 9.6065 Median : 4.7171 Median :2.255 Median :73.20
## Mean : 41.0583 Mean : 22.7834 Mean :2.714 Mean :72.21
## 3rd Qu.: 30.0650 3rd Qu.: 15.6828 3rd Qu.:3.612 3rd Qu.:77.38
## Max. :1397.7150 Max. :842.9340 Max. :6.910 Max. :84.20
## Density Land Co2 GDP
## Min. : 2.0 Min. : 0.16 Min. : 0.051 Min. : 195
## 1st Qu.: 32.5 1st Qu.: 28.79 1st Qu.: 2.569 1st Qu.: 11099
## Median : 83.5 Median : 145.64 Median : 12.798 Median : 38779
## Mean : 207.4 Mean : 719.05 Mean : 179.708 Mean : 495118
## 3rd Qu.: 204.5 3rd Qu.: 581.39 3rd Qu.: 64.739 3rd Qu.: 244288
## Max. :8358.0 Max. :17098.24 Max. :9893.038 Max. :21427700
# Standardise the eight numeric indicators (columns 2 to 9) to z-scores.
data_clu_std <- as.data.frame(scale(data[2:9]))

# Dissimilarity = Euclidean distance of each country from the overall mean,
# i.e. from the origin of the standardised space. rowSums() over the squared
# z-scores replaces the hand-written sum of eight named columns, so the formula
# stays correct if the set of cluster variables changes.
data$Dissimilarity <- sqrt(unname(rowSums(data_clu_std^2)))

# List the 15 countries with the largest dissimilarity as outlier candidates.
head(data[order(-data$Dissimilarity), c("Country", "Dissimilarity")], 15)
## Country Dissimilarity
## 34 China 20.772119
## 178 United States 12.642304
## 148 Singapore 12.519346
## 73 India 11.177093
## 136 Russia 8.736977
## 29 Canada 5.060876
## 22 Brazil 4.797602
## 8 Australia 3.970213
## 118 Niger 3.598798
## 119 Nigeria 3.528500
## 32 Chad 3.459203
## 152 Somalia 3.352503
## 81 Japan 3.332335
## 11 Bahrain 3.259028
## 42 Democratic Republic of the Congo 3.125789
# Exclude the five countries with the largest dissimilarity scores.
data <- filter(
  data,
  !Country %in% c("China", "United States", "Singapore", "India", "Russia")
)

# Standardise columns 2 to 9 again, now without the excluded countries.
data_clu_std <- as.data.frame(scale(data[, 2:9]))

# Hopkins statistic as a check of clustering tendency; no plot is drawn.
get_clust_tendency(
  data = data_clu_std,
  n = nrow(data_clu_std) - 1,
  graph = FALSE
)
## $hopkins_stat
## [1] 0.8836458
##
## $plot
## NULL
# Label each row with its country so that plots and outputs show names.
rownames(data_clu_std) <- data$Country

# Graphical presentation of the dissimilarity matrix.
# The method is spelled "euclidean", the value documented by stats::dist()
# and the spelling used in the NbClust() calls of this script.
Distance <- get_dist(data_clu_std,
                     method = "euclidean")
fviz_dist(Distance,
          gradient = list(low = "darkred",
                          mid = "grey95",
                          high = "white"))
# Elbow plot: within-cluster sum of squares for each candidate number of
# clusters.
fviz_nbclust(x = data_clu_std, FUNcluster = kmeans, method = "wss") +
  labs(subtitle = "Elbow method")

# Silhouette plot for each candidate number of clusters.
fviz_nbclust(x = data_clu_std, FUNcluster = kmeans, method = "silhouette") +
  labs(subtitle = "Silhouette analysis")
# Compare all NbClust indices for k-means solutions with 2 to 10 clusters,
# based on Euclidean distances.
NbClust(
  data = data_clu_std,
  method = "kmeans",
  distance = "euclidean",
  index = "all",
  min.nc = 2,
  max.nc = 10
)
## *** : The Hubert index is a graphical method of determining the number of clusters.
## In the plot of Hubert index, we seek a significant knee that corresponds to a
## significant increase of the value of the measure i.e the significant peak in Hubert
## index second differences plot.
##
## *** : The D index is a graphical method of determining the number of clusters.
## In the plot of D index, we seek a significant knee (the significant peak in Dindex
## second differences plot) that corresponds to a significant increase of the value of
## the measure.
##
## *******************************************************************
## * Among all indices:
## * 4 proposed 2 as the best number of clusters
## * 7 proposed 3 as the best number of clusters
## * 4 proposed 5 as the best number of clusters
## * 3 proposed 6 as the best number of clusters
## * 3 proposed 9 as the best number of clusters
## * 2 proposed 10 as the best number of clusters
##
## ***** Conclusion *****
##
## * According to the majority rule, the best number of clusters is 3
##
##
## *******************************************************************
## $All.index
## KL CH Hartigan CCC Scott Marriot TrCovW TraceW
## 2 0.1950 45.1520 93.0361 -6.4544 272.6518 1.827680e+15 29103.414 1149.9340
## 3 5.8878 80.3765 26.2199 -0.0516 499.6347 1.173427e+15 13422.542 756.6575
## 4 0.5341 69.8229 37.3144 0.0594 692.5140 7.186802e+14 11398.452 659.5099
## 5 1.0127 72.3247 39.0194 3.3987 995.7089 2.103123e+14 6673.872 544.6823
## 6 4.1859 78.0453 14.4459 8.0856 1223.1331 8.620682e+13 5493.469 445.8391
## 7 3.0539 72.3980 8.3925 8.1535 1290.8442 8.071756e+13 4472.419 411.8424
## 8 0.0863 65.8667 39.1282 7.4202 1370.8160 6.777451e+13 4213.914 392.8921
## 9 8.4605 75.1229 8.6605 12.2594 1569.6887 2.858860e+13 2161.951 320.4211
## 10 1.9697 70.6871 6.1967 11.9380 1644.6150 2.333074e+13 1950.969 305.0607
## Friedman Rubin Cindex DB Silhouette Duda Pseudot2 Beale Ratkowsky
## 2 8.3691 1.2522 0.1923 1.6678 0.2670 3.3523 -51.9257 -3.6488 0.2415
## 3 27.2125 1.9031 0.1649 1.1993 0.3616 3.3435 -103.0341 -3.6371 0.3767
## 4 32.8233 2.1834 0.1490 1.4431 0.3724 1.3668 -15.0293 -1.3912 0.3496
## 5 39.0995 2.6437 0.1466 1.1783 0.4003 0.6310 12.2795 2.8329 0.3504
## 6 43.7256 3.2299 0.1652 1.1075 0.4086 1.0877 -8.7075 -0.4215 0.3380
## 7 45.0743 3.4965 0.1517 1.1817 0.2854 0.9466 3.6070 0.2939 0.3183
## 8 47.7600 3.6651 0.1455 1.3201 0.2120 0.9223 1.6016 0.4221 0.3004
## 9 58.6324 4.4941 0.1471 1.1694 0.2289 0.7693 14.9935 1.4917 0.2937
## 10 61.0200 4.7204 0.1398 1.1424 0.2329 1.7488 -2.5691 -1.5087 0.2805
## Ball Ptbiserial Frey McClain Dunn Hubert SDindex Dindex SDbw
## 2 574.9670 0.1729 -0.6296 0.6514 0.0171 0.0005 2.2619 1.8681 1.2636
## 3 252.2192 0.4929 0.6374 0.5676 0.0253 0.0018 3.2383 1.5353 1.4220
## 4 164.8775 0.4957 -2.9975 0.6765 0.0254 0.0019 3.7064 1.4430 1.5548
## 5 108.9365 0.5677 0.9295 0.5162 0.0262 0.0019 3.2335 1.3833 1.3300
## 6 74.3065 0.5545 2.9070 0.6208 0.0332 0.0021 3.4231 1.2811 1.3573
## 7 58.8346 0.4677 4.3312 0.9902 0.0327 0.0022 3.6204 1.2032 1.2261
## 8 49.1115 0.3866 -0.1036 1.5620 0.0303 0.0023 4.1336 1.1526 1.1429
## 9 35.6023 0.3959 1.5056 1.5104 0.0323 0.0023 3.7972 1.0908 1.0278
## 10 30.5061 0.3610 0.3122 1.8798 0.0240 0.0023 3.6465 1.0498 0.9282
##
## $All.CriticalValues
## CritValue_Duda CritValue_PseudoT2 Fvalue_Beale
## 2 0.7293 27.4635 1.0000
## 3 0.7177 57.8111 1.0000
## 4 0.7121 22.6432 1.0000
## 5 0.4997 21.0215 0.0076
## 6 0.7631 33.5206 1.0000
## 7 0.7464 21.7408 0.9680
## 8 0.5813 13.6876 0.9063
## 9 0.5629 38.8235 0.1664
## 10 0.1620 31.0295 1.0000
##
## $Best.nc
## KL CH Hartigan CCC Scott Marriot TrCovW
## Number_clusters 9.0000 3.0000 3.0000 9.0000 5.0000 5.000000e+00 3.00
## Value_Index 8.4605 80.3765 66.8162 12.2594 303.1949 3.842624e+14 15680.87
## TraceW Friedman Rubin Cindex DB Silhouette Duda
## Number_clusters 3.0000 3.0000 9.0000 10.0000 6.0000 6.0000 2.0000
## Value_Index 296.1288 18.8434 -0.6027 0.1398 1.1075 0.4086 3.3523
## PseudoT2 Beale Ratkowsky Ball PtBiserial Frey McClain
## Number_clusters 2.0000 2.0000 3.0000 3.0000 5.0000 1 5.0000
## Value_Index -51.9257 -3.6488 0.3767 322.7478 0.5677 NA 0.5162
## Dunn Hubert SDindex Dindex SDbw
## Number_clusters 6.0000 0 2.0000 0 10.0000
## Value_Index 0.0332 0 2.2619 0 0.9282
##
## $Best.partition
## Afghanistan Albania
## 1 2
## Algeria Angola
## 2 1
## Antigua and Barbuda Argentina
## 2 2
## Armenia Australia
## 2 3
## Austria Azerbaijan
## 2 2
## Bahrain Bangladesh
## 2 3
## Barbados Belarus
## 2 2
## Belgium Belize
## 2 2
## Benin Bhutan
## 1 2
## Bolivia Bosnia and Herzegovina
## 2 2
## Botswana Brazil
## 2 3
## Brunei Bulgaria
## 2 2
## Burkina Faso Burundi
## 1 1
## Cambodia Cameroon
## 2 1
## Canada Cape Verde
## 3 2
## Central African Republic Chad
## 1 1
## Chile Colombia
## 2 2
## Comoros Costa Rica
## 1 2
## Croatia Cuba
## 2 2
## Cyprus Czech Republic
## 2 2
## Democratic Republic of the Congo Denmark
## 1 2
## Djibouti Dominica
## 1 2
## Dominican Republic East Timor
## 2 1
## Ecuador Egypt
## 2 3
## El Salvador Equatorial Guinea
## 2 1
## Eritrea Estonia
## 1 2
## Ethiopia Federated States of Micronesia
## 1 2
## Fiji Finland
## 1 2
## France Gabon
## 3 1
## Georgia Germany
## 2 3
## Ghana Greece
## 1 2
## Grenada Guatemala
## 2 2
## Guinea Guinea-Bissau
## 1 1
## Guyana Haiti
## 2 1
## Honduras Hungary
## 2 2
## Iceland Indonesia
## 2 3
## Iran Iraq
## 3 1
## Israel Italy
## 2 3
## Ivory Coast Jamaica
## 1 2
## Japan Jordan
## 3 2
## Kazakhstan Kenya
## 2 1
## Kiribati Kuwait
## 1 2
## Kyrgyzstan Laos
## 2 2
## Latvia Lebanon
## 2 2
## Lesotho Liberia
## 1 1
## Libya Liechtenstein
## 2 2
## Lithuania Luxembourg
## 2 2
## Madagascar Malawi
## 1 1
## Malaysia Maldives
## 2 2
## Mali Malta
## 1 2
## Marshall Islands Mauritania
## 1 1
## Mauritius Mexico
## 2 3
## Moldova Mongolia
## 2 2
## Montenegro Morocco
## 2 2
## Mozambique Myanmar
## 1 2
## Namibia Nepal
## 1 2
## Netherlands New Zealand
## 2 2
## Nicaragua Niger
## 2 1
## Nigeria North Korea
## 3 2
## Norway Oman
## 2 2
## Pakistan Palau
## 3 2
## Panama Papua New Guinea
## 2 1
## Paraguay Peru
## 2 2
## Philippines Poland
## 2 2
## Portugal Qatar
## 2 2
## Republic of Ireland Republic of the Congo
## 2 1
## Romania Rwanda
## 2 1
## Saint Kitts and Nevis Saint Lucia
## 2 2
## Saint Vincent and the Grenadines Samoa
## 2 1
## Sao Tome and Principe Saudi Arabia
## 1 3
## Senegal Serbia
## 1 2
## Seychelles Sierra Leone
## 2 1
## Slovakia Slovenia
## 2 2
## Solomon Islands Somalia
## 1 1
## South Africa South Korea
## 3 3
## South Sudan Spain
## 1 3
## Sri Lanka Sudan
## 2 1
## Suriname Sweden
## 2 2
## Switzerland Syria
## 2 2
## Tajikistan Tanzania
## 1 1
## Thailand The Bahamas
## 2 2
## The Gambia Togo
## 1 1
## Tonga Trinidad and Tobago
## 1 2
## Tunisia Turkey
## 2 3
## Turkmenistan Uganda
## 2 1
## Ukraine United Arab Emirates
## 2 2
## United Kingdom Uruguay
## 3 2
## Uzbekistan Vanuatu
## 2 1
## Venezuela Vietnam
## 2 2
## Yemen Zambia
## 1 1
## Zimbabwe
## 1
# K-means starts from randomly chosen centres (25 random starts here), so
# without a fixed RNG seed the partition, and in particular the numbering of
# the clusters, can change from run to run. Seed the generator so that the
# solution and every interpretation built on the cluster numbers is
# reproducible.
# NOTE(review): cluster numbers in previously rendered output may not match a
# seeded run - re-render the document after applying this.
set.seed(123)
Clustering <- kmeans(data_clu_std,
                     centers = 3,  # number of groups
                     nstart = 25)  # number of random sets of initial centres
Clustering
## K-means clustering with 3 clusters of sizes 101, 61, 19
##
## Cluster means:
## Population Urban Fertility Expectancy Density Land Co2
## 1 -0.2734140 -0.2332168 -0.6266006 0.5607497 0.1126259 -0.21440806 -0.1783443
## 2 -0.1854668 -0.3073531 1.1736972 -1.0811103 -0.2448471 -0.06369196 -0.4329870
## 3 2.0488569 2.2264968 -0.4373090 0.4901055 0.1873923 1.34423284 2.3381569
## GDP
## 1 -0.1641203
## 2 -0.3654631
## 3 2.0457579
##
## Clustering vector:
## Afghanistan Albania
## 2 1
## Algeria Angola
## 1 2
## Antigua and Barbuda Argentina
## 1 1
## Armenia Australia
## 1 3
## Austria Azerbaijan
## 1 1
## Bahrain Bangladesh
## 1 3
## Barbados Belarus
## 1 1
## Belgium Belize
## 1 1
## Benin Bhutan
## 2 1
## Bolivia Bosnia and Herzegovina
## 1 1
## Botswana Brazil
## 2 3
## Brunei Bulgaria
## 1 1
## Burkina Faso Burundi
## 2 2
## Cambodia Cameroon
## 1 2
## Canada Cape Verde
## 3 1
## Central African Republic Chad
## 2 2
## Chile Colombia
## 1 1
## Comoros Costa Rica
## 2 1
## Croatia Cuba
## 1 1
## Cyprus Czech Republic
## 1 1
## Democratic Republic of the Congo Denmark
## 2 1
## Djibouti Dominica
## 2 1
## Dominican Republic East Timor
## 1 2
## Ecuador Egypt
## 1 3
## El Salvador Equatorial Guinea
## 1 2
## Eritrea Estonia
## 2 1
## Ethiopia Federated States of Micronesia
## 2 1
## Fiji Finland
## 2 1
## France Gabon
## 3 2
## Georgia Germany
## 1 3
## Ghana Greece
## 2 1
## Grenada Guatemala
## 1 1
## Guinea Guinea-Bissau
## 2 2
## Guyana Haiti
## 1 2
## Honduras Hungary
## 1 1
## Iceland Indonesia
## 1 3
## Iran Iraq
## 3 2
## Israel Italy
## 1 3
## Ivory Coast Jamaica
## 2 1
## Japan Jordan
## 3 1
## Kazakhstan Kenya
## 1 2
## Kiribati Kuwait
## 2 1
## Kyrgyzstan Laos
## 2 2
## Latvia Lebanon
## 1 1
## Lesotho Liberia
## 2 2
## Libya Liechtenstein
## 1 1
## Lithuania Luxembourg
## 1 1
## Madagascar Malawi
## 2 2
## Malaysia Maldives
## 1 1
## Mali Malta
## 2 1
## Marshall Islands Mauritania
## 2 2
## Mauritius Mexico
## 1 3
## Moldova Mongolia
## 1 2
## Montenegro Morocco
## 1 1
## Mozambique Myanmar
## 2 1
## Namibia Nepal
## 2 1
## Netherlands New Zealand
## 1 1
## Nicaragua Niger
## 1 2
## Nigeria North Korea
## 3 1
## Norway Oman
## 1 1
## Pakistan Palau
## 3 1
## Panama Papua New Guinea
## 1 2
## Paraguay Peru
## 1 1
## Philippines Poland
## 1 1
## Portugal Qatar
## 1 1
## Republic of Ireland Republic of the Congo
## 1 2
## Romania Rwanda
## 1 2
## Saint Kitts and Nevis Saint Lucia
## 1 1
## Saint Vincent and the Grenadines Samoa
## 1 2
## Sao Tome and Principe Saudi Arabia
## 2 3
## Senegal Serbia
## 2 1
## Seychelles Sierra Leone
## 1 2
## Slovakia Slovenia
## 1 1
## Solomon Islands Somalia
## 2 2
## South Africa South Korea
## 3 3
## South Sudan Spain
## 2 1
## Sri Lanka Sudan
## 1 2
## Suriname Sweden
## 1 1
## Switzerland Syria
## 1 1
## Tajikistan Tanzania
## 2 2
## Thailand The Bahamas
## 1 1
## The Gambia Togo
## 2 2
## Tonga Trinidad and Tobago
## 2 1
## Tunisia Turkey
## 1 3
## Turkmenistan Uganda
## 2 2
## Ukraine United Arab Emirates
## 1 1
## United Kingdom Uruguay
## 3 1
## Uzbekistan Vanuatu
## 1 2
## Venezuela Vietnam
## 1 1
## Yemen Zambia
## 2 2
## Zimbabwe
## 2
##
## Within cluster sum of squares by cluster:
## [1] 270.91462 99.21479 386.34519
## (between_SS / total_SS = 47.5 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
# Plot the cluster solution on the standardised data, with non-overlapping
# (repelled) country labels.
fviz_cluster(
  object = Clustering,
  data = data_clu_std,
  repel = TRUE,
  labelsize = 1,
  palette = "Set1",
  ggtheme = theme_bw()
)
# Cluster centres: the mean standardised value of every cluster variable in
# each group, used to describe the groups.
Averages <- Clustering[["centers"]]
Averages
## Population Urban Fertility Expectancy Density Land Co2
## 1 -0.2734140 -0.2332168 -0.6266006 0.5607497 0.1126259 -0.21440806 -0.1783443
## 2 -0.1854668 -0.3073531 1.1736972 -1.0811103 -0.2448471 -0.06369196 -0.4329870
## 3 2.0488569 2.2264968 -0.4373090 0.4901055 0.1873923 1.34423284 2.3381569
## GDP
## 1 -0.1641203
## 2 -0.3654631
## 3 2.0457579
# Reshape the cluster centres to long format: one row per cluster/variable
# pair, ready for a profile plot.
Figure <- as.data.frame(Averages)
Figure$id <- seq_len(nrow(Figure))
# Variable names and cluster numbers are taken from `Averages` itself instead
# of being typed out by hand, so this chunk keeps working when the set of
# cluster variables or the number of clusters changes.
Figure <- pivot_longer(Figure, cols = all_of(colnames(Averages)))
Figure$Group <- factor(Figure$id)
# Keep the variables in their original column order on the x axis.
Figure$ImeF <- factor(Figure$name, levels = colnames(Averages))

library(ggplot2)
# Profile plot of the cluster centres; the horizontal line at 0 marks the
# overall mean of each standardised variable.
# NOTE(review): the y range is fixed; ggplot2 drops values outside ylim(), so
# adjust the limits if the centres change.
ggplot(Figure, aes(x = ImeF, y = value)) +
  geom_hline(yintercept = 0) +
  theme_bw() +
  geom_point(aes(shape = Group, col = Group), size = 3) +
  geom_line(aes(group = id), linewidth = 1) +
  ylab("Averages") +
  xlab("Cluster variables") +
  scale_color_brewer(palette = "Set1") +
  ylim(-1.5, 2.4) +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.50, size = 10))
# Attach each country's cluster number to the data (assigning units to groups).
data[["Group"]] <- Clustering[["cluster"]]

# One-way ANOVA of every cluster variable on cluster membership; summary()
# reports each response separately.
fit <- aov(
  cbind(Population, Urban, Fertility, Expectancy, Density, Land, Co2, GDP) ~
    as.factor(Group),
  data = data
)
summary(fit)
## Response Population :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(Group) 2 152912 76456 87.835 < 2.2e-16 ***
## Residuals 178 154940 870
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Urban :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(Group) 2 68514 34257 125.87 < 2.2e-16 ***
## Residuals 178 48444 272
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Fertility :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(Group) 2 210.165 105.083 215.1 < 2.2e-16 ***
## Residuals 178 86.957 0.489
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Expectancy :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(Group) 2 6010.2 3005.11 132.33 < 2.2e-16 ***
## Residuals 178 4042.3 22.71
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Density :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(Group) 2 420575 210288 2.8606 0.05987 .
## Residuals 178 13085123 73512
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Land :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(Group) 2 57577518 28788759 24.797 3.163e-10 ***
## Residuals 178 206655453 1160986
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Co2 :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(Group) 2 3054916 1527458 171.58 < 2.2e-16 ***
## Residuals 178 1584633 8902
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response GDP :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(Group) 2 3.6536e+13 1.8268e+13 89.765 < 2.2e-16 ***
## Residuals 178 3.6225e+13 2.0351e+11
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Drop Urban and Density from the standardised cluster variables. The columns
# are selected by name instead of by position (2 and 5), so re-running this
# chunk cannot remove further columns by accident.
data_clu_std <- data_clu_std[
  ,
  setdiff(names(data_clu_std), c("Urban", "Density"))
]
head(data_clu_std)
## Population Fertility Expectancy Land Co2
## Afghanistan 0.3328053 1.3446765 -1.0170554 0.1100183 -0.4397806
## Albania -0.5180483 -0.8735882 0.8563315 -0.4045780 -0.4655426
## Algeria 0.4539810 0.2160857 0.6154675 1.5374853 0.4405484
## Angola 0.1824873 2.1619319 -1.5121647 0.6006693 -0.2777032
## Antigua and Barbuda -0.5847164 -0.5856029 0.6422301 -0.4279398 -0.4903266
## Argentina 0.4995758 -0.3754516 0.5887048 1.8665220 0.7603431
## GDP
## Afghanistan -0.3703043
## Albania -0.3763177
## Algeria -0.1329828
## Angola -0.2515010
## Antigua and Barbuda -0.3976302
## Argentina 0.3069024
# Hopkins statistic for the reduced set of cluster variables; no plot is drawn.
get_clust_tendency(
  data = data_clu_std,
  n = nrow(data_clu_std) - 1,
  graph = FALSE
)
## $hopkins_stat
## [1] 0.8792361
##
## $plot
## NULL
# Compare all NbClust indices for k-means solutions with 2 to 10 clusters on
# the reduced set of cluster variables, based on Euclidean distances.
NbClust(
  data = data_clu_std,
  method = "kmeans",
  distance = "euclidean",
  index = "all",
  min.nc = 2,
  max.nc = 10
)
## *** : The Hubert index is a graphical method of determining the number of clusters.
## In the plot of Hubert index, we seek a significant knee that corresponds to a
## significant increase of the value of the measure i.e the significant peak in Hubert
## index second differences plot.
##
## *** : The D index is a graphical method of determining the number of clusters.
## In the plot of D index, we seek a significant knee (the significant peak in Dindex
## second differences plot) that corresponds to a significant increase of the value of
## the measure.
##
## *******************************************************************
## * Among all indices:
## * 4 proposed 2 as the best number of clusters
## * 6 proposed 3 as the best number of clusters
## * 3 proposed 4 as the best number of clusters
## * 1 proposed 5 as the best number of clusters
## * 3 proposed 6 as the best number of clusters
## * 4 proposed 7 as the best number of clusters
## * 1 proposed 8 as the best number of clusters
## * 1 proposed 10 as the best number of clusters
##
## ***** Conclusion *****
##
## * According to the majority rule, the best number of clusters is 3
##
##
## *******************************************************************
## $All.index
## KL CH Hartigan CCC Scott Marriot TrCovW TraceW
## 2 0.2422 62.6290 111.5737 -4.6211 272.9122 888260140103 19507.9132 800.0696
## 3 5.5044 106.0246 34.0186 1.3651 526.0447 493574093763 8746.1208 492.8610
## 4 6.0863 94.9946 16.2779 2.1810 746.4646 259627397491 7323.3318 413.7808
## 5 0.0581 81.4050 68.6582 1.4113 838.4084 244095043383 5910.5482 378.9322
## 6 2.3457 103.6684 37.0880 8.6933 1168.5783 56716091046 2597.0969 272.5928
## 7 4.4488 110.2469 14.3215 11.6827 1295.6997 38245705913 1435.4000 224.9243
## 8 13.3642 103.7215 7.4023 11.5682 1383.1176 30818716176 1173.0759 207.8192
## 9 0.0445 95.0125 17.3786 10.6878 1451.0788 26794913190 1116.7071 199.2920
## 10 0.7206 94.3679 -8.6804 11.5191 1519.5870 22656219803 972.9153 181.0037
## Friedman Rubin Cindex DB Silhouette Duda Pseudot2 Beale Ratkowsky
## 2 7.7310 1.3499 0.1967 1.4699 0.3513 2.6632 -43.7156 -2.3645 0.2718
## 3 15.3979 2.1913 0.1382 1.0463 0.4399 1.5778 -55.6611 -1.3832 0.4178
## 4 21.5852 2.6101 0.1161 1.3264 0.4525 1.1200 -7.1780 -0.4045 0.3894
## 5 24.3347 2.8501 0.1051 1.3690 0.3425 1.0856 -1.4197 -0.2856 0.3565
## 6 33.8948 3.9620 0.1490 1.0739 0.3611 1.1089 -3.6346 -0.3644 0.3513
## 7 35.7020 4.8016 0.1389 0.9693 0.3544 0.8288 16.3202 0.7846 0.3362
## 8 38.8781 5.1968 0.1301 1.0689 0.2771 1.0771 -2.2917 -0.2593 0.3176
## 9 43.3198 5.4192 0.1266 1.1222 0.2473 3.6025 -11.5586 -2.6250 0.3009
## 10 45.2434 5.9667 0.1181 1.0901 0.2835 1.7192 -8.3669 -1.4945 0.2885
## Ball Ptbiserial Frey McClain Dunn Hubert SDindex Dindex SDbw
## 2 400.0348 0.2564 -0.6657 0.5792 0.0336 0.0009 2.3833 1.5457 1.4849
## 3 164.2870 0.5676 0.4026 0.4501 0.0220 0.0023 3.6000 1.2540 1.5496
## 4 103.4452 0.5901 2.5029 0.5036 0.0220 0.0025 4.3579 1.1615 1.7996
## 5 75.7864 0.5179 -0.2362 0.7456 0.0236 0.0026 4.2558 1.0788 1.5489
## 6 45.4321 0.5246 1.0436 0.7326 0.0342 0.0026 3.1257 0.9907 1.0153
## 7 32.1320 0.5074 3.6706 0.8165 0.0269 0.0026 2.8580 0.9280 0.8709
## 8 25.9774 0.4162 3.4894 1.3143 0.0225 0.0027 3.3904 0.8656 0.7862
## 9 22.1436 0.3729 -1.1954 1.6890 0.0187 0.0028 3.6347 0.8311 0.6989
## 10 18.1004 0.4181 -6.5105 1.3156 0.0225 0.0028 3.2817 0.8135 0.6586
##
## $All.CriticalValues
## CritValue_Duda CritValue_PseudoT2 Fvalue_Beale
## 2 0.6791 33.0743 1.0000
## 3 0.6643 76.8148 1.0000
## 4 0.6622 34.1756 1.0000
## 5 0.4889 18.8164 1.0000
## 6 0.5748 27.3663 1.0000
## 7 0.7006 33.7630 0.5823
## 8 0.4889 33.4513 1.0000
## 9 0.4997 16.0183 1.0000
## 10 0.4503 24.4188 1.0000
##
## $Best.nc
## KL CH Hartigan CCC Scott Marriot
## Number_clusters 8.0000 7.0000 3.0000 7.0000 6.0000 4
## Value_Index 13.3642 110.2469 77.5551 11.6827 330.1699 218414342163
## TrCovW TraceW Friedman Rubin Cindex DB Silhouette
## Number_clusters 3.00 3.0000 6.00 7.0000 5.0000 7.0000 4.0000
## Value_Index 10761.79 228.1284 9.56 -0.4445 0.1051 0.9693 0.4525
## Duda PseudoT2 Beale Ratkowsky Ball PtBiserial Frey
## Number_clusters 2.0000 2.0000 2.0000 3.0000 3.0000 4.0000 1
## Value_Index 2.6632 -43.7156 -2.3645 0.4178 235.7478 0.5901 NA
## McClain Dunn Hubert SDindex Dindex SDbw
## Number_clusters 3.0000 6.0000 0 2.0000 0 10.0000
## Value_Index 0.4501 0.0342 0 2.3833 0 0.6586
##
## $Best.partition
## Afghanistan Albania
## 1 2
## Algeria Angola
## 2 1
## Antigua and Barbuda Argentina
## 2 2
## Armenia Australia
## 2 3
## Austria Azerbaijan
## 2 2
## Bahrain Bangladesh
## 2 2
## Barbados Belarus
## 2 2
## Belgium Belize
## 2 2
## Benin Bhutan
## 1 2
## Bolivia Bosnia and Herzegovina
## 2 2
## Botswana Brazil
## 2 3
## Brunei Bulgaria
## 2 2
## Burkina Faso Burundi
## 1 1
## Cambodia Cameroon
## 2 1
## Canada Cape Verde
## 3 2
## Central African Republic Chad
## 1 1
## Chile Colombia
## 2 2
## Comoros Costa Rica
## 1 2
## Croatia Cuba
## 2 2
## Cyprus Czech Republic
## 2 2
## Democratic Republic of the Congo Denmark
## 1 2
## Djibouti Dominica
## 2 2
## Dominican Republic East Timor
## 2 1
## Ecuador Egypt
## 2 2
## El Salvador Equatorial Guinea
## 2 1
## Eritrea Estonia
## 1 2
## Ethiopia Federated States of Micronesia
## 1 1
## Fiji Finland
## 2 2
## France Gabon
## 3 1
## Georgia Germany
## 2 3
## Ghana Greece
## 1 2
## Grenada Guatemala
## 2 2
## Guinea Guinea-Bissau
## 1 1
## Guyana Haiti
## 2 1
## Honduras Hungary
## 2 2
## Iceland Indonesia
## 2 3
## Iran Iraq
## 3 1
## Israel Italy
## 2 3
## Ivory Coast Jamaica
## 1 2
## Japan Jordan
## 3 2
## Kazakhstan Kenya
## 2 1
## Kiribati Kuwait
## 1 2
## Kyrgyzstan Laos
## 2 2
## Latvia Lebanon
## 2 2
## Lesotho Liberia
## 1 1
## Libya Liechtenstein
## 2 2
## Lithuania Luxembourg
## 2 2
## Madagascar Malawi
## 1 1
## Malaysia Maldives
## 2 2
## Mali Malta
## 1 2
## Marshall Islands Mauritania
## 1 1
## Mauritius Mexico
## 2 3
## Moldova Mongolia
## 2 2
## Montenegro Morocco
## 2 2
## Mozambique Myanmar
## 1 2
## Namibia Nepal
## 1 2
## Netherlands New Zealand
## 2 2
## Nicaragua Niger
## 2 1
## Nigeria North Korea
## 1 2
## Norway Oman
## 2 2
## Pakistan Palau
## 3 2
## Panama Papua New Guinea
## 2 1
## Paraguay Peru
## 2 2
## Philippines Poland
## 2 2
## Portugal Qatar
## 2 2
## Republic of Ireland Republic of the Congo
## 2 1
## Romania Rwanda
## 2 1
## Saint Kitts and Nevis Saint Lucia
## 2 2
## Saint Vincent and the Grenadines Samoa
## 2 1
## Sao Tome and Principe Saudi Arabia
## 1 3
## Senegal Serbia
## 1 2
## Seychelles Sierra Leone
## 2 1
## Slovakia Slovenia
## 2 2
## Solomon Islands Somalia
## 1 1
## South Africa South Korea
## 3 3
## South Sudan Spain
## 1 2
## Sri Lanka Sudan
## 2 1
## Suriname Sweden
## 2 2
## Switzerland Syria
## 2 2
## Tajikistan Tanzania
## 1 1
## Thailand The Bahamas
## 2 2
## The Gambia Togo
## 1 1
## Tonga Trinidad and Tobago
## 1 2
## Tunisia Turkey
## 2 3
## Turkmenistan Uganda
## 2 1
## Ukraine United Arab Emirates
## 2 2
## United Kingdom Uruguay
## 3 2
## Uzbekistan Vanuatu
## 2 1
## Venezuela Vietnam
## 2 2
## Yemen Zambia
## 1 1
## Zimbabwe
## 1
# K-means starts from randomly chosen centres (25 random starts here), so
# without a fixed RNG seed the partition, and in particular the numbering of
# the clusters, can change from run to run. Seed the generator so that the
# solution and every interpretation built on the cluster numbers is
# reproducible.
# NOTE(review): cluster numbers in previously rendered output may not match a
# seeded run - re-render the document after applying this.
set.seed(123)
Clustering <- kmeans(data_clu_std,
                     centers = 3,  # number of groups
                     nstart = 25)  # number of random sets of initial centres
Clustering
## K-means clustering with 3 clusters of sizes 111, 57, 13
##
## Cluster means:
## Population Fertility Expectancy Land Co2 GDP
## 1 -0.20656591 -0.5743135 0.4820158 -0.18710086 -0.1335571 -0.1596184
## 2 0.00843218 1.2955182 -1.1626067 -0.05825548 -0.4080548 -0.3452313
## 3 1.72678320 -0.7765952 0.9819102 1.85298139 2.9295358 2.8766021
##
## Clustering vector:
## Afghanistan Albania
## 2 1
## Algeria Angola
## 1 2
## Antigua and Barbuda Argentina
## 1 1
## Armenia Australia
## 1 3
## Austria Azerbaijan
## 1 1
## Bahrain Bangladesh
## 1 1
## Barbados Belarus
## 1 1
## Belgium Belize
## 1 1
## Benin Bhutan
## 2 1
## Bolivia Bosnia and Herzegovina
## 1 1
## Botswana Brazil
## 1 3
## Brunei Bulgaria
## 1 1
## Burkina Faso Burundi
## 2 2
## Cambodia Cameroon
## 1 2
## Canada Cape Verde
## 3 1
## Central African Republic Chad
## 2 2
## Chile Colombia
## 1 1
## Comoros Costa Rica
## 2 1
## Croatia Cuba
## 1 1
## Cyprus Czech Republic
## 1 1
## Democratic Republic of the Congo Denmark
## 2 1
## Djibouti Dominica
## 1 1
## Dominican Republic East Timor
## 1 2
## Ecuador Egypt
## 1 1
## El Salvador Equatorial Guinea
## 1 2
## Eritrea Estonia
## 2 1
## Ethiopia Federated States of Micronesia
## 2 2
## Fiji Finland
## 1 1
## France Gabon
## 3 2
## Georgia Germany
## 1 3
## Ghana Greece
## 2 1
## Grenada Guatemala
## 1 1
## Guinea Guinea-Bissau
## 2 2
## Guyana Haiti
## 1 2
## Honduras Hungary
## 1 1
## Iceland Indonesia
## 1 3
## Iran Iraq
## 3 2
## Israel Italy
## 1 3
## Ivory Coast Jamaica
## 2 1
## Japan Jordan
## 3 1
## Kazakhstan Kenya
## 1 2
## Kiribati Kuwait
## 2 1
## Kyrgyzstan Laos
## 1 1
## Latvia Lebanon
## 1 1
## Lesotho Liberia
## 2 2
## Libya Liechtenstein
## 1 1
## Lithuania Luxembourg
## 1 1
## Madagascar Malawi
## 2 2
## Malaysia Maldives
## 1 1
## Mali Malta
## 2 1
## Marshall Islands Mauritania
## 2 2
## Mauritius Mexico
## 1 3
## Moldova Mongolia
## 1 1
## Montenegro Morocco
## 1 1
## Mozambique Myanmar
## 2 1
## Namibia Nepal
## 2 1
## Netherlands New Zealand
## 1 1
## Nicaragua Niger
## 1 2
## Nigeria North Korea
## 2 1
## Norway Oman
## 1 1
## Pakistan Palau
## 2 1
## Panama Papua New Guinea
## 1 2
## Paraguay Peru
## 1 1
## Philippines Poland
## 1 1
## Portugal Qatar
## 1 1
## Republic of Ireland Republic of the Congo
## 1 2
## Romania Rwanda
## 1 2
## Saint Kitts and Nevis Saint Lucia
## 1 1
## Saint Vincent and the Grenadines Samoa
## 1 2
## Sao Tome and Principe Saudi Arabia
## 2 3
## Senegal Serbia
## 2 1
## Seychelles Sierra Leone
## 1 2
## Slovakia Slovenia
## 1 1
## Solomon Islands Somalia
## 2 2
## South Africa South Korea
## 1 3
## South Sudan Spain
## 2 1
## Sri Lanka Sudan
## 1 2
## Suriname Sweden
## 1 1
## Switzerland Syria
## 1 1
## Tajikistan Tanzania
## 2 2
## Thailand The Bahamas
## 1 1
## The Gambia Togo
## 2 2
## Tonga Trinidad and Tobago
## 2 1
## Tunisia Turkey
## 1 1
## Turkmenistan Uganda
## 1 2
## Ukraine United Arab Emirates
## 1 1
## United Kingdom Uruguay
## 3 1
## Uzbekistan Vanuatu
## 1 2
## Venezuela Vietnam
## 1 1
## Yemen Zambia
## 2 2
## Zimbabwe
## 2
##
## Within cluster sum of squares by cluster:
## [1] 162.0459 116.0103 214.0044
## (between_SS / total_SS = 54.4 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
# Plot the cluster solution on the reduced standardised data, with
# non-overlapping (repelled) country labels.
fviz_cluster(
  object = Clustering,
  data = data_clu_std,
  repel = TRUE,
  labelsize = 1,
  palette = "Set1",
  ggtheme = theme_bw()
)
# Cluster centres: the mean standardised value of every cluster variable in
# each group, used to describe the groups.
Averages <- Clustering[["centers"]]
Averages
## Population Fertility Expectancy Land Co2 GDP
## 1 -0.20656591 -0.5743135 0.4820158 -0.18710086 -0.1335571 -0.1596184
## 2 0.00843218 1.2955182 -1.1626067 -0.05825548 -0.4080548 -0.3452313
## 3 1.72678320 -0.7765952 0.9819102 1.85298139 2.9295358 2.8766021
# Reshape the cluster centres to long format: one row per cluster/variable
# pair, ready for a profile plot.
Figure <- as.data.frame(Averages)
Figure$id <- seq_len(nrow(Figure))
# Variable names and cluster numbers are taken from `Averages` itself instead
# of being typed out by hand, so this chunk keeps working when the set of
# cluster variables or the number of clusters changes.
Figure <- pivot_longer(Figure, cols = all_of(colnames(Averages)))
Figure$Group <- factor(Figure$id)
# Keep the variables in their original column order on the x axis.
Figure$ImeF <- factor(Figure$name, levels = colnames(Averages))

library(ggplot2)
# Profile plot of the cluster centres; the horizontal line at 0 marks the
# overall mean of each standardised variable.
# NOTE(review): the y range is fixed; ggplot2 drops values outside ylim(), so
# adjust the limits if the centres change.
ggplot(Figure, aes(x = ImeF, y = value)) +
  geom_hline(yintercept = 0) +
  theme_bw() +
  geom_point(aes(shape = Group, col = Group), size = 3) +
  geom_line(aes(group = id), linewidth = 1) +
  ylab("Averages") +
  xlab("Cluster variables") +
  scale_color_brewer(palette = "Set1") +
  ylim(-1.5, 3) +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.50, size = 10))
# Attach each country's cluster number to the data (assigning units to groups).
data[["Group"]] <- Clustering[["cluster"]]

# One-way ANOVA of every remaining cluster variable on cluster membership;
# summary() reports each response separately.
fit <- aov(
  cbind(Population, Fertility, Expectancy, Land, Co2, GDP) ~
    as.factor(Group),
  data = data
)
summary(fit)
## Response Population :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(Group) 2 74404 37202 28.366 2.026e-11 ***
## Residuals 178 233448 1312
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Fertility :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(Group) 2 231.292 115.65 312.7 < 2.2e-16 ***
## Residuals 178 65.831 0.37
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Expectancy :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(Group) 2 6443.0 3221.5 158.86 < 2.2e-16 ***
## Residuals 178 3609.5 20.3
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Land :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(Group) 2 71512024 35756012 33.025 6.336e-13 ***
## Residuals 178 192720946 1082702
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response Co2 :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(Group) 2 3171371 1585686 192.25 < 2.2e-16 ***
## Residuals 178 1468177 8248
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Response GDP :
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(Group) 2 4.7374e+13 2.3687e+13 166.07 < 2.2e-16 ***
## Residuals 178 2.5388e+13 1.4263e+11
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(rstatix)
# Shapiro-Wilk normality test of Urban, run separately within each cluster.
shapiro_test(group_by(data, Group), Urban)
## # A tibble: 3 × 4
## Group variable statistic p
## <int> <chr> <dbl> <dbl>
## 1 1 Urban 0.689 5.28e-14
## 2 2 Urban 0.522 2.41e-12
## 3 3 Urban 0.861 3.99e- 2
# One-way ANOVA of Urban on cluster membership (replaces the previous `fit`).
fit <- aov(formula = Urban ~ as.factor(Group), data = data)
summary(fit)
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(Group) 2 49608 24804 65.55 <2e-16 ***
## Residuals 178 67350 378
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(onewaytests)
# Welch's heteroscedastic F test of Urban across the clusters.
welch.test(formula = Urban ~ Group, data = data)
##
## Welch's Heteroscedastic F Test (alpha = 0.05)
## -------------------------------------------------------------
## data : Urban and Group
##
## statistic : 10.35983
## num df : 2
## denom df : 28.52274
## p.value : 0.0004149019
##
## Result : Difference is statistically significant.
## -------------------------------------------------------------
# Kruskal-Wallis rank sum test of Urban across the clusters.
kruskal.test(Urban ~ Group, data = data)
##
## Kruskal-Wallis rank sum test
##
## data: Urban by Group
## Kruskal-Wallis chi-squared = 31.077, df = 2, p-value = 1.785e-07
# Effect size for the Kruskal-Wallis test (eta squared based on H).
kruskal_effsize(data, Urban ~ Group)
## # A tibble: 1 × 5
## .y. n effsize method magnitude
## * <chr> <int> <dbl> <chr> <ord>
## 1 Urban 181 0.163 eta2[H] large