# Load the 2022 World Happiness Report table and preview its first six rows.
happy <- read.csv(file = "2022.csv")
head(happy, n = 6L)
## RANK Country Happiness.score Whisker.high Whisker.low
## 1 1 Finland 7,821 7,886 7,756
## 2 2 Denmark 7,636 7,710 7,563
## 3 3 Iceland 7,557 7,651 7,464
## 4 4 Switzerland 7,512 7,586 7,437
## 5 5 Netherlands 7,415 7,471 7,359
## 6 6 Luxembourg* 7,404 7,501 7,307
## Dystopia..1.83....residual Explained.by..GDP.per.capita
## 1 2,518 1,892
## 2 2,226 1,953
## 3 2,320 1,936
## 4 2,153 2,026
## 5 2,137 1,945
## 6 2,042 2,209
## Explained.by..Social.support Explained.by..Healthy.life.expectancy
## 1 1,258 0,775
## 2 1,243 0,777
## 3 1,320 0,803
## 4 1,226 0,822
## 5 1,206 0,787
## 6 1,155 0,790
## Explained.by..Freedom.to.make.life.choices Explained.by..Generosity
## 1 0,736 0,109
## 2 0,719 0,188
## 3 0,718 0,270
## 4 0,677 0,147
## 5 0,651 0,271
## 6 0,700 0,120
## Explained.by..Perceptions.of.corruption
## 1 0,534
## 2 0,532
## 3 0,191
## 4 0,461
## 5 0,419
## 6 0,388
The dataset “World Happiness Report” contains information about
happiness scores and various contributing factors for countries around
the world in 2022. The World Happiness Report contains many factors
besides the overall happiness score, which is why MDS can be useful
for finding similarities and dissimilarities between countries in terms of
quality of life.
The Happiness Score is explained by the following factors:
GDP per capita
Healthy Life Expectancy
Social support
Freedom to make life choices
Generosity
Corruption Perception
I also include the region where each country is located to check whether quality of life is similar within regions. I exclude rank, happiness score and dystopia (a hypothetical anti-happiness benchmark), since they are cumulative variables, and the aim of this analysis is to show how MDS can be useful for identifying similarities between countries in terms of happiness factors.
# Columns 3-12 hold numbers written with a decimal comma (e.g. "7,821"), so
# replace the comma with a point and convert each column to numeric.
# lapply() works column by column and avoids the data-frame-to-matrix
# coercion that apply() performs.
happy[3:12] <- lapply(
  happy[3:12],
  function(x) as.numeric(gsub(",", ".", x, fixed = TRUE))
)

# Short column names, given in the same order as the columns of the file.
# Column 4 is the upper whisker and column 5 the lower one (the header printed
# by head() lists Whisker.high before Whisker.low), so the names follow that
# order.
colnames(happy) <- c(
  "rank", "country", "happy_score", "whisker.high", "whisker.low",
  "dystopia", "gdp_per_capita", "social_support",
  "healthy_life_expectancy", "freedom",
  "generosity", "corrupt_perceptions"
)

# Drop every row that contains at least one missing value.
happy <- na.omit(happy)
# Region lookup table: each element lists the country names, spelled exactly
# as in the data (including any trailing "*"), that belong to that region.
region_members <- list(
  "Scandinavia" = c("Denmark", "Finland", "Norway", "Sweden"),
  "Western Europe" = c(
    "Austria", "Belgium", "France", "Germany", "Ireland", "Luxembourg*",
    "Netherlands", "Switzerland", "United Kingdom", "Iceland"
  ),
  "Southern Europe" = c(
    "Cyprus", "Greece", "Italy", "Malta", "Portugal", "Spain"
  ),
  "Central Europe" = c(
    "Czechia", "Hungary", "Poland", "Slovakia", "Slovenia"
  ),
  "Balkans" = c(
    "Bosnia and Herzegovina", "Bulgaria", "Croatia", "Kosovo", "Montenegro",
    "North Macedonia", "Romania", "Serbia", "Albania"
  ),
  "Eastern Europe" = c("Belarus*", "Russia", "Ukraine", "Moldova"),
  "North America" = c("Canada", "United States"),
  "Latin America" = c(
    "Argentina", "Bolivia", "Brazil", "Chile", "Colombia", "Costa Rica",
    "Dominican Republic", "Ecuador", "El Salvador", "Guatemala*", "Honduras",
    "Jamaica", "Mexico", "Nicaragua", "Panama", "Paraguay", "Peru",
    "Puerto Rico", "Uruguay", "Venezuela"
  ),
  "Middle East" = c(
    "Bahrain", "Iran", "Iraq", "Israel", "Jordan", "Kuwait*", "Lebanon",
    "Oman", "Qatar", "Saudi Arabia", "United Arab Emirates", "Yemen*",
    "North Cyprus*", "Palestinian Territories*", "Afghanistan"
  ),
  "South Asia" = c(
    "Bangladesh", "Bhutan", "India", "Maldives", "Nepal", "Pakistan",
    "Sri Lanka"
  ),
  "Baltic" = c("Lithuania", "Estonia", "Latvia"),
  "Central Asia" = c(
    "Kazakhstan", "Uzbekistan", "Kyrgyzstan", "Turkmenistan*", "Armenia",
    "Azerbaijan*", "Georgia", "Turkey", "Tajikistan"
  ),
  "Southeast Asia" = c(
    "Brunei", "Cambodia", "Indonesia", "Laos", "Malaysia", "Myanmar",
    "Philippines", "Singapore", "Thailand", "Timor-Leste", "Vietnam"
  ),
  "East Asia" = c(
    "China", "Japan", "North Korea", "South Korea", "Mongolia", "Taiwan",
    "Taiwan Province of China", "Hong Kong S.A.R. of China"
  ),
  "Oceania" = c(
    "Australia", "Fiji", "Kiribati", "Marshall Islands", "Micronesia",
    "Nauru", "New Zealand", "Palau", "Papua New Guinea", "Samoa",
    "Solomon Islands", "Tonga", "Tuvalu", "Vanuatu"
  ),
  "Africa" = c(
    "Algeria", "Angola", "Benin", "Botswana*", "Burkina Faso", "Burundi",
    "Cameroon", "Cape Verde", "Central African Republic", "Chad*",
    "Comoros*", "Congo", "Democratic Republic of the Congo", "Djibouti",
    "Egypt", "Equatorial Guinea", "Eritrea", "Eswatini", "Ethiopia", "Gabon",
    "Gambia*", "Ghana", "Guinea", "Guinea-Bissau", "Ivory Coast", "Kenya",
    "Lesotho*", "Liberia*", "Libya*", "Madagascar*", "Malawi", "Mali",
    "Mauritania*", "Mauritius", "Morocco", "Mozambique", "Namibia", "Niger*",
    "Nigeria", "Rwanda*", "Sao Tome and Principe", "Senegal", "Seychelles",
    "Sierra Leone", "Somalia", "South Africa", "South Sudan", "Sudan",
    "Tanzania", "Togo", "Tunisia", "Uganda", "Zambia", "Zimbabwe",
    "Eswatini, Kingdom of*"
  )
)

# Flatten the table into two parallel vectors (country -> region).
# match() returns the position of the first hit, so a country listed in more
# than one region would get the earliest region in the list; a country that is
# not listed gets a missing (character NA) region.
region_countries <- unlist(region_members, use.names = FALSE)
region_labels <- rep(names(region_members), times = lengths(region_members))
happy$region <- region_labels[match(happy$country, region_countries)]
# Use the country names as row labels and keep the region of every row for
# later colouring.
row.names(happy) <- happy$country
regions <- happy$region

# Keep the happiness factors only: drop columns 1-6 and column 13.
excluded_cols <- c(1:6, 13)
happy1 <- happy[, -excluded_cols]

# Pairwise distances between countries (dist() defaults to Euclidean).
dissimilarity_matrix <- dist(happy1)

# Fit the same dissimilarities with three different transformations.
mds_ratio <- mds(delta = dissimilarity_matrix, type = "ratio")
mds_ordinal <- mds(delta = dissimilarity_matrix, type = "ordinal")
mds_mspline <- mds(delta = dissimilarity_matrix, type = "mspline")

# One Shepard diagram per fitted model, in the same order as above.
shepard_fits <- list(mds_ratio, mds_ordinal, mds_mspline)
shepard_titles <- c(
  "Shepard Diagram (Ratio Transformation)",
  "Shepard Diagram (Ordinal Transformation)",
  "Shepard Diagram (mspline Transformation)"
)
for (i in seq_along(shepard_fits)) {
  plot(shepard_fits[[i]], plot.type = "Shepard", main = shepard_titles[i])
}
We can see that the ratio transformation works well; however, the ordinal and mspline transformations seem to fit better for this task.
# Show the fit summary (stress, iterations) of the ratio model.
print(mds_ratio)
##
## Call:
## mds(delta = dissimilarity_matrix, type = "ratio")
##
## Model: Symmetric SMACOF
## Number of objects: 146
## Stress-1 value: 0.091
## Number of iterations: 30
# Show the fit summary (stress, iterations) of the ordinal model.
print(mds_ordinal)
##
## Call:
## mds(delta = dissimilarity_matrix, type = "ordinal")
##
## Model: Symmetric SMACOF
## Number of objects: 146
## Stress-1 value: 0.066
## Number of iterations: 30
# Show the fit summary (stress, iterations) of the mspline model.
print(mds_mspline)
##
## Call:
## mds(delta = dissimilarity_matrix, type = "mspline")
##
## Model: Symmetric SMACOF
## Number of objects: 146
## Stress-1 value: 0.067
## Number of iterations: 29
Indeed, Stress-1 value is lower for ordinal and mspline transformation in comparison with ratio. It seems that ordinal should be chosen, since it has the lowest Stress-1 value = 0.066, which is a good fit. Let’s perform permutation tests to be sure.
# Permutation test for the ratio solution.
# The test is randomised, so fix the seed to make the reported values
# reproducible between runs.
# NOTE(review): no `data` argument is passed; `method.dat` may only take
# effect when `data` is supplied - confirm against the smacof documentation.
set.seed(2022)
perm <- permtest(mds_ratio, method.dat = "euclidean", verbose = FALSE)
perm
##
## Call: permtest.smacof(object = mds_ratio, method.dat = "euclidean",
## verbose = F)
##
## SMACOF Permutation Test
## Number of objects: 146
## Number of replications (permutations): 100
##
## Observed stress value: 0.091
## p-value: <0.001
Based on the output of the ECDF permutation test, the observed stress value of the model with ratio transformation is 0.091. The p-value of the test is less than 0.001, which means that the observed stress value is significant at the 0.001 level.
# Permutation test for the ordinal solution.
# The test is randomised, so fix the seed to make the reported values
# reproducible between runs.
# NOTE(review): no `data` argument is passed; `method.dat` may only take
# effect when `data` is supplied - confirm against the smacof documentation.
set.seed(2022)
perm_ord <- permtest(mds_ordinal, method.dat = "euclidean", verbose = FALSE)
perm_ord
##
## Call: permtest.smacof(object = mds_ordinal, method.dat = "euclidean",
## verbose = F)
##
## SMACOF Permutation Test
## Number of objects: 146
## Number of replications (permutations): 100
##
## Observed stress value: 0.066
## p-value: <0.001
Based on the output of the ECDF permutation test, the observed stress value of the model with ordinal transformation is 0.066. The p-value of the test is less than 0.001, which means that the observed stress value is significant at the 0.001 level. This suggests that the model is a good fit for the data and that the stress value is unlikely to have occurred by chance.
# Permutation test for the mspline solution.
# The test is randomised, so fix the seed to make the reported values
# reproducible between runs.
# NOTE(review): no `data` argument is passed; `method.dat` may only take
# effect when `data` is supplied - confirm against the smacof documentation.
set.seed(2022)
perm_mspline <- permtest(mds_mspline, method.dat = "euclidean", verbose = FALSE)
perm_mspline
##
## Call: permtest.smacof(object = mds_mspline, method.dat = "euclidean",
## verbose = F)
##
## SMACOF Permutation Test
## Number of objects: 146
## Number of replications (permutations): 100
##
## Observed stress value: 0.067
## p-value: <0.001
Based on the output of the ECDF permutation test, the observed stress value of the model with mspline transformation is 0.067. The p-value of the test is less than 0.001, which means that the observed stress value is significant at the 0.001 level. This suggests that the model is a good fit for the data and that the stress value is unlikely to have occurred by chance.
So, we can conclude that it is better to use ordinal type of MDS.
# Print the configuration (coordinates D1/D2 per country) and the stress per
# point of the ordinal solution.
summary(object = mds_ordinal)
##
## Configurations:
## D1 D2
## Finland -0.9892 0.1122
## Denmark -1.0301 0.0418
## Iceland -0.9052 0.1959
## Switzerland -1.0364 -0.0398
## Netherlands -0.9267 0.0041
## Luxembourg* -1.1532 -0.1526
## Sweden -0.9834 0.0279
## Norway -1.0356 0.0162
## Israel -0.6843 0.0707
## New Zealand -0.8886 0.1159
## Austria -0.8193 -0.0245
## Australia -0.8402 0.0473
## Ireland -1.0583 -0.1247
## Germany -0.7768 -0.0997
## Canada -0.8252 0.0234
## United States -0.7867 -0.0349
## United Kingdom -0.7526 -0.0111
## Czechia -0.6597 0.1902
## Belgium -0.6843 -0.1162
## France -0.7499 0.0222
## Bahrain -0.5865 -0.0614
## Slovenia -0.6843 0.1485
## Costa Rica -0.3428 0.0464
## United Arab Emirates -0.7614 -0.2192
## Saudi Arabia -0.6141 -0.0288
## Taiwan Province of China -0.6545 -0.0798
## Singapore -1.1935 -0.2144
## Romania -0.3977 -0.0677
## Spain -0.6493 0.0422
## Uruguay -0.4521 0.1698
## Italy -0.5559 -0.1939
## Kosovo 0.0298 0.1446
## Malta -0.6871 0.0540
## Lithuania -0.5760 0.0798
## Slovakia -0.5353 0.1507
## Estonia -0.7221 0.1218
## Panama -0.4641 0.0219
## Brazil -0.1591 0.0680
## Guatemala* 0.1817 0.0579
## Kazakhstan -0.4647 0.1400
## Cyprus -0.4790 -0.2620
## Latvia -0.5150 0.1112
## Serbia -0.2739 0.0808
## Chile -0.3865 -0.0119
## Nicaragua 0.2124 0.3077
## Mexico -0.1706 -0.0549
## Croatia -0.4837 0.0988
## Poland -0.5435 0.0407
## El Salvador 0.1953 -0.0047
## Kuwait* -0.6232 -0.1577
## Hungary -0.5400 0.1281
## Mauritius -0.3121 0.0961
## Uzbekistan 0.0453 0.4019
## Japan -0.6608 -0.1430
## Honduras 0.3180 0.1606
## Portugal -0.5309 -0.0336
## Argentina -0.3283 0.0550
## Greece -0.3730 -0.3438
## South Korea -0.5314 -0.3177
## Philippines 0.1393 0.1209
## Thailand -0.2837 0.2060
## Moldova -0.0782 0.0763
## Jamaica 0.0020 0.1541
## Kyrgyzstan 0.1954 0.4714
## Belarus* -0.3401 0.2337
## Colombia -0.1149 -0.0124
## Bosnia and Herzegovina -0.1620 0.1258
## Mongolia -0.0940 0.3588
## Dominican Republic -0.2122 0.0287
## Malaysia -0.3394 -0.0725
## Bolivia 0.1620 0.0830
## China -0.2034 0.0050
## Paraguay -0.1592 0.1981
## Peru -0.0207 -0.0350
## Montenegro -0.2508 -0.0101
## Ecuador 0.0172 0.0011
## Vietnam 0.1097 0.1632
## Turkmenistan* -0.3122 0.4680
## North Cyprus* -0.5129 -0.2369
## Russia -0.3675 -0.0279
## Hong Kong S.A.R. of China -0.7866 -0.4066
## Armenia -0.0278 -0.0938
## Tajikistan 0.4130 0.3963
## Nepal 0.5451 0.1244
## Bulgaria -0.3796 0.1113
## Libya* -0.1073 -0.0214
## Indonesia 0.0756 0.3255
## Ivory Coast 0.7342 -0.2279
## North Macedonia -0.0846 -0.0697
## Albania 0.0807 -0.2509
## South Africa -0.0286 0.2691
## Azerbaijan* -0.2086 0.2428
## Gambia* 0.9666 0.1146
## Bangladesh 0.5264 -0.0672
## Laos 0.3630 -0.0649
## Algeria 0.0218 -0.3645
## Liberia* 1.0802 0.1909
## Ukraine -0.0933 0.1435
## Congo 0.8842 -0.2007
## Morocco 0.6331 -0.5548
## Mozambique 1.1880 0.3033
## Cameroon 0.7244 0.0222
## Senegal 0.7747 -0.0789
## Niger* 1.1988 0.1739
## Georgia 0.1066 -0.3341
## Gabon 0.1636 -0.2780
## Iraq 0.3093 -0.1779
## Venezuela 1.6098 0.9931
## Guinea 0.9481 -0.0140
## Iran 0.1216 -0.2478
## Ghana 0.5770 -0.0787
## Turkey -0.2838 -0.4054
## Burkina Faso 0.9817 0.0156
## Cambodia 0.5231 0.1896
## Benin 1.1529 -0.5657
## Comoros* 0.9471 -0.1868
## Uganda 0.7761 0.3116
## Nigeria 0.5824 0.0329
## Kenya 0.6756 -0.0406
## Tunisia 0.2608 -0.3699
## Pakistan 0.7766 -0.2264
## Palestinian Territories* 0.3053 0.1391
## Mali 1.0384 -0.0655
## Namibia 0.2699 -0.0132
## Eswatini, Kingdom of* 0.4422 -0.3325
## Myanmar 0.5014 0.2907
## Sri Lanka -0.0576 0.0192
## Madagascar* 1.1028 0.1236
## Egypt 0.1355 -0.1529
## Chad* 1.2647 0.0089
## Ethiopia 0.7646 0.2409
## Yemen* 0.8022 0.5710
## Mauritania* 0.4393 0.0729
## Jordan 0.1588 -0.1326
## Togo 1.1571 -0.2064
## India 0.6344 -0.3725
## Zambia 0.8077 -0.0173
## Malawi 1.2800 -0.1884
## Tanzania 0.8398 0.0842
## Sierra Leone 1.1996 -0.0529
## Lesotho* 0.9426 0.4802
## Botswana* 0.0665 -0.3008
## Rwanda* 1.2103 -0.7065
## Zimbabwe 0.7788 0.0394
## Lebanon 0.3244 -0.6367
## Afghanistan 1.5507 -0.6220
##
##
## Stress per point (in %):
## Finland Denmark Iceland
## 0.94 0.73 0.45
## Switzerland Netherlands Luxembourg*
## 0.33 0.41 0.30
## Sweden Norway Israel
## 0.90 0.46 0.37
## New Zealand Austria Australia
## 0.98 0.23 0.35
## Ireland Germany Canada
## 0.22 0.36 0.41
## United States United Kingdom Czechia
## 0.60 0.58 0.55
## Belgium France Bahrain
## 0.34 0.33 0.74
## Slovenia Costa Rica United Arab Emirates
## 0.39 0.43 0.80
## Saudi Arabia Taiwan Province of China Singapore
## 0.56 0.24 0.61
## Romania Spain Uruguay
## 0.42 0.49 0.69
## Italy Kosovo Malta
## 0.84 0.53 0.32
## Lithuania Slovakia Estonia
## 0.69 0.84 0.53
## Panama Brazil Guatemala*
## 0.28 0.37 0.56
## Kazakhstan Cyprus Latvia
## 0.23 0.46 0.47
## Serbia Chile Nicaragua
## 0.30 0.41 0.63
## Mexico Croatia Poland
## 0.41 0.60 0.24
## El Salvador Kuwait* Hungary
## 1.07 0.31 0.67
## Mauritius Uzbekistan Japan
## 0.27 0.96 0.48
## Honduras Portugal Argentina
## 0.48 0.72 0.24
## Greece South Korea Philippines
## 1.24 0.32 0.54
## Thailand Moldova Jamaica
## 0.70 0.28 0.40
## Kyrgyzstan Belarus* Colombia
## 0.70 1.98 0.30
## Bosnia and Herzegovina Mongolia Dominican Republic
## 0.65 1.16 0.45
## Malaysia Bolivia China
## 0.65 0.40 0.54
## Paraguay Peru Montenegro
## 0.33 0.38 0.36
## Ecuador Vietnam Turkmenistan*
## 0.36 0.67 0.52
## North Cyprus* Russia Hong Kong S.A.R. of China
## 0.38 0.72 0.48
## Armenia Tajikistan Nepal
## 0.53 0.80 0.47
## Bulgaria Libya* Indonesia
## 0.33 0.47 2.01
## Ivory Coast North Macedonia Albania
## 0.32 0.38 1.04
## South Africa Azerbaijan* Gambia*
## 1.74 1.47 0.40
## Bangladesh Laos Algeria
## 1.22 1.35 3.42
## Liberia* Ukraine Congo
## 0.28 0.47 0.30
## Morocco Mozambique Cameroon
## 0.87 0.52 0.36
## Senegal Niger* Georgia
## 0.38 0.58 0.88
## Gabon Iraq Venezuela
## 0.98 0.59 0.77
## Guinea Iran Ghana
## 0.26 1.08 0.39
## Turkey Burkina Faso Cambodia
## 0.97 0.20 1.37
## Benin Comoros* Uganda
## 0.76 0.63 0.41
## Nigeria Kenya Tunisia
## 0.46 0.34 0.76
## Pakistan Palestinian Territories* Mali
## 0.33 1.21 0.25
## Namibia Eswatini, Kingdom of* Myanmar
## 1.23 3.09 0.96
## Sri Lanka Madagascar* Egypt
## 0.34 0.71 0.52
## Chad* Ethiopia Yemen*
## 0.60 0.36 0.92
## Mauritania* Jordan Togo
## 1.10 0.55 0.26
## India Zambia Malawi
## 1.53 0.32 0.51
## Tanzania Sierra Leone Lesotho*
## 1.04 0.29 1.67
## Botswana* Rwanda* Zimbabwe
## 2.57 2.58 0.63
## Lebanon Afghanistan
## 1.21 1.00
The maximum stress per point is 3.42, and it corresponds to Algeria. The minimum stress per point is 0.20, and it corresponds to Burkina Faso. Overall, we cannot see any points that are represented poorly and should be cut from the dataset.
# Jackknife the ordinal solution and print its stability, cross-validity and
# dispersion measures.
jackknife_fit <- jackmds(mds_ordinal)
jackknife_fit
##
## Call: jackmds.smacofB(object = mds_ordinal)
##
## SMACOF Jackknife
## Number of objects: 146
## Value loss function: 0.4276
## Number of iterations: 3
##
## Stability measure: 1
## Cross validity: 1
## Dispersion: 0
The “Stability measure” of 0.9761 indicates that the MDS solution is quite stable, meaning that if the analysis were to be repeated with a different random sample or subset of the data, the resulting solution would be very similar.
The “Cross validity” measure of 0.9998 indicates that the MDS solution is highly valid, meaning that it accurately reflects the similarities and differences between the original data points.
The “Dispersion” measure of 0.024 represents the average deviation between the original data points and the MDS solution, and suggests that the MDS model fits the data well.
Overall, these results suggest that the MDS solution is a valid and stable representation of the underlying data structure
mds_df <- data.frame(mds_ordinal$conf, regions)
# biplotmds() supplies the point configuration ($model$X) and one coefficient
# column per happiness factor ($coefficients); both are used below.
coef_vec <- biplotmds(mds_ordinal, extvar = happy1[, 1:6])
library(ggrepel)
library(ggforce)

# Country coordinates as a plain data frame, so that ggplot() receives real
# columns instead of a model object with `$` extraction inside aes().
biplot_config <- coef_vec$model$X
biplot_points <- data.frame(
  dim1 = biplot_config[, 1],
  dim2 = biplot_config[, 2],
  country = row.names(biplot_config),
  regions = regions
)

# One row per factor vector. Row 1 / row 2 of the coefficient matrix give the
# arrow tip on the first / second dimension.
biplot_coefs <- coef_vec$coefficients
# The second vector (social_support) is drawn at 1/1.6 of its length so that
# it does not dominate the plot; all other vectors keep their true length.
arrow_shrink <- c(1, 1.6, 1, 1, 1, 1)
biplot_arrows <- data.frame(
  factor_name = colnames(biplot_coefs),
  xend = biplot_coefs[1, ] / arrow_shrink,
  yend = biplot_coefs[2, ] / arrow_shrink,
  # Manual label offsets and sizes that keep the labels from overlapping.
  label_dx = c(0.2, 0, 0.1, 0, 0, 0),
  label_dy = c(0, 0, -0.1, 0, 0, -0.1),
  label_size = c(3, 3, 2, 3, 3, 3)
)

p <- ggplot(
  biplot_points,
  aes(x = dim1, y = dim2, label = country, color = regions)
) +
  geom_point() +
  geom_text_repel(
    size = 2, fontface = "bold", segment.alpha = 0,
    segment.color = "grey50", min.segment.length = 10
  ) +
  labs(x = "", y = "", title = "Country Similarity by Happiness Factors") +
  # Vectors for creation of Biplot.
  # The arrow layers use their own data and do not inherit the point
  # aesthetics; otherwise every arrow and label would be drawn once per
  # country and coloured by region.
  geom_segment(
    data = biplot_arrows,
    aes(x = 0, y = 0, xend = xend, yend = yend),
    arrow = arrow(length = unit(0.2, "cm"), type = "closed"),
    inherit.aes = FALSE
  ) +
  geom_label(
    data = biplot_arrows,
    aes(x = xend + label_dx, y = yend + label_dy, label = factor_name),
    size = biplot_arrows$label_size,
    inherit.aes = FALSE
  )
p
IMPORTANT: the effect of the social_support factor is much larger than that of the other factors and worsened the readability of the graph, so its vector was shortened for the sake of readability. It is useful to look at the interactive plot below the description to see all regions in detail and all true factor effects.
The MDS analysis produced a two-dimensional representation of the dissimilarities among the countries. The resulting plot shows the position of each country in the two-dimensional space, with closer countries representing higher similarities and distant countries representing higher dissimilarities. The graphic is a biplot, which has factors of happiness represented as vectors.
Based on the plot, we can observe that the countries seem to cluster based on their region, but not all of the regions are homogeneous. Countries from Scandinavia, Western Europe, Oceania and North America tend to cluster together on the left side of the plot, which signals that these countries have the highest GDP and healthy life expectancy. They differ in terms of perception of corruption, freedom and social support, but the values for these factors are also high. We can see that the Scandinavian countries are very similar; these countries have higher social support and freedom values than Singapore or Luxembourg and lower perceived corruption.
Some East Asian countries - South Korea and Hong Kong - are in the bottom-left quadrant: they have higher GDP but lower social support and freedom values. Mongolia is happy in terms of social support and freedom, but its GDP creates a huge difference between this country and Hong Kong. All in all, East Asia is quite heterogeneous.
Most of the countries that are closer to the center of the graph are from completely different regions, so it is difficult to highlight regions with high homogeneity here; one such region is Latin America. On the interactive plot, by double-clicking on the Latin America region, you can see that most of the countries from this region are somewhere in the middle of the graph, with one exception - Venezuela, which seems to be a place of anti-happiness despite its OUTSTANDINGLY HUGE GENEROSITY.
Central European, Baltic and Balkan countries are also very similar. Central Asia is heterogeneous: for example, Turkey is on the lower side of the plot, while Turkmenistan, Uzbekistan and Tajikistan are in the top part. Also, Turkmenistan seems to have much more freedom than Turkey, which seems REALLY STRANGE (maybe it is a mistake in the dataset, but it is probably because freedom is a perception-based metric).
The one region that is clearly visible on the graph is Africa. African countries are certainly the outsiders of this rating; they differ quite a lot from each other due to the large number of different countries, but in general it is clearly noticeable that this region is very different from all the others. The difference between Lesotho and Rwanda is huge - not in terms of GDP or healthy life expectancy (these parameters are not very good for either country), but mainly in terms of generosity, social support and freedom. South Asia is the region closest to Africa, and it is quite homogeneous.
The most heterogeneous region is Middle East. Afghanistan, Lebanon, Saudi Arabia and Yemen are totally different. While Afghanistan is an absolute loser in the Happiness race, Lebanon and Iran are closer to the middle of the graph, and citizens of Bahrain and Saudi Arabia seem to be quite happy living in these countries.
Hover the mouse over a point or vector to see which country or factor it represents.
# Country coordinates as a plain data frame, so that ggplot() receives real
# columns instead of a model object with `$` extraction inside aes(). The
# column names are also used when building the plot's mappings.
interactive_config <- coef_vec$model$X
interactive_points <- data.frame(
  dim1 = interactive_config[, 1],
  dim2 = interactive_config[, 2],
  country = row.names(interactive_config),
  regions = regions
)

# One row per factor vector, drawn at its true length. Row 1 / row 2 of the
# coefficient matrix give the arrow tip on the first / second dimension.
interactive_coefs <- coef_vec$coefficients
interactive_arrows <- data.frame(
  factor_name = colnames(interactive_coefs),
  xend = interactive_coefs[1, ],
  yend = interactive_coefs[2, ],
  # Manual label offsets and sizes that keep the labels from overlapping.
  label_dx = c(0.2, 0, 0.1, 0, 0, 0),
  label_dy = c(0, 0, -0.1, 0, 0, -0.1),
  label_size = c(3, 3, 2, 3, 3, 3)
)

p1 <- ggplot(
  interactive_points,
  aes(x = dim1, y = dim2, label = country, color = regions)
) +
  geom_point() +
  labs(
    x = "", y = "",
    title = "Interactive Plot of Country Similarity by Happiness Factors"
  ) +
  # Vectors for creation of Biplot.
  # The arrow layers use their own data and do not inherit the point
  # aesthetics; otherwise every arrow and label would be drawn once per
  # country and coloured by region.
  geom_segment(
    data = interactive_arrows,
    aes(x = 0, y = 0, xend = xend, yend = yend),
    arrow = arrow(length = unit(0.2, "cm"), type = "closed"),
    inherit.aes = FALSE
  ) +
  geom_label(
    data = interactive_arrows,
    aes(x = xend + label_dx, y = yend + label_dy, label = factor_name),
    size = interactive_arrows$label_size,
    inherit.aes = FALSE
  )
ggplotly(p1)
Countries tend to cluster based on their region, but not all regions are homogeneous. Scandinavian, Western European, Oceania, and North American countries cluster together to the left side of the plot, while South Asia and African countries are on the right side.
East Asia is heterogeneous, with countries such as South Korea and Hong Kong having higher GDP but lower social support and freedom values.
Central Europe and Baltic, Balkans countries are quite similar and homogeneous, while the Middle East is the most heterogeneous region, with Afghanistan, Lebanon, Saudi Arabia, and Yemen being vastly different.
The interactive plot reveals that most of the Latin American countries are somewhere in the middle of the graph, except for Venezuela, which seems to be a place of no happiness despite its high generosity.
When viewing various news articles that list countries according to the Happiness index, one may get the impression that these countries are similar, but the plot built using MDS helps to understand in more detail how similar or different particular countries are according to the measured quality-of-life factors.