Dataset

# Load the raw 2022 table. The preview below shows decimal commas
# (e.g. "7,821"), so the score columns still need converting to numeric.
happy <- read.csv(file = "2022.csv")
# Preview the first six rows to check the column layout.
head(happy, n = 6L)
##   RANK     Country Happiness.score Whisker.high Whisker.low
## 1    1     Finland           7,821        7,886       7,756
## 2    2     Denmark           7,636        7,710       7,563
## 3    3     Iceland           7,557        7,651       7,464
## 4    4 Switzerland           7,512        7,586       7,437
## 5    5 Netherlands           7,415        7,471       7,359
## 6    6 Luxembourg*           7,404        7,501       7,307
##   Dystopia..1.83....residual Explained.by..GDP.per.capita
## 1                      2,518                        1,892
## 2                      2,226                        1,953
## 3                      2,320                        1,936
## 4                      2,153                        2,026
## 5                      2,137                        1,945
## 6                      2,042                        2,209
##   Explained.by..Social.support Explained.by..Healthy.life.expectancy
## 1                        1,258                                 0,775
## 2                        1,243                                 0,777
## 3                        1,320                                 0,803
## 4                        1,226                                 0,822
## 5                        1,206                                 0,787
## 6                        1,155                                 0,790
##   Explained.by..Freedom.to.make.life.choices Explained.by..Generosity
## 1                                      0,736                    0,109
## 2                                      0,719                    0,188
## 3                                      0,718                    0,270
## 4                                      0,677                    0,147
## 5                                      0,651                    0,271
## 6                                      0,700                    0,120
##   Explained.by..Perceptions.of.corruption
## 1                                   0,534
## 2                                   0,532
## 3                                   0,191
## 4                                   0,461
## 5                                   0,419
## 6                                   0,388

The “World Happiness Report” dataset contains happiness scores and various contributing factors for countries around the world in 2022. Besides the overall happiness score, the report contains many individual factors, which is why MDS can be useful for finding similarities and dissimilarities between countries in terms of quality of life.

The Happiness Score is explained by the following factors:

GDP per capita
Healthy Life Expectancy
Social support
Freedom to make life choices
Generosity
Corruption Perception

I also include the region where each country is located, to see whether quality of life is similar within regions. I exclude rank, happiness score and dystopia (a hypothetical anti-happiness benchmark), since they are cumulative variables, and the aim of this analysis is to show how MDS can be used to identify similarities between countries in terms of their happiness factors.

Data Preparation

# Convert the decimal-comma text in columns 3 to 12 into numeric values.
# lapply() works column by column, so the data frame is never coerced to a
# character matrix the way apply() would do it.
happy[3:12] <- lapply(
  happy[3:12],
  function(column) as.numeric(gsub(",", ".", column, fixed = TRUE))
)

# Short names, in the same order as the CSV header. The header lists
# Whisker.high before Whisker.low, so the new names must follow that order.
colnames(happy) <- c(
  "rank", "country", "happy_score", "whisker.high", "whisker.low",
  "dystopia", "gdp_per_capita", "social_support",
  "healthy_life_expectancy", "freedom",
  "generosity", "corrupt_perceptions"
)

# Drop rows that contain missing values before assigning regions.
happy <- na.omit(happy)

# Region membership, keyed by region name. Country names must match the
# dataset spelling exactly, including the trailing "*" some entries carry.
region_members <- list(
  "Scandinavia" = c("Denmark", "Finland", "Norway", "Sweden"),
  "Western Europe" = c(
    "Austria", "Belgium", "France", "Germany", "Ireland", "Luxembourg*",
    "Netherlands", "Switzerland", "United Kingdom", "Iceland"
  ),
  "Southern Europe" = c(
    "Cyprus", "Greece", "Italy", "Malta", "Portugal", "Spain"
  ),
  "Central Europe" = c(
    "Czechia", "Hungary", "Poland", "Slovakia", "Slovenia"
  ),
  "Balkans" = c(
    "Bosnia and Herzegovina", "Bulgaria", "Croatia", "Kosovo", "Montenegro",
    "North Macedonia", "Romania", "Serbia", "Albania"
  ),
  "Eastern Europe" = c("Belarus*", "Russia", "Ukraine", "Moldova"),
  "North America" = c("Canada", "United States"),
  "Latin America" = c(
    "Argentina", "Bolivia", "Brazil", "Chile", "Colombia", "Costa Rica",
    "Dominican Republic", "Ecuador", "El Salvador", "Guatemala*", "Honduras",
    "Jamaica", "Mexico", "Nicaragua", "Panama", "Paraguay", "Peru",
    "Puerto Rico", "Uruguay", "Venezuela"
  ),
  "Middle East" = c(
    "Bahrain", "Iran", "Iraq", "Israel", "Jordan", "Kuwait*", "Lebanon",
    "Oman", "Qatar", "Saudi Arabia", "United Arab Emirates", "Yemen*",
    "North Cyprus*", "Palestinian Territories*", "Afghanistan"
  ),
  "South Asia" = c(
    "Bangladesh", "Bhutan", "India", "Maldives", "Nepal", "Pakistan",
    "Sri Lanka"
  ),
  "Baltic" = c("Lithuania", "Estonia", "Latvia"),
  "Central Asia" = c(
    "Kazakhstan", "Uzbekistan", "Kyrgyzstan", "Turkmenistan*", "Armenia",
    "Azerbaijan*", "Georgia", "Turkey", "Tajikistan"
  ),
  "Southeast Asia" = c(
    "Brunei", "Cambodia", "Indonesia", "Laos", "Malaysia", "Myanmar",
    "Philippines", "Singapore", "Thailand", "Timor-Leste", "Vietnam"
  ),
  "East Asia" = c(
    "China", "Japan", "North Korea", "South Korea", "Mongolia", "Taiwan",
    "Taiwan Province of China", "Hong Kong S.A.R. of China"
  ),
  "Oceania" = c(
    "Australia", "Fiji", "Kiribati", "Marshall Islands", "Micronesia",
    "Nauru", "New Zealand", "Palau", "Papua New Guinea", "Samoa",
    "Solomon Islands", "Tonga", "Tuvalu", "Vanuatu"
  ),
  "Africa" = c(
    "Algeria", "Angola", "Benin", "Botswana*", "Burkina Faso", "Burundi",
    "Cameroon", "Cape Verde", "Central African Republic", "Chad*",
    "Comoros*", "Congo", "Democratic Republic of the Congo", "Djibouti",
    "Egypt", "Equatorial Guinea", "Eritrea", "Eswatini", "Ethiopia", "Gabon",
    "Gambia*", "Ghana", "Guinea", "Guinea-Bissau", "Ivory Coast", "Kenya",
    "Lesotho*", "Liberia*", "Libya*", "Madagascar*", "Malawi", "Mali",
    "Mauritania*", "Mauritius", "Morocco", "Mozambique", "Namibia", "Niger*",
    "Nigeria", "Rwanda*", "Sao Tome and Principe", "Senegal", "Seychelles",
    "Sierra Leone", "Somalia", "South Africa", "South Sudan", "Sudan",
    "Tanzania", "Togo", "Tunisia", "Uganda", "Zambia", "Zimbabwe",
    "Eswatini, Kingdom of*"
  )
)

# Flatten the list into two parallel vectors: country name -> region name.
region_countries <- unlist(region_members, use.names = FALSE)
region_labels <- rep(names(region_members), lengths(region_members))

# match() returns the first hit, so the list order above decides any ties,
# and countries that appear in no list get NA (a character NA).
happy$region <- region_labels[match(happy$country, region_countries)]

# Make silent gaps visible: a misspelt or newly added country would
# otherwise end up with an NA region without any notice.
unmapped_countries <- as.character(happy$country[is.na(happy$region)])
if (length(unmapped_countries) > 0) {
  warning(
    "No region assigned for: ",
    paste(unmapped_countries, collapse = ", "),
    call. = FALSE
  )
}
                        
# Label rows by country so later outputs are keyed by country name.
rownames(happy) <- happy[["country"]]
# Keep the region labels in a separate vector, in row order.
regions <- happy[["region"]]

# Drop columns 1-6 (rank, country, score, whiskers, dystopia) and column 13
# (region); what remains are the six factor columns 7-12.
happy1 <- happy[, -c(1:6, 13)]

MDS

Distance Matrix

# Pairwise Euclidean distances between countries over the six factor
# columns ("euclidean" is dist()'s default, spelled out here).
dissimilarity_matrix <- dist(happy1, method = "euclidean")

Shepard Plots & Stress-1 Values

# Fit the same dissimilarities with three transformation types so their
# Shepard diagrams and Stress-1 values can be compared.
mds_ratio <- mds(delta = dissimilarity_matrix, type = "ratio")
mds_ordinal <- mds(delta = dissimilarity_matrix, type = "ordinal")
mds_mspline <- mds(delta = dissimilarity_matrix, type = "mspline")

# One Shepard diagram per fit.
plot(
  mds_ratio,
  main = "Shepard Diagram (Ratio Transformation)",
  plot.type = "Shepard"
)

plot(
  mds_ordinal,
  main = "Shepard Diagram (Ordinal Transformation)",
  plot.type = "Shepard"
)

plot(
  mds_mspline,
  main = "Shepard Diagram (mspline Transformation)",
  plot.type = "Shepard"
)

We can see that the ratio transformation works well; however, the ordinal and mspline transformations seem to fit this task better.

# Show the ratio fit (model, number of objects, Stress-1, iterations).
print(mds_ratio)
## 
## Call:
## mds(delta = dissimilarity_matrix, type = "ratio")
## 
## Model: Symmetric SMACOF 
## Number of objects: 146 
## Stress-1 value: 0.091 
## Number of iterations: 30
# Show the ordinal fit (model, number of objects, Stress-1, iterations).
print(mds_ordinal)
## 
## Call:
## mds(delta = dissimilarity_matrix, type = "ordinal")
## 
## Model: Symmetric SMACOF 
## Number of objects: 146 
## Stress-1 value: 0.066 
## Number of iterations: 30
# Show the mspline fit (model, number of objects, Stress-1, iterations).
print(mds_mspline)
## 
## Call:
## mds(delta = dissimilarity_matrix, type = "mspline")
## 
## Model: Symmetric SMACOF 
## Number of objects: 146 
## Stress-1 value: 0.067 
## Number of iterations: 29

Indeed, Stress-1 value is lower for ordinal and mspline transformation in comparison with ratio. It seems that ordinal should be chosen, since it has the lowest Stress-1 value = 0.066, which is a good fit. Let’s perform permutation tests to be sure.

# Permutation test of the ratio fit's stress value.
# NOTE(review): no `data` argument is passed, so `method.dat` may have no
# effect here - confirm against the smacof permtest documentation.
# `FALSE` is spelled out because `F` is an ordinary, reassignable variable.
perm <- permtest(mds_ratio, method.dat = "euclidean", verbose = FALSE)
perm
## 
## Call: permtest.smacof(object = mds_ratio, method.dat = "euclidean", 
##     verbose = FALSE)
## 
## SMACOF Permutation Test
## Number of objects: 146 
## Number of replications (permutations): 100 
## 
## Observed stress value: 0.091 
## p-value: <0.001

Based on the output of the ECDF permutation test, the observed stress value of the model with ratio transformation is 0.091. The p-value of the test is less than 0.001, which means that the observed stress value is significant at the 0.001 level.

# Permutation test of the ordinal fit's stress value.
# `FALSE` is spelled out because `F` is an ordinary, reassignable variable.
perm_ord <- permtest(mds_ordinal, method.dat = "euclidean", verbose = FALSE)
perm_ord
## 
## Call: permtest.smacof(object = mds_ordinal, method.dat = "euclidean", 
##     verbose = FALSE)
## 
## SMACOF Permutation Test
## Number of objects: 146 
## Number of replications (permutations): 100 
## 
## Observed stress value: 0.066 
## p-value: <0.001

Based on the output of the ECDF permutation test, the observed stress value of the model with ordinal transformation is 0.066. The p-value of the test is less than 0.001, which means that the observed stress value is significant at the 0.001 level. This suggests that the model is a good fit for the data and that the stress value is unlikely to have occurred by chance.

# Permutation test of the mspline fit's stress value.
# `FALSE` is spelled out because `F` is an ordinary, reassignable variable.
perm_mspline <- permtest(
  mds_mspline,
  method.dat = "euclidean",
  verbose = FALSE
)
perm_mspline
## 
## Call: permtest.smacof(object = mds_mspline, method.dat = "euclidean", 
##     verbose = FALSE)
## 
## SMACOF Permutation Test
## Number of objects: 146 
## Number of replications (permutations): 100 
## 
## Observed stress value: 0.067 
## p-value: <0.001

Based on the output of the ECDF permutation test, the observed stress value of the model with mspline transformation is 0.067. The p-value of the test is less than 0.001, which means that the observed stress value is significant at the 0.001 level. This suggests that the model is a good fit for the data and that the stress value is unlikely to have occurred by chance.

So, we can conclude that it is better to use ordinal type of MDS.

Stress Per Point

# Print the 2-D coordinates of every country and each country's share of
# the total stress (in %) for the chosen ordinal solution.
summary(object = mds_ordinal)
## 
## Configurations:
##                                D1      D2
## Finland                   -0.9892  0.1122
## Denmark                   -1.0301  0.0418
## Iceland                   -0.9052  0.1959
## Switzerland               -1.0364 -0.0398
## Netherlands               -0.9267  0.0041
## Luxembourg*               -1.1532 -0.1526
## Sweden                    -0.9834  0.0279
## Norway                    -1.0356  0.0162
## Israel                    -0.6843  0.0707
## New Zealand               -0.8886  0.1159
## Austria                   -0.8193 -0.0245
## Australia                 -0.8402  0.0473
## Ireland                   -1.0583 -0.1247
## Germany                   -0.7768 -0.0997
## Canada                    -0.8252  0.0234
## United States             -0.7867 -0.0349
## United Kingdom            -0.7526 -0.0111
## Czechia                   -0.6597  0.1902
## Belgium                   -0.6843 -0.1162
## France                    -0.7499  0.0222
## Bahrain                   -0.5865 -0.0614
## Slovenia                  -0.6843  0.1485
## Costa Rica                -0.3428  0.0464
## United Arab Emirates      -0.7614 -0.2192
## Saudi Arabia              -0.6141 -0.0288
## Taiwan Province of China  -0.6545 -0.0798
## Singapore                 -1.1935 -0.2144
## Romania                   -0.3977 -0.0677
## Spain                     -0.6493  0.0422
## Uruguay                   -0.4521  0.1698
## Italy                     -0.5559 -0.1939
## Kosovo                     0.0298  0.1446
## Malta                     -0.6871  0.0540
## Lithuania                 -0.5760  0.0798
## Slovakia                  -0.5353  0.1507
## Estonia                   -0.7221  0.1218
## Panama                    -0.4641  0.0219
## Brazil                    -0.1591  0.0680
## Guatemala*                 0.1817  0.0579
## Kazakhstan                -0.4647  0.1400
## Cyprus                    -0.4790 -0.2620
## Latvia                    -0.5150  0.1112
## Serbia                    -0.2739  0.0808
## Chile                     -0.3865 -0.0119
## Nicaragua                  0.2124  0.3077
## Mexico                    -0.1706 -0.0549
## Croatia                   -0.4837  0.0988
## Poland                    -0.5435  0.0407
## El Salvador                0.1953 -0.0047
## Kuwait*                   -0.6232 -0.1577
## Hungary                   -0.5400  0.1281
## Mauritius                 -0.3121  0.0961
## Uzbekistan                 0.0453  0.4019
## Japan                     -0.6608 -0.1430
## Honduras                   0.3180  0.1606
## Portugal                  -0.5309 -0.0336
## Argentina                 -0.3283  0.0550
## Greece                    -0.3730 -0.3438
## South Korea               -0.5314 -0.3177
## Philippines                0.1393  0.1209
## Thailand                  -0.2837  0.2060
## Moldova                   -0.0782  0.0763
## Jamaica                    0.0020  0.1541
## Kyrgyzstan                 0.1954  0.4714
## Belarus*                  -0.3401  0.2337
## Colombia                  -0.1149 -0.0124
## Bosnia and Herzegovina    -0.1620  0.1258
## Mongolia                  -0.0940  0.3588
## Dominican Republic        -0.2122  0.0287
## Malaysia                  -0.3394 -0.0725
## Bolivia                    0.1620  0.0830
## China                     -0.2034  0.0050
## Paraguay                  -0.1592  0.1981
## Peru                      -0.0207 -0.0350
## Montenegro                -0.2508 -0.0101
## Ecuador                    0.0172  0.0011
## Vietnam                    0.1097  0.1632
## Turkmenistan*             -0.3122  0.4680
## North Cyprus*             -0.5129 -0.2369
## Russia                    -0.3675 -0.0279
## Hong Kong S.A.R. of China -0.7866 -0.4066
## Armenia                   -0.0278 -0.0938
## Tajikistan                 0.4130  0.3963
## Nepal                      0.5451  0.1244
## Bulgaria                  -0.3796  0.1113
## Libya*                    -0.1073 -0.0214
## Indonesia                  0.0756  0.3255
## Ivory Coast                0.7342 -0.2279
## North Macedonia           -0.0846 -0.0697
## Albania                    0.0807 -0.2509
## South Africa              -0.0286  0.2691
## Azerbaijan*               -0.2086  0.2428
## Gambia*                    0.9666  0.1146
## Bangladesh                 0.5264 -0.0672
## Laos                       0.3630 -0.0649
## Algeria                    0.0218 -0.3645
## Liberia*                   1.0802  0.1909
## Ukraine                   -0.0933  0.1435
## Congo                      0.8842 -0.2007
## Morocco                    0.6331 -0.5548
## Mozambique                 1.1880  0.3033
## Cameroon                   0.7244  0.0222
## Senegal                    0.7747 -0.0789
## Niger*                     1.1988  0.1739
## Georgia                    0.1066 -0.3341
## Gabon                      0.1636 -0.2780
## Iraq                       0.3093 -0.1779
## Venezuela                  1.6098  0.9931
## Guinea                     0.9481 -0.0140
## Iran                       0.1216 -0.2478
## Ghana                      0.5770 -0.0787
## Turkey                    -0.2838 -0.4054
## Burkina Faso               0.9817  0.0156
## Cambodia                   0.5231  0.1896
## Benin                      1.1529 -0.5657
## Comoros*                   0.9471 -0.1868
## Uganda                     0.7761  0.3116
## Nigeria                    0.5824  0.0329
## Kenya                      0.6756 -0.0406
## Tunisia                    0.2608 -0.3699
## Pakistan                   0.7766 -0.2264
## Palestinian Territories*   0.3053  0.1391
## Mali                       1.0384 -0.0655
## Namibia                    0.2699 -0.0132
## Eswatini, Kingdom of*      0.4422 -0.3325
## Myanmar                    0.5014  0.2907
## Sri Lanka                 -0.0576  0.0192
## Madagascar*                1.1028  0.1236
## Egypt                      0.1355 -0.1529
## Chad*                      1.2647  0.0089
## Ethiopia                   0.7646  0.2409
## Yemen*                     0.8022  0.5710
## Mauritania*                0.4393  0.0729
## Jordan                     0.1588 -0.1326
## Togo                       1.1571 -0.2064
## India                      0.6344 -0.3725
## Zambia                     0.8077 -0.0173
## Malawi                     1.2800 -0.1884
## Tanzania                   0.8398  0.0842
## Sierra Leone               1.1996 -0.0529
## Lesotho*                   0.9426  0.4802
## Botswana*                  0.0665 -0.3008
## Rwanda*                    1.2103 -0.7065
## Zimbabwe                   0.7788  0.0394
## Lebanon                    0.3244 -0.6367
## Afghanistan                1.5507 -0.6220
## 
## 
## Stress per point (in %):
##                   Finland                   Denmark                   Iceland 
##                      0.94                      0.73                      0.45 
##               Switzerland               Netherlands               Luxembourg* 
##                      0.33                      0.41                      0.30 
##                    Sweden                    Norway                    Israel 
##                      0.90                      0.46                      0.37 
##               New Zealand                   Austria                 Australia 
##                      0.98                      0.23                      0.35 
##                   Ireland                   Germany                    Canada 
##                      0.22                      0.36                      0.41 
##             United States            United Kingdom                   Czechia 
##                      0.60                      0.58                      0.55 
##                   Belgium                    France                   Bahrain 
##                      0.34                      0.33                      0.74 
##                  Slovenia                Costa Rica      United Arab Emirates 
##                      0.39                      0.43                      0.80 
##              Saudi Arabia  Taiwan Province of China                 Singapore 
##                      0.56                      0.24                      0.61 
##                   Romania                     Spain                   Uruguay 
##                      0.42                      0.49                      0.69 
##                     Italy                    Kosovo                     Malta 
##                      0.84                      0.53                      0.32 
##                 Lithuania                  Slovakia                   Estonia 
##                      0.69                      0.84                      0.53 
##                    Panama                    Brazil                Guatemala* 
##                      0.28                      0.37                      0.56 
##                Kazakhstan                    Cyprus                    Latvia 
##                      0.23                      0.46                      0.47 
##                    Serbia                     Chile                 Nicaragua 
##                      0.30                      0.41                      0.63 
##                    Mexico                   Croatia                    Poland 
##                      0.41                      0.60                      0.24 
##               El Salvador                   Kuwait*                   Hungary 
##                      1.07                      0.31                      0.67 
##                 Mauritius                Uzbekistan                     Japan 
##                      0.27                      0.96                      0.48 
##                  Honduras                  Portugal                 Argentina 
##                      0.48                      0.72                      0.24 
##                    Greece               South Korea               Philippines 
##                      1.24                      0.32                      0.54 
##                  Thailand                   Moldova                   Jamaica 
##                      0.70                      0.28                      0.40 
##                Kyrgyzstan                  Belarus*                  Colombia 
##                      0.70                      1.98                      0.30 
##    Bosnia and Herzegovina                  Mongolia        Dominican Republic 
##                      0.65                      1.16                      0.45 
##                  Malaysia                   Bolivia                     China 
##                      0.65                      0.40                      0.54 
##                  Paraguay                      Peru                Montenegro 
##                      0.33                      0.38                      0.36 
##                   Ecuador                   Vietnam             Turkmenistan* 
##                      0.36                      0.67                      0.52 
##             North Cyprus*                    Russia Hong Kong S.A.R. of China 
##                      0.38                      0.72                      0.48 
##                   Armenia                Tajikistan                     Nepal 
##                      0.53                      0.80                      0.47 
##                  Bulgaria                    Libya*                 Indonesia 
##                      0.33                      0.47                      2.01 
##               Ivory Coast           North Macedonia                   Albania 
##                      0.32                      0.38                      1.04 
##              South Africa               Azerbaijan*                   Gambia* 
##                      1.74                      1.47                      0.40 
##                Bangladesh                      Laos                   Algeria 
##                      1.22                      1.35                      3.42 
##                  Liberia*                   Ukraine                     Congo 
##                      0.28                      0.47                      0.30 
##                   Morocco                Mozambique                  Cameroon 
##                      0.87                      0.52                      0.36 
##                   Senegal                    Niger*                   Georgia 
##                      0.38                      0.58                      0.88 
##                     Gabon                      Iraq                 Venezuela 
##                      0.98                      0.59                      0.77 
##                    Guinea                      Iran                     Ghana 
##                      0.26                      1.08                      0.39 
##                    Turkey              Burkina Faso                  Cambodia 
##                      0.97                      0.20                      1.37 
##                     Benin                  Comoros*                    Uganda 
##                      0.76                      0.63                      0.41 
##                   Nigeria                     Kenya                   Tunisia 
##                      0.46                      0.34                      0.76 
##                  Pakistan  Palestinian Territories*                      Mali 
##                      0.33                      1.21                      0.25 
##                   Namibia     Eswatini, Kingdom of*                   Myanmar 
##                      1.23                      3.09                      0.96 
##                 Sri Lanka               Madagascar*                     Egypt 
##                      0.34                      0.71                      0.52 
##                     Chad*                  Ethiopia                    Yemen* 
##                      0.60                      0.36                      0.92 
##               Mauritania*                    Jordan                      Togo 
##                      1.10                      0.55                      0.26 
##                     India                    Zambia                    Malawi 
##                      1.53                      0.32                      0.51 
##                  Tanzania              Sierra Leone                  Lesotho* 
##                      1.04                      0.29                      1.67 
##                 Botswana*                   Rwanda*                  Zimbabwe 
##                      2.57                      2.58                      0.63 
##                   Lebanon               Afghanistan 
##                      1.21                      1.00

The maximum stress per point is 3.42, and it corresponds to Algeria. The minimum stress per point is 0.20, and it corresponds to Burkina Faso. Overall, we cannot see any points that are represented poorly and should be cut from the dataset.

Stability of a solution : jackknife

# Jackknife check of the ordinal solution; the printout reports the
# stability measure, cross validity and dispersion.
jackmds(object = mds_ordinal)
## 
## Call: jackmds.smacofB(object = mds_ordinal)
## 
## SMACOF Jackknife
## Number of objects: 146 
## Value loss function: 0.4276 
## Number of iterations: 3 
## 
## Stability measure: 1 
## Cross validity: 1 
## Dispersion: 0

The “Stability measure”, printed above as 1, indicates that the MDS solution is very stable: if the analysis were repeated on jackknife subsets of the data, the resulting solutions would be practically identical to the one obtained from the full dataset.

The “Cross validity” measure, also printed above as 1, indicates that the MDS solution is highly valid, meaning that it accurately reflects the similarities and differences between the original data points.

The “Dispersion” measure, printed above as 0, summarises how far the jackknife solutions scatter around the solution for the full dataset; a value this close to zero suggests that the MDS model fits the data well.

Overall, these results suggest that the MDS solution is a valid and stable representation of the underlying data structure

Visualisation

library(ggrepel)
library(ggforce)

# Country coordinates plus region label (kept under its original name).
mds_df <- data.frame(mds_ordinal$conf, regions)
# Relate the six factor columns to the MDS configuration. The plot below
# reads one arrow per factor from the coefficient matrix: row 1 is the
# horizontal component, row 2 the vertical one.
coef_vec <- biplotmds(mds_ordinal, extvar = happy1[, 1:6])

# Point layer data: one row per country. ggplot() needs a real data frame,
# and plain column names avoid `$` extraction inside aes().
points_df <- data.frame(
  x = coef_vec$model$X[, 1],
  y = coef_vec$model$X[, 2],
  country = row.names(coef_vec$model$X),
  regions = regions
)

# Arrow layer data: one row per factor, in coefficient-column order.
factor_coefs <- coef_vec$coefficients
# Only the second vector (social_support) is shortened, by a factor of 1.6,
# to keep the plot readable; all other vectors keep their true length.
vector_shrink <- c(1, 1.6, 1, 1, 1, 1)
arrows_df <- data.frame(
  factor_name = colnames(factor_coefs),
  xend = factor_coefs[1, ] / vector_shrink,
  yend = factor_coefs[2, ] / vector_shrink,
  # Manual nudges and sizes that keep the labels from overlapping.
  label_dx = c(0.2, 0, 0.1, 0, 0, 0),
  label_dy = c(0, 0, -0.1, 0, 0, -0.1),
  label_size = c(3, 3, 2, 3, 3, 3)
)

p <- ggplot(points_df, aes(x = x, y = y, color = regions)) +
  geom_point() +
  geom_text_repel(
    aes(label = country),
    size = 2, fontface = "bold", segment.alpha = 0,
    segment.color = "grey50", min.segment.length = 10
  ) +
  labs(x = "", y = "", title = "Country Similarity by Happiness Factors") +
  # Biplot vectors. inherit.aes = FALSE keeps these layers on their own
  # six-row data, so each arrow and label is drawn once instead of once per
  # country, and is not coloured by region.
  geom_segment(
    data = arrows_df,
    aes(x = 0, y = 0, xend = xend, yend = yend),
    arrow = arrow(length = unit(0.2, "cm"), type = "closed"),
    inherit.aes = FALSE
  ) +
  geom_label(
    data = arrows_df,
    aes(x = xend + label_dx, y = yend + label_dy, label = factor_name),
    size = arrows_df$label_size,
    inherit.aes = FALSE
  )

p

IMPORTANT: the effect of the social_support factor is much larger than that of the other factors, which made the graph hard to read, so its vector was shortened for the sake of readability. The interactive plot below the description shows all regions in detail and the true effects of all factors.

The MDS analysis produced a two-dimensional representation of the dissimilarities among the countries. The resulting plot shows the position of each country in the two-dimensional space, with closer countries representing higher similarities and distant countries representing higher dissimilarities. The graphic is a biplot, which has factors of happiness represented as vectors.

Based on the plot, we can observe that the countries seem to cluster by region, but not all of the regions are homogeneous. Countries from Scandinavia, Western Europe, Oceania and North America tend to cluster together on the left side of the plot, which signals that these countries have the highest GDP and healthy life expectancy. They differ in terms of perception of corruption, freedom and social support, but their values for these factors are also high. We can see that the Scandinavian countries are very similar; these countries have higher social support and freedom values than Singapore or Luxembourg, and lower perceived corruption.

Some East Asian countries are in the bottom-left quadrant - South Korea and Hong Kong; they have higher GDP but lower social support and freedom values. Mongolia is happy in terms of social support and freedom, but its GDP creates a huge difference between this country and Hong Kong. All in all, East Asia is quite heterogeneous.

Most of the countries that are closer to the center of the graph are from completely different regions, so it is difficult to highlight regions with high homogeneity here; one such region is Latin America. On the interactive plot, by double-clicking on the Latin America region you can see that most of the countries from this region are somewhere in the middle of the graph, with one exception - Venezuela, which seems to be a place of anti-happiness despite its OUTSTANDINGLY HUGE GENEROSITY.

Central Europe and Baltic, Balkans countries are also very similar. Central Asia is heterogeneous: for example, Turkey is on the lower side of the plot, while Turkmenistan, Uzbekistan and Tajikistan are in the top part. Also, Turkmenistan seems to have much more freedom than Turkey, which seems REALLY STRANGE (maybe it is a mistake in the dataset, but probably it is because freedom is a perception metric).

But the region that is most clearly visible on the graph is Africa. African countries are certainly the outsiders of this rating; they differ quite a lot due to the large number of different countries, but in general it is clearly noticeable that this region is very different from all the others. The difference between Lesotho and Rwanda is huge - not in terms of GDP or healthy life expectancy (these parameters are not very good for either country), but mainly in terms of generosity, social support and freedom. South Asia is the region closest to Africa, and it is quite homogeneous.

The most heterogeneous region is the Middle East. Afghanistan, Lebanon, Saudi Arabia and Yemen are totally different. While Afghanistan is an absolute loser in the Happiness race, Lebanon and Iran are closer to the middle of the graph, and citizens of Bahrain and Saudi Arabia seem to be quite happy living in these countries.

Interactive Plot

Hover the mouse over a point or a vector to see which country or factor it represents.

# Interactive biplot: one point per country at its first two coordinates in
# coef_vec$model$X, plus one arrow from the origin per happiness factor, with
# the arrow tip at that factor's first two coefficients.

# Country coordinates and labels, collected into a plain data frame so the
# plot maps real columns instead of `coef_vec$...` expressions inside aes().
mds_coords = coef_vec$model$X
country_names = row.names(mds_coords)
if (is.null(country_names)) {
  # Fall back to row indices so the label mapping still has a column to use.
  country_names = as.character(seq_len(nrow(mds_coords)))
}
mds_points = data.frame(
  dim1 = mds_coords[, 1],
  dim2 = mds_coords[, 2],
  country = country_names,
  regions = regions,  # column keeps the name `regions` so the legend title is unchanged
  row.names = NULL
)

# One row per factor vector (the first six coefficient columns).
# label_dx / label_dy / label_size are the same hand-tuned label offsets and
# text sizes that were previously hard-coded in six separate layer pairs.
factor_cols = seq_len(6)
factor_coefs = coef_vec$coefficients
factor_arrows = data.frame(
  factor_name = colnames(factor_coefs)[factor_cols],
  xend = factor_coefs[1, factor_cols],
  yend = factor_coefs[2, factor_cols],
  label_dx = c(0.2, 0, 0.1, 0, 0, 0),
  label_dy = c(0, 0, -0.1, 0, 0, -0.1),
  label_size = c(3, 3, 2, 3, 3, 3),
  row.names = NULL
)

p1 = ggplot(mds_points,
            aes(x = dim1, y = dim2, label = country, color = regions)) +
  geom_point() +
  labs(x = "", y = "",
       title = "Interactive Plot of Country Similarity by Happiness Factors") +

  # Vectors for creation of Biplot.
  # inherit.aes = FALSE with a dedicated data frame: each arrow and label is
  # drawn exactly once and does not pick up the per-country colour/label
  # mappings of the point layer.
  geom_segment(
    data = factor_arrows,
    aes(x = 0, y = 0, xend = xend, yend = yend),
    arrow = arrow(length = unit(0.2, "cm"), type = "closed"),
    inherit.aes = FALSE
  ) +
  geom_label(
    data = factor_arrows,
    aes(x = xend + label_dx, y = yend + label_dy, label = factor_name),
    size = factor_arrows$label_size,
    inherit.aes = FALSE
  )

ggplotly(p1)

Results

  • Countries tend to cluster based on their region, but not all regions are homogeneous. Scandinavian, Western European, Oceania, and North American countries cluster together to the left side of the plot, while South Asia and African countries are on the right side.

  • East Asia is heterogeneous, with countries such as South Korea and Hong Kong having higher GDP but lower social support and freedom values.

  • Central Europe and Baltic, Balkans countries are quite similar and homogeneous, while the Middle East is the most heterogeneous region, with Afghanistan, Lebanon, Saudi Arabia, and Yemen being vastly different.

  • The interactive plot reveals that most of the Latin American countries are somewhere in the middle of the graph, except for Venezuela, which seems to be a place of no happiness despite its high generosity.

  • When viewing various news articles that mention the list of countries according to the Happiness index, one may get the impression that these countries are similar, but the plot built using MDS helps to understand in more detail how similar or different particular countries are in terms of the measured quality-of-life factors.