1 Intro
2 Libraries
3 30 Countries
4 Data Sources
5 Imputation of missing values
6 Normalisation
7 Multivariate Analysis
8 Weighting and aggregation
- 8.1 Weighting based on factor analysis and my own preference
9 Results

1 Intro

This is a short code summary showing how I created The Attractiveness Index of Global Economies in my bachelor thesis. The ONLY difference from the original text is that this summary uses (more or less) the latest data available as of October 2015 (the thesis was submitted in mid-July 2015).

The source code for the thesis can be found at GitHub: https://github.com/dmpe/bachelor

2 Libraries

library(grid)
library(gridExtra)
library(rvest)
library(plyr)
library(dplyr)
library(stringr)
library(xlsx)
library(Quandl)
library(scales)
library(corrplot)
library(ellipse)
library(psych)
library(cluster)
library(ggplot2)
library(clustrd)
library(reshape2)

set.seed(5154)

3 30 Countries

# The 30 economies covered by the index, stored as a list of country names.
# Every data source below is filtered against this list via `%in%`.
selectedCountries <- as.list(c(
  "Korea", "Singapore", "Japan", "Chile", "Czech Republic", "Nigeria",
  "China", "Germany", "Switzerland", "Mexico", "Jordan", "Brazil",
  "Russia", "United States", "United Kingdom", "United Arab Emirates",
  "Australia", "South Africa", "Kenya", "Finland", "Canada", "Israel",
  "New Zealand", "France", "Hungary", "Thailand", "Indonesia", "Ghana",
  "Colombia", "Turkey"
))

4 Data Sources

Based on chapter 2.2, page 15ff.

4.1 Human Development’s Educational Index (UNDP)

# "Extract-HTML" way
# Download the UNDP education-index page and parse the node matching '.table'
# into a data frame, using the first row as the header.
hdi <- read_html('http://hdr.undp.org/en/content/education-index') %>%
  html_node('.table') %>%
  html_table(header = TRUE)

# Keep the first 187 rows and only the country name plus the 2013 column,
# then give the 2013 column a descriptive name.
hdi <- hdi[1:187, c("Country", "2013")]
names(hdi)[names(hdi) == "2013"] <- "HDIEducatIndex"

# Harmonise country names with the spelling used in `selectedCountries`.
hdi$Country <- replace(hdi$Country, hdi$Country == "Korea (Republic of)", "Korea")
hdi$Country <- replace(hdi$Country, hdi$Country == "Russian Federation", "Russia")

hdi$HDIEducatIndex <- as.numeric(hdi$HDIEducatIndex)
head(hdi)

##         Country HDIEducatIndex
## 1        Norway          0.910
## 2     Australia          0.927
## 3   Switzerland          0.844
## 4   Netherlands          0.894
## 5 United States          0.890
## 6       Germany          0.884

4.2 Pearson’s Learning Curve Index

# Data as of January 2014
# Read the Learning Curve table from the workbook (header on row 18, data up to row 58).
learningCurveData <- read.xlsx("1_RawData/DataSources/learningcurve.xlsx", sheetIndex = 1, startRow = 18, endRow = 58)

learningCurveData <- plyr::rename(learningCurveData, c(NA. = "Country", Overall.Index = "LearningCurve_Index"))
# sapply(learningCurveData, class) # factors -> to char

# str_trim() also turns a factor column into plain character strings.
learningCurveData$Country <- str_trim(learningCurveData$Country, side = "both")

# Harmonise country names with the spelling used in `selectedCountries`.
learningCurveData$Country[learningCurveData$Country == "South Korea"] <- "Korea"
learningCurveData$Country[learningCurveData$Country == "Hong Kong-China"] <- "China"

#' delete some columns
learningCurveData <- learningCurveData[, !(colnames(learningCurveData) %in% c("Cognitive.Skills", "Educational.Attainment", 
    "Notes", "NA..1", "NA..2"))]

# Rank by row position. The rank vector is derived from the number of rows that
# were actually read, so it stays valid if startRow/endRow above are changed
# (a literal 1..40 would fail as soon as the row count differs).
# NOTE(review): this assumes the sheet is already sorted by the overall index - confirm.
learningCurveData$Ranking_LearningCurve <- seq_len(nrow(learningCurveData))
head(learningCurveData)

##          Country LearningCurve_Index Ranking_LearningCurve
## 1          Korea           1.2976759                     1
## 2          Japan           1.0303531                     2
## 3      Singapore           0.9911852                     3
## 4          China           0.9578964                     4
## 5        Finland           0.9237509                     5
## 6 United Kingdom           0.6690068                     6

4.3 The Youth unemployment rate

options(width = 200)

# Country names for the youth-unemployment table (same 30 economies, same order
# as `selectedCountries`).
Country <- c(
  "Korea", "Singapore", "Japan", "Chile", "Czech Republic", "Nigeria",
  "China", "Germany", "Switzerland", "Mexico", "Jordan", "Brazil",
  "Russia", "United States", "United Kingdom", "United Arab Emirates",
  "Australia", "South Africa", "Kenya", "Finland", "Canada", "Israel",
  "New Zealand", "France", "Hungary", "Thailand", "Indonesia", "Ghana",
  "Colombia", "Turkey"
)

# NOTE(review): the API key is committed in plain text; consider reading it
# from an environment variable (e.g. Sys.getenv()) and rotating this key.
Quandl.api_key("GgnxpyUBXHsyQxqp67bY")

# For each country, take row 1 / column 2 of the World Bank series
# SL.UEM.1524.ZS returned by Quandl.
# NOTE(review): presumably row 1 is the most recent observation - confirm.
Korea <- Quandl("WORLDBANK/KOR_SL_UEM_1524_ZS")[1, 2]
# http://api.worldbank.org/countries/CHN/indicators/SL.UEM.1524.ZS?per_page=1000
# China <- Quandl('WORLDBANK/CHN_SL_UEM_1524_ZS')[1,2]
# China is set by hand instead of being fetched from Quandl.
China <- 10.1
Germany <- Quandl("WORLDBANK/DEU_SL_UEM_1524_ZS")[1, 2]

##     Country Unemployment_NonScaled Unemployment Unemployment_ZscoreNEGATIVE
## 1 Australia                   12.2  -0.43459353                  0.43459353
## 2    Brazil                   13.6  -0.29309791                  0.29309791
## 3    Canada                   13.8  -0.27288428                  0.27288428
## 4     Chile                   16.1  -0.04042726                  0.04042726
## 5     China                   10.1  -0.64683685                  0.64683685
## 6  Colombia                   29.0   1.26335322                 -1.26335322

4.4 Index of Economic Freedom (Heritage Found./WSJ)

# Excel Way | http://www.heritage.org/index/download
freedom <- read.xlsx("1_RawData/DataSources/index2015_data.xlsx", sheetIndex = 1, endRow = 187)
freedom <- plyr::rename(
  freedom,
  c(Country.Name = "Country", X2015.Score = "Freedom_Index", World.Rank = "RankOverall"),
  warn_duplicated = FALSE
)

# Clean up country names and harmonise them with `selectedCountries`.
freedom$Country <- str_trim(freedom$Country, side = "both")
freedom$Country <- replace(freedom$Country, freedom$Country == "Korea, South", "Korea")
freedom <- freedom[, c("Country", "Freedom_Index", "RankOverall")]

# convert from factor to numeric
# (going through as.character() keeps the printed value rather than the factor
# level code; entries that are not numbers become NA, warnings are silenced)
freedom[c("Freedom_Index", "RankOverall")] <- lapply(
  freedom[c("Freedom_Index", "RankOverall")],
  function(column) suppressWarnings(as.numeric(as.character(column)))
)

# Restrict to the selected economies.
freedom <- freedom[freedom$Country %in% selectedCountries, c("Country", "Freedom_Index", "RankOverall")]

# Keep the raw score and replace Freedom_Index by its z-score within the sample.
freedom$Freedom_Index_NonScaled <- freedom$Freedom_Index
freedom$Freedom_Index <- as.numeric(scale(freedom$Freedom_Index_NonScaled))
head(freedom)

##      Country Freedom_Index RankOverall Freedom_Index_NonScaled
## 7  Australia     1.2989162           4                81.38729
## 22    Brazil    -1.2933320         118                56.60728
## 29    Canada     1.0553627           6                79.05909
## 33     Chile     0.9924862           7                78.45804
## 34     China    -1.6977867         139                52.74099
## 35  Colombia     0.2840585          28                71.68599

4.5 WEF’s Global Competitiveness Index (2015/2016)

# Read the WEF ranking workbook and keep rows 4-147 of the first three columns
# (country, rank, score).
# NOTE(review): the sheet is called "GCI 2013-2014" although the file holds the
# 2015-2016 rankings - confirm this is the intended sheet.
wef <- read.xlsx("1_RawData/DataSources/newRMD/GCR_Rankings_2015-2016.xlsx", sheetName = "GCI 2013-2014")[4:147, 1:3]
wef <- plyr::rename(wef, c("The.Global.Competitiveness.Index.2015.2016.rankings." = "Country", "NA."= "Ranking_WEF", "NA..1" = "WEF_Score"))
wef$Country <- str_trim(wef$Country, side = "both")

# correct names and convert to numeric
# https://stackoverflow.com/questions/3418128/how-to-convert-a-factor-to-an-integer-numeric-without-a-loss-of-information
wef$Country[wef$Country == "Taiwan, China"] <- "Taiwan"
wef$Country[wef$Country == "Korea, Rep."] <- "Korea"
wef$Country[wef$Country == "Russian Federation"] <- "Russia"

wef <- subset(wef, Country %in% selectedCountries)

# normalising on the sample, not population
# as.numeric(as.character(x)) works whether read.xlsx() delivered the score as a
# factor or as character. The previous levels(x)[x] form only works for factors:
# on a character column levels() is NULL and the assignment fails. This is also
# the same conversion the Index of Economic Freedom chunk uses.
wef$WEF_Score_NonScaled <- as.numeric(as.character(wef$WEF_Score))
wef$WEF_Score <- as.numeric(scale(wef$WEF_Score_NonScaled))
head(wef)

##          Country Ranking_WEF WEF_Score WEF_Score_NonScaled
## 4    Switzerland           1  1.541580            5.759234
## 5      Singapore           2  1.411072            5.676669
## 6  United States           3  1.310380            5.612967
## 7        Germany           4  1.176973            5.528569
## 9          Japan           6  1.078358            5.466182
## 11       Finland           8  1.055147            5.451497

4.6 Countries’ H-Index (SCImago)

# http://www.scimagojr.com/countryrank.php?area=0&category=0&region=all&year=all&order=h&min=0&min_type=it as of 30.Sep.2015
hindex <- read.xlsx("1_RawData/DataSources/newRMD/scimagojr.xlsx", sheetIndex = 1)

# sapply(hindex, class) # factors -> to char

# Clean up country names and harmonise them with `selectedCountries`.
hindex$Country <- str_trim(hindex$Country, side = "both")
hindex$Country <- replace(hindex$Country, hindex$Country == "South Korea", "Korea")
hindex$Country <- replace(hindex$Country, hindex$Country == "Russian Federation", "Russia")

# Drop the bibliometric columns that are not needed and rename the H-index column.
hindex <- hindex[, setdiff(
  colnames(hindex),
  c("Documents", "Citable.documents", "Citations", "Self.Citations", "Citations.per.Document")
)]
names(hindex)[names(hindex) == "H.index"] <- "H_Index"

# Restrict to the selected economies.
hindex <- hindex[hindex$Country %in% selectedCountries, c("Country", "Rank", "H_Index")]

# Keep the raw H-index and replace H_Index by its z-score within the sample.
hindex$H_Index_NonScaled <- hindex$H_Index
hindex$H_Index <- as.numeric(scale(hindex$H_Index_NonScaled))
head(hindex)

##          Country Rank  H_Index H_Index_NonScaled
## 1  United States    1 3.528064              1648
## 2 United Kingdom    2 1.672165              1015
## 3        Germany    3 1.296881               887
## 4         France    4 1.074056               811
## 5         Canada    5 1.024214               794
## 6          Japan    6 0.880550               745

4.7 Combine all datasets

# Data here are non scaled, they contain 'the real values'.

# Left-join every source onto the unemployment table by country. After the
# Learning Curve join only the raw (non-scaled) indicator columns are kept,
# before the H-index and HDI tables are attached.
df.Original <- unemplo %>%
  dplyr::left_join(freedom, by = "Country") %>%
  dplyr::left_join(wef, by = "Country") %>%
  dplyr::left_join(learningCurveData, by = "Country") %>%
  subset(select = c(Country, Unemployment_NonScaled, Freedom_Index_NonScaled,
                    WEF_Score_NonScaled, LearningCurve_Index)) %>%
  dplyr::left_join(hindex, by = "Country") %>%
  dplyr::left_join(hdi, by = "Country")

# Sort alphabetically by country and fix the final column order.
df.Original <- plyr::arrange(df.Original, df.Original$Country)
df.Original <- subset(df.Original, select = c(Country, Unemployment_NonScaled, Freedom_Index_NonScaled,
                                              WEF_Score_NonScaled, LearningCurve_Index, HDIEducatIndex,
                                              H_Index_NonScaled))

5 Imputation of missing values

Based on chapter 2.3, page 20ff.

… I am not going to use any of the abovementioned mechanisms for handling missing data, but will return to a much simpler method. Namely, given my knowledge, I will choose and assign six values for Nigeria, Kenya, Jordan, Ghana, South Africa and the UAE. On the one hand, this is not a scientifically good approach as it brings a tangible source of uncertainty on my results. In the case of large dataset and/or very high rate of missingness it may be even impossible doing so. On the other hand, if data are not available and the reason is not related to other variables in my dataset – as it is the case here – it is very hard to impute them in a preferable (‘desired’) way even with the most advanced statistical models, simply because data do not exist.

As result, I decide to assign z-score of -2.1 to Nigeria, -1.9 to South Africa, -1.5 to Kenya, -1 to Ghana, -0.5 to Jordan, and finally -0.2 to the UAE. To conclude the whole chapter, I would like to point out that the best solution to the problem of missing data is not to have a problem of missing data. However, this is often not possible and therefore in this chapter I showed several available techniques and finally assigned values to those countries considering my best (yet also limited) knowledge of their real situation.

Quoted from my thesis, chapter 2.3, page 22f.

# Fill in the hand-picked Learning Curve z-scores for six countries, then move
# the country names from the first column into the row names.
df.Original.Imputed <- local({
  imputed <- df.Original

  # Manually assigned z-scores.
  assignedScores <- c(
    "Nigeria" = -2.1,
    "South Africa" = -1.9,
    "Kenya" = -1.5,
    "Ghana" = -1.0,
    "Jordan" = -0.5,
    "United Arab Emirates" = -0.2
  )

  # Overwrite LearningCurve_Index only for rows whose country is in the table.
  position <- match(imputed$Country, names(assignedScores))
  listed <- !is.na(position)
  imputed$LearningCurve_Index[listed] <- unname(assignedScores[position[listed]])

  data.frame(imputed[, -1], row.names = imputed[, 1])
})

6 Normalisation

Based on chapter 2.4, page 23f.

The last normalisation technique, which I want to mention here (also used in the construction of my index), is called min-max normalisation.

… I am going to use a range between 0 and 100 and as briefly mentioned in the chapter about the youth unemployment rate, it will be required to transform its polarity, i.e. from having the highest number being the worst to having the lowest number being the worst.

Quoted from my thesis, chapter 2.4, page 24.

#' Create a new data frame (df.Original.MinMax) as a 1:1 copy of
#' df.Original.Imputed; its columns are then overwritten with values rescaled
#' to a 0-100 range.
df.Original.MinMax <- df.Original.Imputed

# WEF score: min-max from the bounds 1..7 to 0..100.
df.Original.MinMax$WEF_Score_NonScaled <- ((100-0)*(df.Original.Imputed$WEF_Score_NonScaled-1)/ (7-1)) + 0

# H-index: min-max from 1..(largest H-index in the data) to 0..100.
# The upper bound was previously the literal 1518, which is smaller than the
# largest H-index in the refreshed data and pushed that country above 100.
# NOTE(review): results printed elsewhere in this document were presumably
# produced with the old constant and will shift slightly - confirm and re-knit.
hIndexUpperBound <- max(df.Original.Imputed$H_Index_NonScaled, na.rm = TRUE)
df.Original.MinMax$H_Index_NonScaled <- ((100-0)*(df.Original.Imputed$H_Index_NonScaled-1)/ (hIndexUpperBound-1)) + 0

# HDI's Educat. Index is between 0 and 1 -> convert to 0-100 by multiplying it with 100
df.Original.MinMax$HDIEducatIndex <- df.Original.Imputed$HDIEducatIndex * 100

#' Unemployment_NonScaled points in the opposite direction (a higher rate is
#' worse), so its polarity is flipped: 0 % maps to 100 and 100 % maps to 0.
#' Without this step the country with the highest unemployment would score best.
df.Original.MinMax$Unemployment_NonScaled <- ((100-0)*(df.Original.Imputed$Unemployment_NonScaled-100)/ (0-100)) + 0

#' Linearly rescale z-scores to the range [minValue, maxValue].
#'
#' Although a z-score can in principle lie anywhere between -Inf and Inf, the
#' input is assumed to lie within +-3.5: -3.5 is mapped to `minValue` and 3.5
#' to `maxValue`. When wrapped with plyr::colwise() the function is applied to
#' every column of a data frame, so pick the column you need afterwards.
#'
#' @param x A numeric vector (a single column of z-scores)
#' @param minValue A minimal value of the range of the scale (e.g. 0)
#' @param maxValue A maximal value of the range of the scale (e.g. 100)
rescaleColumns <- function(x, minValue, maxValue) {
  zScoreBounds <- c(-3.5, 3.5)
  targetBounds <- c(minValue, maxValue)
  scales::rescale(x, to = targetBounds, from = zScoreBounds)
}

# Rescale the Learning Curve z-scores (the 4th column of df.Original.Imputed)
# from +-3.5 to 0-100.
df.Original.MinMax$LearningCurve_Index <- rescaleColumns(df.Original.Imputed[["LearningCurve_Index"]], 0, 100)

7 Multivariate Analysis

Based on chapter 2.5, page 24ff.

7.1 Principal component analysis

Based on chapter 2.5.1, page 25f.

# Give the six indicators short names used throughout the analysis.
colnames(df.Original.MinMax) <- c(
  "Unemployment", "Freedom_Index", "WEF_Score",
  "LearningCurve_Index", "HDIEducat_Index", "H_Index"
)

# Correlation matrix of the indicators, shown as numbers in the lower triangle.
corelationMat2 <- cor(df.Original.MinMax)

colorfun2 <- colorRampPalette(c("#ffffcc", "#a1dab4", "#41b6c4", "#2c7fb8", "#253494"))
corrplot(
  corelationMat2,
  method = "number",
  type = "lower",
  order = "FPC",
  col = colorfun2(100)
)

# PCA on the centred but unscaled indicators.
pc2 <- prcomp(df.Original.MinMax, center = TRUE, scale. = FALSE)
summary(pc2)

## Importance of components:
##                            PC1     PC2      PC3     PC4     PC5     PC6
## Standard deviation     28.4090 13.6402 10.11817 7.65825 5.29009 4.06070
## Proportion of Variance  0.6733  0.1552  0.08541 0.04893 0.02335 0.01376
## Cumulative Proportion   0.6733  0.8286  0.91397 0.96290 0.98624 1.00000

# Print the PCA rotation matrix (one column per component), rounded to 3 decimals.
as.data.frame(round(pc2$rotation, 3))

##                       PC1    PC2    PC3    PC4    PC5    PC6
## Unemployment        0.083 -0.164 -0.879 -0.305 -0.315  0.047
## Freedom_Index       0.200 -0.326  0.227 -0.696  0.183 -0.533
## WEF_Score           0.322 -0.210  0.010 -0.236  0.506  0.735
## LearningCurve_Index 0.415 -0.530 -0.174  0.606  0.241 -0.303
## HDIEducat_Index     0.395 -0.300  0.375 -0.007 -0.743  0.246
## H_Index             0.722  0.672 -0.071 -0.011  0.028 -0.146

# Scree plot with both the factor and the principal-component curves requested.
scree(df.Original.MinMax, factors = TRUE, pc = TRUE)

7.2 Factor analysis

Based on chapter 2.5.2, page 28f.

options(width = 200)
# Factor analysis with two factors and varimax rotation.
factorAn <- factanal(
  df.Original.MinMax,
  factors = 2,
  rotation = "varimax"
)
factorAn  # SS is sum of squares

## 
## Call:
## factanal(x = df.Original.MinMax, factors = 2, rotation = "varimax")
## 
## Uniquenesses:
##        Unemployment       Freedom_Index           WEF_Score LearningCurve_Index     HDIEducat_Index             H_Index 
##               0.882               0.464               0.005               0.338               0.005               0.512 
## 
## Loadings:
##                     Factor1 Factor2
## Unemployment                0.337  
## Freedom_Index       0.670   0.294  
## WEF_Score           0.762   0.644  
## LearningCurve_Index 0.759   0.294  
## HDIEducat_Index     0.997          
## H_Index             0.632   0.298  
## 
##                Factor1 Factor2
## SS loadings      3.004   0.790
## Proportion Var   0.501   0.132
## Cumulative Var   0.501   0.632
## 
## Test of the hypothesis that 2 factors are sufficient.
## The chi square statistic is 7.01 on 4 degrees of freedom.
## The p-value is 0.135

# Communality = 1 - uniqueness, kept as a one-column matrix with one row per
# indicator and rounded to 3 decimals.
communality <- as.matrix(round(1 - factorAn$uniquenesses, 3))
communality

##                      [,1]
## Unemployment        0.118
## Freedom_Index       0.536
## WEF_Score           0.995
## LearningCurve_Index 0.662
## HDIEducat_Index     0.995
## H_Index             0.488

7.3 Cluster analysis (hierarchical clustering)

Based on chapter 2.5.3, page 30ff.

#' Hierarchical clustering of the countries: Euclidean distances between the
#' rows, agglomerated with the "ward.D2" method.
euroclust <- hclust(
  d = dist(df.Original.MinMax, method = "euclidean"),
  method = "ward.D2"
)
plot(euroclust, hang = -1)
rect.hclust(euroclust, k = 2, border = "red")  # draw a red border around the 2 clusters

coef.hclust(euroclust) # agglomerative coef.

## [1] 0.8854894

##   Group.1 Unemployment Freedom_Index WEF_Score LearningCurve_Index HDIEducat_Index  H_Index
## 1       1     79.65455      62.23278  52.57746            29.62493        60.47273 13.24984
## 2       2     85.72632      72.87143  69.13861            56.44253        81.98947 38.50744

## Using vars as id variables

8 Weighting and aggregation

Based on chapter 2.6, page 34ff.

8.1 Weighting based on factor analysis and my own preference

Based on chapter 2.6.1.1, page 36f.

options(width=200)

# Squared loadings of every indicator on each of the two factors.
factor1SquaredLoadings <- factorAn$loadings[, 1]^2
factor2SquaredLoadings <- factorAn$loadings[, 2]^2

Sum_SFL <- sum(factor1SquaredLoadings) + sum(factor2SquaredLoadings) # + sum(factorAn$loadings[, 3]^2)

# Share of the total squared loadings carried by each factor.
FactorWeight1 <- sum(factor1SquaredLoadings)/Sum_SFL
FactorWeight2 <- sum(factor2SquaredLoadings)/Sum_SFL

# Squared loadings scaled to sum to one within each factor.
df.weights <- data.frame(Factor1ScaledWeight = factor1SquaredLoadings/sum(factor1SquaredLoadings), 
                         Factor2ScaledWeight = factor2SquaredLoadings/sum(factor2SquaredLoadings))

# Row-wise maximum of the two scaled weights (one value per indicator).
df.weights$colMax <- pmax(df.weights$Factor1ScaledWeight, df.weights$Factor2ScaledWeight)

# Each indicator gets the weight of the factor on which it loads most strongly.
# This is derived from the loadings rather than typed in by hand, so it cannot
# go stale when the data (and therefore the loadings) change.
df.weights$WholeFactorWeight <- ifelse(df.weights$Factor1ScaledWeight >= df.weights$Factor2ScaledWeight,
                                       FactorWeight1, FactorWeight2)

# Final indicator weights, rescaled so that they sum to one.
df.weights$Multipl <- df.weights$colMax * df.weights$WholeFactorWeight
df.weights$UnitScaled <- round(df.weights$Multipl / sum(df.weights$Multipl), 4)
df.weights

##                     Factor1ScaledWeight Factor2ScaledWeight    colMax WholeFactorWeight    Multipl UnitScaled
## Unemployment                0.001300565        0.1440676274 0.1440676          0.208256 0.03000295     0.0386
## Freedom_Index               0.149625644        0.1095478327 0.1496256          0.791744 0.11846520     0.1525
## WEF_Score                   0.193319763        0.5242348379 0.5242348          0.208256 0.10917507     0.1405
## LearningCurve_Index         0.191644811        0.1093704279 0.1916448          0.791744 0.15173362     0.1953
## HDIEducat_Index             0.331094780        0.0004516235 0.3310948          0.791744 0.26214230     0.3375
## H_Index                     0.133014436        0.1123276505 0.1330144          0.791744 0.10531338     0.1356

# Each variant multiplies every indicator column by its weight, sums the
# weighted indicators per country, sorts the totals in decreasing order and
# numbers the countries 1..n. Ranks are derived from the number of countries
# (not a literal 30) and TRUE is spelled out because T can be reassigned.

#' Min-MAX + FA weights
minMaxMultiFA.Weights <- t(t(df.Original.MinMax) * df.weights$UnitScaled)
df.Original.MM.FA <- sort(rowSums(minMaxMultiFA.Weights), decreasing = TRUE)
df.Original.MM.FA <- data.frame(Value = df.Original.MM.FA, RankMM.FA = seq_along(df.Original.MM.FA))

#' Min-MAX + EW (every indicator gets the same weight, 1 / number of indicators)
minMaxMultiEqual.Weights <- t(t(df.Original.MinMax) * rep(1 / ncol(df.Original.MinMax), ncol(df.Original.MinMax)))
df.Original.MM.EW <- sort(rowSums(minMaxMultiEqual.Weights), decreasing = TRUE)
df.Original.MM.EW <- data.frame(Value = df.Original.MM.EW, RankMM.EW = seq_along(df.Original.MM.EW))

#' Min-MAX + My own choice (weights follow the column order of df.Original.MinMax)
minMaxMultiMyChoice.Weights <- t(t(df.Original.MinMax) * c(0.140, 0.170, 0.230, 0.220, 0.130, 0.110))
df.Original.MM.MyChoice <- sort(rowSums(minMaxMultiMyChoice.Weights), decreasing = TRUE)
df.Original.MM.MyChoice <- data.frame(Value = df.Original.MM.MyChoice, RankMM.MC = seq_along(df.Original.MM.MyChoice))

9 Results

9.1 Positions in the ranking

options(width = 200)

# Expose the country (currently the row name) as a regular column so the three
# ranking tables can be joined on it.
df.Original.MM.FA$Country <- rownames(df.Original.MM.FA)
df.Original.MM.EW$Country <- rownames(df.Original.MM.EW)
df.Original.MM.MyChoice$Country <- rownames(df.Original.MM.MyChoice)

# all lines are different, doesn't have a straight one
# FA vs. equal weights
df.Original.MM.FAEW <- inner_join(df.Original.MM.FA, df.Original.MM.EW, by = "Country")
df.Original.MM.FAEW.Subset <- df.Original.MM.FAEW[, c("Country", "RankMM.FA", "RankMM.EW")]

# FA vs. equal weights vs. my own choice
df.Original.MM.FAEWMC <- inner_join(df.Original.MM.FAEW, df.Original.MM.MyChoice, by = "Country")
df.Original.MM.FAEWMC.Subset <- df.Original.MM.FAEWMC[, c("Country", "RankMM.FA", "RankMM.EW", "RankMM.MC")]
df.Original.MM.FAEWMC.Subset

##                 Country RankMM.FA RankMM.EW RankMM.MC
## 1         United States         1         1         1
## 2        United Kingdom         2         3         4
## 3               Germany         3         2         2
## 4             Australia         4         7         8
## 5                Canada         5         6         7
## 6           Switzerland         6         5         5
## 7                 Japan         7         4         3
## 8           New Zealand         8        10        10
## 9                 Korea         9         9         9
## 10            Singapore        10         8         6
## 11              Finland        11        11        11
## 12               Israel        12        12        12
## 13               France        13        13        13
## 14       Czech Republic        14        14        14
## 15              Hungary        15        19        19
## 16               Russia        16        17        17
## 17                Chile        17        18        18
## 18                China        18        15        15
## 19 United Arab Emirates        19        16        16
## 20               Jordan        20        24        22
## 21               Turkey        21        22        21
## 22             Thailand        22        20        20
## 23               Mexico        23        21        23
## 24               Brazil        24        23        25
## 25         South Africa        25        28        28
## 26             Colombia        26        25        24
## 27            Indonesia        27        27        27
## 28                Ghana        28        26        26
## 29                Kenya        29        29        29
## 30              Nigeria        30        30        30

9.2 Bar chart decomposition of the Attractiveness Index (MM.FA)

# Split the FA-weighted indicator values into two dimensions per country:
# "business/economy" (columns 1-3) and "education" (columns 4-6).
df.BackToDetails <- as.data.frame(minMaxMultiFA.Weights)
df.BackToDetails$Country <- rownames(df.BackToDetails) 
# Columns 1:3 are summed below as Unemployment, WEF_Score and Freedom_Index.
df.BackToDetails.p1 <- df.BackToDetails[, 1:3]
df.BackToDetails.p1$Country <- rownames(df.BackToDetails.p1) 
# Columns 4:7 are the three education indicators plus the Country column added above.
df.BackToDetails.p2 <- df.BackToDetails[, 4:7]

# Sum rowwise
# adply(..., 1, transform, ...) evaluates the sum separately for every row.
df.BackToDetails.p2 <- adply(df.BackToDetails.p2, 1, transform, sumEdu = sum(LearningCurve_Index, HDIEducat_Index, H_Index))
df.BackToDetails.p1 <- adply(df.BackToDetails.p1, 1, transform, sumBuEco = sum(Unemployment, WEF_Score, Freedom_Index))


# Recombine the two dimension totals; this relies on p1 and p2 still being in
# the same row order.
df.BackToDetails <- data.frame(Education = df.BackToDetails.p2$sumEdu, 
                               BussEcon = df.BackToDetails.p1$sumBuEco, 
                               Country = df.BackToDetails.p1$Country)

# Convert the country column to character and replace long country names with
# short labels.
df.BackToDetails$Country <- local({
  countryLabel <- as.character(df.BackToDetails$Country)
  shortLabel <- c(
    "United States" = "USA",
    "United Arab Emirates" = "UAE",
    "United Kingdom" = "UK",
    "Czech Republic" = "Czech Rep.",
    "South Africa" = "S. Africa",
    "New Zealand" = "N. Zealand"
    # "Switzerland" = "Swizerl."
  )
  hasShortLabel <- countryLabel %in% names(shortLabel)
  countryLabel[hasShortLabel] <- unname(shortLabel[countryLabel[hasShortLabel]])
  countryLabel
})

# Long format: one row per country and dimension (Education / BussEcon).
df.meltedBackToDetails <- melt(df.BackToDetails, id.vars = "Country")

# Stacked bar chart: bar height is the weighted value, fill colour is the
# dimension, countries are ordered via reorder(Country, value).
e9 <- ggplot(data = df.meltedBackToDetails, aes(reorder(Country, value), fill = variable, weight = value)) +
  geom_bar() +
  coord_cartesian(ylim = c(0, 100)) +
  scale_y_continuous(breaks = seq(0, 100, 5)) +
  ggtitle("Bar chart decomposition of the Attractiveness Index (MM.FA)") +
  scale_fill_discrete(name = "Dimensions") +
  ylab("Index Value") +
  xlab("Countries")
e9

# Table: share of each country's index value that comes from the education
# dimension and from the business/economy dimension (each row sums to 1).
EducatValue <- cbind(df.BackToDetails$Education / (df.BackToDetails$Education + df.BackToDetails$BussEcon))
BusinessValue <- cbind(df.BackToDetails$BussEcon / (df.BackToDetails$Education + df.BackToDetails$BussEcon))

# Name the country column explicitly; wrapping the vector in cbind() made
# data.frame() generate the column name "cbind.df.BackToDetails.Country.".
df.BackToDetails.table <- data.frame(Country = df.BackToDetails$Country, EducatValue, BusinessValue)
head(df.BackToDetails.table)

##   cbind.df.BackToDetails.Country. EducatValue BusinessValue
## 1                       Australia   0.6522627     0.3477373
## 2                          Brazil   0.6148834     0.3851166
## 3                          Canada   0.6495140     0.3504860
## 4                           Chile   0.5960276     0.4039724
## 5                           China   0.6448532     0.3551468
## 6                        Colombia   0.5681968     0.4318032

9.3 Comparison of 3 weighting methods (FA/EW/‘my choice’)

9.4 Comparison of different weights based on Min-Max norm. method

9.5 Comparison of 3 weighting methods (FA/EW/‘my choice’)

9.6 Box Plot of 3 weighting methods

9.7 Relationship between GDP and my Attractiveness Index (MM.FA)

Is there any correlation between the two values?

# Correlation (cor() default method) between the Value and GDPinDollars columns of gdpTiger.
# NOTE(review): gdpTiger is built in a chunk that is not shown here - presumably
# Value is the MM.FA index value; confirm.
cor(gdpTiger$Value, gdpTiger$GDPinDollars)

## [1] 0.7891544

The Attractiveness Index of Global Economies (Oct. 2015)

Dmitrij Petrov (@dmpe)

Oct 02 2015