This is a short code summary that shows how I created the Attractiveness Index of Global Economies in my bachelor thesis. The ONLY difference from the original text is that this summary uses the (more or less) latest data available as of October 2015 (the thesis was submitted in mid-July 2015).
The source code for the thesis can be found at GitHub: https://github.com/dmpe/bachelor
library(grid)
library(gridExtra)
library(rvest)
library(plyr)
library(dplyr)
library(stringr)
library(xlsx)
library(Quandl)
library(scales)
library(corrplot)
library(ellipse)
library(psych)
library(cluster)
library(ggplot2)
library(clustrd)
library(reshape2)
# Fix the random number generator state so that repeated runs give the same results.
set.seed(5154)

# The 30 economies covered by the index, stored as a list of single strings.
selectedCountries <- as.list(c(
  "Korea", "Singapore", "Japan", "Chile", "Czech Republic",
  "Nigeria", "China", "Germany", "Switzerland", "Mexico",
  "Jordan", "Brazil", "Russia", "United States", "United Kingdom",
  "United Arab Emirates", "Australia", "South Africa", "Kenya", "Finland",
  "Canada", "Israel", "New Zealand", "France", "Hungary",
  "Thailand", "Indonesia", "Ghana", "Colombia", "Turkey"
))
Based on chapter 2.2, page 15ff.
# "Extract-HTML" way
# Download the UNDP page and parse the first element with CSS class "table"
# into a data frame, using the first row as column headers.
hdi <- read_html("http://hdr.undp.org/en/content/education-index") %>%
  html_node(".table") %>%
  html_table(header = TRUE)
# Keep the first 187 rows and only the country name and the 2013 column.
hdi <- hdi[1:187, c("Country", "2013")]
hdi <- plyr::rename(hdi, c(`2013` = "HDIEducatIndex"))
# Harmonise country names with the spelling used in selectedCountries.
hdi$Country[hdi$Country == "Korea (Republic of)"] <- "Korea"
hdi$Country[hdi$Country == "Russian Federation"] <- "Russia"
# Cells that cannot be parsed as a number become NA.
hdi$HDIEducatIndex <- as.numeric(hdi$HDIEducatIndex)
head(hdi)
## Country HDIEducatIndex
## 1 Norway 0.910
## 2 Australia 0.927
## 3 Switzerland 0.844
## 4 Netherlands 0.894
## 5 United States 0.890
## 6 Germany 0.884
# Data as of January 2014
# Read the sheet from row 18 (header) to row 58.
learningCurveData <- read.xlsx("1_RawData/DataSources/learningcurve.xlsx", sheetIndex = 1, startRow = 18, endRow = 58)
learningCurveData <- plyr::rename(learningCurveData, c(NA. = "Country", Overall.Index = "LearningCurve_Index"))
# sapply(learningCurveData, class) # factors -> to char
learningCurveData$Country <- str_trim(learningCurveData$Country, side = "both")
# Harmonise country names with the spelling used in selectedCountries.
learningCurveData$Country[learningCurveData$Country == "South Korea"] <- "Korea"
learningCurveData$Country[learningCurveData$Country == "Hong Kong-China"] <- "China"
# Drop the columns that are not used later on.
learningCurveData <- learningCurveData[, !(colnames(learningCurveData) %in% c(
  "Cognitive.Skills", "Educational.Attainment", "Notes", "NA..1", "NA..2"
))]
# Number the rows in file order. The length is taken from the data instead of
# a fixed 40, so a sheet with a different number of rows does not raise an error.
learningCurveData$Ranking_LearningCurve <- seq_len(nrow(learningCurveData))
head(learningCurveData)
## Country LearningCurve_Index Ranking_LearningCurve
## 1 Korea 1.2976759 1
## 2 Japan 1.0303531 2
## 3 Singapore 0.9911852 3
## 4 China 0.9578964 4
## 5 Finland 0.9237509 5
## 6 United Kingdom 0.6690068 6
options(width = 200)
# Plain character vector with the same 30 names, in the same order, as selectedCountries.
Country <- c(
  "Korea", "Singapore", "Japan", "Chile", "Czech Republic", "Nigeria", "China", "Germany", "Switzerland", "Mexico",
  "Jordan", "Brazil", "Russia", "United States", "United Kingdom", "United Arab Emirates", "Australia",
  "South Africa", "Kenya", "Finland", "Canada", "Israel", "New Zealand", "France", "Hungary", "Thailand",
  "Indonesia", "Ghana", "Colombia", "Turkey"
)
# Take the Quandl key from the QUANDL_API_KEY environment variable when it is set.
# The literal is kept only as a fallback so that existing runs keep working.
# NOTE(review): a credential committed to source should be rotated and removed.
Quandl.api_key(Sys.getenv("QUANDL_API_KEY", unset = "GgnxpyUBXHsyQxqp67bY"))
# For each series, keep the value in the first row, second column.
Korea <- Quandl("WORLDBANK/KOR_SL_UEM_1524_ZS")[1, 2]
# http://api.worldbank.org/countries/CHN/indicators/SL.UEM.1524.ZS?per_page=1000
# China <- Quandl('WORLDBANK/CHN_SL_UEM_1524_ZS')[1,2]
# The value for China is entered by hand instead of being downloaded.
China <- 10.1
Germany <- Quandl("WORLDBANK/DEU_SL_UEM_1524_ZS")[1, 2]
## Country Unemployment_NonScaled Unemployment Unemployment_ZscoreNEGATIVE
## 1 Australia 12.2 -0.43459353 0.43459353
## 2 Brazil 13.6 -0.29309791 0.29309791
## 3 Canada 13.8 -0.27288428 0.27288428
## 4 Chile 16.1 -0.04042726 0.04042726
## 5 China 10.1 -0.64683685 0.64683685
## 6 Colombia 29.0 1.26335322 -1.26335322
# Excel Way | http://www.heritage.org/index/download
freedom <- read.xlsx("1_RawData/DataSources/index2015_data.xlsx", sheetIndex = 1, endRow = 187)
freedom <- plyr::rename(
  freedom,
  c(Country.Name = "Country", X2015.Score = "Freedom_Index", World.Rank = "RankOverall"),
  warn_duplicated = FALSE
)
freedom$Country <- str_trim(freedom$Country, side = "both")
freedom$Country[freedom$Country == "Korea, South"] <- "Korea"
# Convert via character so that factor columns yield their labels, not their
# internal codes. Coercion warnings for non-numeric cells are deliberately silenced.
freedom$Freedom_Index <- suppressWarnings(as.numeric(as.character(freedom$Freedom_Index)))
freedom$RankOverall <- suppressWarnings(as.numeric(as.character(freedom$RankOverall)))
# Keep only the sample countries and the three columns used later.
freedom <- subset(freedom, Country %in% selectedCountries, select = c(Country, Freedom_Index, RankOverall))
# Keep the raw score, then replace Freedom_Index by its z-score within the sample.
freedom$Freedom_Index_NonScaled <- freedom$Freedom_Index
freedom$Freedom_Index <- as.numeric(scale(freedom$Freedom_Index_NonScaled))
head(freedom)
## Country Freedom_Index RankOverall Freedom_Index_NonScaled
## 7 Australia 1.2989162 4 81.38729
## 22 Brazil -1.2933320 118 56.60728
## 29 Canada 1.0553627 6 79.05909
## 33 Chile 0.9924862 7 78.45804
## 34 China -1.6977867 139 52.74099
## 35 Colombia 0.2840585 28 71.68599
# Read the ranking sheet and keep rows 4 to 147 of its first three columns.
wef <- read.xlsx("1_RawData/DataSources/newRMD/GCR_Rankings_2015-2016.xlsx", sheetName = "GCI 2013-2014")[4:147, 1:3]
wef <- plyr::rename(wef, c("The.Global.Competitiveness.Index.2015.2016.rankings." = "Country", "NA." = "Ranking_WEF", "NA..1" = "WEF_Score"))
wef$Country <- str_trim(wef$Country, side = "both")
# correct names and convert to numeric
# https://stackoverflow.com/questions/3418128/how-to-convert-a-factor-to-an-integer-numeric-without-a-loss-of-information
wef$Country[wef$Country == "Taiwan, China"] <- "Taiwan"
wef$Country[wef$Country == "Korea, Rep."] <- "Korea"
wef$Country[wef$Country == "Russian Federation"] <- "Russia"
wef <- subset(wef, Country %in% selectedCountries)
# Normalising on the sample, not the population.
# Going through as.character() gives the same numbers as levels(x)[x] for a
# factor, but also works when the column was read as character or numeric.
wef$WEF_Score_NonScaled <- as.numeric(as.character(wef$WEF_Score))
wef$WEF_Score <- as.numeric(scale(wef$WEF_Score_NonScaled))
head(wef)
## Country Ranking_WEF WEF_Score WEF_Score_NonScaled
## 4 Switzerland 1 1.541580 5.759234
## 5 Singapore 2 1.411072 5.676669
## 6 United States 3 1.310380 5.612967
## 7 Germany 4 1.176973 5.528569
## 9 Japan 6 1.078358 5.466182
## 11 Finland 8 1.055147 5.451497
# Source: http://www.scimagojr.com/countryrank.php?area=0&category=0&region=all&year=all&order=h&min=0&min_type=it
# (downloaded on 30 Sep 2015)
hindex <- read.xlsx("1_RawData/DataSources/newRMD/scimagojr.xlsx", sheetIndex = 1)
# sapply(hindex, class) # factors -> to char
hindex$Country <- str_trim(hindex$Country, side = "both")
# Align country names with the spelling used in selectedCountries.
hindex$Country[hindex$Country == "South Korea"] <- "Korea"
hindex$Country[hindex$Country == "Russian Federation"] <- "Russia"
# Remove the bibliometric columns that are not part of the index.
hindex <- hindex[, !is.element(
  colnames(hindex),
  c("Documents", "Citable.documents", "Citations", "Self.Citations", "Citations.per.Document")
)]
hindex <- plyr::rename(hindex, c(H.index = "H_Index"))
# Restrict to the sample countries and the three columns needed below.
hindex <- hindex[hindex$Country %in% selectedCountries, c("Country", "Rank", "H_Index")]
# Preserve the raw H-index and store its sample z-score under H_Index.
hindex$H_Index_NonScaled <- hindex$H_Index
hindex$H_Index <- as.numeric(scale(hindex$H_Index_NonScaled))
head(hindex)
## Country Rank H_Index H_Index_NonScaled
## 1 United States 1 3.528064 1648
## 2 United Kingdom 2 1.672165 1015
## 3 Germany 3 1.296881 887
## 4 France 4 1.074056 811
## 5 Canada 5 1.024214 794
## 6 Japan 6 0.880550 745
# Assemble one table with the raw (non-scaled) value of every indicator.
# Start from the unemployment table and left-join each further source on Country.
df.Original <- Reduce(
  function(joinedSoFar, nextSource) dplyr::left_join(joinedSoFar, nextSource, by = "Country"),
  list(freedom, wef, learningCurveData, hindex, hdi),
  unemplo
)
# Sort alphabetically by country.
df.Original <- plyr::arrange(df.Original, Country)
# Keep the country name and the six raw indicator columns, in this order.
df.Original <- df.Original[, c(
  "Country", "Unemployment_NonScaled", "Freedom_Index_NonScaled", "WEF_Score_NonScaled",
  "LearningCurve_Index", "HDIEducatIndex", "H_Index_NonScaled"
)]
Based on chapter 2.3, page 20ff.
… I am not going to use any of the abovementioned mechanisms for handling missing data, but will fall back on a much simpler method. Namely, given my knowledge, I will choose and assign six values for Nigeria, Kenya, Jordan, Ghana, South Africa and the UAE. On the one hand, this is not a scientifically good approach as it introduces a tangible source of uncertainty into my results. In the case of a large dataset and/or a very high rate of missingness it may even be impossible to do so. On the other hand, if data are not available and the reason is not related to other variables in my dataset – as is the case here – it is very hard to impute them in a preferable (‘desired’) way even with the most advanced statistical models, simply because the data do not exist.
As a result, I decided to assign a z-score of -2.1 to Nigeria, -1.9 to South Africa, -1.5 to Kenya, -1 to Ghana, -0.5 to Jordan, and finally -0.2 to the UAE. To conclude the whole chapter, I would like to point out that the best solution to the problem of missing data is not to have a problem of missing data. However, this is often not possible, and therefore in this chapter I showed several available techniques and finally assigned values to those countries based on my best (yet also limited) knowledge of their real situation.
Quoted from my thesis, chapter 2.3, page 22f.
# Start from a copy of the joined table and overwrite the LearningCurve_Index
# of six countries with hand-picked z-scores.
df.Original.Imputed <- df.Original
imputedZScores <- c(
  "Nigeria" = -2.1,
  "South Africa" = -1.9,
  "Kenya" = -1.5,
  "Ghana" = -1.0,
  "Jordan" = -0.5,
  "United Arab Emirates" = -0.2
)
for (imputedCountry in names(imputedZScores)) {
  df.Original.Imputed$LearningCurve_Index[df.Original.Imputed$Country == imputedCountry] <-
    imputedZScores[[imputedCountry]]
}
# Move the first column (Country) into the row names so that only numeric columns remain.
df.Original.Imputed <- data.frame(
  df.Original.Imputed[, -1],
  row.names = df.Original.Imputed[, 1]
)
Based on chapter 2.4, page 23f.
The last normalisation technique, which I want to mention here (also used in the construction of my index), is called min-max normalisation.
… I am going to use a range between 0 and 100 and as briefly mentioned in the chapter about the youth unemployment rate, it will be required to transform its polarity, i.e. from having the highest number being the worst to having the lowest number being the worst.
Quoted from my thesis, chapter 2.4, page 24.
#' Create a new data frame (df.Original.MinMax) as a 1:1 copy of
#' df.Original.Imputed, then overwrite the columns below with their values on a
#' 0-100 scale. Freedom_Index_NonScaled is not touched here.
df.Original.MinMax <- df.Original.Imputed
# WEF score: min-max with 1 and 7 as the scale bounds.
df.Original.MinMax$WEF_Score_NonScaled <- ((100 - 0) * (df.Original.Imputed$WEF_Score_NonScaled - 1) / (7 - 1)) + 0
# H-index: lower bound 1. The upper bound is the historical constant 1518, raised
# to the observed maximum when the data exceed it (the SCImago snapshot used here
# contains 1648, which would otherwise be mapped above 100).
hIndexUpperBound <- max(1518, df.Original.Imputed$H_Index_NonScaled, na.rm = TRUE)
df.Original.MinMax$H_Index_NonScaled <- ((100 - 0) * (df.Original.Imputed$H_Index_NonScaled - 1) / (hIndexUpperBound - 1)) + 0
# HDI's Education Index is between 0 and 1 -> multiply by 100 to get 0-100.
df.Original.MinMax$HDIEducatIndex <- df.Original.Imputed$HDIEducatIndex * 100
#' Unemployment_NonScaled has the opposite polarity: a high rate is bad. Using
#' 100 as the minimum and 0 as the maximum flips the scale (result = 100 - rate),
#' so the country with the highest unemployment gets the lowest value.
df.Original.MinMax$Unemployment_NonScaled <- ((100 - 0) * (df.Original.Imputed$Unemployment_NonScaled - 100) / (0 - 100)) + 0
#' Linearly rescale z-score-like values onto the interval [minValue, maxValue].
#'
#' A z-score can in theory take any value from -Inf to Inf. Here the input is
#' assumed to lie between -3.5 and 3.5, so -3.5 is mapped to `minValue` and 3.5
#' to `maxValue`. The function works on one vector at a time; wrap it in
#' `plyr::colwise()` to apply it to every column of a data frame.
#'
#' @param x A numeric vector (for example one column of a data frame)
#' @param minValue A minimal value of the range of the scale (e.g. 0)
#' @param maxValue A maximal value of the range of the scale (e.g. 100)
#' @return The rescaled values as returned by `scales::rescale()`
rescaleColumns <- function(x, minValue, maxValue) {
  scales::rescale(x, to = c(minValue, maxValue), from = c(-3.5, 3.5))
}
# Rescale only LearningCurve_Index, selected by name, so that the result does
# not depend on the column's position in the data frame.
df.Original.MinMax$LearningCurve_Index <- rescaleColumns(df.Original.Imputed$LearningCurve_Index, 0, 100)
Based on chapter 2.5, page 24ff.
Based on chapter 2.5.1, page 25f.
# Assign shorter display names by position; the order must match the current
# column order of df.Original.MinMax.
names(df.Original.MinMax) <- c("Unemployment", "Freedom_Index", "WEF_Score", "LearningCurve_Index", "HDIEducat_Index", "H_Index")
# Correlation matrix of the six indicators, drawn as numbers in the lower triangle.
corelationMat2 <- cor(df.Original.MinMax)
colorfun2 <- colorRampPalette(c("#ffffcc", "#a1dab4", "#41b6c4", "#2c7fb8", "#253494"))
corrplot(corelationMat2, method = "number", type = "lower", order = "FPC", col = colorfun2(100))
# Principal component analysis on centred but unscaled columns. The argument
# is spelled `scale.` in full instead of relying on partial matching.
pc2 <- prcomp(df.Original.MinMax, center = TRUE, scale. = FALSE)
summary(pc2)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 28.4090 13.6402 10.11817 7.65825 5.29009 4.06070
## Proportion of Variance 0.6733 0.1552 0.08541 0.04893 0.02335 0.01376
## Cumulative Proportion 0.6733 0.8286 0.91397 0.96290 0.98624 1.00000
# Print the PCA rotation matrix (variable loadings per component), rounded to three decimals.
as.data.frame(round(pc2$rotation, 3))
## PC1 PC2 PC3 PC4 PC5 PC6
## Unemployment 0.083 -0.164 -0.879 -0.305 -0.315 0.047
## Freedom_Index 0.200 -0.326 0.227 -0.696 0.183 -0.533
## WEF_Score 0.322 -0.210 0.010 -0.236 0.506 0.735
## LearningCurve_Index 0.415 -0.530 -0.174 0.606 0.241 -0.303
## HDIEducat_Index 0.395 -0.300 0.375 -0.007 -0.743 0.246
## H_Index 0.722 0.672 -0.071 -0.011 0.028 -0.146
# Scree plot of the six indicators, requesting both the factor and the principal-component curves.
scree(df.Original.MinMax, factors = TRUE, pc = TRUE)
Based on chapter 2.5.2, page 28f.
# Widen the console so that the printed tables are not wrapped.
options(width=200)
# Factor analysis with two factors and varimax rotation on the min-max scaled indicators.
factorAn <- factanal(df.Original.MinMax, rotation = "varimax", factors = 2)
factorAn # SS is sum of squares
##
## Call:
## factanal(x = df.Original.MinMax, factors = 2, rotation = "varimax")
##
## Uniquenesses:
## Unemployment Freedom_Index WEF_Score LearningCurve_Index HDIEducat_Index H_Index
## 0.882 0.464 0.005 0.338 0.005 0.512
##
## Loadings:
## Factor1 Factor2
## Unemployment 0.337
## Freedom_Index 0.670 0.294
## WEF_Score 0.762 0.644
## LearningCurve_Index 0.759 0.294
## HDIEducat_Index 0.997
## H_Index 0.632 0.298
##
## Factor1 Factor2
## SS loadings 3.004 0.790
## Proportion Var 0.501 0.132
## Cumulative Var 0.501 0.632
##
## Test of the hypothesis that 2 factors are sufficient.
## The chi square statistic is 7.01 on 4 degrees of freedom.
## The p-value is 0.135
# Communality of each variable = 1 - uniqueness, shown as a one-column matrix rounded to three decimals.
communality <- round(cbind(1 - factorAn$uniquenesses), 3)
communality
## [,1]
## Unemployment 0.118
## Freedom_Index 0.536
## WEF_Score 0.995
## LearningCurve_Index 0.662
## HDIEducat_Index 0.995
## H_Index 0.488
Based on chapter 2.5.3, page 30ff.
#' Hierarchical Clustering
# Cluster the countries (rows) on Euclidean distances using the "ward.D2" agglomeration method.
euroclust <- hclust(dist(df.Original.MinMax, method = "euclidean"), "ward.D2")
# Draw the dendrogram; hang = -1 is passed so that the labels line up at the bottom.
plot(euroclust, hang = -1)
rect.hclust(euroclust, k = 2, border = "red") # create border for 2 clusters
coef.hclust(euroclust) # agglomerative coef.
## [1] 0.8854894
## Group.1 Unemployment Freedom_Index WEF_Score LearningCurve_Index HDIEducat_Index H_Index
## 1 1 79.65455 62.23278 52.57746 29.62493 60.47273 13.24984
## 2 2 85.72632 72.87143 69.13861 56.44253 81.98947 38.50744
## Using vars as id variables
Based on chapter 2.6, page 34ff.
Based on chapter 2.6.1.1, page 36f.
options(width = 200)
# Squared loadings of every variable on factor 1 and on factor 2.
factor1SquaredLoadings <- factorAn$loadings[, 1]^2
factor2SquaredLoadings <- factorAn$loadings[, 2]^2
Sum_SFL <- sum(factor1SquaredLoadings) + sum(factor2SquaredLoadings) # + sum(factorAn$loadings[, 3]^2)
# Share of each factor in the total sum of squared loadings.
FactorWeight1 <- sum(factor1SquaredLoadings) / Sum_SFL
FactorWeight2 <- sum(factor2SquaredLoadings) / Sum_SFL
# Squared loadings scaled to sum to 1 within each factor.
df.weights <- data.frame(
  Factor1ScaledWeight = factor1SquaredLoadings / sum(factor1SquaredLoadings),
  Factor2ScaledWeight = factor2SquaredLoadings / sum(factor2SquaredLoadings)
)
# Row-wise maximum of the two scaled weights.
df.weights$colMax <- pmax(df.weights$Factor1ScaledWeight, df.weights$Factor2ScaledWeight)
# Each variable takes the weight of the factor on which its scaled weight is
# largest (factor 1 on a tie). Deriving this from the data replaces a
# hand-typed vector that was only valid for one particular set of loadings.
df.weights$WholeFactorWeight <- ifelse(
  df.weights$Factor2ScaledWeight > df.weights$Factor1ScaledWeight,
  FactorWeight2,
  FactorWeight1
)
df.weights$Multipl <- df.weights$colMax * df.weights$WholeFactorWeight
# Final weights, scaled to sum to 1 and rounded to four decimals.
df.weights$UnitScaled <- round(df.weights$Multipl / sum(df.weights$Multipl), 4)
df.weights
## Factor1ScaledWeight Factor2ScaledWeight colMax WholeFactorWeight Multipl UnitScaled
## Unemployment 0.001300565 0.1440676274 0.1440676 0.208256 0.03000295 0.0386
## Freedom_Index 0.149625644 0.1095478327 0.1496256 0.791744 0.11846520 0.1525
## WEF_Score 0.193319763 0.5242348379 0.5242348 0.208256 0.10917507 0.1405
## LearningCurve_Index 0.191644811 0.1093704279 0.1916448 0.791744 0.15173362 0.1953
## HDIEducat_Index 0.331094780 0.0004516235 0.3310948 0.791744 0.26214230 0.3375
## H_Index 0.133014436 0.1123276505 0.1330144 0.791744 0.10531338 0.1356
# Each variant multiplies every column by its weight (t(t(x) * w) applies the
# weights column-wise), sums per country and sorts from best to worst. Ranks
# are generated from the length of the sorted vector, because sort() drops NA
# values and a fixed 1:30 would then not fit.

#' Min-MAX + FA weights
minMaxMultiFA.Weights <- t(t(df.Original.MinMax) * df.weights$UnitScaled)
df.Original.MM.FA <- sort(rowSums(minMaxMultiFA.Weights), decreasing = TRUE)
df.Original.MM.FA <- data.frame(Value = df.Original.MM.FA, RankMM.FA = seq_along(df.Original.MM.FA))
#' Min-MAX + EW (equal weights of 1/6 each)
minMaxMultiEqual.Weights <- t(t(df.Original.MinMax) * c(rep(1 / 6, 6)))
df.Original.MM.EW <- sort(rowSums(minMaxMultiEqual.Weights), decreasing = TRUE)
df.Original.MM.EW <- data.frame(Value = df.Original.MM.EW, RankMM.EW = seq_along(df.Original.MM.EW))
#' Min-MAX + My own choice (weights given in column order)
minMaxMultiMyChoice.Weights <- t(t(df.Original.MinMax) * c(0.140, 0.170, 0.230, 0.220, 0.130, 0.110))
df.Original.MM.MyChoice <- sort(rowSums(minMaxMultiMyChoice.Weights), decreasing = TRUE)
df.Original.MM.MyChoice <- data.frame(Value = df.Original.MM.MyChoice, RankMM.MC = seq_along(df.Original.MM.MyChoice))
options(width = 200)
# Expose the row names (countries) as a regular column so the tables can be joined.
df.Original.MM.FA[["Country"]] <- row.names(df.Original.MM.FA)
df.Original.MM.EW[["Country"]] <- row.names(df.Original.MM.EW)
df.Original.MM.MyChoice[["Country"]] <- row.names(df.Original.MM.MyChoice)
# The three rankings are not identical; compare them side by side.
# First FA vs. equal weights ...
df.Original.MM.FAEW <- df.Original.MM.FA %>%
  inner_join(df.Original.MM.EW, by = "Country")
df.Original.MM.FAEW.Subset <- df.Original.MM.FAEW[, c("Country", "RankMM.FA", "RankMM.EW")]
# ... then add the ranking based on my own weights.
df.Original.MM.FAEWMC <- df.Original.MM.FAEW %>%
  inner_join(df.Original.MM.MyChoice, by = "Country")
df.Original.MM.FAEWMC.Subset <- df.Original.MM.FAEWMC[, c("Country", "RankMM.FA", "RankMM.EW", "RankMM.MC")]
df.Original.MM.FAEWMC.Subset
## Country RankMM.FA RankMM.EW RankMM.MC
## 1 United States 1 1 1
## 2 United Kingdom 2 3 4
## 3 Germany 3 2 2
## 4 Australia 4 7 8
## 5 Canada 5 6 7
## 6 Switzerland 6 5 5
## 7 Japan 7 4 3
## 8 New Zealand 8 10 10
## 9 Korea 9 9 9
## 10 Singapore 10 8 6
## 11 Finland 11 11 11
## 12 Israel 12 12 12
## 13 France 13 13 13
## 14 Czech Republic 14 14 14
## 15 Hungary 15 19 19
## 16 Russia 16 17 17
## 17 Chile 17 18 18
## 18 China 18 15 15
## 19 United Arab Emirates 19 16 16
## 20 Jordan 20 24 22
## 21 Turkey 21 22 21
## 22 Thailand 22 20 20
## 23 Mexico 23 21 23
## 24 Brazil 24 23 25
## 25 South Africa 25 28 28
## 26 Colombia 26 25 24
## 27 Indonesia 27 27 27
## 28 Ghana 28 26 26
## 29 Kenya 29 29 29
## 30 Nigeria 30 30 30
# Split the FA-weighted values into two dimensions and plot their contribution per country.
df.BackToDetails <- as.data.frame(minMaxMultiFA.Weights)
df.BackToDetails[["Country"]] <- rownames(df.BackToDetails)
# Part 1: columns 1-3 plus the country name.
df.BackToDetails.p1 <- df.BackToDetails[, 1:3]
df.BackToDetails.p1[["Country"]] <- rownames(df.BackToDetails.p1)
# Part 2: columns 4-7 (the last one is Country).
df.BackToDetails.p2 <- df.BackToDetails[, 4:7]
# Row-wise sums of the weighted indicators within each part.
df.BackToDetails.p2 <- adply(df.BackToDetails.p2, 1, transform, sumEdu = sum(LearningCurve_Index, HDIEducat_Index, H_Index))
df.BackToDetails.p1 <- adply(df.BackToDetails.p1, 1, transform, sumBuEco = sum(Unemployment, WEF_Score, Freedom_Index))
df.BackToDetails <- data.frame(
  Education = df.BackToDetails.p2$sumEdu,
  BussEcon = df.BackToDetails.p1$sumBuEco,
  Country = df.BackToDetails.p1$Country
)
df.BackToDetails$Country <- as.character(df.BackToDetails$Country)
# Shorten long country names so that the axis labels stay readable.
countryAbbreviations <- c(
  "United States" = "USA",
  "United Arab Emirates" = "UAE",
  "United Kingdom" = "UK",
  "Czech Republic" = "Czech Rep.",
  "South Africa" = "S. Africa",
  "New Zealand" = "N. Zealand"
)
for (fullCountryName in names(countryAbbreviations)) {
  df.BackToDetails$Country[df.BackToDetails$Country == fullCountryName] <-
    countryAbbreviations[[fullCountryName]]
}
# df.BackToDetails$Country[df.BackToDetails$Country == "Switzerland"] <- "Swizerl."
# Long format: one row per country and dimension.
df.meltedBackToDetails <- melt(df.BackToDetails, id = "Country")
# Stacked bar chart; bars are ordered by value and their height comes from the `weight` aesthetic.
e9 <- ggplot(data = df.meltedBackToDetails, aes(reorder(Country, value), fill = variable, weight = value)) +
  geom_bar() +
  coord_cartesian(ylim = c(0, 100)) +
  scale_y_continuous(breaks = seq(0, 100, 5)) +
  ggtitle("Bar chart decomposition of the Attractiveness Index (MM.FA)") +
  scale_fill_discrete(name = "Dimensions") +
  ylab("Index Value") +
  xlab("Countries")
e9
# Table
# Share of each dimension in the sum of both dimensions, per country
# (the two shares add up to 1). cbind() turns each vector into a one-column matrix.
EducatValue <- cbind(df.BackToDetails$Education / (df.BackToDetails$Education + df.BackToDetails$BussEcon))
BusinessValue <- cbind(df.BackToDetails$BussEcon / (df.BackToDetails$Education + df.BackToDetails$BussEcon))
# The first column is unnamed in the call, so data.frame() derives its name
# from the expression (it prints as "cbind.df.BackToDetails.Country.").
df.BackToDetails.table <- data.frame(cbind(df.BackToDetails$Country), EducatValue, BusinessValue)
head(df.BackToDetails.table)
## cbind.df.BackToDetails.Country. EducatValue BusinessValue
## 1 Australia 0.6522627 0.3477373
## 2 Brazil 0.6148834 0.3851166
## 3 Canada 0.6495140 0.3504860
## 4 Chile 0.5960276 0.4039724
## 5 China 0.6448532 0.3551468
## 6 Colombia 0.5681968 0.4318032
Is there any correlation between the two values?
# Correlation (cor() default method) between the Value and GDPinDollars columns of gdpTiger.
# NOTE(review): gdpTiger is not created in the visible part of this script - confirm where it is built.
cor(gdpTiger$Value, gdpTiger$GDPinDollars)
## [1] 0.7891544