Citation:

Timmer, M. P., Dietzenbacher, E., Los, B., Stehrer, R. and de Vries, G. J. (2015), “An Illustrated User Guide to the World Input–Output Database: the Case of Global Automotive Production”, Review of International Economics., 23: 575–605

This data is available for 1995-2011, courtesy of the World Input-Output Database (WOID). The data set used for this analysis was released in 2011, and the countries represented here, including the “rest of the world” (RoW), cover approximately 85% of world gross domestic product (GDP) in 2008 (at current exchange rates).

The analysis below creates an economic index based on Factor Analysis of the location quotients for the world economy, reducing 35 sectors into 5 factor scores. The location quotients are constructed from the value of inputs to each industrial sector for each country. The data set aggregated from the World Input-Output Database contains the industrial inputs, or a partial production function, for each industry. These values are then used to create location quotients to determine the concentration of industries in each country compared to their concentrations globally. The factor analysis is performed on the location quotients to determine which industries vary together, creating a typology of national economies.

One challenge that arises in the analysis of these factors is that the industries are grouped statistically, and the groups themselves are difficult to describe. However, in an effort to make this understandable, we have characterized the factors by the sectors with high positive loadings for each factor. It is important to remember that these are composite variables that combine the location quotients of multiple economic sectors into an index score for each factor.

library(rgdal)
## Loading required package: sp
## rgdal: version: 1.1-10, (SVN revision 622)
##  Geospatial Data Abstraction Library extensions to R successfully loaded
##  Loaded GDAL runtime: GDAL 1.11.4, released 2016/01/25
##  Path to GDAL shared files: /Users/rmt33/Library/R/3.3/library/rgdal/gdal
##  Loaded PROJ.4 runtime: Rel. 4.9.1, 04 March 2015, [PJ_VERSION: 491]
##  Path to PROJ.4 shared files: /Users/rmt33/Library/R/3.3/library/rgdal/proj
##  Linking to sp version: 1.2-3
library(plyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(magrittr)
library(knitr)

sectors <- c("Final","AgriForest","Mining","Food","Texile","Leather","Wood","Paper","RefPet","Chem","RubPlast","Mineral","Metal", "Machine","Electr","TransEq","Manuf","UtilSupply","Constr","VehSalMain","VehWholTr","VehRetTr","Hospitality","LandTrans","WatTrans","AirTrans","OtTrans","TeleCom","FinInter","RealEst","BusRents","PubAdmin","Educa","HealthSocW","ComSocServ","Household")

wio <- read.table("https://datahub.io/dataset/ac9f1861-8dbd-4111-bc0e-9793a397e377/resource/fbf44494-038a-4d5a-bcfb-e136fd4b3a05/download/wiot11rowsep12.txt", 
                  header=TRUE, sep="\t")

wiodf_fin <- wio[,c(1438:1641)] %>% rowSums() %>% aggregate( by=list(iso=wio$ISO), FUN=sum )
wiodf <- wio[,c(3:1437)] %>% aggregate( by=list(iso=wio$ISO), FUN=sum ) %>%
  join(.,wiodf_fin) 
## Joining by: iso
# wiodf <- wiodf[wiodf$iso!="TOT"]
# reshape(wiodf,idvar=colnames(wiodf),direction="wide")
col_levels <- unique(substr(colnames(wiodf)[-1],5,length(colnames(wiodf)[-1])))[1:35]

for (i in 1:length(col_levels)){
  #print(paste("collating ", col_levels[i]))
  name = col_levels[i]
  wiodf <- wiodf %>% mutate(.,tmp_name = rowSums( .[grep(col_levels[i],names(.))] ) )
  colnames(wiodf)[colnames(wiodf) == "tmp_name"] <- col_levels[i]
  }
data <- wiodf[wiodf$iso!="TOT",1437:1472]
rownames(data) <- wiodf$iso[wiodf$iso!="TOT"]

##########################################################
##  Import EPI data
##########################################################
epi <- read.table("https://datahub.io/dataset/95842aa8-ea64-48fd-afae-c2d708ee2620/resource/943b6d95-dc09-4cb4-81a7-31f158fadeac/download/2016epidraftsummary.txt", 
                  header=TRUE, sep="\t")
##########################################################
##  Make the location quotients
##########################################################

# Calculate the share of each sector for each country
share_tech_iso <- data / rowSums (data)
colnames(share_tech_iso)<-sectors


share_tech_total <- colSums (data) / sum (data)

# At the international level, agriculture accounts for 10 % of the economic activities (0.104).
# check the sum is 1
# sum(share_tech_total)
names(share_tech_total)<-sectors
# look at the share of the global economy represented by each sector
sort(round(share_tech_total,4),decreasing=TRUE) 
##       Final  AgriForest      Mining        Food      Constr       Metal 
##      0.3536      0.1467      0.0913      0.0851      0.0334      0.0236 
##    BusRents      Electr     TransEq        Chem    FinInter    PubAdmin 
##      0.0213      0.0204      0.0183      0.0158      0.0152      0.0151 
##   VehWholTr      RefPet     Machine  HealthSocW  UtilSupply  ComSocServ 
##      0.0132      0.0126      0.0115      0.0114      0.0113      0.0098 
## Hospitality   LandTrans     RealEst    VehRetTr      Texile       Paper 
##      0.0092      0.0091      0.0090      0.0084      0.0074      0.0071 
##    RubPlast     TeleCom     Mineral     OtTrans       Educa       Manuf 
##      0.0063      0.0060      0.0050      0.0044      0.0042      0.0033 
##        Wood  VehSalMain    AirTrans    WatTrans     Leather   Household 
##      0.0029      0.0026      0.0021      0.0020      0.0014      0.0000
# Create the location quotients as a ratio of: the proportion of the national economy
# divided by the proportion of the global economy for that sector.
LQ <- t(share_tech_iso)/ share_tech_total

LQ[is.na(LQ)] <- 0 
LQ[which(LQ==Inf)] <- 0 
LQ <- t(LQ)
colnames(LQ)<-sectors

The blue in the heatmap represents basic indusctires for that country. Here, basic industries are defined as those with location quotients over 1.38, or in the top 80%; brown represents non-basic industries are below 0.75, or in the top 56%; and the shades in the middle represent sectors with location quotients at about the same proportion of the national economy as they are of the global economy.

library(gplots)
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
# following code limits the lowest and highest color to 5%, and 95% of your range, respectively
quantile.range <- quantile(LQ, probs = seq(0, 1, 0.01))
palette.breaks <- seq(quantile.range["35%"], quantile.range["80%"], 0.1)
# use http://colorbrewer2.org/ to find optimal divergent color palette (or set own)
color.palette  <- colorRampPalette(c("#d8b365", "#f5f5f5", "#5ab4ac"))(length(palette.breaks) - 1)


# see quantile definitions above
heatmap.2(
  LQ,
  main       = "Location Quotients",
  dendrogram = "row",
  scale      = "none",
  trace      = "none",
  key        = FALSE,
  labRow     = rownames(LQ),
  labCol     = colnames(LQ),
  col    = color.palette,
  breaks = palette.breaks
)
legend("topleft",      # location of the legend on the heatmap plot
    legend = c(round(quantile.range[["35%"]],2), round(quantile.range[["57%"]],2), round(quantile.range[["80%"]],2)), # category labels
    col = c(color.palette[1],color.palette[3],color.palette[6]),  # color key
    lty= 1,             # line style
    lwd = 10            # line width
)

Now we can preform the factor analysis with both 4 and 5 factors. After looking at the output of both, the 5-factor option was deemed a better fit due to explaining more of the total variance and a more logical grouping of economic sectors with high absolute values in the factor loadings.

##########################################################
##  Perform Factor Analysis using Maximum Likelihood
##  with Varimax Rotation
##########################################################

fact1=factanal(LQ[,-20],factors=4,rotation="varimax")
fact2=factanal(LQ[,-20],factors=5,rotation="varimax")

fact2 # 
## 
## Call:
## factanal(x = LQ[, -20], factors = 5, rotation = "varimax")
## 
## Uniquenesses:
##       Final  AgriForest      Mining        Food      Texile     Leather 
##       0.005       0.005       0.016       0.134       0.152       0.399 
##        Wood       Paper      RefPet        Chem    RubPlast     Mineral 
##       0.469       0.758       0.972       0.435       0.226       0.551 
##       Metal     Machine      Electr     TransEq       Manuf  UtilSupply 
##       0.320       0.401       0.399       0.464       0.573       0.776 
##      Constr   VehWholTr    VehRetTr Hospitality   LandTrans    WatTrans 
##       0.602       0.506       0.614       0.662       0.534       0.811 
##    AirTrans     OtTrans     TeleCom    FinInter     RealEst    BusRents 
##       0.633       0.474       0.633       0.062       0.527       0.497 
##    PubAdmin       Educa  HealthSocW  ComSocServ   Household 
##       0.481       0.657       0.427       0.733       0.756 
## 
## Loadings:
##             Factor1 Factor2 Factor3 Factor4 Factor5
## Final       -0.776  -0.294  -0.174  -0.525         
## AgriForest   0.881  -0.230   0.311  -0.210   0.157 
## Mining      -0.308   0.239           0.901  -0.119 
## Food        -0.415   0.672  -0.414   0.146  -0.221 
## Texile       0.104  -0.143                   0.899 
## Leather      0.187  -0.157          -0.168   0.716 
## Wood                 0.254   0.673                 
## Paper        0.149   0.448                  -0.110 
## RefPet              -0.105                         
## Chem         0.553   0.137  -0.382  -0.129   0.281 
## RubPlast     0.760  -0.120          -0.172   0.383 
## Mineral      0.183  -0.304   0.307           0.476 
## Metal        0.701  -0.356   0.182  -0.101   0.136 
## Machine      0.761                  -0.127         
## Electr       0.742          -0.158  -0.101   0.115 
## TransEq      0.627  -0.235  -0.122  -0.204  -0.176 
## Manuf               -0.122   0.603  -0.122   0.181 
## UtilSupply                   0.465                 
## Constr              -0.136   0.600                 
## VehWholTr   -0.235   0.265   0.358   0.441  -0.213 
## VehRetTr                             0.492  -0.353 
## Hospitality -0.461          -0.286  -0.178   0.110 
## LandTrans   -0.146  -0.257   0.436           0.432 
## WatTrans             0.413                         
## AirTrans    -0.206   0.457           0.330         
## OtTrans     -0.103   0.480   0.507   0.129  -0.101 
## TeleCom              0.211           0.561         
## FinInter                    -0.398   0.882         
## RealEst     -0.203   0.354   0.263   0.247  -0.420 
## BusRents             0.502  -0.278   0.217  -0.349 
## PubAdmin    -0.313   0.163  -0.403   0.421  -0.235 
## Educa                0.547           0.128         
## HealthSocW  -0.109   0.562  -0.466          -0.170 
## ComSocServ           0.425  -0.196   0.110  -0.180 
## Household           -0.377          -0.135   0.275 
## 
##                Factor1 Factor2 Factor3 Factor4 Factor5
## SS loadings      5.198   3.456   3.438   3.366   2.877
## Proportion Var   0.149   0.099   0.098   0.096   0.082
## Cumulative Var   0.149   0.247   0.345   0.442   0.524
## 
## Test of the hypothesis that 5 factors are sufficient.
## The chi square statistic is 766.36 on 430 degrees of freedom.
## The p-value is 2.84e-21

The output above shows the details of the fit, which is ok, but not that good. This may need to be re-fitted or use another method for consolidating.

#get reproduced correlation matrix
repro2=fact2$loadings%*%t(fact2$loadings)
#round(repro1,2)
#residual correlation matrix
resid2=fact2$cor-repro2
#round(resid1,2)

#get root-mean squared residuals
len=length(resid2[upper.tri(resid2)])
RMSR1=sqrt(sum(resid2[upper.tri(resid2)]^2)/len)
RMSR1
## [1] 0.09012746
#get proportion of residuals greater than 0.05 in absolute value
sum(rep(1,len)[abs(resid2[upper.tri(resid2)])>0.05])/len
## [1] 0.4319328
# Plot the factors against each other.
par(mfrow=c(3,3),mar = c(4,4,1,1))
plot(fact1$loadings, pch=18, col='red')
abline(h=0)
abline(v=0)
text(fact1$loadings, labels=rownames(fact1$loadings),xpd=TRUE)

#get loading plot for factors 1 through 4
plot(fact2$loadings[,c(1,3)], pch=18, col='red')
abline(h=0)
abline(v=0)
text(fact2$loadings[,c(1,3)], labels=rownames(fact2$loadings),xpd=TRUE)

plot(fact2$loadings[,c(1,4)], pch=18, col='red')
abline(h=0)
abline(v=0)
text(fact2$loadings[,c(1,4)], labels=rownames(fact2$loadings),xpd=TRUE)

plot(fact2$loadings[,c(1,5)], pch=18, col='red')
abline(h=0)
abline(v=0)
text(fact2$loadings[,c(1,5)], labels=rownames(fact2$loadings),xpd=TRUE)

plot(fact2$loadings[,c(2,3)], pch=18, col='red')
abline(h=0)
abline(v=0)
text(fact2$loadings[,c(2,3)], labels=rownames(fact2$loadings),xpd=TRUE)

plot(fact2$loadings[,c(2,4)], pch=18, col='red')
abline(h=0)
abline(v=0)
text(fact2$loadings[,c(2,4)], labels=rownames(fact2$loadings),xpd=TRUE)

plot(fact2$loadings[,c(2,5)], pch=18, col='red')
abline(h=0)
abline(v=0)
text(fact2$loadings[,c(2,5)], labels=rownames(fact2$loadings),xpd=TRUE)

plot(fact2$loadings[,c(3,4)], pch=18, col='red')
abline(h=0)
abline(v=0)
text(fact2$loadings[,c(3,4)], labels=rownames(fact2$loadings),xpd=TRUE)

plot(fact2$loadings[,c(3,5)], pch=18, col='red')
abline(h=0)
abline(v=0)
text(fact2$loadings[,c(3,5)], labels=rownames(fact2$loadings),xpd=TRUE)

par(mfrow=c(1,2))
(fact1$loadings[1:35,])
##                  Factor1     Factor2      Factor3       Factor4
## Final       -0.788681188 -0.26897114 -0.200564096 -0.5103117583
## AgriForest   0.875513346 -0.27562990  0.310041071 -0.2204883466
## Mining      -0.301898566  0.25866714  0.088263730  0.9100724711
## Food        -0.407719197  0.73213917 -0.389052724  0.1452990750
## Texile       0.223947761 -0.39288283  0.018241300 -0.0687436377
## Leather      0.276066738 -0.35278221 -0.005634329 -0.1678157040
## Wood        -0.092669584  0.25625021  0.726210800 -0.0007514095
## Paper        0.141819905  0.46870376  0.094424064 -0.0938204028
## RefPet      -0.051757164 -0.15129881  0.019040882 -0.0635492116
## Chem         0.598401145  0.04545400 -0.381797176 -0.1251158177
## RubPlast     0.794553078 -0.20752863 -0.056397412 -0.1891089327
## Mineral      0.234017817 -0.44492091  0.266055612 -0.0347107412
## Metal        0.701778439 -0.40070280  0.161887387 -0.1059937827
## Machine      0.753301568 -0.01182171  0.058719703 -0.1478753780
## Electr       0.747424047  0.01065949 -0.151367913 -0.1009976260
## TransEq      0.578719492 -0.13112227 -0.071025158 -0.2376279793
## Manuf       -0.008639942 -0.18001409  0.629353924 -0.1371327129
## UtilSupply   0.086203848 -0.04637243  0.436824570 -0.0153630702
## Constr       0.078731945 -0.19727266  0.568880806 -0.0525532167
## VehWholTr   -0.246608656  0.29074998  0.373661412  0.4390534183
## VehRetTr    -0.133479288  0.03089409 -0.015689275  0.4831075368
## Hospitality -0.441086708 -0.03983103 -0.315783474 -0.1595994942
## LandTrans   -0.092623487 -0.40551799  0.395771176  0.0527375680
## WatTrans    -0.069847537  0.36387043  0.112903878  0.0395347063
## AirTrans    -0.183295035  0.44093279 -0.073855431  0.3391370247
## OtTrans     -0.111506611  0.47024629  0.567058756  0.1183095319
## TeleCom     -0.019453710  0.21790912  0.046494515  0.5681999684
## FinInter    -0.010344016  0.04249293 -0.376072348  0.8662081374
## RealEst     -0.248572049  0.44558467  0.282298793  0.2520735251
## BusRents    -0.092640723  0.60367173 -0.223195365  0.2006728001
## PubAdmin    -0.320639296  0.25851126 -0.381680100  0.4099274610
## Educa       -0.096430022  0.53670685  0.113392447  0.1418789109
## HealthSocW  -0.103258199  0.59687263 -0.435928989  0.0023447400
## ComSocServ  -0.058010519  0.48133650 -0.190385769  0.1079957185
## Household   -0.048518582 -0.46386697 -0.003370274 -0.1227595297
heatmap(fact2$loadings[1:35,])

The factors can be characterized by looking down the columns of each factor to identify sectors with high absolute values and findings themes among those sectors. For the factor analysis shown above, the following sectors are used to characterize the factors.

Factor 1 - Capital Goods and Heavy Industrial Manufacturing

High positive values in: Agriculture, hunting, forestry and fishing; Basic metals and fabricated metal; Machinery, not elsewhere classified; Rubber and plastics; Transport equipment; Electrical and optical equipment; Chemicals and chemical products

High Negative values in: Final use aggregated

Factor 2 - Human Capital Investment

High positive values in: Food, beverages and tobacco; Renting of machinery & equipment and other business activities; Health and social work; Education

High negative values in: Private households with employed persons; Other non-metallic mineral; Inland Transport; Textiles and textile products; Basic metals and fabricated metal; Leather, leather products and footwear

Factor 3 - Building Materials

High positive values in: Wood and products of wood and cork; Manufacturing, not elsewhere classified; recycling; Construction; Machinery, not elsewhere classified; Rubber and plastics; Transport Equpiment; Electricity, gas and water supply; and Chemicals and chemical products

High negative values in: Health and social work; Food, beverages and tobacco; Public administration and defence; compulsory social security; Financial intermediation

Factor 4 - Mining

High positive values in: Mining and quarrying; Financial intermediation

High negative values in: Final use aggregate

Factor 5 - Textiles and Leather

High positive values in: Textiles and textile products; Leather, leather products and footwear

High negative values in: Real estate activities; Retail trade and repair, except of motor vehicles and motorcycles; Renting of machinery & equipment and other business activities

###############################
## Apply factor weights to LQs
###############################
fal <- matrix(fact2$loadings[c(1:35),],nrow=35, ncol=5, dimnames = list(c(sectors[2:36]), c("Capital Goods", "Human Capital Investment", "Building Materials", "Extractive", "Textiles")))
LQ <- as.data.frame(LQ)

factors <- data.frame(row.names = rownames(LQ))
# select only the non-zero sector-factor loadings and multiply them by the LQs for those sectors
for (i in 1:nrow(LQ)) {
  factors$CapGoods[i] <- sum(data.frame(
    mapply(`*`,LQ[ i,c(1:6,8,10:16,20,22,23,25,26,29,31,33) ], 
           fal[c(1:6,8,10:16,20,22,23,25,26,29,31,33),1],SIMPLIFY=FALSE)) )
  factors$HumanCap[i] <- sum(data.frame(
    mapply(`*`,LQ[ i, c(1:13,16,17,19,20,23:27,29:35)], 
           fal[c(1:13,16,17,19,20,23:27,29:35),2],SIMPLIFY=FALSE)))
  factors$BuildingMat[i] <- sum(data.frame(
    mapply(`*`,LQ[ i, c(1,2,4,7,10,12,13,15:20,22,23:27,29:35) ], 
           fal[c(1,2,4,7,10,12,13,15:20,22,23:27,29:35),3],SIMPLIFY=FALSE)))
  factors$Extractive[i] <- sum(data.frame(
    mapply(`*`,LQ[ i, c(1:4,6,10,11,13:17,20:22,25:32,34,35) ], 
           fal[c(1:4,6,10,11,13:17,20:22,25:32,34,35),4],SIMPLIFY=FALSE)))
  factors$Textiles[i] <- sum(data.frame(
    mapply(`*`,LQ[ i, c(2:6,8,10:13,15:17,20:23,26,29:31,33:35) ], 
           fal[c(2:6,8,10:13,15:17,20:23,26,29:31,33:35),5],SIMPLIFY=FALSE)))
}
# transform to matrix for plotting
factors<-as.matrix(factors)

# redefine the palette
quantile.range <- quantile(as.matrix(factors), probs = seq(0, 1, 0.01))
palette.breaks <- seq(quantile.range["5%"], quantile.range["95%"], 0.1)

color.palette  <- colorRampPalette(c("#d8b365", "#f5f5f5", "#5ab4ac"))(length(palette.breaks) - 1)

heatmap.2(
  t(factors),
  main       = "Weighted Factors",
  dendrogram = "column",
  scale      = "none",
  trace      = "none",
  key        = FALSE,
  labRow     = colnames(factors),
  labCol     = rownames(factors),
  col    = color.palette,
  breaks = palette.breaks
)

factors<- as.data.frame(factors)
# Combined data set of factors and epi
# replace RestofWorld for NA country
factors$iso <- as.factor(rownames(factors))
comb <- join(factors,epi,type="left") %>% join(.,wiodf[,c(1,1438:1472)])
## Joining by: iso
## Joining by: iso
comb$country<-factor(comb$country, levels=c(levels(comb$country), "RestofWorld"))
comb$country[is.na(comb$country)] = "RestofWorld"
rownames(comb)<- comb$iso
heatmap(fact2$loadings)

par(mar=c(4,4,3,1), mfrow=c(2,2))
plot(lm(comb$EPI.2016 ~ comb$CapGoods))

par(mar=c(4,4,3,1), mfrow=c(2,2))
plot(lm(comb$EPI.2016 ~ comb$HumanCap))

par(mar=c(4,4,3,1), mfrow=c(2,2))
plot(glm(comb$EPI.2016 ~ comb$BuildingMat))

par(mar=c(4,4,3,1), mfrow=c(2,2))
plot(lm(comb$EPI.2016 ~ comb$Extractive))

par(mar=c(4,4,3,1), mfrow=c(2,2))
plot(lm(comb$EPI.2016 ~ comb$Textiles))

#load relevant libraries
library(TeachingDemos)
library(fpc)
library(cluster)

par(mfrow=c(2,1))
# raw location quotients
#get the distance matrix
dist1<-dist(LQ, method="euclidean")

#now do clustering – use WARD’s method
clust1<-hclust(dist1,method="ward.D")

#draw the dendrogram
plot(clust1,labels=, hang=-1, cex=0.7, xlab="Country",ylab="Distance",main="Clustering for unweighted location quotients of countries")
rect.hclust(clust1,k=5)

#get membership vector (which country is in which group)
cuts1=cutree(clust1,k=5)
#cuts1

# weighted location quotients
#get the distance matrix
dist2<-dist(comb[,1:5], method="euclidean")

#now do clustering – use WARD’s method
clust2<-hclust(dist2,method="ward.D")

#draw the dendrogram
plot(clust2,labels=comb[,6], hang=-1, cex=0.7, xlab="Country",ylab="Distance",main="Clustering for weighted location quotients of countries")
rect.hclust(clust2,k=5)

#get membership vector (which country is in which group)
cuts2=cutree(clust2,k=5)
#cuts2

weighted_cuts <- data.frame(cbind(cuts2,iso=names(cuts2)))
comb <- join(comb,weighted_cuts)
## Joining by: iso
for (i in 1:5) {
  print(paste("Cluster ", i, sep=" "))
  print(comb$country[comb$cuts2 == i])
  #print(" ")
}
## [1] "Cluster  1"
##  [1] Australia             Belgium               Cyprus               
##  [4] Denmark               Finland               UnitedKingdom        
##  [7] Greece                Ireland               Lithuania            
## [10] Malta                 Netherlands           Sweden               
## [13] UnitedStatesofAmerica
## 181 Levels: Afghanistan Albania Algeria Angola ... RestofWorld
## [1] "Cluster  2"
##  [1] Austria       Canada        CzechRepublic Germany       France       
##  [6] Hungary       Japan         Poland        Russia        Slovakia     
## [11] Slovenia     
## 181 Levels: Afghanistan Albania Algeria Angola ... RestofWorld
## [1] "Cluster  3"
##  [1] Bulgaria    Brazil      Spain       Indonesia   India      
##  [6] Italy       Mexico      Portugal    Romania     RestofWorld
## [11] Turkey     
## 181 Levels: Afghanistan Albania Algeria Angola ... RestofWorld
## [1] "Cluster  4"
## [1] China      SouthKorea Taiwan    
## 181 Levels: Afghanistan Albania Algeria Angola ... RestofWorld
## [1] "Cluster  5"
## [1] Estonia    Luxembourg Latvia    
## 181 Levels: Afghanistan Albania Algeria Angola ... RestofWorld
par(mfrow=c(2,3))
for (i in 1:5) {
plot(comb$EPI.2016 ~ comb[,i],
     pch = 16,
     xlab = colnames(comb)[i], ylab = "2016 EPI Score",
     col = comb$cuts,
     xlim=c(-1.5,9),
     ylim = c(0,100))
abline(lm(comb$EPI.2016 ~ comb[,i]))
text(6,10,labels=paste("correlation = ",round(cor(comb$EPI.2016, comb[,i] , use = "pairwise.complete.obs"),2)))
#text(comb$EPI.2016 ~ comb$Textiles, labels=comb$iso)
legend("bottomleft",
       legend=c("Cluster 1", "Cluster 2","Cluster 3","Cluster 4","Cluster 5"),
       pch = 16, border = FALSE,
       col = c("black","red","green","blue","cyan"))
}

library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:TeachingDemos':
## 
##     cnvrt.coords, subplot
## The following objects are masked from 'package:dplyr':
## 
##     combine, src, summarize
## The following objects are masked from 'package:plyr':
## 
##     is.discrete, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, round.POSIXt, trunc.POSIXt, units
#par(mfrow=c(1,5),mar=c(4,4,3,1))
#for (i in 1:5) {
#plot(comb$EPI.2016 ~ comb[,i],
#     pch = 16,
#     col = comb$cuts, main = "2016 EPI Scores by Factor",
#     xlab = colnames(comb)[i], 
#     #ylab = "2016 EPI Score",
#     ylim = c(0,100)
#     #xlim=c(-1.5,9)
#     )
#abline(lm(comb$EPI.2016 ~ comb[,i]))
#text(6,10,labels=paste("r.squared = ",round(summary(lm(comb$EPI.2016 ~ comb[,i]))$r.squared,2)))
#text(comb$EPI.2016 ~ comb[,i], labels=comb$iso)
#}

par(mfrow=c(1,3),mar=c(4,4,3,1))
for (i in 10:12) {
plot(0,0, ylim = c(0,100),
     xlim=c(0.5,5.5), type="n",
     xlab = "Cluster",
     ylab = "2016 Score",
     main = colnames(comb)[i])
  points(comb[,i] ~ comb$cuts2,
     pch = 16,
     col = comb$cuts
     )
  }

par(mfrow=c(1,3),mar=c(4,4,3,1))
for (i in 13:15) {
plot(0,0, ylim = c(0,100),
     xlim=c(0.5,5.5), type="n",
     xlab = "Cluster",
     ylab = "2016 Score",
     main = colnames(comb)[i])
  points(comb[,i] ~ comb$cuts2,
     pch = 16,
     col = comb$cuts
     )
  }

par(mfrow=c(2,3),mar=c(4,4,3,1))
for (i in 16:20) {
plot(0,0, ylim = c(0,100),
     xlim=c(0.5,5.5), type="n",
     xlab = "Cluster",
     ylab = "2016 Score",
     main = colnames(comb)[i])
  points(comb[,i] ~ comb$cuts2,
     pch = 16,
     col = comb$cuts
     )
  }