Citation:
This data is available for 1995-2011, courtesy of the World Input-Output Database (WOID). The data set used for this analysis was released in 2011, and the countries represented here, including the “rest of the world” (RoW), cover approximately 85% of world gross domestic product (GDP) in 2008 (at current exchange rates).
The analysis below creates an economic index based on Factor Analysis of the location quotients for the world economy, reducing 35 sectors into 5 factor scores. The location quotients are constructed from the value of inputs to each industrial sector for each country. The data set aggregated from the World Input-Output Database contains the industrial inputs, or a partial production function, for each industry. These values are then used to create location quotients to determine the concentration of industries in each country compared to their concentrations globally. The factor analysis is performed on the location quotients to determine which industries vary together, creating a typology of national economies.
One challenge that arises in the analysis of these factors is that the industries are grouped statistically, and the groups themselves are difficult to describe. However, in an effort to make this understandable, we have characterized the factors by the sectors with high positive loadings for each factor. It is important to remember that these are composite variables that combine the location quotients of multiple economic sectors into an index score for each factor.
library(rgdal)
## Loading required package: sp
## rgdal: version: 1.1-10, (SVN revision 622)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 1.11.4, released 2016/01/25
## Path to GDAL shared files: /Users/rmt33/Library/R/3.3/library/rgdal/gdal
## Loaded PROJ.4 runtime: Rel. 4.9.1, 04 March 2015, [PJ_VERSION: 491]
## Path to PROJ.4 shared files: /Users/rmt33/Library/R/3.3/library/rgdal/proj
## Linking to sp version: 1.2-3
library(plyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(magrittr)
library(knitr)
sectors <- c("Final","AgriForest","Mining","Food","Texile","Leather","Wood","Paper","RefPet","Chem","RubPlast","Mineral","Metal", "Machine","Electr","TransEq","Manuf","UtilSupply","Constr","VehSalMain","VehWholTr","VehRetTr","Hospitality","LandTrans","WatTrans","AirTrans","OtTrans","TeleCom","FinInter","RealEst","BusRents","PubAdmin","Educa","HealthSocW","ComSocServ","Household")
wio <- read.table("https://datahub.io/dataset/ac9f1861-8dbd-4111-bc0e-9793a397e377/resource/fbf44494-038a-4d5a-bcfb-e136fd4b3a05/download/wiot11rowsep12.txt",
header=TRUE, sep="\t")
wiodf_fin <- wio[,c(1438:1641)] %>% rowSums() %>% aggregate( by=list(iso=wio$ISO), FUN=sum )
wiodf <- wio[,c(3:1437)] %>% aggregate( by=list(iso=wio$ISO), FUN=sum ) %>%
join(.,wiodf_fin)
## Joining by: iso
# wiodf <- wiodf[wiodf$iso!="TOT"]
# reshape(wiodf,idvar=colnames(wiodf),direction="wide")
col_levels <- unique(substr(colnames(wiodf)[-1],5,length(colnames(wiodf)[-1])))[1:35]
for (i in 1:length(col_levels)){
#print(paste("collating ", col_levels[i]))
name = col_levels[i]
wiodf <- wiodf %>% mutate(.,tmp_name = rowSums( .[grep(col_levels[i],names(.))] ) )
colnames(wiodf)[colnames(wiodf) == "tmp_name"] <- col_levels[i]
}
data <- wiodf[wiodf$iso!="TOT",1437:1472]
rownames(data) <- wiodf$iso[wiodf$iso!="TOT"]
##########################################################
## Import EPI data
##########################################################
epi <- read.table("https://datahub.io/dataset/95842aa8-ea64-48fd-afae-c2d708ee2620/resource/943b6d95-dc09-4cb4-81a7-31f158fadeac/download/2016epidraftsummary.txt",
header=TRUE, sep="\t")
##########################################################
## Make the location quotients
##########################################################
# Calculate the share of each sector for each country
share_tech_iso <- data / rowSums (data)
colnames(share_tech_iso)<-sectors
share_tech_total <- colSums (data) / sum (data)
# At the international level, agriculture accounts for 10 % of the economic activities (0.104).
# check the sum is 1
# sum(share_tech_total)
names(share_tech_total)<-sectors
# look at the share of the global economy represented by each sector
sort(round(share_tech_total,4),decreasing=TRUE)
## Final AgriForest Mining Food Constr Metal
## 0.3536 0.1467 0.0913 0.0851 0.0334 0.0236
## BusRents Electr TransEq Chem FinInter PubAdmin
## 0.0213 0.0204 0.0183 0.0158 0.0152 0.0151
## VehWholTr RefPet Machine HealthSocW UtilSupply ComSocServ
## 0.0132 0.0126 0.0115 0.0114 0.0113 0.0098
## Hospitality LandTrans RealEst VehRetTr Texile Paper
## 0.0092 0.0091 0.0090 0.0084 0.0074 0.0071
## RubPlast TeleCom Mineral OtTrans Educa Manuf
## 0.0063 0.0060 0.0050 0.0044 0.0042 0.0033
## Wood VehSalMain AirTrans WatTrans Leather Household
## 0.0029 0.0026 0.0021 0.0020 0.0014 0.0000
# Create the location quotients as a ratio of: the proportion of the national economy
# divided by the proportion of the global economy for that sector.
LQ <- t(share_tech_iso)/ share_tech_total
LQ[is.na(LQ)] <- 0
LQ[which(LQ==Inf)] <- 0
LQ <- t(LQ)
colnames(LQ)<-sectors
The blue in the heatmap represents basic indusctires for that country. Here, basic industries are defined as those with location quotients over 1.38
, or in the top 80%
; brown represents non-basic industries are below 0.75
, or in the top 56%
; and the shades in the middle represent sectors with location quotients at about the same proportion of the national economy as they are of the global economy.
library(gplots)
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
# following code limits the lowest and highest color to 5%, and 95% of your range, respectively
quantile.range <- quantile(LQ, probs = seq(0, 1, 0.01))
palette.breaks <- seq(quantile.range["35%"], quantile.range["80%"], 0.1)
# use http://colorbrewer2.org/ to find optimal divergent color palette (or set own)
color.palette <- colorRampPalette(c("#d8b365", "#f5f5f5", "#5ab4ac"))(length(palette.breaks) - 1)
# see quantile definitions above
heatmap.2(
LQ,
main = "Location Quotients",
dendrogram = "row",
scale = "none",
trace = "none",
key = FALSE,
labRow = rownames(LQ),
labCol = colnames(LQ),
col = color.palette,
breaks = palette.breaks
)
legend("topleft", # location of the legend on the heatmap plot
legend = c(round(quantile.range[["35%"]],2), round(quantile.range[["57%"]],2), round(quantile.range[["80%"]],2)), # category labels
col = c(color.palette[1],color.palette[3],color.palette[6]), # color key
lty= 1, # line style
lwd = 10 # line width
)
Now we can preform the factor analysis with both 4 and 5 factors. After looking at the output of both, the 5-factor option was deemed a better fit due to explaining more of the total variance and a more logical grouping of economic sectors with high absolute values in the factor loadings.
##########################################################
## Perform Factor Analysis using Maximum Likelihood
## with Varimax Rotation
##########################################################
fact1=factanal(LQ[,-20],factors=4,rotation="varimax")
fact2=factanal(LQ[,-20],factors=5,rotation="varimax")
fact2 #
##
## Call:
## factanal(x = LQ[, -20], factors = 5, rotation = "varimax")
##
## Uniquenesses:
## Final AgriForest Mining Food Texile Leather
## 0.005 0.005 0.016 0.134 0.152 0.399
## Wood Paper RefPet Chem RubPlast Mineral
## 0.469 0.758 0.972 0.435 0.226 0.551
## Metal Machine Electr TransEq Manuf UtilSupply
## 0.320 0.401 0.399 0.464 0.573 0.776
## Constr VehWholTr VehRetTr Hospitality LandTrans WatTrans
## 0.602 0.506 0.614 0.662 0.534 0.811
## AirTrans OtTrans TeleCom FinInter RealEst BusRents
## 0.633 0.474 0.633 0.062 0.527 0.497
## PubAdmin Educa HealthSocW ComSocServ Household
## 0.481 0.657 0.427 0.733 0.756
##
## Loadings:
## Factor1 Factor2 Factor3 Factor4 Factor5
## Final -0.776 -0.294 -0.174 -0.525
## AgriForest 0.881 -0.230 0.311 -0.210 0.157
## Mining -0.308 0.239 0.901 -0.119
## Food -0.415 0.672 -0.414 0.146 -0.221
## Texile 0.104 -0.143 0.899
## Leather 0.187 -0.157 -0.168 0.716
## Wood 0.254 0.673
## Paper 0.149 0.448 -0.110
## RefPet -0.105
## Chem 0.553 0.137 -0.382 -0.129 0.281
## RubPlast 0.760 -0.120 -0.172 0.383
## Mineral 0.183 -0.304 0.307 0.476
## Metal 0.701 -0.356 0.182 -0.101 0.136
## Machine 0.761 -0.127
## Electr 0.742 -0.158 -0.101 0.115
## TransEq 0.627 -0.235 -0.122 -0.204 -0.176
## Manuf -0.122 0.603 -0.122 0.181
## UtilSupply 0.465
## Constr -0.136 0.600
## VehWholTr -0.235 0.265 0.358 0.441 -0.213
## VehRetTr 0.492 -0.353
## Hospitality -0.461 -0.286 -0.178 0.110
## LandTrans -0.146 -0.257 0.436 0.432
## WatTrans 0.413
## AirTrans -0.206 0.457 0.330
## OtTrans -0.103 0.480 0.507 0.129 -0.101
## TeleCom 0.211 0.561
## FinInter -0.398 0.882
## RealEst -0.203 0.354 0.263 0.247 -0.420
## BusRents 0.502 -0.278 0.217 -0.349
## PubAdmin -0.313 0.163 -0.403 0.421 -0.235
## Educa 0.547 0.128
## HealthSocW -0.109 0.562 -0.466 -0.170
## ComSocServ 0.425 -0.196 0.110 -0.180
## Household -0.377 -0.135 0.275
##
## Factor1 Factor2 Factor3 Factor4 Factor5
## SS loadings 5.198 3.456 3.438 3.366 2.877
## Proportion Var 0.149 0.099 0.098 0.096 0.082
## Cumulative Var 0.149 0.247 0.345 0.442 0.524
##
## Test of the hypothesis that 5 factors are sufficient.
## The chi square statistic is 766.36 on 430 degrees of freedom.
## The p-value is 2.84e-21
The output above shows the details of the fit, which is ok, but not that good. This may need to be re-fitted or use another method for consolidating.
#get reproduced correlation matrix
repro2=fact2$loadings%*%t(fact2$loadings)
#round(repro1,2)
#residual correlation matrix
resid2=fact2$cor-repro2
#round(resid1,2)
#get root-mean squared residuals
len=length(resid2[upper.tri(resid2)])
RMSR1=sqrt(sum(resid2[upper.tri(resid2)]^2)/len)
RMSR1
## [1] 0.09012746
#get proportion of residuals greater than 0.05 in absolute value
sum(rep(1,len)[abs(resid2[upper.tri(resid2)])>0.05])/len
## [1] 0.4319328
# Plot the factors against each other.
par(mfrow=c(3,3),mar = c(4,4,1,1))
plot(fact1$loadings, pch=18, col='red')
abline(h=0)
abline(v=0)
text(fact1$loadings, labels=rownames(fact1$loadings),xpd=TRUE)
#get loading plot for factors 1 through 4
plot(fact2$loadings[,c(1,3)], pch=18, col='red')
abline(h=0)
abline(v=0)
text(fact2$loadings[,c(1,3)], labels=rownames(fact2$loadings),xpd=TRUE)
plot(fact2$loadings[,c(1,4)], pch=18, col='red')
abline(h=0)
abline(v=0)
text(fact2$loadings[,c(1,4)], labels=rownames(fact2$loadings),xpd=TRUE)
plot(fact2$loadings[,c(1,5)], pch=18, col='red')
abline(h=0)
abline(v=0)
text(fact2$loadings[,c(1,5)], labels=rownames(fact2$loadings),xpd=TRUE)
plot(fact2$loadings[,c(2,3)], pch=18, col='red')
abline(h=0)
abline(v=0)
text(fact2$loadings[,c(2,3)], labels=rownames(fact2$loadings),xpd=TRUE)
plot(fact2$loadings[,c(2,4)], pch=18, col='red')
abline(h=0)
abline(v=0)
text(fact2$loadings[,c(2,4)], labels=rownames(fact2$loadings),xpd=TRUE)
plot(fact2$loadings[,c(2,5)], pch=18, col='red')
abline(h=0)
abline(v=0)
text(fact2$loadings[,c(2,5)], labels=rownames(fact2$loadings),xpd=TRUE)
plot(fact2$loadings[,c(3,4)], pch=18, col='red')
abline(h=0)
abline(v=0)
text(fact2$loadings[,c(3,4)], labels=rownames(fact2$loadings),xpd=TRUE)
plot(fact2$loadings[,c(3,5)], pch=18, col='red')
abline(h=0)
abline(v=0)
text(fact2$loadings[,c(3,5)], labels=rownames(fact2$loadings),xpd=TRUE)
par(mfrow=c(1,2))
(fact1$loadings[1:35,])
## Factor1 Factor2 Factor3 Factor4
## Final -0.788681188 -0.26897114 -0.200564096 -0.5103117583
## AgriForest 0.875513346 -0.27562990 0.310041071 -0.2204883466
## Mining -0.301898566 0.25866714 0.088263730 0.9100724711
## Food -0.407719197 0.73213917 -0.389052724 0.1452990750
## Texile 0.223947761 -0.39288283 0.018241300 -0.0687436377
## Leather 0.276066738 -0.35278221 -0.005634329 -0.1678157040
## Wood -0.092669584 0.25625021 0.726210800 -0.0007514095
## Paper 0.141819905 0.46870376 0.094424064 -0.0938204028
## RefPet -0.051757164 -0.15129881 0.019040882 -0.0635492116
## Chem 0.598401145 0.04545400 -0.381797176 -0.1251158177
## RubPlast 0.794553078 -0.20752863 -0.056397412 -0.1891089327
## Mineral 0.234017817 -0.44492091 0.266055612 -0.0347107412
## Metal 0.701778439 -0.40070280 0.161887387 -0.1059937827
## Machine 0.753301568 -0.01182171 0.058719703 -0.1478753780
## Electr 0.747424047 0.01065949 -0.151367913 -0.1009976260
## TransEq 0.578719492 -0.13112227 -0.071025158 -0.2376279793
## Manuf -0.008639942 -0.18001409 0.629353924 -0.1371327129
## UtilSupply 0.086203848 -0.04637243 0.436824570 -0.0153630702
## Constr 0.078731945 -0.19727266 0.568880806 -0.0525532167
## VehWholTr -0.246608656 0.29074998 0.373661412 0.4390534183
## VehRetTr -0.133479288 0.03089409 -0.015689275 0.4831075368
## Hospitality -0.441086708 -0.03983103 -0.315783474 -0.1595994942
## LandTrans -0.092623487 -0.40551799 0.395771176 0.0527375680
## WatTrans -0.069847537 0.36387043 0.112903878 0.0395347063
## AirTrans -0.183295035 0.44093279 -0.073855431 0.3391370247
## OtTrans -0.111506611 0.47024629 0.567058756 0.1183095319
## TeleCom -0.019453710 0.21790912 0.046494515 0.5681999684
## FinInter -0.010344016 0.04249293 -0.376072348 0.8662081374
## RealEst -0.248572049 0.44558467 0.282298793 0.2520735251
## BusRents -0.092640723 0.60367173 -0.223195365 0.2006728001
## PubAdmin -0.320639296 0.25851126 -0.381680100 0.4099274610
## Educa -0.096430022 0.53670685 0.113392447 0.1418789109
## HealthSocW -0.103258199 0.59687263 -0.435928989 0.0023447400
## ComSocServ -0.058010519 0.48133650 -0.190385769 0.1079957185
## Household -0.048518582 -0.46386697 -0.003370274 -0.1227595297
heatmap(fact2$loadings[1:35,])
The factors can be characterized by looking down the columns of each factor to identify sectors with high absolute values and findings themes among those sectors. For the factor analysis shown above, the following sectors are used to characterize the factors.
High positive values in: Agriculture, hunting, forestry and fishing; Basic metals and fabricated metal; Machinery, not elsewhere classified; Rubber and plastics; Transport equipment; Electrical and optical equipment; Chemicals and chemical products
High Negative values in: Final use aggregated
High positive values in: Food, beverages and tobacco; Renting of machinery & equipment and other business activities; Health and social work; Education
High negative values in: Private households with employed persons; Other non-metallic mineral; Inland Transport; Textiles and textile products; Basic metals and fabricated metal; Leather, leather products and footwear
High positive values in: Wood and products of wood and cork; Manufacturing, not elsewhere classified; recycling; Construction; Machinery, not elsewhere classified; Rubber and plastics; Transport Equpiment; Electricity, gas and water supply; and Chemicals and chemical products
High negative values in: Health and social work; Food, beverages and tobacco; Public administration and defence; compulsory social security; Financial intermediation
High positive values in: Mining and quarrying; Financial intermediation
High negative values in: Final use aggregate
High positive values in: Textiles and textile products; Leather, leather products and footwear
High negative values in: Real estate activities; Retail trade and repair, except of motor vehicles and motorcycles; Renting of machinery & equipment and other business activities
###############################
## Apply factor weights to LQs
###############################
fal <- matrix(fact2$loadings[c(1:35),],nrow=35, ncol=5, dimnames = list(c(sectors[2:36]), c("Capital Goods", "Human Capital Investment", "Building Materials", "Extractive", "Textiles")))
LQ <- as.data.frame(LQ)
factors <- data.frame(row.names = rownames(LQ))
# select only the non-zero sector-factor loadings and multiply them by the LQs for those sectors
for (i in 1:nrow(LQ)) {
factors$CapGoods[i] <- sum(data.frame(
mapply(`*`,LQ[ i,c(1:6,8,10:16,20,22,23,25,26,29,31,33) ],
fal[c(1:6,8,10:16,20,22,23,25,26,29,31,33),1],SIMPLIFY=FALSE)) )
factors$HumanCap[i] <- sum(data.frame(
mapply(`*`,LQ[ i, c(1:13,16,17,19,20,23:27,29:35)],
fal[c(1:13,16,17,19,20,23:27,29:35),2],SIMPLIFY=FALSE)))
factors$BuildingMat[i] <- sum(data.frame(
mapply(`*`,LQ[ i, c(1,2,4,7,10,12,13,15:20,22,23:27,29:35) ],
fal[c(1,2,4,7,10,12,13,15:20,22,23:27,29:35),3],SIMPLIFY=FALSE)))
factors$Extractive[i] <- sum(data.frame(
mapply(`*`,LQ[ i, c(1:4,6,10,11,13:17,20:22,25:32,34,35) ],
fal[c(1:4,6,10,11,13:17,20:22,25:32,34,35),4],SIMPLIFY=FALSE)))
factors$Textiles[i] <- sum(data.frame(
mapply(`*`,LQ[ i, c(2:6,8,10:13,15:17,20:23,26,29:31,33:35) ],
fal[c(2:6,8,10:13,15:17,20:23,26,29:31,33:35),5],SIMPLIFY=FALSE)))
}
# transform to matrix for plotting
factors<-as.matrix(factors)
# redefine the palette
quantile.range <- quantile(as.matrix(factors), probs = seq(0, 1, 0.01))
palette.breaks <- seq(quantile.range["5%"], quantile.range["95%"], 0.1)
color.palette <- colorRampPalette(c("#d8b365", "#f5f5f5", "#5ab4ac"))(length(palette.breaks) - 1)
heatmap.2(
t(factors),
main = "Weighted Factors",
dendrogram = "column",
scale = "none",
trace = "none",
key = FALSE,
labRow = colnames(factors),
labCol = rownames(factors),
col = color.palette,
breaks = palette.breaks
)
factors<- as.data.frame(factors)
# Combined data set of factors and epi
# replace RestofWorld for NA country
factors$iso <- as.factor(rownames(factors))
comb <- join(factors,epi,type="left") %>% join(.,wiodf[,c(1,1438:1472)])
## Joining by: iso
## Joining by: iso
comb$country<-factor(comb$country, levels=c(levels(comb$country), "RestofWorld"))
comb$country[is.na(comb$country)] = "RestofWorld"
rownames(comb)<- comb$iso
heatmap(fact2$loadings)
par(mar=c(4,4,3,1), mfrow=c(2,2))
plot(lm(comb$EPI.2016 ~ comb$CapGoods))
par(mar=c(4,4,3,1), mfrow=c(2,2))
plot(lm(comb$EPI.2016 ~ comb$HumanCap))
par(mar=c(4,4,3,1), mfrow=c(2,2))
plot(glm(comb$EPI.2016 ~ comb$BuildingMat))
par(mar=c(4,4,3,1), mfrow=c(2,2))
plot(lm(comb$EPI.2016 ~ comb$Extractive))
par(mar=c(4,4,3,1), mfrow=c(2,2))
plot(lm(comb$EPI.2016 ~ comb$Textiles))
#load relevant libraries
library(TeachingDemos)
library(fpc)
library(cluster)
par(mfrow=c(2,1))
# raw location quotients
#get the distance matrix
dist1<-dist(LQ, method="euclidean")
#now do clustering – use WARD’s method
clust1<-hclust(dist1,method="ward.D")
#draw the dendrogram
plot(clust1,labels=, hang=-1, cex=0.7, xlab="Country",ylab="Distance",main="Clustering for unweighted location quotients of countries")
rect.hclust(clust1,k=5)
#get membership vector (which country is in which group)
cuts1=cutree(clust1,k=5)
#cuts1
# weighted location quotients
#get the distance matrix
dist2<-dist(comb[,1:5], method="euclidean")
#now do clustering – use WARD’s method
clust2<-hclust(dist2,method="ward.D")
#draw the dendrogram
plot(clust2,labels=comb[,6], hang=-1, cex=0.7, xlab="Country",ylab="Distance",main="Clustering for weighted location quotients of countries")
rect.hclust(clust2,k=5)
#get membership vector (which country is in which group)
cuts2=cutree(clust2,k=5)
#cuts2
weighted_cuts <- data.frame(cbind(cuts2,iso=names(cuts2)))
comb <- join(comb,weighted_cuts)
## Joining by: iso
for (i in 1:5) {
print(paste("Cluster ", i, sep=" "))
print(comb$country[comb$cuts2 == i])
#print(" ")
}
## [1] "Cluster 1"
## [1] Australia Belgium Cyprus
## [4] Denmark Finland UnitedKingdom
## [7] Greece Ireland Lithuania
## [10] Malta Netherlands Sweden
## [13] UnitedStatesofAmerica
## 181 Levels: Afghanistan Albania Algeria Angola ... RestofWorld
## [1] "Cluster 2"
## [1] Austria Canada CzechRepublic Germany France
## [6] Hungary Japan Poland Russia Slovakia
## [11] Slovenia
## 181 Levels: Afghanistan Albania Algeria Angola ... RestofWorld
## [1] "Cluster 3"
## [1] Bulgaria Brazil Spain Indonesia India
## [6] Italy Mexico Portugal Romania RestofWorld
## [11] Turkey
## 181 Levels: Afghanistan Albania Algeria Angola ... RestofWorld
## [1] "Cluster 4"
## [1] China SouthKorea Taiwan
## 181 Levels: Afghanistan Albania Algeria Angola ... RestofWorld
## [1] "Cluster 5"
## [1] Estonia Luxembourg Latvia
## 181 Levels: Afghanistan Albania Algeria Angola ... RestofWorld
par(mfrow=c(2,3))
for (i in 1:5) {
plot(comb$EPI.2016 ~ comb[,i],
pch = 16,
xlab = colnames(comb)[i], ylab = "2016 EPI Score",
col = comb$cuts,
xlim=c(-1.5,9),
ylim = c(0,100))
abline(lm(comb$EPI.2016 ~ comb[,i]))
text(6,10,labels=paste("correlation = ",round(cor(comb$EPI.2016, comb[,i] , use = "pairwise.complete.obs"),2)))
#text(comb$EPI.2016 ~ comb$Textiles, labels=comb$iso)
legend("bottomleft",
legend=c("Cluster 1", "Cluster 2","Cluster 3","Cluster 4","Cluster 5"),
pch = 16, border = FALSE,
col = c("black","red","green","blue","cyan"))
}
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:TeachingDemos':
##
## cnvrt.coords, subplot
## The following objects are masked from 'package:dplyr':
##
## combine, src, summarize
## The following objects are masked from 'package:plyr':
##
## is.discrete, summarize
## The following objects are masked from 'package:base':
##
## format.pval, round.POSIXt, trunc.POSIXt, units
#par(mfrow=c(1,5),mar=c(4,4,3,1))
#for (i in 1:5) {
#plot(comb$EPI.2016 ~ comb[,i],
# pch = 16,
# col = comb$cuts, main = "2016 EPI Scores by Factor",
# xlab = colnames(comb)[i],
# #ylab = "2016 EPI Score",
# ylim = c(0,100)
# #xlim=c(-1.5,9)
# )
#abline(lm(comb$EPI.2016 ~ comb[,i]))
#text(6,10,labels=paste("r.squared = ",round(summary(lm(comb$EPI.2016 ~ comb[,i]))$r.squared,2)))
#text(comb$EPI.2016 ~ comb[,i], labels=comb$iso)
#}
par(mfrow=c(1,3),mar=c(4,4,3,1))
for (i in 10:12) {
plot(0,0, ylim = c(0,100),
xlim=c(0.5,5.5), type="n",
xlab = "Cluster",
ylab = "2016 Score",
main = colnames(comb)[i])
points(comb[,i] ~ comb$cuts2,
pch = 16,
col = comb$cuts
)
}
par(mfrow=c(1,3),mar=c(4,4,3,1))
for (i in 13:15) {
plot(0,0, ylim = c(0,100),
xlim=c(0.5,5.5), type="n",
xlab = "Cluster",
ylab = "2016 Score",
main = colnames(comb)[i])
points(comb[,i] ~ comb$cuts2,
pch = 16,
col = comb$cuts
)
}
par(mfrow=c(2,3),mar=c(4,4,3,1))
for (i in 16:20) {
plot(0,0, ylim = c(0,100),
xlim=c(0.5,5.5), type="n",
xlab = "Cluster",
ylab = "2016 Score",
main = colnames(comb)[i])
points(comb[,i] ~ comb$cuts2,
pch = 16,
col = comb$cuts
)
}