This work is based on Mr Quo calculations, we load the packages we are using:

pkgs=c('ggplot2','xtable')
lapply(pkgs, require, character.only = T)
## Loading required package: ggplot2
## Loading required package: xtable
## [[1]]
## [1] TRUE
## 
## [[2]]
## [1] TRUE
#

Let us load Mr Quo’s data stored as individual RData files

load(file="all caculate results/mydataEnergy.RData")
load(file="all caculate results/mydataGDP.RData")
load(file="all caculate results/mydataDiff_co2emission.RData")
#
DeltaGdP=t(apply(mydataGDP[,5:2],1,diff))
DeltaE=t(apply(mydataEnergy[,5:2],1,diff))
rownames(DeltaE)=rownames(DeltaGdP)=gsub(" ","",mydataGDP[,1],fixed=TRUE)
DeltaCO2=mydataDiff_co2emission
DCO2_DE  = DeltaCO2 / DeltaE
DE_DGdP  = DeltaE / DeltaGdP
print(xtable(DE_DGdP),type="html")
X2006y X2009y X2012y
Beijing 2.46 1.65 1.06
Tianjin 4.26 4.49 4.34
Hebei 13.22 6.28 5.17
Shanxi 14.07 5.96 7.91
InnerMongolia 14.08 8.60 7.23
Liaoning 5.79 6.98 4.58
Jilin 1.89 5.96 3.74
Heilongjiang 5.87 7.31 4.49
Shanghai 3.79 3.33 1.94
Jiangsu 5.80 3.67 2.62
Zhejiang 3.98 3.23 2.15
Anhui 4.81 4.62 3.44
Fujian 8.47 4.49 3.04
Jiangxi 4.20 4.07 2.68
Shandong 7.26 4.72 4.02
Henan 5.75 4.94 3.85
Hubei 6.74 4.98 4.27
Hunan 11.38 5.12 3.75
Guangdong 4.43 3.63 2.55
Guangxi 6.17 5.59 3.94
Hainan 5.07 5.30 3.79
Chongqing -12.47 6.33 4.61
Sichuan 6.81 6.11 4.38
Guizhou -1.20 8.86 7.87
Yunnan 9.85 6.47 5.80
Shaanxi 6.28 5.59 4.11
Gansu 9.52 6.65 6.74
Qinghai 20.88 10.28 14.48
Ningxia 18.10 8.89 11.89
Xinjiang 9.81 12.00 13.34
print(xtable(DCO2_DE),type="html")
co2emission2006_2003 co2emission2009_2006 co2emission2012_2009
Beijing 2.14 -0.13 -0.51
Tianjin 3.52 1.51 1.41
Hebei 2.68 1.76 2.74
Shanxi 1.87 3.54 2.58
Inner Mongolia 4.57 3.94 5.13
Liaoning 5.72 1.36 2.76
Jilin 30.19 1.08 3.03
Heilongjiang 4.76 1.92 2.71
Shanghai 2.29 -0.46 2.31
Jiangsu 3.40 1.23 3.03
Zhejiang 4.18 2.05 1.98
Anhui 1.84 3.17 2.81
Fujian 2.11 2.14 1.54
Jiangxi -1.66 4.05 2.94
Shandong 4.31 1.96 2.29
Henan 4.54 2.52 1.41
Hubei -2.62 5.64 1.80
Hunan 2.87 0.43 1.65
Guangdong 2.27 0.98 2.07
Guangxi 3.48 1.63 2.34
Hainan 0.49 2.72 1.93
Chongqing 1.43 2.35 1.16
Sichuan 1.52 2.59 0.51
Guizhou -49.52 1.42 1.42
Yunnan 4.04 1.93 1.04
Shaanxi 2.80 3.01 3.45
Gansu 2.17 1.53 2.74
Qinghai 0.87 1.34 0.75
Ningxia 1.55 4.01 4.25
Xinjiang 3.30 2.73 2.33

Now, it’s time to figure out the structure of Energy against GdP

#
ngrp = 5
pca_de_gdp = prcomp(dist(t(DE_DGdP)))
print(xtable(summary(pca_de_gdp)),type="html")
PC1 PC2 PC3
Standard deviation 26.3068 6.4883 0.0000
Proportion of Variance 0.9427 0.0573 0.0000
Cumulative Proportion 0.9427 1.0000 1.0000
print(xtable(pca_de_gdp$rotation),type="html")
PC1 PC2 PC3
X2006y 0.64 -0.06 -0.76
X2009y -0.57 0.63 -0.53
X2012y -0.51 -0.78 -0.37
Dp=as.data.frame(DE_DGdP%*%pca_de_gdp$rotation[,1:2])
Dp[,"Provinces"]=gsub(" ","",mydataGDP[,1],fixed=TRUE)
h1=hclust(dist(as.matrix(Dp[,1:2])))
lg=cutree(h1,ngrp)
print(xtable(as.data.frame(lg)),type="html")
lg
Beijing 1
Tianjin 1
Hebei 1
Shanxi 2
InnerMongolia 1
Liaoning 3
Jilin 3
Heilongjiang 3
Shanghai 1
Jiangsu 1
Zhejiang 1
Anhui 1
Fujian 1
Jiangxi 1
Shandong 1
Henan 1
Hubei 1
Hunan 1
Guangdong 1
Guangxi 1
Hainan 1
Chongqing 4
Sichuan 1
Guizhou 5
Yunnan 1
Shaanxi 1
Gansu 1
Qinghai 2
Ningxia 2
Xinjiang 5
Dp[,"GRP"]=0
Dp[,"Group"]=as.factor(as.numeric(lg))
for (i in 1:ngrp) {
  Dp[Dp$Provinces==gsub(" ","",(names(lg)[lg==i])[1],fixed=TRUE),"GRP"]=i
}
ggplot(data=Dp,aes(x=PC1, y=PC2)) +
  geom_point(size=3,aes(shape=Group)) +
  geom_text(aes(label=ifelse(GRP>0,as.character(Provinces),''),hjust=0,hust=0)) +
  theme_bw()

plot(h1,main="",xlab="PCA view of Provinces")

h2=hclust(dist(DE_DGdP))
plot(h2,main="",xlab="Provinces")

lg=cutree(h2,ngrp)
print(xtable(as.data.frame(lg)),type="html")
lg
Beijing 1
Tianjin 1
Hebei 2
Shanxi 2
InnerMongolia 2
Liaoning 1
Jilin 1
Heilongjiang 1
Shanghai 1
Jiangsu 1
Zhejiang 1
Anhui 1
Fujian 1
Jiangxi 1
Shandong 1
Henan 1
Hubei 1
Hunan 2
Guangdong 1
Guangxi 1
Hainan 1
Chongqing 3
Sichuan 1
Guizhou 4
Yunnan 2
Shaanxi 1
Gansu 2
Qinghai 5
Ningxia 5
Xinjiang 5

Now we can analyze the CO2 evolution against Energy

#
pca_dco2_de = prcomp(dist(t(as.matrix(DCO2_DE))))
print(xtable(summary(pca_dco2_de)),type="html")
PC1 PC2 PC3
Standard deviation 57.1590 5.0919 0.0000
Proportion of Variance 0.9921 0.0079 0.0000
Cumulative Proportion 0.9921 1.0000 1.0000
print(xtable(pca_dco2_de$rotation),type="html")
PC1 PC2 PC3
co2emission2006_2003 0.60 -0.10 0.79
co2emission2009_2006 -0.57 0.64 0.51
co2emission2012_2009 -0.56 -0.76 0.33
Dp=as.data.frame((as.matrix(DCO2_DE)%*%pca_dco2_de$rotation)[,1:2])
Dp[,"Provinces"]=gsub(" ","",mydataGDP[,1],fixed=TRUE)
h1=hclust(dist(as.matrix(Dp[,1:2])))
lg=cutree(h1,ngrp)
print(xtable(as.data.frame(lg)),type="html")
lg
Beijing 1
Tianjin 1
Hebei 1
Shanxi 2
Inner Mongolia 2
Liaoning 1
Jilin 3
Heilongjiang 1
Shanghai 1
Jiangsu 1
Zhejiang 1
Anhui 2
Fujian 1
Jiangxi 4
Shandong 1
Henan 1
Hubei 4
Hunan 1
Guangdong 1
Guangxi 1
Hainan 2
Chongqing 1
Sichuan 1
Guizhou 5
Yunnan 1
Shaanxi 2
Gansu 1
Qinghai 1
Ningxia 2
Xinjiang 1
Dp[,"GRP"]=0
Dp[,"Group"]=as.factor(as.numeric(lg))
for (i in 1:ngrp) {
  Dp[Dp$Provinces==gsub(" ","",(names(lg)[lg==i])[1],fixed=TRUE),"GRP"]=i
}
ggplot(data=Dp,aes(x=PC1, y=PC2)) +
  geom_point(size=3,aes(shape=Group)) +
  geom_text(aes(label=ifelse(GRP>0,as.character(Provinces),''),hjust=0,hust=0)) +
  theme_bw()

plot(h1,main="",xlab="PCA view of Provinces")

h2=hclust(dist(DCO2_DE))
plot(h2,main="",xlab="Provinces")

lg=cutree(h2,ngrp)
print(xtable(as.data.frame(lg)),type="html")
lg
Beijing 1
Tianjin 2
Hebei 1
Shanxi 2
Inner Mongolia 2
Liaoning 2
Jilin 3
Heilongjiang 2
Shanghai 1
Jiangsu 1
Zhejiang 2
Anhui 2
Fujian 1
Jiangxi 4
Shandong 2
Henan 2
Hubei 4
Hunan 1
Guangdong 1
Guangxi 1
Hainan 1
Chongqing 1
Sichuan 1
Guizhou 5
Yunnan 2
Shaanxi 2
Gansu 1
Qinghai 1
Ningxia 2
Xinjiang 2
#

Finally we will consider both factors at once:

DD=cbind(DCO2_DE,DE_DGdP)
pca_tot = prcomp(dist(t(as.matrix(DD))))
print(xtable(summary(pca_tot)),type="html")
PC1 PC2 PC3 PC4 PC5 PC6
Standard deviation 49.3648 24.7280 13.2842 4.0845 3.2267 0.0000
Proportion of Variance 0.7494 0.1880 0.0543 0.0051 0.0032 0.0000
Cumulative Proportion 0.7494 0.9374 0.9917 0.9968 1.0000 1.0000
print(xtable(pca_tot$rotation),type="html")
PC1 PC2 PC3 PC4 PC5 PC6
co2emission2006_2003 0.53 0.23 -0.22 0.04 -0.13 0.78
co2emission2009_2006 -0.35 0.53 0.29 0.09 0.66 0.27
co2emission2012_2009 -0.34 0.52 0.29 -0.09 -0.72 0.03
X2006y -0.31 -0.63 0.49 -0.02 -0.09 0.52
X2009y -0.44 -0.04 -0.55 -0.68 0.05 0.20
X2012y -0.45 -0.07 -0.49 0.72 -0.12 0.13
Dp=as.data.frame((as.matrix(DD)%*%(pca_tot$rotation))[,1:2])
Dp[,"Provinces"]=gsub(" ","",mydataGDP[,1],fixed=TRUE)
h1=hclust(dist(as.matrix(Dp[,1:2])))
lg=cutree(h1,ngrp)
print(xtable(as.data.frame(lg)),type="html")
lg
Beijing 1
Tianjin 1
Hebei 2
Shanxi 2
Inner Mongolia 2
Liaoning 1
Jilin 3
Heilongjiang 1
Shanghai 1
Jiangsu 1
Zhejiang 1
Anhui 1
Fujian 2
Jiangxi 2
Shandong 1
Henan 1
Hubei 2
Hunan 2
Guangdong 1
Guangxi 1
Hainan 2
Chongqing 3
Sichuan 2
Guizhou 4
Yunnan 2
Shaanxi 2
Gansu 2
Qinghai 5
Ningxia 5
Xinjiang 2
Dp[,"GRP"]=0
Dp[,"Group"]=as.factor(as.numeric(lg))
for (i in 1:ngrp) {
  Dp[Dp$Provinces==gsub(" ","",(names(lg)[lg==i])[1],fixed=TRUE),"GRP"]=i
}
ggplot(data=Dp,aes(x=PC1, y=PC2)) +
  geom_point(size=3,aes(shape=Group)) +
  geom_text(aes(label=ifelse(GRP>0,as.character(Provinces),''),hjust=0,hust=0)) +
  theme_bw()

plot(h1,main="",xlab="PCA view of Provinces")

h2=hclust(dist(DCO2_DE))
plot(h2,main="",xlab="Provinces")

lg=cutree(h2,ngrp)
print(xtable(as.data.frame(lg)),type="html")
lg
Beijing 1
Tianjin 2
Hebei 1
Shanxi 2
Inner Mongolia 2
Liaoning 2
Jilin 3
Heilongjiang 2
Shanghai 1
Jiangsu 1
Zhejiang 2
Anhui 2
Fujian 1
Jiangxi 4
Shandong 2
Henan 2
Hubei 4
Hunan 1
Guangdong 1
Guangxi 1
Hainan 1
Chongqing 1
Sichuan 1
Guizhou 5
Yunnan 2
Shaanxi 2
Gansu 1
Qinghai 1
Ningxia 2
Xinjiang 2
#

Now it’s time to interpert the figures and to discuss them.