This work is based on Mr Quo calculations, we load the packages we are using:
pkgs=c('ggplot2','xtable')
lapply(pkgs, require, character.only = T)
## Loading required package: ggplot2
## Loading required package: xtable
## [[1]]
## [1] TRUE
##
## [[2]]
## [1] TRUE
#
Let us load Mr Quo’s data stored as individual RData files
load(file="all caculate results/mydataEnergy.RData")
load(file="all caculate results/mydataGDP.RData")
load(file="all caculate results/mydataDiff_co2emission.RData")
#
DeltaGdP=t(apply(mydataGDP[,5:2],1,diff))
DeltaE=t(apply(mydataEnergy[,5:2],1,diff))
rownames(DeltaE)=rownames(DeltaGdP)=gsub(" ","",mydataGDP[,1],fixed=TRUE)
DeltaCO2=mydataDiff_co2emission
DCO2_DE = DeltaCO2 / DeltaE
DE_DGdP = DeltaE / DeltaGdP
print(xtable(DE_DGdP),type="html")
|
|
X2006y
|
X2009y
|
X2012y
|
|
Beijing
|
2.46
|
1.65
|
1.06
|
|
Tianjin
|
4.26
|
4.49
|
4.34
|
|
Hebei
|
13.22
|
6.28
|
5.17
|
|
Shanxi
|
14.07
|
5.96
|
7.91
|
|
InnerMongolia
|
14.08
|
8.60
|
7.23
|
|
Liaoning
|
5.79
|
6.98
|
4.58
|
|
Jilin
|
1.89
|
5.96
|
3.74
|
|
Heilongjiang
|
5.87
|
7.31
|
4.49
|
|
Shanghai
|
3.79
|
3.33
|
1.94
|
|
Jiangsu
|
5.80
|
3.67
|
2.62
|
|
Zhejiang
|
3.98
|
3.23
|
2.15
|
|
Anhui
|
4.81
|
4.62
|
3.44
|
|
Fujian
|
8.47
|
4.49
|
3.04
|
|
Jiangxi
|
4.20
|
4.07
|
2.68
|
|
Shandong
|
7.26
|
4.72
|
4.02
|
|
Henan
|
5.75
|
4.94
|
3.85
|
|
Hubei
|
6.74
|
4.98
|
4.27
|
|
Hunan
|
11.38
|
5.12
|
3.75
|
|
Guangdong
|
4.43
|
3.63
|
2.55
|
|
Guangxi
|
6.17
|
5.59
|
3.94
|
|
Hainan
|
5.07
|
5.30
|
3.79
|
|
Chongqing
|
-12.47
|
6.33
|
4.61
|
|
Sichuan
|
6.81
|
6.11
|
4.38
|
|
Guizhou
|
-1.20
|
8.86
|
7.87
|
|
Yunnan
|
9.85
|
6.47
|
5.80
|
|
Shaanxi
|
6.28
|
5.59
|
4.11
|
|
Gansu
|
9.52
|
6.65
|
6.74
|
|
Qinghai
|
20.88
|
10.28
|
14.48
|
|
Ningxia
|
18.10
|
8.89
|
11.89
|
|
Xinjiang
|
9.81
|
12.00
|
13.34
|
print(xtable(DCO2_DE),type="html")
|
|
co2emission2006_2003
|
co2emission2009_2006
|
co2emission2012_2009
|
|
Beijing
|
2.14
|
-0.13
|
-0.51
|
|
Tianjin
|
3.52
|
1.51
|
1.41
|
|
Hebei
|
2.68
|
1.76
|
2.74
|
|
Shanxi
|
1.87
|
3.54
|
2.58
|
|
Inner Mongolia
|
4.57
|
3.94
|
5.13
|
|
Liaoning
|
5.72
|
1.36
|
2.76
|
|
Jilin
|
30.19
|
1.08
|
3.03
|
|
Heilongjiang
|
4.76
|
1.92
|
2.71
|
|
Shanghai
|
2.29
|
-0.46
|
2.31
|
|
Jiangsu
|
3.40
|
1.23
|
3.03
|
|
Zhejiang
|
4.18
|
2.05
|
1.98
|
|
Anhui
|
1.84
|
3.17
|
2.81
|
|
Fujian
|
2.11
|
2.14
|
1.54
|
|
Jiangxi
|
-1.66
|
4.05
|
2.94
|
|
Shandong
|
4.31
|
1.96
|
2.29
|
|
Henan
|
4.54
|
2.52
|
1.41
|
|
Hubei
|
-2.62
|
5.64
|
1.80
|
|
Hunan
|
2.87
|
0.43
|
1.65
|
|
Guangdong
|
2.27
|
0.98
|
2.07
|
|
Guangxi
|
3.48
|
1.63
|
2.34
|
|
Hainan
|
0.49
|
2.72
|
1.93
|
|
Chongqing
|
1.43
|
2.35
|
1.16
|
|
Sichuan
|
1.52
|
2.59
|
0.51
|
|
Guizhou
|
-49.52
|
1.42
|
1.42
|
|
Yunnan
|
4.04
|
1.93
|
1.04
|
|
Shaanxi
|
2.80
|
3.01
|
3.45
|
|
Gansu
|
2.17
|
1.53
|
2.74
|
|
Qinghai
|
0.87
|
1.34
|
0.75
|
|
Ningxia
|
1.55
|
4.01
|
4.25
|
|
Xinjiang
|
3.30
|
2.73
|
2.33
|
Now, it’s time to figure out the structure of Energy against GdP
#
ngrp = 5
pca_de_gdp = prcomp(dist(t(DE_DGdP)))
print(xtable(summary(pca_de_gdp)),type="html")
|
|
PC1
|
PC2
|
PC3
|
|
Standard deviation
|
26.3068
|
6.4883
|
0.0000
|
|
Proportion of Variance
|
0.9427
|
0.0573
|
0.0000
|
|
Cumulative Proportion
|
0.9427
|
1.0000
|
1.0000
|
print(xtable(pca_de_gdp$rotation),type="html")
|
|
PC1
|
PC2
|
PC3
|
|
X2006y
|
0.64
|
-0.06
|
-0.76
|
|
X2009y
|
-0.57
|
0.63
|
-0.53
|
|
X2012y
|
-0.51
|
-0.78
|
-0.37
|
Dp=as.data.frame(DE_DGdP%*%pca_de_gdp$rotation[,1:2])
Dp[,"Provinces"]=gsub(" ","",mydataGDP[,1],fixed=TRUE)
h1=hclust(dist(as.matrix(Dp[,1:2])))
lg=cutree(h1,ngrp)
print(xtable(as.data.frame(lg)),type="html")
|
|
lg
|
|
Beijing
|
1
|
|
Tianjin
|
1
|
|
Hebei
|
1
|
|
Shanxi
|
2
|
|
InnerMongolia
|
1
|
|
Liaoning
|
3
|
|
Jilin
|
3
|
|
Heilongjiang
|
3
|
|
Shanghai
|
1
|
|
Jiangsu
|
1
|
|
Zhejiang
|
1
|
|
Anhui
|
1
|
|
Fujian
|
1
|
|
Jiangxi
|
1
|
|
Shandong
|
1
|
|
Henan
|
1
|
|
Hubei
|
1
|
|
Hunan
|
1
|
|
Guangdong
|
1
|
|
Guangxi
|
1
|
|
Hainan
|
1
|
|
Chongqing
|
4
|
|
Sichuan
|
1
|
|
Guizhou
|
5
|
|
Yunnan
|
1
|
|
Shaanxi
|
1
|
|
Gansu
|
1
|
|
Qinghai
|
2
|
|
Ningxia
|
2
|
|
Xinjiang
|
5
|
Dp[,"GRP"]=0
Dp[,"Group"]=as.factor(as.numeric(lg))
for (i in 1:ngrp) {
Dp[Dp$Provinces==gsub(" ","",(names(lg)[lg==i])[1],fixed=TRUE),"GRP"]=i
}
ggplot(data=Dp,aes(x=PC1, y=PC2)) +
geom_point(size=3,aes(shape=Group)) +
geom_text(aes(label=ifelse(GRP>0,as.character(Provinces),''),hjust=0,hust=0)) +
theme_bw()

plot(h1,main="",xlab="PCA view of Provinces")

h2=hclust(dist(DE_DGdP))
plot(h2,main="",xlab="Provinces")

lg=cutree(h2,ngrp)
print(xtable(as.data.frame(lg)),type="html")
|
|
lg
|
|
Beijing
|
1
|
|
Tianjin
|
1
|
|
Hebei
|
2
|
|
Shanxi
|
2
|
|
InnerMongolia
|
2
|
|
Liaoning
|
1
|
|
Jilin
|
1
|
|
Heilongjiang
|
1
|
|
Shanghai
|
1
|
|
Jiangsu
|
1
|
|
Zhejiang
|
1
|
|
Anhui
|
1
|
|
Fujian
|
1
|
|
Jiangxi
|
1
|
|
Shandong
|
1
|
|
Henan
|
1
|
|
Hubei
|
1
|
|
Hunan
|
2
|
|
Guangdong
|
1
|
|
Guangxi
|
1
|
|
Hainan
|
1
|
|
Chongqing
|
3
|
|
Sichuan
|
1
|
|
Guizhou
|
4
|
|
Yunnan
|
2
|
|
Shaanxi
|
1
|
|
Gansu
|
2
|
|
Qinghai
|
5
|
|
Ningxia
|
5
|
|
Xinjiang
|
5
|
Now we can analyze the CO2 evolution against Energy
#
pca_dco2_de = prcomp(dist(t(as.matrix(DCO2_DE))))
print(xtable(summary(pca_dco2_de)),type="html")
|
|
PC1
|
PC2
|
PC3
|
|
Standard deviation
|
57.1590
|
5.0919
|
0.0000
|
|
Proportion of Variance
|
0.9921
|
0.0079
|
0.0000
|
|
Cumulative Proportion
|
0.9921
|
1.0000
|
1.0000
|
print(xtable(pca_dco2_de$rotation),type="html")
|
|
PC1
|
PC2
|
PC3
|
|
co2emission2006_2003
|
0.60
|
-0.10
|
0.79
|
|
co2emission2009_2006
|
-0.57
|
0.64
|
0.51
|
|
co2emission2012_2009
|
-0.56
|
-0.76
|
0.33
|
Dp=as.data.frame((as.matrix(DCO2_DE)%*%pca_dco2_de$rotation)[,1:2])
Dp[,"Provinces"]=gsub(" ","",mydataGDP[,1],fixed=TRUE)
h1=hclust(dist(as.matrix(Dp[,1:2])))
lg=cutree(h1,ngrp)
print(xtable(as.data.frame(lg)),type="html")
|
|
lg
|
|
Beijing
|
1
|
|
Tianjin
|
1
|
|
Hebei
|
1
|
|
Shanxi
|
2
|
|
Inner Mongolia
|
2
|
|
Liaoning
|
1
|
|
Jilin
|
3
|
|
Heilongjiang
|
1
|
|
Shanghai
|
1
|
|
Jiangsu
|
1
|
|
Zhejiang
|
1
|
|
Anhui
|
2
|
|
Fujian
|
1
|
|
Jiangxi
|
4
|
|
Shandong
|
1
|
|
Henan
|
1
|
|
Hubei
|
4
|
|
Hunan
|
1
|
|
Guangdong
|
1
|
|
Guangxi
|
1
|
|
Hainan
|
2
|
|
Chongqing
|
1
|
|
Sichuan
|
1
|
|
Guizhou
|
5
|
|
Yunnan
|
1
|
|
Shaanxi
|
2
|
|
Gansu
|
1
|
|
Qinghai
|
1
|
|
Ningxia
|
2
|
|
Xinjiang
|
1
|
Dp[,"GRP"]=0
Dp[,"Group"]=as.factor(as.numeric(lg))
for (i in 1:ngrp) {
Dp[Dp$Provinces==gsub(" ","",(names(lg)[lg==i])[1],fixed=TRUE),"GRP"]=i
}
ggplot(data=Dp,aes(x=PC1, y=PC2)) +
geom_point(size=3,aes(shape=Group)) +
geom_text(aes(label=ifelse(GRP>0,as.character(Provinces),''),hjust=0,hust=0)) +
theme_bw()

plot(h1,main="",xlab="PCA view of Provinces")

h2=hclust(dist(DCO2_DE))
plot(h2,main="",xlab="Provinces")

lg=cutree(h2,ngrp)
print(xtable(as.data.frame(lg)),type="html")
|
|
lg
|
|
Beijing
|
1
|
|
Tianjin
|
2
|
|
Hebei
|
1
|
|
Shanxi
|
2
|
|
Inner Mongolia
|
2
|
|
Liaoning
|
2
|
|
Jilin
|
3
|
|
Heilongjiang
|
2
|
|
Shanghai
|
1
|
|
Jiangsu
|
1
|
|
Zhejiang
|
2
|
|
Anhui
|
2
|
|
Fujian
|
1
|
|
Jiangxi
|
4
|
|
Shandong
|
2
|
|
Henan
|
2
|
|
Hubei
|
4
|
|
Hunan
|
1
|
|
Guangdong
|
1
|
|
Guangxi
|
1
|
|
Hainan
|
1
|
|
Chongqing
|
1
|
|
Sichuan
|
1
|
|
Guizhou
|
5
|
|
Yunnan
|
2
|
|
Shaanxi
|
2
|
|
Gansu
|
1
|
|
Qinghai
|
1
|
|
Ningxia
|
2
|
|
Xinjiang
|
2
|
#
Finally we will consider both factors at once:
DD=cbind(DCO2_DE,DE_DGdP)
pca_tot = prcomp(dist(t(as.matrix(DD))))
print(xtable(summary(pca_tot)),type="html")
|
|
PC1
|
PC2
|
PC3
|
PC4
|
PC5
|
PC6
|
|
Standard deviation
|
49.3648
|
24.7280
|
13.2842
|
4.0845
|
3.2267
|
0.0000
|
|
Proportion of Variance
|
0.7494
|
0.1880
|
0.0543
|
0.0051
|
0.0032
|
0.0000
|
|
Cumulative Proportion
|
0.7494
|
0.9374
|
0.9917
|
0.9968
|
1.0000
|
1.0000
|
print(xtable(pca_tot$rotation),type="html")
|
|
PC1
|
PC2
|
PC3
|
PC4
|
PC5
|
PC6
|
|
co2emission2006_2003
|
0.53
|
0.23
|
-0.22
|
0.04
|
-0.13
|
0.78
|
|
co2emission2009_2006
|
-0.35
|
0.53
|
0.29
|
0.09
|
0.66
|
0.27
|
|
co2emission2012_2009
|
-0.34
|
0.52
|
0.29
|
-0.09
|
-0.72
|
0.03
|
|
X2006y
|
-0.31
|
-0.63
|
0.49
|
-0.02
|
-0.09
|
0.52
|
|
X2009y
|
-0.44
|
-0.04
|
-0.55
|
-0.68
|
0.05
|
0.20
|
|
X2012y
|
-0.45
|
-0.07
|
-0.49
|
0.72
|
-0.12
|
0.13
|
Dp=as.data.frame((as.matrix(DD)%*%(pca_tot$rotation))[,1:2])
Dp[,"Provinces"]=gsub(" ","",mydataGDP[,1],fixed=TRUE)
h1=hclust(dist(as.matrix(Dp[,1:2])))
lg=cutree(h1,ngrp)
print(xtable(as.data.frame(lg)),type="html")
|
|
lg
|
|
Beijing
|
1
|
|
Tianjin
|
1
|
|
Hebei
|
2
|
|
Shanxi
|
2
|
|
Inner Mongolia
|
2
|
|
Liaoning
|
1
|
|
Jilin
|
3
|
|
Heilongjiang
|
1
|
|
Shanghai
|
1
|
|
Jiangsu
|
1
|
|
Zhejiang
|
1
|
|
Anhui
|
1
|
|
Fujian
|
2
|
|
Jiangxi
|
2
|
|
Shandong
|
1
|
|
Henan
|
1
|
|
Hubei
|
2
|
|
Hunan
|
2
|
|
Guangdong
|
1
|
|
Guangxi
|
1
|
|
Hainan
|
2
|
|
Chongqing
|
3
|
|
Sichuan
|
2
|
|
Guizhou
|
4
|
|
Yunnan
|
2
|
|
Shaanxi
|
2
|
|
Gansu
|
2
|
|
Qinghai
|
5
|
|
Ningxia
|
5
|
|
Xinjiang
|
2
|
Dp[,"GRP"]=0
Dp[,"Group"]=as.factor(as.numeric(lg))
for (i in 1:ngrp) {
Dp[Dp$Provinces==gsub(" ","",(names(lg)[lg==i])[1],fixed=TRUE),"GRP"]=i
}
ggplot(data=Dp,aes(x=PC1, y=PC2)) +
geom_point(size=3,aes(shape=Group)) +
geom_text(aes(label=ifelse(GRP>0,as.character(Provinces),''),hjust=0,hust=0)) +
theme_bw()

plot(h1,main="",xlab="PCA view of Provinces")

h2=hclust(dist(DCO2_DE))
plot(h2,main="",xlab="Provinces")

lg=cutree(h2,ngrp)
print(xtable(as.data.frame(lg)),type="html")
|
|
lg
|
|
Beijing
|
1
|
|
Tianjin
|
2
|
|
Hebei
|
1
|
|
Shanxi
|
2
|
|
Inner Mongolia
|
2
|
|
Liaoning
|
2
|
|
Jilin
|
3
|
|
Heilongjiang
|
2
|
|
Shanghai
|
1
|
|
Jiangsu
|
1
|
|
Zhejiang
|
2
|
|
Anhui
|
2
|
|
Fujian
|
1
|
|
Jiangxi
|
4
|
|
Shandong
|
2
|
|
Henan
|
2
|
|
Hubei
|
4
|
|
Hunan
|
1
|
|
Guangdong
|
1
|
|
Guangxi
|
1
|
|
Hainan
|
1
|
|
Chongqing
|
1
|
|
Sichuan
|
1
|
|
Guizhou
|
5
|
|
Yunnan
|
2
|
|
Shaanxi
|
2
|
|
Gansu
|
1
|
|
Qinghai
|
1
|
|
Ningxia
|
2
|
|
Xinjiang
|
2
|
#
Now it’s time to interpert the figures and to discuss them.