# 当数据存在共线性时,将原始变量的线性组合表示为主成分(互不相关的综合指标)
# 目的:
# 表现:
# Two measurements recorded on the same 12 observations.
x1 <- c(171, 175, 159, 155, 152, 158, 154, 164, 168, 166, 159, 164)
x2 <- c(57, 64, 41, 38, 35, 44, 41, 51, 57, 49, 47, 46)

# Scatter plot; xlim / ylim fix the ranges of the horizontal and vertical axes.
plot(x1, x2, xlim = c(145, 180), ylim = c(25, 75))

# Each lines() call draws one segment: the first vector holds the x
# coordinates of its two endpoints, the second vector the y coordinates.
# text() writes a label at the given (x, y) point.
lines(c(150, 178), c(33, 66))
text(180, 68, "y1")
lines(c(161, 168), c(60, 38))
text(161, 63, "y2")
# NOTE(review): the working directory is hard-coded to one machine's Desktop;
# d7.2.txt has to be present there for the read below to succeed.
setwd("C:/Users/lenovo/Desktop")
# Read the data set; the first line of the file supplies the column names.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
d7.2 <- read.table("d7.2.txt", header = TRUE)
# Correlation matrix of the variables. The correlations are fairly strong, so
# the data suit principal component analysis. Note: when all pairwise
# correlations are (nearly) equal, it is hard to extract good components from
# the variables -- the loadings show no distinct pattern, are spread evenly
# over the variables, and the component is just a plain linear combination.
cor(d7.2)
## X1 X2 X3 X4 X5 X6 X7
## X1 1.0000000 0.2569697 0.7252526 0.3853672 0.8990457 0.8284572 0.7145260
## X2 0.2569697 1.0000000 0.4537807 0.5765121 0.3575064 0.5420120 0.4045314
## X3 0.7252526 0.4537807 1.0000000 0.5831419 0.7823418 0.8924742 0.7744004
## X4 0.3853672 0.5765121 0.5831419 1.0000000 0.4665789 0.6291140 0.6911234
## X5 0.8990457 0.3575064 0.7823418 0.4665789 1.0000000 0.8795439 0.7853531
## X6 0.8284572 0.5420120 0.8924742 0.6291140 0.8795439 1.0000000 0.8133081
## X7 0.7145260 0.4045314 0.7744004 0.6911234 0.7853531 0.8133081 1.0000000
## X8 0.7218909 0.6277509 0.7220538 0.6254195 0.7517683 0.8435436 0.7183218
## X8
## X1 0.7218909
## X2 0.6277509
## X3 0.7220538
## X4 0.6254195
## X5 0.7517683
## X6 0.8435436
## X7 0.7183218
## X8 1.0000000
# Principal component analysis. cor = TRUE bases the analysis on the
# correlation matrix (i.e. on standardized variables); the default is the
# covariance matrix. TRUE is spelled out because T can be reassigned.
PCA <- princomp(d7.2, cor = TRUE)
# Printing the fit shows the component standard deviations, which are the
# square roots of the eigenvalues.
PCA
## Call:
## princomp(x = d7.2, cor = TRUE)
##
## Standard deviations:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7
## 2.3877119 1.0142326 0.7101294 0.5222697 0.4314432 0.4015967 0.2955459
## Comp.8
## 0.2415456
##
## 8 variables and 31 observations.
# Per-component standard deviation, proportion of variance and cumulative
# proportion of variance.
summary(object = PCA)
## Importance of components:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5
## Standard deviation 2.387712 1.0142326 0.71012939 0.52226971 0.43144321
## Proportion of Variance 0.712646 0.1285835 0.06303547 0.03409571 0.02326791
## Cumulative Proportion 0.712646 0.8412295 0.90426494 0.93836065 0.96162855
## Comp.6 Comp.7 Comp.8
## Standard deviation 0.40159675 0.29554587 0.241545582
## Proportion of Variance 0.02015999 0.01091842 0.007293034
## Cumulative Proportion 0.98178855 0.99270697 1.000000000
# Component loadings (the eigenvectors). Each component is interpreted from
# the size of its loadings on the individual variables.
loadings(PCA)
##
## Loadings:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8
## X1 -0.353 -0.429 0.175 0.299 -0.377 0.651
## X2 -0.249 0.677 0.521 -0.399 -0.129 0.134
## X3 -0.374 -0.789 0.261 0.116 0.372
## X4 -0.302 0.472 -0.628 0.225 0.249 -0.416
## X5 -0.376 -0.324 0.123 0.127 -0.281 -0.267 -0.695 0.298
## X6 -0.404 -0.200 0.132 -0.156 -0.857
## X7 -0.371 -0.442 -0.584 0.535 0.166
## X8 -0.374 0.118 0.282 0.409 0.522 0.546 0.141
##
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8
## SS loadings 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000
## Proportion Var 0.125 0.125 0.125 0.125 0.125 0.125 0.125 0.125
## Cumulative Var 0.125 0.250 0.375 0.500 0.625 0.750 0.875 1.000
# Scree plot, used to decide how many components to keep;
# type is either "lines" or "barplot".
screeplot(PCA, type = "lines")
# Principal component scores of every observation.
PCA[["scores"]]
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5
## 北京 -6.12230233 1.5225207 -0.000698533 -0.16662054 0.80816173
## 天津 -3.01010636 0.5367841 -1.983585145 0.24018727 0.22576954
## 河北 0.88750148 0.6923451 -1.007880240 -0.18330396 -0.07151821
## 山西 1.10374781 0.6013737 -0.372722824 -0.28297507 -0.83304562
## 内蒙古 -0.53334901 1.8477269 0.905239595 0.02154009 -0.11597902
## 辽宁 -0.09437659 0.6551539 -0.732793213 1.08401835 0.02237409
## 吉林 0.32707448 1.4246843 -0.611769172 0.87573634 -0.27557587
## 黑龙江 1.68861172 0.9958838 -0.208529623 0.58625087 0.08138796
## 上海 -7.08467075 -1.0693203 1.298471009 0.47816826 0.37117073
## 江苏 -1.14131407 -0.4536945 -0.091151023 -0.56517173 0.29633699
## 浙江 -3.82110679 0.1721339 0.891744431 0.38212613 -0.74259655
## 安徽 1.12338014 -0.3518062 0.090622754 0.14032668 0.08704853
## 福建 -1.17171809 -1.3776085 -0.003970811 -0.07106973 -0.77253155
## 江西 1.66938246 -0.5484835 0.664091644 -0.74109841 0.32709137
## 山东 -0.48111168 0.8084609 0.101467703 -0.66568938 -0.62659039
## 河南 1.27722788 0.6479033 0.090139577 -0.45362641 0.07947087
## 湖北 1.00945390 -0.1165043 0.326878041 -0.53998626 -0.40855492
## 湖南 0.36506902 0.2007324 -0.052268018 -0.49878819 0.19905353
## 广东 -4.03195586 -2.4804874 -0.803478909 -0.09763795 -0.48858766
## 广西 1.62739176 -1.2305704 -0.497569883 0.05814843 0.51713554
## 海南 1.87311537 -2.3528197 -1.004127081 -0.01636417 0.30061623
## 重庆 -0.39403198 0.4623324 -0.170144725 -0.92478643 -0.38485441
## 四川 1.15376310 -0.5180723 0.606681628 -0.33458975 0.31953171
## 贵州 2.01403098 -0.6594701 0.763877040 -0.32367349 -0.08918268
## 云南 2.42950002 -0.4177647 -0.175906875 0.90340516 -0.35390619
## 西藏 2.72036533 -1.0105476 1.416453156 1.26480009 -0.01040599
## 陕西 0.88797732 0.1169535 -0.266060720 -0.02579594 0.48428371
## 甘肃 1.32452136 0.1444992 0.246080766 -0.08967602 0.48669211
## 青海 1.76845611 0.2089337 0.157699554 -0.01405723 0.82536422
## 宁夏 1.31733038 0.4960324 -0.306788610 0.01482892 -0.20283469
## 新疆 1.31814288 1.0526951 0.729998508 -0.05462593 -0.05532510
## Comp.6 Comp.7 Comp.8
## 北京 -0.617050813 0.031038029 0.197697415
## 天津 0.295483841 0.690316105 0.001207657
## 河北 -0.250727157 -0.230407287 0.330645235
## 山西 0.185798481 -0.339093966 -0.200169538
## 内蒙古 0.585412862 -0.276801720 0.129868512
## 辽宁 0.122807969 0.238720270 0.038824403
## 吉林 0.507907099 -0.064620245 -0.057806192
## 黑龙江 -0.074525640 -0.291185007 -0.254935028
## 上海 0.334511659 -0.057588949 0.046922699
## 江苏 0.054377471 0.222355009 -0.435226138
## 浙江 -0.823356823 -0.227434176 -0.436176003
## 安徽 0.092030244 0.149364621 -0.400390171
## 福建 0.750935068 0.396097979 -0.060199171
## 江西 0.310648480 0.282145710 0.089198015
## 山东 0.046175057 -0.072298312 0.335522483
## 河南 0.083959244 -0.166185432 0.205745446
## 湖北 -0.221515183 0.418047386 -0.154758712
## 湖南 -0.074875744 0.052006779 -0.179976795
## 广东 0.162339932 -0.608652475 0.213374764
## 广西 0.020169324 0.002729061 -0.219526115
## 海南 -0.265839242 -0.377353784 0.297995100
## 重庆 -0.541852454 0.528307436 0.231214825
## 四川 -0.414837010 0.142792274 0.186931825
## 贵州 0.088030805 -0.040579891 -0.284458277
## 云南 -1.204551986 0.075837831 -0.014855661
## 西藏 0.134860660 0.482195042 0.458802220
## 陕西 0.068217592 -0.113188407 -0.361739542
## 甘肃 0.366856033 -0.269127919 -0.087249967
## 青海 -0.053452873 -0.292058241 0.024060914
## 宁夏 0.324427636 -0.107413069 0.146122725
## 新疆 0.007635468 -0.177964650 0.213333070
library(mvstats)
# Rank the observations by a composite principal component score and plot
# them. m is the number of components kept; the composite score (PC) is the
# weighted sum of the first m component scores, each weighted by its share of
# the variance explained by those m components together. In the recorded
# output below, rank 1 goes to the lowest composite score.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
princomp.rank(PCA, m = 2, plot = TRUE)
## Comp.1 Comp.2 PC rank
## 北京 -6.12230233 1.5225207 -4.95377713 2
## 天津 -3.01010636 0.5367841 -2.46795766 5
## 河北 0.88750148 0.6923451 0.85767147 16
## 山西 1.10374781 0.6013737 1.02695901 19
## 内蒙古 -0.53334901 1.8477269 -0.16939720 10
## 辽宁 -0.09437659 0.6551539 0.02019052 11
## 吉林 0.32707448 1.4246843 0.49484615 13
## 黑龙江 1.68861172 0.9958838 1.58272699 28
## 上海 -7.08467075 -1.0693203 -6.16521342 1
## 江苏 -1.14131407 -0.4536945 -1.03621016 7
## 浙江 -3.82110679 0.1721339 -3.21073265 4
## 安徽 1.12338014 -0.3518062 0.89789520 17
## 福建 -1.17171809 -1.3776085 -1.20318881 6
## 江西 1.66938246 -0.5484835 1.33037759 26
## 山东 -0.48111168 0.8084609 -0.28399815 8
## 河南 1.27722788 0.6479033 1.18103447 21
## 湖北 1.00945390 -0.1165043 0.83734914 15
## 湖南 0.36506902 0.2007324 0.33994986 12
## 广东 -4.03195586 -2.4804874 -3.79481107 3
## 广西 1.62739176 -1.2305704 1.19054702 22
## 海南 1.87311537 -2.3528197 1.22717346 24
## 重庆 -0.39403198 0.4623324 -0.26313511 9
## 四川 1.15376310 -0.5180723 0.89822000 18
## 贵州 2.01403098 -0.6594701 1.60538145 29
## 云南 2.42950002 -0.4177647 1.99429039 30
## 西藏 2.72036533 -1.0105476 2.15008847 31
## 陕西 0.88797732 0.1169535 0.77012492 14
## 甘肃 1.32452136 0.1444992 1.14415281 20
## 青海 1.76845611 0.2089337 1.53008025 27
## 宁夏 1.31733038 0.4960324 1.19179348 23
## 新疆 1.31814288 1.0526951 1.27756871 25