The complete data sets are listed at the end of this document.
For the full code, see github.com.
## Principal components analysis of the variables X1-X4 in `student`.
## cor = TRUE bases the decomposition on the correlation matrix.
student.pr <- princomp(
  ~ X1 + X2 + X3 + X4,
  data = student,
  cor = TRUE
)
## Print the importance of each component together with the loadings.
summary(student.pr, loadings = TRUE)
## Importance of components:
## Comp.1 Comp.2 Comp.3 Comp.4
## Standard deviation 1.8817805 0.55980636 0.28179594 0.25711844
## Proportion of Variance 0.8852745 0.07834579 0.01985224 0.01652747
## Cumulative Proportion 0.8852745 0.96362029 0.98347253 1.00000000
##
## Loadings:
## Comp.1 Comp.2 Comp.3 Comp.4
## X1 -0.497 0.543 -0.450 0.506
## X2 -0.515 -0.210 -0.462 -0.691
## X3 -0.481 -0.725 0.175 0.461
## X4 -0.507 0.368 0.744 -0.232
## Scores of each observation on the principal components
predict(student.pr)
## Comp.1 Comp.2 Comp.3 Comp.4
## 1 0.06990950 -0.23813701 -0.35509248 -0.266120139
## 2 1.59526340 -0.71847399 0.32813232 -0.118056646
## 3 -2.84793151 0.38956679 -0.09731731 -0.279482487
## 4 0.75996988 0.80604335 -0.04945722 -0.162949298
## 5 -2.73966777 0.01718087 0.36012615 0.358653044
## 6 2.10583168 0.32284393 0.18600422 -0.036456084
## 7 -1.42105591 -0.06053165 0.21093321 -0.044223092
## 8 -0.82583977 -0.78102576 -0.27557798 0.057288572
## 9 -0.93464402 -0.58469242 -0.08814136 0.181037746
## 10 2.36463820 -0.36532199 0.08840476 0.045520127
## 11 2.83741916 0.34875841 0.03310423 -0.031146930
## 12 -2.60851224 0.21278728 -0.33398037 0.210157574
## 13 -2.44253342 -0.16769496 -0.46918095 -0.162987830
## 14 1.86630669 0.05021384 0.37720280 -0.358821916
## 15 2.81347421 -0.31790107 -0.03291329 -0.222035112
## 16 0.06392983 0.20718448 0.04334340 0.703533624
## 17 -1.55561022 -1.70439674 -0.33126406 0.007551879
## 18 1.07392251 -0.06763418 0.02283648 0.048606680
## 19 -2.52174212 0.97274301 0.12164633 -0.390667991
## 20 -2.14072377 0.02217881 0.37410972 0.129548960
## 21 -0.79624422 0.16307887 0.12781270 -0.294140762
## 22 0.28708321 -0.35744666 -0.03962116 0.080991989
## 23 -0.25151075 1.25555188 -0.55617325 0.109068939
## 24 2.05706032 0.78894494 -0.26552109 0.388088643
## 25 -3.08596855 -0.05775318 0.62110421 -0.218939612
## 26 -0.16367555 0.04317932 0.24481850 0.560248997
## 27 1.37265053 0.02220972 -0.23378320 -0.257399715
## 28 2.16097778 0.13733233 0.35589739 0.093123683
## 29 2.40434827 -0.48613137 -0.16154441 -0.007914021
## 30 0.50287468 0.14734317 -0.20590831 -0.122078819
## Scree plot of the component variances, drawn as a line chart
screeplot(student.pr, type = "lines")
## Biplot of the fitted principal components
biplot(student.pr)
For more details on parallel analysis, see Hayton, Allen & Scarpello (2004), "Factor Retention Decisions in Exploratory Factor Analysis: A Tutorial on Parallel Analysis".
## Linear regression of y on every other column of `fertilization`
lmo.sol <- lm(y ~ ., data = fertilization)
summary(lmo.sol)
##
## Call:
## lm(formula = y ~ ., data = fertilization)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.8202 -0.6534 -0.0676 0.7305 2.7770
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.158668 1.699788 6.565 3.62e-06 ***
## x1 1.702875 0.361459 4.711 0.000174 ***
## x2 -2.188405 0.527028 -4.152 0.000598 ***
## x3 0.007649 0.001137 6.728 2.63e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.146 on 18 degrees of freedom
## Multiple R-squared: 0.9761, Adjusted R-squared: 0.9721
## F-statistic: 245.2 on 3 and 18 DF, p-value: 8.824e-15
## Variance inflation factors for the predictors of the full model;
## vif() is provided by the car package.
library(car)
vif(lmo.sol)
## x1 x2 x3
## 196.842222 209.058254 9.841936
判断主成分个数的方法:
## Parallel analysis for principal components (fa = "pc"), using 100
## simulated iterations, to decide how many components to retain.
## NOTE(review): `fertilization1` is not created in the code shown here;
## presumably it holds the predictor columns x1-x3 of `fertilization`
## -- confirm against the full script.
library(psych)
fa.parallel(
  fertilization1,
  fa = "pc",
  n.iter = 100,
  show.legend = FALSE,
  main = "Scree plot with parallel analysis"
)
## Parallel analysis suggests that the number of factors = NA and the number of components = 1
按照parallel analysis的结果, 选取一个主成分, 并作出主成分与原变量的关系图
## Extract a single, unrotated principal component from `fertilization1`
fertilization.pr <- principal(
  fertilization1,
  nfactors = 1,
  rotate = "none"
)
## Print the loadings and fit statistics of the solution
fertilization.pr
## Principal Components Analysis
## Call: principal(r = fertilization1, nfactors = 1, rotate = "none")
## Standardized loadings (pattern matrix) based upon correlation matrix
## PC1 h2 u2 com
## x1 0.99 0.99 0.014 1
## x2 0.99 0.99 0.012 1
## x3 0.98 0.95 0.048 1
##
## PC1
## SS loadings 2.93
## Proportion Var 0.98
##
## Mean item complexity = 1
## Test of the hypothesis that 1 component is sufficient.
##
## The root mean square of the residuals (RMSR) is 0.02
## with the empirical chi square 0.05 with prob < NA
##
## Fit based upon off diagonal values = 1
## Diagram of how the retained component relates to the original variables
fa.diagram(fertilization.pr)
建立主成分回归模型并计算原方程系数
## Principal-component regression: regress y on the component score F1.
## NOTE(review): `F1` is not a column of the `fertilization` table shown in
## this document; presumably the first-component scores were added to the
## data frame in a step that is not visible here -- confirm before rerunning.
lm.sol <- lm(y ~ F1, data = fertilization)
summary(lm.sol)
##
## Call:
## lm(formula = y ~ F1, data = fertilization)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.7389 -0.7798 -0.0406 0.8496 4.6182
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 23.7273 0.3819 62.13 < 2e-16 ***
## F1 -3.7928 0.2232 -16.99 2.37e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.791 on 20 degrees of freedom
## Multiple R-squared: 0.9352, Adjusted R-squared: 0.932
## F-statistic: 288.7 on 1 and 20 DF, p-value: 2.369e-13
## (Intercept) x1 x2 x3
## 5.412508163 0.232064543 0.328696909 0.003207758
Data for the principal components example (`student`, 30 observations):

X1 | X2 | X3 | X4 |
---|---|---|---|
148 | 41 | 72 | 78 |
139 | 34 | 71 | 76 |
160 | 49 | 77 | 86 |
149 | 36 | 67 | 79 |
159 | 45 | 80 | 86 |
142 | 31 | 66 | 76 |
153 | 43 | 76 | 83 |
150 | 43 | 77 | 79 |
151 | 42 | 77 | 80 |
139 | 31 | 68 | 74 |
140 | 29 | 64 | 74 |
161 | 47 | 78 | 84 |
158 | 49 | 78 | 83 |
140 | 33 | 67 | 77 |
137 | 31 | 66 | 73 |
152 | 35 | 73 | 79 |
149 | 47 | 82 | 79 |
145 | 35 | 70 | 77 |
160 | 47 | 74 | 87 |
156 | 44 | 78 | 85 |
151 | 42 | 73 | 82 |
147 | 38 | 73 | 78 |
157 | 39 | 68 | 80 |
147 | 30 | 65 | 75 |
157 | 48 | 80 | 88 |
151 | 36 | 74 | 80 |
144 | 36 | 68 | 76 |
141 | 30 | 67 | 76 |
139 | 32 | 68 | 73 |
148 | 38 | 70 | 78 |

Data for the principal-component regression example (`fertilization`, 22 observations):

x1 | x2 | x3 | y |
---|---|---|---|
13.0 | 9.2 | 50 | 13 |
18.7 | 13.2 | 102 | 14 |
21.0 | 14.8 | 150 | 15 |
19.0 | 13.3 | 110 | 16 |
22.8 | 16.0 | 200 | 17 |
26.0 | 18.2 | 330 | 18 |
28.0 | 19.7 | 450 | 19 |
31.4 | 22.5 | 450 | 20 |
30.3 | 21.0 | 550 | 21 |
29.2 | 20.5 | 640 | 22 |
36.2 | 25.2 | 800 | 23 |
37.0 | 26.1 | 1090 | 24 |
37.9 | 27.2 | 1140 | 25 |
41.6 | 30.0 | 1500 | 26 |
38.2 | 27.1 | 1180 | 27 |
39.4 | 27.4 | 1320 | 28 |
39.2 | 27.6 | 1400 | 29 |
42.0 | 29.4 | 1600 | 30 |
43.0 | 30.0 | 1600 | 31 |
41.1 | 27.2 | 1400 | 33 |
43.0 | 31.0 | 2050 | 35 |
49.0 | 34.8 | 2500 | 36 |