# PCA and Factor analysis
#########################################
######################################### Principal Component Analysis
######################################### by Violent Crime Rates by US State
# Per-variable means of the four USArrests columns.
apply(USArrests, 2, mean)
## Murder Assault UrbanPop Rape
## 7.788 170.760 65.540 21.232
# Per-variable variances. Assault's variance is far larger than the others,
# so a PCA on the raw (unscaled) data is dominated by Assault alone.
apply(USArrests, 2, var)
## Murder Assault UrbanPop Rape
## 18.97047 6945.16571 209.51878 87.72916
# Correlation-based PCA. prcomp() has no `cor` argument (that one belongs to
# princomp()); passing `cor = TRUE` is disregarded and the analysis runs on
# the covariance matrix. `scale. = TRUE` standardises every variable to unit
# variance, which is how prcomp() performs a correlation PCA.
pca.out <- prcomp(USArrests, scale. = TRUE)
summary(pca.out)
## Importance of components:
## PC1 PC2 PC3 PC4
## Standard deviation 1.5749 0.9949 0.59713 0.41645
## Proportion of Variance 0.6201 0.2474 0.08914 0.04336
## Cumulative Proportion 0.6201 0.8675 0.95664 1.00000
attributes(pca.out)
## $names
## [1] "sdev" "rotation" "center" "scale" "x"
##
## $class
## [1] "prcomp"
# Standard deviations of the principal components; with scaled input their
# squares sum to the number of variables (4).
pca.out$sdev
## [1] 1.5748783 0.9948694 0.5971291 0.4164494
# scree plot
# Only four components exist, so request exactly that many rather than a
# hard-coded 5 (which would index past the end of the variance vector).
screeplot(
  pca.out,
  npcs = length(pca.out$sdev),
  type = "lines",
  main = "scree plot-corrlation"
)
# Biplot of the first two components (observations and variable axes).
biplot(pca.out)
#########################################
######################################### Factor Analysis
######################################### by US State Facts and Figures
# Three maximum-likelihood two-factor fits of state.x77 that differ only in
# the rotation applied (and, for the second, in requesting factor scores).

# Fit 1: loadings left unrotated.
fact1 <- factanal(
  x = state.x77,
  factors = 2,
  rotation = "none"
)

# Fit 2: rotation not specified, so factanal() applies its default (varimax);
# regression-method factor scores are stored in the result.
fact2 <- factanal(
  x = state.x77,
  factors = 2,
  scores = "regression"
)

# Fit 3: promax rotation.
fact3 <- factanal(
  x = state.x77,
  factors = 2,
  rotation = "promax"
)
# Display the unrotated fit; the console transcript of it follows.
fact1
##
## Call:
## factanal(x = state.x77, factors = 2, rotation = "none")
##
## Uniquenesses:
## Population Income Illiteracy Life Exp Murder HS Grad
## 0.859 0.498 0.353 0.337 0.005 0.146
## Frost Area
## 0.681 0.651
##
## Loadings:
## Factor1 Factor2
## Population 0.339 0.162
## Income -0.244 0.665
## Illiteracy 0.713 -0.372
## Life Exp -0.786 0.213
## Murder 0.997
## HS Grad -0.506 0.773
## Frost -0.543 0.155
## Area 0.218 0.549
##
## Factor1 Factor2
## SS loadings 2.894 1.576
## Proportion Var 0.362 0.197
## Cumulative Var 0.362 0.559
##
## Test of the hypothesis that 2 factors are sufficient.
## The chi square statistic is 43.83 on 13 degrees of freedom.
## The p-value is 3.27e-05
# Display the default-rotation fit (the one fitted with regression scores);
# the console transcript of it follows.
fact2
##
## Call:
## factanal(x = state.x77, factors = 2, scores = "regression")
##
## Uniquenesses:
## Population Income Illiteracy Life Exp Murder HS Grad
## 0.859 0.498 0.353 0.337 0.005 0.146
## Frost Area
## 0.681 0.651
##
## Loadings:
## Factor1 Factor2
## Population 0.341 0.158
## Income -0.236 0.668
## Illiteracy 0.709 -0.380
## Life Exp -0.784 0.221
## Murder 0.997
## HS Grad -0.497 0.779
## Frost -0.541 0.161
## Area 0.225 0.546
##
## Factor1 Factor2
## SS loadings 2.875 1.595
## Proportion Var 0.359 0.199
## Cumulative Var 0.359 0.559
##
## Test of the hypothesis that 2 factors are sufficient.
## The chi square statistic is 43.83 on 13 degrees of freedom.
## The p-value is 3.27e-05
# Display the promax-rotated fit; the console transcript of it follows.
fact3
##
## Call:
## factanal(x = state.x77, factors = 2, rotation = "promax")
##
## Uniquenesses:
## Population Income Illiteracy Life Exp Murder HS Grad
## 0.859 0.498 0.353 0.337 0.005 0.146
## Frost Area
## 0.681 0.651
##
## Loadings:
## Factor1 Factor2
## Population 0.398 0.188
## Income 0.699
## Illiteracy 0.600 -0.365
## Life Exp -0.727 0.192
## Murder 1.019
## HS Grad -0.256 0.803
## Frost -0.500 0.141
## Area 0.404 0.595
##
## Factor1 Factor2
## SS loadings 2.565 1.716
## Proportion Var 0.321 0.214
## Cumulative Var 0.321 0.535
##
## Factor Correlations:
## Factor1 Factor2
## Factor1 1.000 0.351
## Factor2 0.351 1.000
##
## Test of the hypothesis that 2 factors are sufficient.
## The chi square statistic is 43.83 on 13 degrees of freedom.
## The p-value is 3.27e-05
# Factor pattern for fact2: one point per variable, placed at its loading on
# the first factor (x-axis) and on the second factor (y-axis).
plot(
  loadings(fact2)[, 1],
  loadings(fact2)[, 2],
  pch = 19,
  xlab = "factor1",
  ylab = "factor2",
  main = "factor pattern"
)
# Factor scores for fact2: one point per observation, first-factor score
# against second-factor score.
plot(
  fact2$scores[, 1],
  fact2$scores[, 2],
  pch = 19,
  xlab = "factor1",
  ylab = "factor2",
  main = "factor scores"
)