# PCA and Factor Analysis

#########################################
######################################### Principal Component Analysis
#########################################   Data: USArrests (Violent Crime Rates by US State)
# Per-variable means of the USArrests columns.
colMeans(USArrests)
##   Murder  Assault UrbanPop     Rape 
##    7.788  170.760   65.540   21.232
# Per-variable variances. Assault's variance is far larger than the others,
# so an unscaled PCA would be driven almost entirely by Assault.
vapply(USArrests, var, numeric(1))
##     Murder    Assault   UrbanPop       Rape 
##   18.97047 6945.16571  209.51878   87.72916
# Correlation-based PCA: standardise each variable to unit variance first.
# `cor = TRUE` is a princomp() argument, not a prcomp() one; with it the
# previous call produced an unscaled (covariance) fit. prcomp() requests
# standardisation through `scale. = TRUE`.
pca.out <- prcomp(USArrests, scale. = TRUE)
summary(pca.out)
## Importance of components:
##                           PC1    PC2     PC3     PC4
## Standard deviation     1.5749 0.9949 0.59713 0.41645
## Proportion of Variance 0.6201 0.2474 0.08914 0.04336
## Cumulative Proportion  0.6201 0.8675 0.95664 1.00000
attributes(pca.out)
## $names
## [1] "sdev"     "rotation" "center"   "scale"    "x"       
## 
## $class
## [1] "prcomp"
# Standard deviations of the principal components.
pca.out$sdev
## [1] 1.5748783 0.9948694 0.5971291 0.4164494
# scree plot
# Four input variables give four components, so plot exactly that many
# instead of asking for a fifth that does not exist.
screeplot(
  pca.out,
  npcs = length(pca.out$sdev),
  type = "lines",
  main = "scree plot-corrlation"
)

# Observations and variable loadings on the first two components.
biplot(pca.out)

#########################################
######################################### Factor Analysis
#########################################   Data: state.x77 (US State Facts and Figures)
# Two-factor model fitted to state.x77 three ways: unrotated, with the
# default varimax rotation, and with promax rotation. Each fit is printed
# right after it is created.

# Unrotated solution.
fact1 <- factanal(state.x77, factors = 2, rotation = "none")
fact1
## 
## Call:
## factanal(x = state.x77, factors = 2, rotation = "none")
## 
## Uniquenesses:
## Population     Income Illiteracy   Life Exp     Murder    HS Grad 
##      0.859      0.498      0.353      0.337      0.005      0.146 
##      Frost       Area 
##      0.681      0.651 
## 
## Loadings:
##            Factor1 Factor2
## Population  0.339   0.162 
## Income     -0.244   0.665 
## Illiteracy  0.713  -0.372 
## Life Exp   -0.786   0.213 
## Murder      0.997         
## HS Grad    -0.506   0.773 
## Frost      -0.543   0.155 
## Area        0.218   0.549 
## 
##                Factor1 Factor2
## SS loadings      2.894   1.576
## Proportion Var   0.362   0.197
## Cumulative Var   0.362   0.559
## 
## Test of the hypothesis that 2 factors are sufficient.
## The chi square statistic is 43.83 on 13 degrees of freedom.
## The p-value is 3.27e-05

# Rotation left at its default (varimax); regression-method factor scores
# are requested so they can be plotted below.
fact2 <- factanal(state.x77, factors = 2, scores = "regression")
fact2
## 
## Call:
## factanal(x = state.x77, factors = 2, scores = "regression")
## 
## Uniquenesses:
## Population     Income Illiteracy   Life Exp     Murder    HS Grad 
##      0.859      0.498      0.353      0.337      0.005      0.146 
##      Frost       Area 
##      0.681      0.651 
## 
## Loadings:
##            Factor1 Factor2
## Population  0.341   0.158 
## Income     -0.236   0.668 
## Illiteracy  0.709  -0.380 
## Life Exp   -0.784   0.221 
## Murder      0.997         
## HS Grad    -0.497   0.779 
## Frost      -0.541   0.161 
## Area        0.225   0.546 
## 
##                Factor1 Factor2
## SS loadings      2.875   1.595
## Proportion Var   0.359   0.199
## Cumulative Var   0.359   0.559
## 
## Test of the hypothesis that 2 factors are sufficient.
## The chi square statistic is 43.83 on 13 degrees of freedom.
## The p-value is 3.27e-05

# Promax rotation; the printout also reports the factor correlations.
fact3 <- factanal(state.x77, factors = 2, rotation = "promax")
fact3
## 
## Call:
## factanal(x = state.x77, factors = 2, rotation = "promax")
## 
## Uniquenesses:
## Population     Income Illiteracy   Life Exp     Murder    HS Grad 
##      0.859      0.498      0.353      0.337      0.005      0.146 
##      Frost       Area 
##      0.681      0.651 
## 
## Loadings:
##            Factor1 Factor2
## Population  0.398   0.188 
## Income              0.699 
## Illiteracy  0.600  -0.365 
## Life Exp   -0.727   0.192 
## Murder      1.019         
## HS Grad    -0.256   0.803 
## Frost      -0.500   0.141 
## Area        0.404   0.595 
## 
##                Factor1 Factor2
## SS loadings      2.565   1.716
## Proportion Var   0.321   0.214
## Cumulative Var   0.321   0.535
## 
## Factor Correlations:
##         Factor1 Factor2
## Factor1   1.000   0.351
## Factor2   0.351   1.000
## 
## Test of the hypothesis that 2 factors are sufficient.
## The chi square statistic is 43.83 on 13 degrees of freedom.
## The p-value is 3.27e-05

# Loadings of the fact2 fit: one point per variable, Factor1 against Factor2.
plot(
  fact2$loadings[, "Factor1"],
  fact2$loadings[, "Factor2"],
  pch = 19,
  xlab = "factor1",
  ylab = "factor2",
  main = "factor pattern"
)

# Factor scores of the fact2 fit: one point per row of state.x77.
plot(
  fact2$scores[, "Factor1"],
  fact2$scores[, "Factor2"],
  pch = 19,
  xlab = "factor1",
  ylab = "factor2",
  main = "factor scores"
)