#使用套件
library(stats)
library(RCurl)
## Loading required package: bitops

Lab R Code

The following R commands perform PCA on the following data set (you can download it from the “Data Sets” folder).

The data give crime rates per 100,000 people for the 72 largest US cities in 1994.

The variables are:

    1. Murder
    1. Rape
    1. Robbery
    1. Assault
    1. Burglary
    1. Larceny
    1. Motor Vehicle Thefts
#讀取資料
crime <- read.table("C:/Users/Asus/Desktop/crime.txt",header = T ,sep = )
pairs(crime)

pca.crime <- princomp(scale(crime,center = TRUE,scale = TRUE),cor = TRUE)
summary(pca.crime)
## Importance of components:
##                          Comp.1    Comp.2    Comp.3     Comp.4     Comp.5
## Standard deviation     1.948024 1.0950164 0.8771129 0.71434047 0.57690506
## Proportion of Variance 0.542114 0.1712944 0.1099039 0.07289747 0.04754564
## Cumulative Proportion  0.542114 0.7134084 0.8233123 0.89620976 0.94375539
##                           Comp.6     Comp.7
## Standard deviation     0.4617666 0.42483396
## Proportion of Variance 0.0304612 0.02578341
## Cumulative Proportion  0.9742166 1.00000000
loadings(pca.crime)
## 
## Loadings:
##          Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7
## Murder   -0.370 -0.339  0.202  0.717  0.277  0.220 -0.262
## Rape     -0.249  0.467  0.783 -0.159         0.267       
## Robbery  -0.426 -0.388               -0.194 -0.147  0.776
## Assault  -0.434        -0.282        -0.767  0.118 -0.358
## Burglary -0.450  0.238                0.242 -0.794 -0.220
## Larceny  -0.276  0.606 -0.492  0.209  0.264  0.303  0.331
## MVT      -0.390 -0.303 -0.134 -0.644  0.399  0.351 -0.204
## 
##                Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7
## SS loadings     1.000  1.000  1.000  1.000  1.000  1.000  1.000
## Proportion Var  0.143  0.143  0.143  0.143  0.143  0.143  0.143
## Cumulative Var  0.143  0.286  0.429  0.571  0.714  0.857  1.000
#求出beta及lambda特徵值
pcs.crime <- predict(pca.crime)
eigen <- eigen(cor(crime))
#畫圖
plot(eigen$values,type = "h")

plot(pcs.crime[,1:2],type = "n",xlab = '1st PC',ylab = '2nd PC')
text(pcs.crime[,1:2],row.names(crime))

biplot(pca.crime,scale = 1)

#一個計算p_value的function
sign.pc<-function(x,R=1000,m=length(x), cor=T,...){
  
  # run PCA
  pc.out<-princomp(x,cor=cor,...)
  
  # the proportion of variance of each PC
  pve=(pc.out$sdev^2/m)[1:m]
  
  # a matrix with R rows and m columns that contains
  # the proportion of variance explained by each pc
  # for each randomization replicate.
  pve.perm<-matrix(NA,ncol=m,nrow=R)
  for(i in 1:R){
    
    # permutation each column
    x.perm<-apply(x,2,sample)
    
    # run PCA
    pc.perm.out<-princomp(x.perm,cor=cor,...)
    
    # the proportion of variance of each PC.perm
    pve.perm[i,]=(pc.perm.out$sdev^2/m)[1:m]
  }
  # calcalute the p-values
  pval<-apply(t(pve.perm)>pve,1,sum)/R
  return(list(pve=pve,pval=pval))
}

#計算comp.
sign.pc(crime,cor=T)
## $pve
##     Comp.1     Comp.2     Comp.3     Comp.4     Comp.5     Comp.6 
## 0.54211402 0.17129441 0.10990385 0.07289747 0.04754564 0.03046120 
##     Comp.7 
## 0.02578341 
## 
## $pval
## [1] 0.000 0.828 1.000 1.000 1.000 1.000 1.000
#設alpha=0.0.5
print(sign.pc(crime,cor=T)$pval<0.05)
## [1]  TRUE FALSE FALSE FALSE FALSE FALSE FALSE