library(FactoMineR)

data

# Toy data: ten points evenly spaced on the unit circle (angle step pi/5),
# plus one low-amplitude Gaussian noise column that carries no structure.
# The seed is fixed so that the noise, and every PCA result computed from it
# further down, is reproducible from run to run.
set.seed(20240501)

cs <- cos((0:9) * pi / 5)
sn <- sin((0:9) * pi / 5)
er <- rnorm(10) / 10

datcse <- cbind.data.frame(cs = cs, sn = sn, er = er)

誤差なし

# Noise-free baseline: unscaled PCA on the first two columns of datcse only.
# par() is set first so the plots drawn by PCA() use tight margins, compact
# axis-label spacing and a single-panel layout.
par(
  mar = c(2.5, 2.5, 2, 0.3),
  mgp = c(1.5, 0.5, 0),
  mfrow = c(1, 1)
)
res0 <- PCA(X = datcse[, 1:2], scale.unit = FALSE)

多少の誤差は、標準化しなければわずかな影響

# All three columns (circle + noise), still without standardizing the
# variables: the low-variance noise column keeps its small weight.
rescsen <- PCA(
  X = datcse,
  scale.unit = FALSE
)

標準化すると、恐ろしい結果が

# Same data with PCA()'s default settings, i.e. no scale.unit override; this
# is the standardized case discussed in the surrounding text.
rescse <- PCA(X = datcse)

誤差がさらにあると、さらにひどい事態に

# Add a second structure-free column (uniform draws divided by 10) next to the
# existing circle and noise columns.
datcse2 <- cbind.data.frame(
  cs = cs,
  sn = sn,
  er = er,
  er2 = runif(10) / 10
)

# Default PCA on the four columns; left as a bare top-level call so the
# result summary is printed.
PCA(X = datcse2)

## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 10 individuals, described by 4 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"

誤差の変数がさらに増えると、もっともらしく見える最悪の事態に

# Circle coordinates plus four structure-free columns drawn from different
# distributions (normal, uniform, normal, Poisson), each divided by 10.
# The random draws are kept in the same argument order as before so the RNG
# stream is consumed identically.
datcse4 <- cbind.data.frame(
  cs = cs,
  sn = sn,
  er = er,
  er2 = runif(10) / 10,
  er3 = rnorm(10) / 10,
  er4 = rpois(10, 1) / 10
)

# Default PCA on the six columns; bare top-level call so the summary prints.
PCA(X = datcse4)

## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 10 individuals, described by 6 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"

それでも、標準化しなければ大丈夫

# Same six-column data, this time without standardizing the variables.
# Bare top-level call so the result summary is printed.
PCA(
  X = datcse4,
  scale.unit = FALSE
)

## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 10 individuals, described by 6 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"

もう一つの例

sin カーブに、垂直方向(第 3 次元)へごくわずかな誤差を加えたデータを PCA で処理する。標準化しない (not scaled) 場合は、本来の図が復元される。しかし標準化した (scaled) 場合は、とんでもない図になる。つまり、誤差が針小棒大に扱われ、図が崩れてしまう。
なお、データは連続した曲線ではなく、201 点の点列である。

# Second example: a sine curve sampled at 201 points on [-2, 2], plus a third
# coordinate that is a cosine of the same argument with amplitude 0.01.
par(
  mar = c(2.5, 2.5, 2, 0.3),
  mgp = c(1.5, 0.5, 0),
  mfrow = c(1, 1)
)

# Build the three coordinates up front; the plots below only read them.
xpts <- 2 * (-100:100) / 100
ysin <- sin(pi * xpts)
ecos <- 0.01 * cos(pi * xpts)

# asp = 1 keeps both axes on the same scale, so the second plot shows how
# small the third coordinate is compared with the x range.
plot(xpts, ysin, type = "l", asp = 1, main = "original 2-dim data")
plot(xpts, ecos, type = "l", asp = 1, main = "1-3 dim")

# Three-column data frame handed to PCA further down.
dat2 <- data.frame(xpts = xpts, ysin = ysin, ecos = ecos)

# Unscaled PCA on the three-column sine data. graph = FALSE suppresses the
# automatic plots so each figure is drawn explicitly below.
# TRUE/FALSE are spelled out: T and F are ordinary variables that can be
# reassigned, whereas TRUE and FALSE are reserved words.
suf <- PCA(dat2, scale.unit = FALSE, graph = FALSE)
plot(suf, choix = "var", title = "Not Scaled PCA")

# Individuals' coordinates drawn as a connected line; plot() on a coordinate
# matrix uses its first two columns.
plot(suf$ind$coord, type = "l", main = "Not Scaled PCA")

# Same analysis with the variables standardized, for comparison.
sut <- PCA(dat2, scale.unit = TRUE, graph = FALSE)
plot(sut, choix = "var", title = "Scaled PCA")

plot(sut$ind$coord, type = "l", main = "Scaled PCA")