주성분 분석(PCA, principal component analysis)




파일 불러오기
# Load the raw auto-parts data; the first row of the CSV holds column names.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
autoparts <- read.csv("autoparts.csv", header = TRUE)
# Show the number of rows and columns that were read.
dim(autoparts)
## [1] 34139    17
# Keep only the rows for product "90784-76001" and drop the first seven
# columns, leaving ten variables for the analysis.
autoparts1 <- autoparts[autoparts$prod_no == "90784-76001", -seq_len(7)]
dim(autoparts1)
## [1] 21779    10
# Inspect the column types of the filtered data.
str(autoparts1)
## 'data.frame':    21779 obs. of  10 variables:
##  $ fix_time         : num  85.5 86.2 86 86.1 86.1 86.3 86.5 86.4 86.3 86 ...
##  $ a_speed          : num  0.611 0.606 0.609 0.61 0.603 0.606 0.606 0.607 0.604 0.608 ...
##  $ b_speed          : num  1.72 1.71 1.72 1.72 1.7 ...
##  $ separation       : num  242 245 243 242 242 ...
##  $ s_separation     : num  658 657 658 657 657 ...
##  $ rate_terms       : int  95 95 95 95 95 95 95 95 95 95 ...
##  $ mpa              : num  78.2 77.9 78 78.2 77.9 77.9 78.2 77.5 77.8 77.5 ...
##  $ load_time        : num  18.1 18.2 18.1 18.1 18.2 18 18.1 18.1 18 18.1 ...
##  $ highpressure_time: int  58 58 82 74 56 78 55 57 50 60 ...
##  $ c_thickness      : num  24.7 22.5 24.1 25.1 24.5 22.9 24.3 23.9 22.2 19 ...
# Remove outliers: keep only rows whose c_thickness is below 1000.
autoparts2 <- autoparts1[with(autoparts1, c_thickness < 1000), ]

주성분 분석
# Run the principal component analysis on the outlier-filtered data.
# scale. = TRUE rescales every variable to unit variance first, so variables
# measured on large scales do not dominate the components.
pca <- prcomp(autoparts2, scale. = TRUE)

해석
# The summary lists, per component, the standard deviation, the proportion of
# variance (explanatory power) and the cumulative proportion of variance.
# Keeping components up to PC3 explains 62.35% of the total variance.
summary(pca)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5     PC6
## Standard deviation     1.8264 1.3686 1.0130 0.99745 0.96304 0.91186
## Proportion of Variance 0.3336 0.1873 0.1026 0.09949 0.09274 0.08315
## Cumulative Proportion  0.3336 0.5209 0.6235 0.72299 0.81573 0.89888
##                            PC7     PC8    PC9    PC10
## Standard deviation     0.67352 0.59323 0.4516 0.04094
## Proportion of Variance 0.04536 0.03519 0.0204 0.00017
## Cumulative Proportion  0.94424 0.97943 0.9998 1.00000
# Loadings (rotation matrix) for all principal components: one row per input
# variable and one column per component.
pca$rotation
##                           PC1         PC2          PC3          PC4
## fix_time          -0.39362099  0.13036027  0.126888895 -0.100020384
## a_speed            0.41358841  0.29608549  0.059663533 -0.061210917
## b_speed            0.04252957  0.06424423 -0.847369462 -0.411908036
## separation        -0.52119553  0.14170606 -0.078462745 -0.020047266
## s_separation       0.51987900 -0.12173862  0.072328697  0.038092975
## rate_terms         0.27399179 -0.07169262 -0.256113740 -0.008819776
## mpa                0.05211043  0.64295838 -0.016881628 -0.067111883
## load_time          0.22697778  0.28105952  0.259686263 -0.140911409
## highpressure_time -0.01538209 -0.10528864  0.342853787 -0.884230890
## c_thickness        0.02714598 -0.58965744  0.007191132 -0.091792503
##                            PC5          PC6         PC7         PC8
## fix_time           0.478463470  0.152253575 -0.26477539  0.65569811
## a_speed           -0.004745658 -0.299380763 -0.39456242  0.29744625
## b_speed            0.186068342 -0.214147431  0.13510418  0.06122006
## separation        -0.020562094  0.067950073 -0.04134701 -0.21923378
## s_separation      -0.003764636 -0.034021957  0.13405616  0.27001919
## rate_terms         0.021168998  0.878549202 -0.23281687 -0.03096987
## mpa               -0.195085710 -0.006433205 -0.45712792 -0.30932639
## load_time          0.729689883  0.058251574  0.29615442 -0.38958070
## highpressure_time -0.273078633  0.105699248  0.05505474  0.01335235
## c_thickness        0.300775600 -0.221586362 -0.61836286 -0.32904508
##                           PC9         PC10
## fix_time          -0.22315088  0.003615434
## a_speed            0.63212487 -0.024176555
## b_speed           -0.06223370  0.002461653
## separation         0.36200906 -0.718228882
## s_separation      -0.37221935 -0.691102517
## rate_terms         0.16324386  0.002900899
## mpa               -0.48545272 -0.026213119
## load_time          0.08271710 -0.005513618
## highpressure_time  0.01737197 -0.003056521
## c_thickness       -0.08771657 -0.072011542
# When only the first two principal components are kept, each one is a
# weighted sum of the variables using the loadings shown below:
#   PC1 = -0.3936 * fix_time + ...
#   PC2 =  0.1303 * fix_time + ...
pca$rotation[, c("PC1", "PC2")]
##                           PC1         PC2
## fix_time          -0.39362099  0.13036027
## a_speed            0.41358841  0.29608549
## b_speed            0.04252957  0.06424423
## separation        -0.52119553  0.14170606
## s_separation       0.51987900 -0.12173862
## rate_terms         0.27399179 -0.07169262
## mpa                0.05211043  0.64295838
## load_time          0.22697778  0.28105952
## highpressure_time -0.01538209 -0.10528864
## c_thickness        0.02714598 -0.58965744

plot
# Scree plot: the x-axis is the principal component and the y-axis is the
# variance of that component.
# Past the third component each additional component adds little explanatory
# power, so two or three components are retained.
# The type is spelled out in full ("lines") rather than relying on partial
# matching of "line".
plot(pca, type = "lines")

***