# Load the raw auto-parts records from the working directory.
autoparts <- read.csv("autoparts.csv", header = TRUE)
dim(autoparts)
## [1] 34139 17

# Keep only the rows for product "90784-76001" and drop the first seven
# columns, leaving the ten columns listed by str() below.
# which() turns the logical mask into row positions, so a missing prod_no
# is skipped instead of producing an all-NA row in the result.
autoparts1 <- autoparts[which(autoparts$prod_no == "90784-76001"), -(1:7)]
dim(autoparts1)
## [1] 21779 10
str(autoparts1)
## 'data.frame': 21779 obs. of 10 variables:
## $ fix_time : num 85.5 86.2 86 86.1 86.1 86.3 86.5 86.4 86.3 86 ...
## $ a_speed : num 0.611 0.606 0.609 0.61 0.603 0.606 0.606 0.607 0.604 0.608 ...
## $ b_speed : num 1.72 1.71 1.72 1.72 1.7 ...
## $ separation : num 242 245 243 242 242 ...
## $ s_separation : num 658 657 658 657 657 ...
## $ rate_terms : int 95 95 95 95 95 95 95 95 95 95 ...
## $ mpa : num 78.2 77.9 78 78.2 77.9 77.9 78.2 77.5 77.8 77.5 ...
## $ load_time : num 18.1 18.2 18.1 18.1 18.2 18 18.1 18.1 18 18.1 ...
## $ highpressure_time: int 58 58 82 74 56 78 55 57 50 60 ...
## $ c_thickness : num 24.7 22.5 24.1 25.1 24.5 22.9 24.3 23.9 22.2 19 ...

# Remove outliers: keep only rows whose c_thickness is below 1000.
# which() again drops rows with a missing c_thickness rather than
# carrying all-NA rows forward into the analysis.
autoparts2 <- autoparts1[which(autoparts1$c_thickness < 1000), ]
# Analysis
# Principal component analysis on every column of autoparts2.
# scale. = TRUE rescales each variable to unit variance before the
# decomposition, so variables measured on larger scales do not dominate.
pca <- prcomp(autoparts2, scale. = TRUE)
# summary() lists, per component, the standard deviation, the proportion of
# variance explained, and the cumulative proportion.
# Keeping components up to PC3 explains about 62.4% of the total variance
# (cumulative proportion 0.6235 in the output below).
summary(pca)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 1.8264 1.3686 1.0130 0.99745 0.96304 0.91186
## Proportion of Variance 0.3336 0.1873 0.1026 0.09949 0.09274 0.08315
## Cumulative Proportion 0.3336 0.5209 0.6235 0.72299 0.81573 0.89888
## PC7 PC8 PC9 PC10
## Standard deviation 0.67352 0.59323 0.4516 0.04094
## Proportion of Variance 0.04536 0.03519 0.0204 0.00017
## Cumulative Proportion 0.94424 0.97943 0.9998 1.00000
# Loadings (rotation matrix) for all principal components: one row per
# input variable, one column per component.
pca[["rotation"]]
## PC1 PC2 PC3 PC4
## fix_time -0.39362099 0.13036027 0.126888895 -0.100020384
## a_speed 0.41358841 0.29608549 0.059663533 -0.061210917
## b_speed 0.04252957 0.06424423 -0.847369462 -0.411908036
## separation -0.52119553 0.14170606 -0.078462745 -0.020047266
## s_separation 0.51987900 -0.12173862 0.072328697 0.038092975
## rate_terms 0.27399179 -0.07169262 -0.256113740 -0.008819776
## mpa 0.05211043 0.64295838 -0.016881628 -0.067111883
## load_time 0.22697778 0.28105952 0.259686263 -0.140911409
## highpressure_time -0.01538209 -0.10528864 0.342853787 -0.884230890
## c_thickness 0.02714598 -0.58965744 0.007191132 -0.091792503
## PC5 PC6 PC7 PC8
## fix_time 0.478463470 0.152253575 -0.26477539 0.65569811
## a_speed -0.004745658 -0.299380763 -0.39456242 0.29744625
## b_speed 0.186068342 -0.214147431 0.13510418 0.06122006
## separation -0.020562094 0.067950073 -0.04134701 -0.21923378
## s_separation -0.003764636 -0.034021957 0.13405616 0.27001919
## rate_terms 0.021168998 0.878549202 -0.23281687 -0.03096987
## mpa -0.195085710 -0.006433205 -0.45712792 -0.30932639
## load_time 0.729689883 0.058251574 0.29615442 -0.38958070
## highpressure_time -0.273078633 0.105699248 0.05505474 0.01335235
## c_thickness 0.300775600 -0.221586362 -0.61836286 -0.32904508
## PC9 PC10
## fix_time -0.22315088 0.003615434
## a_speed 0.63212487 -0.024176555
## b_speed -0.06223370 0.002461653
## separation 0.36200906 -0.718228882
## s_separation -0.37221935 -0.691102517
## rate_terms 0.16324386 0.002900899
## mpa -0.48545272 -0.026213119
## load_time 0.08271710 -0.005513618
## highpressure_time 0.01737197 -0.003056521
## c_thickness -0.08771657 -0.072011542

# When only the first two principal components are kept, each one is a
# linear combination of the variables weighted by its loadings column:
#   PC1 = -0.3936 * fix_time + ...
#   PC2 =  0.1304 * fix_time + ...
pca[["rotation"]][, c("PC1", "PC2")]
## PC1 PC2
## fix_time -0.39362099 0.13036027
## a_speed 0.41358841 0.29608549
## b_speed 0.04252957 0.06424423
## separation -0.52119553 0.14170606
## s_separation 0.51987900 -0.12173862
## rate_terms 0.27399179 -0.07169262
## mpa 0.05211043 0.64295838
## load_time 0.22697778 0.28105952
## highpressure_time -0.01538209 -0.10528864
## c_thickness 0.02714598 -0.58965744
# Scree plot: the x-axis is the principal component, the y-axis is the
# variance it explains.
# Beyond three components each additional one adds little explained
# variance, so two or three components are retained.
# "lines" is the documented value of `type`; the abbreviated "line" was
# accepted only through partial matching.
plot(pca, type = "lines")
# ***