다중상관분석
상관분석은 비교 대상이 여럿인 경우에도 유용함
일반적으로 상관계수의 절댓값이 0.3 이상이면 상관관계가 존재하고, 0.8 이상이면 강한 상관관계가 존재한다고 봄 (부호는 관계의 방향만 나타냄)
library(corrplot)
# Load the raw measurements; the first row of the CSV holds the column names.
autoparts <- read.csv("autoparts.csv", header = TRUE)
# Keep only product 90784-76001 and drop the first seven columns.
# which() discards rows whose comparison is NA instead of returning all-NA rows.
autoparts1 <- autoparts[which(autoparts$prod_no == "90784-76001"), -(1:7)]
# Remove outliers: keep only rows whose c_thickness is below 1000.
autoparts2 <- autoparts1[which(autoparts1$c_thickness < 1000), ]
str(autoparts2)
## 'data.frame': 21767 obs. of 10 variables:
## $ fix_time : num 85.5 86.2 86 86.1 86.1 86.3 86.5 86.4 86.3 86 ...
## $ a_speed : num 0.611 0.606 0.609 0.61 0.603 0.606 0.606 0.607 0.604 0.608 ...
## $ b_speed : num 1.72 1.71 1.72 1.72 1.7 ...
## $ separation : num 242 245 243 242 242 ...
## $ s_separation : num 658 657 658 657 657 ...
## $ rate_terms : int 95 95 95 95 95 95 95 95 95 95 ...
## $ mpa : num 78.2 77.9 78 78.2 77.9 77.9 78.2 77.5 77.8 77.5 ...
## $ load_time : num 18.1 18.2 18.1 18.1 18.2 18 18.1 18.1 18 18.1 ...
## $ highpressure_time: int 58 58 82 74 56 78 55 57 50 60 ...
## $ c_thickness : num 24.7 22.5 24.1 25.1 24.5 22.9 24.3 23.9 22.2 19 ...
# Print the pairwise correlation matrix of every column in autoparts2.
cor(autoparts2)
## fix_time a_speed b_speed separation
## fix_time 1.00000000 -0.40965888 -0.053310788 0.6481035050
## a_speed -0.40965888 1.00000000 0.094201813 -0.6297286266
## b_speed -0.05331079 0.09420181 1.000000000 -0.0079536955
## separation 0.64810350 -0.62972863 -0.007953695 1.0000000000
## s_separation -0.64955379 0.61663567 0.004768493 -0.9946106507
## rate_terms -0.27573524 0.16379549 0.086763287 -0.4065543276
## mpa 0.01128835 0.42082145 0.065904868 0.0827314919
## load_time 0.02037007 0.39266555 0.022221837 -0.3181313349
## highpressure_time 0.01500145 -0.03609957 -0.013110064 -0.0007672313
## c_thickness -0.06179651 -0.16476471 0.011748578 -0.1901120525
## s_separation rate_terms mpa load_time
## fix_time -0.649553792 -0.275735237 0.011288347 0.02037007
## a_speed 0.616635670 0.163795487 0.420821453 0.39266555
## b_speed 0.004768493 0.086763287 0.065904868 0.02222184
## separation -0.994610651 -0.406554328 0.082731492 -0.31813133
## s_separation 1.000000000 0.417739099 -0.079472678 0.31399479
## rate_terms 0.417739099 1.000000000 -0.006729618 0.13530350
## mpa -0.079472678 -0.006729618 1.000000000 0.22330961
## load_time 0.313994794 0.135303504 0.223309612 1.00000000
## highpressure_time -0.009468168 -0.015795293 -0.042117049 -0.02557634
## c_thickness 0.121676176 0.012913095 -0.579886953 -0.12169953
## highpressure_time c_thickness
## fix_time 0.0150014512 -0.06179651
## a_speed -0.0360995659 -0.16476471
## b_speed -0.0131100638 0.01174858
## separation -0.0007672313 -0.19011205
## s_separation -0.0094681675 0.12167618
## rate_terms -0.0157952933 0.01291309
## mpa -0.0421170493 -0.57988695
## load_time -0.0255763439 -0.12169953
## highpressure_time 1.0000000000 0.08522342
## c_thickness 0.0852234187 1.00000000
# Keep the correlation matrix in x so the calls below can reuse it.
x <- cor(autoparts2)
# symnum() prints the matrix with each coefficient replaced by a symbol;
# the symbol depends on the interval the coefficient falls in (see the legend).
symnum(x)
## f a b sp s_ r m l h c
## fix_time 1
## a_speed . 1
## b_speed 1
## separation , , 1
## s_separation , , B 1
## rate_terms . . 1
## mpa . 1
## load_time . . . 1
## highpressure_time 1
## c_thickness . 1
## attr(,"legend")
## [1] 0 ' ' 0.3 '.' 0.6 ',' 0.8 '+' 0.9 '*' 0.95 'B' 1
# Draw the upper triangle as circles and overlay the lower triangle as numbers.
# Both calls must use the same `order`: each call arranges the variables by its
# own ordering, so mixing the default order with "AOE" would place the numbers
# under the wrong variable labels.
corrplot(x, order = "AOE", type = "upper", tl.pos = "d")
corrplot(
  x,
  add = TRUE, type = "lower", method = "number", order = "AOE",
  diag = FALSE, tl.pos = "n", cl.pos = "n"
)
