rm(list = ls())
# 주성분 분석
# 1. USArrests 데이터셋
data("USArrests")
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
glimpse(USArrests)
## Rows: 50
## Columns: 4
## $ Murder <dbl> 13.2, 10.0, 8.1, 8.8, 9.0, 7.9, 3.3, 5.9, 15.4, 17.4, 5.3, 2.…
## $ Assault <int> 236, 263, 294, 190, 276, 204, 110, 238, 335, 211, 46, 120, 24…
## $ UrbanPop <int> 58, 48, 80, 50, 91, 78, 77, 72, 80, 60, 83, 54, 83, 65, 57, 6…
## $ Rape <dbl> 21.2, 44.5, 31.0, 19.5, 40.6, 38.7, 11.1, 15.8, 31.9, 25.8, 2…
fit1 <- prcomp(USArrests, scale. = TRUE)
summary(fit1)
## Importance of components:
## PC1 PC2 PC3 PC4
## Standard deviation 1.5749 0.9949 0.59713 0.41645
## Proportion of Variance 0.6201 0.2474 0.08914 0.04336
## Cumulative Proportion 0.6201 0.8675 0.95664 1.00000
# 2. mtcars 데이터셋
data("mtcars")
glimpse(mtcars)
## Rows: 32
## Columns: 11
## $ mpg <dbl> 21.0, 21.0, 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22.8, 19.2, 17.8,…
## $ cyl <dbl> 6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 8,…
## $ disp <dbl> 160.0, 160.0, 108.0, 258.0, 360.0, 225.0, 360.0, 146.7, 140.8, 16…
## $ hp <dbl> 110, 110, 93, 110, 175, 105, 245, 62, 95, 123, 123, 180, 180, 180…
## $ drat <dbl> 3.90, 3.90, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.92, 3.92, 3.92,…
## $ wt <dbl> 2.620, 2.875, 2.320, 3.215, 3.440, 3.460, 3.570, 3.190, 3.150, 3.…
## $ qsec <dbl> 16.46, 17.02, 18.61, 19.44, 17.02, 20.22, 15.84, 20.00, 22.90, 18…
## $ vs <dbl> 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,…
## $ am <dbl> 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0,…
## $ gear <dbl> 4, 4, 4, 3, 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4, 4, 3, 3,…
## $ carb <dbl> 4, 4, 1, 1, 2, 1, 4, 2, 2, 4, 4, 3, 3, 3, 4, 4, 4, 1, 2, 1, 1, 2,…
fit2 <- prcomp(mtcars, scale. = TRUE)
summary(fit2)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.5707 1.6280 0.79196 0.51923 0.47271 0.46000 0.3678
## Proportion of Variance 0.6008 0.2409 0.05702 0.02451 0.02031 0.01924 0.0123
## Cumulative Proportion 0.6008 0.8417 0.89873 0.92324 0.94356 0.96279 0.9751
## PC8 PC9 PC10 PC11
## Standard deviation 0.35057 0.2776 0.22811 0.1485
## Proportion of Variance 0.01117 0.0070 0.00473 0.0020
## Cumulative Proportion 0.98626 0.9933 0.99800 1.0000
# 3. iris 데이터셋 (종속변수 제거)
data("iris")
glimpse(mtcars)
## Rows: 32
## Columns: 11
## $ mpg <dbl> 21.0, 21.0, 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22.8, 19.2, 17.8,…
## $ cyl <dbl> 6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 8,…
## $ disp <dbl> 160.0, 160.0, 108.0, 258.0, 360.0, 225.0, 360.0, 146.7, 140.8, 16…
## $ hp <dbl> 110, 110, 93, 110, 175, 105, 245, 62, 95, 123, 123, 180, 180, 180…
## $ drat <dbl> 3.90, 3.90, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.92, 3.92, 3.92,…
## $ wt <dbl> 2.620, 2.875, 2.320, 3.215, 3.440, 3.460, 3.570, 3.190, 3.150, 3.…
## $ qsec <dbl> 16.46, 17.02, 18.61, 19.44, 17.02, 20.22, 15.84, 20.00, 22.90, 18…
## $ vs <dbl> 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,…
## $ am <dbl> 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0,…
## $ gear <dbl> 4, 4, 4, 3, 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4, 4, 3, 3,…
## $ carb <dbl> 4, 4, 1, 1, 2, 1, 4, 2, 2, 4, 4, 3, 3, 3, 4, 4, 4, 1, 2, 1, 1, 2,…
iris_num <- iris[, 1:4]
fit3 <- prcomp(iris_num, scale. = TRUE)
summary(fit3)
## Importance of components:
## PC1 PC2 PC3 PC4
## Standard deviation 1.7084 0.9560 0.38309 0.14393
## Proportion of Variance 0.7296 0.2285 0.03669 0.00518
## Cumulative Proportion 0.7296 0.9581 0.99482 1.00000
# 4. attitude 데이터셋
data("attitude")
fit5 <- prcomp(attitude, scale. = TRUE)
summary(fit5)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 1.9278 1.0681 0.9204 0.78286 0.56892 0.46747 0.37475
## Proportion of Variance 0.5309 0.1630 0.1210 0.08755 0.04624 0.03122 0.02006
## Cumulative Proportion 0.5309 0.6939 0.8149 0.90248 0.94872 0.97994 1.00000
# 1. 데이터 로드 및 PCA 수행
data("USArrests")
pca_result <- prcomp(USArrests, scale. = TRUE)
# 2. Scree Plot 생성
std_dev <- pca_result$sdev
var_explained <- std_dev^2 / sum(std_dev^2)
# 3. 시각화
plot(var_explained, type = "b",
xlab = "Principal Component",
ylab = "Proportion of Variance Explained",
main = "Scree Plot of USArrests PCA")

# 1. USArrests 데이터셋
data("USArrests")
fit1 <- prcomp(USArrests, scale. = TRUE)
print(round(fit1$rotation, 3)) # 주성분 적재량
## PC1 PC2 PC3 PC4
## Murder -0.536 -0.418 0.341 0.649
## Assault -0.583 -0.188 0.268 -0.743
## UrbanPop -0.278 0.873 0.378 0.134
## Rape -0.543 0.167 -0.818 0.089
data("mtcars")
fit2 <- prcomp(mtcars, scale. = TRUE)
print(round(fit2$rotation, 3))
## PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10
## mpg -0.363 0.016 -0.226 -0.023 -0.103 -0.109 0.368 0.754 -0.236 -0.139
## cyl 0.374 0.044 -0.175 -0.003 -0.058 0.169 0.057 0.231 -0.054 0.846
## disp 0.368 -0.049 -0.061 0.257 -0.394 -0.336 0.214 -0.001 -0.198 -0.049
## hp 0.330 0.249 0.140 -0.068 -0.540 0.071 -0.001 0.222 0.576 -0.248
## drat -0.294 0.275 0.161 0.855 -0.077 0.244 0.021 -0.032 0.047 0.101
## wt 0.346 -0.143 0.342 0.246 0.075 -0.465 -0.021 0.009 -0.359 -0.094
## qsec -0.200 -0.463 0.403 0.068 0.165 -0.330 0.050 0.232 0.528 0.271
## vs -0.307 -0.232 0.429 -0.215 -0.600 0.194 -0.266 -0.026 -0.359 0.159
## am -0.235 0.429 -0.206 -0.030 -0.090 -0.571 -0.587 0.060 0.047 0.178
## gear -0.207 0.462 0.290 -0.265 -0.048 -0.244 0.605 -0.336 0.002 0.214
## carb 0.214 0.414 0.529 -0.127 0.361 0.184 -0.175 0.396 -0.171 -0.072
## PC11
## mpg -0.125
## cyl -0.141
## disp 0.661
## hp -0.256
## drat -0.040
## wt -0.567
## qsec 0.181
## vs 0.008
## am 0.030
## gear -0.054
## carb 0.320