# Analisis PCA dan FA untuk Faktor Risiko Penyakit Jantung Koroner
# Import Data
data <- read.csv("heart (2).csv")
data <- data[, -ncol(data)] # Menghapus kolom target karena bukan fitur prediktor
print(head(data))
## age sex cp trestbps chol fbs restecg thalach exang oldpeak slope ca thal
## 1 63 1 3 145 233 1 0 150 0 2.3 0 0 1
## 2 37 1 2 130 250 0 1 187 0 3.5 0 0 2
## 3 41 0 1 130 204 0 0 172 0 1.4 2 0 2
## 4 56 1 1 120 236 0 1 178 0 0.8 2 0 2
## 5 57 0 0 120 354 0 1 163 1 0.6 2 0 2
## 6 57 1 0 140 192 0 1 148 0 0.4 1 0 1
# Preprocessing
sum(is.na(data)) # Mengecek missing values
## [1] 0
p <- ncol(data)
# Check KMO
library(psych)
## Warning: package 'psych' was built under R version 4.4.3
r <- cor(data)
KMO(r)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = r)
## Overall MSA = 0.67
## MSA for each item =
## age sex cp trestbps chol fbs restecg thalach
## 0.64 0.52 0.69 0.65 0.54 0.53 0.63 0.71
## exang oldpeak slope ca thal
## 0.73 0.69 0.66 0.70 0.70
# Bartlett Test
bartlett.test(data)
##
## Bartlett test of homogeneity of variances
##
## data: data
## Bartlett's K-squared = 17122, df = 12, p-value < 2.2e-16
# --------- Principal Component Analysis -----------
## Manual PCA
scale_data = scale(data)
r = cov(scale_data)
pc <- eigen(r)
pc$values
## [1] 2.7630269 1.5366920 1.2228343 1.1811455 1.0219665 0.9700159 0.8627699
## [8] 0.7759454 0.7189255 0.6215702 0.5301048 0.4231424 0.3718607
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Menghitung proporsi varians dan kumulatif
sumvar <- sum(pc$values)
propvar <- sapply(pc$values, function(x) x/sumvar)*100
cumvar <- data.frame(cbind(pc$values, propvar)) %>% mutate(cum = cumsum(propvar))
colnames(cumvar)[1] <- "value"
rownames(cumvar) <- paste0("PC",c(1:p))
print(cumvar)
## value propvar cum
## PC1 2.7630269 21.254053 21.25405
## PC2 1.5366920 11.820708 33.07476
## PC3 1.2228343 9.406418 42.48118
## PC4 1.1811455 9.085735 51.56691
## PC5 1.0219665 7.861281 59.42819
## PC6 0.9700159 7.461661 66.88985
## PC7 0.8627699 6.636692 73.52655
## PC8 0.7759454 5.968811 79.49536
## PC9 0.7189255 5.530196 85.02555
## PC10 0.6215702 4.781309 89.80686
## PC11 0.5301048 4.077729 93.88459
## PC12 0.4231424 3.254941 97.13953
## PC13 0.3718607 2.860467 100.00000
# Hasil PCA
pc$vectors
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 0.31420252 -0.40614872 -0.09407661 0.02066180 0.30715312 -0.12829615
## [2,] 0.09083783 0.37779171 0.55484915 0.25530873 -0.05070440 0.05496875
## [3,] -0.27460749 -0.29726609 0.35697431 -0.28790041 -0.16317945 -0.19341117
## [4,] 0.18392019 -0.43818675 0.20384930 -0.02260103 -0.18813809 -0.17945982
## [5,] 0.11737503 -0.36451402 -0.40782498 0.34340982 -0.32006670 -0.10472957
## [6,] 0.07363999 -0.31743328 0.48173624 0.06860532 0.23344184 0.24961364
## [7,] -0.12772792 0.22088181 -0.08919083 -0.26609555 0.39366727 -0.66681339
## [8,] -0.41649811 -0.07787618 0.15825529 0.18412539 -0.32328431 -0.12098445
## [9,] 0.36126745 0.26311790 -0.12635610 0.11505621 -0.03453568 0.23069914
## [10,] 0.41963899 0.05225497 0.11034290 -0.32629597 -0.25057927 -0.17007984
## [11,] -0.37977222 -0.04837415 -0.07381839 0.49484894 0.24682275 -0.06406935
## [12,] 0.27326172 -0.09414721 0.18356934 0.32801632 0.43536515 -0.18210750
## [13,] 0.22202375 0.20072042 0.12501113 0.38919138 -0.33195049 -0.50885654
## [,7] [,8] [,9] [,10] [,11] [,12]
## [1,] -0.22373018 0.26247720 -0.37900026 0.01672242 0.14054369 0.548235088
## [2,] -0.16250682 0.17599193 -0.19892520 -0.53561904 0.28760018 -0.011016034
## [3,] -0.21538959 -0.04794993 -0.35143235 -0.16435134 -0.59428374 -0.097208286
## [4,] 0.33276335 0.59533383 0.35039179 -0.07152427 0.06413037 -0.258721417
## [5,] 0.04932936 -0.37238051 -0.15397520 -0.49516986 0.10887361 -0.183790481
## [6,] 0.51081821 -0.43286301 -0.17700437 0.15369572 0.14210271 0.024729590
## [7,] 0.39689590 -0.09984080 -0.03830435 -0.26996570 0.09554476 -0.002694377
## [8,] 0.10147263 -0.14346128 0.37204449 -0.03081265 0.03250678 0.608911322
## [9,] 0.44991859 0.11260655 -0.05850023 -0.19873186 -0.61590127 0.239078602
## [10,] -0.11288781 -0.19232320 0.23360295 -0.11138417 0.01524183 0.340832606
## [11,] 0.05503797 0.26180704 -0.02850469 -0.05593369 -0.16879774 0.133565990
## [12,] -0.33760601 -0.25967830 0.48580844 -0.03532497 -0.29673948 -0.146030385
## [13,] 0.05516481 -0.03434879 -0.28420109 0.53083127 -0.04361098 -0.072593188
## [,13]
## [1,] -0.181810835
## [2,] -0.060938085
## [3,] -0.006350611
## [4,] -0.020129600
## [5,] 0.007453109
## [6,] 0.127166917
## [7,] -0.074442253
## [8,] -0.316691808
## [9,] -0.148726741
## [10,] 0.614575447
## [11,] 0.645163760
## [12,] -0.156264034
## [13,] -0.015689708
scores <- as.matrix(scale_data) %*% pc$vectors
head(scores)
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 0.6230800 -2.31743663 2.47058555 -2.6718196 -0.37463354 1.71073661
## [2,] -0.4552349 0.95576989 1.13771273 -2.4228303 -2.27001191 -0.78655946
## [3,] -1.8257846 -0.04281395 -0.45148191 -0.4057437 -0.86636898 0.76628302
## [4,] -1.7131720 0.49451926 0.03058043 0.1119764 0.23520879 -0.50183446
## [5,] -0.3707431 -0.30065881 -2.83637722 0.8077056 -0.01137015 -0.08535335
## [6,] -0.6477958 0.38225001 -0.07666423 -1.4915136 1.13566901 0.70781257
## [,7] [,8] [,9] [,10] [,11] [,12]
## [1,] -0.12015287 -0.73534031 -0.6378849 -0.9311106 0.5571929 0.4034075
## [2,] -0.08137873 -1.53284832 1.2397734 -1.1637555 0.4913199 0.2318759
## [3,] -0.10467873 0.06341831 1.2910663 1.1451712 -0.3631243 0.1776832
## [4,] -0.14251850 0.17716929 -0.1527123 -0.7054052 0.6991341 1.0718084
## [5,] 1.41325773 -0.60613163 -0.1860074 -0.9059243 -0.4137663 0.8830064
## [6,] 0.10571962 1.13295226 0.6190293 -0.9030050 1.5684664 0.0742204
## [,13]
## [1,] -0.41952696
## [2,] -0.27382062
## [3,] 1.10772088
## [4,] 0.15064597
## [5,] 0.06946601
## [6,] -0.71077548
# Function PCA in R
PCA.mod <- prcomp(scale_data)
summary(PCA.mod)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 1.6622 1.2396 1.10582 1.08681 1.01092 0.98489 0.92885
## Proportion of Variance 0.2125 0.1182 0.09406 0.09086 0.07861 0.07462 0.06637
## Cumulative Proportion 0.2125 0.3307 0.42481 0.51567 0.59428 0.66890 0.73527
## PC8 PC9 PC10 PC11 PC12 PC13
## Standard deviation 0.88088 0.8479 0.78840 0.72808 0.65049 0.6098
## Proportion of Variance 0.05969 0.0553 0.04781 0.04078 0.03255 0.0286
## Cumulative Proportion 0.79495 0.8503 0.89807 0.93885 0.97140 1.0000
PCA.mod$rotation
## PC1 PC2 PC3 PC4 PC5
## age -0.31420252 -0.40614872 -0.09407661 0.02066180 -0.30715312
## sex -0.09083783 0.37779171 0.55484915 0.25530873 0.05070440
## cp 0.27460749 -0.29726609 0.35697431 -0.28790041 0.16317945
## trestbps -0.18392019 -0.43818675 0.20384930 -0.02260103 0.18813809
## chol -0.11737503 -0.36451402 -0.40782498 0.34340982 0.32006670
## fbs -0.07363999 -0.31743328 0.48173624 0.06860532 -0.23344184
## restecg 0.12772792 0.22088181 -0.08919083 -0.26609555 -0.39366727
## thalach 0.41649811 -0.07787618 0.15825529 0.18412539 0.32328431
## exang -0.36126745 0.26311790 -0.12635610 0.11505621 0.03453568
## oldpeak -0.41963899 0.05225497 0.11034290 -0.32629597 0.25057927
## slope 0.37977222 -0.04837415 -0.07381839 0.49484894 -0.24682275
## ca -0.27326172 -0.09414721 0.18356934 0.32801632 -0.43536515
## thal -0.22202375 0.20072042 0.12501113 0.38919138 0.33195049
## PC6 PC7 PC8 PC9 PC10
## age 0.12829615 0.22373018 0.26247720 0.37900026 0.01672242
## sex -0.05496875 0.16250682 0.17599193 0.19892520 -0.53561904
## cp 0.19341117 0.21538959 -0.04794993 0.35143235 -0.16435134
## trestbps 0.17945982 -0.33276335 0.59533383 -0.35039179 -0.07152427
## chol 0.10472957 -0.04932936 -0.37238051 0.15397520 -0.49516986
## fbs -0.24961364 -0.51081821 -0.43286301 0.17700437 0.15369572
## restecg 0.66681339 -0.39689590 -0.09984080 0.03830435 -0.26996570
## thalach 0.12098445 -0.10147263 -0.14346128 -0.37204449 -0.03081265
## exang -0.23069914 -0.44991859 0.11260655 0.05850023 -0.19873186
## oldpeak 0.17007984 0.11288781 -0.19232320 -0.23360295 -0.11138417
## slope 0.06406935 -0.05503797 0.26180704 0.02850469 -0.05593369
## ca 0.18210750 0.33760601 -0.25967830 -0.48580844 -0.03532497
## thal 0.50885654 -0.05516481 -0.03434879 0.28420109 0.53083127
## PC11 PC12 PC13
## age 0.14054369 -0.548235088 0.181810835
## sex 0.28760018 0.011016034 0.060938085
## cp -0.59428374 0.097208286 0.006350611
## trestbps 0.06413037 0.258721417 0.020129600
## chol 0.10887361 0.183790481 -0.007453109
## fbs 0.14210271 -0.024729590 -0.127166917
## restecg 0.09554476 0.002694377 0.074442253
## thalach 0.03250678 -0.608911322 0.316691808
## exang -0.61590127 -0.239078602 0.148726741
## oldpeak 0.01524183 -0.340832606 -0.614575447
## slope -0.16879774 -0.133565990 -0.645163760
## ca -0.29673948 0.146030385 0.156264034
## thal -0.04361098 0.072593188 0.015689708
head(PCA.mod$x)
## PC1 PC2 PC3 PC4 PC5 PC6
## [1,] -0.6230800 -2.31743663 2.47058555 -2.6718196 0.37463354 -1.71073661
## [2,] 0.4552349 0.95576989 1.13771273 -2.4228303 2.27001191 0.78655946
## [3,] 1.8257846 -0.04281395 -0.45148191 -0.4057437 0.86636898 -0.76628302
## [4,] 1.7131720 0.49451926 0.03058043 0.1119764 -0.23520879 0.50183446
## [5,] 0.3707431 -0.30065881 -2.83637722 0.8077056 0.01137015 0.08535335
## [6,] 0.6477958 0.38225001 -0.07666423 -1.4915136 -1.13566901 -0.70781257
## PC7 PC8 PC9 PC10 PC11 PC12
## [1,] 0.12015287 -0.73534031 0.6378849 -0.9311106 0.5571929 -0.4034075
## [2,] 0.08137873 -1.53284832 -1.2397734 -1.1637555 0.4913199 -0.2318759
## [3,] 0.10467873 0.06341831 -1.2910663 1.1451712 -0.3631243 -0.1776832
## [4,] 0.14251850 0.17716929 0.1527123 -0.7054052 0.6991341 -1.0718084
## [5,] -1.41325773 -0.60613163 0.1860074 -0.9059243 -0.4137663 -0.8830064
## [6,] -0.10571962 1.13295226 -0.6190293 -0.9030050 1.5684664 -0.0742204
## PC13
## [1,] 0.41952696
## [2,] 0.27382062
## [3,] -1.10772088
## [4,] -0.15064597
## [5,] -0.06946601
## [6,] 0.71077548
# PCA dengan FactoMineR
library(FactoMineR)
## Warning: package 'FactoMineR' was built under R version 4.4.3
pca_result <- PCA(scale_data, scale.unit = TRUE, graph = FALSE, ncp = p)
print(pca_result$eig)
## eigenvalue percentage of variance cumulative percentage of variance
## comp 1 2.7630269 21.254053 21.25405
## comp 2 1.5366920 11.820708 33.07476
## comp 3 1.2228343 9.406418 42.48118
## comp 4 1.1811455 9.085735 51.56691
## comp 5 1.0219665 7.861281 59.42819
## comp 6 0.9700159 7.461661 66.88985
## comp 7 0.8627699 6.636692 73.52655
## comp 8 0.7759454 5.968811 79.49536
## comp 9 0.7189255 5.530196 85.02555
## comp 10 0.6215702 4.781309 89.80686
## comp 11 0.5301048 4.077729 93.88459
## comp 12 0.4231424 3.254941 97.13953
## comp 13 0.3718607 2.860467 100.00000
# Visualisasi PCA
library(factoextra)
## Warning: package 'factoextra' was built under R version 4.4.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.4.3
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
fviz_eig(pca_result, addlabels = TRUE, ncp = p, barfill = "skyblue", barcolor = "darkblue", linecolor = "red")

fviz_pca_biplot(pca_result, geom.ind = "point", addEllipses = TRUE)

# Circle of Correlation
contrib_circle <- fviz_pca_var(pca_result, col.var = "contrib", gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"), repel = TRUE) +
ggtitle("Kontribusi Variabel")
plot(contrib_circle)

# Kontribusi Variabel
contrib_v_PC1 <- fviz_contrib(pca_result, choice = "var", axes = 1, top = 5) + ggtitle("PC1")
contrib_v_PC2 <- fviz_contrib(pca_result, choice = "var", axes = 2, top = 5) + ggtitle("PC2")
contrib_v_PC3 <- fviz_contrib(pca_result, choice = "var", axes = 3, top = 5) + ggtitle("PC3")
plot(contrib_v_PC3)

# --------- Factor Analysis -----------
varcov = cov(scale_data)
pc = eigen(varcov)
pc$values
## [1] 2.7630269 1.5366920 1.2228343 1.1811455 1.0219665 0.9700159 0.8627699
## [8] 0.7759454 0.7189255 0.6215702 0.5301048 0.4231424 0.3718607
pc$vectors
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 0.31420252 -0.40614872 -0.09407661 0.02066180 0.30715312 -0.12829615
## [2,] 0.09083783 0.37779171 0.55484915 0.25530873 -0.05070440 0.05496875
## [3,] -0.27460749 -0.29726609 0.35697431 -0.28790041 -0.16317945 -0.19341117
## [4,] 0.18392019 -0.43818675 0.20384930 -0.02260103 -0.18813809 -0.17945982
## [5,] 0.11737503 -0.36451402 -0.40782498 0.34340982 -0.32006670 -0.10472957
## [6,] 0.07363999 -0.31743328 0.48173624 0.06860532 0.23344184 0.24961364
## [7,] -0.12772792 0.22088181 -0.08919083 -0.26609555 0.39366727 -0.66681339
## [8,] -0.41649811 -0.07787618 0.15825529 0.18412539 -0.32328431 -0.12098445
## [9,] 0.36126745 0.26311790 -0.12635610 0.11505621 -0.03453568 0.23069914
## [10,] 0.41963899 0.05225497 0.11034290 -0.32629597 -0.25057927 -0.17007984
## [11,] -0.37977222 -0.04837415 -0.07381839 0.49484894 0.24682275 -0.06406935
## [12,] 0.27326172 -0.09414721 0.18356934 0.32801632 0.43536515 -0.18210750
## [13,] 0.22202375 0.20072042 0.12501113 0.38919138 -0.33195049 -0.50885654
## [,7] [,8] [,9] [,10] [,11] [,12]
## [1,] -0.22373018 0.26247720 -0.37900026 0.01672242 0.14054369 0.548235088
## [2,] -0.16250682 0.17599193 -0.19892520 -0.53561904 0.28760018 -0.011016034
## [3,] -0.21538959 -0.04794993 -0.35143235 -0.16435134 -0.59428374 -0.097208286
## [4,] 0.33276335 0.59533383 0.35039179 -0.07152427 0.06413037 -0.258721417
## [5,] 0.04932936 -0.37238051 -0.15397520 -0.49516986 0.10887361 -0.183790481
## [6,] 0.51081821 -0.43286301 -0.17700437 0.15369572 0.14210271 0.024729590
## [7,] 0.39689590 -0.09984080 -0.03830435 -0.26996570 0.09554476 -0.002694377
## [8,] 0.10147263 -0.14346128 0.37204449 -0.03081265 0.03250678 0.608911322
## [9,] 0.44991859 0.11260655 -0.05850023 -0.19873186 -0.61590127 0.239078602
## [10,] -0.11288781 -0.19232320 0.23360295 -0.11138417 0.01524183 0.340832606
## [11,] 0.05503797 0.26180704 -0.02850469 -0.05593369 -0.16879774 0.133565990
## [12,] -0.33760601 -0.25967830 0.48580844 -0.03532497 -0.29673948 -0.146030385
## [13,] 0.05516481 -0.03434879 -0.28420109 0.53083127 -0.04361098 -0.072593188
## [,13]
## [1,] -0.181810835
## [2,] -0.060938085
## [3,] -0.006350611
## [4,] -0.020129600
## [5,] 0.007453109
## [6,] 0.127166917
## [7,] -0.074442253
## [8,] -0.316691808
## [9,] -0.148726741
## [10,] 0.614575447
## [11,] 0.645163760
## [12,] -0.156264034
## [13,] -0.015689708
sp = sum(pc$values[1:3])
L1 = sqrt(pc$values[1])*pc$vectors[,1]
L2 = sqrt(pc$values[2])*pc$vectors[,2]
L3 = sqrt(pc$values[3])*pc$vectors[,3]
L = cbind(L1, L2, L3)
L
## L1 L2 L3
## [1,] 0.5222786 -0.50347568 -0.10403165
## [2,] 0.1509939 0.46832338 0.61356239
## [3,] -0.4564623 -0.36850110 0.39474876
## [4,] 0.3057187 -0.54319112 0.22542031
## [5,] 0.1951050 -0.45186391 -0.45098036
## [6,] 0.1224070 -0.39350103 0.53271279
## [7,] -0.2123139 0.27381256 -0.09862886
## [8,] -0.6923180 -0.09653795 0.17500161
## [9,] 0.6005116 0.32616984 -0.13972690
## [10,] 0.6975388 0.06477703 0.12201921
## [11,] -0.6312709 -0.05996623 -0.08162973
## [12,] 0.4542253 -0.11670806 0.20299435
## [13,] 0.3690558 0.24881981 0.13823961
# Perform Factor Analysis
fa <- fa(r = scale_data, covar = TRUE, nfactors = 3, rotate = "varimax")
load <- fa$loadings
plot(load[,c(1,3)], type="n") # Set up plot
text(load[,c(1,3)], labels=names(data), cex=.7)

fa.diagram(load)
