# Analisis PCA dan FA untuk Faktor Risiko Penyakit Jantung Koroner

# Import Data
data <- read.csv("heart (2).csv")
data <- data[, -ncol(data)]  # Menghapus kolom target karena bukan fitur prediktor
print(head(data))
##   age sex cp trestbps chol fbs restecg thalach exang oldpeak slope ca thal
## 1  63   1  3      145  233   1       0     150     0     2.3     0  0    1
## 2  37   1  2      130  250   0       1     187     0     3.5     0  0    2
## 3  41   0  1      130  204   0       0     172     0     1.4     2  0    2
## 4  56   1  1      120  236   0       1     178     0     0.8     2  0    2
## 5  57   0  0      120  354   0       1     163     1     0.6     2  0    2
## 6  57   1  0      140  192   0       1     148     0     0.4     1  0    1
# Preprocessing
sum(is.na(data))  # Mengecek missing values
## [1] 0
p <- ncol(data)

# Check KMO
library(psych)
## Warning: package 'psych' was built under R version 4.4.3
r <- cor(data)
KMO(r)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = r)
## Overall MSA =  0.67
## MSA for each item = 
##      age      sex       cp trestbps     chol      fbs  restecg  thalach 
##     0.64     0.52     0.69     0.65     0.54     0.53     0.63     0.71 
##    exang  oldpeak    slope       ca     thal 
##     0.73     0.69     0.66     0.70     0.70
# Bartlett Test
bartlett.test(data)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  data
## Bartlett's K-squared = 17122, df = 12, p-value < 2.2e-16
# --------- Principal Component Analysis -----------

## Manual PCA
scale_data = scale(data)
r = cov(scale_data)
pc <- eigen(r)
pc$values
##  [1] 2.7630269 1.5366920 1.2228343 1.1811455 1.0219665 0.9700159 0.8627699
##  [8] 0.7759454 0.7189255 0.6215702 0.5301048 0.4231424 0.3718607
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## Menghitung proporsi varians dan kumulatif
sumvar <- sum(pc$values)
propvar <- sapply(pc$values, function(x) x/sumvar)*100
cumvar <- data.frame(cbind(pc$values, propvar)) %>% mutate(cum = cumsum(propvar))
colnames(cumvar)[1] <- "value"
rownames(cumvar) <- paste0("PC",c(1:p))
print(cumvar)
##          value   propvar       cum
## PC1  2.7630269 21.254053  21.25405
## PC2  1.5366920 11.820708  33.07476
## PC3  1.2228343  9.406418  42.48118
## PC4  1.1811455  9.085735  51.56691
## PC5  1.0219665  7.861281  59.42819
## PC6  0.9700159  7.461661  66.88985
## PC7  0.8627699  6.636692  73.52655
## PC8  0.7759454  5.968811  79.49536
## PC9  0.7189255  5.530196  85.02555
## PC10 0.6215702  4.781309  89.80686
## PC11 0.5301048  4.077729  93.88459
## PC12 0.4231424  3.254941  97.13953
## PC13 0.3718607  2.860467 100.00000
# Hasil PCA
pc$vectors
##              [,1]        [,2]        [,3]        [,4]        [,5]        [,6]
##  [1,]  0.31420252 -0.40614872 -0.09407661  0.02066180  0.30715312 -0.12829615
##  [2,]  0.09083783  0.37779171  0.55484915  0.25530873 -0.05070440  0.05496875
##  [3,] -0.27460749 -0.29726609  0.35697431 -0.28790041 -0.16317945 -0.19341117
##  [4,]  0.18392019 -0.43818675  0.20384930 -0.02260103 -0.18813809 -0.17945982
##  [5,]  0.11737503 -0.36451402 -0.40782498  0.34340982 -0.32006670 -0.10472957
##  [6,]  0.07363999 -0.31743328  0.48173624  0.06860532  0.23344184  0.24961364
##  [7,] -0.12772792  0.22088181 -0.08919083 -0.26609555  0.39366727 -0.66681339
##  [8,] -0.41649811 -0.07787618  0.15825529  0.18412539 -0.32328431 -0.12098445
##  [9,]  0.36126745  0.26311790 -0.12635610  0.11505621 -0.03453568  0.23069914
## [10,]  0.41963899  0.05225497  0.11034290 -0.32629597 -0.25057927 -0.17007984
## [11,] -0.37977222 -0.04837415 -0.07381839  0.49484894  0.24682275 -0.06406935
## [12,]  0.27326172 -0.09414721  0.18356934  0.32801632  0.43536515 -0.18210750
## [13,]  0.22202375  0.20072042  0.12501113  0.38919138 -0.33195049 -0.50885654
##              [,7]        [,8]        [,9]       [,10]       [,11]        [,12]
##  [1,] -0.22373018  0.26247720 -0.37900026  0.01672242  0.14054369  0.548235088
##  [2,] -0.16250682  0.17599193 -0.19892520 -0.53561904  0.28760018 -0.011016034
##  [3,] -0.21538959 -0.04794993 -0.35143235 -0.16435134 -0.59428374 -0.097208286
##  [4,]  0.33276335  0.59533383  0.35039179 -0.07152427  0.06413037 -0.258721417
##  [5,]  0.04932936 -0.37238051 -0.15397520 -0.49516986  0.10887361 -0.183790481
##  [6,]  0.51081821 -0.43286301 -0.17700437  0.15369572  0.14210271  0.024729590
##  [7,]  0.39689590 -0.09984080 -0.03830435 -0.26996570  0.09554476 -0.002694377
##  [8,]  0.10147263 -0.14346128  0.37204449 -0.03081265  0.03250678  0.608911322
##  [9,]  0.44991859  0.11260655 -0.05850023 -0.19873186 -0.61590127  0.239078602
## [10,] -0.11288781 -0.19232320  0.23360295 -0.11138417  0.01524183  0.340832606
## [11,]  0.05503797  0.26180704 -0.02850469 -0.05593369 -0.16879774  0.133565990
## [12,] -0.33760601 -0.25967830  0.48580844 -0.03532497 -0.29673948 -0.146030385
## [13,]  0.05516481 -0.03434879 -0.28420109  0.53083127 -0.04361098 -0.072593188
##              [,13]
##  [1,] -0.181810835
##  [2,] -0.060938085
##  [3,] -0.006350611
##  [4,] -0.020129600
##  [5,]  0.007453109
##  [6,]  0.127166917
##  [7,] -0.074442253
##  [8,] -0.316691808
##  [9,] -0.148726741
## [10,]  0.614575447
## [11,]  0.645163760
## [12,] -0.156264034
## [13,] -0.015689708
scores <- as.matrix(scale_data) %*% pc$vectors
head(scores)
##            [,1]        [,2]        [,3]       [,4]        [,5]        [,6]
## [1,]  0.6230800 -2.31743663  2.47058555 -2.6718196 -0.37463354  1.71073661
## [2,] -0.4552349  0.95576989  1.13771273 -2.4228303 -2.27001191 -0.78655946
## [3,] -1.8257846 -0.04281395 -0.45148191 -0.4057437 -0.86636898  0.76628302
## [4,] -1.7131720  0.49451926  0.03058043  0.1119764  0.23520879 -0.50183446
## [5,] -0.3707431 -0.30065881 -2.83637722  0.8077056 -0.01137015 -0.08535335
## [6,] -0.6477958  0.38225001 -0.07666423 -1.4915136  1.13566901  0.70781257
##             [,7]        [,8]       [,9]      [,10]      [,11]     [,12]
## [1,] -0.12015287 -0.73534031 -0.6378849 -0.9311106  0.5571929 0.4034075
## [2,] -0.08137873 -1.53284832  1.2397734 -1.1637555  0.4913199 0.2318759
## [3,] -0.10467873  0.06341831  1.2910663  1.1451712 -0.3631243 0.1776832
## [4,] -0.14251850  0.17716929 -0.1527123 -0.7054052  0.6991341 1.0718084
## [5,]  1.41325773 -0.60613163 -0.1860074 -0.9059243 -0.4137663 0.8830064
## [6,]  0.10571962  1.13295226  0.6190293 -0.9030050  1.5684664 0.0742204
##            [,13]
## [1,] -0.41952696
## [2,] -0.27382062
## [3,]  1.10772088
## [4,]  0.15064597
## [5,]  0.06946601
## [6,] -0.71077548
# Function PCA in R
PCA.mod <- prcomp(scale_data)
summary(PCA.mod)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5     PC6     PC7
## Standard deviation     1.6622 1.2396 1.10582 1.08681 1.01092 0.98489 0.92885
## Proportion of Variance 0.2125 0.1182 0.09406 0.09086 0.07861 0.07462 0.06637
## Cumulative Proportion  0.2125 0.3307 0.42481 0.51567 0.59428 0.66890 0.73527
##                            PC8    PC9    PC10    PC11    PC12   PC13
## Standard deviation     0.88088 0.8479 0.78840 0.72808 0.65049 0.6098
## Proportion of Variance 0.05969 0.0553 0.04781 0.04078 0.03255 0.0286
## Cumulative Proportion  0.79495 0.8503 0.89807 0.93885 0.97140 1.0000
PCA.mod$rotation
##                  PC1         PC2         PC3         PC4         PC5
## age      -0.31420252 -0.40614872 -0.09407661  0.02066180 -0.30715312
## sex      -0.09083783  0.37779171  0.55484915  0.25530873  0.05070440
## cp        0.27460749 -0.29726609  0.35697431 -0.28790041  0.16317945
## trestbps -0.18392019 -0.43818675  0.20384930 -0.02260103  0.18813809
## chol     -0.11737503 -0.36451402 -0.40782498  0.34340982  0.32006670
## fbs      -0.07363999 -0.31743328  0.48173624  0.06860532 -0.23344184
## restecg   0.12772792  0.22088181 -0.08919083 -0.26609555 -0.39366727
## thalach   0.41649811 -0.07787618  0.15825529  0.18412539  0.32328431
## exang    -0.36126745  0.26311790 -0.12635610  0.11505621  0.03453568
## oldpeak  -0.41963899  0.05225497  0.11034290 -0.32629597  0.25057927
## slope     0.37977222 -0.04837415 -0.07381839  0.49484894 -0.24682275
## ca       -0.27326172 -0.09414721  0.18356934  0.32801632 -0.43536515
## thal     -0.22202375  0.20072042  0.12501113  0.38919138  0.33195049
##                  PC6         PC7         PC8         PC9        PC10
## age       0.12829615  0.22373018  0.26247720  0.37900026  0.01672242
## sex      -0.05496875  0.16250682  0.17599193  0.19892520 -0.53561904
## cp        0.19341117  0.21538959 -0.04794993  0.35143235 -0.16435134
## trestbps  0.17945982 -0.33276335  0.59533383 -0.35039179 -0.07152427
## chol      0.10472957 -0.04932936 -0.37238051  0.15397520 -0.49516986
## fbs      -0.24961364 -0.51081821 -0.43286301  0.17700437  0.15369572
## restecg   0.66681339 -0.39689590 -0.09984080  0.03830435 -0.26996570
## thalach   0.12098445 -0.10147263 -0.14346128 -0.37204449 -0.03081265
## exang    -0.23069914 -0.44991859  0.11260655  0.05850023 -0.19873186
## oldpeak   0.17007984  0.11288781 -0.19232320 -0.23360295 -0.11138417
## slope     0.06406935 -0.05503797  0.26180704  0.02850469 -0.05593369
## ca        0.18210750  0.33760601 -0.25967830 -0.48580844 -0.03532497
## thal      0.50885654 -0.05516481 -0.03434879  0.28420109  0.53083127
##                 PC11         PC12         PC13
## age       0.14054369 -0.548235088  0.181810835
## sex       0.28760018  0.011016034  0.060938085
## cp       -0.59428374  0.097208286  0.006350611
## trestbps  0.06413037  0.258721417  0.020129600
## chol      0.10887361  0.183790481 -0.007453109
## fbs       0.14210271 -0.024729590 -0.127166917
## restecg   0.09554476  0.002694377  0.074442253
## thalach   0.03250678 -0.608911322  0.316691808
## exang    -0.61590127 -0.239078602  0.148726741
## oldpeak   0.01524183 -0.340832606 -0.614575447
## slope    -0.16879774 -0.133565990 -0.645163760
## ca       -0.29673948  0.146030385  0.156264034
## thal     -0.04361098  0.072593188  0.015689708
head(PCA.mod$x)
##             PC1         PC2         PC3        PC4         PC5         PC6
## [1,] -0.6230800 -2.31743663  2.47058555 -2.6718196  0.37463354 -1.71073661
## [2,]  0.4552349  0.95576989  1.13771273 -2.4228303  2.27001191  0.78655946
## [3,]  1.8257846 -0.04281395 -0.45148191 -0.4057437  0.86636898 -0.76628302
## [4,]  1.7131720  0.49451926  0.03058043  0.1119764 -0.23520879  0.50183446
## [5,]  0.3707431 -0.30065881 -2.83637722  0.8077056  0.01137015  0.08535335
## [6,]  0.6477958  0.38225001 -0.07666423 -1.4915136 -1.13566901 -0.70781257
##              PC7         PC8        PC9       PC10       PC11       PC12
## [1,]  0.12015287 -0.73534031  0.6378849 -0.9311106  0.5571929 -0.4034075
## [2,]  0.08137873 -1.53284832 -1.2397734 -1.1637555  0.4913199 -0.2318759
## [3,]  0.10467873  0.06341831 -1.2910663  1.1451712 -0.3631243 -0.1776832
## [4,]  0.14251850  0.17716929  0.1527123 -0.7054052  0.6991341 -1.0718084
## [5,] -1.41325773 -0.60613163  0.1860074 -0.9059243 -0.4137663 -0.8830064
## [6,] -0.10571962  1.13295226 -0.6190293 -0.9030050  1.5684664 -0.0742204
##             PC13
## [1,]  0.41952696
## [2,]  0.27382062
## [3,] -1.10772088
## [4,] -0.15064597
## [5,] -0.06946601
## [6,]  0.71077548
# PCA dengan FactoMineR
library(FactoMineR)
## Warning: package 'FactoMineR' was built under R version 4.4.3
pca_result <- PCA(scale_data, scale.unit = TRUE, graph = FALSE, ncp = p)
print(pca_result$eig)
##         eigenvalue percentage of variance cumulative percentage of variance
## comp 1   2.7630269              21.254053                          21.25405
## comp 2   1.5366920              11.820708                          33.07476
## comp 3   1.2228343               9.406418                          42.48118
## comp 4   1.1811455               9.085735                          51.56691
## comp 5   1.0219665               7.861281                          59.42819
## comp 6   0.9700159               7.461661                          66.88985
## comp 7   0.8627699               6.636692                          73.52655
## comp 8   0.7759454               5.968811                          79.49536
## comp 9   0.7189255               5.530196                          85.02555
## comp 10  0.6215702               4.781309                          89.80686
## comp 11  0.5301048               4.077729                          93.88459
## comp 12  0.4231424               3.254941                          97.13953
## comp 13  0.3718607               2.860467                         100.00000
# Visualisasi PCA
library(factoextra)
## Warning: package 'factoextra' was built under R version 4.4.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.4.3
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
fviz_eig(pca_result, addlabels = TRUE, ncp = p, barfill = "skyblue", barcolor = "darkblue", linecolor = "red")

fviz_pca_biplot(pca_result, geom.ind = "point", addEllipses = TRUE)

# Circle of Correlation
contrib_circle <- fviz_pca_var(pca_result, col.var = "contrib", gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"), repel = TRUE) + 
  ggtitle("Kontribusi Variabel")
plot(contrib_circle)

# Kontribusi Variabel
contrib_v_PC1 <- fviz_contrib(pca_result, choice = "var", axes = 1, top = 5) + ggtitle("PC1")
contrib_v_PC2 <- fviz_contrib(pca_result, choice = "var", axes = 2, top = 5) + ggtitle("PC2")
contrib_v_PC3 <- fviz_contrib(pca_result, choice = "var", axes = 3, top = 5) + ggtitle("PC3")
plot(contrib_v_PC3)

# --------- Factor Analysis -----------
varcov = cov(scale_data)
pc = eigen(varcov)
pc$values
##  [1] 2.7630269 1.5366920 1.2228343 1.1811455 1.0219665 0.9700159 0.8627699
##  [8] 0.7759454 0.7189255 0.6215702 0.5301048 0.4231424 0.3718607
pc$vectors
##              [,1]        [,2]        [,3]        [,4]        [,5]        [,6]
##  [1,]  0.31420252 -0.40614872 -0.09407661  0.02066180  0.30715312 -0.12829615
##  [2,]  0.09083783  0.37779171  0.55484915  0.25530873 -0.05070440  0.05496875
##  [3,] -0.27460749 -0.29726609  0.35697431 -0.28790041 -0.16317945 -0.19341117
##  [4,]  0.18392019 -0.43818675  0.20384930 -0.02260103 -0.18813809 -0.17945982
##  [5,]  0.11737503 -0.36451402 -0.40782498  0.34340982 -0.32006670 -0.10472957
##  [6,]  0.07363999 -0.31743328  0.48173624  0.06860532  0.23344184  0.24961364
##  [7,] -0.12772792  0.22088181 -0.08919083 -0.26609555  0.39366727 -0.66681339
##  [8,] -0.41649811 -0.07787618  0.15825529  0.18412539 -0.32328431 -0.12098445
##  [9,]  0.36126745  0.26311790 -0.12635610  0.11505621 -0.03453568  0.23069914
## [10,]  0.41963899  0.05225497  0.11034290 -0.32629597 -0.25057927 -0.17007984
## [11,] -0.37977222 -0.04837415 -0.07381839  0.49484894  0.24682275 -0.06406935
## [12,]  0.27326172 -0.09414721  0.18356934  0.32801632  0.43536515 -0.18210750
## [13,]  0.22202375  0.20072042  0.12501113  0.38919138 -0.33195049 -0.50885654
##              [,7]        [,8]        [,9]       [,10]       [,11]        [,12]
##  [1,] -0.22373018  0.26247720 -0.37900026  0.01672242  0.14054369  0.548235088
##  [2,] -0.16250682  0.17599193 -0.19892520 -0.53561904  0.28760018 -0.011016034
##  [3,] -0.21538959 -0.04794993 -0.35143235 -0.16435134 -0.59428374 -0.097208286
##  [4,]  0.33276335  0.59533383  0.35039179 -0.07152427  0.06413037 -0.258721417
##  [5,]  0.04932936 -0.37238051 -0.15397520 -0.49516986  0.10887361 -0.183790481
##  [6,]  0.51081821 -0.43286301 -0.17700437  0.15369572  0.14210271  0.024729590
##  [7,]  0.39689590 -0.09984080 -0.03830435 -0.26996570  0.09554476 -0.002694377
##  [8,]  0.10147263 -0.14346128  0.37204449 -0.03081265  0.03250678  0.608911322
##  [9,]  0.44991859  0.11260655 -0.05850023 -0.19873186 -0.61590127  0.239078602
## [10,] -0.11288781 -0.19232320  0.23360295 -0.11138417  0.01524183  0.340832606
## [11,]  0.05503797  0.26180704 -0.02850469 -0.05593369 -0.16879774  0.133565990
## [12,] -0.33760601 -0.25967830  0.48580844 -0.03532497 -0.29673948 -0.146030385
## [13,]  0.05516481 -0.03434879 -0.28420109  0.53083127 -0.04361098 -0.072593188
##              [,13]
##  [1,] -0.181810835
##  [2,] -0.060938085
##  [3,] -0.006350611
##  [4,] -0.020129600
##  [5,]  0.007453109
##  [6,]  0.127166917
##  [7,] -0.074442253
##  [8,] -0.316691808
##  [9,] -0.148726741
## [10,]  0.614575447
## [11,]  0.645163760
## [12,] -0.156264034
## [13,] -0.015689708
sp = sum(pc$values[1:3])

L1 = sqrt(pc$values[1])*pc$vectors[,1]
L2 = sqrt(pc$values[2])*pc$vectors[,2]
L3 = sqrt(pc$values[3])*pc$vectors[,3]

L = cbind(L1, L2, L3)
L
##               L1          L2          L3
##  [1,]  0.5222786 -0.50347568 -0.10403165
##  [2,]  0.1509939  0.46832338  0.61356239
##  [3,] -0.4564623 -0.36850110  0.39474876
##  [4,]  0.3057187 -0.54319112  0.22542031
##  [5,]  0.1951050 -0.45186391 -0.45098036
##  [6,]  0.1224070 -0.39350103  0.53271279
##  [7,] -0.2123139  0.27381256 -0.09862886
##  [8,] -0.6923180 -0.09653795  0.17500161
##  [9,]  0.6005116  0.32616984 -0.13972690
## [10,]  0.6975388  0.06477703  0.12201921
## [11,] -0.6312709 -0.05996623 -0.08162973
## [12,]  0.4542253 -0.11670806  0.20299435
## [13,]  0.3690558  0.24881981  0.13823961
# Perform Factor Analysis
fa <- fa(r = scale_data, covar = TRUE, nfactors = 3, rotate = "varimax") 
load <- fa$loadings

plot(load[,c(1,3)], type="n") # Set up plot
text(load[,c(1,3)], labels=names(data), cex=.7)

fa.diagram(load)