# ------ LOAD LIBRARY ------
library(psych)
library(factoextra)
library(FactoMineR)
library(corrplot)
library(ppcor)
library(ggplot2)
library(dplyr)
library(GPArotation)

data_raw <- read.csv("C:/Users/USER/Documents/PCA/data.csv", 
                     check.names = FALSE)

# Ganti spasi jadi underscore
names(data_raw) <- gsub(" ", "_", names(data_raw))

# Ganti titik dua jadi underscore
names(data_raw) <- gsub(":", "_", names(data_raw))

names(data_raw)
##  [1] "id"                      "diagnosis"              
##  [3] "radius_mean"             "texture_mean"           
##  [5] "perimeter_mean"          "area_mean"              
##  [7] "smoothness_mean"         "compactness_mean"       
##  [9] "concavity_mean"          "concave_points_mean"    
## [11] "symmetry_mean"           "fractal_dimension_mean" 
## [13] "radius_se"               "texture_se"             
## [15] "perimeter_se"            "area_se"                
## [17] "smoothness_se"           "compactness_se"         
## [19] "concavity_se"            "concave_points_se"      
## [21] "symmetry_se"             "fractal_dimension_se"   
## [23] "radius_worst"            "texture_worst"          
## [25] "perimeter_worst"         "area_worst"             
## [27] "smoothness_worst"        "compactness_worst"      
## [29] "concavity_worst"         "concave_points_worst"   
## [31] "symmetry_worst"          "fractal_dimension_worst"
## [33] ""
data_num <- data_raw %>%
  select(
    radius_mean,
    texture_mean,
    perimeter_mean,
    area_mean,
    smoothness_mean,
    compactness_mean,
    concavity_mean,
    concave_points_mean,
    symmetry_mean,
    fractal_dimension_mean,
    texture_se,
    radius_worst,
    perimeter_worst,
    area_worst,
    concavity_worst
  ) %>%
  rename_with(~ paste0("X", seq_along(.)))


cat("Jumlah observasi :", nrow(data_num), "\n")
## Jumlah observasi : 569
cat("Jumlah variabel  :", ncol(data_num), "\n")
## Jumlah variabel  : 15
cat("Jumlah missing value              :", sum(is.na(data_num)), "\n")
## Jumlah missing value              : 0
cat("Jumlah baris yang memiliki missing value :", sum(!complete.cases(data_num)), "\n")
## Jumlah baris yang memiliki missing value : 0
cat("Rasio obs/var                     :", round(nrow(data_num)/ncol(data_num),1), ": 1\n")
## Rasio obs/var                     : 37.9 : 1

Statistika Deskriptif

desc <- describe(data_num)
round(desc[,c("n","mean","sd","min","max","skew","kurtosis")],4)
##       n   mean     sd    min     max skew kurtosis
## X1  569  14.13   3.52   6.98   28.11 0.94     0.81
## X2  569  19.29   4.30   9.71   39.28 0.65     0.73
## X3  569  91.97  24.30  43.79  188.50 0.99     0.94
## X4  569 654.89 351.91 143.50 2501.00 1.64     3.59
## X5  569   0.10   0.01   0.05    0.16 0.45     0.82
## X6  569   0.10   0.05   0.02    0.35 1.18     1.61
## X7  569   0.09   0.08   0.00    0.43 1.39     1.95
## X8  569   0.05   0.04   0.00    0.20 1.17     1.03
## X9  569   0.18   0.03   0.11    0.30 0.72     1.25
## X10 569   0.06   0.01   0.05    0.10 1.30     2.95
## X11 569   1.22   0.55   0.36    4.88 1.64     5.26
## X12 569  16.27   4.83   7.93   36.04 1.10     0.91
## X13 569 107.26  33.60  50.41  251.20 1.12     1.04
## X14 569 880.58 569.36 185.20 4254.00 1.85     4.32
## X15 569   0.27   0.21   0.00    1.25 1.14     1.57

Matriks Korelasi

mat_corr <- cor(data_num)
corrplot(mat_corr, method="color", type="upper", addCoef.col="black")

## Uji Asumsi # Bartlett Test

bartlett <- cortest.bartlett(mat_corr, n=nrow(data_num))
bartlett
## $chisq
## [1] 20184.55
## 
## $p.value
## [1] 0
## 
## $df
## [1] 105

KMO Test (Iterasi 1)

kmo1 <- KMO(data_num)

cat("Overall MSA:",kmo1$MSA,"\n")
## Overall MSA: 0.815302
round(kmo1$MSAi,3)
##    X1    X2    X3    X4    X5    X6    X7    X8    X9   X10   X11   X12   X13 
## 0.759 0.749 0.775 0.848 0.850 0.800 0.821 0.900 0.950 0.760 0.357 0.790 0.864 
##   X14   X15 
## 0.831 0.799

Hapus variabel dengan MSA < 0.5

msa_low <- names(kmo1$MSAi[kmo1$MSAi < 0.5])
msa_low <- intersect(msa_low, colnames(data_num))


if(length(msa_low) > 0 & (ncol(data_num) - length(msa_low) >= 10)){
  cat("Variabel dengan MSA < 0.5:", paste(msa_low, collapse = ", "), "\n")
  data_num <- data_num %>% dplyr::select(-dplyr::all_of(msa_low))
}
## Variabel dengan MSA < 0.5: X11
cat("Variabel tersisa:", paste(colnames(data_num), collapse = ", "), "\n")
## Variabel tersisa: X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X12, X13, X14, X15
mat_corr <- cor(data_num)

round(mat_corr,3)
##         X1     X2     X3     X4     X5    X6    X7    X8    X9    X10    X12
## X1   1.000  0.324  0.998  0.987  0.171 0.506 0.677 0.823 0.148 -0.312  0.970
## X2   0.324  1.000  0.330  0.321 -0.023 0.237 0.302 0.293 0.071 -0.076  0.353
## X3   0.998  0.330  1.000  0.987  0.207 0.557 0.716 0.851 0.183 -0.261  0.969
## X4   0.987  0.321  0.987  1.000  0.177 0.499 0.686 0.823 0.151 -0.283  0.963
## X5   0.171 -0.023  0.207  0.177  1.000 0.659 0.522 0.554 0.558  0.585  0.213
## X6   0.506  0.237  0.557  0.499  0.659 1.000 0.883 0.831 0.603  0.565  0.535
## X7   0.677  0.302  0.716  0.686  0.522 0.883 1.000 0.921 0.501  0.337  0.688
## X8   0.823  0.293  0.851  0.823  0.554 0.831 0.921 1.000 0.462  0.167  0.830
## X9   0.148  0.071  0.183  0.151  0.558 0.603 0.501 0.462 1.000  0.480  0.186
## X10 -0.312 -0.076 -0.261 -0.283  0.585 0.565 0.337 0.167 0.480  1.000 -0.254
## X12  0.970  0.353  0.969  0.963  0.213 0.535 0.688 0.830 0.186 -0.254  1.000
## X13  0.965  0.358  0.970  0.959  0.239 0.590 0.730 0.856 0.219 -0.205  0.994
## X14  0.941  0.344  0.942  0.959  0.207 0.510 0.676 0.810 0.177 -0.232  0.984
## X15  0.527  0.301  0.564  0.513  0.435 0.816 0.884 0.752 0.434  0.346  0.574
##        X13    X14   X15
## X1   0.965  0.941 0.527
## X2   0.358  0.344 0.301
## X3   0.970  0.942 0.564
## X4   0.959  0.959 0.513
## X5   0.239  0.207 0.435
## X6   0.590  0.510 0.816
## X7   0.730  0.676 0.884
## X8   0.856  0.810 0.752
## X9   0.219  0.177 0.434
## X10 -0.205 -0.232 0.346
## X12  0.994  0.984 0.574
## X13  1.000  0.978 0.618
## X14  0.978  1.000 0.543
## X15  0.618  0.543 1.000
KMO(data_num)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = data_num)
## Overall MSA =  0.82
## MSA for each item = 
##   X1   X2   X3   X4   X5   X6   X7   X8   X9  X10  X12  X13  X14  X15 
## 0.76 0.94 0.78 0.85 0.85 0.80 0.83 0.90 0.95 0.75 0.79 0.86 0.83 0.82

Bartleet Test Setelah dihapus

mat_corr2 <- cor(data_num)
bartlett2 <- cortest.bartlett(mat_corr2, n = nrow(data_num))
bartlett2
## $chisq
## [1] 19912.83
## 
## $p.value
## [1] 0
## 
## $df
## [1] 91

PCA Manual

# scale data
scale_data = scale(data_num)
r = cov(scale_data)

# eigen value and vector
pc <- eigen(r)
print(pc$values)
##  [1] 8.5499892807 2.9673714517 0.9142792420 0.5595408766 0.4406929617
##  [6] 0.2281037422 0.1469155500 0.0881415752 0.0557679880 0.0283529984
## [11] 0.0148080921 0.0044051216 0.0014657520 0.0001653679
print(pc$vectors)
##               [,1]        [,2]        [,3]        [,4]         [,5]
##  [1,] -0.315466116 -0.20347361 -0.09422095  0.03597973  0.013770549
##  [2,] -0.129870057 -0.07412025  0.92079359  0.25785883  0.246928671
##  [3,] -0.322086768 -0.17315992 -0.08495702  0.02077774  0.008887819
##  [4,] -0.315046896 -0.19791033 -0.09932801  0.04192195  0.037347802
##  [5,] -0.143779727  0.40180780 -0.24480970  0.24931149  0.691158179
##  [6,] -0.262089915  0.33385181  0.05208004 -0.15498215 -0.016968504
##  [7,] -0.301495481  0.20351930  0.07389607 -0.24428180 -0.152378589
##  [8,] -0.326450777  0.10471934 -0.06375709 -0.01452483  0.075063802
##  [9,] -0.133026094  0.37435313 -0.04723882  0.73035926 -0.546430104
## [10,] -0.005110186  0.53476943  0.06093076 -0.20850149  0.142340304
## [11,] -0.320983196 -0.17567020 -0.06080670  0.04805725  0.034614915
## [12,] -0.327014776 -0.14516969 -0.04522340  0.01659119  0.005581697
## [13,] -0.314820490 -0.17561364 -0.06721924  0.05596536  0.060560919
## [14,] -0.258813108  0.22308298  0.19442998 -0.44872091 -0.326768711
##               [,6]        [,7]         [,8]        [,9]        [,10]
##  [1,]  0.044526359 -0.17890734 -0.144142555  0.39973993 -0.148198559
##  [2,] -0.005998576 -0.03296974  0.011467070  0.03086931 -0.008201763
##  [3,]  0.058775265 -0.20571779 -0.167364800  0.35197625 -0.084031240
##  [4,]  0.149983953 -0.13227399  0.176617917  0.37887847  0.255926554
##  [5,] -0.426330077  0.10369109  0.001199847  0.11206594  0.104282414
##  [6,]  0.191967729 -0.20273937 -0.705818998 -0.27231976  0.325688078
##  [7,] -0.105359227 -0.35411185  0.491909611 -0.11800989  0.430756375
##  [8,] -0.032038542 -0.39502835  0.219708425 -0.42712323 -0.620175981
##  [9,] -0.006947973  0.05544667  0.029381541  0.06162596 -0.010544764
## [10,]  0.658998356  0.20919258  0.192379256  0.27892922 -0.218507203
## [11,]  0.066602688  0.32478199 -0.064277691 -0.15817294 -0.178598971
## [12,]  0.073353089  0.27723030 -0.141662601 -0.20157067 -0.097336522
## [13,]  0.186023201  0.45357478  0.258987872 -0.28389310  0.319613582
## [14,] -0.512393785  0.37267320 -0.048063763  0.24677420 -0.164048762
##              [,11]        [,12]        [,13]         [,14]
##  [1,]  0.163129876  0.247341519 -0.225760111  0.6880688354
##  [2,] -0.004906530 -0.000894499 -0.003696191  0.0009917128
##  [3,]  0.140878098  0.052216855 -0.373279831 -0.7013575654
##  [4,] -0.486192102 -0.278699194  0.504137437 -0.0107356741
##  [5,]  0.042921459 -0.016989706 -0.003833141 -0.0006612429
##  [6,] -0.160368126  0.077624214  0.057040952  0.0383261344
##  [7,]  0.447308487  0.024336020  0.018993479  0.0184610480
##  [8,] -0.315446493 -0.008497969 -0.023943086  0.0017612162
##  [9,] -0.002813804 -0.001494078 -0.003934027 -0.0011495375
## [10,]  0.072884605 -0.015248152 -0.019933850 -0.0032041016
## [11,]  0.341637504  0.454344477  0.586983813 -0.1422757530
## [12,]  0.337261718 -0.766472120 -0.066785234  0.1074222222
## [13,] -0.329756301  0.239370124 -0.449097657  0.0290892380
## [14,] -0.213219745  0.001303249 -0.014564694 -0.0094693128
## variance proportion and cumulative variance
sumvar <- sum(pc$values)
propvar <- sapply(pc$values, function(x) x/sumvar)*100
cumvar <- data.frame(cbind(pc$values, propvar)) %>% mutate(cum = cumsum(propvar))
colnames(cumvar)[1] <- "eigen_value"
row.names(cumvar) <- paste0("PC",c(1:ncol(data_num)))
print(cumvar)
##       eigen_value      propvar       cum
## PC1  8.5499892807 61.071352005  61.07135
## PC2  2.9673714517 21.195510369  82.26686
## PC3  0.9142792420  6.530566014  88.79743
## PC4  0.5595408766  3.996720547  92.79415
## PC5  0.4406929617  3.147806869  95.94196
## PC6  0.2281037422  1.629312445  97.57127
## PC7  0.1469155500  1.049396786  98.62067
## PC8  0.0881415752  0.629582680  99.25025
## PC9  0.0557679880  0.398342772  99.64859
## PC10 0.0283529984  0.202521417  99.85111
## PC11 0.0148080921  0.105772086  99.95688
## PC12 0.0044051216  0.031465154  99.98835
## PC13 0.0014657520  0.010469657  99.99882
## PC14 0.0001653679  0.001181199 100.00000
# PCA result
scores <- as.matrix(scale_data) %*% pc$vectors
scores_PC <- scores[,1:3]
head(scores_PC)
##            [,1]       [,2]       [,3]
## [1,] -6.3444662  3.5339079 -2.3050890
## [2,] -3.2326393 -2.8244998 -1.0773714
## [3,] -4.7653393 -0.1013085 -0.3745269
## [4,] -2.3483312  7.7079438  0.4703524
## [5,] -4.0013691 -1.0219769 -1.7227752
## [6,] -0.9373283  3.6574176 -0.7670293

PCA With Funcional Principal

pc <- principal(data_num, nfactors = ncol(data_num), rotate = "none")
pc
## Principal Components Analysis
## Call: principal(r = data_num, nfactors = ncol(data_num), rotate = "none")
## Standardized loadings (pattern matrix) based upon correlation matrix
##      PC1   PC2   PC3   PC4   PC5   PC6   PC7   PC8   PC9  PC10  PC11  PC12
## X1  0.92 -0.35 -0.09  0.03  0.01  0.02 -0.07 -0.04  0.09  0.02  0.02  0.02
## X2  0.38 -0.13  0.88  0.19  0.16  0.00 -0.01  0.00  0.01  0.00  0.00  0.00
## X3  0.94 -0.30 -0.08  0.02  0.01  0.03 -0.08 -0.05  0.08  0.01  0.02  0.00
## X4  0.92 -0.34 -0.09  0.03  0.02  0.07 -0.05  0.05  0.09 -0.04 -0.06 -0.02
## X5  0.42  0.69 -0.23  0.19  0.46 -0.20  0.04  0.00  0.03 -0.02  0.01  0.00
## X6  0.77  0.58  0.05 -0.12 -0.01  0.09 -0.08 -0.21 -0.06 -0.05 -0.02  0.01
## X7  0.88  0.35  0.07 -0.18 -0.10 -0.05 -0.14  0.15 -0.03 -0.07  0.05  0.00
## X8  0.95  0.18 -0.06 -0.01  0.05 -0.02 -0.15  0.07 -0.10  0.10 -0.04  0.00
## X9  0.39  0.64 -0.05  0.55 -0.36  0.00  0.02  0.01  0.01  0.00  0.00  0.00
## X10 0.01  0.92  0.06 -0.16  0.09  0.31  0.08  0.06  0.07  0.04  0.01  0.00
## X12 0.94 -0.30 -0.06  0.04  0.02  0.03  0.12 -0.02 -0.04  0.03  0.04  0.03
## X13 0.96 -0.25 -0.04  0.01  0.00  0.04  0.11 -0.04 -0.05  0.02  0.04 -0.05
## X14 0.92 -0.30 -0.06  0.04  0.04  0.09  0.17  0.08 -0.07 -0.05 -0.04  0.02
## X15 0.76  0.38  0.19 -0.34 -0.22 -0.24  0.14 -0.01  0.06  0.03 -0.03  0.00
##      PC13  PC14 h2       u2 com
## X1   0.01  0.01  1  5.6e-16 1.4
## X2   0.00  0.00  1 -1.3e-15 1.6
## X3   0.01 -0.01  1 -2.0e-15 1.3
## X4  -0.02  0.00  1 -1.3e-15 1.4
## X5   0.00  0.00  1  1.2e-15 3.2
## X6   0.00  0.00  1  1.6e-15 2.2
## X7   0.00  0.00  1  6.7e-16 1.6
## X8   0.00  0.00  1 -2.2e-16 1.2
## X9   0.00  0.00  1  1.7e-15 3.3
## X10  0.00  0.00  1  2.2e-15 1.4
## X12 -0.02  0.00  1 -1.3e-15 1.3
## X13  0.00  0.00  1 -1.6e-15 1.2
## X14  0.02  0.00  1 -1.8e-15 1.4
## X15  0.00  0.00  1  1.2e-15 2.7
## 
##                        PC1  PC2  PC3  PC4  PC5  PC6  PC7  PC8  PC9 PC10 PC11
## SS loadings           8.55 2.97 0.91 0.56 0.44 0.23 0.15 0.09 0.06 0.03 0.01
## Proportion Var        0.61 0.21 0.07 0.04 0.03 0.02 0.01 0.01 0.00 0.00 0.00
## Cumulative Var        0.61 0.82 0.89 0.93 0.96 0.98 0.99 0.99 1.00 1.00 1.00
## Proportion Explained  0.61 0.21 0.07 0.04 0.03 0.02 0.01 0.01 0.00 0.00 0.00
## Cumulative Proportion 0.61 0.82 0.89 0.93 0.96 0.98 0.99 0.99 1.00 1.00 1.00
##                       PC12 PC13 PC14
## SS loadings              0    0    0
## Proportion Var           0    0    0
## Cumulative Var           1    1    1
## Proportion Explained     0    0    0
## Cumulative Proportion    1    1    1
## 
## Mean item complexity =  1.8
## Test of the hypothesis that 14 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0 
##  with the empirical chi square  0  with prob <  NA 
## 
## Fit based upon off diagonal values = 1
# score PC principal
L <- as.matrix(pc$loadings)   # loadings
lambda <- pc$values           # eigenvalues
lambda_k <- lambda[1:ncol(L)]
V <- sweep(L, 2, sqrt(lambda_k), "/") # eigenvector
scores_PC <- scale_data %*% as.matrix(V)
head(scores_PC)
##            PC1        PC2        PC3         PC4         PC5         PC6
## [1,] 6.3444662  3.5339079 -2.3050890 -0.78593696 -1.01431507  0.94087100
## [2,] 3.2326393 -2.8244998 -1.0773714  0.42254505 -0.38491447  0.79957232
## [3,] 4.7653393 -0.1013085 -0.3745269  0.54311565  0.08352401 -0.25123943
## [4,] 2.3483312  7.7079438  0.4703524 -0.07534096  0.48417665  0.87649619
## [5,] 4.0013691 -1.0219769 -1.7227752 -0.50139224 -0.24201394 -0.01604769
## [6,] 0.9373283  3.6574176 -0.7670293 -0.37023216  0.50528183 -0.42078720
##             PC7         PC8         PC9        PC10         PC11         PC12
## [1,]  0.5773509 -0.20630460 -0.96228132 -0.02083614  0.287673064 -0.127806115
## [2,]  0.5227056  0.23446668  0.43518878  0.17026749 -0.051880305  0.089371443
## [3,] -0.4165521  0.16580732 -0.19070070  0.20423501 -0.165958399  0.127661642
## [4,]  0.2682709 -0.21396000  0.08581588  0.18902454  0.082269898  0.089725781
## [5,] -0.5549208  0.27250039  0.26579487 -0.02802940  0.062526785 -0.058228930
## [6,]  0.3316441  0.06233122 -0.06698738  0.18709125 -0.007639768  0.002079186
##             PC13         PC14
## [1,]  0.07624386  0.037253095
## [2,] -0.01088478  0.005274366
## [3,] -0.01156597  0.004703886
## [4,] -0.15184977  0.020564593
## [5,] -0.01507829 -0.023554363
## [6,] -0.01955567 -0.003079050

PCA With Funtion Factormine

library('FactoMineR')
library('factoextra')

pca_result <- PCA(scale_data,
                  scale.unit = TRUE,
                  graph = FALSE,
                  ncp=ncol(data_num))

# summary pca result
pca_result$eig  
##           eigenvalue percentage of variance cumulative percentage of variance
## comp 1  8.5499892807           61.071352005                          61.07135
## comp 2  2.9673714517           21.195510369                          82.26686
## comp 3  0.9142792420            6.530566014                          88.79743
## comp 4  0.5595408766            3.996720547                          92.79415
## comp 5  0.4406929617            3.147806869                          95.94196
## comp 6  0.2281037422            1.629312445                          97.57127
## comp 7  0.1469155500            1.049396786                          98.62067
## comp 8  0.0881415752            0.629582680                          99.25025
## comp 9  0.0557679880            0.398342772                          99.64859
## comp 10 0.0283529984            0.202521417                          99.85111
## comp 11 0.0148080921            0.105772086                          99.95688
## comp 12 0.0044051216            0.031465154                          99.98835
## comp 13 0.0014657520            0.010469657                          99.99882
## comp 14 0.0001653679            0.001181199                         100.00000
pca_result$svd$V 
##              [,1]        [,2]        [,3]        [,4]         [,5]         [,6]
##  [1,] 0.315466116 -0.20347361 -0.09422095  0.03597973  0.013770549  0.044526359
##  [2,] 0.129870057 -0.07412025  0.92079359  0.25785883  0.246928671 -0.005998576
##  [3,] 0.322086768 -0.17315992 -0.08495702  0.02077774  0.008887819  0.058775265
##  [4,] 0.315046896 -0.19791033 -0.09932801  0.04192195  0.037347802  0.149983953
##  [5,] 0.143779727  0.40180780 -0.24480970  0.24931149  0.691158179 -0.426330077
##  [6,] 0.262089915  0.33385181  0.05208004 -0.15498215 -0.016968504  0.191967729
##  [7,] 0.301495481  0.20351930  0.07389607 -0.24428180 -0.152378589 -0.105359227
##  [8,] 0.326450777  0.10471934 -0.06375709 -0.01452483  0.075063802 -0.032038542
##  [9,] 0.133026094  0.37435313 -0.04723882  0.73035926 -0.546430104 -0.006947973
## [10,] 0.005110186  0.53476943  0.06093076 -0.20850149  0.142340304  0.658998356
## [11,] 0.320983196 -0.17567020 -0.06080670  0.04805725  0.034614915  0.066602688
## [12,] 0.327014776 -0.14516969 -0.04522340  0.01659119  0.005581697  0.073353089
## [13,] 0.314820490 -0.17561364 -0.06721924  0.05596536  0.060560919  0.186023201
## [14,] 0.258813108  0.22308298  0.19442998 -0.44872091 -0.326768711 -0.512393785
##              [,7]         [,8]        [,9]        [,10]        [,11]
##  [1,] -0.17890734 -0.144142555  0.39973993  0.148198559  0.163129876
##  [2,] -0.03296974  0.011467070  0.03086931  0.008201763 -0.004906530
##  [3,] -0.20571779 -0.167364800  0.35197625  0.084031240  0.140878098
##  [4,] -0.13227399  0.176617917  0.37887847 -0.255926554 -0.486192102
##  [5,]  0.10369109  0.001199847  0.11206594 -0.104282414  0.042921459
##  [6,] -0.20273937 -0.705818998 -0.27231976 -0.325688078 -0.160368126
##  [7,] -0.35411185  0.491909611 -0.11800989 -0.430756375  0.447308487
##  [8,] -0.39502835  0.219708425 -0.42712323  0.620175981 -0.315446493
##  [9,]  0.05544667  0.029381541  0.06162596  0.010544764 -0.002813804
## [10,]  0.20919258  0.192379256  0.27892922  0.218507203  0.072884605
## [11,]  0.32478199 -0.064277691 -0.15817294  0.178598971  0.341637504
## [12,]  0.27723030 -0.141662601 -0.20157067  0.097336522  0.337261718
## [13,]  0.45357478  0.258987872 -0.28389310 -0.319613582 -0.329756301
## [14,]  0.37267320 -0.048063763  0.24677420  0.164048762 -0.213219745
##              [,12]        [,13]         [,14]
##  [1,]  0.247341519  0.225760111  0.6880688354
##  [2,] -0.000894499  0.003696191  0.0009917128
##  [3,]  0.052216855  0.373279831 -0.7013575654
##  [4,] -0.278699194 -0.504137437 -0.0107356741
##  [5,] -0.016989706  0.003833141 -0.0006612429
##  [6,]  0.077624214 -0.057040952  0.0383261344
##  [7,]  0.024336020 -0.018993479  0.0184610480
##  [8,] -0.008497969  0.023943086  0.0017612162
##  [9,] -0.001494078  0.003934027 -0.0011495375
## [10,] -0.015248152  0.019933850 -0.0032041016
## [11,]  0.454344477 -0.586983813 -0.1422757530
## [12,] -0.766472120  0.066785234  0.1074222222
## [13,]  0.239370124  0.449097657  0.0290892380
## [14,]  0.001303249  0.014564694 -0.0094693128
head(pca_result$ind$coord)
##      Dim.1      Dim.2      Dim.3       Dim.4      Dim.5       Dim.6      Dim.7
## 1 6.350049  3.5370173 -2.3071172 -0.78662850 -1.0152076  0.94169886  0.5778589
## 2 3.235484 -2.8269851 -1.0783194  0.42291685 -0.3852532  0.80027586  0.5231655
## 3 4.769532 -0.1013976 -0.3748564  0.54359353  0.0835975 -0.25146049 -0.4169186
## 4 2.350397  7.7147260  0.4707663 -0.07540725  0.4846027  0.87726742  0.2685070
## 5 4.004890 -1.0228761 -1.7242910 -0.50183341 -0.2422269 -0.01606181 -0.5554091
## 6 0.938153  3.6606357 -0.7677042 -0.37055793  0.5057264 -0.42115745  0.3319359
##         Dim.8       Dim.9      Dim.10      Dim.11       Dim.12      Dim.13
## 1 -0.20648613 -0.96312803 -0.02085448  0.28792619 -0.127918571  0.07631094
## 2  0.23467299  0.43557170  0.17041730 -0.05192595  0.089450081 -0.01089436
## 3  0.16595321 -0.19086849  0.20441471 -0.16610442  0.127773971 -0.01157615
## 4 -0.21414826  0.08589139  0.18919086  0.08234229  0.089804730 -0.15198338
## 5  0.27274016  0.26602874 -0.02805406  0.06258180 -0.058280165 -0.01509155
## 6  0.06238606 -0.06704632  0.18725587 -0.00764649  0.002081015 -0.01957287
##         Dim.14
## 1  0.037285874
## 2  0.005279007
## 3  0.004708025
## 4  0.020582688
## 5 -0.023575088
## 6 -0.003081760
# scree plot
fviz_eig(pca_result,
         addlabels = TRUE,
         ncp = ncol(data_num), 
         barfill = "skyblue",
         barcolor = "darkblue",
         linecolor = "red")

#Biplot
fviz_pca_biplot(pca_result,
                geom.ind = "point",
                #col.ind = data_raw$diagnosis,
                #palette = c("#FC4E07","#E7B800", "#00AFBB"),
                addEllipses = TRUE,
                #legend.title = "Kategori"
)

# correlation circle
contrib_circle <- fviz_pca_var(pca_result, col.var = "contrib",
                               gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
                               repel = TRUE) +
  ggtitle("Kontribusi Variabel")
plot(contrib_circle)

# variable contribution
contrib_v_PC1 <- fviz_contrib(pca_result, choice = "var", axes = 1, top = 5) + ggtitle("PC1")
plot(contrib_v_PC1)

contrib_v_PC2 <- fviz_contrib(pca_result, choice = "var", axes = 2, top = 5) + ggtitle("PC2")
plot(contrib_v_PC2)

Factor Analysis

# Factor Analysis
varcov = cov(scale_data)
pc = eigen(varcov)

cat("eigen value:")
## eigen value:
pc$values
##  [1] 8.5499892807 2.9673714517 0.9142792420 0.5595408766 0.4406929617
##  [6] 0.2281037422 0.1469155500 0.0881415752 0.0557679880 0.0283529984
## [11] 0.0148080921 0.0044051216 0.0014657520 0.0001653679
cat("eigen vector:")
## eigen vector:
pc$vectors
##               [,1]        [,2]        [,3]        [,4]         [,5]
##  [1,] -0.315466116 -0.20347361 -0.09422095  0.03597973  0.013770549
##  [2,] -0.129870057 -0.07412025  0.92079359  0.25785883  0.246928671
##  [3,] -0.322086768 -0.17315992 -0.08495702  0.02077774  0.008887819
##  [4,] -0.315046896 -0.19791033 -0.09932801  0.04192195  0.037347802
##  [5,] -0.143779727  0.40180780 -0.24480970  0.24931149  0.691158179
##  [6,] -0.262089915  0.33385181  0.05208004 -0.15498215 -0.016968504
##  [7,] -0.301495481  0.20351930  0.07389607 -0.24428180 -0.152378589
##  [8,] -0.326450777  0.10471934 -0.06375709 -0.01452483  0.075063802
##  [9,] -0.133026094  0.37435313 -0.04723882  0.73035926 -0.546430104
## [10,] -0.005110186  0.53476943  0.06093076 -0.20850149  0.142340304
## [11,] -0.320983196 -0.17567020 -0.06080670  0.04805725  0.034614915
## [12,] -0.327014776 -0.14516969 -0.04522340  0.01659119  0.005581697
## [13,] -0.314820490 -0.17561364 -0.06721924  0.05596536  0.060560919
## [14,] -0.258813108  0.22308298  0.19442998 -0.44872091 -0.326768711
##               [,6]        [,7]         [,8]        [,9]        [,10]
##  [1,]  0.044526359 -0.17890734 -0.144142555  0.39973993 -0.148198559
##  [2,] -0.005998576 -0.03296974  0.011467070  0.03086931 -0.008201763
##  [3,]  0.058775265 -0.20571779 -0.167364800  0.35197625 -0.084031240
##  [4,]  0.149983953 -0.13227399  0.176617917  0.37887847  0.255926554
##  [5,] -0.426330077  0.10369109  0.001199847  0.11206594  0.104282414
##  [6,]  0.191967729 -0.20273937 -0.705818998 -0.27231976  0.325688078
##  [7,] -0.105359227 -0.35411185  0.491909611 -0.11800989  0.430756375
##  [8,] -0.032038542 -0.39502835  0.219708425 -0.42712323 -0.620175981
##  [9,] -0.006947973  0.05544667  0.029381541  0.06162596 -0.010544764
## [10,]  0.658998356  0.20919258  0.192379256  0.27892922 -0.218507203
## [11,]  0.066602688  0.32478199 -0.064277691 -0.15817294 -0.178598971
## [12,]  0.073353089  0.27723030 -0.141662601 -0.20157067 -0.097336522
## [13,]  0.186023201  0.45357478  0.258987872 -0.28389310  0.319613582
## [14,] -0.512393785  0.37267320 -0.048063763  0.24677420 -0.164048762
##              [,11]        [,12]        [,13]         [,14]
##  [1,]  0.163129876  0.247341519 -0.225760111  0.6880688354
##  [2,] -0.004906530 -0.000894499 -0.003696191  0.0009917128
##  [3,]  0.140878098  0.052216855 -0.373279831 -0.7013575654
##  [4,] -0.486192102 -0.278699194  0.504137437 -0.0107356741
##  [5,]  0.042921459 -0.016989706 -0.003833141 -0.0006612429
##  [6,] -0.160368126  0.077624214  0.057040952  0.0383261344
##  [7,]  0.447308487  0.024336020  0.018993479  0.0184610480
##  [8,] -0.315446493 -0.008497969 -0.023943086  0.0017612162
##  [9,] -0.002813804 -0.001494078 -0.003934027 -0.0011495375
## [10,]  0.072884605 -0.015248152 -0.019933850 -0.0032041016
## [11,]  0.341637504  0.454344477  0.586983813 -0.1422757530
## [12,]  0.337261718 -0.766472120 -0.066785234  0.1074222222
## [13,] -0.329756301  0.239370124 -0.449097657  0.0290892380
## [14,] -0.213219745  0.001303249 -0.014564694 -0.0094693128
sp = sum(pc$values[1:2])

L1 = sqrt(pc$values[1])*pc$vectors[,1]
L2 = sqrt(pc$values[2])*pc$vectors[,2]


L = cbind(L1,L2)
cat("Factor Loading:\n")
## Factor Loading:
print(L)
##                L1         L2
##  [1,] -0.92243443 -0.3505049
##  [2,] -0.37974478 -0.1276800
##  [3,] -0.94179345 -0.2982863
##  [4,] -0.92120861 -0.3409215
##  [5,] -0.42041717  0.6921565
##  [6,] -0.76636047  0.5750951
##  [7,] -0.88158378  0.3505836
##  [8,] -0.95455398  0.1803902
##  [9,] -0.38897315  0.6448629
## [10,] -0.01494237  0.9211970
## [11,] -0.93856657 -0.3026105
## [12,] -0.95620313 -0.2500702
## [13,] -0.92054659 -0.3025131
## [14,] -0.75677897  0.3842841

FA With Funcional Principle

fa <- principal(scale_data, nfactors = 2, rotate = "none")
fa
## Principal Components Analysis
## Call: principal(r = scale_data, nfactors = 2, rotate = "none")
## Standardized loadings (pattern matrix) based upon correlation matrix
##      PC1   PC2   h2    u2 com
## X1  0.92 -0.35 0.97 0.026 1.3
## X2  0.38 -0.13 0.16 0.839 1.2
## X3  0.94 -0.30 0.98 0.024 1.2
## X4  0.92 -0.34 0.96 0.035 1.3
## X5  0.42  0.69 0.66 0.344 1.6
## X6  0.77  0.58 0.92 0.082 1.9
## X7  0.88  0.35 0.90 0.100 1.3
## X8  0.95  0.18 0.94 0.056 1.1
## X9  0.39  0.64 0.57 0.433 1.6
## X10 0.01  0.92 0.85 0.151 1.0
## X12 0.94 -0.30 0.97 0.028 1.2
## X13 0.96 -0.25 0.98 0.023 1.1
## X14 0.92 -0.30 0.94 0.061 1.2
## X15 0.76  0.38 0.72 0.280 1.5
## 
##                        PC1  PC2
## SS loadings           8.55 2.97
## Proportion Var        0.61 0.21
## Cumulative Var        0.61 0.82
## Proportion Explained  0.74 0.26
## Cumulative Proportion 0.74 1.00
## 
## Mean item complexity =  1.3
## Test of the hypothesis that 2 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.03 
##  with the empirical chi square  82.44  with prob <  0.06 
## 
## Fit based upon off diagonal values = 1
# Factor Analysis with rotation varimax
fa_1 <- principal(scale_data, nfactors = 2, rotate = "varimax")
fa_1
## Principal Components Analysis
## Call: principal(r = scale_data, nfactors = 2, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
##       RC1  RC2   h2    u2 com
## X1   0.98 0.08 0.97 0.026 1.0
## X2   0.40 0.05 0.16 0.839 1.0
## X3   0.98 0.14 0.98 0.024 1.0
## X4   0.98 0.09 0.96 0.035 1.0
## X5   0.08 0.81 0.66 0.344 1.0
## X6   0.44 0.85 0.92 0.082 1.5
## X7   0.64 0.70 0.90 0.100 2.0
## X8   0.78 0.58 0.94 0.056 1.8
## X9   0.07 0.75 0.57 0.433 1.0
## X10 -0.38 0.84 0.85 0.151 1.4
## X12  0.98 0.13 0.97 0.028 1.0
## X13  0.97 0.19 0.98 0.023 1.1
## X14  0.96 0.12 0.94 0.061 1.0
## X15  0.52 0.67 0.72 0.280 1.9
## 
##                        RC1  RC2
## SS loadings           7.51 4.01
## Proportion Var        0.54 0.29
## Cumulative Var        0.54 0.82
## Proportion Explained  0.65 0.35
## Cumulative Proportion 0.65 1.00
## 
## Mean item complexity =  1.3
## Test of the hypothesis that 2 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.03 
##  with the empirical chi square  82.44  with prob <  0.06 
## 
## Fit based upon off diagonal values = 1
# scores FA
scores_FA = scale_data %*% solve(cor(scale_data)) %*% as.matrix(fa$loadings)
head(scores_FA)
##            PC1         PC2
## [1,] 2.1697630  2.05148934
## [2,] 1.1055400 -1.63966676
## [3,] 1.6297127 -0.05881118
## [4,] 0.8031128  4.47458313
## [5,] 1.3684402 -0.59327375
## [6,] 0.3205597  2.12318867