# Load Data
data2 <- read.csv2("G:/Other computers/My Laptop/Tugas/Semester 5/StatMul/project/data2.csv", row.names = 1)
str(data2)
## 'data.frame':    33 obs. of  10 variables:
##  $ X1 : num  58.8 46.7 42.8 54.9 37.3 ...
##  $ X2 : num  32.22 9.4 10.06 6.22 2.58 ...
##  $ X3 : num  33.47 8.48 5.61 6.31 1.2 ...
##  $ X4 : num  5.12 1.35 1.63 1.68 0.79 1.66 2.19 4.8 3.51 2.41 ...
##  $ X5 : num  89.5 55.1 61.1 46 26.3 ...
##  $ X6 : num  89.4 72.8 72.7 71.8 75.4 ...
##  $ X7 : num  74.3 53.7 61.6 59.1 72.9 ...
##  $ X8 : num  49.53 10.32 7.69 9.52 7.79 ...
##  $ X9 : num  63.2 62.5 62.9 59.7 62.9 ...
##  $ X10: int  95570 268700 170750 233670 159380 83350 188170 38522 475300 165250 ...
head(data2)
##                     X1    X2    X3   X4    X5    X6    X7    X8    X9    X10
## Nias             58.79 32.22 33.47 5.12 89.50 89.44 74.26 49.53 63.22  95570
## Mandailing Natal 46.71  9.40  8.48 1.35 55.06 72.78 53.67 10.32 62.52 268700
## Tapanuli Selatan 42.76 10.06  5.61 1.63 61.06 72.72 61.57  7.69 62.89 170750
## Tapanuli Tengah  54.90  6.22  6.31 1.68 45.99 71.80 59.06  9.52 59.68 233670
## Tapanuli Utara   37.28  2.58  1.20 0.79 26.31 75.38 72.93  7.79 62.88 159380
## Toba Samosir     27.54  1.81  3.33 1.66 16.71 65.60 59.15  5.02 55.07  83350
# Checking a linearity relationship between variables
data2.active <- data2[, 1:10]
library(psych)
## Warning: package 'psych' was built under R version 4.5.2
pairs.panels(data2.active, 
             method = "pearson", # correlation method
             hist.col = "#00AFBB",
             density = TRUE,  # show density plots
             ellipses = FALSE, # show correlation ellipses
             lm = TRUE
)

# Checking sampling adequacy
library(psych)
KMO(cor(data2.active))
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = cor(data2.active))
## Overall MSA =  0.79
## MSA for each item = 
##   X1   X2   X3   X4   X5   X6   X7   X8   X9  X10 
## 0.88 0.89 0.79 0.61 0.80 0.75 0.72 0.81 0.76 0.34
library(psych)
#Bartlett's test of spherecity
cortest.bartlett(cor(data2.active), n = nrow(data2.active))
## $chisq
## [1] 252.5337
## 
## $p.value
## [1] 1.10629e-30
## 
## $df
## [1] 45
library(factoextra)
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.5.2
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(FactoMineR)
fit.pca <- PCA(data2.active, scale.unit = TRUE, ncp = 9, graph = TRUE)

summary(fit.pca)
## 
## Call:
## PCA(X = data2.active, scale.unit = TRUE, ncp = 9, graph = TRUE) 
## 
## 
## Eigenvalues
##                        Dim.1   Dim.2   Dim.3   Dim.4   Dim.5   Dim.6   Dim.7
## Variance               5.309   1.753   1.213   0.572   0.353   0.269   0.203
## % of var.             53.086  17.528  12.129   5.722   3.530   2.695   2.033
## Cumulative % of var.  53.086  70.614  82.743  88.464  91.994  94.688  96.721
##                        Dim.8   Dim.9  Dim.10
## Variance               0.174   0.102   0.052
## % of var.              1.739   1.024   0.516
## Cumulative % of var.  98.460  99.484 100.000
## 
## Individuals (the 10 first)
##                      Dist    Dim.1    ctr   cos2    Dim.2    ctr   cos2  
## Nias             |  6.326 |  6.127 21.429  0.938 |  1.168  2.357  0.034 |
## Mandailing Natal |  2.258 |  1.306  0.973  0.334 | -1.290  2.876  0.326 |
## Tapanuli Selatan |  2.238 |  1.266  0.915  0.320 | -1.520  3.992  0.461 |
## Tapanuli Tengah  |  2.327 |  1.122  0.719  0.232 | -1.266  2.771  0.296 |
## Tapanuli Utara   |  2.460 |  0.317  0.057  0.017 | -2.354  9.579  0.915 |
## Toba Samosir     |  2.116 | -1.140  0.742  0.290 | -0.993  1.705  0.220 |
## Labuhan Batu     |  1.543 | -0.508  0.147  0.108 | -0.261  0.118  0.029 |
## Asahan           |  2.268 | -1.785  1.819  0.620 |  0.573  0.568  0.064 |
## Simalungun       |  2.532 | -1.603  1.467  0.401 |  0.441  0.337  0.030 |
## Dairi            |  2.258 |  0.103  0.006  0.002 | -1.640  4.649  0.527 |
##                   Dim.3    ctr   cos2  
## Nias             -0.346  0.299  0.003 |
## Mandailing Natal  0.580  0.841  0.066 |
## Tapanuli Selatan  0.065  0.010  0.001 |
## Tapanuli Tengah   0.010  0.000  0.000 |
## Tapanuli Utara    0.336  0.282  0.019 |
## Toba Samosir     -0.612  0.936  0.084 |
## Labuhan Batu      0.280  0.196  0.033 |
## Asahan           -0.726  1.315  0.102 |
## Simalungun        1.811  8.195  0.512 |
## Dairi             0.752  1.413  0.111 |
## 
## Variables
##                     Dim.1    ctr   cos2    Dim.2    ctr   cos2    Dim.3    ctr
## X1               |  0.844 13.420  0.712 | -0.124  0.871  0.015 | -0.243  4.876
## X2               |  0.826 12.843  0.682 |  0.341  6.630  0.116 | -0.145  1.743
## X3               |  0.901 15.303  0.812 |  0.269  4.131  0.072 |  0.029  0.072
## X4               |  0.040  0.030  0.002 |  0.881 44.298  0.776 | -0.138  1.567
## X5               |  0.955 17.180  0.912 |  0.051  0.147  0.003 | -0.024  0.047
## X6               |  0.686  8.871  0.471 | -0.001  0.000  0.000 |  0.463 17.640
## X7               |  0.742 10.384  0.551 | -0.530 16.049  0.281 | -0.149  1.839
## X8               |  0.860 13.938  0.740 |  0.336  6.447  0.113 |  0.090  0.672
## X9               |  0.640  7.706  0.409 | -0.582 19.339  0.339 |  0.251  5.196
## X10              | -0.131  0.325  0.017 |  0.191  2.086  0.037 |  0.897 66.349
##                    cos2  
## X1                0.059 |
## X2                0.021 |
## X3                0.001 |
## X4                0.019 |
## X5                0.001 |
## X6                0.214 |
## X7                0.022 |
## X8                0.008 |
## X9                0.063 |
## X10               0.805 |
# Eigenvalue
eig.val <-get_eigenvalue(fit.pca)
eig.val
##        eigenvalue variance.percent cumulative.variance.percent
## Dim.1  5.30858116       53.0858116                    53.08581
## Dim.2  1.75281183       17.5281183                    70.61393
## Dim.3  1.21286593       12.1286593                    82.74259
## Dim.4  0.57215760        5.7215760                    88.46417
## Dim.5  0.35295233        3.5295233                    91.99369
## Dim.6  0.26945110        2.6945110                    94.68820
## Dim.7  0.20329047        2.0329047                    96.72110
## Dim.8  0.17390154        1.7390154                    98.46012
## Dim.9  0.10237386        1.0237386                    99.48386
## Dim.10 0.05161419        0.5161419                   100.00000
# Screeplot
fviz_eig(fit.pca, addlabels = TRUE, ylim = c(0, 100), size = 5)
## Warning in geom_bar(stat = "identity", fill = barfill, color = barcolor, :
## Ignoring empty aesthetic: `width`.

# Coordinates of variables (Correlation circle)
var <- get_pca_var(fit.pca)
head(var$coord)
##         Dim.1        Dim.2       Dim.3       Dim.4       Dim.5       Dim.6
## X1 0.84403052 -0.123593039 -0.24319739  0.27629322  0.03418761  0.24359568
## X2 0.82569224  0.340897774 -0.14539314  0.26360103 -0.08856874 -0.03657704
## X3 0.90133147  0.269100752  0.02949245  0.07558208 -0.07817511 -0.22129256
## X4 0.03958009  0.881173022 -0.13787964 -0.23792765  0.35655153  0.12348719
## X5 0.95499645  0.050820015 -0.02381695  0.01387724 -0.09428391  0.11591713
## X6 0.68625333 -0.001449887  0.46254214 -0.48749254 -0.22566480  0.12152231
##          Dim.7       Dim.8        Dim.9
## X1 -0.22145364 -0.12865832  0.098587324
## X2  0.20457376  0.19842960  0.144192817
## X3 -0.12395556  0.02308680 -0.148451063
## X4 -0.03089404  0.05225886 -0.007690669
## X5 -0.06235011  0.11873835 -0.147195351
## X6 -0.03547325  0.02648354  0.091145231
var <- get_pca_var(fit.pca)
head(var$cos2)
##          Dim.1        Dim.2        Dim.3        Dim.4       Dim.5      Dim.6
## X1 0.712387519 1.527524e-02 0.0591449704 0.0763379456 0.001168793 0.05933885
## X2 0.681767678 1.162113e-01 0.0211391648 0.0694855031 0.007844422 0.00133788
## X3 0.812398426 7.241521e-02 0.0008698048 0.0057126512 0.006111347 0.04897040
## X4 0.001566584 7.764659e-01 0.0190107959 0.0566095680 0.127128997 0.01524909
## X5 0.912018226 2.582674e-03 0.0005672469 0.0001925778 0.008889457 0.01343678
## X6 0.470943635 2.102173e-06 0.2139452322 0.2376489801 0.050924603 0.01476767
##           Dim.7        Dim.8        Dim.9
## X1 0.0490417131 0.0165529632 9.719461e-03
## X2 0.0418504244 0.0393743045 2.079157e-02
## X3 0.0153649819 0.0005330001 2.203772e-02
## X4 0.0009544415 0.0027309884 5.914639e-05
## X5 0.0038875356 0.0140987956 2.166647e-02
## X6 0.0012583514 0.0007013777 8.307453e-03
library(corrplot)
## corrplot 0.95 loaded
corrplot(var$cos2, is.corr=FALSE)

# Total cos2 of variables on Dim.1 and Dim.2
fviz_cos2(fit.pca, choice = "var", axes = 1:2)

# Contributions of variables to PCs
head(var$contrib)
##         Dim.1        Dim.2       Dim.3       Dim.4      Dim.5      Dim.6
## X1 13.4195465 8.714706e-01  4.87646401 13.34211866  0.3311475 22.0221238
## X2 12.8427476 6.629992e+00  1.74291027 12.14446918  2.2225160  0.4965203
## X3 15.3034945 4.131374e+00  0.07171484  0.99844015  1.7314937 18.1741323
## X4  0.0295104 4.429830e+01  1.56742765  9.89405160 36.0187441  5.6593147
## X5 17.1800750 1.473446e-01  0.04676914  0.03365817  2.5185998  4.9867235
## X6  8.8713654 1.199315e-04 17.63964405 41.53558060 14.4281814  5.4806500
##         Dim.7      Dim.8      Dim.9
## X1 24.1239603  9.5185837  9.4940844
## X2 20.5865153 22.6417232 20.3094510
## X3  7.5581416  0.3064954 21.5267046
## X4  0.4694964  1.5704222  0.0577749
## X5  1.9123058  8.1073439 21.1640665
## X6  0.6189918  0.4033189  8.1148188
# Contributions of variables to PC1
fviz_contrib(fit.pca, choice = "var", axes = 1, top = 10)

# Contributions of variables to PC2
fviz_contrib(fit.pca, choice = "var", axes = 2, top = 10)     

fviz_pca_var(fit.pca, col.var = "contrib",
             gradient.cols = c("#00AFBB",
                               "#E7B800", "#FC4E07"))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the ggpubr package.
##   Please report the issue at <https://github.com/kassambara/ggpubr/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## ℹ The deprecated feature was likely used in the factoextra package.
##   Please report the issue at <https://github.com/kassambara/factoextra/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

fit.desc <- dimdesc(fit.pca, axes = c(1,2), proba = 0.05)
# Description of dimension 1
fit.desc$Dim.1
## 
## Link between the variable and the continuous variables (R-square)
## =================================================================================
##    correlation      p.value
## X5   0.9549965 6.450327e-18
## X3   0.9013315 8.512308e-13
## X8   0.8601791 1.406428e-10
## X1   0.8440305 6.803771e-10
## X2   0.8256922 3.330608e-09
## X7   0.7424666 7.549093e-07
## X6   0.6862533 1.039431e-05
## X9   0.6396044 6.137853e-05
# Description of dimension 2
fit.desc$Dim.2
## 
## Link between the variable and the continuous variables (R-square)
## =================================================================================
##    correlation      p.value
## X4   0.8811730 1.314651e-11
## X7  -0.5303864 1.498224e-03
## X9  -0.5822144 3.786634e-04
# Graph of individuals 
ind <- get_pca_ind(fit.pca)
ind
## Principal Component Analysis Results for individuals
##  ===================================================
##   Name       Description                       
## 1 "$coord"   "Coordinates for the individuals" 
## 2 "$cos2"    "Cos2 for the individuals"        
## 3 "$contrib" "contributions of the individuals"
# Coordinates of individuals
head(ind$coord)
##                       Dim.1      Dim.2       Dim.3       Dim.4       Dim.5
## Nias              6.1269501  1.1675543 -0.34574846  0.07555444 -0.16887941
## Mandailing Natal  1.3058422 -1.2898445  0.58004490  0.87749715 -0.14840813
## Tapanuli Selatan  1.2663804 -1.5195988  0.06462915  0.36154816 -0.03802839
## Tapanuli Tengah   1.1221091 -1.2661262  0.01017361  0.95120142 -0.17436727
## Tapanuli Utara    0.3165721 -2.3539173  0.33615434 -0.32908090  0.08171388
## Toba Samosir     -1.1398937 -0.9929422 -0.61202167 -0.21655675 -0.60784474
##                       Dim.6      Dim.7       Dim.8       Dim.9
## Nias             -0.3400030 -0.2374305  0.42610873 -0.04319163
## Mandailing Natal  0.2499444 -0.5255386  0.42564804 -0.18505220
## Tapanuli Selatan  0.2755600 -0.1495961  0.75667545 -0.26690034
## Tapanuli Tengah   1.0104554 -0.7190602 -0.25194346 -0.07430483
## Tapanuli Utara    0.1391386  0.3790440  0.05558675 -0.09322234
## Toba Samosir     -0.2071953  0.7641105 -0.05145101 -0.84264761
# Quality of individuals
head(ind$cos2)
##                       Dim.1      Dim.2        Dim.3        Dim.4        Dim.5
## Nias             0.93808248 0.03406485 2.987259e-03 0.0001426502 0.0007126983
## Mandailing Natal 0.33447794 0.32633282 6.599473e-02 0.1510349550 0.0043201757
## Tapanuli Selatan 0.32021530 0.46107502 8.340086e-04 0.0261003309 0.0002887551
## Tapanuli Tengah  0.23249995 0.29601025 1.911185e-05 0.1670697095 0.0056141327
## Tapanuli Utara   0.01655785 0.91546333 1.866964e-02 0.0178922060 0.0011031897
## Toba Samosir     0.29032487 0.22029441 8.369301e-02 0.0104784976 0.0825545243
##                        Dim.6       Dim.7        Dim.8        Dim.9
## Nias             0.002888802 0.001408722 0.0045372551 4.661778e-05
## Mandailing Natal 0.012253857 0.054174545 0.0355375360 6.716987e-03
## Tapanuli Selatan 0.015161641 0.004468421 0.1143228322 1.422368e-02
## Tapanuli Tengah  0.188532858 0.095473599 0.0117208450 1.019499e-03
## Tapanuli Utara   0.003198556 0.023737652 0.0005105063 1.435815e-03
## Toba Samosir     0.009592135 0.130457141 0.0005914844 1.586527e-01
# Contributions of individuals
head(ind$contrib)
##                       Dim.1    Dim.2        Dim.3      Dim.4      Dim.5
## Nias             21.4287229 2.356703 0.2986714958 0.03023363 0.24486313
## Mandailing Natal  0.9733948 2.876242 0.8406137505 4.07813705 0.18909732
## Tapanuli Selatan  0.9154529 3.992167 0.0104359044 0.69231336 0.01241612
## Tapanuli Tengah   0.7187498 2.771436 0.0002585969 4.79198411 0.26103572
## Tapanuli Utara    0.0572075 9.579287 0.2823258903 0.57355588 0.05732732
## Toba Samosir      0.7417137 1.704507 0.9358513428 0.24837841 3.17216177
##                       Dim.6     Dim.7      Dim.8       Dim.9
## Nias              1.3000846 0.8403148 3.16390554  0.05521998
## Mandailing Natal  0.7025753 4.1169753 3.15706781  1.01364407
## Tapanuli Selatan  0.8539621 0.3335872 9.97704489  2.10860511
## Tapanuli Tengah  11.4826048 7.7072525 1.10608582  0.16342974
## Tapanuli Utara    0.2177216 2.1416488 0.05384250  0.25723910
## Toba Samosir      0.4827984 8.7032474 0.04612861 21.01788336
# Plot quality and contribution
fviz_pca_ind(fit.pca, col.ind = "cos2", 
             gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
             repel = TRUE # Avoid text overlapping (slow if many points)
)

# Contribution to individuals to Dim 1-2
fviz_contrib(fit.pca, choice = "ind", axes = 1:2)