# Load Data
data2 <- read.csv2("G:/Other computers/My Laptop/Tugas/Semester 5/StatMul/project/data2.csv", row.names = 1)
str(data2)
## 'data.frame': 33 obs. of 10 variables:
## $ X1 : num 58.8 46.7 42.8 54.9 37.3 ...
## $ X2 : num 32.22 9.4 10.06 6.22 2.58 ...
## $ X3 : num 33.47 8.48 5.61 6.31 1.2 ...
## $ X4 : num 5.12 1.35 1.63 1.68 0.79 1.66 2.19 4.8 3.51 2.41 ...
## $ X5 : num 89.5 55.1 61.1 46 26.3 ...
## $ X6 : num 89.4 72.8 72.7 71.8 75.4 ...
## $ X7 : num 74.3 53.7 61.6 59.1 72.9 ...
## $ X8 : num 49.53 10.32 7.69 9.52 7.79 ...
## $ X9 : num 63.2 62.5 62.9 59.7 62.9 ...
## $ X10: int 95570 268700 170750 233670 159380 83350 188170 38522 475300 165250 ...
head(data2)
## X1 X2 X3 X4 X5 X6 X7 X8 X9 X10
## Nias 58.79 32.22 33.47 5.12 89.50 89.44 74.26 49.53 63.22 95570
## Mandailing Natal 46.71 9.40 8.48 1.35 55.06 72.78 53.67 10.32 62.52 268700
## Tapanuli Selatan 42.76 10.06 5.61 1.63 61.06 72.72 61.57 7.69 62.89 170750
## Tapanuli Tengah 54.90 6.22 6.31 1.68 45.99 71.80 59.06 9.52 59.68 233670
## Tapanuli Utara 37.28 2.58 1.20 0.79 26.31 75.38 72.93 7.79 62.88 159380
## Toba Samosir 27.54 1.81 3.33 1.66 16.71 65.60 59.15 5.02 55.07 83350
# Checking a linearity relationship between variables
data2.active <- data2[, 1:10]
library(psych)
## Warning: package 'psych' was built under R version 4.5.2
pairs.panels(data2.active,
method = "pearson", # correlation method
hist.col = "#00AFBB",
density = TRUE, # show density plots
ellipses = FALSE, # show correlation ellipses
lm = TRUE
)

# Checking sampling adequacy
library(psych)
KMO(cor(data2.active))
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = cor(data2.active))
## Overall MSA = 0.79
## MSA for each item =
## X1 X2 X3 X4 X5 X6 X7 X8 X9 X10
## 0.88 0.89 0.79 0.61 0.80 0.75 0.72 0.81 0.76 0.34
library(psych)
#Bartlett's test of spherecity
cortest.bartlett(cor(data2.active), n = nrow(data2.active))
## $chisq
## [1] 252.5337
##
## $p.value
## [1] 1.10629e-30
##
## $df
## [1] 45
library(factoextra)
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.5.2
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(FactoMineR)
fit.pca <- PCA(data2.active, scale.unit = TRUE, ncp = 9, graph = TRUE)


summary(fit.pca)
##
## Call:
## PCA(X = data2.active, scale.unit = TRUE, ncp = 9, graph = TRUE)
##
##
## Eigenvalues
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5 Dim.6 Dim.7
## Variance 5.309 1.753 1.213 0.572 0.353 0.269 0.203
## % of var. 53.086 17.528 12.129 5.722 3.530 2.695 2.033
## Cumulative % of var. 53.086 70.614 82.743 88.464 91.994 94.688 96.721
## Dim.8 Dim.9 Dim.10
## Variance 0.174 0.102 0.052
## % of var. 1.739 1.024 0.516
## Cumulative % of var. 98.460 99.484 100.000
##
## Individuals (the 10 first)
## Dist Dim.1 ctr cos2 Dim.2 ctr cos2
## Nias | 6.326 | 6.127 21.429 0.938 | 1.168 2.357 0.034 |
## Mandailing Natal | 2.258 | 1.306 0.973 0.334 | -1.290 2.876 0.326 |
## Tapanuli Selatan | 2.238 | 1.266 0.915 0.320 | -1.520 3.992 0.461 |
## Tapanuli Tengah | 2.327 | 1.122 0.719 0.232 | -1.266 2.771 0.296 |
## Tapanuli Utara | 2.460 | 0.317 0.057 0.017 | -2.354 9.579 0.915 |
## Toba Samosir | 2.116 | -1.140 0.742 0.290 | -0.993 1.705 0.220 |
## Labuhan Batu | 1.543 | -0.508 0.147 0.108 | -0.261 0.118 0.029 |
## Asahan | 2.268 | -1.785 1.819 0.620 | 0.573 0.568 0.064 |
## Simalungun | 2.532 | -1.603 1.467 0.401 | 0.441 0.337 0.030 |
## Dairi | 2.258 | 0.103 0.006 0.002 | -1.640 4.649 0.527 |
## Dim.3 ctr cos2
## Nias -0.346 0.299 0.003 |
## Mandailing Natal 0.580 0.841 0.066 |
## Tapanuli Selatan 0.065 0.010 0.001 |
## Tapanuli Tengah 0.010 0.000 0.000 |
## Tapanuli Utara 0.336 0.282 0.019 |
## Toba Samosir -0.612 0.936 0.084 |
## Labuhan Batu 0.280 0.196 0.033 |
## Asahan -0.726 1.315 0.102 |
## Simalungun 1.811 8.195 0.512 |
## Dairi 0.752 1.413 0.111 |
##
## Variables
## Dim.1 ctr cos2 Dim.2 ctr cos2 Dim.3 ctr
## X1 | 0.844 13.420 0.712 | -0.124 0.871 0.015 | -0.243 4.876
## X2 | 0.826 12.843 0.682 | 0.341 6.630 0.116 | -0.145 1.743
## X3 | 0.901 15.303 0.812 | 0.269 4.131 0.072 | 0.029 0.072
## X4 | 0.040 0.030 0.002 | 0.881 44.298 0.776 | -0.138 1.567
## X5 | 0.955 17.180 0.912 | 0.051 0.147 0.003 | -0.024 0.047
## X6 | 0.686 8.871 0.471 | -0.001 0.000 0.000 | 0.463 17.640
## X7 | 0.742 10.384 0.551 | -0.530 16.049 0.281 | -0.149 1.839
## X8 | 0.860 13.938 0.740 | 0.336 6.447 0.113 | 0.090 0.672
## X9 | 0.640 7.706 0.409 | -0.582 19.339 0.339 | 0.251 5.196
## X10 | -0.131 0.325 0.017 | 0.191 2.086 0.037 | 0.897 66.349
## cos2
## X1 0.059 |
## X2 0.021 |
## X3 0.001 |
## X4 0.019 |
## X5 0.001 |
## X6 0.214 |
## X7 0.022 |
## X8 0.008 |
## X9 0.063 |
## X10 0.805 |
# Eigenvalue
eig.val <-get_eigenvalue(fit.pca)
eig.val
## eigenvalue variance.percent cumulative.variance.percent
## Dim.1 5.30858116 53.0858116 53.08581
## Dim.2 1.75281183 17.5281183 70.61393
## Dim.3 1.21286593 12.1286593 82.74259
## Dim.4 0.57215760 5.7215760 88.46417
## Dim.5 0.35295233 3.5295233 91.99369
## Dim.6 0.26945110 2.6945110 94.68820
## Dim.7 0.20329047 2.0329047 96.72110
## Dim.8 0.17390154 1.7390154 98.46012
## Dim.9 0.10237386 1.0237386 99.48386
## Dim.10 0.05161419 0.5161419 100.00000
# Screeplot
fviz_eig(fit.pca, addlabels = TRUE, ylim = c(0, 100), size = 5)
## Warning in geom_bar(stat = "identity", fill = barfill, color = barcolor, :
## Ignoring empty aesthetic: `width`.

# Coordinates of variables (Correlation circle)
var <- get_pca_var(fit.pca)
head(var$coord)
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5 Dim.6
## X1 0.84403052 -0.123593039 -0.24319739 0.27629322 0.03418761 0.24359568
## X2 0.82569224 0.340897774 -0.14539314 0.26360103 -0.08856874 -0.03657704
## X3 0.90133147 0.269100752 0.02949245 0.07558208 -0.07817511 -0.22129256
## X4 0.03958009 0.881173022 -0.13787964 -0.23792765 0.35655153 0.12348719
## X5 0.95499645 0.050820015 -0.02381695 0.01387724 -0.09428391 0.11591713
## X6 0.68625333 -0.001449887 0.46254214 -0.48749254 -0.22566480 0.12152231
## Dim.7 Dim.8 Dim.9
## X1 -0.22145364 -0.12865832 0.098587324
## X2 0.20457376 0.19842960 0.144192817
## X3 -0.12395556 0.02308680 -0.148451063
## X4 -0.03089404 0.05225886 -0.007690669
## X5 -0.06235011 0.11873835 -0.147195351
## X6 -0.03547325 0.02648354 0.091145231
var <- get_pca_var(fit.pca)
head(var$cos2)
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5 Dim.6
## X1 0.712387519 1.527524e-02 0.0591449704 0.0763379456 0.001168793 0.05933885
## X2 0.681767678 1.162113e-01 0.0211391648 0.0694855031 0.007844422 0.00133788
## X3 0.812398426 7.241521e-02 0.0008698048 0.0057126512 0.006111347 0.04897040
## X4 0.001566584 7.764659e-01 0.0190107959 0.0566095680 0.127128997 0.01524909
## X5 0.912018226 2.582674e-03 0.0005672469 0.0001925778 0.008889457 0.01343678
## X6 0.470943635 2.102173e-06 0.2139452322 0.2376489801 0.050924603 0.01476767
## Dim.7 Dim.8 Dim.9
## X1 0.0490417131 0.0165529632 9.719461e-03
## X2 0.0418504244 0.0393743045 2.079157e-02
## X3 0.0153649819 0.0005330001 2.203772e-02
## X4 0.0009544415 0.0027309884 5.914639e-05
## X5 0.0038875356 0.0140987956 2.166647e-02
## X6 0.0012583514 0.0007013777 8.307453e-03
library(corrplot)
## corrplot 0.95 loaded
corrplot(var$cos2, is.corr=FALSE)

# Total cos2 of variables on Dim.1 and Dim.2
fviz_cos2(fit.pca, choice = "var", axes = 1:2)

# Contributions of variables to PCs
head(var$contrib)
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5 Dim.6
## X1 13.4195465 8.714706e-01 4.87646401 13.34211866 0.3311475 22.0221238
## X2 12.8427476 6.629992e+00 1.74291027 12.14446918 2.2225160 0.4965203
## X3 15.3034945 4.131374e+00 0.07171484 0.99844015 1.7314937 18.1741323
## X4 0.0295104 4.429830e+01 1.56742765 9.89405160 36.0187441 5.6593147
## X5 17.1800750 1.473446e-01 0.04676914 0.03365817 2.5185998 4.9867235
## X6 8.8713654 1.199315e-04 17.63964405 41.53558060 14.4281814 5.4806500
## Dim.7 Dim.8 Dim.9
## X1 24.1239603 9.5185837 9.4940844
## X2 20.5865153 22.6417232 20.3094510
## X3 7.5581416 0.3064954 21.5267046
## X4 0.4694964 1.5704222 0.0577749
## X5 1.9123058 8.1073439 21.1640665
## X6 0.6189918 0.4033189 8.1148188
# Contributions of variables to PC1
fviz_contrib(fit.pca, choice = "var", axes = 1, top = 10)

# Contributions of variables to PC2
fviz_contrib(fit.pca, choice = "var", axes = 2, top = 10)

fviz_pca_var(fit.pca, col.var = "contrib",
gradient.cols = c("#00AFBB",
"#E7B800", "#FC4E07"))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the ggpubr package.
## Please report the issue at <https://github.com/kassambara/ggpubr/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## ℹ The deprecated feature was likely used in the factoextra package.
## Please report the issue at <https://github.com/kassambara/factoextra/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

fit.desc <- dimdesc(fit.pca, axes = c(1,2), proba = 0.05)
# Description of dimension 1
fit.desc$Dim.1
##
## Link between the variable and the continuous variables (R-square)
## =================================================================================
## correlation p.value
## X5 0.9549965 6.450327e-18
## X3 0.9013315 8.512308e-13
## X8 0.8601791 1.406428e-10
## X1 0.8440305 6.803771e-10
## X2 0.8256922 3.330608e-09
## X7 0.7424666 7.549093e-07
## X6 0.6862533 1.039431e-05
## X9 0.6396044 6.137853e-05
# Description of dimension 2
fit.desc$Dim.2
##
## Link between the variable and the continuous variables (R-square)
## =================================================================================
## correlation p.value
## X4 0.8811730 1.314651e-11
## X7 -0.5303864 1.498224e-03
## X9 -0.5822144 3.786634e-04
# Graph of individuals
ind <- get_pca_ind(fit.pca)
ind
## Principal Component Analysis Results for individuals
## ===================================================
## Name Description
## 1 "$coord" "Coordinates for the individuals"
## 2 "$cos2" "Cos2 for the individuals"
## 3 "$contrib" "contributions of the individuals"
# Coordinates of individuals
head(ind$coord)
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## Nias 6.1269501 1.1675543 -0.34574846 0.07555444 -0.16887941
## Mandailing Natal 1.3058422 -1.2898445 0.58004490 0.87749715 -0.14840813
## Tapanuli Selatan 1.2663804 -1.5195988 0.06462915 0.36154816 -0.03802839
## Tapanuli Tengah 1.1221091 -1.2661262 0.01017361 0.95120142 -0.17436727
## Tapanuli Utara 0.3165721 -2.3539173 0.33615434 -0.32908090 0.08171388
## Toba Samosir -1.1398937 -0.9929422 -0.61202167 -0.21655675 -0.60784474
## Dim.6 Dim.7 Dim.8 Dim.9
## Nias -0.3400030 -0.2374305 0.42610873 -0.04319163
## Mandailing Natal 0.2499444 -0.5255386 0.42564804 -0.18505220
## Tapanuli Selatan 0.2755600 -0.1495961 0.75667545 -0.26690034
## Tapanuli Tengah 1.0104554 -0.7190602 -0.25194346 -0.07430483
## Tapanuli Utara 0.1391386 0.3790440 0.05558675 -0.09322234
## Toba Samosir -0.2071953 0.7641105 -0.05145101 -0.84264761
# Quality of individuals
head(ind$cos2)
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## Nias 0.93808248 0.03406485 2.987259e-03 0.0001426502 0.0007126983
## Mandailing Natal 0.33447794 0.32633282 6.599473e-02 0.1510349550 0.0043201757
## Tapanuli Selatan 0.32021530 0.46107502 8.340086e-04 0.0261003309 0.0002887551
## Tapanuli Tengah 0.23249995 0.29601025 1.911185e-05 0.1670697095 0.0056141327
## Tapanuli Utara 0.01655785 0.91546333 1.866964e-02 0.0178922060 0.0011031897
## Toba Samosir 0.29032487 0.22029441 8.369301e-02 0.0104784976 0.0825545243
## Dim.6 Dim.7 Dim.8 Dim.9
## Nias 0.002888802 0.001408722 0.0045372551 4.661778e-05
## Mandailing Natal 0.012253857 0.054174545 0.0355375360 6.716987e-03
## Tapanuli Selatan 0.015161641 0.004468421 0.1143228322 1.422368e-02
## Tapanuli Tengah 0.188532858 0.095473599 0.0117208450 1.019499e-03
## Tapanuli Utara 0.003198556 0.023737652 0.0005105063 1.435815e-03
## Toba Samosir 0.009592135 0.130457141 0.0005914844 1.586527e-01
# Contributions of individuals
head(ind$contrib)
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## Nias 21.4287229 2.356703 0.2986714958 0.03023363 0.24486313
## Mandailing Natal 0.9733948 2.876242 0.8406137505 4.07813705 0.18909732
## Tapanuli Selatan 0.9154529 3.992167 0.0104359044 0.69231336 0.01241612
## Tapanuli Tengah 0.7187498 2.771436 0.0002585969 4.79198411 0.26103572
## Tapanuli Utara 0.0572075 9.579287 0.2823258903 0.57355588 0.05732732
## Toba Samosir 0.7417137 1.704507 0.9358513428 0.24837841 3.17216177
## Dim.6 Dim.7 Dim.8 Dim.9
## Nias 1.3000846 0.8403148 3.16390554 0.05521998
## Mandailing Natal 0.7025753 4.1169753 3.15706781 1.01364407
## Tapanuli Selatan 0.8539621 0.3335872 9.97704489 2.10860511
## Tapanuli Tengah 11.4826048 7.7072525 1.10608582 0.16342974
## Tapanuli Utara 0.2177216 2.1416488 0.05384250 0.25723910
## Toba Samosir 0.4827984 8.7032474 0.04612861 21.01788336
# Plot quality and contribution
fviz_pca_ind(fit.pca, col.ind = "cos2",
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE # Avoid text overlapping (slow if many points)
)

# Contribution to individuals to Dim 1-2
fviz_contrib(fit.pca, choice = "ind", axes = 1:2)
