Dataset –> Maternal Mortality di Jawa Barat Tahun 2006
Keterangan:
* X1: Persentase sarana kesehatan
* X2: Persentase
pengeluaran biaya kesehatan per kapita sebulan
* X3: Persentase
wanita yang menikah dibawah umur
* X4: Persentase penolong proses
kelahiran tenaga non medis
* X5: Persentase penduduk miskin
*
X6: Persentase wanita dengan pendidikan tertinggi SD
* X7:
Persentase tenaga medis dan paramedis
Link Data –> Data_PCAFA.csv
library(readr) # import csv
library(psych) # MSA, PCA, FA
library(dplyr) # wrangling
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
#import data
data <- read_csv("Data_PCAFA.csv")
## Rows: 40 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (7): X1, X2, X3, X4, X5, X6, X7
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
data <- as.data.frame(data)
str(data)
## 'data.frame': 40 obs. of 7 variables:
## $ X1: num 13.6 11.6 11 14.2 13.8 11.9 17.7 10.9 11 10.2 ...
## $ X2: num 51.6 75.7 42.6 53.3 29.1 ...
## $ X3: num 25.7 15.9 27.2 19.4 38.9 ...
## $ X4: num 6.55 3.46 14.79 9.42 30.47 ...
## $ X5: num 89.5 6.17 17.51 20.18 29.3 ...
## $ X6: num 32.3 28.1 31.3 30.7 38.9 ...
## $ X7: num 61.1 92.7 47.9 74.2 51.6 64.8 95 65.4 58.4 53.3 ...
cat("Banyak missing value \n", sum(is.na(data)))
## Banyak missing value
## 0
cor(data)
## X1 X2 X3 X4 X5 X6
## X1 1.0000000 -0.21806761 0.2337583 0.18547333 0.15015685 0.29829893
## X2 -0.2180676 1.00000000 -0.4070603 -0.53592906 -0.15924923 0.04872265
## X3 0.2337583 -0.40706029 1.0000000 0.63388124 0.34015581 0.13011964
## X4 0.1854733 -0.53592906 0.6338812 1.00000000 0.34868797 -0.02177375
## X5 0.1501568 -0.15924923 0.3401558 0.34868797 1.00000000 0.04927805
## X6 0.2982989 0.04872265 0.1301196 -0.02177375 0.04927805 1.00000000
## X7 0.5796149 0.01691967 -0.0658386 -0.17972868 -0.14591032 0.09886546
## X7
## X1 0.57961488
## X2 0.01691967
## X3 -0.06583860
## X4 -0.17972868
## X5 -0.14591032
## X6 0.09886546
## X7 1.00000000
#Check MSA
r <- cor(data)
KMO(r)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = r)
## Overall MSA = 0.6
## MSA for each item =
## X1 X2 X3 X4 X5 X6 X7
## 0.50 0.75 0.72 0.65 0.73 0.42 0.44
# delete X6
data <- data[-6]
r <- cor(data)
KMO(r)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = r)
## Overall MSA = 0.63
## MSA for each item =
## X1 X2 X3 X4 X5 X7
## 0.51 0.75 0.74 0.66 0.73 0.44
# delete X7
data <- data[-6]
r <- cor(data)
KMO(r)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = r)
## Overall MSA = 0.72
## MSA for each item =
## X1 X2 X3 X4 X5
## 0.80 0.74 0.72 0.66 0.80
#Bartlett Test
bartlett.test(data)
##
## Bartlett test of homogeneity of variances
##
## data: data
## Bartlett's K-squared = 58.207, df = 4, p-value = 6.903e-12
# scale data
scale_data = scale(data)
r = cov(scale_data)
# eigen value and vector
pc <- eigen(r)
print(pc$values)
## [1] 2.3687401 0.8932720 0.8545650 0.5505003 0.3329226
print(pc$vectors)
## [,1] [,2] [,3] [,4] [,5]
## [1,] 0.2774585 0.9361873 0.1894472 -0.04866729 0.09116685
## [2,] -0.4593564 -0.0337844 0.5290471 -0.64621977 0.30059850
## [3,] 0.5285888 -0.1331538 -0.0172715 -0.63656967 -0.54529326
## [4,] 0.5529758 -0.2145651 -0.1614117 -0.15418213 0.77353324
## [5,] 0.3561180 -0.2421651 0.8110899 0.38863664 -0.07503824
## variance proportion and cumulative variance
sumvar <- sum(pc$values)
propvar <- sapply(pc$values, function(x) x/sumvar)*100
cumvar <- data.frame(cbind(pc$values, propvar)) %>% mutate(cum = cumsum(propvar))
colnames(cumvar)[1] <- "eigen_value"
row.names(cumvar) <- paste0("PC",c(1:ncol(data)))
print(cumvar)
## eigen_value propvar cum
## PC1 2.3687401 47.374802 47.37480
## PC2 0.8932720 17.865441 65.24024
## PC3 0.8545650 17.091300 82.33154
## PC4 0.5505003 11.010006 93.34155
## PC5 0.3329226 6.658452 100.00000
# PCA result
scores <- as.matrix(scale_data) %*% pc$vectors
scores_PC <- scores[,1:3]
head(scores_PC)
## [,1] [,2] [,3]
## [1,] 0.7403167 -0.48381152 4.56422511
## [2,] -2.7600769 0.60523064 0.79308153
## [3,] -0.5786297 0.13740865 -0.01603602
## [4,] -1.1755182 0.82632211 0.73964119
## [5,] 1.2825547 0.21700912 0.06948287
## [6,] 1.0593820 -0.08294296 -0.33752440
pc <- principal(data, nfactors = ncol(data), rotate = "none")
pc
## Principal Components Analysis
## Call: principal(r = data, nfactors = ncol(data), rotate = "none")
## Standardized loadings (pattern matrix) based upon correlation matrix
## PC1 PC2 PC3 PC4 PC5 h2 u2 com
## X1 0.43 0.88 0.18 0.04 0.05 1 3.3e-16 1.5
## X2 -0.71 -0.03 0.49 0.48 0.17 1 0.0e+00 2.8
## X3 0.81 -0.13 -0.02 0.47 -0.31 1 -2.2e-16 2.0
## X4 0.85 -0.20 -0.15 0.11 0.45 1 1.1e-16 1.8
## X5 0.55 -0.23 0.75 -0.29 -0.04 1 -2.2e-16 2.4
##
## PC1 PC2 PC3 PC4 PC5
## SS loadings 2.37 0.89 0.85 0.55 0.33
## Proportion Var 0.47 0.18 0.17 0.11 0.07
## Cumulative Var 0.47 0.65 0.82 0.93 1.00
## Proportion Explained 0.47 0.18 0.17 0.11 0.07
## Cumulative Proportion 0.47 0.65 0.82 0.93 1.00
##
## Mean item complexity = 2.1
## Test of the hypothesis that 5 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0
## with the empirical chi square 0 with prob < NA
##
## Fit based upon off diagonal values = 1
pc <- principal(data, nfactors = 3, rotate = "none")
pc
## Principal Components Analysis
## Call: principal(r = data, nfactors = 3, rotate = "none")
## Standardized loadings (pattern matrix) based upon correlation matrix
## PC1 PC2 PC3 h2 u2 com
## X1 0.43 0.88 0.18 1.00 0.0041 1.5
## X2 -0.71 -0.03 0.49 0.74 0.2600 1.8
## X3 0.81 -0.13 -0.02 0.68 0.3221 1.0
## X4 0.85 -0.20 -0.15 0.79 0.2123 1.2
## X5 0.55 -0.23 0.75 0.91 0.0850 2.0
##
## PC1 PC2 PC3
## SS loadings 2.37 0.89 0.85
## Proportion Var 0.47 0.18 0.17
## Cumulative Var 0.47 0.65 0.82
## Proportion Explained 0.58 0.22 0.21
## Cumulative Proportion 0.58 0.79 1.00
##
## Mean item complexity = 1.5
## Test of the hypothesis that 3 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.1
## with the empirical chi square 7.61 with prob < NA
##
## Fit based upon off diagonal values = 0.93
# score PC principal
L <- as.matrix(pc$loadings) # loadings
lambda <- pc$values # eigenvalues
lambda_k <- lambda[1:ncol(L)]
V <- sweep(L, 2, sqrt(lambda_k), "/") # eigenvector
scores_PC <- scale_data %*% as.matrix(V)
scores_PC
## PC1 PC2 PC3
## [1,] 0.74031674 -0.48381152 4.56422511
## [2,] -2.76007691 0.60523064 0.79308153
## [3,] -0.57862973 0.13740865 -0.01603602
## [4,] -1.17551819 0.82632211 0.73964119
## [5,] 1.28255470 0.21700912 0.06948287
## [6,] 1.05938200 -0.08294296 -0.33752440
## [7,] 0.27380630 1.41843424 -0.16937655
## [8,] -0.56935932 0.20912373 -0.12310089
## [9,] -0.53170161 0.13789558 -0.14458085
## [10,] -1.22308686 -0.06954794 0.75485434
## [11,] 0.60625085 -0.02318013 0.47910471
## [12,] -0.05353871 -0.16095565 0.39944460
## [13,] -0.20146767 0.97759442 -0.15746434
## [14,] 0.28455391 0.32475431 -0.41869867
## [15,] -0.64110093 0.22342058 0.33489191
## [16,] -0.95997392 0.44911255 0.66248939
## [17,] -0.46361313 0.09249391 0.50089054
## [18,] 0.41078171 1.32584116 -0.39728874
## [19,] 0.75416348 0.51298626 -0.55615738
## [20,] 3.00330741 0.05119002 -0.20940471
## [21,] 0.96976874 -0.36743675 -0.44588756
## [22,] 3.83909270 -1.32069303 0.53526926
## [23,] 1.69993415 -0.66092961 0.17164297
## [24,] 2.86096781 1.62548706 -0.58691595
## [25,] 0.47871863 1.39434135 0.22555956
## [26,] 2.02271952 -1.06203149 -1.24922487
## [27,] 2.59113036 -1.46363486 0.63563116
## [28,] 1.19656205 -0.23962872 -0.78416333
## [29,] -1.65422248 0.97971808 0.22228092
## [30,] -0.55233312 -1.59787460 -0.50296696
## [31,] -1.70889382 -1.32347899 -0.50878007
## [32,] -1.58030897 -0.92157185 -0.52708854
## [33,] -2.81028281 -1.37473879 0.56267276
## [34,] -2.35781493 -0.36279878 -0.11167178
## [35,] -0.55538616 -1.39546702 -0.41276967
## [36,] -0.10034146 2.47849587 -0.16533007
## [37,] -0.74217397 0.34166511 -0.94774463
## [38,] -1.39818153 -1.19200673 -1.21009534
## [39,] -1.33444925 -0.03623941 -0.99800473
## [40,] -0.12155560 -0.18955592 -0.67088680
library('FactoMineR')
library('factoextra')
## Loading required package: ggplot2
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
#using library FactoMineR
# https://rpubs.com/cahyaalkahfi/pca-with-r
pca_result <- PCA(scale_data,
scale.unit = TRUE,
graph = FALSE,
ncp=ncol(data))
# summary pca result
pca_result$eig # vs print(cumvar)
## eigenvalue percentage of variance cumulative percentage of variance
## comp 1 2.3687401 47.374802 47.37480
## comp 2 0.8932720 17.865441 65.24024
## comp 3 0.8545650 17.091300 82.33154
## comp 4 0.5505003 11.010006 93.34155
## comp 5 0.3329226 6.658452 100.00000
pca_result$svd$V # vs pc$vectors
## [,1] [,2] [,3] [,4] [,5]
## [1,] 0.2774585 0.9361873 0.1894472 0.04866729 0.09116685
## [2,] -0.4593564 -0.0337844 0.5290471 0.64621977 0.30059850
## [3,] 0.5285888 -0.1331538 -0.0172715 0.63656967 -0.54529326
## [4,] 0.5529758 -0.2145651 -0.1614117 0.15418213 0.77353324
## [5,] 0.3561180 -0.2421651 0.8110899 -0.38863664 -0.07503824
pca_result$ind['coord'] # vs head(scores)
## $coord
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## 1 0.74974791 -0.48997497 4.62237045 -1.53582011 -0.56427457
## 2 -2.79523855 0.61294089 0.80318489 1.37736731 0.68693166
## 3 -0.58600111 0.13915915 -0.01624031 0.10185258 -0.16569793
## 4 -1.19049354 0.83684893 0.74906376 0.14890710 0.24302498
## 5 1.29889363 0.21977368 0.07036804 -0.13518585 -0.35311924
## 6 1.07287786 -0.08399960 -0.34182425 0.02620766 -0.13889529
## 7 0.27729442 1.43650419 -0.17153430 0.13356994 -0.42272520
## 8 -0.57661260 0.21178783 -0.12466911 -0.32795325 -0.71498681
## 9 -0.53847515 0.13965228 -0.14642272 0.07907412 -0.03272075
## 10 -1.23866822 -0.07043394 0.76447070 0.77754504 0.15265144
## 11 0.61397411 -0.02347543 0.48520820 -0.48231630 -0.12194594
## 12 -0.05422076 -0.16300612 0.40453327 -0.59952866 1.04848459
## 13 -0.20403424 0.99004836 -0.15947034 -0.22595515 0.05225087
## 14 0.28817895 0.32889147 -0.42403263 -0.35101806 -0.47020373
## 15 -0.64926815 0.22626682 0.33915822 0.65766033 0.39366972
## 16 -0.97220338 0.45483396 0.67092909 0.85749435 0.30014760
## 17 -0.46951927 0.09367222 0.50727157 0.37836973 0.08935175
## 18 0.41601481 1.34273154 -0.40234995 0.13006600 -0.31907357
## 19 0.76377104 0.51952138 -0.56324247 0.06306791 -0.36212889
## 20 3.04156765 0.05184214 -0.21207239 1.35570383 -0.23713078
## 21 0.98212298 -0.37211766 -0.45156789 0.08410708 -0.61102648
## 22 3.88800031 -1.33751782 0.54208825 0.37643993 -0.61379786
## 23 1.72159023 -0.66934944 0.17382959 1.33770892 -1.08910780
## 24 2.89741473 1.64619474 -0.59439288 0.18757605 0.63424809
## 25 0.48481720 1.41210438 0.22843305 0.68105402 -0.08990231
## 26 2.04848769 -1.07556110 -1.26513921 0.03210639 1.18144170
## 27 2.62413972 -1.48228064 0.64372870 -1.02022569 1.57909037
## 28 1.21180550 -0.24268144 -0.79415307 -0.47021366 1.35411733
## 29 -1.67529623 0.99219907 0.22511264 -0.28473935 0.29519737
## 30 -0.55936949 -1.61823052 -0.50937444 0.92701203 -0.21806683
## 31 -1.73066404 -1.34033927 -0.51526161 -0.57618818 -0.29988903
## 32 -1.60044110 -0.93331209 -0.53380331 -0.70955924 0.05654519
## 33 -2.84608404 -1.39225209 0.56984085 1.03318234 0.51388867
## 34 -2.38785200 -0.36742061 -0.11309441 0.34218458 0.25858606
## 35 -0.56246143 -1.41324439 -0.41802809 0.83145991 -0.38555893
## 36 -0.10161974 2.51007034 -0.16743627 -0.79742416 0.06974389
## 37 -0.75162879 0.34601771 -0.95981829 -1.36828762 -0.18854042
## 38 -1.41599348 -1.20719214 -1.22551119 -1.27098999 -0.77802058
## 39 -1.35144929 -0.03670108 -1.01071868 -0.94662744 -0.29363247
## 40 -0.12310414 -0.19197074 -0.67943347 -0.81768445 -0.43892588
# scree plot
fviz_eig(pca_result,
addlabels = TRUE,
ncp = ncol(data),
barfill = "skyblue",
barcolor = "darkblue",
linecolor = "red")
#Biplot
fviz_pca_biplot(pca_result,
geom.ind = "point",
#col.ind = status.ipm,
#palette = c("#FC4E07","#E7B800", "#00AFBB"),
addEllipses = TRUE,
#legend.title = "Kategori"
)
# correlation circle
contrib_circle <- fviz_pca_var(pca_result, col.var = "contrib",
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE) +
ggtitle("Kontribusi Variabel")
plot(contrib_circle)
# variable contribution
contrib_v_PC1 <- fviz_contrib(pca_result, choice = "var", axes = 1, top = 5) + ggtitle("PC1")
plot(contrib_v_PC1)
contrib_v_PC2 <- fviz_contrib(pca_result, choice = "var", axes = 2, top = 5) + ggtitle("PC2")
plot(contrib_v_PC2)
contrib_v_PC3 <- fviz_contrib(pca_result, choice = "var", axes = 3, top = 5) + ggtitle("PC3")
plot(contrib_v_PC3)
# Factor Analysis
varcov = cov(scale_data)
pc = eigen(varcov)
cat("eigen value:")
## eigen value:
pc$values
## [1] 2.3687401 0.8932720 0.8545650 0.5505003 0.3329226
cat("eigen vector:")
## eigen vector:
pc$vectors
## [,1] [,2] [,3] [,4] [,5]
## [1,] 0.2774585 0.9361873 0.1894472 -0.04866729 0.09116685
## [2,] -0.4593564 -0.0337844 0.5290471 -0.64621977 0.30059850
## [3,] 0.5285888 -0.1331538 -0.0172715 -0.63656967 -0.54529326
## [4,] 0.5529758 -0.2145651 -0.1614117 -0.15418213 0.77353324
## [5,] 0.3561180 -0.2421651 0.8110899 0.38863664 -0.07503824
sp = sum(pc$values[1:3])
L1 = sqrt(pc$values[1])*pc$vectors[,1]
L2 = sqrt(pc$values[2])*pc$vectors[,2]
L3 = sqrt(pc$values[3])*pc$vectors[,3]
L = cbind(L1,L2,L3)
cat("factor loading:")
## factor loading:
L
## L1 L2 L3
## [1,] 0.4270284 0.88481933 0.17513011
## [2,] -0.7069822 -0.03193068 0.48906534
## [3,] 0.8135357 -0.12584773 -0.01596624
## [4,] 0.8510692 -0.20279202 -0.14921334
## [5,] 0.5480910 -0.22887763 0.74979329
fa <- principal(scale_data, nfactors = 3, rotate = "none")
fa
## Principal Components Analysis
## Call: principal(r = scale_data, nfactors = 3, rotate = "none")
## Standardized loadings (pattern matrix) based upon correlation matrix
## PC1 PC2 PC3 h2 u2 com
## X1 0.43 0.88 0.18 1.00 0.0041 1.5
## X2 -0.71 -0.03 0.49 0.74 0.2600 1.8
## X3 0.81 -0.13 -0.02 0.68 0.3221 1.0
## X4 0.85 -0.20 -0.15 0.79 0.2123 1.2
## X5 0.55 -0.23 0.75 0.91 0.0850 2.0
##
## PC1 PC2 PC3
## SS loadings 2.37 0.89 0.85
## Proportion Var 0.47 0.18 0.17
## Cumulative Var 0.47 0.65 0.82
## Proportion Explained 0.58 0.22 0.21
## Cumulative Proportion 0.58 0.79 1.00
##
## Mean item complexity = 1.5
## Test of the hypothesis that 3 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.1
## with the empirical chi square 7.61 with prob < NA
##
## Fit based upon off diagonal values = 0.93
# Factor Analysis with rotation varimax
fa_1 <- principal(scale_data, nfactors = 3, rotate = "varimax")
fa_1
## Principal Components Analysis
## Call: principal(r = scale_data, nfactors = 3, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
## RC1 RC3 RC2 h2 u2 com
## X1 0.13 0.08 0.99 1.00 0.0041 1.0
## X2 -0.84 0.13 -0.15 0.74 0.2600 1.1
## X3 0.73 0.37 0.11 0.68 0.3221 1.5
## X4 0.84 0.29 0.03 0.79 0.2123 1.2
## X5 0.15 0.94 0.07 0.91 0.0850 1.1
##
## RC1 RC3 RC2
## SS loadings 1.97 1.13 1.01
## Proportion Var 0.39 0.23 0.20
## Cumulative Var 0.39 0.62 0.82
## Proportion Explained 0.48 0.28 0.25
## Cumulative Proportion 0.48 0.75 1.00
##
## Mean item complexity = 1.2
## Test of the hypothesis that 3 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.1
## with the empirical chi square 7.61 with prob < NA
##
## Fit based upon off diagonal values = 0.93
# scores FA
scores_FA = scale_data %*% solve(cor(scale_data)) %*% as.matrix(fa$loadings)
scores_FA
## PC1 PC2 PC3
## [1,] 0.48101527 -0.51189906 4.93735675
## [2,] -1.79333936 0.64036713 0.85791703
## [3,] -0.37596035 0.14538587 -0.01734699
## [4,] -0.76378416 0.87429401 0.80010787
## [5,] 0.83333034 0.22960753 0.07516319
## [6,] 0.68832554 -0.08775819 -0.36511748
## [7,] 0.17790360 1.50078104 -0.18322331
## [8,] -0.36993697 0.22126435 -0.13316455
## [9,] -0.34546915 0.14590107 -0.15640053
## [10,] -0.79469155 -0.07358553 0.81656471
## [11,] 0.39390696 -0.02452585 0.51827218
## [12,] -0.03478638 -0.17029989 0.43209974
## [13,] -0.13090211 1.03434839 -0.17033726
## [14,] 0.18488678 0.34360783 -0.45292786
## [15,] -0.41655054 0.23639120 0.36226978
## [16,] -0.62373588 0.47518565 0.71664880
## [17,] -0.30122917 0.09786362 0.54183903
## [18,] 0.26690235 1.40281250 -0.42976764
## [19,] 0.49001208 0.54276754 -0.60162400
## [20,] 1.95137656 0.05416184 -0.22652383
## [21,] 0.63010000 -0.38876819 -0.48233948
## [22,] 2.49442181 -1.39736550 0.57902825
## [23,] 1.10451952 -0.69929970 0.18567502
## [24,] 1.85889246 1.71985426 -0.63489713
## [25,] 0.31104386 1.47528945 0.24399936
## [26,] 1.31424690 -1.12368743 -1.35135071
## [27,] 1.68356760 -1.54860577 0.68759488
## [28,] 0.77745726 -0.25354030 -0.84826975
## [29,] -1.07481870 1.03659534 0.24045269
## [30,] -0.35887432 -1.69063877 -0.54408520
## [31,] -1.11034099 -1.40031319 -0.55037354
## [32,] -1.02679395 -0.97507344 -0.57017875
## [33,] -1.82596027 -1.45454886 0.60867202
## [34,] -1.53197264 -0.38386096 -0.12080110
## [35,] -0.36085801 -1.47648047 -0.44651415
## [36,] -0.06519611 2.62238426 -0.17884603
## [37,] -0.48222199 0.36150039 -1.02522404
## [38,] -0.90845801 -1.26120835 -1.30902229
## [39,] -0.86704843 -0.03834328 -1.07959298
## [40,] -0.07897984 -0.20056053 -0.72573271