Library yang diperlukan
library(readr)
library(psych)
library(corrplot)
## corrplot 0.95 loaded
library(factoextra)
## Loading required package: ggplot2
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
## Welcome to factoextra!
## Want to learn more? See two factoextra-related books at https://www.datanovia.com/en/product/practical-guide-to-principal-component-methods-in-r/
Import dan Cek Informasi Dataset
# Read parkinsons_updrs.data.csv (path relative to the working directory)
# into a tibble named `data`.
data <- readr::read_csv(file = "parkinsons_updrs.data.csv")
## Rows: 5875 Columns: 23
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (23): index, subject#, age, sex, test_time, motor_UPDRS, total_UPDRS, Ji...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first six rows of the imported table.
utils::head(x = data, n = 6L)
## # A tibble: 6 × 23
## index `subject#` age sex test_time motor_UPDRS total_UPDRS `Jitter(%)`
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0 1 72 0 5.64 28.2 34.4 0.00662
## 2 1 1 72 0 12.7 28.4 34.9 0.003
## 3 2 1 72 0 19.7 28.7 35.4 0.00481
## 4 3 1 72 0 25.6 28.9 35.8 0.00528
## 5 4 1 72 0 33.6 29.2 36.4 0.00335
## 6 5 1 72 0 40.7 29.4 36.9 0.00353
## # ℹ 15 more variables: `Jitter(Abs)` <dbl>, `Jitter:RAP` <dbl>,
## # `Jitter:PPQ5` <dbl>, `Jitter:DDP` <dbl>, Shimmer <dbl>,
## # `Shimmer(dB)` <dbl>, `Shimmer:APQ3` <dbl>, `Shimmer:APQ5` <dbl>,
## # `Shimmer:APQ11` <dbl>, `Shimmer:DDA` <dbl>, NHR <dbl>, HNR <dbl>,
## # RPDE <dbl>, DFA <dbl>, PPE <dbl>
# List every column name so the voice-measure columns can be picked below.
names(data)
## [1] "index" "subject#" "age" "sex"
## [5] "test_time" "motor_UPDRS" "total_UPDRS" "Jitter(%)"
## [9] "Jitter(Abs)" "Jitter:RAP" "Jitter:PPQ5" "Jitter:DDP"
## [13] "Shimmer" "Shimmer(dB)" "Shimmer:APQ3" "Shimmer:APQ5"
## [17] "Shimmer:APQ11" "Shimmer:DDA" "NHR" "HNR"
## [21] "RPDE" "DFA" "PPE"
Gunakan Fitur yang Sesuai dengan Rules
# Keep only the 16 voice-measure columns; index, subject#, age, sex,
# test_time, motor_UPDRS and total_UPDRS are left out of the analysis.
voice_data <- data[c(
  # jitter columns
  "Jitter(%)", "Jitter(Abs)", "Jitter:RAP", "Jitter:PPQ5", "Jitter:DDP",
  # shimmer columns
  "Shimmer", "Shimmer(dB)", "Shimmer:APQ3", "Shimmer:APQ5", "Shimmer:APQ11",
  "Shimmer:DDA",
  # remaining columns
  "NHR", "HNR", "RPDE", "DFA", "PPE"
)]
Statistik Deskriptif
# Per-variable descriptive statistics (n, mean, sd, median, skew, kurtosis, ...).
psych::describe(x = voice_data)
## vars n mean sd median trimmed mad min max range skew
## Jitter(%) 1 5875 0.01 0.01 0.00 0.01 0.00 0.00 0.10 0.10 6.45
## Jitter(Abs) 2 5875 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 3.28
## Jitter:RAP 3 5875 0.00 0.00 0.00 0.00 0.00 0.00 0.06 0.06 6.94
## Jitter:PPQ5 4 5875 0.00 0.00 0.00 0.00 0.00 0.00 0.07 0.07 7.58
## Jitter:DDP 5 5875 0.01 0.01 0.01 0.01 0.00 0.00 0.17 0.17 6.94
## Shimmer 6 5875 0.03 0.03 0.03 0.03 0.01 0.00 0.27 0.27 3.31
## Shimmer(dB) 7 5875 0.31 0.23 0.25 0.27 0.13 0.03 2.11 2.08 3.10
## Shimmer:APQ3 8 5875 0.02 0.01 0.01 0.01 0.01 0.00 0.16 0.16 3.10
## Shimmer:APQ5 9 5875 0.02 0.02 0.02 0.02 0.01 0.00 0.17 0.17 3.70
## Shimmer:APQ11 10 5875 0.03 0.02 0.02 0.02 0.01 0.00 0.28 0.27 3.41
## Shimmer:DDA 11 5875 0.05 0.04 0.04 0.04 0.02 0.00 0.49 0.48 3.10
## NHR 12 5875 0.03 0.06 0.02 0.02 0.01 0.00 0.75 0.75 6.55
## HNR 13 5875 21.68 4.29 21.92 21.89 3.73 1.66 37.88 36.22 -0.81
## RPDE 14 5875 0.54 0.10 0.54 0.54 0.11 0.15 0.97 0.82 -0.04
## DFA 15 5875 0.65 0.07 0.64 0.65 0.08 0.51 0.87 0.35 0.28
## PPE 16 5875 0.22 0.09 0.21 0.21 0.08 0.02 0.73 0.71 1.09
## kurtosis se
## Jitter(%) 67.41 0.00
## Jitter(Abs) 18.13 0.00
## Jitter:RAP 78.44 0.00
## Jitter:PPQ5 81.47 0.00
## Jitter:DDP 78.44 0.00
## Shimmer 15.22 0.00
## Shimmer(dB) 13.07 0.00
## Shimmer:APQ3 14.70 0.00
## Shimmer:APQ5 19.22 0.00
## Shimmer:APQ11 19.14 0.00
## Shimmer:DDA 14.71 0.00
## NHR 52.54 0.00
## HNR 2.50 0.06
## RPDE -0.07 0.00
## DFA -0.88 0.00
## PPE 1.95 0.00
Matriks Korelasi
# Pearson correlation matrix of the voice measures; it is reused by the
# correlation plot, Bartlett's test and the eigen decomposition further down.
cor_matrix <- stats::cor(x = voice_data, method = "pearson")
# Show the matrix rounded to three decimals.
round(cor_matrix, digits = 3)
## Jitter(%) Jitter(Abs) Jitter:RAP Jitter:PPQ5 Jitter:DDP Shimmer
## Jitter(%) 1.000 0.866 0.984 0.968 0.984 0.710
## Jitter(Abs) 0.866 1.000 0.845 0.791 0.845 0.649
## Jitter:RAP 0.984 0.845 1.000 0.947 1.000 0.682
## Jitter:PPQ5 0.968 0.791 0.947 1.000 0.947 0.733
## Jitter:DDP 0.984 0.845 1.000 0.947 1.000 0.682
## Shimmer 0.710 0.649 0.682 0.733 0.682 1.000
## Shimmer(dB) 0.717 0.656 0.686 0.735 0.686 0.992
## Shimmer:APQ3 0.664 0.624 0.650 0.677 0.650 0.980
## Shimmer:APQ5 0.694 0.621 0.660 0.734 0.660 0.985
## Shimmer:APQ11 0.646 0.590 0.603 0.668 0.603 0.935
## Shimmer:DDA 0.664 0.624 0.650 0.677 0.650 0.980
## NHR 0.825 0.700 0.792 0.865 0.792 0.795
## HNR -0.675 -0.706 -0.641 -0.662 -0.641 -0.801
## RPDE 0.427 0.547 0.383 0.382 0.383 0.468
## DFA 0.227 0.352 0.215 0.175 0.215 0.133
## PPE 0.722 0.788 0.671 0.663 0.671 0.616
## Shimmer(dB) Shimmer:APQ3 Shimmer:APQ5 Shimmer:APQ11 Shimmer:DDA
## Jitter(%) 0.717 0.664 0.694 0.646 0.664
## Jitter(Abs) 0.656 0.624 0.621 0.590 0.624
## Jitter:RAP 0.686 0.650 0.660 0.603 0.650
## Jitter:PPQ5 0.735 0.677 0.734 0.668 0.677
## Jitter:DDP 0.686 0.650 0.660 0.603 0.650
## Shimmer 0.992 0.980 0.985 0.935 0.980
## Shimmer(dB) 1.000 0.968 0.976 0.936 0.968
## Shimmer:APQ3 0.968 1.000 0.963 0.886 1.000
## Shimmer:APQ5 0.976 0.963 1.000 0.939 0.963
## Shimmer:APQ11 0.936 0.886 0.939 1.000 0.886
## Shimmer:DDA 0.968 1.000 0.963 0.886 1.000
## NHR 0.798 0.733 0.798 0.712 0.733
## HNR -0.802 -0.781 -0.791 -0.778 -0.781
## RPDE 0.472 0.437 0.451 0.481 0.437
## DFA 0.126 0.131 0.128 0.180 0.131
## PPE 0.635 0.577 0.594 0.623 0.577
## NHR HNR RPDE DFA PPE
## Jitter(%) 0.825 -0.675 0.427 0.227 0.722
## Jitter(Abs) 0.700 -0.706 0.547 0.352 0.788
## Jitter:RAP 0.792 -0.641 0.383 0.215 0.671
## Jitter:PPQ5 0.865 -0.662 0.382 0.175 0.663
## Jitter:DDP 0.792 -0.641 0.383 0.215 0.671
## Shimmer 0.795 -0.801 0.468 0.133 0.616
## Shimmer(dB) 0.798 -0.802 0.472 0.126 0.635
## Shimmer:APQ3 0.733 -0.781 0.437 0.131 0.577
## Shimmer:APQ5 0.798 -0.791 0.451 0.128 0.594
## Shimmer:APQ11 0.712 -0.778 0.481 0.180 0.623
## Shimmer:DDA 0.733 -0.781 0.437 0.131 0.577
## NHR 1.000 -0.684 0.417 -0.022 0.565
## HNR -0.684 1.000 -0.659 -0.291 -0.759
## RPDE 0.417 -0.659 1.000 0.192 0.566
## DFA -0.022 -0.291 0.192 1.000 0.395
## PPE 0.565 -0.759 0.566 0.395 1.000
# Colour-coded heat map of the correlation matrix; tl.cex shrinks the
# variable labels so all 16 names fit.
corrplot::corrplot(
  corr = cor_matrix,
  method = "color",
  tl.cex = 0.6
)
Uji Kelayakan Analisis Faktor
KMO Test
# Kaiser-Meyer-Olkin sampling adequacy (overall and per variable).
KMO(r = voice_data)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = voice_data)
## Overall MSA = 0.93
## MSA for each item =
## Jitter(%) Jitter(Abs) Jitter:RAP Jitter:PPQ5 Jitter:DDP
## 0.94 0.95 0.89 0.93 0.89
## Shimmer Shimmer(dB) Shimmer:APQ3 Shimmer:APQ5 Shimmer:APQ11
## 0.93 0.94 0.89 0.95 0.95
## Shimmer:DDA NHR HNR RPDE DFA
## 0.89 0.94 0.95 0.92 0.71
## PPE
## 0.94
Bartlett Test
# Bartlett's test on the correlation matrix, with the sample size taken from
# the number of rows in the analysed data.
psych::cortest.bartlett(
  R = cor_matrix,
  n = nrow(voice_data)
)
## $chisq
## [1] 349581.3
##
## $p.value
## [1] 0
##
## $df
## [1] 120
Analisis PCA
# Single-component PCA of the voice measures.
# psych::principal() has no `scale` argument: the former `scale = TRUE` was
# silently absorbed by `...` and had no effect. principal() analyses the
# correlation matrix by default, so the variables are already standardized.
# `nfactors = 1` spells out the default that yields the one-component
# solution reported below.
pca_result <- principal(voice_data, nfactors = 1)
pca_result
## Principal Components Analysis
## Call: principal(r = voice_data, nfactors = 1)
## Standardized loadings (pattern matrix) based upon correlation matrix
## PC1 h2 u2 com
## Jitter(%) 0.89 0.801 0.20 1
## Jitter(Abs) 0.84 0.702 0.30 1
## Jitter:RAP 0.87 0.754 0.25 1
## Jitter:PPQ5 0.89 0.790 0.21 1
## Jitter:DDP 0.87 0.754 0.25 1
## Shimmer 0.93 0.874 0.13 1
## Shimmer(dB) 0.94 0.875 0.12 1
## Shimmer:APQ3 0.90 0.815 0.18 1
## Shimmer:APQ5 0.92 0.849 0.15 1
## Shimmer:APQ11 0.88 0.771 0.23 1
## Shimmer:DDA 0.90 0.815 0.18 1
## NHR 0.87 0.752 0.25 1
## HNR -0.86 0.742 0.26 1
## RPDE 0.56 0.315 0.68 1
## DFA 0.24 0.056 0.94 1
## PPE 0.77 0.596 0.40 1
##
## PC1
## SS loadings 11.26
## Proportion Var 0.70
##
## Mean item complexity = 1
## Test of the hypothesis that 1 component is sufficient.
##
## The root mean square of the residuals (RMSR) is 0.08
## with the empirical chi square 8861.37 with prob < 0
##
## Fit based upon off diagonal values = 0.99
Eigen Value PCA
# Eigen decomposition of the correlation matrix. Despite its name,
# `eigen_values` holds the full result (values and vectors).
eigen_values <- base::eigen(x = cor_matrix)
# Print only the eigenvalues, largest first.
eigen_values[["values"]]
## [1] 1.126105e+01 1.672900e+00 1.240510e+00 7.648671e-01 3.092504e-01
## [6] 2.228822e-01 1.727628e-01 1.617443e-01 1.030749e-01 4.352376e-02
## [11] 2.028743e-02 1.401483e-02 8.666707e-03 4.464222e-03 3.778333e-07
## [16] 2.084635e-08
Scree Plot PCA
# Fit a PCA on standardized variables for the factoextra plots.
# The prcomp() argument is `scale.` (with a trailing dot); it is spelled out
# here instead of relying on partial matching of `scale`.
pca_model <- prcomp(voice_data, scale. = TRUE)

# Scree plot of the percentage of variance explained per component.
# The first component explains about 70.4% (11.261 / 16), so the y-axis limit
# must exceed 70 for the tallest bar and its label to fit.
fviz_eig(
  pca_model,
  addlabels = TRUE,
  ylim = c(0, 80),
  main = "Scree Plot PCA"
)
Analisis Paralel
# Parallel analysis compares the observed eigenvalues with those of randomly
# simulated data, so fix the RNG seed to make the suggested number of
# components reproducible between runs.
set.seed(123)
fa.parallel(voice_data, fa = "pc", n.iter = 100)
## Parallel analysis suggests that the number of factors = NA and the number of components = 3
PCA Final
# Final PCA: three components with varimax rotation, keeping component scores.
pca_3 <- principal(
  r = voice_data,
  nfactors = 3,
  rotate = "varimax",
  scores = TRUE
)
# Hide loadings below 0.4 in absolute value to make the pattern readable.
print(x = pca_3, cut = 0.4)
## Principal Components Analysis
## Call: principal(r = voice_data, nfactors = 3, rotate = "varimax", scores = TRUE)
## Standardized loadings (pattern matrix) based upon correlation matrix
## RC1 RC2 RC3 h2 u2 com
## Jitter(%) 0.90 0.99 0.012 1.5
## Jitter(Abs) 0.74 0.46 0.86 0.135 2.1
## Jitter:RAP 0.91 0.97 0.027 1.3
## Jitter:PPQ5 0.42 0.87 0.95 0.051 1.5
## Jitter:DDP 0.91 0.97 0.027 1.3
## Shimmer 0.90 0.99 0.013 1.4
## Shimmer(dB) 0.89 0.98 0.020 1.4
## Shimmer:APQ3 0.91 0.95 0.047 1.3
## Shimmer:APQ5 0.90 0.97 0.027 1.4
## Shimmer:APQ11 0.87 0.90 0.103 1.4
## Shimmer:DDA 0.91 0.95 0.047 1.3
## NHR 0.60 0.70 0.85 0.153 2.0
## HNR -0.68 -0.48 0.84 0.165 2.4
## RPDE 0.59 0.54 0.462 1.9
## DFA 0.82 0.68 0.318 1.0
## PPE 0.51 0.61 0.78 0.219 2.6
##
## RC1 RC2 RC3
## SS loadings 6.60 5.46 2.12
## Proportion Var 0.41 0.34 0.13
## Cumulative Var 0.41 0.75 0.89
## Proportion Explained 0.47 0.39 0.15
## Cumulative Proportion 0.47 0.85 1.00
##
## Mean item complexity = 1.6
## Test of the hypothesis that 3 components are sufficient.
##
## The root mean square of the residuals (RMSR) is 0.03
## with the empirical chi square 1021.64 with prob < 4.8e-166
##
## Fit based upon off diagonal values = 1
Biplot PCA
# Biplot of the first two principal components, labelling variables only.
# Reuses `pca_model` (the standardized prcomp fit created for the scree plot)
# instead of refitting the same PCA, which also removes the partially matched
# `scale = TRUE` argument (prcomp's argument is `scale.`).
fviz_pca_biplot(
  pca_model,
  label = "var",
  repel = TRUE,
  title = "Biplot PCA"
)
Factor Analysis
# Maximum-likelihood factor analysis with three varimax-rotated factors.
# NOTE(review): the recorded fit is poor (TLI 0.419, RMSEA 0.536). In the
# recorded correlation matrix Jitter:RAP/Jitter:DDP and
# Shimmer:APQ3/Shimmer:DDA correlate at 1.000 and the two smallest
# eigenvalues are ~0, so the matrix is near-singular. Confirm whether these
# near-duplicate variables must be kept before interpreting this model.
fa_result <- fa(
  r = voice_data,
  nfactors = 3,
  rotate = "varimax",
  fm = "ml"
)
# Hide loadings below 0.4 in absolute value.
print(x = fa_result, cut = 0.4)
## Factor Analysis using method = ml
## Call: fa(r = voice_data, nfactors = 3, rotate = "varimax", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
## ML1 ML2 ML3 h2 u2 com
## Jitter(%) 0.91 0.984 0.0159 1.4
## Jitter(Abs) 0.76 0.731 0.2694 1.5
## Jitter:RAP 0.93 0.997 0.0027 1.3
## Jitter:PPQ5 0.44 0.85 0.941 0.0587 1.6
## Jitter:DDP 0.93 0.997 0.0027 1.3
## Shimmer 0.92 0.996 0.0044 1.3
## Shimmer(dB) 0.91 0.987 0.0132 1.4
## Shimmer:APQ3 0.93 0.997 0.0025 1.3
## Shimmer:APQ5 0.92 0.976 0.0240 1.3
## Shimmer:APQ11 0.87 0.914 0.0865 1.4
## Shimmer:DDA 0.93 0.997 0.0025 1.3
## NHR 0.59 0.63 0.796 0.2043 2.3
## HNR -0.69 -0.43 0.671 0.3291 1.7
## RPDE 0.250 0.7495 2.1
## DFA 0.048 0.9525 1.2
## PPE 0.44 0.56 0.530 0.4702 2.1
##
## ML1 ML2 ML3
## SS loadings 6.93 5.62 0.27
## Proportion Var 0.43 0.35 0.02
## Cumulative Var 0.43 0.78 0.80
## Proportion Explained 0.54 0.44 0.02
## Cumulative Proportion 0.54 0.98 1.00
##
## Mean item complexity = 1.5
## Test of the hypothesis that 3 factors are sufficient.
##
## df null model = 120 with the objective function = 59.58 with Chi Square = 349581.3
## df of the model are 75 and the objective function was 21.63
##
## The root mean square of the residuals (RMSR) is 0.06
## The df corrected root mean square of the residuals is 0.08
##
## The harmonic n.obs is 5875 with the empirical chi square 2534.51 with prob < 0
## The total n.obs was 5875 with Likelihood Chi Square = 126896.9 with prob < 0
##
## Tucker Lewis Index of factoring reliability = 0.419
## RMSEA index = 0.536 and the 90 % confidence intervals are 0.534 0.539
## BIC = 126246
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy
## ML1 ML2 ML3
## Correlation of (regression) scores with factors 1 1 0.96
## Multiple R square of scores with factors 1 1 0.92
## Minimum correlation of possible factor scores 1 1 0.84