Library yang diperlukan

library(readr)
library(psych)
library(corrplot)
## corrplot 0.95 loaded
library(factoextra)
## Loading required package: ggplot2
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
## Welcome to factoextra!
## Want to learn more? See two factoextra-related books at https://www.datanovia.com/en/product/practical-guide-to-principal-component-methods-in-r/

Import dan Cek Informasi Dataset

# Read the CSV (path relative to the working directory) into a tibble;
# readr guesses the column types and reports them in the message below.
data <- read_csv(file = "parkinsons_updrs.data.csv")
## Rows: 5875 Columns: 23
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (23): index, subject#, age, sex, test_time, motor_UPDRS, total_UPDRS, Ji...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first six rows to sanity-check the import.
head(data, n = 6L)
## # A tibble: 6 × 23
##   index `subject#`   age   sex test_time motor_UPDRS total_UPDRS `Jitter(%)`
##   <dbl>      <dbl> <dbl> <dbl>     <dbl>       <dbl>       <dbl>       <dbl>
## 1     0          1    72     0      5.64        28.2        34.4     0.00662
## 2     1          1    72     0     12.7         28.4        34.9     0.003  
## 3     2          1    72     0     19.7         28.7        35.4     0.00481
## 4     3          1    72     0     25.6         28.9        35.8     0.00528
## 5     4          1    72     0     33.6         29.2        36.4     0.00335
## 6     5          1    72     0     40.7         29.4        36.9     0.00353
## # ℹ 15 more variables: `Jitter(Abs)` <dbl>, `Jitter:RAP` <dbl>,
## #   `Jitter:PPQ5` <dbl>, `Jitter:DDP` <dbl>, Shimmer <dbl>,
## #   `Shimmer(dB)` <dbl>, `Shimmer:APQ3` <dbl>, `Shimmer:APQ5` <dbl>,
## #   `Shimmer:APQ11` <dbl>, `Shimmer:DDA` <dbl>, NHR <dbl>, HNR <dbl>,
## #   RPDE <dbl>, DFA <dbl>, PPE <dbl>
# List all column names; several contain "(", ":" or "#", so they have to be
# quoted (or backticked) whenever they are referenced.
names(data)
##  [1] "index"         "subject#"      "age"           "sex"          
##  [5] "test_time"     "motor_UPDRS"   "total_UPDRS"   "Jitter(%)"    
##  [9] "Jitter(Abs)"   "Jitter:RAP"    "Jitter:PPQ5"   "Jitter:DDP"   
## [13] "Shimmer"       "Shimmer(dB)"   "Shimmer:APQ3"  "Shimmer:APQ5" 
## [17] "Shimmer:APQ11" "Shimmer:DDA"   "NHR"           "HNR"          
## [21] "RPDE"          "DFA"           "PPE"

Gunakan Fitur yang Sesuai dengan Rules

# Keep the 16 columns from Jitter(%) through PPE; index, subject#, age, sex,
# test_time, motor_UPDRS and total_UPDRS are left out of the analysis.
voice_data <- data[c(
    # Jitter columns
    "Jitter(%)", "Jitter(Abs)", "Jitter:RAP", "Jitter:PPQ5", "Jitter:DDP",
    # Shimmer columns
    "Shimmer", "Shimmer(dB)", "Shimmer:APQ3", "Shimmer:APQ5", "Shimmer:APQ11",
    "Shimmer:DDA",
    # Remaining columns
    "NHR", "HNR", "RPDE", "DFA", "PPE"
)]

Statistik Deskriptif

# Per-column descriptive statistics (n, mean, sd, median, skew, kurtosis, ...).
describe(x = voice_data)
##               vars    n  mean   sd median trimmed  mad  min   max range  skew
## Jitter(%)        1 5875  0.01 0.01   0.00    0.01 0.00 0.00  0.10  0.10  6.45
## Jitter(Abs)      2 5875  0.00 0.00   0.00    0.00 0.00 0.00  0.00  0.00  3.28
## Jitter:RAP       3 5875  0.00 0.00   0.00    0.00 0.00 0.00  0.06  0.06  6.94
## Jitter:PPQ5      4 5875  0.00 0.00   0.00    0.00 0.00 0.00  0.07  0.07  7.58
## Jitter:DDP       5 5875  0.01 0.01   0.01    0.01 0.00 0.00  0.17  0.17  6.94
## Shimmer          6 5875  0.03 0.03   0.03    0.03 0.01 0.00  0.27  0.27  3.31
## Shimmer(dB)      7 5875  0.31 0.23   0.25    0.27 0.13 0.03  2.11  2.08  3.10
## Shimmer:APQ3     8 5875  0.02 0.01   0.01    0.01 0.01 0.00  0.16  0.16  3.10
## Shimmer:APQ5     9 5875  0.02 0.02   0.02    0.02 0.01 0.00  0.17  0.17  3.70
## Shimmer:APQ11   10 5875  0.03 0.02   0.02    0.02 0.01 0.00  0.28  0.27  3.41
## Shimmer:DDA     11 5875  0.05 0.04   0.04    0.04 0.02 0.00  0.49  0.48  3.10
## NHR             12 5875  0.03 0.06   0.02    0.02 0.01 0.00  0.75  0.75  6.55
## HNR             13 5875 21.68 4.29  21.92   21.89 3.73 1.66 37.88 36.22 -0.81
## RPDE            14 5875  0.54 0.10   0.54    0.54 0.11 0.15  0.97  0.82 -0.04
## DFA             15 5875  0.65 0.07   0.64    0.65 0.08 0.51  0.87  0.35  0.28
## PPE             16 5875  0.22 0.09   0.21    0.21 0.08 0.02  0.73  0.71  1.09
##               kurtosis   se
## Jitter(%)        67.41 0.00
## Jitter(Abs)      18.13 0.00
## Jitter:RAP       78.44 0.00
## Jitter:PPQ5      81.47 0.00
## Jitter:DDP       78.44 0.00
## Shimmer          15.22 0.00
## Shimmer(dB)      13.07 0.00
## Shimmer:APQ3     14.70 0.00
## Shimmer:APQ5     19.22 0.00
## Shimmer:APQ11    19.14 0.00
## Shimmer:DDA      14.71 0.00
## NHR              52.54 0.00
## HNR               2.50 0.06
## RPDE             -0.07 0.00
## DFA              -0.88 0.00
## PPE               1.95 0.00

Matriks Korelasi

# Pairwise Pearson correlations between all selected columns.
cor_matrix <- cor(x = voice_data, method = "pearson")
# Display the matrix rounded to three decimals for readability.
round(cor_matrix, digits = 3)
##               Jitter(%) Jitter(Abs) Jitter:RAP Jitter:PPQ5 Jitter:DDP Shimmer
## Jitter(%)         1.000       0.866      0.984       0.968      0.984   0.710
## Jitter(Abs)       0.866       1.000      0.845       0.791      0.845   0.649
## Jitter:RAP        0.984       0.845      1.000       0.947      1.000   0.682
## Jitter:PPQ5       0.968       0.791      0.947       1.000      0.947   0.733
## Jitter:DDP        0.984       0.845      1.000       0.947      1.000   0.682
## Shimmer           0.710       0.649      0.682       0.733      0.682   1.000
## Shimmer(dB)       0.717       0.656      0.686       0.735      0.686   0.992
## Shimmer:APQ3      0.664       0.624      0.650       0.677      0.650   0.980
## Shimmer:APQ5      0.694       0.621      0.660       0.734      0.660   0.985
## Shimmer:APQ11     0.646       0.590      0.603       0.668      0.603   0.935
## Shimmer:DDA       0.664       0.624      0.650       0.677      0.650   0.980
## NHR               0.825       0.700      0.792       0.865      0.792   0.795
## HNR              -0.675      -0.706     -0.641      -0.662     -0.641  -0.801
## RPDE              0.427       0.547      0.383       0.382      0.383   0.468
## DFA               0.227       0.352      0.215       0.175      0.215   0.133
## PPE               0.722       0.788      0.671       0.663      0.671   0.616
##               Shimmer(dB) Shimmer:APQ3 Shimmer:APQ5 Shimmer:APQ11 Shimmer:DDA
## Jitter(%)           0.717        0.664        0.694         0.646       0.664
## Jitter(Abs)         0.656        0.624        0.621         0.590       0.624
## Jitter:RAP          0.686        0.650        0.660         0.603       0.650
## Jitter:PPQ5         0.735        0.677        0.734         0.668       0.677
## Jitter:DDP          0.686        0.650        0.660         0.603       0.650
## Shimmer             0.992        0.980        0.985         0.935       0.980
## Shimmer(dB)         1.000        0.968        0.976         0.936       0.968
## Shimmer:APQ3        0.968        1.000        0.963         0.886       1.000
## Shimmer:APQ5        0.976        0.963        1.000         0.939       0.963
## Shimmer:APQ11       0.936        0.886        0.939         1.000       0.886
## Shimmer:DDA         0.968        1.000        0.963         0.886       1.000
## NHR                 0.798        0.733        0.798         0.712       0.733
## HNR                -0.802       -0.781       -0.791        -0.778      -0.781
## RPDE                0.472        0.437        0.451         0.481       0.437
## DFA                 0.126        0.131        0.128         0.180       0.131
## PPE                 0.635        0.577        0.594         0.623       0.577
##                  NHR    HNR   RPDE    DFA    PPE
## Jitter(%)      0.825 -0.675  0.427  0.227  0.722
## Jitter(Abs)    0.700 -0.706  0.547  0.352  0.788
## Jitter:RAP     0.792 -0.641  0.383  0.215  0.671
## Jitter:PPQ5    0.865 -0.662  0.382  0.175  0.663
## Jitter:DDP     0.792 -0.641  0.383  0.215  0.671
## Shimmer        0.795 -0.801  0.468  0.133  0.616
## Shimmer(dB)    0.798 -0.802  0.472  0.126  0.635
## Shimmer:APQ3   0.733 -0.781  0.437  0.131  0.577
## Shimmer:APQ5   0.798 -0.791  0.451  0.128  0.594
## Shimmer:APQ11  0.712 -0.778  0.481  0.180  0.623
## Shimmer:DDA    0.733 -0.781  0.437  0.131  0.577
## NHR            1.000 -0.684  0.417 -0.022  0.565
## HNR           -0.684  1.000 -0.659 -0.291 -0.759
## RPDE           0.417 -0.659  1.000  0.192  0.566
## DFA           -0.022 -0.291  0.192  1.000  0.395
## PPE            0.565 -0.759  0.566  0.395  1.000
# Heat-map view of the correlation matrix; smaller tick labels (tl.cex) keep
# the 16 variable names from overlapping.
corrplot(
    corr = cor_matrix,
    method = "color",
    tl.cex = 0.6
)

Uji Kelayakan Analisis Faktor

KMO Test

# Kaiser-Meyer-Olkin sampling adequacy, overall and per variable.
KMO(r = voice_data)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = voice_data)
## Overall MSA =  0.93
## MSA for each item = 
##     Jitter(%)   Jitter(Abs)    Jitter:RAP   Jitter:PPQ5    Jitter:DDP 
##          0.94          0.95          0.89          0.93          0.89 
##       Shimmer   Shimmer(dB)  Shimmer:APQ3  Shimmer:APQ5 Shimmer:APQ11 
##          0.93          0.94          0.89          0.95          0.95 
##   Shimmer:DDA           NHR           HNR          RPDE           DFA 
##          0.89          0.94          0.95          0.92          0.71 
##           PPE 
##          0.94

Bartlett Test

# Bartlett's test on the correlation matrix; the sample size must be supplied
# explicitly because only the matrix (not the raw data) is passed in.
cortest.bartlett(
    R = cor_matrix,
    n = nrow(voice_data)
)
## $chisq
## [1] 349581.3
## 
## $p.value
## [1] 0
## 
## $df
## [1] 120

Analisis PCA

# Initial PCA with the default single component. principal() has no `scale`
# argument (an extra `scale = TRUE` is silently absorbed by `...`); it works
# from the correlation matrix by default, so the variables are effectively
# standardized already.
pca_result <- principal(voice_data)
pca_result
## Principal Components Analysis
## Call: principal(r = voice_data)
## Standardized loadings (pattern matrix) based upon correlation matrix
##                 PC1    h2   u2 com
## Jitter(%)      0.89 0.801 0.20   1
## Jitter(Abs)    0.84 0.702 0.30   1
## Jitter:RAP     0.87 0.754 0.25   1
## Jitter:PPQ5    0.89 0.790 0.21   1
## Jitter:DDP     0.87 0.754 0.25   1
## Shimmer        0.93 0.874 0.13   1
## Shimmer(dB)    0.94 0.875 0.12   1
## Shimmer:APQ3   0.90 0.815 0.18   1
## Shimmer:APQ5   0.92 0.849 0.15   1
## Shimmer:APQ11  0.88 0.771 0.23   1
## Shimmer:DDA    0.90 0.815 0.18   1
## NHR            0.87 0.752 0.25   1
## HNR           -0.86 0.742 0.26   1
## RPDE           0.56 0.315 0.68   1
## DFA            0.24 0.056 0.94   1
## PPE            0.77 0.596 0.40   1
## 
##                  PC1
## SS loadings    11.26
## Proportion Var  0.70
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 component is sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.08 
##  with the empirical chi square  8861.37  with prob <  0 
## 
## Fit based upon off diagonal values = 0.99

Nilai Eigen PCA

# Eigen-decomposition of the correlation matrix; the eigenvalues are the
# variances carried by each principal component.
eigen_values <- eigen(x = cor_matrix)
eigen_values[["values"]]
##  [1] 1.126105e+01 1.672900e+00 1.240510e+00 7.648671e-01 3.092504e-01
##  [6] 2.228822e-01 1.727628e-01 1.617443e-01 1.030749e-01 4.352376e-02
## [11] 2.028743e-02 1.401483e-02 8.666707e-03 4.464222e-03 3.778333e-07
## [16] 2.084635e-08

Scree Plot PCA

# Fit the PCA on standardized variables. prcomp()'s argument is `scale.`
# (with a trailing dot); spell it out rather than rely on partial argument
# matching.
pca_model <- prcomp(voice_data, scale. = TRUE)
# The first component explains about 70.4% of the variance (11.26 / 16), so
# the y-axis has to extend past 70 for its bar and label to be fully visible.
fviz_eig(pca_model,
         addlabels = TRUE,
         ylim = c(0, 80),
         main = "Scree Plot PCA")

Analisis Paralel

# Parallel analysis relies on randomly generated comparison data, so fix the
# RNG seed to make the suggested number of components reproducible.
set.seed(123)
fa.parallel(voice_data, fa = "pc", n.iter = 100)

## Parallel analysis suggests that the number of factors =  NA  and the number of components =  3

PCA Final

# Final PCA: three components (as suggested by the parallel analysis) with a
# varimax rotation; component scores are computed as well.
pca_3 <- principal(
    r = voice_data,
    nfactors = 3,
    rotate = "varimax",
    scores = TRUE
)
# Hide loadings below 0.4 in absolute value so the structure is easier to read.
print(pca_3, cut = 0.4)
## Principal Components Analysis
## Call: principal(r = voice_data, nfactors = 3, rotate = "varimax", scores = TRUE)
## Standardized loadings (pattern matrix) based upon correlation matrix
##                 RC1   RC2   RC3   h2    u2 com
## Jitter(%)            0.90       0.99 0.012 1.5
## Jitter(Abs)          0.74  0.46 0.86 0.135 2.1
## Jitter:RAP           0.91       0.97 0.027 1.3
## Jitter:PPQ5    0.42  0.87       0.95 0.051 1.5
## Jitter:DDP           0.91       0.97 0.027 1.3
## Shimmer        0.90             0.99 0.013 1.4
## Shimmer(dB)    0.89             0.98 0.020 1.4
## Shimmer:APQ3   0.91             0.95 0.047 1.3
## Shimmer:APQ5   0.90             0.97 0.027 1.4
## Shimmer:APQ11  0.87             0.90 0.103 1.4
## Shimmer:DDA    0.91             0.95 0.047 1.3
## NHR            0.60  0.70       0.85 0.153 2.0
## HNR           -0.68       -0.48 0.84 0.165 2.4
## RPDE                       0.59 0.54 0.462 1.9
## DFA                        0.82 0.68 0.318 1.0
## PPE                  0.51  0.61 0.78 0.219 2.6
## 
##                        RC1  RC2  RC3
## SS loadings           6.60 5.46 2.12
## Proportion Var        0.41 0.34 0.13
## Cumulative Var        0.41 0.75 0.89
## Proportion Explained  0.47 0.39 0.15
## Cumulative Proportion 0.47 0.85 1.00
## 
## Mean item complexity =  1.6
## Test of the hypothesis that 3 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.03 
##  with the empirical chi square  1021.64  with prob <  4.8e-166 
## 
## Fit based upon off diagonal values = 1

Biplot PCA

# Reuse the prcomp fit created for the scree plot (`pca_model`) instead of
# refitting the same standardized PCA; only variable arrows are labelled.
fviz_pca_biplot(pca_model,
                label = "var",
                repel = TRUE,
                title = "Biplot PCA")

Factor Analysis

# Maximum-likelihood factor analysis with three varimax-rotated factors.
# NOTE(review): the correlation matrix is close to singular (two eigenvalues
# near zero; Jitter:RAP/Jitter:DDP and Shimmer:APQ3/Shimmer:DDA correlate at
# 1.000), which may affect the ML fit - confirm whether the redundant columns
# should be kept.
fa_result <- fa(
    r = voice_data,
    nfactors = 3,
    rotate = "varimax",
    fm = "ml"
)
# Hide loadings below 0.4 in absolute value in the printed table.
print(fa_result, cut = 0.4)
## Factor Analysis using method =  ml
## Call: fa(r = voice_data, nfactors = 3, rotate = "varimax", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
##                 ML1   ML2   ML3    h2     u2 com
## Jitter(%)            0.91       0.984 0.0159 1.4
## Jitter(Abs)          0.76       0.731 0.2694 1.5
## Jitter:RAP           0.93       0.997 0.0027 1.3
## Jitter:PPQ5    0.44  0.85       0.941 0.0587 1.6
## Jitter:DDP           0.93       0.997 0.0027 1.3
## Shimmer        0.92             0.996 0.0044 1.3
## Shimmer(dB)    0.91             0.987 0.0132 1.4
## Shimmer:APQ3   0.93             0.997 0.0025 1.3
## Shimmer:APQ5   0.92             0.976 0.0240 1.3
## Shimmer:APQ11  0.87             0.914 0.0865 1.4
## Shimmer:DDA    0.93             0.997 0.0025 1.3
## NHR            0.59  0.63       0.796 0.2043 2.3
## HNR           -0.69 -0.43       0.671 0.3291 1.7
## RPDE                            0.250 0.7495 2.1
## DFA                             0.048 0.9525 1.2
## PPE            0.44  0.56       0.530 0.4702 2.1
## 
##                        ML1  ML2  ML3
## SS loadings           6.93 5.62 0.27
## Proportion Var        0.43 0.35 0.02
## Cumulative Var        0.43 0.78 0.80
## Proportion Explained  0.54 0.44 0.02
## Cumulative Proportion 0.54 0.98 1.00
## 
## Mean item complexity =  1.5
## Test of the hypothesis that 3 factors are sufficient.
## 
## df null model =  120  with the objective function =  59.58 with Chi Square =  349581.3
## df of  the model are 75  and the objective function was  21.63 
## 
## The root mean square of the residuals (RMSR) is  0.06 
## The df corrected root mean square of the residuals is  0.08 
## 
## The harmonic n.obs is  5875 with the empirical chi square  2534.51  with prob <  0 
## The total n.obs was  5875  with Likelihood Chi Square =  126896.9  with prob <  0 
## 
## Tucker Lewis Index of factoring reliability =  0.419
## RMSEA index =  0.536  and the 90 % confidence intervals are  0.534 0.539
## BIC =  126246
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy             
##                                                   ML1 ML2  ML3
## Correlation of (regression) scores with factors     1   1 0.96
## Multiple R square of scores with factors            1   1 0.92
## Minimum correlation of possible factor scores       1   1 0.84