Import Data

# Load the HCV data set from the project-relative CSV file.
# `stringsAsFactors` is spelled out in full: the shortened `stringsAsFactor`
# only worked through partial argument matching, which is fragile.
# Text columns (e.g. Category, Sex) are kept as character vectors.
hcv <- read.csv(
  "data/hcvdat0.csv",
  stringsAsFactors = FALSE
)
# Preview the first rows to confirm the columns were parsed as expected.
head(hcv)
##   X      Category Age Sex  ALB  ALP  ALT  AST  BIL   CHE CHOL CREA  GGT PROT
## 1 1 0=Blood Donor  32   m 38.5 52.5  7.7 22.1  7.5  6.93 3.23  106 12.1 69.0
## 2 2 0=Blood Donor  32   m 38.5 70.3 18.0 24.7  3.9 11.17 4.80   74 15.6 76.5
## 3 3 0=Blood Donor  32   m 46.9 74.7 36.2 52.6  6.1  8.84 5.20   86 33.2 79.3
## 4 4 0=Blood Donor  32   m 43.2 52.0 30.6 22.6 18.9  7.33 4.74   80 33.8 75.7
## 5 5 0=Blood Donor  32   m 39.2 74.1 32.6 24.8  9.6  9.15 4.32   76 29.9 68.7
## 6 6 0=Blood Donor  32   m 41.6 43.3 18.5 19.7 12.3  9.92 6.05  111 91.0 74.0

Pemilahan Variabel Numerik dan Missing Value

# Keep only the numeric columns. vapply() always returns a logical mask (even
# for a zero-column input), and drop = FALSE keeps the result a data frame
# when only a single column qualifies.
hcv_num <- hcv[, vapply(hcv, is.numeric, logical(1)), drop = FALSE]
# Remove every row that has a missing value in any of the numeric columns.
hcv_num <- na.omit(hcv_num)
# NOTE(review): the column X is retained and looks like a running row index
# (1, 2, 3, ...) rather than a measurement -- confirm whether it should be
# excluded before the correlation / covariance / eigen steps.
# Inspect the structure of the cleaned numeric data.
str(hcv_num)
## 'data.frame':    589 obs. of  12 variables:
##  $ X   : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Age : int  32 32 32 32 32 32 32 32 32 32 ...
##  $ ALB : num  38.5 38.5 46.9 43.2 39.2 41.6 46.3 42.2 50.9 42.4 ...
##  $ ALP : num  52.5 70.3 74.7 52 74.1 43.3 41.3 41.9 65.5 86.3 ...
##  $ ALT : num  7.7 18 36.2 30.6 32.6 18.5 17.5 35.8 23.2 20.3 ...
##  $ AST : num  22.1 24.7 52.6 22.6 24.8 19.7 17.8 31.1 21.2 20 ...
##  $ BIL : num  7.5 3.9 6.1 18.9 9.6 12.3 8.5 16.1 6.9 35.2 ...
##  $ CHE : num  6.93 11.17 8.84 7.33 9.15 ...
##  $ CHOL: num  3.23 4.8 5.2 4.74 4.32 6.05 4.79 4.6 4.1 4.45 ...
##  $ CREA: num  106 74 86 80 76 111 70 109 83 81 ...
##  $ GGT : num  12.1 15.6 33.2 33.8 29.9 91 16.9 21.5 13.7 15.9 ...
##  $ PROT: num  69 76.5 79.3 75.7 68.7 74 74.5 67.1 71.3 69.9 ...
##  - attr(*, "na.action")= 'omit' Named int [1:26] 122 320 330 414 425 434 499 541 542 546 ...
##   ..- attr(*, "names")= chr [1:26] "122" "320" "330" "414" ...

Matriks Korelasi

# Pairwise Pearson correlations between all columns of the cleaned numeric
# data (Pearson is the default method, stated here explicitly).
cor_mat <- stats::cor(x = hcv_num, method = "pearson")
# Display the full correlation matrix.
print(cor_mat)
##                X         Age          ALB         ALP         ALT         AST
## X     1.00000000  0.44305790 -0.315204550  0.01794376 -0.20023304  0.30360292
## Age   0.44305790  1.00000000 -0.191093637  0.17771977 -0.04057647  0.07273886
## ALB  -0.31520455 -0.19109364  1.000000000 -0.14611991  0.03949714 -0.17760895
## ALP   0.01794376  0.17771977 -0.146119911  1.00000000  0.22160301  0.06702428
## ALT  -0.20023304 -0.04057647  0.039497139  0.22160301  1.00000000  0.19865775
## AST   0.30360292  0.07273886 -0.177608947  0.06702428  0.19865775  1.00000000
## BIL   0.17651109  0.03965486 -0.169597498  0.05837241 -0.10679662  0.30957974
## CHE  -0.27853454 -0.07586328  0.360919403  0.02948169  0.22434447 -0.19727042
## CHOL -0.05794709  0.12474161  0.210419878  0.12590008  0.14999727 -0.20121300
## CREA -0.02016270 -0.02514225  0.001433247  0.15390895 -0.03610554 -0.01794810
## GGT   0.22146275  0.14337927 -0.147598318  0.46130000  0.21970686  0.47777362
## PROT -0.16648242 -0.15975998  0.570725680 -0.06308514  0.01678633  0.01740394
##              BIL         CHE         CHOL         CREA          GGT        PROT
## X     0.17651109 -0.27853454 -0.057947087 -0.020162704  0.221462754 -0.16648242
## Age   0.03965486 -0.07586328  0.124741615 -0.025142253  0.143379268 -0.15975998
## ALB  -0.16959750  0.36091940  0.210419878  0.001433247 -0.147598318  0.57072568
## ALP   0.05837241  0.02948169  0.125900079  0.153908950  0.461299996 -0.06308514
## ALT  -0.10679662  0.22434447  0.149997271 -0.036105541  0.219706857  0.01678633
## AST   0.30957974 -0.19727042 -0.201213004 -0.017948098  0.477773617  0.01740394
## BIL   1.00000000 -0.32071323 -0.181569556  0.019909617  0.210566559 -0.05257491
## CHE  -0.32071323  1.00000000  0.428018276 -0.012119999 -0.095716131  0.30628754
## CHOL -0.18156956  0.42801828  1.000000000 -0.051464078  0.008822692  0.24504950
## CREA  0.01990962 -0.01212000 -0.051464078  1.000000000  0.125353469 -0.03011070
## GGT   0.21056656 -0.09571613  0.008822692  0.125353469  1.000000000 -0.03712701
## PROT -0.05257491  0.30628754  0.245049503 -0.030110695 -0.037127008  1.00000000

Matriks korelasi menunjukkan kekuatan dan arah hubungan linear antar variabel numerik tanpa dipengaruhi oleh skala pengukuran.

Matriks Kovarians

# Sample covariance matrix of the cleaned numeric data, in the original
# measurement units (Pearson is the default method, stated here explicitly).
cov_mat <- stats::cov(x = hcv_num, method = "pearson")
# Display the full covariance matrix.
print(cov_mat)
##                X        Age          ALB        ALP         ALT         AST
## X    30325.61267 766.254175 -316.2678135  80.997414 -727.477884 1737.677172
## Age    766.25418  98.631388  -10.9348172  45.750544   -8.407388   23.742827
## ALB   -316.26781 -10.934817   33.1982701 -21.823283    4.747912  -33.634186
## ALP     80.99741  45.750544  -21.8232826 671.901949  119.841675   57.100968
## ALT   -727.47788  -8.407388    4.7479116 119.841675  435.269784  136.220708
## AST   1737.67717  23.742827  -33.6341863  57.100968  136.220708 1080.231200
## BIL    535.04466   6.855155  -17.0094554  26.337454  -38.783770  177.110426
## CHE   -106.27733  -1.650806    4.5564303   1.674411   10.255372  -14.206173
## CHOL   -11.39233   1.398606    1.3687395   3.684302    3.532962   -7.466047
## CREA  -178.00646 -12.658841    0.4186596 202.254881  -38.188738  -29.906045
## GGT   2094.23092  77.323769  -46.1804560 649.315069  248.909775  852.706557
## PROT  -155.07302  -8.486697   17.5892871  -8.746677    1.873261    3.059630
##             BIL         CHE        CHOL         CREA          GGT        PROT
## X    535.044661 -106.277326 -11.3923339 -178.0064562 2094.2309247 -155.073024
## Age    6.855155   -1.650806   1.3986055  -12.6588412   77.3237685   -8.486697
## ALB  -17.009455    4.556430   1.3687395    0.4186596  -46.1804560   17.589287
## ALP   26.337454    1.674411   3.6843023  202.2548814  649.3150694   -8.746677
## ALT  -38.783770   10.255372   3.5329616  -38.1887382  248.9097752    1.873261
## AST  177.110426  -14.206173  -7.4660468  -29.9060449  852.7065571    3.059630
## BIL  302.988734  -12.231702  -3.5680635   17.5694569  199.0314564   -4.895025
## CHE  -12.231702    4.800799   1.0587548   -1.3462991  -11.3883550    3.589626
## CHOL  -3.568064    1.058755   1.2745375   -2.9455248    0.5408745    1.479767
## CREA  17.569457   -1.346299  -2.9455248 2570.1849279  345.0941704   -8.165186
## GGT  199.031456  -11.388355   0.5408745  345.0941704 2948.7514092  -10.783808
## PROT  -4.895025    3.589626   1.4797666   -8.1651857  -10.7838076   28.610549

Matriks kovarians menggambarkan variasi bersama antar variabel dalam skala asli dan menjadi dasar untuk analisis eigen.

Analisis Eigen

# Eigen decomposition of the covariance matrix; both eigenvalues and
# eigenvectors are computed (only.values = FALSE is the default).
eig <- eigen(x = cov_mat, only.values = FALSE)
# Eigenvalues, as returned by eigen() (largest first).
eig[["values"]]
##  [1] 3.064615e+04 3.411952e+03 2.431355e+03 8.106778e+02 4.759857e+02
##  [6] 3.597636e+02 2.295242e+02 7.550312e+01 4.386454e+01 1.225674e+01
## [11] 3.543169e+00 8.832592e-01
# Eigenvectors, one per column. The rows are unlabelled in the printout; they
# follow the variable order of cov_mat (X, Age, ALB, ..., PROT).
print(eig[["vectors"]])
##                [,1]          [,2]          [,3]          [,4]         [,5]
##  [1,]  0.9943168408  0.0781794505 -0.0428895929 -0.0328121735  0.020212889
##  [2,]  0.0252084504 -0.0048342063  0.0049027524 -0.0592379614  0.060040370
##  [3,] -0.0104849795  0.0082864828 -0.0043684634 -0.0004021013 -0.025903024
##  [4,]  0.0044080667 -0.2356681437  0.0356393108 -0.4374522941  0.710618877
##  [5,] -0.0230437543 -0.1041992516  0.0916985555  0.0673718565  0.547112440
##  [6,]  0.0607143554 -0.2580042619  0.1999667225  0.8489173365  0.256978864
##  [7,]  0.0184370838 -0.0576707031  0.0297876343  0.1542686413 -0.079875667
##  [8,] -0.0035236442  0.0013755262 -0.0001678846 -0.0100541902  0.011976556
##  [9,] -0.0003865148  0.0002186455  0.0009124350 -0.0103444081  0.005339638
## [10,] -0.0053562951 -0.4023281847 -0.9087666493  0.1021482314  0.004426586
## [11,]  0.0771038647 -0.8340483849  0.3488376959 -0.2101014484 -0.343945278
## [12,] -0.0050747796  0.0005044839  0.0043774244  0.0163142811 -0.009811142
##               [,6]          [,7]         [,8]          [,9]        [,10]
##  [1,] -0.029899000  0.0117861445  0.028276257 -0.0075378056  0.002522380
##  [2,]  0.013200918 -0.0142637105 -0.987882489 -0.1224848387 -0.020137828
##  [3,] -0.041230519 -0.0196324002  0.073071176 -0.7117277819  0.693190933
##  [4,]  0.462008490 -0.1601735463  0.082208949 -0.0250189827  0.017575507
##  [5,] -0.680285668  0.4612712030  0.014509161 -0.0004604439 -0.009521025
##  [6,]  0.171499060 -0.2691408215 -0.025811818 -0.0013787705  0.020143047
##  [7,]  0.524197933  0.8290819581 -0.013118146 -0.0459489418  0.009776830
##  [8,] -0.030163171 -0.0150483056  0.002265340 -0.1084487331 -0.017859713
##  [9,] -0.009869622 -0.0007455209 -0.011252758 -0.0491322591 -0.030609508
## [10,] -0.039663866  0.0110805318 -0.008986684 -0.0015590011 -0.003436025
## [11,] -0.119064616  0.0091672413 -0.002651554  0.0038121325 -0.002258407
## [12,] -0.006258120 -0.0244543592  0.099611920 -0.6793031525 -0.718958094
##               [,11]         [,12]
##  [1,]  1.166495e-03 -0.0005701834
##  [2,] -1.638485e-02 -0.0153975609
##  [3,] -6.807153e-02  0.0016019737
##  [4,] -4.373501e-03 -0.0027458982
##  [5,] -2.126558e-02 -0.0043095508
##  [6,]  9.352837e-03  0.0072123962
##  [7,]  2.699176e-02 -0.0001634271
##  [8,]  9.680597e-01 -0.2222163944
##  [9,]  2.170998e-01  0.9742461763
## [10,]  4.016040e-04  0.0012264068
## [11,]  6.161861e-05 -0.0015769054
## [12,] -9.765782e-02 -0.0337955255

Proporsi Variansi

# Share of the total variance carried by each eigenvalue: each value divided
# by the sum of all eigenvalues (prop.table() computes x / sum(x) for a
# plain vector).
prop_var <- prop.table(eig[["values"]])
# Display the proportions, in the same order as the eigenvalues.
print(prop_var)
##  [1] 7.959737e-01 8.861879e-02 6.314969e-02 2.105577e-02 1.236280e-02
##  [6] 9.344155e-03 5.961443e-03 1.961046e-03 1.139296e-03 3.183450e-04
## [11] 9.202687e-05 2.294093e-05

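Eigenvalue yang lebih besar menunjukkan arah utama (komponen) yang menjelaskan porsi variasi data yang lebih dominan. Pada hasil di atas, komponen pertama menjelaskan sekitar 79,6% dari total variansi; namun vektor eigennya hampir seluruhnya dimuat oleh variabel X (bobot ≈ 0,99), yang tampaknya hanya nomor urut baris. Karena analisis eigen pada matriks kovarians peka terhadap skala, dominasi ini terutama mencerminkan besarnya variansi X, bukan pola di antara indikator klinis.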

Kesimpulan

Berdasarkan analisis data pasien Hepatitis C, terlihat bahwa beberapa indikator kesehatan saling berkaitan dan bersama-sama membentuk pola kondisi pasien; korelasi terkuat pada matriks korelasi muncul antara ALB dan PROT (0,57), AST dan GGT (0,48), ALP dan GGT (0,46), serta CHE dan CHOL (0,43). Pola ini membantu kita memahami faktor mana yang paling berperan dalam perbedaan kondisi kesehatan antar pasien. Dengan memahami pola tersebut, data dapat digunakan sebagai dasar pengambilan keputusan dan analisis lanjutan di bidang kesehatan.