# Load the HCV data set. The argument name is spelled out in full
# (stringsAsFactors) instead of relying on partial argument matching, so the
# text columns are explicitly kept as character vectors rather than factors.
hcv <- read.csv(
  "data/hcvdat0.csv",
  stringsAsFactors = FALSE
)
# Preview the first rows to check the column layout.
head(hcv)
## X Category Age Sex ALB ALP ALT AST BIL CHE CHOL CREA GGT PROT
## 1 1 0=Blood Donor 32 m 38.5 52.5 7.7 22.1 7.5 6.93 3.23 106 12.1 69.0
## 2 2 0=Blood Donor 32 m 38.5 70.3 18.0 24.7 3.9 11.17 4.80 74 15.6 76.5
## 3 3 0=Blood Donor 32 m 46.9 74.7 36.2 52.6 6.1 8.84 5.20 86 33.2 79.3
## 4 4 0=Blood Donor 32 m 43.2 52.0 30.6 22.6 18.9 7.33 4.74 80 33.8 75.7
## 5 5 0=Blood Donor 32 m 39.2 74.1 32.6 24.8 9.6 9.15 4.32 76 29.9 68.7
## 6 6 0=Blood Donor 32 m 41.6 43.3 18.5 19.7 12.3 9.92 6.05 111 91.0 74.0
# Keep only the numeric columns. vapply() always yields a logical vector
# (sapply() may return a list for an empty input), and drop = FALSE keeps the
# result a data frame even if only a single column qualifies.
hcv_num <- hcv[, vapply(hcv, is.numeric, logical(1)), drop = FALSE]
# NOTE(review): X looks like a row index (1, 2, 3, ... in the head() preview).
# It is retained here so the results below stay unchanged, but it probably
# should be excluded before cor()/cov()/eigen() -- confirm with the author.
# Remove every row that has a missing value in any remaining column.
hcv_num <- na.omit(hcv_num)
# Show the structure of the cleaned numeric data.
str(hcv_num)
## 'data.frame': 589 obs. of 12 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Age : int 32 32 32 32 32 32 32 32 32 32 ...
## $ ALB : num 38.5 38.5 46.9 43.2 39.2 41.6 46.3 42.2 50.9 42.4 ...
## $ ALP : num 52.5 70.3 74.7 52 74.1 43.3 41.3 41.9 65.5 86.3 ...
## $ ALT : num 7.7 18 36.2 30.6 32.6 18.5 17.5 35.8 23.2 20.3 ...
## $ AST : num 22.1 24.7 52.6 22.6 24.8 19.7 17.8 31.1 21.2 20 ...
## $ BIL : num 7.5 3.9 6.1 18.9 9.6 12.3 8.5 16.1 6.9 35.2 ...
## $ CHE : num 6.93 11.17 8.84 7.33 9.15 ...
## $ CHOL: num 3.23 4.8 5.2 4.74 4.32 6.05 4.79 4.6 4.1 4.45 ...
## $ CREA: num 106 74 86 80 76 111 70 109 83 81 ...
## $ GGT : num 12.1 15.6 33.2 33.8 29.9 91 16.9 21.5 13.7 15.9 ...
## $ PROT: num 69 76.5 79.3 75.7 68.7 74 74.5 67.1 71.3 69.9 ...
## - attr(*, "na.action")= 'omit' Named int [1:26] 122 320 330 414 425 434 499 541 542 546 ...
## ..- attr(*, "names")= chr [1:26] "122" "320" "330" "414" ...
# Pairwise correlation matrix of the cleaned numeric columns
# (cor() with its default method).
cor_mat <- cor(hcv_num)
print(cor_mat)
## X Age ALB ALP ALT AST
## X 1.00000000 0.44305790 -0.315204550 0.01794376 -0.20023304 0.30360292
## Age 0.44305790 1.00000000 -0.191093637 0.17771977 -0.04057647 0.07273886
## ALB -0.31520455 -0.19109364 1.000000000 -0.14611991 0.03949714 -0.17760895
## ALP 0.01794376 0.17771977 -0.146119911 1.00000000 0.22160301 0.06702428
## ALT -0.20023304 -0.04057647 0.039497139 0.22160301 1.00000000 0.19865775
## AST 0.30360292 0.07273886 -0.177608947 0.06702428 0.19865775 1.00000000
## BIL 0.17651109 0.03965486 -0.169597498 0.05837241 -0.10679662 0.30957974
## CHE -0.27853454 -0.07586328 0.360919403 0.02948169 0.22434447 -0.19727042
## CHOL -0.05794709 0.12474161 0.210419878 0.12590008 0.14999727 -0.20121300
## CREA -0.02016270 -0.02514225 0.001433247 0.15390895 -0.03610554 -0.01794810
## GGT 0.22146275 0.14337927 -0.147598318 0.46130000 0.21970686 0.47777362
## PROT -0.16648242 -0.15975998 0.570725680 -0.06308514 0.01678633 0.01740394
## BIL CHE CHOL CREA GGT PROT
## X 0.17651109 -0.27853454 -0.057947087 -0.020162704 0.221462754 -0.16648242
## Age 0.03965486 -0.07586328 0.124741615 -0.025142253 0.143379268 -0.15975998
## ALB -0.16959750 0.36091940 0.210419878 0.001433247 -0.147598318 0.57072568
## ALP 0.05837241 0.02948169 0.125900079 0.153908950 0.461299996 -0.06308514
## ALT -0.10679662 0.22434447 0.149997271 -0.036105541 0.219706857 0.01678633
## AST 0.30957974 -0.19727042 -0.201213004 -0.017948098 0.477773617 0.01740394
## BIL 1.00000000 -0.32071323 -0.181569556 0.019909617 0.210566559 -0.05257491
## CHE -0.32071323 1.00000000 0.428018276 -0.012119999 -0.095716131 0.30628754
## CHOL -0.18156956 0.42801828 1.000000000 -0.051464078 0.008822692 0.24504950
## CREA 0.01990962 -0.01212000 -0.051464078 1.000000000 0.125353469 -0.03011070
## GGT 0.21056656 -0.09571613 0.008822692 0.125353469 1.000000000 -0.03712701
## PROT -0.05257491 0.30628754 0.245049503 -0.030110695 -0.037127008 1.00000000
Matriks korelasi menunjukkan kekuatan dan arah hubungan linear antar variabel numerik tanpa dipengaruhi oleh skala pengukuran.
# Covariance matrix of the cleaned numeric columns, in their original units.
# It is the input to the eigen decomposition further down.
cov_mat <- cov(hcv_num)
print(cov_mat)
## X Age ALB ALP ALT AST
## X 30325.61267 766.254175 -316.2678135 80.997414 -727.477884 1737.677172
## Age 766.25418 98.631388 -10.9348172 45.750544 -8.407388 23.742827
## ALB -316.26781 -10.934817 33.1982701 -21.823283 4.747912 -33.634186
## ALP 80.99741 45.750544 -21.8232826 671.901949 119.841675 57.100968
## ALT -727.47788 -8.407388 4.7479116 119.841675 435.269784 136.220708
## AST 1737.67717 23.742827 -33.6341863 57.100968 136.220708 1080.231200
## BIL 535.04466 6.855155 -17.0094554 26.337454 -38.783770 177.110426
## CHE -106.27733 -1.650806 4.5564303 1.674411 10.255372 -14.206173
## CHOL -11.39233 1.398606 1.3687395 3.684302 3.532962 -7.466047
## CREA -178.00646 -12.658841 0.4186596 202.254881 -38.188738 -29.906045
## GGT 2094.23092 77.323769 -46.1804560 649.315069 248.909775 852.706557
## PROT -155.07302 -8.486697 17.5892871 -8.746677 1.873261 3.059630
## BIL CHE CHOL CREA GGT PROT
## X 535.044661 -106.277326 -11.3923339 -178.0064562 2094.2309247 -155.073024
## Age 6.855155 -1.650806 1.3986055 -12.6588412 77.3237685 -8.486697
## ALB -17.009455 4.556430 1.3687395 0.4186596 -46.1804560 17.589287
## ALP 26.337454 1.674411 3.6843023 202.2548814 649.3150694 -8.746677
## ALT -38.783770 10.255372 3.5329616 -38.1887382 248.9097752 1.873261
## AST 177.110426 -14.206173 -7.4660468 -29.9060449 852.7065571 3.059630
## BIL 302.988734 -12.231702 -3.5680635 17.5694569 199.0314564 -4.895025
## CHE -12.231702 4.800799 1.0587548 -1.3462991 -11.3883550 3.589626
## CHOL -3.568064 1.058755 1.2745375 -2.9455248 0.5408745 1.479767
## CREA 17.569457 -1.346299 -2.9455248 2570.1849279 345.0941704 -8.165186
## GGT 199.031456 -11.388355 0.5408745 345.0941704 2948.7514092 -10.783808
## PROT -4.895025 3.589626 1.4797666 -8.1651857 -10.7838076 28.610549
Matriks kovarians menggambarkan variasi bersama antar variabel dalam skala asli dan menjadi dasar untuk analisis eigen.
# Eigen decomposition of the covariance matrix.
eig <- eigen(cov_mat)
# Eigenvalues; eigen() returns them sorted in decreasing order.
eig[["values"]]
## [1] 3.064615e+04 3.411952e+03 2.431355e+03 8.106778e+02 4.759857e+02
## [6] 3.597636e+02 2.295242e+02 7.550312e+01 4.386454e+01 1.225674e+01
## [11] 3.543169e+00 8.832592e-01
# Eigenvectors, one per column, in the same order as the eigenvalues.
# Rows follow the variable order of cov_mat.
eig[["vectors"]]
## [,1] [,2] [,3] [,4] [,5]
## [1,] 0.9943168408 0.0781794505 -0.0428895929 -0.0328121735 0.020212889
## [2,] 0.0252084504 -0.0048342063 0.0049027524 -0.0592379614 0.060040370
## [3,] -0.0104849795 0.0082864828 -0.0043684634 -0.0004021013 -0.025903024
## [4,] 0.0044080667 -0.2356681437 0.0356393108 -0.4374522941 0.710618877
## [5,] -0.0230437543 -0.1041992516 0.0916985555 0.0673718565 0.547112440
## [6,] 0.0607143554 -0.2580042619 0.1999667225 0.8489173365 0.256978864
## [7,] 0.0184370838 -0.0576707031 0.0297876343 0.1542686413 -0.079875667
## [8,] -0.0035236442 0.0013755262 -0.0001678846 -0.0100541902 0.011976556
## [9,] -0.0003865148 0.0002186455 0.0009124350 -0.0103444081 0.005339638
## [10,] -0.0053562951 -0.4023281847 -0.9087666493 0.1021482314 0.004426586
## [11,] 0.0771038647 -0.8340483849 0.3488376959 -0.2101014484 -0.343945278
## [12,] -0.0050747796 0.0005044839 0.0043774244 0.0163142811 -0.009811142
## [,6] [,7] [,8] [,9] [,10]
## [1,] -0.029899000 0.0117861445 0.028276257 -0.0075378056 0.002522380
## [2,] 0.013200918 -0.0142637105 -0.987882489 -0.1224848387 -0.020137828
## [3,] -0.041230519 -0.0196324002 0.073071176 -0.7117277819 0.693190933
## [4,] 0.462008490 -0.1601735463 0.082208949 -0.0250189827 0.017575507
## [5,] -0.680285668 0.4612712030 0.014509161 -0.0004604439 -0.009521025
## [6,] 0.171499060 -0.2691408215 -0.025811818 -0.0013787705 0.020143047
## [7,] 0.524197933 0.8290819581 -0.013118146 -0.0459489418 0.009776830
## [8,] -0.030163171 -0.0150483056 0.002265340 -0.1084487331 -0.017859713
## [9,] -0.009869622 -0.0007455209 -0.011252758 -0.0491322591 -0.030609508
## [10,] -0.039663866 0.0110805318 -0.008986684 -0.0015590011 -0.003436025
## [11,] -0.119064616 0.0091672413 -0.002651554 0.0038121325 -0.002258407
## [12,] -0.006258120 -0.0244543592 0.099611920 -0.6793031525 -0.718958094
## [,11] [,12]
## [1,] 1.166495e-03 -0.0005701834
## [2,] -1.638485e-02 -0.0153975609
## [3,] -6.807153e-02 0.0016019737
## [4,] -4.373501e-03 -0.0027458982
## [5,] -2.126558e-02 -0.0043095508
## [6,] 9.352837e-03 0.0072123962
## [7,] 2.699176e-02 -0.0001634271
## [8,] 9.680597e-01 -0.2222163944
## [9,] 2.170998e-01 0.9742461763
## [10,] 4.016040e-04 0.0012264068
## [11,] 6.161861e-05 -0.0015769054
## [12,] -9.765782e-02 -0.0337955255
# Share of the total variance attributed to each eigenvalue
# (each eigenvalue divided by the sum of all eigenvalues).
prop_var <- prop.table(eig$values)
print(prop_var)
## [1] 7.959737e-01 8.861879e-02 6.314969e-02 2.105577e-02 1.236280e-02
## [6] 9.344155e-03 5.961443e-03 1.961046e-03 1.139296e-03 3.183450e-04
## [11] 9.202687e-05 2.294093e-05
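Eigenvalue yang lebih besar menunjukkan arah utama (komponen) yang menjelaskan porsi variasi data yang lebih dominan. Pada hasil di atas, komponen pertama menjelaskan sekitar 79,6% variasi total, dan vektor eigennya hampir seluruhnya dimuati oleh variabel pertama (X, bobot sekitar 0,994); karena X tampaknya hanya nomor urut baris, dominasi ini kemungkinan mencerminkan skala kolom indeks tersebut, bukan pola klinis, sehingga perlu diperiksa kembali.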
Berdasarkan analisis data pasien Hepatitis C, terlihat bahwa beberapa indikator kesehatan saling berkaitan dan bersama-sama membentuk pola kondisi pasien. Pola ini membantu kita memahami faktor mana yang paling berperan dalam perbedaan kondisi kesehatan antar pasien. Dengan memahami pola tersebut, data dapat digunakan sebagai dasar pengambilan keputusan dan analisis lanjutan di bidang kesehatan.