# Import data
# Load the HCV CSV from the working directory and show the column types.
data <- utils::read.csv(file = "hcvdat0.csv")
utils::str(data)
## 'data.frame':    615 obs. of  14 variables:
##  $ X       : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Category: chr  "0=Blood Donor" "0=Blood Donor" "0=Blood Donor" "0=Blood Donor" ...
##  $ Age     : int  32 32 32 32 32 32 32 32 32 32 ...
##  $ Sex     : chr  "m" "m" "m" "m" ...
##  $ ALB     : num  38.5 38.5 46.9 43.2 39.2 41.6 46.3 42.2 50.9 42.4 ...
##  $ ALP     : num  52.5 70.3 74.7 52 74.1 43.3 41.3 41.9 65.5 86.3 ...
##  $ ALT     : num  7.7 18 36.2 30.6 32.6 18.5 17.5 35.8 23.2 20.3 ...
##  $ AST     : num  22.1 24.7 52.6 22.6 24.8 19.7 17.8 31.1 21.2 20 ...
##  $ BIL     : num  7.5 3.9 6.1 18.9 9.6 12.3 8.5 16.1 6.9 35.2 ...
##  $ CHE     : num  6.93 11.17 8.84 7.33 9.15 ...
##  $ CHOL    : num  3.23 4.8 5.2 4.74 4.32 6.05 4.79 4.6 4.1 4.45 ...
##  $ CREA    : num  106 74 86 80 76 111 70 109 83 81 ...
##  $ GGT     : num  12.1 15.6 33.2 33.8 29.9 91 16.9 21.5 13.7 15.9 ...
##  $ PROT    : num  69 76.5 79.3 75.7 68.7 74 74.5 67.1 71.3 69.9 ...
# Per-column summary statistics. The NA's rows in the printed output show
# missing values in ALB, ALP, ALT, CHOL and PROT.
summary(data)
##        X           Category              Age            Sex           
##  Min.   :  1.0   Length:615         Min.   :19.00   Length:615        
##  1st Qu.:154.5   Class :character   1st Qu.:39.00   Class :character  
##  Median :308.0   Mode  :character   Median :47.00   Mode  :character  
##  Mean   :308.0                      Mean   :47.41                     
##  3rd Qu.:461.5                      3rd Qu.:54.00                     
##  Max.   :615.0                      Max.   :77.00                     
##                                                                       
##       ALB             ALP              ALT              AST        
##  Min.   :14.90   Min.   : 11.30   Min.   :  0.90   Min.   : 10.60  
##  1st Qu.:38.80   1st Qu.: 52.50   1st Qu.: 16.40   1st Qu.: 21.60  
##  Median :41.95   Median : 66.20   Median : 23.00   Median : 25.90  
##  Mean   :41.62   Mean   : 68.28   Mean   : 28.45   Mean   : 34.79  
##  3rd Qu.:45.20   3rd Qu.: 80.10   3rd Qu.: 33.08   3rd Qu.: 32.90  
##  Max.   :82.20   Max.   :416.60   Max.   :325.30   Max.   :324.00  
##  NA's   :1       NA's   :18       NA's   :1                        
##       BIL             CHE              CHOL            CREA        
##  Min.   :  0.8   Min.   : 1.420   Min.   :1.430   Min.   :   8.00  
##  1st Qu.:  5.3   1st Qu.: 6.935   1st Qu.:4.610   1st Qu.:  67.00  
##  Median :  7.3   Median : 8.260   Median :5.300   Median :  77.00  
##  Mean   : 11.4   Mean   : 8.197   Mean   :5.368   Mean   :  81.29  
##  3rd Qu.: 11.2   3rd Qu.: 9.590   3rd Qu.:6.060   3rd Qu.:  88.00  
##  Max.   :254.0   Max.   :16.410   Max.   :9.670   Max.   :1079.10  
##                                   NA's   :10                       
##       GGT              PROT      
##  Min.   :  4.50   Min.   :44.80  
##  1st Qu.: 15.70   1st Qu.:69.30  
##  Median : 23.30   Median :72.20  
##  Mean   : 39.53   Mean   :72.04  
##  3rd Qu.: 40.20   3rd Qu.:75.40  
##  Max.   :650.90   Max.   :90.00  
##                   NA's   :1
# Keep only the numeric columns. vapply() pins the predicate result to exactly
# one logical per column, and drop = FALSE guarantees a data frame even when
# only a single column qualifies.
# NOTE(review): X looks like a row index (1..615 for 615 rows) rather than a
# measurement, yet it is retained here and flows into the matrices computed
# from data_num - confirm whether it should be excluded.
data_num <- data[, vapply(data, is.numeric, logical(1)), drop = FALSE]
str(data_num)
## 'data.frame':    615 obs. of  12 variables:
##  $ X   : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Age : int  32 32 32 32 32 32 32 32 32 32 ...
##  $ ALB : num  38.5 38.5 46.9 43.2 39.2 41.6 46.3 42.2 50.9 42.4 ...
##  $ ALP : num  52.5 70.3 74.7 52 74.1 43.3 41.3 41.9 65.5 86.3 ...
##  $ ALT : num  7.7 18 36.2 30.6 32.6 18.5 17.5 35.8 23.2 20.3 ...
##  $ AST : num  22.1 24.7 52.6 22.6 24.8 19.7 17.8 31.1 21.2 20 ...
##  $ BIL : num  7.5 3.9 6.1 18.9 9.6 12.3 8.5 16.1 6.9 35.2 ...
##  $ CHE : num  6.93 11.17 8.84 7.33 9.15 ...
##  $ CHOL: num  3.23 4.8 5.2 4.74 4.32 6.05 4.79 4.6 4.1 4.45 ...
##  $ CREA: num  106 74 86 80 76 111 70 109 83 81 ...
##  $ GGT : num  12.1 15.6 33.2 33.8 29.9 91 16.9 21.5 13.7 15.9 ...
##  $ PROT: num  69 76.5 79.3 75.7 68.7 74 74.5 67.1 71.3 69.9 ...
# Correlation Matrix
# Pearson correlations between all columns of data_num, computed only on rows
# that have no missing value in any column.
cor_matrix <- cor(x = data_num, use = "complete.obs", method = "pearson")
cor_matrix
##                X         Age          ALB         ALP         ALT         AST
## X     1.00000000  0.44305790 -0.315204550  0.01794376 -0.20023304  0.30360292
## Age   0.44305790  1.00000000 -0.191093637  0.17771977 -0.04057647  0.07273886
## ALB  -0.31520455 -0.19109364  1.000000000 -0.14611991  0.03949714 -0.17760895
## ALP   0.01794376  0.17771977 -0.146119911  1.00000000  0.22160301  0.06702428
## ALT  -0.20023304 -0.04057647  0.039497139  0.22160301  1.00000000  0.19865775
## AST   0.30360292  0.07273886 -0.177608947  0.06702428  0.19865775  1.00000000
## BIL   0.17651109  0.03965486 -0.169597498  0.05837241 -0.10679662  0.30957974
## CHE  -0.27853454 -0.07586328  0.360919403  0.02948169  0.22434447 -0.19727042
## CHOL -0.05794709  0.12474161  0.210419878  0.12590008  0.14999727 -0.20121300
## CREA -0.02016270 -0.02514225  0.001433247  0.15390895 -0.03610554 -0.01794810
## GGT   0.22146275  0.14337927 -0.147598318  0.46130000  0.21970686  0.47777362
## PROT -0.16648242 -0.15975998  0.570725680 -0.06308514  0.01678633  0.01740394
##              BIL         CHE         CHOL         CREA          GGT        PROT
## X     0.17651109 -0.27853454 -0.057947087 -0.020162704  0.221462754 -0.16648242
## Age   0.03965486 -0.07586328  0.124741615 -0.025142253  0.143379268 -0.15975998
## ALB  -0.16959750  0.36091940  0.210419878  0.001433247 -0.147598318  0.57072568
## ALP   0.05837241  0.02948169  0.125900079  0.153908950  0.461299996 -0.06308514
## ALT  -0.10679662  0.22434447  0.149997271 -0.036105541  0.219706857  0.01678633
## AST   0.30957974 -0.19727042 -0.201213004 -0.017948098  0.477773617  0.01740394
## BIL   1.00000000 -0.32071323 -0.181569556  0.019909617  0.210566559 -0.05257491
## CHE  -0.32071323  1.00000000  0.428018276 -0.012119999 -0.095716131  0.30628754
## CHOL -0.18156956  0.42801828  1.000000000 -0.051464078  0.008822692  0.24504950
## CREA  0.01990962 -0.01212000 -0.051464078  1.000000000  0.125353469 -0.03011070
## GGT   0.21056656 -0.09571613  0.008822692  0.125353469  1.000000000 -0.03712701
## PROT -0.05257491  0.30628754  0.245049503 -0.030110695 -0.037127008  1.00000000

Matriks korelasi (correlation matrix) digunakan untuk melihat hubungan antar variabel numerik dalam data. Nilai korelasi berada pada rentang -1 sampai 1. Jika nilainya mendekati 1, berarti kedua variabel memiliki hubungan yang kuat dan searah. Jika mendekati -1, berarti hubungannya kuat tetapi berlawanan arah. Nilai yang mendekati 0 menunjukkan bahwa hubungan antar variabel lemah atau hampir tidak ada. Dari hasil matriks korelasi, dapat diketahui variabel mana yang saling berkaitan dan mana yang relatif independen.

# Covariance matrix
# Variances (diagonal) and pairwise covariances of the columns of data_num,
# computed only on rows that have no missing value in any column.
cov_matrix <- cov(x = data_num, use = "complete.obs", method = "pearson")
cov_matrix
##                X        Age          ALB        ALP         ALT         AST
## X    30325.61267 766.254175 -316.2678135  80.997414 -727.477884 1737.677172
## Age    766.25418  98.631388  -10.9348172  45.750544   -8.407388   23.742827
## ALB   -316.26781 -10.934817   33.1982701 -21.823283    4.747912  -33.634186
## ALP     80.99741  45.750544  -21.8232826 671.901949  119.841675   57.100968
## ALT   -727.47788  -8.407388    4.7479116 119.841675  435.269784  136.220708
## AST   1737.67717  23.742827  -33.6341863  57.100968  136.220708 1080.231200
## BIL    535.04466   6.855155  -17.0094554  26.337454  -38.783770  177.110426
## CHE   -106.27733  -1.650806    4.5564303   1.674411   10.255372  -14.206173
## CHOL   -11.39233   1.398606    1.3687395   3.684302    3.532962   -7.466047
## CREA  -178.00646 -12.658841    0.4186596 202.254881  -38.188738  -29.906045
## GGT   2094.23092  77.323769  -46.1804560 649.315069  248.909775  852.706557
## PROT  -155.07302  -8.486697   17.5892871  -8.746677    1.873261    3.059630
##             BIL         CHE        CHOL         CREA          GGT        PROT
## X    535.044661 -106.277326 -11.3923339 -178.0064562 2094.2309247 -155.073024
## Age    6.855155   -1.650806   1.3986055  -12.6588412   77.3237685   -8.486697
## ALB  -17.009455    4.556430   1.3687395    0.4186596  -46.1804560   17.589287
## ALP   26.337454    1.674411   3.6843023  202.2548814  649.3150694   -8.746677
## ALT  -38.783770   10.255372   3.5329616  -38.1887382  248.9097752    1.873261
## AST  177.110426  -14.206173  -7.4660468  -29.9060449  852.7065571    3.059630
## BIL  302.988734  -12.231702  -3.5680635   17.5694569  199.0314564   -4.895025
## CHE  -12.231702    4.800799   1.0587548   -1.3462991  -11.3883550    3.589626
## CHOL  -3.568064    1.058755   1.2745375   -2.9455248    0.5408745    1.479767
## CREA  17.569457   -1.346299  -2.9455248 2570.1849279  345.0941704   -8.165186
## GGT  199.031456  -11.388355   0.5408745  345.0941704 2948.7514092  -10.783808
## PROT  -4.895025    3.589626   1.4797666   -8.1651857  -10.7838076   28.610549

Matriks kovarians (covariance matrix) menunjukkan seberapa besar penyebaran data pada setiap variabel serta bagaimana dua variabel berubah secara bersamaan. Nilai pada diagonal matriks merupakan varians masing-masing variabel yang menunjukkan seberapa besar variasi data tersebut. Nilai di luar diagonal merupakan kovariansi yang menunjukkan hubungan perubahan antar dua variabel. Kovariansi bernilai positif berarti kedua variabel cenderung meningkat bersama, sedangkan nilai negatif menunjukkan arah perubahan yang berlawanan.

# Eigenvalues and eigenvectors
# Eigen-decomposition of the covariance matrix. symmetric = TRUE states
# explicitly what eigen() otherwise detects on its own for this matrix.
# NOTE(review): the first eigenvalue (3.06e+04) is close to the variance of X
# (30325.6) and the first eigenvector loads 0.994 on row 1 (presumably X), so
# the leading component may mostly reflect the row index and the unscaled
# units - confirm whether X should be dropped and the variables standardised.
eigen_result <- eigen(cov_matrix, symmetric = TRUE)

eigen_result[["values"]]
##  [1] 3.064615e+04 3.411952e+03 2.431355e+03 8.106778e+02 4.759857e+02
##  [6] 3.597636e+02 2.295242e+02 7.550312e+01 4.386454e+01 1.225674e+01
## [11] 3.543169e+00 8.832592e-01
eigen_result[["vectors"]]
##                [,1]          [,2]          [,3]          [,4]         [,5]
##  [1,]  0.9943168408  0.0781794505 -0.0428895929 -0.0328121735  0.020212889
##  [2,]  0.0252084504 -0.0048342063  0.0049027524 -0.0592379614  0.060040370
##  [3,] -0.0104849795  0.0082864828 -0.0043684634 -0.0004021013 -0.025903024
##  [4,]  0.0044080667 -0.2356681437  0.0356393108 -0.4374522941  0.710618877
##  [5,] -0.0230437543 -0.1041992516  0.0916985555  0.0673718565  0.547112440
##  [6,]  0.0607143554 -0.2580042619  0.1999667225  0.8489173365  0.256978864
##  [7,]  0.0184370838 -0.0576707031  0.0297876343  0.1542686413 -0.079875667
##  [8,] -0.0035236442  0.0013755262 -0.0001678846 -0.0100541902  0.011976556
##  [9,] -0.0003865148  0.0002186455  0.0009124350 -0.0103444081  0.005339638
## [10,] -0.0053562951 -0.4023281847 -0.9087666493  0.1021482314  0.004426586
## [11,]  0.0771038647 -0.8340483849  0.3488376959 -0.2101014484 -0.343945278
## [12,] -0.0050747796  0.0005044839  0.0043774244  0.0163142811 -0.009811142
##               [,6]          [,7]         [,8]          [,9]        [,10]
##  [1,] -0.029899000  0.0117861445  0.028276257 -0.0075378056  0.002522380
##  [2,]  0.013200918 -0.0142637105 -0.987882489 -0.1224848387 -0.020137828
##  [3,] -0.041230519 -0.0196324002  0.073071176 -0.7117277819  0.693190933
##  [4,]  0.462008490 -0.1601735463  0.082208949 -0.0250189827  0.017575507
##  [5,] -0.680285668  0.4612712030  0.014509161 -0.0004604439 -0.009521025
##  [6,]  0.171499060 -0.2691408215 -0.025811818 -0.0013787705  0.020143047
##  [7,]  0.524197933  0.8290819581 -0.013118146 -0.0459489418  0.009776830
##  [8,] -0.030163171 -0.0150483056  0.002265340 -0.1084487331 -0.017859713
##  [9,] -0.009869622 -0.0007455209 -0.011252758 -0.0491322591 -0.030609508
## [10,] -0.039663866  0.0110805318 -0.008986684 -0.0015590011 -0.003436025
## [11,] -0.119064616  0.0091672413 -0.002651554  0.0038121325 -0.002258407
## [12,] -0.006258120 -0.0244543592  0.099611920 -0.6793031525 -0.718958094
##               [,11]         [,12]
##  [1,]  1.166495e-03 -0.0005701834
##  [2,] -1.638485e-02 -0.0153975609
##  [3,] -6.807153e-02  0.0016019737
##  [4,] -4.373501e-03 -0.0027458982
##  [5,] -2.126558e-02 -0.0043095508
##  [6,]  9.352837e-03  0.0072123962
##  [7,]  2.699176e-02 -0.0001634271
##  [8,]  9.680597e-01 -0.2222163944
##  [9,]  2.170998e-01  0.9742461763
## [10,]  4.016040e-04  0.0012264068
## [11,]  6.161861e-05 -0.0015769054
## [12,] -9.765782e-02 -0.0337955255

Eigen value menunjukkan seberapa besar variasi data yang dapat dijelaskan oleh setiap komponen utama. Semakin besar nilai eigen value, semakin besar peran komponen tersebut dalam merepresentasikan data. Eigen vector menunjukkan kontribusi masing-masing variabel terhadap komponen utama. Hasil ini dapat membantu memahami struktur data dan melihat variabel mana yang paling berpengaruh tanpa harus melihat semua variabel satu per satu.