R Markdown

library(FactoMineR)
library(factoextra)
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Load the decathlon2 example data set. The original call misspelled the name
# as "decanthlon2", which made data() warn that the data set was not found.
data(decathlon2)
# Preview the first rows of the data set.
head(decathlon2)
##           X100m Long.jump Shot.put High.jump X400m X110m.hurdle Discus
## SEBRLE    11.04      7.58    14.83      2.07 49.81        14.69  43.75
## CLAY      10.76      7.40    14.26      1.86 49.37        14.05  50.72
## BERNARD   11.02      7.23    14.25      1.92 48.93        14.99  40.87
## YURKOV    11.34      7.09    15.19      2.10 50.42        15.31  46.26
## ZSIVOCZKY 11.13      7.30    13.48      2.01 48.62        14.17  45.67
## McMULLEN  10.83      7.31    13.76      2.13 49.91        14.38  44.41
##           Pole.vault Javeline X1500m Rank Points Competition
## SEBRLE          5.02    63.19  291.7    1   8217    Decastar
## CLAY            4.92    60.15  301.5    2   8122    Decastar
## BERNARD         5.32    62.77  280.1    4   8067    Decastar
## YURKOV          4.72    63.44  276.4    5   8036    Decastar
## ZSIVOCZKY       4.42    55.37  268.0    7   8004    Decastar
## McMULLEN        4.42    56.37  285.1    8   7995    Decastar
# Show the compact structure (column names and types) of the data set.
utils::str(decathlon2)
## 'data.frame':    27 obs. of  13 variables:
##  $ X100m       : num  11 10.8 11 11.3 11.1 ...
##  $ Long.jump   : num  7.58 7.4 7.23 7.09 7.3 7.31 6.81 7.56 6.97 7.27 ...
##  $ Shot.put    : num  14.8 14.3 14.2 15.2 13.5 ...
##  $ High.jump   : num  2.07 1.86 1.92 2.1 2.01 2.13 1.95 1.86 1.95 1.98 ...
##  $ X400m       : num  49.8 49.4 48.9 50.4 48.6 ...
##  $ X110m.hurdle: num  14.7 14.1 15 15.3 14.2 ...
##  $ Discus      : num  43.8 50.7 40.9 46.3 45.7 ...
##  $ Pole.vault  : num  5.02 4.92 5.32 4.72 4.42 4.42 4.92 4.82 4.72 4.62 ...
##  $ Javeline    : num  63.2 60.1 62.8 63.4 55.4 ...
##  $ X1500m      : num  292 302 280 276 268 ...
##  $ Rank        : int  1 2 4 5 7 8 9 10 11 12 ...
##  $ Points      : int  8217 8122 8067 8036 8004 7995 7802 7733 7708 7651 ...
##  $ Competition : Factor w/ 2 levels "Decastar","OlympicG": 1 1 1 1 1 1 1 1 1 1 ...
# Keep rows 1-23 and columns 1-10 of decathlon2 as the working data set.
decathlon2.active <- decathlon2[seq_len(23), seq_len(10)]
# Preview the first 3 rows of the first 5 columns.
head(decathlon2.active[, seq_len(5)], n = 3)
##         X100m Long.jump Shot.put High.jump X400m
## SEBRLE  11.04      7.58    14.83      2.07 49.81
## CLAY    10.76      7.40    14.26      1.86 49.37
## BERNARD 11.02      7.23    14.25      1.92 48.93
# Print the full working data set.
print(decathlon2.active)
##             X100m Long.jump Shot.put High.jump X400m X110m.hurdle Discus
## SEBRLE      11.04      7.58    14.83      2.07 49.81        14.69  43.75
## CLAY        10.76      7.40    14.26      1.86 49.37        14.05  50.72
## BERNARD     11.02      7.23    14.25      1.92 48.93        14.99  40.87
## YURKOV      11.34      7.09    15.19      2.10 50.42        15.31  46.26
## ZSIVOCZKY   11.13      7.30    13.48      2.01 48.62        14.17  45.67
## McMULLEN    10.83      7.31    13.76      2.13 49.91        14.38  44.41
## MARTINEAU   11.64      6.81    14.57      1.95 50.14        14.93  47.60
## HERNU       11.37      7.56    14.41      1.86 51.10        15.06  44.99
## BARRAS      11.33      6.97    14.09      1.95 49.48        14.48  42.10
## NOOL        11.33      7.27    12.68      1.98 49.20        15.29  37.92
## BOURGUIGNON 11.36      6.80    13.46      1.86 51.16        15.67  40.49
## Sebrle      10.85      7.84    16.36      2.12 48.36        14.05  48.72
## Clay        10.44      7.96    15.23      2.06 49.19        14.13  50.11
## Karpov      10.50      7.81    15.93      2.09 46.81        13.97  51.65
## Macey       10.89      7.47    15.73      2.15 48.97        14.56  48.34
## Warners     10.62      7.74    14.48      1.97 47.97        14.01  43.73
## Zsivoczky   10.91      7.14    15.31      2.12 49.40        14.95  45.62
## Hernu       10.97      7.19    14.65      2.03 48.73        14.25  44.72
## Bernard     10.69      7.48    14.80      2.12 49.13        14.17  44.75
## Schwarzl    10.98      7.49    14.01      1.94 49.76        14.25  42.43
## Pogorelov   10.95      7.31    15.10      2.06 50.79        14.21  44.60
## Schoenbeck  10.90      7.30    14.77      1.88 50.30        14.34  44.41
## Barras      11.14      6.99    14.91      1.94 49.41        14.37  44.83
##             Pole.vault Javeline X1500m
## SEBRLE            5.02    63.19 291.70
## CLAY              4.92    60.15 301.50
## BERNARD           5.32    62.77 280.10
## YURKOV            4.72    63.44 276.40
## ZSIVOCZKY         4.42    55.37 268.00
## McMULLEN          4.42    56.37 285.10
## MARTINEAU         4.92    52.33 262.10
## HERNU             4.82    57.19 285.10
## BARRAS            4.72    55.40 282.00
## NOOL              4.62    57.44 266.60
## BOURGUIGNON       5.02    54.68 291.70
## Sebrle            5.00    70.52 280.01
## Clay              4.90    69.71 282.00
## Karpov            4.60    55.54 278.11
## Macey             4.40    58.46 265.42
## Warners           4.90    55.39 278.05
## Zsivoczky         4.70    63.45 269.54
## Hernu             4.80    57.76 264.35
## Bernard           4.40    55.27 276.31
## Schwarzl          5.10    56.32 273.56
## Pogorelov         5.00    53.45 287.63
## Schoenbeck        5.00    60.89 278.82
## Barras            4.60    64.55 267.09
# Standardise every column: subtract the column mean, divide by the column
# standard deviation. The centers and scales used are kept as attributes.
s.decathlon2.active <- scale(
  decathlon2.active,
  center = TRUE,
  scale = TRUE
)
print(s.decathlon2.active)
##                   X100m  Long.jump    Shot.put  High.jump       X400m
## SEBRLE       0.13427173  0.7352966  0.24860896  0.6475727  0.37430757
## CLAY        -0.79552392  0.1609328 -0.42618678 -1.5244940 -0.06260046
## BERNARD      0.06785776 -0.3815218 -0.43802531 -0.9039035 -0.49950849
## YURKOV       1.13048136 -0.8282492  0.67479574  0.9578679  0.98002098
## ZSIVOCZKY    0.43313462 -0.1581581 -1.34959148  0.0269822 -0.80733006
## McMULLEN    -0.56307501 -0.1262490 -1.01811287  1.2681632  0.47360485
## MARTINEAU    2.12669099 -1.7217040 -0.05919261 -0.5936083  0.70198859
## HERNU        1.23010232  0.6714784 -0.24860896 -1.5244940  1.65524248
## BARRAS       1.09727437 -1.2111584 -0.62744165 -0.5936083  0.04662655
## NOOL         1.09727437 -0.2538854 -2.29667323 -0.2833131 -0.23140583
## BOURGUIGNON  1.19689533 -1.7536131 -1.37326853 -1.5244940  1.71482085
## Sebrle      -0.49666103  1.5649332  2.05990279  1.1647314 -1.06550298
## Clay        -1.85814752  1.9478424  0.72214983  0.5441409 -0.24133556
## Karpov      -1.65890560  1.4692059  1.55084635  0.8544362 -2.60461082
## Macey       -0.36383308  0.3842965  1.31407592  1.4750267 -0.45978958
## Warners     -1.26042174  1.2458422 -0.16573931 -0.3867448 -1.45276237
## Zsivoczky   -0.29741911 -0.6687037  0.81685800  1.1647314 -0.03281128
## Hernu       -0.09817718 -0.5091582  0.03551557  0.2338457 -0.69810305
## Bernard     -1.02797283  0.4162056  0.21309339  1.1647314 -0.30091393
## Schwarzl    -0.06497019  0.4481147 -0.72214983 -0.6970400  0.32465893
## Pogorelov   -0.16459116 -0.1262490  0.56824905  0.5441409  1.34742091
## Schoenbeck  -0.33062609 -0.1581581  0.17757783 -1.3176305  0.86086424
## Barras       0.46634161 -1.1473402  0.34331713 -0.6970400 -0.02288155
##             X110m.hurdle      Discus  Pole.vault   Javeline      X1500m
## SEBRLE        0.32259025 -0.42405847  0.89550300  0.8265919  1.38063089
## CLAY         -1.00011964  1.67152766  0.49479154  0.2099771  2.35950606
## BERNARD       0.94261052 -1.28995346  2.09763738  0.7414017  0.22196233
## YURKOV        1.60396546  0.33059306 -0.30663138  0.8773003 -0.14761299
## ZSIVOCZKY    -0.75211154  0.15320485 -1.50876575 -0.7595685 -0.98664884
## McMULLEN     -0.31809735 -0.22562421 -1.50876575 -0.5567347  0.72138844
## MARTINEAU     0.81860646  0.73347475  0.49479154 -1.3761833 -1.57597164
## HERNU         1.08728191 -0.05124258  0.09408008 -0.3904110  0.72138844
## BARRAS       -0.11142393 -0.92014414 -0.30663138 -0.7534835  0.41174425
## NOOL          1.56263078 -2.17689451 -0.70734284 -0.3397025 -1.12648815
## BOURGUIGNON   2.34798978 -1.40420349  0.89550300 -0.8995238  1.38063089
## Sebrle       -1.00011964  1.07021169  0.81536071  2.3133637  0.21297266
## Clay         -0.83478090  1.48812629  0.41464925  2.1490683  0.41174425
## Karpov       -1.16545838  1.95113958 -0.78748513 -0.7250868  0.02319074
## Macey         0.05391481  0.95596166 -1.58890805 -0.1328120 -1.24435271
## Warners      -1.08278901 -0.43007163  0.41464925 -0.7555118  0.01719763
## Zsivoczky     0.85994115  0.13817195 -0.38677367  0.8793286 -0.83282560
## Hernu        -0.58677280 -0.13242023  0.01393779 -0.2747957 -1.35122990
## Bernard      -0.75211154 -0.12340049 -1.58890805 -0.7798519 -0.15660265
## Schwarzl     -0.58677280 -0.82092701  1.21607217 -0.5668764 -0.43128701
## Pogorelov    -0.66944217 -0.16849919  0.81536071 -1.1490094  0.97409804
## Schoenbeck   -0.40076672 -0.22562421  0.81536071  0.3600741  0.09410925
## Barras       -0.33876469 -0.09934785 -0.78748513  1.1024458 -1.07754439
## attr(,"scaled:center")
##        X100m    Long.jump     Shot.put    High.jump        X400m X110m.hurdle 
##    10.999565     7.349565    14.620000     2.007391    49.433043    14.533913 
##       Discus   Pole.vault     Javeline       X1500m 
##    45.160435     4.796522    59.114783   277.877826 
## attr(,"scaled:scale")
##        X100m    Long.jump     Shot.put    High.jump        X400m X110m.hurdle 
##   0.30114144   0.31339023   0.84470005   0.09668211   1.00707693   0.48385515 
##       Discus   Pole.vault     Javeline       X1500m 
##   3.32603843   0.24955613   4.93014463  10.01149111
# 2.2 Assess the relationships between variables
# Correlation matrix of the standardised data, shown to 2 decimal places.
s.corr <- cor(s.decathlon2.active)
print(round(s.corr, digits = 2))
##              X100m Long.jump Shot.put High.jump X400m X110m.hurdle Discus
## X100m         1.00     -0.76    -0.45     -0.40  0.59         0.73  -0.48
## Long.jump    -0.76      1.00     0.44      0.34 -0.51        -0.59   0.46
## Shot.put     -0.45      0.44     1.00      0.53 -0.31        -0.38   0.71
## High.jump    -0.40      0.34     0.53      1.00 -0.37        -0.25   0.34
## X400m         0.59     -0.51    -0.31     -0.37  1.00         0.58  -0.36
## X110m.hurdle  0.73     -0.59    -0.38     -0.25  0.58         1.00  -0.53
## Discus       -0.48      0.46     0.71      0.34 -0.36        -0.53   1.00
## Pole.vault    0.09      0.02     0.02     -0.50  0.25         0.14  -0.19
## Javeline     -0.29      0.37     0.48      0.22 -0.13        -0.07   0.28
## X1500m       -0.22      0.22    -0.05     -0.26  0.29        -0.05   0.08
##              Pole.vault Javeline X1500m
## X100m              0.09    -0.29  -0.22
## Long.jump          0.02     0.37   0.22
## Shot.put           0.02     0.48  -0.05
## High.jump         -0.50     0.22  -0.26
## X400m              0.25    -0.13   0.29
## X110m.hurdle       0.14    -0.07  -0.05
## Discus            -0.19     0.28   0.08
## Pole.vault         1.00     0.23   0.39
## Javeline           0.23     1.00   0.09
## X1500m             0.39     0.09   1.00
# 2.3 Compute the eigenvalues

# Eigen decomposition of the correlation matrix. The result is stored under
# the name `eigen`, which shadows the base function's name as a variable.
eigen <- eigen(s.corr)
print(eigen)
## eigen() decomposition
## $values
##  [1] 4.1242133 1.8385309 1.2391403 0.8194402 0.7015528 0.4228828 0.3025817
##  [8] 0.2744700 0.1552169 0.1219710
## 
## $vectors
##               [,1]        [,2]        [,3]        [,4]       [,5]         [,6]
##  [1,]  0.418859080 -0.13230683 -0.27089959 -0.03708806  0.2321476 -0.054398099
##  [2,] -0.391064807  0.20713320  0.17117519  0.12746997 -0.2783669  0.051865558
##  [3,] -0.361388111  0.06298590 -0.46497777 -0.14191803  0.2970589  0.368739186
##  [4,] -0.300413236 -0.34309742 -0.29652805 -0.15968342 -0.4807859  0.437716883
##  [5,]  0.345478567  0.21400770 -0.25470839 -0.47592968 -0.1240569  0.075796432
##  [6,]  0.376265119 -0.01824645 -0.40325254  0.01866477 -0.2676975 -0.004048005
##  [7,] -0.365965721  0.03662510 -0.15857927 -0.43636361  0.4873988 -0.305315353
##  [8,]  0.106985591  0.59549862 -0.08449563  0.37447391  0.2646712  0.503563524
##  [9,] -0.210864329  0.28475723 -0.54270782  0.36646463 -0.2361698 -0.556821016
## [10,] -0.002106782  0.57855748  0.19715884 -0.49491281 -0.3142987 -0.064663250
##              [,7]        [,8]        [,9]       [,10]
##  [1,] -0.16604375 -0.19988005  0.76924639  0.12718339
##  [2,] -0.28056361 -0.75850657  0.13094589  0.08509665
##  [3,] -0.01797323  0.04649571 -0.12129309  0.62263702
##  [4,]  0.05118848  0.16111045  0.28463225 -0.38244596
##  [5,]  0.52012255 -0.44579641 -0.20854176 -0.09784197
##  [6,] -0.67276768 -0.01592804 -0.41058421 -0.04475363
##  [7,] -0.25946615 -0.07550934 -0.03391600 -0.49418361
##  [8,] -0.01889413  0.06282691  0.06540692 -0.39288155
##  [9,]  0.24281145  0.10086127  0.10268134 -0.01103627
## [10,] -0.20245828  0.37119711  0.25950868  0.17991689
# 2.4 Determine the principal components
# Run PCA on the first 10 columns, centering and scaling each variable.
# `TRUE` is spelled out (T can be reassigned) and the argument is named
# `scale.` exactly, rather than relying on partial matching of `scale`.
pca <- prcomp(decathlon2.active[1:10], center = TRUE, scale. = TRUE)
pca
## Standard deviations (1, .., p=10):
##  [1] 2.0308159 1.3559244 1.1131668 0.9052294 0.8375875 0.6502944 0.5500742
##  [8] 0.5238988 0.3939758 0.3492435
## 
## Rotation (n x k) = (10 x 10):
##                       PC1         PC2         PC3         PC4        PC5
## X100m        -0.418859080 -0.13230683 -0.27089959  0.03708806 -0.2321476
## Long.jump     0.391064807  0.20713320  0.17117519 -0.12746997  0.2783669
## Shot.put      0.361388111  0.06298590 -0.46497777  0.14191803 -0.2970589
## High.jump     0.300413236 -0.34309742 -0.29652805  0.15968342  0.4807859
## X400m        -0.345478567  0.21400770 -0.25470839  0.47592968  0.1240569
## X110m.hurdle -0.376265119 -0.01824645 -0.40325254 -0.01866477  0.2676975
## Discus        0.365965721  0.03662510 -0.15857927  0.43636361 -0.4873988
## Pole.vault   -0.106985591  0.59549862 -0.08449563 -0.37447391 -0.2646712
## Javeline      0.210864329  0.28475723 -0.54270782 -0.36646463  0.2361698
## X1500m        0.002106782  0.57855748  0.19715884  0.49491281  0.3142987
##                       PC6         PC7         PC8         PC9        PC10
## X100m        -0.054398099 -0.16604375 -0.19988005 -0.76924639  0.12718339
## Long.jump     0.051865558 -0.28056361 -0.75850657 -0.13094589  0.08509665
## Shot.put      0.368739186 -0.01797323  0.04649571  0.12129309  0.62263702
## High.jump     0.437716883  0.05118848  0.16111045 -0.28463225 -0.38244596
## X400m         0.075796432  0.52012255 -0.44579641  0.20854176 -0.09784197
## X110m.hurdle -0.004048005 -0.67276768 -0.01592804  0.41058421 -0.04475363
## Discus       -0.305315353 -0.25946615 -0.07550934  0.03391600 -0.49418361
## Pole.vault    0.503563524 -0.01889413  0.06282691 -0.06540692 -0.39288155
## Javeline     -0.556821016  0.24281145  0.10086127 -0.10268134 -0.01103627
## X1500m       -0.064663250 -0.20245828  0.37119711 -0.25950868  0.17991689
# Report standard deviation and (cumulative) proportion of variance per PC.
print(summary(pca))
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.0308 1.3559 1.1132 0.90523 0.83759 0.65029 0.55007
## Proportion of Variance 0.4124 0.1839 0.1239 0.08194 0.07016 0.04229 0.03026
## Cumulative Proportion  0.4124 0.5963 0.7202 0.80213 0.87229 0.91458 0.94483
##                            PC8     PC9   PC10
## Standard deviation     0.52390 0.39398 0.3492
## Proportion of Variance 0.02745 0.01552 0.0122
## Cumulative Proportion  0.97228 0.98780 1.0000
#####
# Compute the correlation coefficients
# Correlation matrix of the unscaled working data set.
s.corr <- cor(x = decathlon2.active)

# Display the matrix rounded to 2 decimal places.
print(round(s.corr, 2))
##              X100m Long.jump Shot.put High.jump X400m X110m.hurdle Discus
## X100m         1.00     -0.76    -0.45     -0.40  0.59         0.73  -0.48
## Long.jump    -0.76      1.00     0.44      0.34 -0.51        -0.59   0.46
## Shot.put     -0.45      0.44     1.00      0.53 -0.31        -0.38   0.71
## High.jump    -0.40      0.34     0.53      1.00 -0.37        -0.25   0.34
## X400m         0.59     -0.51    -0.31     -0.37  1.00         0.58  -0.36
## X110m.hurdle  0.73     -0.59    -0.38     -0.25  0.58         1.00  -0.53
## Discus       -0.48      0.46     0.71      0.34 -0.36        -0.53   1.00
## Pole.vault    0.09      0.02     0.02     -0.50  0.25         0.14  -0.19
## Javeline     -0.29      0.37     0.48      0.22 -0.13        -0.07   0.28
## X1500m       -0.22      0.22    -0.05     -0.26  0.29        -0.05   0.08
##              Pole.vault Javeline X1500m
## X100m              0.09    -0.29  -0.22
## Long.jump          0.02     0.37   0.22
## Shot.put           0.02     0.48  -0.05
## High.jump         -0.50     0.22  -0.26
## X400m              0.25    -0.13   0.29
## X110m.hurdle       0.14    -0.07  -0.05
## Discus            -0.19     0.28   0.08
## Pole.vault         1.00     0.23   0.39
## Javeline           0.23     1.00   0.09
## X1500m             0.39     0.09   1.00
# Step 3: compute the eigenvalues, i.e. how much each component explains.
eigen <- eigen(s.corr) # the eigenvalues are, in essence, variances


# Step 4: analysis with prcomp
# `TRUE` is spelled out (T can be reassigned) and the argument is named
# `scale.` exactly, rather than relying on partial matching of `scale`.
pca <- prcomp(decathlon2.active[1:10], center = TRUE, scale. = TRUE)
print(pca)
## Standard deviations (1, .., p=10):
##  [1] 2.0308159 1.3559244 1.1131668 0.9052294 0.8375875 0.6502944 0.5500742
##  [8] 0.5238988 0.3939758 0.3492435
## 
## Rotation (n x k) = (10 x 10):
##                       PC1         PC2         PC3         PC4        PC5
## X100m        -0.418859080 -0.13230683 -0.27089959  0.03708806 -0.2321476
## Long.jump     0.391064807  0.20713320  0.17117519 -0.12746997  0.2783669
## Shot.put      0.361388111  0.06298590 -0.46497777  0.14191803 -0.2970589
## High.jump     0.300413236 -0.34309742 -0.29652805  0.15968342  0.4807859
## X400m        -0.345478567  0.21400770 -0.25470839  0.47592968  0.1240569
## X110m.hurdle -0.376265119 -0.01824645 -0.40325254 -0.01866477  0.2676975
## Discus        0.365965721  0.03662510 -0.15857927  0.43636361 -0.4873988
## Pole.vault   -0.106985591  0.59549862 -0.08449563 -0.37447391 -0.2646712
## Javeline      0.210864329  0.28475723 -0.54270782 -0.36646463  0.2361698
## X1500m        0.002106782  0.57855748  0.19715884  0.49491281  0.3142987
##                       PC6         PC7         PC8         PC9        PC10
## X100m        -0.054398099 -0.16604375 -0.19988005 -0.76924639  0.12718339
## Long.jump     0.051865558 -0.28056361 -0.75850657 -0.13094589  0.08509665
## Shot.put      0.368739186 -0.01797323  0.04649571  0.12129309  0.62263702
## High.jump     0.437716883  0.05118848  0.16111045 -0.28463225 -0.38244596
## X400m         0.075796432  0.52012255 -0.44579641  0.20854176 -0.09784197
## X110m.hurdle -0.004048005 -0.67276768 -0.01592804  0.41058421 -0.04475363
## Discus       -0.305315353 -0.25946615 -0.07550934  0.03391600 -0.49418361
## Pole.vault    0.503563524 -0.01889413  0.06282691 -0.06540692 -0.39288155
## Javeline     -0.556821016  0.24281145  0.10086127 -0.10268134 -0.01103627
## X1500m       -0.064663250 -0.20245828  0.37119711 -0.25950868  0.17991689
# Step 4b: plot the prcomp results
# Plot 1: eigenvalues of the PCA.
fviz_eig(pca)

# Plot 2: individuals, colored by their cos2 value.
fviz_pca_ind(
  pca,
  col.ind = "cos2",
  gradient.cols = c("red", "blue", "green1"),
  repel = TRUE
)

# Plot 3: variables, colored by their contribution.
fviz_pca_var(
  pca,
  col.var = "contrib",
  gradient.cols = c("red", "blue", "black"),
  repel = TRUE
)