R Markdown
library(FactoMineR)
library(factoextra)
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Load the decathlon2 example data set (shipped with factoextra) and preview
# its first rows. The data set name must be spelled exactly "decathlon2";
# a misspelled name only triggers a "data set not found" warning.
data(decathlon2)
head(decathlon2)
## X100m Long.jump Shot.put High.jump X400m X110m.hurdle Discus
## SEBRLE 11.04 7.58 14.83 2.07 49.81 14.69 43.75
## CLAY 10.76 7.40 14.26 1.86 49.37 14.05 50.72
## BERNARD 11.02 7.23 14.25 1.92 48.93 14.99 40.87
## YURKOV 11.34 7.09 15.19 2.10 50.42 15.31 46.26
## ZSIVOCZKY 11.13 7.30 13.48 2.01 48.62 14.17 45.67
## McMULLEN 10.83 7.31 13.76 2.13 49.91 14.38 44.41
## Pole.vault Javeline X1500m Rank Points Competition
## SEBRLE 5.02 63.19 291.7 1 8217 Decastar
## CLAY 4.92 60.15 301.5 2 8122 Decastar
## BERNARD 5.32 62.77 280.1 4 8067 Decastar
## YURKOV 4.72 63.44 276.4 5 8036 Decastar
## ZSIVOCZKY 4.42 55.37 268.0 7 8004 Decastar
## McMULLEN 4.42 56.37 285.1 8 7995 Decastar
# Show the structure of the full data set: column names, types and a preview.
str(object = decathlon2)
## 'data.frame': 27 obs. of 13 variables:
## $ X100m : num 11 10.8 11 11.3 11.1 ...
## $ Long.jump : num 7.58 7.4 7.23 7.09 7.3 7.31 6.81 7.56 6.97 7.27 ...
## $ Shot.put : num 14.8 14.3 14.2 15.2 13.5 ...
## $ High.jump : num 2.07 1.86 1.92 2.1 2.01 2.13 1.95 1.86 1.95 1.98 ...
## $ X400m : num 49.8 49.4 48.9 50.4 48.6 ...
## $ X110m.hurdle: num 14.7 14.1 15 15.3 14.2 ...
## $ Discus : num 43.8 50.7 40.9 46.3 45.7 ...
## $ Pole.vault : num 5.02 4.92 5.32 4.72 4.42 4.42 4.92 4.82 4.72 4.62 ...
## $ Javeline : num 63.2 60.1 62.8 63.4 55.4 ...
## $ X1500m : num 292 302 280 276 268 ...
## $ Rank : int 1 2 4 5 7 8 9 10 11 12 ...
## $ Points : int 8217 8122 8067 8036 8004 7995 7802 7733 7708 7651 ...
## $ Competition : Factor w/ 2 levels "Decastar","OlympicG": 1 1 1 1 1 1 1 1 1 1 ...
# Active data for the PCA: rows 1 to 23 and columns 1 to 10 of decathlon2.
decathlon2.active <- decathlon2[seq_len(23), seq_len(10)]
# Preview the first 3 rows of the first 5 columns.
head(decathlon2.active[, seq_len(5)], n = 3)
## X100m Long.jump Shot.put High.jump X400m
## SEBRLE 11.04 7.58 14.83 2.07 49.81
## CLAY 10.76 7.40 14.26 1.86 49.37
## BERNARD 11.02 7.23 14.25 1.92 48.93
# Display the complete active data set.
print(decathlon2.active)
## X100m Long.jump Shot.put High.jump X400m X110m.hurdle Discus
## SEBRLE 11.04 7.58 14.83 2.07 49.81 14.69 43.75
## CLAY 10.76 7.40 14.26 1.86 49.37 14.05 50.72
## BERNARD 11.02 7.23 14.25 1.92 48.93 14.99 40.87
## YURKOV 11.34 7.09 15.19 2.10 50.42 15.31 46.26
## ZSIVOCZKY 11.13 7.30 13.48 2.01 48.62 14.17 45.67
## McMULLEN 10.83 7.31 13.76 2.13 49.91 14.38 44.41
## MARTINEAU 11.64 6.81 14.57 1.95 50.14 14.93 47.60
## HERNU 11.37 7.56 14.41 1.86 51.10 15.06 44.99
## BARRAS 11.33 6.97 14.09 1.95 49.48 14.48 42.10
## NOOL 11.33 7.27 12.68 1.98 49.20 15.29 37.92
## BOURGUIGNON 11.36 6.80 13.46 1.86 51.16 15.67 40.49
## Sebrle 10.85 7.84 16.36 2.12 48.36 14.05 48.72
## Clay 10.44 7.96 15.23 2.06 49.19 14.13 50.11
## Karpov 10.50 7.81 15.93 2.09 46.81 13.97 51.65
## Macey 10.89 7.47 15.73 2.15 48.97 14.56 48.34
## Warners 10.62 7.74 14.48 1.97 47.97 14.01 43.73
## Zsivoczky 10.91 7.14 15.31 2.12 49.40 14.95 45.62
## Hernu 10.97 7.19 14.65 2.03 48.73 14.25 44.72
## Bernard 10.69 7.48 14.80 2.12 49.13 14.17 44.75
## Schwarzl 10.98 7.49 14.01 1.94 49.76 14.25 42.43
## Pogorelov 10.95 7.31 15.10 2.06 50.79 14.21 44.60
## Schoenbeck 10.90 7.30 14.77 1.88 50.30 14.34 44.41
## Barras 11.14 6.99 14.91 1.94 49.41 14.37 44.83
## Pole.vault Javeline X1500m
## SEBRLE 5.02 63.19 291.70
## CLAY 4.92 60.15 301.50
## BERNARD 5.32 62.77 280.10
## YURKOV 4.72 63.44 276.40
## ZSIVOCZKY 4.42 55.37 268.00
## McMULLEN 4.42 56.37 285.10
## MARTINEAU 4.92 52.33 262.10
## HERNU 4.82 57.19 285.10
## BARRAS 4.72 55.40 282.00
## NOOL 4.62 57.44 266.60
## BOURGUIGNON 5.02 54.68 291.70
## Sebrle 5.00 70.52 280.01
## Clay 4.90 69.71 282.00
## Karpov 4.60 55.54 278.11
## Macey 4.40 58.46 265.42
## Warners 4.90 55.39 278.05
## Zsivoczky 4.70 63.45 269.54
## Hernu 4.80 57.76 264.35
## Bernard 4.40 55.27 276.31
## Schwarzl 5.10 56.32 273.56
## Pogorelov 5.00 53.45 287.63
## Schoenbeck 5.00 60.89 278.82
## Barras 4.60 64.55 267.09
# Standardise every column of the active data: subtract the column mean
# (center = TRUE) and divide by the column standard deviation (scale = TRUE).
# The centring and scaling values are kept as attributes of the result.
s.decathlon2.active <- scale(decathlon2.active, center = TRUE, scale = TRUE)
# Display the standardised matrix together with its scaling attributes.
s.decathlon2.active
## X100m Long.jump Shot.put High.jump X400m
## SEBRLE 0.13427173 0.7352966 0.24860896 0.6475727 0.37430757
## CLAY -0.79552392 0.1609328 -0.42618678 -1.5244940 -0.06260046
## BERNARD 0.06785776 -0.3815218 -0.43802531 -0.9039035 -0.49950849
## YURKOV 1.13048136 -0.8282492 0.67479574 0.9578679 0.98002098
## ZSIVOCZKY 0.43313462 -0.1581581 -1.34959148 0.0269822 -0.80733006
## McMULLEN -0.56307501 -0.1262490 -1.01811287 1.2681632 0.47360485
## MARTINEAU 2.12669099 -1.7217040 -0.05919261 -0.5936083 0.70198859
## HERNU 1.23010232 0.6714784 -0.24860896 -1.5244940 1.65524248
## BARRAS 1.09727437 -1.2111584 -0.62744165 -0.5936083 0.04662655
## NOOL 1.09727437 -0.2538854 -2.29667323 -0.2833131 -0.23140583
## BOURGUIGNON 1.19689533 -1.7536131 -1.37326853 -1.5244940 1.71482085
## Sebrle -0.49666103 1.5649332 2.05990279 1.1647314 -1.06550298
## Clay -1.85814752 1.9478424 0.72214983 0.5441409 -0.24133556
## Karpov -1.65890560 1.4692059 1.55084635 0.8544362 -2.60461082
## Macey -0.36383308 0.3842965 1.31407592 1.4750267 -0.45978958
## Warners -1.26042174 1.2458422 -0.16573931 -0.3867448 -1.45276237
## Zsivoczky -0.29741911 -0.6687037 0.81685800 1.1647314 -0.03281128
## Hernu -0.09817718 -0.5091582 0.03551557 0.2338457 -0.69810305
## Bernard -1.02797283 0.4162056 0.21309339 1.1647314 -0.30091393
## Schwarzl -0.06497019 0.4481147 -0.72214983 -0.6970400 0.32465893
## Pogorelov -0.16459116 -0.1262490 0.56824905 0.5441409 1.34742091
## Schoenbeck -0.33062609 -0.1581581 0.17757783 -1.3176305 0.86086424
## Barras 0.46634161 -1.1473402 0.34331713 -0.6970400 -0.02288155
## X110m.hurdle Discus Pole.vault Javeline X1500m
## SEBRLE 0.32259025 -0.42405847 0.89550300 0.8265919 1.38063089
## CLAY -1.00011964 1.67152766 0.49479154 0.2099771 2.35950606
## BERNARD 0.94261052 -1.28995346 2.09763738 0.7414017 0.22196233
## YURKOV 1.60396546 0.33059306 -0.30663138 0.8773003 -0.14761299
## ZSIVOCZKY -0.75211154 0.15320485 -1.50876575 -0.7595685 -0.98664884
## McMULLEN -0.31809735 -0.22562421 -1.50876575 -0.5567347 0.72138844
## MARTINEAU 0.81860646 0.73347475 0.49479154 -1.3761833 -1.57597164
## HERNU 1.08728191 -0.05124258 0.09408008 -0.3904110 0.72138844
## BARRAS -0.11142393 -0.92014414 -0.30663138 -0.7534835 0.41174425
## NOOL 1.56263078 -2.17689451 -0.70734284 -0.3397025 -1.12648815
## BOURGUIGNON 2.34798978 -1.40420349 0.89550300 -0.8995238 1.38063089
## Sebrle -1.00011964 1.07021169 0.81536071 2.3133637 0.21297266
## Clay -0.83478090 1.48812629 0.41464925 2.1490683 0.41174425
## Karpov -1.16545838 1.95113958 -0.78748513 -0.7250868 0.02319074
## Macey 0.05391481 0.95596166 -1.58890805 -0.1328120 -1.24435271
## Warners -1.08278901 -0.43007163 0.41464925 -0.7555118 0.01719763
## Zsivoczky 0.85994115 0.13817195 -0.38677367 0.8793286 -0.83282560
## Hernu -0.58677280 -0.13242023 0.01393779 -0.2747957 -1.35122990
## Bernard -0.75211154 -0.12340049 -1.58890805 -0.7798519 -0.15660265
## Schwarzl -0.58677280 -0.82092701 1.21607217 -0.5668764 -0.43128701
## Pogorelov -0.66944217 -0.16849919 0.81536071 -1.1490094 0.97409804
## Schoenbeck -0.40076672 -0.22562421 0.81536071 0.3600741 0.09410925
## Barras -0.33876469 -0.09934785 -0.78748513 1.1024458 -1.07754439
## attr(,"scaled:center")
## X100m Long.jump Shot.put High.jump X400m X110m.hurdle
## 10.999565 7.349565 14.620000 2.007391 49.433043 14.533913
## Discus Pole.vault Javeline X1500m
## 45.160435 4.796522 59.114783 277.877826
## attr(,"scaled:scale")
## X100m Long.jump Shot.put High.jump X400m X110m.hurdle
## 0.30114144 0.31339023 0.84470005 0.09668211 1.00707693 0.48385515
## Discus Pole.vault Javeline X1500m
## 3.32603843 0.24955613 4.93014463 10.01149111
# 2.2 Assess the relationships between the variables
# Pairwise correlation matrix of the standardised columns.
s.corr <- cor(s.decathlon2.active)
# Display the correlations rounded to 2 decimal places.
round(s.corr, 2)
## X100m Long.jump Shot.put High.jump X400m X110m.hurdle Discus
## X100m 1.00 -0.76 -0.45 -0.40 0.59 0.73 -0.48
## Long.jump -0.76 1.00 0.44 0.34 -0.51 -0.59 0.46
## Shot.put -0.45 0.44 1.00 0.53 -0.31 -0.38 0.71
## High.jump -0.40 0.34 0.53 1.00 -0.37 -0.25 0.34
## X400m 0.59 -0.51 -0.31 -0.37 1.00 0.58 -0.36
## X110m.hurdle 0.73 -0.59 -0.38 -0.25 0.58 1.00 -0.53
## Discus -0.48 0.46 0.71 0.34 -0.36 -0.53 1.00
## Pole.vault 0.09 0.02 0.02 -0.50 0.25 0.14 -0.19
## Javeline -0.29 0.37 0.48 0.22 -0.13 -0.07 0.28
## X1500m -0.22 0.22 -0.05 -0.26 0.29 -0.05 0.08
## Pole.vault Javeline X1500m
## X100m 0.09 -0.29 -0.22
## Long.jump 0.02 0.37 0.22
## Shot.put 0.02 0.48 -0.05
## High.jump -0.50 0.22 -0.26
## X400m 0.25 -0.13 0.29
## X110m.hurdle 0.14 -0.07 -0.05
## Discus -0.19 0.28 0.08
## Pole.vault 1.00 0.23 0.39
## Javeline 0.23 1.00 0.09
## X1500m 0.39 0.09 1.00
# 2.3 Compute the eigenvalues
# Eigen-decomposition of the correlation matrix. The result is stored in a
# variable that shares its name with the base function `eigen`; the function
# is therefore called with an explicit namespace so the intent stays clear
# even when this line is re-run after the variable already exists.
eigen <- base::eigen(s.corr)
# Display the eigenvalues ($values) and eigenvectors ($vectors).
eigen
## eigen() decomposition
## $values
## [1] 4.1242133 1.8385309 1.2391403 0.8194402 0.7015528 0.4228828 0.3025817
## [8] 0.2744700 0.1552169 0.1219710
##
## $vectors
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 0.418859080 -0.13230683 -0.27089959 -0.03708806 0.2321476 -0.054398099
## [2,] -0.391064807 0.20713320 0.17117519 0.12746997 -0.2783669 0.051865558
## [3,] -0.361388111 0.06298590 -0.46497777 -0.14191803 0.2970589 0.368739186
## [4,] -0.300413236 -0.34309742 -0.29652805 -0.15968342 -0.4807859 0.437716883
## [5,] 0.345478567 0.21400770 -0.25470839 -0.47592968 -0.1240569 0.075796432
## [6,] 0.376265119 -0.01824645 -0.40325254 0.01866477 -0.2676975 -0.004048005
## [7,] -0.365965721 0.03662510 -0.15857927 -0.43636361 0.4873988 -0.305315353
## [8,] 0.106985591 0.59549862 -0.08449563 0.37447391 0.2646712 0.503563524
## [9,] -0.210864329 0.28475723 -0.54270782 0.36646463 -0.2361698 -0.556821016
## [10,] -0.002106782 0.57855748 0.19715884 -0.49491281 -0.3142987 -0.064663250
## [,7] [,8] [,9] [,10]
## [1,] -0.16604375 -0.19988005 0.76924639 0.12718339
## [2,] -0.28056361 -0.75850657 0.13094589 0.08509665
## [3,] -0.01797323 0.04649571 -0.12129309 0.62263702
## [4,] 0.05118848 0.16111045 0.28463225 -0.38244596
## [5,] 0.52012255 -0.44579641 -0.20854176 -0.09784197
## [6,] -0.67276768 -0.01592804 -0.41058421 -0.04475363
## [7,] -0.25946615 -0.07550934 -0.03391600 -0.49418361
## [8,] -0.01889413 0.06282691 0.06540692 -0.39288155
## [9,] 0.24281145 0.10086127 0.10268134 -0.01103627
## [10,] -0.20245828 0.37119711 0.25950868 0.17991689
# 2.4 Determine the principal components
# Run the PCA on the 10 active columns with centring and unit-variance scaling.
# Note that prcomp's scaling argument is named `scale.` (with a trailing dot);
# it is spelled in full here rather than relying on partial argument matching,
# and TRUE is used instead of the reassignable shorthand T.
pca <- prcomp(decathlon2.active[1:10], center = TRUE, scale. = TRUE)
# Display the component standard deviations and the rotation (loadings) matrix.
pca
## Standard deviations (1, .., p=10):
## [1] 2.0308159 1.3559244 1.1131668 0.9052294 0.8375875 0.6502944 0.5500742
## [8] 0.5238988 0.3939758 0.3492435
##
## Rotation (n x k) = (10 x 10):
## PC1 PC2 PC3 PC4 PC5
## X100m -0.418859080 -0.13230683 -0.27089959 0.03708806 -0.2321476
## Long.jump 0.391064807 0.20713320 0.17117519 -0.12746997 0.2783669
## Shot.put 0.361388111 0.06298590 -0.46497777 0.14191803 -0.2970589
## High.jump 0.300413236 -0.34309742 -0.29652805 0.15968342 0.4807859
## X400m -0.345478567 0.21400770 -0.25470839 0.47592968 0.1240569
## X110m.hurdle -0.376265119 -0.01824645 -0.40325254 -0.01866477 0.2676975
## Discus 0.365965721 0.03662510 -0.15857927 0.43636361 -0.4873988
## Pole.vault -0.106985591 0.59549862 -0.08449563 -0.37447391 -0.2646712
## Javeline 0.210864329 0.28475723 -0.54270782 -0.36646463 0.2361698
## X1500m 0.002106782 0.57855748 0.19715884 0.49491281 0.3142987
## PC6 PC7 PC8 PC9 PC10
## X100m -0.054398099 -0.16604375 -0.19988005 -0.76924639 0.12718339
## Long.jump 0.051865558 -0.28056361 -0.75850657 -0.13094589 0.08509665
## Shot.put 0.368739186 -0.01797323 0.04649571 0.12129309 0.62263702
## High.jump 0.437716883 0.05118848 0.16111045 -0.28463225 -0.38244596
## X400m 0.075796432 0.52012255 -0.44579641 0.20854176 -0.09784197
## X110m.hurdle -0.004048005 -0.67276768 -0.01592804 0.41058421 -0.04475363
## Discus -0.305315353 -0.25946615 -0.07550934 0.03391600 -0.49418361
## Pole.vault 0.503563524 -0.01889413 0.06282691 -0.06540692 -0.39288155
## Javeline -0.556821016 0.24281145 0.10086127 -0.10268134 -0.01103627
## X1500m -0.064663250 -0.20245828 0.37119711 -0.25950868 0.17991689
# Summarise the PCA: standard deviation, proportion of variance and cumulative
# proportion for each principal component.
summary(object = pca)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.0308 1.3559 1.1132 0.90523 0.83759 0.65029 0.55007
## Proportion of Variance 0.4124 0.1839 0.1239 0.08194 0.07016 0.04229 0.03026
## Cumulative Proportion 0.4124 0.5963 0.7202 0.80213 0.87229 0.91458 0.94483
## PC8 PC9 PC10
## Standard deviation 0.52390 0.39398 0.3492
## Proportion of Variance 0.02745 0.01552 0.0122
## Cumulative Proportion 0.97228 0.98780 1.0000
#####
# Compute the correlation coefficients
# Pairwise correlation matrix of the (unscaled) active columns.
s.corr <- cor(x = decathlon2.active)
# Print the matrix rounded to 2 decimal places.
print(round(s.corr, 2))
## X100m Long.jump Shot.put High.jump X400m X110m.hurdle Discus
## X100m 1.00 -0.76 -0.45 -0.40 0.59 0.73 -0.48
## Long.jump -0.76 1.00 0.44 0.34 -0.51 -0.59 0.46
## Shot.put -0.45 0.44 1.00 0.53 -0.31 -0.38 0.71
## High.jump -0.40 0.34 0.53 1.00 -0.37 -0.25 0.34
## X400m 0.59 -0.51 -0.31 -0.37 1.00 0.58 -0.36
## X110m.hurdle 0.73 -0.59 -0.38 -0.25 0.58 1.00 -0.53
## Discus -0.48 0.46 0.71 0.34 -0.36 -0.53 1.00
## Pole.vault 0.09 0.02 0.02 -0.50 0.25 0.14 -0.19
## Javeline -0.29 0.37 0.48 0.22 -0.13 -0.07 0.28
## X1500m -0.22 0.22 -0.05 -0.26 0.29 -0.05 0.08
## Pole.vault Javeline X1500m
## X100m 0.09 -0.29 -0.22
## Long.jump 0.02 0.37 0.22
## Shot.put 0.02 0.48 -0.05
## High.jump -0.50 0.22 -0.26
## X400m 0.25 -0.13 0.29
## X110m.hurdle 0.14 -0.07 -0.05
## Discus -0.19 0.28 0.08
## Pole.vault 1.00 0.23 0.39
## Javeline 0.23 1.00 0.09
## X1500m 0.39 0.09 1.00
# Step 3: compute the eigenvalues, i.e. how much of the total variance each
# component explains.
# The eigenvalues are, in essence, the variances of the components.
eigen <- eigen(s.corr)
# Step 4: run the analysis with prcomp.
# prcomp's scaling argument is named `scale.` (with a trailing dot); it is
# spelled in full instead of relying on partial argument matching, and TRUE is
# used instead of the reassignable shorthand T.
pca <- prcomp(decathlon2.active[1:10], center = TRUE, scale. = TRUE)
print(pca)
## Standard deviations (1, .., p=10):
## [1] 2.0308159 1.3559244 1.1131668 0.9052294 0.8375875 0.6502944 0.5500742
## [8] 0.5238988 0.3939758 0.3492435
##
## Rotation (n x k) = (10 x 10):
## PC1 PC2 PC3 PC4 PC5
## X100m -0.418859080 -0.13230683 -0.27089959 0.03708806 -0.2321476
## Long.jump 0.391064807 0.20713320 0.17117519 -0.12746997 0.2783669
## Shot.put 0.361388111 0.06298590 -0.46497777 0.14191803 -0.2970589
## High.jump 0.300413236 -0.34309742 -0.29652805 0.15968342 0.4807859
## X400m -0.345478567 0.21400770 -0.25470839 0.47592968 0.1240569
## X110m.hurdle -0.376265119 -0.01824645 -0.40325254 -0.01866477 0.2676975
## Discus 0.365965721 0.03662510 -0.15857927 0.43636361 -0.4873988
## Pole.vault -0.106985591 0.59549862 -0.08449563 -0.37447391 -0.2646712
## Javeline 0.210864329 0.28475723 -0.54270782 -0.36646463 0.2361698
## X1500m 0.002106782 0.57855748 0.19715884 0.49491281 0.3142987
## PC6 PC7 PC8 PC9 PC10
## X100m -0.054398099 -0.16604375 -0.19988005 -0.76924639 0.12718339
## Long.jump 0.051865558 -0.28056361 -0.75850657 -0.13094589 0.08509665
## Shot.put 0.368739186 -0.01797323 0.04649571 0.12129309 0.62263702
## High.jump 0.437716883 0.05118848 0.16111045 -0.28463225 -0.38244596
## X400m 0.075796432 0.52012255 -0.44579641 0.20854176 -0.09784197
## X110m.hurdle -0.004048005 -0.67276768 -0.01592804 0.41058421 -0.04475363
## Discus -0.305315353 -0.25946615 -0.07550934 0.03391600 -0.49418361
## Pole.vault 0.503563524 -0.01889413 0.06282691 -0.06540692 -0.39288155
## Javeline -0.556821016 0.24281145 0.10086127 -0.10268134 -0.01103627
## X1500m -0.064663250 -0.20245828 0.37119711 -0.25950868 0.17991689
# Step 4b: visualise the prcomp results with factoextra.
# Plot 1: eigenvalue (scree) plot of the fitted PCA.
fviz_eig(pca)

# Plot 2: individuals, coloured by their cos2 value along a
# red -> blue -> green1 gradient; repel = TRUE avoids overlapping labels.
fviz_pca_ind(
  pca,
  col.ind = "cos2",
  gradient.cols = c("red", "blue", "green1"),
  repel = TRUE
)

# Plot 3: variables, coloured by their contribution ("contrib") along a
# red -> blue -> black gradient; repel = TRUE avoids overlapping labels.
fviz_pca_var(
  pca,
  col.var = "contrib",
  gradient.cols = c("red", "blue", "black"),
  repel = TRUE
)
