R Markdown

library(FactoMineR)
## Warning: package 'FactoMineR' was built under R version 4.2.3
library(factoextra)
## Warning: package 'factoextra' was built under R version 4.2.3
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
data(decanthlon2)
## Warning in data(decanthlon2): data set 'decanthlon2' not found
head(decathlon2)
##           X100m Long.jump Shot.put High.jump X400m X110m.hurdle Discus
## SEBRLE    11.04      7.58    14.83      2.07 49.81        14.69  43.75
## CLAY      10.76      7.40    14.26      1.86 49.37        14.05  50.72
## BERNARD   11.02      7.23    14.25      1.92 48.93        14.99  40.87
## YURKOV    11.34      7.09    15.19      2.10 50.42        15.31  46.26
## ZSIVOCZKY 11.13      7.30    13.48      2.01 48.62        14.17  45.67
## McMULLEN  10.83      7.31    13.76      2.13 49.91        14.38  44.41
##           Pole.vault Javeline X1500m Rank Points Competition
## SEBRLE          5.02    63.19  291.7    1   8217    Decastar
## CLAY            4.92    60.15  301.5    2   8122    Decastar
## BERNARD         5.32    62.77  280.1    4   8067    Decastar
## YURKOV          4.72    63.44  276.4    5   8036    Decastar
## ZSIVOCZKY       4.42    55.37  268.0    7   8004    Decastar
## McMULLEN        4.42    56.37  285.1    8   7995    Decastar
str(decathlon2)
## 'data.frame':    27 obs. of  13 variables:
##  $ X100m       : num  11 10.8 11 11.3 11.1 ...
##  $ Long.jump   : num  7.58 7.4 7.23 7.09 7.3 7.31 6.81 7.56 6.97 7.27 ...
##  $ Shot.put    : num  14.8 14.3 14.2 15.2 13.5 ...
##  $ High.jump   : num  2.07 1.86 1.92 2.1 2.01 2.13 1.95 1.86 1.95 1.98 ...
##  $ X400m       : num  49.8 49.4 48.9 50.4 48.6 ...
##  $ X110m.hurdle: num  14.7 14.1 15 15.3 14.2 ...
##  $ Discus      : num  43.8 50.7 40.9 46.3 45.7 ...
##  $ Pole.vault  : num  5.02 4.92 5.32 4.72 4.42 4.42 4.92 4.82 4.72 4.62 ...
##  $ Javeline    : num  63.2 60.1 62.8 63.4 55.4 ...
##  $ X1500m      : num  292 302 280 276 268 ...
##  $ Rank        : int  1 2 4 5 7 8 9 10 11 12 ...
##  $ Points      : int  8217 8122 8067 8036 8004 7995 7802 7733 7708 7651 ...
##  $ Competition : Factor w/ 2 levels "Decastar","OlympicG": 1 1 1 1 1 1 1 1 1 1 ...
decathlon2.active <- decathlon2[1:23, 1:10]
head(decathlon2.active[, 1:5],3) # head of 3 rows of 5 columns.
##         X100m Long.jump Shot.put High.jump X400m
## SEBRLE  11.04      7.58    14.83      2.07 49.81
## CLAY    10.76      7.40    14.26      1.86 49.37
## BERNARD 11.02      7.23    14.25      1.92 48.93
res.pca <- PCA(decathlon2.active, graph=FALSE) #Standadized data.
#View(res.pca)
print(res.pca)
## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 23 individuals, described by 10 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"
# 4.4.1 eigenvales/variances
eig.val <- get_eigenvalue(res.pca)
eig.val
##        eigenvalue variance.percent cumulative.variance.percent
## Dim.1   4.1242133        41.242133                    41.24213
## Dim.2   1.8385309        18.385309                    59.62744
## Dim.3   1.2391403        12.391403                    72.01885
## Dim.4   0.8194402         8.194402                    80.21325
## Dim.5   0.7015528         7.015528                    87.22878
## Dim.6   0.4228828         4.228828                    91.45760
## Dim.7   0.3025817         3.025817                    94.48342
## Dim.8   0.2744700         2.744700                    97.22812
## Dim.9   0.1552169         1.552169                    98.78029
## Dim.10  0.1219710         1.219710                   100.00000
#scree plot
fviz_eig(res.pca, addlabels = TRUE, elim=c(0,50))

# 4.4.2 graph of variables
var <- get_pca_var(res.pca)
print(var)
## Principal Component Analysis Results for variables
##  ===================================================
##   Name       Description                                    
## 1 "$coord"   "Coordinates for the variables"                
## 2 "$cor"     "Correlations between variables and dimensions"
## 3 "$cos2"    "Cos2 for the variables"                       
## 4 "$contrib" "contributions of the variables"
#coordinates
head(var$coord)
##                   Dim.1       Dim.2      Dim.3       Dim.4      Dim.5
## X100m        -0.8506257 -0.17939806  0.3015564  0.03357320 -0.1944440
## Long.jump     0.7941806  0.28085695 -0.1905465 -0.11538956  0.2331567
## Shot.put      0.7339127  0.08540412  0.5175978  0.12846837 -0.2488129
## High.jump     0.6100840 -0.46521415  0.3300852  0.14455012  0.4027002
## X400m        -0.7016034  0.29017826  0.2835329  0.43082552  0.1039085
## X110m.hurdle -0.7641252 -0.02474081  0.4488873 -0.01689589  0.2242200
head(var$cos2)
##                  Dim.1        Dim.2      Dim.3        Dim.4      Dim.5
## X100m        0.7235641 0.0321836641 0.09093628 0.0011271597 0.03780845
## Long.jump    0.6307229 0.0788806285 0.03630798 0.0133147506 0.05436203
## Shot.put     0.5386279 0.0072938636 0.26790749 0.0165041211 0.06190783
## High.jump    0.3722025 0.2164242070 0.10895622 0.0208947375 0.16216747
## X400m        0.4922473 0.0842034209 0.08039091 0.1856106269 0.01079698
## X110m.hurdle 0.5838873 0.0006121077 0.20149984 0.0002854712 0.05027463
head(var$contrib)
##                  Dim.1      Dim.2     Dim.3       Dim.4     Dim.5
## X100m        17.544293  1.7505098  7.338659  0.13755240  5.389252
## Long.jump    15.293168  4.2904162  2.930094  1.62485936  7.748815
## Shot.put     13.060137  0.3967224 21.620432  2.01407269  8.824401
## High.jump     9.024811 11.7715838  8.792888  2.54987951 23.115504
## X400m        11.935544  4.5799296  6.487636 22.65090599  1.539012
## X110m.hurdle 14.157544  0.0332933 16.261261  0.03483735  7.166193
#coordinates of varibles
head(var$coord, 4)
##                Dim.1       Dim.2      Dim.3      Dim.4      Dim.5
## X100m     -0.8506257 -0.17939806  0.3015564  0.0335732 -0.1944440
## Long.jump  0.7941806  0.28085695 -0.1905465 -0.1153896  0.2331567
## Shot.put   0.7339127  0.08540412  0.5175978  0.1284684 -0.2488129
## High.jump  0.6100840 -0.46521415  0.3300852  0.1445501  0.4027002
#plot variables
fviz_pca_var(res.pca, col.var = "black")

# 4.4.2.3 quality of representation
head(var$cos2, 4)
##               Dim.1       Dim.2      Dim.3      Dim.4      Dim.5
## X100m     0.7235641 0.032183664 0.09093628 0.00112716 0.03780845
## Long.jump 0.6307229 0.078880629 0.03630798 0.01331475 0.05436203
## Shot.put  0.5386279 0.007293864 0.26790749 0.01650412 0.06190783
## High.jump 0.3722025 0.216424207 0.10895622 0.02089474 0.16216747
#let visualize the cos2
library(corrplot)
## corrplot 0.92 loaded
corrplot(var$cos2, is.corr = FALSE)

#total cos2 of variables on Dim.1 and Dim.2
fviz_cos2(res.pca, choice = "var", axes=1:2)

#color by cos2 values: quality on the factor map

fviz_pca_var(res.pca, col.var = "cos2", alpha.var = "cos2",
             gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"), 
             repel = TRUE # Avoid text overlapping
)

# 4.4.2.4 contribution of variables to PCs
head(var$contrib,4)
##               Dim.1      Dim.2     Dim.3     Dim.4     Dim.5
## X100m     17.544293  1.7505098  7.338659 0.1375524  5.389252
## Long.jump 15.293168  4.2904162  2.930094 1.6248594  7.748815
## Shot.put  13.060137  0.3967224 21.620432 2.0140727  8.824401
## High.jump  9.024811 11.7715838  8.792888 2.5498795 23.115504
corrplot(var$contrib, is.corr = F)

# Contributions of variables to PC1

fviz_contrib(res.pca, choice = "var", axes = 1, top = 10)

# Contributions of variables to PC2
fviz_contrib(res.pca, choice = "var", axes = 2, top = 10)

#The total contribution to PC1 and PC2 is obtained with the following R code: 
fviz_contrib(res.pca, choice = "var", axes = 1:2, top = 10)

#the most important plot #page 21
fviz_pca_var(res.pca, col.var = "contrib",
             gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07")
)