#library("devtools")
#install_github("kassambara/factoextra")
library("factoextra");library(FactoMineR)
data("decathlon2")
head(decathlon2)
## X100m Long.jump Shot.put High.jump X400m X110m.hurdle Discus
## SEBRLE 11.04 7.58 14.83 2.07 49.81 14.69 43.75
## CLAY 10.76 7.40 14.26 1.86 49.37 14.05 50.72
## BERNARD 11.02 7.23 14.25 1.92 48.93 14.99 40.87
## YURKOV 11.34 7.09 15.19 2.10 50.42 15.31 46.26
## ZSIVOCZKY 11.13 7.30 13.48 2.01 48.62 14.17 45.67
## McMULLEN 10.83 7.31 13.76 2.13 49.91 14.38 44.41
## Pole.vault Javeline X1500m Rank Points Competition
## SEBRLE 5.02 63.19 291.7 1 8217 Decastar
## CLAY 4.92 60.15 301.5 2 8122 Decastar
## BERNARD 5.32 62.77 280.1 4 8067 Decastar
## YURKOV 4.72 63.44 276.4 5 8036 Decastar
## ZSIVOCZKY 4.42 55.37 268.0 7 8004 Decastar
## McMULLEN 4.42 56.37 285.1 8 7995 Decastar
Extraer sólo individuos activos y variables para el análisis de componentes principales:
decathlon2.active <- decathlon2[1:23, 1:10]
head(decathlon2.active[, 1:6])
## X100m Long.jump Shot.put High.jump X400m X110m.hurdle
## SEBRLE 11.04 7.58 14.83 2.07 49.81 14.69
## CLAY 10.76 7.40 14.26 1.86 49.37 14.05
## BERNARD 11.02 7.23 14.25 1.92 48.93 14.99
## YURKOV 11.34 7.09 15.19 2.10 50.42 15.31
## ZSIVOCZKY 11.13 7.30 13.48 2.01 48.62 14.17
## McMULLEN 10.83 7.31 13.76 2.13 49.91 14.38
cor.mat <- round(cor(decathlon2.active),2)
head(cor.mat[, 1:6])
## X100m Long.jump Shot.put High.jump X400m X110m.hurdle
## X100m 1.00 -0.76 -0.45 -0.40 0.59 0.73
## Long.jump -0.76 1.00 0.44 0.34 -0.51 -0.59
## Shot.put -0.45 0.44 1.00 0.53 -0.31 -0.38
## High.jump -0.40 0.34 0.53 1.00 -0.37 -0.25
## X400m 0.59 -0.51 -0.31 -0.37 1.00 0.58
## X110m.hurdle 0.73 -0.59 -0.38 -0.25 0.58 1.00
colMeans(decathlon2.active)
## X100m Long.jump Shot.put High.jump X400m
## 10.999565 7.349565 14.620000 2.007391 49.433043
## X110m.hurdle Discus Pole.vault Javeline X1500m
## 14.533913 45.160435 4.796522 59.114783 277.877826
library(corrplot)
corrplot(cor.mat, type="upper", order="hclust",
tl.col="black", tl.srt=45)
library("PerformanceAnalytics")
chart.Correlation(decathlon2.active[, 1:6], histogram=FALSE, pch=19)
deca_pca <- princomp(covmat=cor.mat)
summary(deca_pca,loadings = TRUE)
## Importance of components:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5
## Standard deviation 2.0294239 1.3568859 1.1130945 0.90629539 0.8357948
## Proportion of Variance 0.4118562 0.1841139 0.1238979 0.08213713 0.0698553
## Cumulative Proportion 0.4118562 0.5959701 0.7198680 0.80200516 0.8718605
## Comp.6 Comp.7 Comp.8 Comp.9
## Standard deviation 0.64774627 0.5535350 0.52454606 0.4038205
## Proportion of Variance 0.04195752 0.0306401 0.02751486 0.0163071
## Cumulative Proportion 0.91381799 0.9444581 0.97197295 0.9882800
## Comp.10
## Standard deviation 0.34234416
## Proportion of Variance 0.01171995
## Cumulative Proportion 1.00000000
##
## Loadings:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8
## X100m 0.419 -0.130 -0.272 0.231 0.186 -0.162
## Long.jump -0.392 0.206 0.168 -0.131 -0.277 0.316 -0.742
## Shot.put -0.362 -0.465 0.142 0.299 0.365
## High.jump -0.301 -0.345 -0.294 0.155 -0.476 0.450 0.155
## X400m 0.344 0.215 -0.255 0.477 -0.126 -0.500 -0.477
## X110m.hurdle 0.376 -0.403 -0.269 0.667
## Discus -0.366 -0.156 0.444 0.480 -0.315 0.257
## Pole.vault 0.106 0.596 -0.372 0.272 0.503
## Javeline -0.210 0.287 -0.544 -0.364 -0.241 -0.545 -0.258 0.114
## X1500m 0.576 0.204 0.491 -0.322 0.187 0.389
## Comp.9 Comp.10
## X100m 0.746 0.240
## Long.jump 0.137 0.107
## Shot.put -0.212 0.599
## High.jump 0.335 -0.335
## X400m -0.167 -0.123
## X110m.hurdle -0.409 -0.107
## Discus -0.493
## Pole.vault 0.121 -0.379
## Javeline 0.102
## X1500m 0.214 0.215
Los valores propios corresponden a la magnitud de la variación explicada por cada componente principal (pc). Los valores propios son grandes para la primera PC y pequeños para las siguientes PC. ahora haremos el gráfico del codo para ver los valores y vectores propios
#res.pca <- PCA(decathlon2.active, graph = FALSE)
#eigenvalues <- res.pca$eig
#barplot(eigenvalues[, 2], names.arg=1:nrow(eigenvalues),
# main = "Variances",
# xlab = "Principal Components",
# ylab = "Percentage of variances",
# col ="steelblue")
# Add connected line segments to the plot
#lines(x = 1:nrow(eigenvalues), eigenvalues[, 2],
# type="b", pch=19, col = "red")
plot(deca_pca$sdev^2,xlab = 'número componente', ylab = 'componente varianza',
type = 'l', main = 'diagrama de codo')
~60% de las informaciones (desviaciones) contenidas en los datos son retenidas por los dos primeros componentes principales.
plano factorial seria el siguiente
res.pca <- PCA(decathlon2.active, graph = FALSE)
eigenvalues <- res.pca$eig
head(eigenvalues)
## eigenvalue percentage of variance cumulative percentage of variance
## comp 1 4.1242133 41.242133 41.24213
## comp 2 1.8385309 18.385309 59.62744
## comp 3 1.2391403 12.391403 72.01885
## comp 4 0.8194402 8.194402 80.21325
## comp 5 0.7015528 7.015528 87.22878
## comp 6 0.4228828 4.228828 91.45760
Los valores propios corresponden a la magnitud de la variación explicada por cada componente principal (pc). Los valores propios son grandes para la primera PC y pequeños para las siguientes PC. ahora haremos el gráfico del codo para ver los valores y vectores propios
fviz_screeplot(res.pca,addlabels=TRUE, hjust = -0.3)
#require(plotly)
#ggplotly(viz, tooltip = c("colour","text", "shape","fill","long","lat"))
#t1<-fviz_pca_var(res.pca)
#ggplotly(t1)
~60% de las informaciones (desviaciones) contenidas en los datos son retenidas por los dos primeros componentes principales.
los coeficientes serÃan los siguientes
round(res.pca$var$coord, digits = 3)#Coordenadas de variables sobre los componentes principales
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## X100m -0.851 -0.179 0.302 0.034 -0.194
## Long.jump 0.794 0.281 -0.191 -0.115 0.233
## Shot.put 0.734 0.085 0.518 0.128 -0.249
## High.jump 0.610 -0.465 0.330 0.145 0.403
## X400m -0.702 0.290 0.284 0.431 0.104
## X110m.hurdle -0.764 -0.025 0.449 -0.017 0.224
## Discus 0.743 0.050 0.177 0.395 -0.408
## Pole.vault -0.217 0.807 0.094 -0.339 -0.222
## Javeline 0.428 0.386 0.604 -0.332 0.198
## X1500m 0.004 0.784 -0.219 0.448 0.263
round(res.pca$var$contrib, digits = 3)#Las contribuciones variables en la determinación de un componente principal dado son (en porcentaje)
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## X100m 17.544 1.751 7.339 0.138 5.389
## Long.jump 15.293 4.290 2.930 1.625 7.749
## Shot.put 13.060 0.397 21.620 2.014 8.824
## High.jump 9.025 11.772 8.793 2.550 23.116
## X400m 11.936 4.580 6.488 22.651 1.539
## X110m.hurdle 14.158 0.033 16.261 0.035 7.166
## Discus 13.393 0.134 2.515 19.041 23.756
## Pole.vault 1.145 35.462 0.714 14.023 7.005
## Javeline 4.446 8.109 29.453 13.430 5.578
## X1500m 0.000 33.473 3.887 24.494 9.878
plano factorial
fviz_pca_ind(res.pca)
fviz_pca_var(res.pca)
fviz_pca_contrib(res.pca,choice = "var", axes = 1)
## Warning in fviz_pca_contrib(res.pca, choice = "var", axes = 1): The
## function fviz_pca_contrib() is deprecated. Please use the function
## fviz_contrib() which can handle outputs of PCA, CA and MCA functions.
fviz_pca_contrib(res.pca,choice = "var", axes = 2)
## Warning in fviz_pca_contrib(res.pca, choice = "var", axes = 2): The
## function fviz_pca_contrib() is deprecated. Please use the function
## fviz_contrib() which can handle outputs of PCA, CA and MCA functions.
fviz_pca_biplot(res.pca)