library(readxl)
library(tidyverse)
datos <- read_xlsx("../data/Data_Paper_Plos_One_Muscle.xlsx", skip = 3,
na = "N/A", n_max = 47) %>%
rename(RA_takeof_leg = RA...7,
OB_takeof_leg = OB...8,
PM_takeof_leg = PM...9,
QL_takeof_leg = QL...10,
ES_takeof_leg = ES...11,
Gmax_takeof_leg = Gmax...12,
Gmed_takeof_leg = Gmed...13,
IL_takeof_leg = IL...14,
RA_free_leg = RA...15,
OB_free_leg = OB...16,
PM_free_leg = PM...17,
QL_free_leg = QL...18,
ES_free_leg = ES...19,
Gmax_free_leg = Gmax...20,
Gmed_free_leg = Gmed...21,
IL_free_leg = IL...22,
id = ID,
edad = `Age (years)`,
altura_cm = `Height (cm)`,
imc = `Body mass (kg)`,
dist_salto_cm = `long jump distance (cm)`,
sprint_100m_seconds = `100-m sprint time (s)`,
grasa_subcut_cm2 = `Subcutaneous fat CSA (absolute value, cm2)`) %>%
mutate(type = if_else(id %in% c(1:23), true = "Long jumpers",
false = "Untrained men"))
datos
library(ggplot2)
datos %>%
ggplot(data = ., aes(x = RA_takeof_leg, y = dist_salto_cm)) +
geom_point(size = 3) +
labs(x = expression('Relative CSA of RA takeoff leg side - cm'^"2"/'kg'^"2/3"),
y = "Personal best record of long jump (cm)") +
geom_smooth(method = "lm", se = FALSE, lty = 3, lwd = 1, color = "black") +
theme_light()
datos %>%
select_if(is.numeric) %>%
select(-id) %>%
gather(key = "variable", value = "valor") %>%
ggplot(data = ., aes(x = valor)) +
facet_wrap(facets = ~variable, scales = "free", ncol = 4) +
geom_histogram(aes(y = ..density..), bins = 10, color = "black",
fill = "gray60") +
geom_density(fill = "gray50", alpha = 0.18) +
geom_rug() +
labs(x = "", y = "Densidad") +
theme_light() +
theme(strip.background = element_rect(fill = "deepskyblue4"),
strip.text = element_text(color = "black"))
library(qqplotr)
datos %>%
select_if(is.numeric) %>%
select(-id) %>%
gather(key = "variable", value = "valor") %>%
ggplot(data = ., aes(sample = valor)) +
facet_wrap(facets = ~variable, scales = "free", ncol = 4) +
geom_qq_band(fill = "gray25") +
stat_qq_line(color = "darkgreen") +
stat_qq_point(color = "black", size = 0.8) +
labs(x = "Cuantiles teóricos", y = "Cuantiles muestrales") +
theme_light() +
theme(strip.background = element_rect(fill = "deepskyblue4"),
strip.text = element_text(color = "black"))
df_takeoff_leg <- datos %>%
select(RA_takeof_leg:IL_takeof_leg) %>%
gather(key = "variable", value = "valor") %>%
mutate(tipo = "TakeoffLeg")
df_free_leg <- datos %>%
select(RA_free_leg:IL_free_leg) %>%
gather(key = "variable", value = "valor") %>%
mutate(tipo = "FreeLeg")
df_takeoff_free <- df_takeoff_leg %>%
bind_rows(df_free_leg)
df_takeoff_free %>%
separate(col = variable, into = c("variable", "v1", "v2")) %>%
select(-c(v1, v2)) %>%
ggplot(data = ., aes(x = tipo, y = valor, fill = tipo)) +
facet_wrap(facets = ~variable, scales = "free", ncol = 4) +
geom_boxplot(color = "black") +
scale_fill_manual(values = c("darkgreen", "gold4")) +
labs(x = "Tipo de pierna", y = "") +
theme_light() +
theme(strip.background = element_rect(fill = "deepskyblue4"),
strip.text = element_text(color = "black"),
legend.position = "none")
datos %>%
dplyr::select(-c(id, dist_salto_cm)) %>%
gather(key = "variable", value = "valor", -type) %>%
ggplot(data = ., aes(x = type, y = valor, fill = type)) +
facet_wrap(facets = ~variable, scales = "free", ncol = 3) +
geom_boxplot(color = "black") +
scale_fill_manual(values = c("darkgreen", "gold4")) +
labs(x = "Grupo", y = "") +
theme_light() +
theme(strip.background = element_rect(fill = "deepskyblue4"),
strip.text = element_text(color = "black"),
legend.position = "none")
Se comprueba la normalidad de las variables (\(\alpha = 0.05\)), bajo el siguiente juego de hipótesis:
\[H_0: X \sim N(\mu, \sigma^2)\\ H1: x \nsim N(\mu, \sigma^2)\]
datos %>%
select_if(is.numeric) %>%
select(-id) %>%
gather(key = "variable", value = "valor") %>%
group_by(variable) %>%
summarise(valor = list(valor)) %>%
ungroup() %>%
group_by(variable) %>%
mutate(shapiro_valorP = round(shapiro.test(unlist(valor))$p.value, digits = 5),
Resultado = if_else(shapiro_valorP <= 0.05, true = "No normalidad",
false = "Sí normalidad")) %>%
select(-valor)
library(corrplot)
library(RColorBrewer)
datos %>%
select_if(is.numeric) %>%
select(-id) %>%
cor(use = "complete.obs") %>%
corrplot(method = "pie",
type = "upper",
diag = FALSE,
tl.cex = 0.8,
tl.srt = 45,
addgrid.col = "black",
order = "hclust",
col = brewer.pal(n = 10, name = "Spectral"))
lm()
Call:
lm(formula = dist_salto_cm ~ RA_takeof_leg, data = datos)
Residuals:
Min 1Q Median 3Q Max
-72.26 -22.41 8.38 22.24 65.10
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 589.584 49.894 11.817 9.66e-11 ***
RA_takeof_leg 11.928 4.443 2.685 0.0139 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 40.89 on 21 degrees of freedom
(22 observations deleted due to missingness)
Multiple R-squared: 0.2555, Adjusted R-squared: 0.2201
F-statistic: 7.208 on 1 and 21 DF, p-value: 0.01387
RA_takeof_leg
es estadísticamente significativa (\(valor\ p =0.0139\)) sobre la variabilidad observada en la distancia del salto. Además, se puede inferir que por cada unidad que aumenta RA_takeof_leg
, la distancia de salto es 11.928 centímetros mayor. La variable RA_takeof_leg
explica 22.01% de la variabilidad observada en la distancia de salto.Analysis of Variance Table
Response: dist_salto_cm
Df Sum Sq Mean Sq F value Pr(>F)
RA_takeof_leg 1 12050 12050.0 7.2078 0.01387 *
Residuals 21 35108 1671.8
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Shapiro-Wilk normality test
data: residuals(mod_rls)
W = 0.96191, p-value = 0.5029
studentized Breusch-Pagan test
data: mod_rls
BP = 0.0025838, df = 1, p-value = 0.9595
Durbin-Watson test
data: mod_rls
DW = 2.1677, p-value = 0.7123
alternative hypothesis: true autocorrelation is greater than 0
reales <- datos$dist_salto_cm[!is.na(datos$dist_salto_cm)]
predichos_rls <- mod_rls$fitted.values
data.frame(
Real = reales,
Predichos = predichos_rls
) %>%
ggplot(data = ., aes(x = Predichos, y = Real)) +
geom_point() +
labs(x = ("Valores predichos de distancia (cm)"),
y = "Valores reales de distancia (cm)") +
geom_smooth(method = "lm", se = FALSE, lty = 3, lwd = 1, color = "black") +
theme_light()
mod_rlm0 <- lm(dist_salto_cm ~ .,
data = datos %>% select_if(is.numeric) %>% select(-id))
summary(mod_rlm0)
Call:
lm(formula = dist_salto_cm ~ ., data = datos %>% select_if(is.numeric) %>%
select(-id))
Residuals:
1 2 3 4 5 6 7 8 9
-0.24001 -1.43196 -0.91660 -0.75285 -0.04370 1.48290 -0.44264 1.69525 -0.39388
10 11 12 13 14 15 16 17 18
1.11512 -1.41008 0.27780 -0.83303 -0.28836 1.11155 -0.39267 0.08616 1.18270
19 20 21 22 23
0.62738 -0.80017 1.33190 -1.93656 0.97174
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1266.6968 182.3241 6.948 0.0910 .
edad -23.4171 3.4665 -6.755 0.0936 .
altura_cm 6.1246 1.3465 4.549 0.1378
imc -2.7528 1.3980 -1.969 0.2992
sprint_100m_seconds -92.8684 5.1288 -18.107 0.0351 *
RA_takeof_leg 124.1035 12.0047 10.338 0.0614 .
OB_takeof_leg 7.1425 2.1907 3.260 0.1895
PM_takeof_leg 9.0525 2.7581 3.282 0.1883
QL_takeof_leg 7.8868 1.4157 5.571 0.1131
ES_takeof_leg 35.5716 5.3988 6.589 0.0959 .
Gmax_takeof_leg -1.0174 0.8984 -1.132 0.4605
Gmed_takeof_leg -10.5371 0.9414 -11.193 0.0567 .
IL_takeof_leg -0.4193 2.4356 -0.172 0.8915
RA_free_leg -86.7869 9.1217 -9.514 0.0667 .
OB_free_leg -2.4849 1.2159 -2.044 0.2897
PM_free_leg -17.9138 3.3439 -5.357 0.1175
QL_free_leg -7.7265 3.9823 -1.940 0.3030
ES_free_leg -49.2399 6.2703 -7.853 0.0806 .
Gmax_free_leg 2.4543 0.5668 4.330 0.1445
Gmed_free_leg 12.7940 0.9606 13.319 0.0477 *
IL_free_leg 2.8488 2.3869 1.194 0.4440
grasa_subcut_cm2 -2.3272 0.3648 -6.379 0.0990 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 4.83 on 1 degrees of freedom
(22 observations deleted due to missingness)
Multiple R-squared: 0.9995, Adjusted R-squared: 0.9891
F-statistic: 96.23 on 21 and 1 DF, p-value: 0.08023
usdm
que posee funciones flexibles para diagnósticos de multicolinealidad.# Variables predictoras
df_predictoras <-datos %>%
select_if(is.numeric) %>% dplyr::select(-c(id, dist_salto_cm)) %>%
as.matrix()
library(usdm)
vifcor(x = df_predictoras, th = 0.70)
10 variables from the 21 input variables have collinearity problem:
ES_free_leg RA_takeof_leg OB_takeof_leg PM_takeof_leg Gmed_takeof_leg imc Gmax_free_leg Gmax_takeof_leg ES_takeof_leg QL_free_leg
After excluding the collinear variables, the linear correlation coefficients ranges between:
min correlation ( OB_free_leg ~ sprint_100m_seconds ): -0.001734317
max correlation ( IL_free_leg ~ IL_takeof_leg ): 0.6817036
---------- VIFs of the remained variables --------
Este resultado muestra que de las 21 variables consideradas en el modelo inicial (modelo 0) 10 de ellas presentan problemas de colinealidad. Tomando un límite de 0.70 de correlación como criterio de exclusión de predictores, la correlación máxima presente en las variables seleccionadas es de 0.68. La tabla anterior muestra los predictores que podrían hacer parte del modelo.
Nuevo modelo:
mod_rlm1 <- lm(dist_salto_cm ~ edad + altura_cm + sprint_100m_seconds
+ QL_takeof_leg + IL_takeof_leg + RA_free_leg + OB_free_leg
+ PM_free_leg + Gmed_free_leg + IL_free_leg + grasa_subcut_cm2,
data = datos)
summary(mod_rlm1)
Call:
lm(formula = dist_salto_cm ~ edad + altura_cm + sprint_100m_seconds +
QL_takeof_leg + IL_takeof_leg + RA_free_leg + OB_free_leg +
PM_free_leg + Gmed_free_leg + IL_free_leg + grasa_subcut_cm2,
data = datos)
Residuals:
Min 1Q Median 3Q Max
-39.024 -13.760 -0.812 13.552 38.971
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1859.2274 339.7523 5.472 0.000194 ***
edad 4.8879 5.8586 0.834 0.421854
altura_cm -3.8708 1.5704 -2.465 0.031409 *
sprint_100m_seconds -58.9174 20.8987 -2.819 0.016692 *
QL_takeof_leg -2.5243 5.5584 -0.454 0.658553
IL_takeof_leg 10.7345 6.0471 1.775 0.103514
RA_free_leg 6.0558 5.5662 1.088 0.299875
OB_free_leg -0.4214 2.0322 -0.207 0.839505
PM_free_leg -0.1120 3.1732 -0.035 0.972481
Gmed_free_leg -0.6212 1.5826 -0.393 0.702176
IL_free_leg -4.6031 6.7318 -0.684 0.508261
grasa_subcut_cm2 0.4789 0.8361 0.573 0.578311
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 29.51 on 11 degrees of freedom
(22 observations deleted due to missingness)
Multiple R-squared: 0.7968, Adjusted R-squared: 0.5936
F-statistic: 3.921 on 11 and 11 DF, p-value: 0.01622
6 variables from the 21 input variables have collinearity problem:
ES_free_leg RA_takeof_leg imc OB_takeof_leg Gmax_takeof_leg PM_takeof_leg
After excluding the collinear variables, the linear correlation coefficients ranges between:
min correlation ( OB_free_leg ~ sprint_100m_seconds ): -0.001734317
max correlation ( Gmed_free_leg ~ Gmed_takeof_leg ): 0.8683011
---------- VIFs of the remained variables --------
Este método de selección arroja que 6 de las 21 variables totales (mod_rlm0
) tienen problemas de colinealidad. El modelo considera 15 variables como independientes, con valores máximos de VIF que no superan 9.64.
Nuevo modelo:
mod_rlm2 <- lm(dist_salto_cm ~ edad + altura_cm + sprint_100m_seconds
+ QL_takeof_leg + IL_takeof_leg + RA_free_leg + OB_free_leg
+ PM_free_leg + Gmed_free_leg + IL_free_leg + grasa_subcut_cm2
+ ES_takeof_leg + Gmed_takeof_leg + QL_free_leg + Gmax_free_leg,
data = datos)
summary(mod_rlm2)
Call:
lm(formula = dist_salto_cm ~ edad + altura_cm + sprint_100m_seconds +
QL_takeof_leg + IL_takeof_leg + RA_free_leg + OB_free_leg +
PM_free_leg + Gmed_free_leg + IL_free_leg + grasa_subcut_cm2 +
ES_takeof_leg + Gmed_takeof_leg + QL_free_leg + Gmax_free_leg,
data = datos)
Residuals:
Min 1Q Median 3Q Max
-31.787 -9.943 0.621 8.825 41.008
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2148.78930 411.69356 5.219 0.00123 **
edad 2.60937 6.48691 0.402 0.69950
altura_cm -4.05747 1.69627 -2.392 0.04803 *
sprint_100m_seconds -75.17773 24.48274 -3.071 0.01805 *
QL_takeof_leg 2.87009 8.02411 0.358 0.73112
IL_takeof_leg 11.95805 7.06408 1.693 0.13433
RA_free_leg 7.78298 6.68455 1.164 0.28244
OB_free_leg 0.49818 2.85436 0.175 0.86639
PM_free_leg 1.05675 3.95723 0.267 0.79713
Gmed_free_leg 3.73730 3.50620 1.066 0.32184
IL_free_leg -8.81768 7.69175 -1.146 0.28931
grasa_subcut_cm2 0.61986 0.97455 0.636 0.54497
ES_takeof_leg -3.66857 2.95889 -1.240 0.25498
Gmed_takeof_leg -3.10319 3.03813 -1.021 0.34105
QL_free_leg -7.34943 10.65306 -0.690 0.51248
Gmax_free_leg 0.08009 2.32387 0.034 0.97347
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 31.11 on 7 degrees of freedom
(22 observations deleted due to missingness)
Multiple R-squared: 0.8564, Adjusted R-squared: 0.5486
F-statistic: 2.782 on 15 and 7 DF, p-value: 0.08803
Start: AIC=162.76
dist_salto_cm ~ edad + altura_cm + sprint_100m_seconds + QL_takeof_leg +
IL_takeof_leg + RA_free_leg + OB_free_leg + PM_free_leg +
Gmed_free_leg + IL_free_leg + grasa_subcut_cm2 + ES_takeof_leg +
Gmed_takeof_leg + QL_free_leg + Gmax_free_leg
Df Sum of Sq RSS AIC
- Gmax_free_leg 1 1.1 6774.8 160.77
- OB_free_leg 1 29.5 6803.1 160.86
- PM_free_leg 1 69.0 6842.6 161.00
- QL_takeof_leg 1 123.8 6897.4 161.18
- edad 1 156.6 6930.2 161.29
- grasa_subcut_cm2 1 391.5 7165.1 162.05
- QL_free_leg 1 460.6 7234.2 162.28
<none> 6773.6 162.76
- Gmed_takeof_leg 1 1009.6 7783.2 163.96
- Gmed_free_leg 1 1099.4 7873.1 164.22
- IL_free_leg 1 1271.7 8045.3 164.72
- RA_free_leg 1 1311.8 8085.5 164.83
- ES_takeof_leg 1 1487.5 8261.2 165.33
- IL_takeof_leg 1 2772.9 9546.5 168.65
- altura_cm 1 5536.6 12310.3 174.50
- sprint_100m_seconds 1 9123.9 15897.6 180.38
Step: AIC=160.77
dist_salto_cm ~ edad + altura_cm + sprint_100m_seconds + QL_takeof_leg +
IL_takeof_leg + RA_free_leg + OB_free_leg + PM_free_leg +
Gmed_free_leg + IL_free_leg + grasa_subcut_cm2 + ES_takeof_leg +
Gmed_takeof_leg + QL_free_leg
Df Sum of Sq RSS AIC
- OB_free_leg 1 35.5 6810.3 158.89
- PM_free_leg 1 99.7 6874.5 159.10
- QL_takeof_leg 1 126.0 6900.8 159.19
- edad 1 155.8 6930.6 159.29
- grasa_subcut_cm2 1 440.8 7215.6 160.22
- QL_free_leg 1 462.1 7236.9 160.28
<none> 6774.8 160.77
- Gmed_free_leg 1 1218.9 7993.6 162.57
+ Gmax_free_leg 1 1.1 6773.6 162.76
- IL_free_leg 1 1293.7 8068.4 162.78
- RA_free_leg 1 1349.1 8123.8 162.94
- Gmed_takeof_leg 1 1367.9 8142.7 163.00
- ES_takeof_leg 1 1816.1 8590.9 164.23
- IL_takeof_leg 1 3018.5 9793.3 167.24
- altura_cm 1 5693.6 12468.4 172.80
- sprint_100m_seconds 1 9166.3 15941.1 178.45
Step: AIC=158.89
dist_salto_cm ~ edad + altura_cm + sprint_100m_seconds + QL_takeof_leg +
IL_takeof_leg + RA_free_leg + PM_free_leg + Gmed_free_leg +
IL_free_leg + grasa_subcut_cm2 + ES_takeof_leg + Gmed_takeof_leg +
QL_free_leg
Df Sum of Sq RSS AIC
- PM_free_leg 1 79.0 6889.4 157.15
- QL_takeof_leg 1 102.9 6913.2 157.23
- edad 1 207.8 7018.2 157.58
- grasa_subcut_cm2 1 414.3 7224.6 158.24
- QL_free_leg 1 508.6 7318.9 158.54
<none> 6810.3 158.89
- Gmed_free_leg 1 1202.6 8012.9 160.63
+ OB_free_leg 1 35.5 6774.8 160.77
+ Gmax_free_leg 1 7.2 6803.1 160.86
- IL_free_leg 1 1288.8 8099.1 160.87
- Gmed_takeof_leg 1 1341.6 8151.9 161.02
- ES_takeof_leg 1 1786.2 8596.5 162.24
- RA_free_leg 1 1909.5 8719.8 162.57
- IL_takeof_leg 1 3197.1 10007.5 165.74
- altura_cm 1 5844.6 12654.9 171.14
- sprint_100m_seconds 1 9529.2 16339.5 177.01
Step: AIC=157.15
dist_salto_cm ~ edad + altura_cm + sprint_100m_seconds + QL_takeof_leg +
IL_takeof_leg + RA_free_leg + Gmed_free_leg + IL_free_leg +
grasa_subcut_cm2 + ES_takeof_leg + Gmed_takeof_leg + QL_free_leg
Df Sum of Sq RSS AIC
- QL_takeof_leg 1 149.4 7038.8 155.65
- edad 1 421.9 7311.2 156.52
- grasa_subcut_cm2 1 518.3 7407.6 156.82
- QL_free_leg 1 579.2 7468.5 157.01
<none> 6889.4 157.15
+ PM_free_leg 1 79.0 6810.3 158.89
+ Gmax_free_leg 1 38.0 6851.3 159.02
- Gmed_free_leg 1 1288.9 8178.2 159.10
+ OB_free_leg 1 14.8 6874.5 159.10
- Gmed_takeof_leg 1 1348.2 8237.6 159.26
- IL_free_leg 1 1368.2 8257.6 159.32
- ES_takeof_leg 1 1710.1 8599.5 160.25
- RA_free_leg 1 1830.5 8719.8 160.57
- IL_takeof_leg 1 3448.0 10337.3 164.48
- altura_cm 1 6137.7 13027.0 169.80
- sprint_100m_seconds 1 10215.0 17104.4 176.07
Step: AIC=155.65
dist_salto_cm ~ edad + altura_cm + sprint_100m_seconds + IL_takeof_leg +
RA_free_leg + Gmed_free_leg + IL_free_leg + grasa_subcut_cm2 +
ES_takeof_leg + Gmed_takeof_leg + QL_free_leg
Df Sum of Sq RSS AIC
- edad 1 365.4 7404.1 154.81
- QL_free_leg 1 440.4 7479.2 155.04
- grasa_subcut_cm2 1 606.6 7645.4 155.55
<none> 7038.8 155.65
- Gmed_free_leg 1 1157.1 8195.9 157.15
+ QL_takeof_leg 1 149.4 6889.4 157.15
+ PM_free_leg 1 125.6 6913.2 157.23
- Gmed_takeof_leg 1 1225.7 8264.5 157.34
+ Gmax_free_leg 1 12.2 7026.6 157.60
+ OB_free_leg 1 0.2 7038.6 157.64
- IL_free_leg 1 1370.9 8409.7 157.74
- ES_takeof_leg 1 1589.8 8628.5 158.33
- RA_free_leg 1 1979.9 9018.7 159.35
- IL_takeof_leg 1 3493.6 10532.4 162.91
- altura_cm 1 6405.2 13443.9 168.53
- sprint_100m_seconds 1 10188.9 17227.6 174.23
Step: AIC=154.81
dist_salto_cm ~ altura_cm + sprint_100m_seconds + IL_takeof_leg +
RA_free_leg + Gmed_free_leg + IL_free_leg + grasa_subcut_cm2 +
ES_takeof_leg + Gmed_takeof_leg + QL_free_leg
Df Sum of Sq RSS AIC
- grasa_subcut_cm2 1 585.1 7989.3 154.56
<none> 7404.1 154.81
- QL_free_leg 1 791.7 8195.9 155.15
+ edad 1 365.4 7038.8 155.65
+ PM_free_leg 1 332.1 7072.1 155.75
+ QL_takeof_leg 1 92.9 7311.2 156.52
+ Gmax_free_leg 1 44.0 7360.2 156.67
- Gmed_free_leg 1 1355.5 8759.6 156.68
+ OB_free_leg 1 17.6 7386.6 156.75
- IL_free_leg 1 1498.2 8902.3 157.05
- ES_takeof_leg 1 1552.0 8956.1 157.19
- Gmed_takeof_leg 1 1793.9 9198.0 157.80
- IL_takeof_leg 1 3493.9 10898.0 161.70
- RA_free_leg 1 3552.7 10956.8 161.82
- altura_cm 1 6309.8 13714.0 166.99
- sprint_100m_seconds 1 14910.2 22314.4 178.18
Step: AIC=154.56
dist_salto_cm ~ altura_cm + sprint_100m_seconds + IL_takeof_leg +
RA_free_leg + Gmed_free_leg + IL_free_leg + ES_takeof_leg +
Gmed_takeof_leg + QL_free_leg
Df Sum of Sq RSS AIC
- QL_free_leg 1 454.7 8444.0 153.83
<none> 7989.3 154.56
+ grasa_subcut_cm2 1 585.1 7404.1 154.81
+ PM_free_leg 1 520.3 7468.9 155.01
- IL_free_leg 1 947.6 8936.9 155.14
- ES_takeof_leg 1 1107.1 9096.4 155.54
+ edad 1 343.9 7645.4 155.55
- Gmed_free_leg 1 1283.8 9273.1 155.99
+ Gmax_free_leg 1 170.2 7819.1 156.06
+ QL_takeof_leg 1 164.9 7824.3 156.08
+ OB_free_leg 1 1.4 7987.9 156.55
- Gmed_takeof_leg 1 1978.0 9967.3 157.65
- IL_takeof_leg 1 2914.2 10903.4 159.71
- RA_free_leg 1 3510.1 11499.4 160.94
- altura_cm 1 6059.0 14048.3 165.54
- sprint_100m_seconds 1 16150.3 24139.5 177.99
Step: AIC=153.83
dist_salto_cm ~ altura_cm + sprint_100m_seconds + IL_takeof_leg +
RA_free_leg + Gmed_free_leg + IL_free_leg + ES_takeof_leg +
Gmed_takeof_leg
Df Sum of Sq RSS AIC
<none> 8444.0 153.83
+ PM_free_leg 1 603.1 7840.8 154.13
+ edad 1 600.6 7843.4 154.13
- Gmed_free_leg 1 935.8 9379.8 154.25
+ QL_free_leg 1 454.7 7989.3 154.56
- IL_free_leg 1 1063.8 9507.8 154.56
+ grasa_subcut_cm2 1 248.1 8195.9 155.15
- ES_takeof_leg 1 1366.9 9810.9 155.28
+ OB_free_leg 1 164.7 8279.3 155.38
+ Gmax_free_leg 1 61.2 8382.8 155.66
+ QL_takeof_leg 1 28.1 8415.9 155.75
- Gmed_takeof_leg 1 1615.8 10059.7 155.86
- IL_takeof_leg 1 3156.5 11600.5 159.14
- RA_free_leg 1 4051.8 12495.8 160.85
- altura_cm 1 5609.7 14053.7 163.55
- sprint_100m_seconds 1 16115.0 24559.0 176.39
Call:
lm(formula = dist_salto_cm ~ altura_cm + sprint_100m_seconds +
IL_takeof_leg + RA_free_leg + Gmed_free_leg + IL_free_leg +
ES_takeof_leg + Gmed_takeof_leg, data = datos)
Residuals:
Min 1Q Median 3Q Max
-37.963 -9.854 1.429 12.784 38.271
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2117.013 232.811 9.093 2.99e-07 ***
altura_cm -3.298 1.081 -3.050 0.008654 **
sprint_100m_seconds -79.567 15.393 -5.169 0.000142 ***
IL_takeof_leg 10.107 4.418 2.288 0.038232 *
RA_free_leg 10.418 4.020 2.592 0.021311 *
Gmed_free_leg 2.936 2.357 1.246 0.233340
IL_free_leg -6.554 4.935 -1.328 0.205398
ES_takeof_leg -2.534 1.683 -1.505 0.154437
Gmed_takeof_leg -2.973 1.816 -1.637 0.123959
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 24.56 on 14 degrees of freedom
(22 observations deleted due to missingness)
Multiple R-squared: 0.8209, Adjusted R-squared: 0.7186
F-statistic: 8.023 on 8 and 14 DF, p-value: 0.0004255
plsr()
del paquete pls
permite ajustar modelos con Partial Least Squares, además, incluye la posibilidad de recurrir a validación cruzada para identificar el número adecuado de componentes con el cual minimizar el cuadrado medio del error. En este caso particular usé la opción de cross-validation y se identifica un sólo componente principal como óptimo. La biblioteca caret
también permite ajustar modelos de mínimos cuadrados parciales (PLS) y regresión por componentes principales (PCR).# Cargando biblioteca
library(pls)
# Ajustando el modelo con validación cruzada
set.seed(123)
mod_rlm3 <- plsr(formula = dist_salto_cm ~ .,
data = datos %>% select_if(is.numeric) %>% dplyr::select(-id),
scale. = TRUE, validation = "CV")
# Estimando el CME (cuadrado medio del error)
mod_pls_CV <- MSEP(mod_rlm3, estimate = "CV")
# Número de componentes óptimo
plot(mod_pls_CV$val, xlab = "Número de componentes", ylab = "CME")
\[CME = \frac{1}{n}\sum^{n}_{i = 1}(y_i - \hat{y_i})^2\]
# Cuadrado medio del error
cme <- function(predichos, real) {
cme = mean((real - predichos)^2)
return(cme)
}
# CME de 4 modelos
cme_rls <- cme(predichos_rls, reales)
cme_rlm1 <- cme(predichos_rlm1, reales)
cme_rlm2 <- cme(predichos_rlm2, reales)
cme_rlm3 <- cme(predichos_rlm3, reales)
lime
, MLmetrics
, mltools
, hydroGOF
, entre otras.data.frame(
Modelo = c("RLS", "RLM1", "RLM2", "RLM3"),
CME = c(cme_rls, cme_rlm1, cme_rlm2, cme_rlm3)
) %>%
ggplot(data = ., aes(x = Modelo, y = CME)) +
geom_col(width = 0.5, color = "black", fill = "dodgerblue3") +
geom_text(aes(label = round(CME, digits = 2)), vjust = -0.2) +
theme_light()
car
car
, haciendo uso de la función vif, es posible conocer los valores del Factor de Inflación de Varianza. edad altura_cm imc sprint_100m_seconds
30.551616 46.551051 78.222318 3.767064
RA_takeof_leg OB_takeof_leg PM_takeof_leg QL_takeof_leg
523.233517 69.187078 67.892841 4.960818
ES_takeof_leg Gmax_takeof_leg Gmed_takeof_leg IL_takeof_leg
661.053994 32.048069 35.930840 15.504385
RA_free_leg OB_free_leg PM_free_leg QL_free_leg
322.636871 29.338158 82.874550 26.189789
ES_free_leg Gmax_free_leg Gmed_free_leg IL_free_leg
853.136657 16.326639 30.031175 16.590305
grasa_subcut_cm2
15.271952
# Matriz de correlaciones
mtx_cor <- cor(df_predictoras, use = "complete.obs")
# Inversa de mtx_cor
inversa_cor <- solve(mtx_cor)
# VIFs
diag(inversa_cor)
edad altura_cm imc sprint_100m_seconds
30.551616 46.551051 78.222318 3.767064
RA_takeof_leg OB_takeof_leg PM_takeof_leg QL_takeof_leg
523.233517 69.187078 67.892841 4.960818
ES_takeof_leg Gmax_takeof_leg Gmed_takeof_leg IL_takeof_leg
661.053994 32.048069 35.930840 15.504385
RA_free_leg OB_free_leg PM_free_leg QL_free_leg
322.636871 29.338158 82.874550 26.189789
ES_free_leg Gmax_free_leg Gmed_free_leg IL_free_leg
853.136657 16.326639 30.031175 16.590305
grasa_subcut_cm2
15.271952
mctest
mctest
es posible graficar los VIFs y valores propios (Eigen Values).data.frame(
variable = names(diag(inversa_cor)),
VIF = diag(inversa_cor)
) %>%
ggplot(data = ., aes(x = variable, y = VIF)) +
geom_point() +
geom_hline(yintercept = 10, color = "red", lty = 2) +
labs(x = "Variable", y = "VIF",
title = "Factor Inflacionario de Varianza (VIF)") +
theme_light() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, color = "black"),
axis.text.y = element_text(color = "black"))