Para que las pruebas de hipótesis (𝑡 y 𝐹) sean válidas, nuestro modelo de regresión debe cumplir ciertos supuestos estadísticos. En esta práctica, nos enfocaremos en dos: 1. Ausencia de Multicolinealidad perfecta: Los regresores no deben estar altamente correlacionados entre sí. 2. Normalidad de los residuos: Los errores (𝜀) deben seguir una distribución normal para garantizar la validez de los intervalos de confianza en muestras pequeñas.
library(datasets)
data(mtcars)
modelo <- lm(mpg ~ disp + hp + wt + qsec, data = mtcars)
summary(modelo)
##
## Call:
## lm(formula = mpg ~ disp + hp + wt + qsec, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.8664 -1.5819 -0.3788 1.1712 5.6468
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 27.329638 8.639032 3.164 0.00383 **
## disp 0.002666 0.010738 0.248 0.80576
## hp -0.018666 0.015613 -1.196 0.24227
## wt -4.609123 1.265851 -3.641 0.00113 **
## qsec 0.544160 0.466493 1.166 0.25362
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.622 on 27 degrees of freedom
## Multiple R-squared: 0.8351, Adjusted R-squared: 0.8107
## F-statistic: 34.19 on 4 and 27 DF, p-value: 3.311e-10
Calculo manual.
library(knitr)
library(kableExtra)
X_mat <- model.matrix(modelo)
kable(head(X_mat, 6),
caption = "Matriz de diseno (primeras observaciones)",
digits = 3) %>%
kable_styling(
full_width = TRUE,
position = "right",
font_size = 16
)
| (Intercept) | disp | hp | wt | qsec | |
|---|---|---|---|---|---|
| Mazda RX4 | 1 | 160 | 110 | 2.620 | 16.46 |
| Mazda RX4 Wag | 1 | 160 | 110 | 2.875 | 17.02 |
| Datsun 710 | 1 | 108 | 93 | 2.320 | 18.61 |
| Hornet 4 Drive | 1 | 258 | 110 | 3.215 | 19.44 |
| Hornet Sportabout | 1 | 360 | 175 | 3.440 | 17.02 |
| Valiant | 1 | 225 | 105 | 3.460 | 20.22 |
X_mat <- model.matrix(modelo)
XX_matrix <- t(X_mat) %*% X_mat
XX_matrix_fmt <- format(XX_matrix,
big.mark = ".",
decimal.mark = ",",
scientific = FALSE)
library(knitr)
library(kableExtra)
kable(XX_matrix_fmt,
caption = "Matriz XtX") %>%
kable_styling(
full_width = TRUE,
position = "right",
font_size = 14
)
| (Intercept) | disp | hp | wt | qsec | |
|---|---|---|---|---|---|
| (Intercept) | 32,0000 | 7.383,1000 | 4.694,0000 | 102,9520 | 571,1600 |
| disp | 7.383,1000 | 2.179.627,4700 | 1.291.364,4000 | 27.091,4888 | 128.801,5040 |
| hp | 4.694,0000 | 1.291.364,4000 | 834.278,0000 | 16.471,7440 | 81.092,1600 |
| wt | 102,9520 | 27.091,4888 | 16.471,7440 | 360,9011 | 1.828,0946 |
| qsec | 571,1600 | 128.801,5040 | 81.092,1600 | 1.828,0946 | 10.293,4802 |
options(scipen = 999)
library(stargazer)
options(scipen = 999)
X_mat <- model.matrix(modelo)
XX_matrix <- t(X_mat) %*% X_mat
Sn <- solve(diag(sqrt(diag(XX_matrix))))
suppressMessages(
suppressWarnings(
stargazer(Sn,
type = "text",
digits = 4,
summary = FALSE)
)
)
##
## ==================================
## 0.1768 0 0 0 0
## 0 0.0007 0 0 0
## 0 0 0.0011 0 0
## 0 0 0 0.0526 0
## 0 0 0 0 0.0099
## ----------------------------------
library(stargazer)
options(scipen = 999)
X_mat <- model.matrix(modelo)
XX_matrix <- t(X_mat) %*% X_mat
Sn <- solve(diag(sqrt(diag(XX_matrix))))
XX_norm <- (Sn %*% XX_matrix) %*% Sn
stargazer(XX_norm,
type = "html",
digits = 4,
summary = FALSE)
| 1 | 0.8840 | 0.9085 | 0.9580 | 0.9952 |
| 0.8840 | 1 | 0.9576 | 0.9659 | 0.8599 |
| 0.9085 | 0.9576 | 1 | 0.9493 | 0.8751 |
| 0.9580 | 0.9659 | 0.9493 | 1 | 0.9485 |
| 0.9952 | 0.8599 | 0.8751 | 0.9485 | 1 |
library(stargazer)
lambdas <- eigen(XX_norm, symmetric = TRUE)
autovalores_row <- t(lambdas$values)
suppressMessages(
suppressWarnings(
stargazer(autovalores_row,
type = "html",
digits = 4,
summary = FALSE)
)
)
| 4.7215 | 0.2166 | 0.0504 | 0.0101 | 0.0014 |
K <- sqrt(max(lambdas$values) / min(lambdas$values))
K
## [1] 57.48052
Como κ(x) = 57.48 es mayor a 30, se considera que la multicolinealidad es severa en el modelo. Esto indica que existe una fuerte correlación entre algunas de las variables explicativas, lo que puede inflar la varianza de los coeficientes estimados y dificultar su interpretación individual.
library(mctest)
X_mat <- model.matrix(modelo)
resultado_mctest <- mctest(mod = modelo)
resultado_mctest
##
## Call:
## omcdiag(mod = mod, Inter = TRUE, detr = detr, red = red, conf = conf,
## theil = theil, cn = cn)
##
##
## Overall Multicollinearity Diagnostics
##
## MC Results detection
## Determinant |X'X|: 0.0247 0
## Farrar Chi-Square: 106.7504 1
## Red Indicator: 0.6542 1
## Sum of Lambda Inverse: 23.2023 1
## Theil's Method: 0.7121 1
## Condition Number: 57.4805 1
##
## 1 --> COLLINEARITY is detected by the test
## 0 --> COLLINEARITY is not detected by the test
library(olsrr)
library(knitr)
library(kableExtra)
resultado_olsrr <- ols_eigen_cindex(model = modelo)
kable(resultado_olsrr,
digits = 4,
caption = "Índice de Condicion (Multicolinealidad)") %>%
kable_styling(
full_width = FALSE,
position = "center"
)
| Eigenvalue | Condition Index | intercept | disp | hp | wt | qsec |
|---|---|---|---|---|---|---|
| 4.7215 | 1.0000 | 0.0001 | 0.0011 | 0.0014 | 0.0005 | 0.0001 |
| 0.2166 | 4.6693 | 0.0026 | 0.0368 | 0.0278 | 0.0002 | 0.0047 |
| 0.0504 | 9.6772 | 0.0017 | 0.1209 | 0.3924 | 0.0377 | 0.0002 |
| 0.0101 | 21.6161 | 0.0258 | 0.7773 | 0.0596 | 0.7018 | 0.0025 |
| 0.0014 | 57.4805 | 0.9698 | 0.0639 | 0.5189 | 0.2598 | 0.9925 |
library(knitr)
library(kableExtra)
Zn <- scale(X_mat[,-1])
kable(head(Zn, 6),
caption = "Variables Estandarizadas (primeras observaciones)",
digits = 3,
row.names = TRUE) %>%
kable_styling(full_width = TRUE, position = "right")
| disp | hp | wt | qsec | |
|---|---|---|---|---|
| Mazda RX4 | -0.571 | -0.535 | -0.610 | -0.777 |
| Mazda RX4 Wag | -0.571 | -0.535 | -0.350 | -0.464 |
| Datsun 710 | -0.990 | -0.783 | -0.917 | 0.426 |
| Hornet 4 Drive | 0.220 | -0.535 | -0.002 | 0.890 |
| Hornet Sportabout | 1.043 | 0.413 | 0.228 | -0.464 |
| Valiant | -0.046 | -0.608 | 0.248 | 1.327 |
library(knitr)
library(kableExtra)
n <- nrow(Zn)
R <- cor(X_mat[,-1])
kable(R,
caption = "Matriz de Correlacion (R)",
digits = 4,
row.names = TRUE) %>%
kable_styling(full_width = TRUE, position = "right")
| disp | hp | wt | qsec | |
|---|---|---|---|---|
| disp | 1.0000 | 0.7909 | 0.8880 | -0.4337 |
| hp | 0.7909 | 1.0000 | 0.6587 | -0.7082 |
| wt | 0.8880 | 0.6587 | 1.0000 | -0.1747 |
| qsec | -0.4337 | -0.7082 | -0.1747 | 1.0000 |
library(knitr)
determinante_R <- det(R)
kable(data.frame(Determinante_R = round(determinante_R, 6)),
caption = "Determinante de la matriz de correlación (|R|)",
align = "c")
| Determinante_R |
|---|
| 0.024666 |
library(knitr)
# Número de variables explicativas
m <- ncol(X_mat[,-1])
# Número de observaciones
n <- nrow(X_mat[,-1])
# Estadístico Chi-cuadrado de Farrar-Glauber
chi_FG <- -(n - 1 - (2*m + 5)/6) * log(determinante_R)
# Mostrar resultado
kable(data.frame(Chi_Cuadrado_FG = round(chi_FG, 4)),
caption = "Estadístico Chi-cuadrado de Farrar-Glauber",
align = "c")
| Chi_Cuadrado_FG |
|---|
| 106.7504 |
library(knitr)
gl <- m * (m - 1) / 2
VC <- qchisq(p = 0.95, df = gl)
kable(data.frame(
Valor_Critico = round(VC, 4)
),
caption = "Valor Crítico de la distribución Chi-cuadrado",
align = "c")
| Valor_Critico |
|---|
| 12.5916 |
library(fastGraph)
# Gráfica Chi-cuadrado
curve(dchisq(x, df = gl), from = 0, to = chi_FG + 20,
main = "Prueba de Farrar-Glauber",
xlab = "Chi-cuadrado",
ylab = "Densidad")
# Estadístico
abline(v = chi_FG, col = "red", lwd = 2)
# Valor crítico
abline(v = VC, col = "green", lwd = 2)
Dado que χ²FG ≥ 12.5916, se rechaza la hipótesis nula (H₀). Por lo
tanto, existe evidencia estadística de que los regresores del modelo
(disp, hp, wt y qsec) presentan correlación entre sí, lo que indica la
presencia de multicolinealidad.
library(mctest)
# Aplicación de la prueba
resultado_fg <- mctest::omcdiag(mod = modelo)
resultado_fg
##
## Call:
## mctest::omcdiag(mod = modelo)
##
##
## Overall Multicollinearity Diagnostics
##
## MC Results detection
## Determinant |X'X|: 0.0247 0
## Farrar Chi-Square: 106.7504 1
## Red Indicator: 0.6542 1
## Sum of Lambda Inverse: 23.2023 1
## Theil's Method: 0.7121 1
## Condition Number: 57.4805 1
##
## 1 --> COLLINEARITY is detected by the test
## 0 --> COLLINEARITY is not detected by the test
library(psych)
FG_test <- cortest.bartlett(X_mat[,-1])
FG_test
## $chisq
## [1] 106.7504
##
## $p.value
## [1] 0.000000000000000000009757936
##
## $df
## [1] 6
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.5.3
##
## Adjuntando el paquete: 'dplyr'
## The following object is masked from 'package:kableExtra':
##
## group_rows
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
R.cuadrado.regresores <- c(
summary(lm(disp ~ hp + wt + qsec, data = mtcars))$r.squared,
summary(lm(hp ~ disp + wt + qsec, data = mtcars))$r.squared,
summary(lm(wt ~ disp + hp + qsec, data = mtcars))$r.squared,
summary(lm(qsec ~ disp + hp + wt, data = mtcars))$r.squared
)
as.data.frame(R.cuadrado.regresores) %>%
mutate(VIF = 1/(1 - R.cuadrado.regresores))
## R.cuadrado.regresores VIF
## 1 0.8747721 7.985439
## 2 0.8064550 5.166758
## 3 0.8554274 6.916942
## 4 0.6808292 3.133119
library(knitr)
library(kableExtra)
inversa_R <- solve(R)
kable(inversa_R,
caption = "Inversa de la matriz de correlación (R⁻¹)",
digits = 4,
row.names = TRUE) %>%
kable_styling(full_width = TRUE, position = "right")
| disp | hp | wt | qsec | |
|---|---|---|---|---|
| disp | 7.9854 | -1.3982 | -5.9185 | 1.4390 |
| hp | -1.3982 | 5.1668 | -1.6800 | 2.7593 |
| wt | -5.9185 | -1.6800 | 6.9169 | -2.5481 |
| qsec | 1.4390 | 2.7593 | -2.5481 | 3.1331 |
VIFs <- diag(inversa_R)
VIFs
## disp hp wt qsec
## 7.985439 5.166758 6.916942 3.133119
library(performance)
## Warning: package 'performance' was built under R version 4.5.3
VIFs <- multicollinearity(modelo, verbose = FALSE)
VIFs
## # Check for Multicollinearity
##
## Low Correlation
##
## Term VIF VIF 95% CI adj. VIF Tolerance Tolerance 95% CI
## qsec 3.13 [2.10, 5.15] 1.77 0.32 [0.19, 0.48]
##
## Moderate Correlation
##
## Term VIF VIF 95% CI adj. VIF Tolerance Tolerance 95% CI
## disp 7.99 [4.92, 13.44] 2.83 0.13 [0.07, 0.20]
## hp 5.17 [3.28, 8.62] 2.27 0.19 [0.12, 0.30]
## wt 6.92 [4.30, 11.61] 2.63 0.14 [0.09, 0.23]
library(see)
## Warning: package 'see' was built under R version 4.5.3
plot(VIFs)
### Cálculo de los VIF’s usando “car”
library(car)
## Cargando paquete requerido: carData
##
## Adjuntando el paquete: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:psych':
##
## logit
VIFs_car <- vif(modelo)
VIFs_car
## disp hp wt qsec
## 7.985439 5.166758 6.916942 3.133119
library(mctest)
mc.plot(mod = modelo, vif = 2)
Preguntas de reflexión:
Si el valor de 𝑉𝐼𝐹 es mayor a 5 o 10, ¿qué implica esto para la interpretación de los coeficientes? 1. Si el VIF es mayor a 5 o 10, indica la presencia de multicolinealidad entre los regresores. Esto provoca que los coeficientes tengan alta varianza, sean inestables y difíciles de interpretar individualmente. En este modelo, variables como disp, hp y wt presentan VIF elevados, lo que confirma este problema.
Observando los resultados: ¿Existe alguna variable en este modelo que parezca estar “robándole” significancia a las demás? Justifica basándote en su correlación técnica (ej. desplazamiento vs. potencia). Sí, variables como disp, hp y wt parecen “robarse” significancia, ya que están altamente correlacionadas entre sí. Al estar relacionadas con el tamaño y rendimiento del vehículo, compiten por explicar mpg, generando redundancia de información y reduciendo la significancia individual de los coeficientes.
¿Qué indica el Indice de condición en este modelo? El índice de condición κ(x) = 57.48 supera el umbral de 30, indicando multicolinealidad severa. Esto implica que los coeficientes son inestables y que algunas variables pueden no resultar significativas, aun siendo relevantes en el modelo.
Verificando el ajuste de los residuos a la distribución normal, se usará la librería fitdistrplus.
library(fitdistrplus)
## Warning: package 'fitdistrplus' was built under R version 4.5.3
## Cargando paquete requerido: MASS
##
## Adjuntando el paquete: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## The following object is masked from 'package:olsrr':
##
## cement
## Cargando paquete requerido: survival
fit_normal <- fitdist(data = modelo$residuals, distr = "norm")
plot(fit_normal)
Usando tseries.
library(tseries)
## Warning: package 'tseries' was built under R version 4.5.3
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
salida_JB<-jarque.bera.test(modelo$residuals)
salida_JB
##
## Jarque Bera Test
##
## data: modelo$residuals
## X-squared = 3.316, df = 2, p-value = 0.1905
library(fastGraph)
alpha_sig <- 0.05
JB <- salida_JB$statistic
gl <- salida_JB$parameter
VC <- qchisq(1 - alpha_sig, gl, lower.tail = TRUE)
shadeDist(JB,
ddist = "dchisq",
parm1 = gl,
lower.tail = FALSE,
xmin = 0,
sub = paste("VC:", round(VC,2), " ", "JB:", round(JB,2)))
cat("JB=", salida_JB$statistic, "\n")
## JB= 3.315957
cat("p-value=", salida_JB$p.value, "\n")
## p-value= 0.1905237
En este caso, dado que el p-value = 0.1905 es mayor a 0.05, no se rechaza la hipótesis nula de normalidad. Por lo tanto, los residuos del modelo siguen una distribución normal.
Cálculo Manual
library(dplyr)
library(tibble)
library(gt)
## Warning: package 'gt' was built under R version 4.5.3
# Residuos de TU modelo
residuos <- modelo$residuals
# Construcción de la tabla KS
residuos %>%
as_tibble() %>%
mutate(posicion = row_number()) %>%
arrange(value) %>%
mutate(dist1 = row_number()/n()) %>%
mutate(dist2 = (row_number()-1)/n()) %>%
mutate(zi = as.vector(scale(value, center = TRUE))) %>%
mutate(pi = pnorm(zi, lower.tail = TRUE)) %>%
mutate(dif1 = abs(dist1 - pi)) %>%
mutate(dif2 = abs(dist2 - pi)) %>%
rename(residuales = value) -> tabla_KS
tabla_KS %>%
gt() %>%
tab_header("Tabla para calcular el Estadistico KS") %>%
tab_source_note(source_note = "Fuente: Elaboracion propia") %>%
# Resaltar máximo dif1
tab_style(
style = list(
cell_fill(color = "#A569BD"),
cell_text(style = "italic")
),
locations = cells_body(
columns = dif1,
rows = dif1 == max(dif1)
)
) %>%
# Resaltar máximo dif2
tab_style(
style = list(
cell_fill(color = "#3498DB"),
cell_text(style = "italic")
),
locations = cells_body(
columns = dif2,
rows = dif2 == max(dif2)
)
)
| Tabla para calcular el Estadistico KS | |||||||
| residuales | posicion | dist1 | dist2 | zi | pi | dif1 | dif2 |
|---|---|---|---|---|---|---|---|
| -3.8664153 | 21 | 0.03125 | 0.00000 | -1.58000709 | 0.05705262 | 0.025802622 | 0.057052622 |
| -3.7219399 | 23 | 0.06250 | 0.03125 | -1.52096733 | 0.06413402 | 0.001634018 | 0.032884018 |
| -2.9249910 | 6 | 0.09375 | 0.06250 | -1.19529489 | 0.11598592 | 0.022235918 | 0.053485918 |
| -2.8335055 | 22 | 0.12500 | 0.09375 | -1.15790946 | 0.12345049 | 0.001549506 | 0.029700494 |
| -2.5153146 | 3 | 0.15625 | 0.12500 | -1.02788103 | 0.15200290 | 0.004247104 | 0.027002896 |
| -2.1098370 | 11 | 0.18750 | 0.15625 | -0.86218297 | 0.19429342 | 0.006793419 | 0.038043419 |
| -1.8775253 | 14 | 0.21875 | 0.18750 | -0.76724902 | 0.22146674 | 0.002716736 | 0.033966736 |
| -1.5839621 | 1 | 0.25000 | 0.21875 | -0.64728469 | 0.25872385 | 0.008723854 | 0.039973854 |
| -1.5812652 | 7 | 0.28125 | 0.25000 | -0.64618256 | 0.25908057 | 0.022169435 | 0.009080565 |
| -1.5256810 | 32 | 0.31250 | 0.28125 | -0.62346815 | 0.26648846 | 0.046011536 | 0.014761536 |
| -1.0761488 | 24 | 0.34375 | 0.31250 | -0.43976723 | 0.33005285 | 0.013697147 | 0.017552853 |
| -1.0743171 | 9 | 0.37500 | 0.34375 | -0.43901872 | 0.33032399 | 0.044676013 | 0.013426013 |
| -0.8170837 | 29 | 0.40625 | 0.37500 | -0.33390049 | 0.36922732 | 0.037022675 | 0.005772675 |
| -0.7133657 | 2 | 0.43750 | 0.40625 | -0.29151623 | 0.38532827 | 0.052171732 | 0.020921732 |
| -0.4169002 | 30 | 0.46875 | 0.43750 | -0.17036590 | 0.43236119 | 0.036388805 | 0.005138805 |
| -0.3833408 | 10 | 0.50000 | 0.46875 | -0.15665188 | 0.43775961 | 0.062240392 | 0.030990392 |
| -0.3742943 | 26 | 0.53125 | 0.50000 | -0.15295502 | 0.43921687 | 0.092033127 | 0.060783127 |
| -0.3244422 | 4 | 0.56250 | 0.53125 | -0.13258300 | 0.44726159 | 0.115238410 | 0.083988410 |
| 0.0522694 | 15 | 0.59375 | 0.56250 | 0.02135984 | 0.50852070 | 0.085229304 | 0.053979304 |
| 0.2096827 | 13 | 0.62500 | 0.59375 | 0.08568663 | 0.53414223 | 0.090857767 | 0.059607767 |
| 0.2708056 | 5 | 0.65625 | 0.62500 | 0.11066445 | 0.54405878 | 0.112191219 | 0.080941219 |
| 0.8242600 | 27 | 0.68750 | 0.65625 | 0.33683309 | 0.63187864 | 0.055621362 | 0.024371362 |
| 0.9856164 | 12 | 0.71875 | 0.68750 | 0.40277125 | 0.65644174 | 0.062308257 | 0.031058257 |
| 1.1599816 | 16 | 0.75000 | 0.71875 | 0.47402542 | 0.68225911 | 0.067740891 | 0.036490891 |
| 1.2050398 | 19 | 0.78125 | 0.75000 | 0.49243841 | 0.68879527 | 0.092454727 | 0.061204727 |
| 1.6307713 | 31 | 0.81250 | 0.78125 | 0.66641320 | 0.74742649 | 0.065073512 | 0.033823512 |
| 1.6563961 | 8 | 0.84375 | 0.81250 | 0.67688474 | 0.75076046 | 0.092989544 | 0.061739544 |
| 2.5145182 | 25 | 0.87500 | 0.84375 | 1.02755556 | 0.84792053 | 0.027079468 | 0.004170532 |
| 2.7033586 | 28 | 0.90625 | 0.87500 | 1.10472502 | 0.86536062 | 0.040889380 | 0.009639380 |
| 5.2230317 | 20 | 0.93750 | 0.90625 | 2.13438716 | 0.98359445 | 0.046094449 | 0.077344449 |
| 5.6377518 | 18 | 0.96875 | 0.93750 | 2.30386212 | 0.98938481 | 0.020634808 | 0.051884808 |
| 5.6468467 | 17 | 1.00000 | 0.96875 | 2.30757875 | 0.98948871 | 0.010511289 | 0.020738711 |
| Fuente: Elaboracion propia | |||||||
# Cálculo del estadístico D (KS)
D <- max(max(tabla_KS$dif1), max(tabla_KS$dif2))
# Mostrar resultado
print(D)
## [1] 0.1152384
En este caso, dado que 0.1152384 < 0.1726, no se rechaza la hipótesis nula: ε∼N(0,σ2), por lo que los residuos siguen una distribución normal.
library(nortest)
prueba_KS <- lillie.test(modelo$residuals)
prueba_KS
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: modelo$residuals
## D = 0.11524, p-value = 0.3446
p.value <- prueba_KS$p.value
p.value
## [1] 0.3445667
En este caso, dado que 0.3445667 > 0.05, no se rechaza la hipótesis nula: ε∼N(0,σ2), por lo que los residuos siguen una distribución normal.
Cálculo Manual
library(dplyr)
library(gt)
# Residuos de tu modelo
residuos <- modelo$residuals
# Construcción de la tabla SW
residuos %>%
as_tibble() %>%
rename(residuales = value) %>%
arrange(residuales) %>%
mutate(pi = (row_number() - 0.375)/(n() + 0.25)) %>%
mutate(mi = qnorm(pi, lower.tail = TRUE)) %>%
mutate(ai = 0) -> tabla_SW
# Cálculos auxiliares
m <- sum(tabla_SW$mi^2)
n <- nrow(tabla_SW)
theta <- 1/sqrt(n)
# Cálculo de coeficientes ai
tabla_SW$ai[n] <- -2.706056*theta^5 + 4.434685*theta^4 - 2.071190*theta^3 - 0.147981*theta^2 + 0.2211570*theta + tabla_SW$mi[n]/sqrt(m)
tabla_SW$ai[n-1] <- -3.582633*theta^5 + 5.682633*theta^4 - 1.752461*theta^3 - 0.293762*theta^2 + 0.042981*theta + tabla_SW$mi[n-1]/sqrt(m)
tabla_SW$ai[1] <- -tabla_SW$ai[n]
tabla_SW$ai[2] <- -tabla_SW$ai[n-1]
omega <- (m - 2*tabla_SW$mi[n]^2 - 2*tabla_SW$mi[n-1]^2) /
(1 - 2*tabla_SW$ai[n]^2 - 2*tabla_SW$ai[n-1]^2)
tabla_SW$ai[3:(n-2)] <- tabla_SW$mi[3:(n-2)] / sqrt(omega)
# Cálculo final
tabla_SW %>%
mutate(ai_ui = ai * residuales,
ui2 = residuales^2) -> tabla_SW
# Mostrar tabla bonita
tabla_SW %>%
gt() %>%
tab_header("Tabla para calcular el Estadistico W") %>%
tab_source_note(source_note = "Fuente: Elaboracion propia")
| Tabla para calcular el Estadistico W | |||||
| residuales | pi | mi | ai | ai_ui | ui2 |
|---|---|---|---|---|---|
| -3.8664153 | 0.01937984 | -2.06672907 | -0.407971522 | 1.577387330 | 14.94916721 |
| -3.7219399 | 0.05038760 | -1.64110706 | -0.296267293 | 1.102689045 | 13.85283628 |
| -2.9249910 | 0.08139535 | -1.39574699 | -0.248692215 | 0.727422479 | 8.55557209 |
| -2.8335055 | 0.11240310 | -1.21384688 | -0.216281511 | 0.612834858 | 8.02875360 |
| -2.5153146 | 0.14341085 | -1.06511983 | -0.189781537 | 0.477360277 | 6.32680768 |
| -2.1098370 | 0.17441860 | -0.93684711 | -0.166926086 | 0.352186833 | 4.45141219 |
| -1.8775253 | 0.20542636 | -0.82239396 | -0.146532987 | 0.275119397 | 3.52510140 |
| -1.5839621 | 0.23643411 | -0.71782010 | -0.127900165 | 0.202589020 | 2.50893608 |
| -1.5812652 | 0.26744186 | -0.62056827 | -0.110571972 | 0.174843606 | 2.50039947 |
| -1.5256810 | 0.29844961 | -0.52886482 | -0.094232382 | 0.143768554 | 2.32770247 |
| -1.0761488 | 0.32945736 | -0.44141204 | -0.078650169 | 0.084639285 | 1.15809624 |
| -1.0743171 | 0.36046512 | -0.35721583 | -0.063648210 | 0.068378362 | 1.15415730 |
| -0.8170837 | 0.39147287 | -0.27548228 | -0.049085041 | 0.040106585 | 0.66762569 |
| -0.7133657 | 0.42248062 | -0.19555148 | -0.034843085 | 0.024855860 | 0.50889056 |
| -0.4169002 | 0.45348837 | -0.11685275 | -0.020820656 | 0.008680136 | 0.17380580 |
| -0.3833408 | 0.48449612 | -0.03887224 | -0.006926201 | 0.002655095 | 0.14695018 |
| -0.3742943 | 0.51550388 | 0.03887224 | 0.006926201 | -0.002592437 | 0.14009622 |
| -0.3244422 | 0.54651163 | 0.11685275 | 0.020820656 | -0.006755099 | 0.10526272 |
| 0.0522694 | 0.57751938 | 0.19555148 | 0.034843085 | 0.001821227 | 0.00273209 |
| 0.2096827 | 0.60852713 | 0.27548228 | 0.049085041 | 0.010292282 | 0.04396682 |
| 0.2708056 | 0.63953488 | 0.35721583 | 0.063648210 | 0.017236289 | 0.07333566 |
| 0.8242600 | 0.67054264 | 0.44141204 | 0.078650169 | 0.064828188 | 0.67940453 |
| 0.9856164 | 0.70155039 | 0.52886482 | 0.094232382 | 0.092876982 | 0.97143970 |
| 1.1599816 | 0.73255814 | 0.62056827 | 0.110571972 | 0.128261451 | 1.34555727 |
| 1.2050398 | 0.76356589 | 0.71782010 | 0.127900165 | 0.154124786 | 1.45212086 |
| 1.6307713 | 0.79457364 | 0.82239396 | 0.146532987 | 0.238961790 | 2.65941502 |
| 1.6563961 | 0.82558140 | 0.93684711 | 0.166926086 | 0.276495710 | 2.74364789 |
| 2.5145182 | 0.85658915 | 1.06511983 | 0.189781537 | 0.477209125 | 6.32280165 |
| 2.7033586 | 0.88759690 | 1.21384688 | 0.216281511 | 0.584686474 | 7.30814753 |
| 5.2230317 | 0.91860465 | 1.39574699 | 0.248692215 | 1.298927330 | 27.28006053 |
| 5.6377518 | 0.94961240 | 1.64110706 | 0.296267293 | 1.670281449 | 31.78424483 |
| 5.6468467 | 0.98062016 | 2.06672907 | 0.407971522 | 2.303752631 | 31.88687740 |
| Fuente: Elaboracion propia | |||||
W <- (sum(tabla_SW$ai_ui)^2) / sum(tabla_SW$ui2)
print(W)
## [1] 0.9366138
n <- nrow(tabla_SW)
mu <- 0.0038915*log(n)^3 - 0.083751*log(n)^2 - 0.31082*log(n) - 1.5861
sigma <- exp(0.0030302*log(n)^2 - 0.082676*log(n) - 0.4803)
Wn <- (log(1 - W) - mu) / sigma
print(Wn)
## [1] 1.554447
p_value_SW <- pnorm(Wn, lower.tail = FALSE)
print(p_value_SW)
## [1] 0.06003893
library(fastGraph)
shadeDist(Wn,
ddist = "dnorm",
lower.tail = FALSE,
sub = paste("Wn:", round(Wn, 3),
" p-value:", round(p_value_SW, 5)))
En este caso, dado que el p-value = 0.0005937472 < 0.05, se rechaza
la hipótesis nula de normalidad. Por lo tanto, los residuos no siguen
una distribución normal, es decir, no se cumple que ε∼N(0,σ2).
salida_SW <- shapiro.test(resid(modelo))
print(salida_SW)
##
## Shapiro-Wilk normality test
##
## data: resid(modelo)
## W = 0.93661, p-value = 0.06004
Mismos resultados que en el cálculo manual.
Importante, a partir de esta salida se puede calcular el Wn si se llegará a necesitar:
salida_SW <- shapiro.test(resid(modelo))
Wn_salida <- qnorm(salida_SW$p.value, lower.tail = FALSE)
print(Wn_salida)
## [1] 1.554447
El modelo no es completamente confiable para realizar inferencia estadística, debido a la presencia de multicolinealidad severa evidenciada por un índice de condición elevado y valores altos de VIF, lo que genera inestabilidad en los coeficientes y dificulta su interpretación. Además, aunque algunas pruebas como Jarque-Bera y Kolmogorov-Smirnov no rechazan la normalidad, la prueba de Shapiro-Wilk indica que los residuos no siguen una distribución normal, afectando la validez de los resultados, especialmente en muestras pequeñas.
Como medidas correctivas, se recomienda eliminar o combinar variables altamente correlacionadas como disp, hp y wt, ya que estas generan redundancia de información. También es conveniente aplicar transformaciones a los datos, como logaritmos, para mejorar la normalidad de los residuos. Finalmente, se sugiere trabajar con una muestra más grande, lo que permitiría obtener estimaciones más estables y resultados más robustos en el modelo.