Ejercicios de Prueba de Normalidad

Estimación del modelo.

# Fit the linear model of price on lotsize, sqrft and bdrms (data: hprice1)
# by OLS, then print the usual regression summary.
modelo <- lm(price ~ lotsize + sqrft + bdrms, data = hprice1)
summary(modelo)
## 
## Call:
## lm(formula = price ~ lotsize + sqrft + bdrms, data = hprice1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -120.026  -38.530   -6.555   32.323  209.376 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -2.177e+01  2.948e+01  -0.739  0.46221    
## lotsize      2.068e-03  6.421e-04   3.220  0.00182 ** 
## sqrft        1.228e-01  1.324e-02   9.275 1.66e-14 ***
## bdrms        1.385e+01  9.010e+00   1.537  0.12795    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 59.83 on 84 degrees of freedom
## Multiple R-squared:  0.6724, Adjusted R-squared:  0.6607 
## F-statistic: 57.46 on 3 and 84 DF,  p-value: < 2.2e-16
# Render the coefficient matrix of the fitted model as a styled table,
# rounded to four decimals.
summary(modelo)$coefficients %>%
  as.data.frame() %>%
  kable(digits = 4, caption = "Coeficientes estimados del modelo") %>%
  kable_styling(
    full_width = FALSE,
    bootstrap_options = c("striped", "hover", "condensed")
  )
Coeficientes estimados del modelo
Estimate Std. Error t value Pr(>|t|)
(Intercept) -21.7703 29.4750 -0.7386 0.4622
lotsize 0.0021 0.0006 3.2201 0.0018
sqrft 0.1228 0.0132 9.2751 0.0000
bdrms 13.8525 9.0101 1.5374 0.1279

Residuos.

# Residuals of the fitted model; every normality check below works on these.
residuos <- resid(modelo)

Cálculos de manera manual

library(dplyr)

# Summary table of the residuals.
# Each statistic is a scalar, so mutate() repeats it on every row; read the
# first row when a single value is needed.
#
# Skewness and kurtosis are the moment coefficients the Jarque-Bera statistic
# is built on: m3 / m2^(3/2) and m4 / m2^2, where m_k is the k-th central
# moment with divisor n (a plain mean). Dividing by powers of sd() instead
# uses the n - 1 variance, which shrinks both coefficients and makes the
# manual JB statistic disagree with tseries::jarque.bera.test().
# The `sd` column is still the usual sample standard deviation.
# NOTE(review): output captured before this change shows the old skewness and
# kurtosis values; re-knit the document to refresh it.
resumen <- data.frame(residuos = residuos) %>%
  mutate(
    media = mean(residuos),
    sd = sd(residuos),
    skewness = mean((residuos - media)^3) / mean((residuos - media)^2)^(3 / 2),
    kurtosis = mean((residuos - media)^4) / mean((residuos - media)^2)^2
  )

head(resumen)
##     residuos         media       sd  skewness kurtosis
## 1 -45.639765 -2.321494e-15 58.79282 0.9443546 5.141959
## 2  74.848732 -2.321494e-15 58.79282 0.9443546 5.141959
## 3  -8.236558 -2.321494e-15 58.79282 0.9443546 5.141959
## 4 -12.081520 -2.321494e-15 58.79282 0.9443546 5.141959
## 5  18.093192 -2.321494e-15 58.79282 0.9443546 5.141959
## 6  62.939597 -2.321494e-15 58.79282 0.9443546 5.141959
# Show the first rows of the residual summary as a styled table.
resumen %>%
  head() %>%
  kable(digits = 4, caption = "Cálculos preliminares de los residuos") %>%
  kable_styling(
    full_width = FALSE,
    bootstrap_options = c("striped", "hover", "condensed")
  )
Cálculos preliminares de los residuos
residuos media sd skewness kurtosis
-45.6398 0 58.7928 0.9444 5.142
74.8487 0 58.7928 0.9444 5.142
-8.2366 0 58.7928 0.9444 5.142
-12.0815 0 58.7928 0.9444 5.142
18.0932 0 58.7928 0.9444 5.142
62.9396 0 58.7928 0.9444 5.142

Prueba Jarque-Bera (JB)

Cálculo manual.

# Manual Jarque-Bera statistic: JB = (n / 6) * (S^2 + (K - 3)^2 / 4),
# with S and K read from the first row of the summary table (the columns are
# constant, so any row would do).
n <- length(residuos)
sk <- resumen[["skewness"]][1]
ku <- resumen[["kurtosis"]][1]

jarquebera <- (n / 6) * (sk^2 + (ku - 3)^2 / 4)
print(jarquebera)
## [1] 29.90244

Prueba con librería.

# Jarque-Bera test from the tseries package on the same residuals; its
# X-squared value is the reference for the manual statistic computed above.
library(tseries)
jarque.bera.test(residuos)
## 
##  Jarque Bera Test
## 
## data:  residuos
## X-squared = 32.278, df = 2, p-value = 9.794e-08

Gráfico.

# Density-scaled histogram of the residuals, overlaid with the normal density
# that has the residuals' own mean and standard deviation.
hist(residuos, freq = FALSE, main = "Histograma de residuos")
curve(dnorm(x, mean = mean(residuos), sd = sd(residuos)), add = TRUE)

Prueba Kolmogorov-Smirnov (KS)

Cálculos manuales.

# Standardise the residuals with their own mean and sample standard deviation.
z <- (residuos - mean(residuos)) / sd(residuos)

# Manual Kolmogorov-Smirnov table, one row per ordered residual:
#   z         ordered standardised residual
#   Fn        empirical CDF just after the point, i / n
#   F_teorica standard normal CDF at z
#   D         largest gap between the two CDFs at that point
# The empirical CDF jumps from (i - 1) / n to i / n at each observation, so
# the gap must be checked against both steps; comparing only with i / n can
# understate the statistic. This matches how stats::ks.test() computes D.
# `n` is the number of residuals, defined earlier in the script.
# NOTE(review): output captured before this change shows the old per-row D
# values; re-knit the document to refresh it.
ks_tabla <- data.frame(
  z = sort(z),
  Fn = seq_len(n) / n,
  F_teorica = pnorm(sort(z))
) %>%
  mutate(D = pmax(Fn - F_teorica, F_teorica - (seq_len(n) - 1) / n))

head(ks_tabla)
##            z         Fn  F_teorica            D
## 81 -2.041515 0.01136364 0.02059981 0.0092361731
## 77 -1.964674 0.02272727 0.02472601 0.0019987418
## 24 -1.821326 0.03409091 0.03427866 0.0001877487
## 48 -1.551958 0.04545455 0.06033615 0.0148816002
## 12 -1.453599 0.05681818 0.07302879 0.0162106057
## 32 -1.312621 0.06818182 0.09465535 0.0264735301
# Show the first rows of the manual KS table as a styled table.
ks_tabla %>%
  head() %>%
  kable(digits = 4, caption = "Tabla manual de la prueba Kolmogorov-Smirnov") %>%
  kable_styling(
    full_width = FALSE,
    bootstrap_options = c("striped", "hover", "condensed")
  )
Tabla manual de la prueba Kolmogorov-Smirnov
z Fn F_teorica D
81 -2.0415 0.0114 0.0206 0.0092
77 -1.9647 0.0227 0.0247 0.0020
24 -1.8213 0.0341 0.0343 0.0002
48 -1.5520 0.0455 0.0603 0.0149
12 -1.4536 0.0568 0.0730 0.0162
32 -1.3126 0.0682 0.0947 0.0265
# KS statistic: the largest gap recorded in the manual table.
D_max <- max(ks_tabla[["D"]])
print(D_max)
## [1] 0.0754392

Usando librería KS

# One-sample KS test of the residuals against a normal distribution whose
# mean and sd are computed from the residuals themselves.
# NOTE(review): the ks.test() documentation says the distribution parameters
# must be pre-specified, not estimated from the data. With estimated
# parameters the reported p-value is not exact (a Lilliefors-type correction
# is the usual remedy) -- confirm this is acceptable for the exercise.
ks.test(residuos, "pnorm", mean(residuos), sd(residuos))
## 
##  Exact one-sample Kolmogorov-Smirnov test
## 
## data:  residuos
## D = 0.075439, p-value = 0.67
## alternative hypothesis: two-sided

Prueba Shapiro-Wilk (SW)

# Shapiro-Wilk normality test on the model residuals.
shapiro.test(residuos)
## 
##  Shapiro-Wilk normality test
## 
## data:  residuos
## W = 0.94132, p-value = 0.0005937

Gráfico

# Normal Q-Q plot of the residuals: sample quantiles against theoretical
# normal quantiles.
qqnorm(residuos, main = "Gráfico Q-Q de los residuos")
# Reference line (by default through the first and third quartiles), drawn in
# colour 2 of the active palette.
qqline(residuos, col = 2)