library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(olsrr)
##
## Attaching package: 'olsrr'
## The following object is masked from 'package:datasets':
##
## rivers
# Load the data set from the Excel workbook.
datos <- read_excel('datos3.xlsx')
# NOTE(review): attach() places the columns of `datos` on the search path.
# Nothing in the visible code needs it (the model uses `data = datos`);
# consider dropping it once confirmed that no later code relies on bare
# column names.
attach(datos)
# Preview the first 10 rows.
head(datos, n = 10)
## # A tibble: 10 × 4
## y x1 x2 x3
## <dbl> <dbl> <dbl> <dbl>
## 1 24.2 8 6.56 8
## 2 24 9 6.56 7
## 3 21.9 11 7.42 10
## 4 24.2 8 7.8 8
## 5 21.2 10 5.09 7
## 6 23.5 10 5.3 9
## 7 20.6 10 2.98 7
## 8 23.4 12 2.99 7
## 9 22.3 8 6.72 8
## 10 25.1 13 6.51 7
# Fit a linear regression of y on every other column of `datos`
# and print the coefficient table and fit statistics.
modelo <- lm(y ~ ., data = datos)
summary(modelo)
##
## Call:
## lm(formula = y ~ ., data = datos)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.1091 -0.3640 0.0634 0.8775 2.1345
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.62063 2.31786 4.151 0.00024 ***
## x1 0.78425 0.14206 5.521 4.83e-06 ***
## x2 0.97364 0.12963 7.511 1.83e-08 ***
## x3 0.05373 0.22669 0.237 0.81420
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.266 on 31 degrees of freedom
## Multiple R-squared: 0.715, Adjusted R-squared: 0.6874
## F-statistic: 25.93 on 3 and 31 DF, p-value: 1.38e-08
# Ordinary residuals of the fitted model, used by the normality test.
r <- residuals(modelo)
Prueba de normalidad (Shapiro-Wilk). H0: los errores se distribuyen normalmente. H1: los errores no se distribuyen normalmente.
# Shapiro-Wilk normality test applied to the model residuals.
shapiro.test(r)
##
## Shapiro-Wilk normality test
##
## data: r
## W = 0.91712, p-value = 0.01179
# Flag observations whose absolute studentized residual (rstudent) exceeds 2.
# The comparison uses the unrounded values: rounding first could turn a
# residual just above the cut-off (e.g. 2.00003) into exactly 2 and miss it.
abs(rstudent(modelo)) > 2
## 1 2 3 4 5 6 7 8 9 10 11 12 13
## FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 14 15 16 17 18 19 20 21 22 23 24 25 26
## FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 27 28 29 30 31 32 33 34 35
## FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# Studentized residual plot from olsrr for the fitted model.
ols_plot_resid_stud(modelo)
# Leverage check: flag observations whose hat-matrix diagonal exceeds 2 * k / n.
# X is the design matrix of the fitted model (intercept column included).
X <- model.matrix(modelo)
# Hat matrix H = X (X'X)^{-1} X'.
H <- X %*% solve(crossprod(X)) %*% t(X)
# Take n and k from the design matrix rather than hard-coding them, so they
# stay consistent with the fit if predictors change or lm() drops rows
# containing missing values.
n <- nrow(X)
k <- ncol(X)
diag(H) > 2 * k / n
## 1 2 3 4 5 6 7 8 9 10 11 12 13
## FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 14 15 16 17 18 19 20 21 22 23 24 25 26
## FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## 27 28 29 30 31 32 33 34 35
## FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# Residual-versus-leverage plot from olsrr for the fitted model.
ols_plot_resid_lev(modelo)