library(readxl)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(olsrr)
## 
## Attaching package: 'olsrr'
## The following object is masked from 'package:datasets':
## 
##     rivers
# Load the data set from the Excel workbook. The data frame is deliberately
# not attached: columns are reached through `datos$...` or a `data = datos`
# argument, which avoids masking other objects on the search path.
datos <- read_excel("datos3.xlsx")
# Preview the first 10 rows.
head(datos, 10)
## # A tibble: 10 × 4
##        y    x1    x2    x3
##    <dbl> <dbl> <dbl> <dbl>
##  1  24.2     8  6.56     8
##  2  24       9  6.56     7
##  3  21.9    11  7.42    10
##  4  24.2     8  7.8      8
##  5  21.2    10  5.09     7
##  6  23.5    10  5.3      9
##  7  20.6    10  2.98     7
##  8  23.4    12  2.99     7
##  9  22.3     8  6.72     8
## 10  25.1    13  6.51     7
# Fit the linear regression of y on every other column of `datos`, then
# print the coefficient table together with the overall fit statistics.
modelo <- lm(y ~ ., data = datos)
summary(modelo)
## 
## Call:
## lm(formula = y ~ ., data = datos)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.1091 -0.3640  0.0634  0.8775  2.1345 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  9.62063    2.31786   4.151  0.00024 ***
## x1           0.78425    0.14206   5.521 4.83e-06 ***
## x2           0.97364    0.12963   7.511 1.83e-08 ***
## x3           0.05373    0.22669   0.237  0.81420    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.266 on 31 degrees of freedom
## Multiple R-squared:  0.715,  Adjusted R-squared:  0.6874 
## F-statistic: 25.93 on 3 and 31 DF,  p-value: 1.38e-08
# Keep the residuals of the fitted model for the diagnostics that follow.
r <- residuals(modelo)

Prueba de normalidad (Shapiro-Wilk). H0: los errores se distribuyen normalmente. H1: los errores no se distribuyen normalmente.

# Shapiro-Wilk normality test applied to the model residuals.
shapiro.test(r)
## 
##  Shapiro-Wilk normality test
## 
## data:  r
## W = 0.91712, p-value = 0.01179

Identificación de posibles valores atípicos (outliers): observaciones cuyo residuo estudentizado supera 2 en valor absoluto

# Flag observations whose studentized residual exceeds 2 in absolute value.
# The comparison is made on the unrounded values so that a borderline case
# (e.g. 2.00004) is not turned into exactly 2 and missed by the threshold.
abs(rstudent(modelo)) > 2
##     1     2     3     4     5     6     7     8     9    10    11    12    13 
## FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE 
##    14    15    16    17    18    19    20    21    22    23    24    25    26 
## FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE 
##    27    28    29    30    31    32    33    34    35 
## FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# Studentized residual plot from olsrr, used to inspect potential outliers.
ols_plot_resid_stud(modelo)

Identificación de posibles observaciones con alto apalancamiento (leverage): aquellas cuyo elemento diagonal de la matriz H supera 2k/n

# Design matrix of the fitted model (intercept column included).
X <- model.matrix(modelo)
# Hat matrix H = X (X'X)^{-1} X'. Solving the linear system directly avoids
# forming the explicit inverse of X'X.
H <- X %*% solve(crossprod(X), t(X))

# n: number of observations used in the fit; k: number of estimated
# coefficients (intercept included). Both are read from X so they stay in
# sync with the model if predictors change or rows are dropped.
n <- nrow(X)
k <- ncol(X)
# Flag high-leverage observations: diagonal entries of H above 2k/n.
diag(H) > 2 * k / n
##     1     2     3     4     5     6     7     8     9    10    11    12    13 
## FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE 
##    14    15    16    17    18    19    20    21    22    23    24    25    26 
## FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE 
##    27    28    29    30    31    32    33    34    35 
## FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# Residual vs leverage plot from olsrr for the fitted model.
ols_plot_resid_lev(modelo)