MLR

Librerias

#install.packages("ggplot2", "zoo", "dplyr", "ggrepel")
source("Script.R")
library(readxl)
library(GGally)

Loading required package: ggplot2

Registered S3 method overwritten by 'GGally':
  method from   
  +.gg   ggplot2

library(ggplot2)
library(lmtest)

Loading required package: zoo


Attaching package: 'zoo'

The following objects are masked from 'package:base':

    as.Date, as.Date.numeric

library(ggrepel)
library(dplyr)


Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union

library(car)

Loading required package: carData


Attaching package: 'car'

The following object is masked from 'package:dplyr':

    recode

library(plotly)


Attaching package: 'plotly'

The following object is masked from 'package:ggplot2':

    last_plot

The following object is masked from 'package:stats':

    filter

The following object is masked from 'package:graphics':

    layout

library(scatterplot3d)
library(corrplot)

Warning: package 'corrplot' was built under R version 4.4.2

corrplot 0.95 loaded

library(MASS)

Warning: package 'MASS' was built under R version 4.4.2


Attaching package: 'MASS'

The following object is masked from 'package:plotly':

    select

The following object is masked from 'package:dplyr':

    select

Datos

datos <- read_excel("datos_juegos.xlsx", 
    col_types = c("text", "numeric", "numeric", 
        "numeric", "numeric", "numeric", 
        "numeric", "numeric", "numeric", 
        "numeric"))

Warning: Coercing text to numeric in E2 / R2C5: '38.9'

Warning: Coercing text to numeric in F2 / R2C6: '64.7'

Warning: Coercing text to numeric in H2 / R2C8: '59.7'

Warning: Coercing text to numeric in E3 / R3C5: '38.8'

Warning: Coercing text to numeric in F3 / R3C6: '61.3'

Warning: Coercing text to numeric in H3 / R3C8: '55.0'

Warning: Coercing text to numeric in E4 / R4C5: '40.1'

Warning: Coercing text to numeric in F4 / R4C6: '60.0'

Warning: Coercing text to numeric in H4 / R4C8: '65.6'

Warning: Coercing text to numeric in E5 / R5C5: '41.6'

Warning: Coercing text to numeric in F5 / R5C6: '45.3'

Warning: Coercing text to numeric in H5 / R5C8: '61.4'

Warning: Coercing text to numeric in E6 / R6C5: '39.2'

Warning: Coercing text to numeric in F6 / R6C6: '53.8'

Warning: Coercing text to numeric in H6 / R6C8: '66.1'

Warning: Coercing text to numeric in E7 / R7C5: '39.7'

Warning: Coercing text to numeric in F7 / R7C6: '74.1'

Warning: Coercing text to numeric in H7 / R7C8: '61.0'

Warning: Coercing text to numeric in E8 / R8C5: '38.1'

Warning: Coercing text to numeric in F8 / R8C6: '65.4'

Warning: Coercing text to numeric in H8 / R8C8: '66.1'

Warning: Coercing text to numeric in E9 / R9C5: '37.0'

Warning: Coercing text to numeric in F9 / R9C6: '78.3'

Warning: Coercing text to numeric in H9 / R9C8: '58.9'

Warning: Coercing text to numeric in E10 / R10C5: '42.1'

Warning: Coercing text to numeric in F10 / R10C6: '47.6'

Warning: Coercing text to numeric in H10 / R10C8: '57.0'

Warning: Coercing text to numeric in E11 / R11C5: '42.3'

Warning: Coercing text to numeric in F11 / R11C6: '54.2'

Warning: Coercing text to numeric in H11 / R11C8: '58.9'

Warning: Coercing text to numeric in E12 / R12C5: '37.3'

Warning: Coercing text to numeric in F12 / R12C6: '48.0'

Warning: Coercing text to numeric in H12 / R12C8: '68.5'

Warning: Coercing text to numeric in E13 / R13C5: '39.5'

Warning: Coercing text to numeric in F13 / R13C6: '51.9'

Warning: Coercing text to numeric in H13 / R13C8: '59.2'

Warning: Coercing text to numeric in E14 / R14C5: '37.4'

Warning: Coercing text to numeric in F14 / R14C6: '53.6'

Warning: Coercing text to numeric in H14 / R14C8: '58.8'

Warning: Coercing text to numeric in E15 / R15C5: '35.1'

Warning: Coercing text to numeric in F15 / R15C6: '71.4'

Warning: Coercing text to numeric in H15 / R15C8: '58.6'

Warning: Coercing text to numeric in E16 / R16C5: '38.8'

Warning: Coercing text to numeric in F16 / R16C6: '58.3'

Warning: Coercing text to numeric in H16 / R16C8: '59.2'

Warning: Coercing text to numeric in E17 / R17C5: '36.6'

Warning: Coercing text to numeric in F17 / R17C6: '52.6'

Warning: Coercing text to numeric in H17 / R17C8: '54.4'

Warning: Coercing text to numeric in E18 / R18C5: '35.3'

Warning: Coercing text to numeric in F18 / R18C6: '59.3'

Warning: Coercing text to numeric in H18 / R18C8: '49.6'

Warning: Coercing text to numeric in E19 / R19C5: '41.1'

Warning: Coercing text to numeric in F19 / R19C6: '55.3'

Warning: Coercing text to numeric in H19 / R19C8: '54.3'

Warning: Coercing text to numeric in E20 / R20C5: '38.6'

Warning: Coercing text to numeric in F20 / R20C6: '69.6'

Warning: Coercing text to numeric in H20 / R20C8: '58.7'

Warning: Coercing text to numeric in E21 / R21C5: '39.3'

Warning: Coercing text to numeric in F21 / R21C6: '78.3'

Warning: Coercing text to numeric in H21 / R21C8: '51.7'

Warning: Coercing text to numeric in E22 / R22C5: '39.7'

Warning: Coercing text to numeric in F22 / R22C6: '38.1'

Warning: Coercing text to numeric in H22 / R22C8: '61.9'

Warning: Coercing text to numeric in E23 / R23C5: '39.7'

Warning: Coercing text to numeric in F23 / R23C6: '68.8'

Warning: Coercing text to numeric in H23 / R23C8: '52.7'

Warning: Coercing text to numeric in E24 / R24C5: '35.5'

Warning: Coercing text to numeric in F24 / R24C6: '68.8'

Warning: Coercing text to numeric in H24 / R24C8: '57.8'

Warning: Coercing text to numeric in E25 / R25C5: '35.3'

Warning: Coercing text to numeric in F25 / R25C6: '74.1'

Warning: Coercing text to numeric in H25 / R25C8: '59.7'

Warning: Coercing text to numeric in E26 / R26C5: '38.7'

Warning: Coercing text to numeric in F26 / R26C6: '50.0'

Warning: Coercing text to numeric in H26 / R26C8: '54.9'

Warning: Coercing text to numeric in E27 / R27C5: '39.9'

Warning: Coercing text to numeric in F27 / R27C6: '57.1'

Warning: Coercing text to numeric in H27 / R27C8: '65.3'

Warning: Coercing text to numeric in E28 / R28C5: '37.4'

Warning: Coercing text to numeric in F28 / R28C6: '56.3'

Warning: Coercing text to numeric in H28 / R28C8: '43.8'

Warning: Coercing text to numeric in E29 / R29C5: '39.3'

Warning: Coercing text to numeric in F29 / R29C6: '47.0'

Warning: Coercing text to numeric in H29 / R29C8: '53.5'

View(datos)

Descriptivo

summary(datos)

     Team                 y                x1             x2      
 Length:28          Min.   : 0.000   Min.   :1416   Min.   :1414  
 Class :character   1st Qu.: 4.000   1st Qu.:1896   1st Qu.:1714  
 Mode  :character   Median : 6.500   Median :2111   Median :2106  
                    Mean   : 6.964   Mean   :2110   Mean   :2127  
                    3rd Qu.:10.000   3rd Qu.:2303   3rd Qu.:2474  
                    Max.   :13.000   Max.   :2971   Max.   :2929  
       x3              x4              x6               x7       
 Min.   :35.10   Min.   :38.10   Min.   : 576.0   Min.   :43.80  
 1st Qu.:37.38   1st Qu.:52.42   1st Qu.: 720.0   1st Qu.:54.77  
 Median :38.85   Median :57.70   Median : 794.0   Median :58.85  
 Mean   :38.65   Mean   :59.40   Mean   : 795.4   Mean   :58.30  
 3rd Qu.:39.70   3rd Qu.:68.80   3rd Qu.: 869.8   3rd Qu.:61.10  
 Max.   :42.30   Max.   :78.30   Max.   :1037.0   Max.   :68.50  
       x8             x9      
 Min.   :1457   Min.   :1575  
 1st Qu.:1888   1st Qu.:1916  
 Median :2062   Median :2142  
 Mean   :2133   Mean   :2138  
 3rd Qu.:2427   3rd Qu.:2328  
 Max.   :2876   Max.   :2670

str(datos)

tibble [28 × 10] (S3: tbl_df/tbl/data.frame)
 $ Team: chr [1:28] "Washington" "Minnesota" "NewEngland" "Oakland" ...
 $ y   : num [1:28] 10 11 11 13 10 11 10 11 4 2 ...
 $ x1  : num [1:28] 2113 2003 2957 2285 2971 ...
 $ x2  : num [1:28] 1985 2855 1737 2905 1666 ...
 $ x3  : num [1:28] 38.9 38.8 40.1 41.6 39.2 39.7 38.1 37 42.1 42.3 ...
 $ x4  : num [1:28] 64.7 61.3 60 45.3 53.8 74.1 65.4 78.3 47.6 54.2 ...
 $ x6  : num [1:28] 868 615 914 957 836 786 754 797 714 797 ...
 $ x7  : num [1:28] 59.7 55 65.6 61.4 66.1 61 66.1 58.9 57 58.9 ...
 $ x8  : num [1:28] 2205 2096 1847 1903 1457 ...
 $ x9  : num [1:28] 1917 1575 2175 2476 1866 ...

# Calcular y mostrar la matriz de correlación
cor_matrix <- cor(datos[, -1])
print(cor_matrix)

             y          x1          x2           x3          x4           x6
y   1.00000000  0.59323604  0.48273470 -0.082959146  0.25847477  0.271019654
x1  0.59323604  1.00000000 -0.03674736  0.212819565  0.07029904  0.256720775
x2  0.48273470 -0.03674736  1.00000000 -0.066770369  0.30151583 -0.176888590
x3 -0.08295915  0.21281957 -0.06677037  1.000000000 -0.40628208 -0.009623584
x4  0.25847477  0.07029904  0.30151583 -0.406282084  1.00000000 -0.135729185
x6  0.27101965  0.25672077 -0.17688859 -0.009623584 -0.13572919  1.000000000
x7  0.55756504  0.83187621 -0.19407433  0.157452089 -0.10576424  0.373145415
x8 -0.65865441 -0.64844090  0.02964349  0.239404646 -0.04424329 -0.381550164
x9 -0.26032019 -0.10589032  0.19649361  0.061839967  0.14166111 -0.228206868
           x7          x8          x9
y   0.5575650 -0.65865441 -0.26032019
x1  0.8318762 -0.64844090 -0.10589032
x2 -0.1940743  0.02964349  0.19649361
x3  0.1574521  0.23940465  0.06183997
x4 -0.1057642 -0.04424329  0.14166111
x6  0.3731454 -0.38155016 -0.22820687
x7  1.0000000 -0.67985993 -0.21583513
x8 -0.6798599  1.00000000  0.41218066
x9 -0.2158351  0.41218066  1.00000000

# Visualizar la matriz de correlación
corrplot(cor_matrix, method = "circle", type = "upper", tl.cex = 0.8)

Normalidad De Las Variables

shapiro.test(datos$y)


    Shapiro-Wilk normality test

data:  datos$y
W = 0.94487, p-value = 0.147

shapiro.test(datos$x1)


    Shapiro-Wilk normality test

data:  datos$x1
W = 0.97067, p-value = 0.5985

shapiro.test(datos$x2)


    Shapiro-Wilk normality test

data:  datos$x2
W = 0.92349, p-value = 0.04234

shapiro.test(datos$x3)


    Shapiro-Wilk normality test

data:  datos$x3
W = 0.95676, p-value = 0.2915

shapiro.test(datos$x4)


    Shapiro-Wilk normality test

data:  datos$x4
W = 0.96985, p-value = 0.5765

shapiro.test(datos$x6)


    Shapiro-Wilk normality test

data:  datos$x6
W = 0.97936, p-value = 0.8348

shapiro.test(datos$x7)


    Shapiro-Wilk normality test

data:  datos$x7
W = 0.96548, p-value = 0.4659

shapiro.test(datos$x8)


    Shapiro-Wilk normality test

data:  datos$x8
W = 0.97143, p-value = 0.6194

shapiro.test(datos$x9)


    Shapiro-Wilk normality test

data:  datos$x9
W = 0.97749, p-value = 0.7867

Cof. De Correlacion

cor(datos$x1,datos$y)

[1] 0.593236

cor(datos$x2,datos$y)

[1] 0.4827347

cor(datos$x3,datos$y)

[1] -0.08295915

cor(datos$x4,datos$y)

[1] 0.2584748

cor(datos$x6,datos$y)

[1] 0.2710197

cor(datos$x7,datos$y)

[1] 0.557565

cor(datos$x8,datos$y)

[1] -0.6586544

cor(datos$x9,datos$y)

[1] -0.2603202

Seleccion De Modelo

Primero Iniciaremos con Entrada Forzada

modelo_inical= lm(y ~ x1 + x2 + x3 + x4 + x6 + x7 + x8 + x9,datos)
summary(modelo_inical)


Call:
lm(formula = y ~ x1 + x2 + x3 + x4 + x6 + x7 + x8 + x9, data = datos)

Residuals:
     Min       1Q   Median       3Q      Max 
-3.10083 -1.07065  0.08889  1.03516  2.53756 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) -1.265e+01  1.128e+01  -1.121    0.276    
x1           8.784e-04  1.968e-03   0.446    0.660    
x2           3.971e-03  7.748e-04   5.126 6.01e-05 ***
x3           2.713e-02  2.452e-01   0.111    0.913    
x4           4.916e-02  4.099e-02   1.199    0.245    
x6           2.774e-03  3.227e-03   0.860    0.401    
x7           2.213e-01  1.339e-01   1.653    0.115    
x8          -2.489e-03  1.795e-03  -1.386    0.182    
x9          -2.087e-03  1.373e-03  -1.520    0.145    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.81 on 19 degrees of freedom
Multiple R-squared:  0.8096,    Adjusted R-squared:  0.7294 
F-statistic:  10.1 on 8 and 19 DF,  p-value: 2.053e-05

A continuacion veremos VIF del modelo inicial

vif(modelo_inical)

      x1       x2       x3       x4       x6       x7       x8       x9 
4.745184 1.232079 1.944976 1.553580 1.259526 4.398719 3.590802 1.379696

Ningun VIF >5 (o >10) por lo cual no existe una colienealidad alta entre ninguna de las variables.

Ahora veremos el AIC del modelo inicial

AIC(modelo_inical, k = 8)

[1] 181.8375

#Manual
AIC_funt(modelo_inical, 8)

'log Lik.' 117.8375 (df=10)

Por ultimo utilizaremos Stepwise Both para agregar o eliminar variables explicativas de forma certera

empty.model<-lm(y~1, data=datos)
horizonte<-formula(y ~ x1 + x2 + x3 + x4 + x6 + x7 + x8 + x9)

modboth <- stepAIC(empty.model, trace=TRUE, direction="both", scope=horizonte)

Start:  AIC=70.81
y ~ 1

       Df Sum of Sq    RSS    AIC
+ x8    1   141.845 185.12 56.886
+ x1    1   115.068 211.90 60.669
+ x7    1   101.646 225.32 62.389
+ x2    1    76.193 250.77 65.385
+ x6    1    24.016 302.95 70.678
<none>              326.96 70.814
+ x9    1    22.157 304.81 70.849
+ x4    1    21.844 305.12 70.878
+ x3    1     2.250 324.71 72.621

Step:  AIC=56.89
y ~ x8

       Df Sum of Sq    RSS    AIC
+ x2    1    82.554 102.56 42.352
+ x4    1    17.230 167.89 56.151
+ x1    1    15.573 169.55 56.426
<none>              185.12 56.886
+ x7    1     7.326 177.79 57.756
+ x3    1     1.937 183.18 58.592
+ x6    1     0.149 184.97 58.864
+ x9    1     0.049 185.07 58.879
- x8    1   141.845 326.96 70.814

Step:  AIC=42.35
y ~ x8 + x2

       Df Sum of Sq     RSS    AIC
+ x7    1    25.054  77.511 36.510
+ x1    1    17.277  85.287 39.187
<none>              102.565 42.352
+ x3    1     4.365  98.200 43.134
+ x6    1     4.190  98.375 43.184
+ x9    1     2.726  99.839 43.598
+ x4    1     2.145 100.420 43.760
- x2    1    82.554 185.119 56.886
- x8    1   148.206 250.771 65.385

Step:  AIC=36.51
y ~ x8 + x2 + x7

       Df Sum of Sq     RSS    AIC
+ x9    1     5.948  71.562 36.274
<none>               77.511 36.510
+ x4    1     4.384  73.127 36.880
+ x6    1     1.968  75.542 37.790
+ x1    1     0.677  76.834 38.264
+ x3    1     0.025  77.486 38.501
- x7    1    25.054 102.565 42.352
- x8    1    29.158 106.668 43.451
- x2    1   100.282 177.793 57.756

Step:  AIC=36.27
y ~ x8 + x2 + x7 + x9

       Df Sum of Sq     RSS    AIC
+ x4    1     6.102  65.460 35.779
<none>               71.562 36.274
- x9    1     5.948  77.511 36.510
+ x1    1     1.744  69.818 37.583
+ x6    1     1.519  70.043 37.673
+ x3    1     0.177  71.385 38.205
- x8    1    15.674  87.237 39.820
- x7    1    28.276  99.839 43.598
- x2    1   106.229 177.791 59.755

Step:  AIC=35.78
y ~ x8 + x2 + x7 + x9 + x4

       Df Sum of Sq     RSS    AIC
<none>               65.460 35.779
- x4    1     6.102  71.562 36.274
+ x6    1     2.172  63.288 36.834
- x9    1     7.667  73.127 36.880
+ x1    1     0.720  64.740 37.469
+ x3    1     0.331  65.129 37.637
- x8    1    12.070  77.530 38.517
- x7    1    31.559  97.018 44.795
- x2    1    88.921 154.381 57.802

modboth$anova

Stepwise Model Path 
Analysis of Deviance Table

Initial Model:
y ~ 1

Final Model:
y ~ x8 + x2 + x7 + x9 + x4


  Step Df   Deviance Resid. Df Resid. Dev      AIC
1                           27  326.96429 70.81410
2 + x8  1 141.845488        26  185.11880 56.88621
3 + x2  1  82.554065        25  102.56473 42.35211
4 + x7  1  25.053948        24   77.51078 36.50995
5 + x9  1   5.948471        23   71.56231 36.27419
6 + x4  1   6.102449        22   65.45987 35.77852

summary(modboth) #y ~ x8 + x2 + x7 + x9 + x4


Call:
lm(formula = y ~ x8 + x2 + x7 + x9 + x4, data = datos)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.78880 -1.00317  0.03609  0.86568  2.80212 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) -1.058e+01  7.939e+00  -1.333  0.19613    
x8          -2.779e-03  1.380e-03  -2.014  0.05639 .  
x2           3.963e-03  7.249e-04   5.467 1.72e-05 ***
x7           2.843e-01  8.730e-02   3.257  0.00361 ** 
x9          -2.049e-03  1.276e-03  -1.605  0.12271    
x4           4.799e-02  3.351e-02   1.432  0.16617    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.725 on 22 degrees of freedom
Multiple R-squared:  0.7998,    Adjusted R-squared:  0.7543 
F-statistic: 17.58 on 5 and 22 DF,  p-value: 4.927e-07

AIC(modboth, k = 5)

[1] 138.2391

AIC_funt(modboth,5)

'log Lik.' 113.2391 (df=7)

Obtenemos un nuevo modelo con un AIC mas bajo que el inicial, por ende utilizaremos este.

modelo <- modboth
summary(modelo)


Call:
lm(formula = y ~ x8 + x2 + x7 + x9 + x4, data = datos)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.78880 -1.00317  0.03609  0.86568  2.80212 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) -1.058e+01  7.939e+00  -1.333  0.19613    
x8          -2.779e-03  1.380e-03  -2.014  0.05639 .  
x2           3.963e-03  7.249e-04   5.467 1.72e-05 ***
x7           2.843e-01  8.730e-02   3.257  0.00361 ** 
x9          -2.049e-03  1.276e-03  -1.605  0.12271    
x4           4.799e-02  3.351e-02   1.432  0.16617    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.725 on 22 degrees of freedom
Multiple R-squared:  0.7998,    Adjusted R-squared:  0.7543 
F-statistic: 17.58 on 5 and 22 DF,  p-value: 4.927e-07

var. categorica??

no.

Verificacion De Supuestos

qqnorm(residuals(modelo))  # Crear gráfico Q-Q
qqline(residuals(modelo))  # Añadir la línea de referencia

# La función lm() calcula y almacena los valores predichos por el modelo y los
# residuos.
datos$prediccion <- modelo$fitted.values
datos$residuos   <- modelo$residuals


#Distribución de los residuos 
ggplot(data = datos, aes(x = prediccion, y = residuos)) +
  geom_point(aes(color = residuos)) +
  scale_color_gradient2(low = "blue3", mid = "grey", high = "red") +
  geom_hline(yintercept = 0) +
  geom_segment(aes(xend = prediccion, yend = 0), alpha = 0.2) +
  labs(title = "Distribución de los residuos", x = "predicción modelo",
       y = "residuo") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5), legend.position = "none")

Normalidad

shapiro.test(datos$residuos)


    Shapiro-Wilk normality test

data:  datos$residuos
W = 0.96885, p-value = 0.55

Homocedasticidad

bptest(modelo)


    studentized Breusch-Pagan test

data:  modelo
BP = 6.9601, df = 5, p-value = 0.2236

ggplot(data = datos, aes(x = prediccion, y = residuos)) +
  geom_point(aes(color = residuos)) +
  scale_color_gradient2(low = "blue3", mid = "grey", high = "red") +
  geom_segment(aes(xend = prediccion, yend = 0), alpha = 0.2) +
  geom_smooth(se = FALSE, color = "firebrick") +
  labs(title = "Distribución de los residuos", x = "predicción modelo",
       y = "residuo") +
  geom_hline(yintercept = 0) +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5), legend.position = "none")

`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

Autocorrelacion De Los Residuos

dw_resultado <- dwtest(modelo) 
print(dw_resultado)


    Durbin-Watson test

data:  modelo
DW = 1.5967, p-value = 0.1182
alternative hypothesis: true autocorrelation is greater than 0

ggplot(data = datos, aes(x = seq_along(residuos), y = residuos)) +
  geom_point(aes(color = residuos)) +
  scale_color_gradient2(low = "blue3", mid = "grey", high = "red") +
  geom_line(size = 0.3) +
  labs(title = "Distribución de los residuos", x = "index", y = "residuo") +
  geom_hline(yintercept = 0) +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5), legend.position = "none")

Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.

Tenemos que se cumplen los supuestos

Outliers y Alto Leverage

Prueba De Bonferroni

outlierTest(modelo)

No Studentized residuals with Bonferroni p < 0.05
Largest |rstudent|:
  rstudent unadjusted p-value Bonferroni p
4 2.205185           0.038725           NA

# Valores de leverage
hatvalues_model <- hatvalues(modelo)
datos$Leverage <- hatvalues_model
n <- nrow(datos)
p <- length(coef(modelo))
umbral_leverage <- 2 * (p / n)
plot(hatvalues_model, main = "Valores de Leverage", xlab = "Observaciones", ylab = "Leverage")
abline(h = umbral_leverage, col = "red", lty = 2)

# Distancia de Cook
cooks_dist <- cooks.distance(modelo)
plot(cooks_dist, main = "Distancia de Cook", xlab = "Observaciones", ylab = "Distancia de Cook")
abline(h = 4 / n, col = "red", lty = 2)

datos$Cooks_Distance <- cooks_dist

# Identificar observaciones influyentes
influyentes <- which(cooks_dist > (4 / n))
cat("Observaciones influyentes:", influyentes, "\n")

Observaciones influyentes: 4 21

# Prueba de Bonferroni para Outliers
bonferroni_test <- outlierTest(modelo, cutoff=Inf)
print("Resultados del Test de Bonferroni para Outliers:")

[1] "Resultados del Test de Bonferroni para Outliers:"

print(bonferroni_test)

     rstudent unadjusted p-value Bonferroni p
4   2.2051850           0.038725           NA
21 -1.8983400           0.071481           NA
15 -1.8171484           0.083494           NA
3   1.7958115           0.086928           NA
1   1.7259639           0.099039           NA
10 -1.6258698           0.118890           NA
7  -1.2773923           0.215410           NA
8   1.2262929           0.233660           NA
9   1.1521426           0.262210           NA
24 -0.8574013           0.400900           NA

# Gráfico de diagnóstico usando influenceIndexPlot
influenceIndexPlot(modelo, vars=c("Cook", "hat", "Bonf"), id.n=4, las=1)

Warning in plot.window(...): "id.n" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "id.n" is not a graphical parameter

Warning in axis(side = side, at = at, labels = labels, ...): "id.n" is not a
graphical parameter
Warning in axis(side = side, at = at, labels = labels, ...): "id.n" is not a
graphical parameter

Warning in box(...): "id.n" is not a graphical parameter

Warning in title(...): "id.n" is not a graphical parameter

Warning in plot.xy(xy.coords(x, y), type = type, ...): "id.n" is not a
graphical parameter
Warning in plot.xy(xy.coords(x, y), type = type, ...): "id.n" is not a
graphical parameter

Warning in plot.window(...): "id.n" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "id.n" is not a graphical parameter

Warning in axis(side = side, at = at, labels = labels, ...): "id.n" is not a
graphical parameter
Warning in axis(side = side, at = at, labels = labels, ...): "id.n" is not a
graphical parameter

Warning in box(...): "id.n" is not a graphical parameter

Warning in title(...): "id.n" is not a graphical parameter

Warning in plot.xy(xy.coords(x, y), type = type, ...): "id.n" is not a
graphical parameter
Warning in plot.xy(xy.coords(x, y), type = type, ...): "id.n" is not a
graphical parameter

Warning in plot.window(...): "id.n" is not a graphical parameter

Warning in plot.xy(xy, type, ...): "id.n" is not a graphical parameter

Warning in axis(side = side, at = at, labels = labels, ...): "id.n" is not a
graphical parameter
Warning in axis(side = side, at = at, labels = labels, ...): "id.n" is not a
graphical parameter

Warning in box(...): "id.n" is not a graphical parameter

Warning in title(...): "id.n" is not a graphical parameter

Warning in plot.xy(xy.coords(x, y), type = type, ...): "id.n" is not a
graphical parameter

Manejo De Outliers

Lo hareos mediante M-Estimacion

modelo_robusto <- rlm(y ~ x8 + x2 + x7 + x9 + x4, data = datos)
AIC(modelo_robusto)

[1] 117.6841

AIC(modelo)

[1] 117.2391

Prediccion

# Para los nuevos datos de predicción (nuevo conjunto de predictores)
nuevos_datos <- data.frame(x8 = 5, x2 = 10, x7 = 15, x9 = 7, x4 = 3)

# Predicción con intervalos de confianza
pred_conf <- predict(modelo_robusto, newdata = nuevos_datos, interval = "confidence", level = 0.95)

# Predicción con intervalos de predicción
pred_pred <- predict(modelo_robusto, newdata = nuevos_datos, interval = "prediction", level = 0.95)

Warning in predict.lm(modelo_robusto, newdata = nuevos_datos, interval = "prediction", : Assuming constant prediction variance even though model fit is weighted

# Ver los resultados
print(pred_conf)

        fit       lwr      upr
1 -5.040181 -14.42864 4.348277

print(pred_pred)

        fit       lwr      upr
1 -5.040181 -14.73326 4.652903