Ejercicio

El diseño eficiente de ciertos tipos de incineradores de desechos municipales exige que se disponga de información acerca del contenido energético de los desechos. Los autores del artículo “Modeling the Energy Content of Municipal Solid Waste Using Multiple Regression Analysis” (J. of the Air and Waste Mgmt. Assoc., 1996: 650-656) bondadosamente nos proporcionaron la información siguiente acerca de:
\(y\) = contenido energético (kcal/kg),
las tres variables físicas de composición:
\(x_1\) = % de plástico por peso,
\(x_2\) = % de papel por peso,
\(x_3\) = % de basura por peso,
y la variable de análisis próximo:
\(x_4\) = % de humedad por peso para especímenes de desechos de cierta región.
A continuación se presenta la información para las 12 primeras observaciones.

# Measurements for the 12 waste specimens. The four predictors are
# percentages by weight; `agua` is the moisture content.
plástico <- c(
  18.69, 19.43, 19.24, 22.64, 16.54, 21.44,
  19.53, 23.97, 21.45, 20.34, 17.03, 21.03
)
papel <- c(
  15.65, 23.51, 24.23, 22.20, 23.56, 23.65,
  24.45, 19.39, 23.84, 26.50, 23.46, 26.99
)
basura <- c(
  45.01, 39.69, 43.16, 35.76, 41.20, 35.56,
  40.18, 44.11, 35.41, 34.21, 32.45, 38.19
)
agua <- c(
  58.21, 46.31, 46.63, 45.85, 55.14, 54.24,
  47.20, 43.82, 51.01, 49.06, 53.23, 51.78
)
# Response: energy content in kcal/kg.
y <- c(
  947, 1407, 1452, 1553, 989, 1162,
  1466, 1656, 1254, 1336, 1097, 1266
)
# Collect the five vectors into one data frame and display it.
data <- data.frame(
  plástico = plástico,
  papel = papel,
  basura = basura,
  agua = agua,
  y = y
)
data
##    plástico papel basura  agua    y
## 1     18.69 15.65  45.01 58.21  947
## 2     19.43 23.51  39.69 46.31 1407
## 3     19.24 24.23  43.16 46.63 1452
## 4     22.64 22.20  35.76 45.85 1553
## 5     16.54 23.56  41.20 55.14  989
## 6     21.44 23.65  35.56 54.24 1162
## 7     19.53 24.45  40.18 47.20 1466
## 8     23.97 19.39  44.11 43.82 1656
## 9     21.45 23.84  35.41 51.01 1254
## 10    20.34 26.50  34.21 49.06 1336
## 11    17.03 23.46  32.45 53.23 1097
## 12    21.03 26.99  38.19 51.78 1266
# Full model: regress energy content on all four composition variables,
# then display the fitted coefficients.
regmult <- lm(formula = y ~ plástico + papel + basura + agua)
regmult
## 
## Call:
## lm(formula = y ~ plástico + papel + basura + agua)
## 
## Coefficients:
## (Intercept)     plástico        papel       basura         agua  
##   2830.9739      23.4287       0.8243       2.4007     -42.1352
# Residual summary, coefficient table and overall fit of the full model.
summary(object = regmult)
## 
## Call:
## lm(formula = y ~ plástico + papel + basura + agua)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -42.579 -15.178   9.568  11.736  49.628 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 2830.9739   372.1728   7.607 0.000126 ***
## plástico      23.4287     5.7564   4.070 0.004749 ** 
## papel          0.8243     4.4053   0.187 0.856874    
## basura         2.4007     3.0765   0.780 0.460752    
## agua         -42.1352     2.9744 -14.166 2.07e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 32.2 on 7 degrees of freedom
## Multiple R-squared:  0.9865, Adjusted R-squared:  0.9788 
## F-statistic: 128.2 on 4 and 7 DF,  p-value: 1.261e-06
# Confidence intervals for the full-model coefficients (default level,
# reported as the 2.5 % and 97.5 % bounds).
confint(object = regmult)
##                   2.5 %      97.5 %
## (Intercept) 1950.924992 3711.022858
## plástico       9.816832   37.040503
## papel         -9.592539   11.241186
## basura        -4.873961    9.675414
## agua         -49.168439  -35.101875
# Backward elimination starting from the full model; each candidate
# removal is compared by AIC.
step(object = regmult, direction = "backward")
## Start:  AIC=86.86
## y ~ plástico + papel + basura + agua
## 
##            Df Sum of Sq    RSS     AIC
## - papel     1        36   7294  84.918
## - basura    1       631   7889  85.860
## <none>                    7257  86.859
## - plástico  1     17174  24431  99.425
## - agua      1    208058 215315 125.539
## 
## Step:  AIC=84.92
## y ~ plástico + basura + agua
## 
##            Df Sum of Sq    RSS     AIC
## - basura    1       767   8061  84.119
## <none>                    7294  84.918
## - plástico  1     18814  26108  98.221
## - agua      1    263871 271165 126.307
## 
## Step:  AIC=84.12
## y ~ plástico + agua
## 
##            Df Sum of Sq    RSS     AIC
## <none>                    8061  84.119
## - plástico  1     18469  26530  96.414
## - agua      1    268087 276148 124.525
## 
## Call:
## lm(formula = y ~ plástico + agua)
## 
## Coefficients:
## (Intercept)     plástico         agua  
##     2977.40        22.82       -42.58

Modelo final

# Refit the two-predictor model retained by the backward selection and
# summarise it.
modeloF <- lm(y ~ plástico + agua)
summary(object = modeloF)
## 
## Call:
## lm(formula = y ~ plástico + agua)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -42.115 -17.157   0.957  15.156  52.495 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 2977.397    198.761  14.980 1.14e-07 ***
## plástico      22.819      5.025   4.541   0.0014 ** 
## agua         -42.575      2.461 -17.301 3.25e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 29.93 on 9 degrees of freedom
## Multiple R-squared:  0.985,  Adjusted R-squared:  0.9817 
## F-statistic: 296.4 on 2 and 9 DF,  p-value: 6.119e-09

Validación de los supuestos

# Normality of the residuals: Shapiro-Wilk test on the final model.
# The residuals are obtained through the residuals() extractor rather than
# by reaching into the fitted object's `residuals` component.
shapiro.test(residuals(modeloF))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(modeloF)
## W = 0.96116, p-value = 0.8002
# Diagnostic plots produced by the plot method for the fitted model.
plot(modeloF)

Homogeneidad de varianza

library(lmtest)
## Warning: package 'lmtest' was built under R version 4.3.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.3.3
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
# Studentized Breusch-Pagan test for non-constant error variance in the
# final model.
bptest(formula = modeloF)
## 
##  studentized Breusch-Pagan test
## 
## data:  modeloF
## BP = 1.7069, df = 2, p-value = 0.4259

Autocorrelación

# Durbin-Watson test on the final model; with no `alternative` given, the
# alternative hypothesis is autocorrelation greater than 0.
dwtest(formula = modeloF)
## 
##  Durbin-Watson test
## 
## data:  modeloF
## DW = 2.1502, p-value = 0.6287
## alternative hypothesis: true autocorrelation is greater than 0

Datos influyentes

library(car)
## Warning: package 'car' was built under R version 4.3.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.3.3
# Influence plot for the final model; it also prints the studentized
# residual, hat value and Cook's distance of the observations it flags.
influencePlot(model = modeloF)

##      StudRes       Hat       CookD
## 1  0.9136547 0.3978989 0.187324112
## 2 -1.8055125 0.2399977 0.274271647
## 7  2.3938436 0.1809115 0.276542743
## 8 -0.1108100 0.3987490 0.003049067
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.3.3
## corrplot 0.92 loaded
# Correlation matrix of all variables in `data`, drawn as numbers.
# `data` already holds exactly the five columns of interest, in this order,
# so cor() can take it directly; selecting every column through dplyr was a
# no-op that required a package the script never loads.
corrplot(cor(data), method = "number", tl.col = "black")