Curso: Regresión aplicada a las ciencias ambientales
Responsable del curso: Dr. Jorge Mendez González
Objetivo: Utilizar tres algoritmos (stepwise, machine learning y algoritmo genético) para elegir los mejores predictores para construir un modelo de regresión lineal múltiple
Datos: Los datos utilizados corresponden al conjunto de datos conocido como “New York Air Quality Measurements”. Las variables originales se abrevian como o=Ozone, s=Solar.R, w=Wind, t=Temp; además se generaron las siguientes variables: SW=Solar.R×Wind, WT=Wind×Temp, TS=Temp×Solar.R, S2=Solar.R^2, W2=Wind^2, T2=Temp^2, S2W=Solar.R^2×Wind, W2T=Wind^2×Temp, lnW=logaritmo natural de Wind, lnT=logaritmo natural de Temp.
library(readxl)
# Read the "Datos_sample" worksheet of the Excel workbook into `Datos`,
# open it in the data viewer, and attach it so that later statements can
# refer to its columns by bare name.
Datos <- read_excel(
  path = "Datos_sample.xlsx",
  sheet = "Datos_sample"
)
View(Datos)
attach(Datos)
library(car)
library(MASS)
library(ISLR)
library(tictoc)
library(sjPlot)
library(glmulti)
library(tidyverse)
library(performance)
library(tidyr)
library(boot)
library(vembedr)
library(DT)
library(effects)
library(report)
library(equatiomatic)
library(visreg)
library(ggiraphExtra)
library(leaps)
library(relaimpo)
library(caret)
library(ggplot2)
# Derived predictors: pairwise products, squares, squared-by-linear products
# and natural logs of the base columns s, w and t.
# Every operand is read through `Datos$` so the result does not depend on
# which copy of `Datos` is currently on the search path. The previous version
# re-attached the data frame after each assignment, stacking many masked
# copies of the same columns.
Datos$SW <- Datos$s * Datos$w
Datos$WT <- Datos$w * Datos$t
Datos$TS <- Datos$t * Datos$s
Datos$S2 <- Datos$s * Datos$s
Datos$W2 <- Datos$w * Datos$w
Datos$T2 <- Datos$t * Datos$t
Datos$S2W <- Datos$S2 * Datos$w
Datos$W2T <- Datos$W2 * Datos$t
Datos$lnW <- log(Datos$w)
Datos$lnT <- log(Datos$t)
View(Datos)
# Attach once, after all derived columns exist: later statements still refer
# to the columns by bare name.
attach(Datos)
names(Datos)
## [1] "o" "s" "w" "t" "SW" "WT" "TS" "S2" "W2" "T2" "S2W" "W2T"
## [13] "lnW" "lnT"
# Modelling data frame: the response `o` followed by the 13 candidate
# predictors. Columns are selected from `Datos` by name instead of being
# picked up from the search path, so a same-named object in the global
# environment cannot silently replace a column.
Set1 <- as.data.frame(
  Datos[, c("o", "s", "w", "t", "SW", "WT", "TS", "S2", "W2", "T2",
            "S2W", "W2T", "lnW", "lnT")]
)
head(Set1)
# Intercept-only model: the lower bound of the stepwise search.
regvacia <- lm(formula = o ~ 1, data = Set1)
summary(regvacia)
##
## Call:
## lm(formula = o ~ 1, data = Set1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -39.53 -22.53 -10.03 18.97 94.47
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 40.530 3.017 13.43 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 30.17 on 99 degrees of freedom
# Full model: `o` regressed on every other column of Set1; this is the upper
# bound of the stepwise search.
regcompleta <- lm(formula = o ~ (.), data = Set1)
summary(regcompleta)
##
## Call:
## lm(formula = o ~ (.), data = Set1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -34.919 -10.191 -2.383 6.718 54.078
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.559e+03 6.402e+03 0.712 0.478
## s 8.477e-02 3.585e-01 0.236 0.814
## w -2.327e+01 2.522e+01 -0.923 0.359
## t 2.913e+01 6.092e+01 0.478 0.634
## SW -1.231e-02 2.985e-02 -0.412 0.681
## WT 1.182e-01 4.331e-01 0.273 0.786
## TS 2.710e-03 2.513e-03 1.078 0.284
## S2 -3.791e-04 9.480e-04 -0.400 0.690
## W2 5.967e-01 1.215e+00 0.491 0.625
## T2 -6.475e-02 1.969e-01 -0.329 0.743
## S2W 9.468e-06 8.587e-05 0.110 0.912
## W2T -7.769e-04 1.890e-02 -0.041 0.967
## lnW 1.805e+01 7.003e+01 0.258 0.797
## lnT -1.465e+03 2.284e+03 -0.641 0.523
##
## Residual standard error: 16.57 on 86 degrees of freedom
## Multiple R-squared: 0.738, Adjusted R-squared: 0.6984
## F-statistic: 18.64 on 13 and 86 DF, p-value: < 2.2e-16
# Stepwise selection starting from the intercept-only model, with terms
# allowed to enter and leave between the empty and the full model.
# `direction` may be "backward", "both" or "forward".
modelo <- step(
  object = regvacia,
  scope = list(lower = regvacia, upper = regcompleta),
  direction = "both"
)
## Start: AIC=682.37
## o ~ 1
##
## Df Sum of Sq RSS AIC
## + T2 1 50752 39363 601.54
## + t 1 48504 41611 607.09
## + lnT 1 45782 44333 613.43
## + lnW 1 35703 54412 633.92
## + w 1 28506 61609 646.34
## + TS 1 22591 67524 655.51
## + W2 1 19898 70217 659.42
## + W2T 1 13807 76308 667.74
## + WT 1 11930 78185 670.17
## + s 1 10323 79792 672.20
## + S2 1 5401 84714 678.19
## <none> 90115 682.37
## + SW 1 391 89724 683.93
## + S2W 1 118 89996 684.24
##
## Step: AIC=601.54
## o ~ T2
##
## Df Sum of Sq RSS AIC
## + lnW 1 7268 32095 583.13
## + WT 1 6338 33025 585.98
## + w 1 4963 34400 590.06
## + lnT 1 4126 35237 592.47
## + t 1 4078 35285 592.60
## + W2T 1 3870 35493 593.19
## + W2 1 2755 36608 596.29
## + TS 1 2121 37242 598.00
## + s 1 1697 37666 599.13
## + S2 1 1313 38050 600.15
## <none> 39363 601.54
## + SW 1 131 39232 603.21
## + S2W 1 8 39355 603.52
## - T2 1 50752 90115 682.37
##
## Step: AIC=583.13
## o ~ T2 + lnW
##
## Df Sum of Sq RSS AIC
## + lnT 1 2733.6 29362 576.23
## + TS 1 2697.2 29398 576.35
## + t 1 2646.9 29449 576.52
## + s 1 2219.2 29876 577.96
## + S2 1 1878.3 30217 579.10
## + SW 1 1553.2 30542 580.17
## + W2 1 1514.9 30581 580.29
## + S2W 1 1239.5 30856 581.19
## + w 1 1152.1 30943 581.47
## + W2T 1 892.8 31203 582.31
## <none> 32095 583.13
## + WT 1 163.2 31932 584.62
## - lnW 1 7267.7 39363 601.54
## - T2 1 22316.4 54412 633.92
##
## Step: AIC=576.23
## o ~ T2 + lnW + lnT
##
## Df Sum of Sq RSS AIC
## + TS 1 2962.6 26399 567.59
## + s 1 2682.1 26680 568.65
## + S2 1 2155.5 27206 570.60
## + SW 1 2114.4 27247 570.75
## + S2W 1 1633.9 27728 572.50
## + W2 1 783.3 28578 575.52
## + W2T 1 727.8 28634 575.72
## <none> 29362 576.23
## + w 1 490.4 28871 576.54
## + WT 1 438.2 28924 576.72
## + t 1 267.0 29095 577.31
## - lnT 1 2733.6 32095 583.13
## - T2 1 5706.8 35069 591.99
## - lnW 1 5875.2 35237 592.47
##
## Step: AIC=567.59
## o ~ T2 + lnW + lnT + TS
##
## Df Sum of Sq RSS AIC
## + S2 1 733.9 25665 566.77
## + W2 1 565.2 25834 567.43
## + S2W 1 530.1 25869 567.56
## <none> 26399 567.59
## + t 1 509.7 25890 567.64
## + W2T 1 504.9 25894 567.66
## + s 1 423.9 25975 567.97
## + w 1 331.8 26067 568.33
## + WT 1 287.1 26112 568.50
## + SW 1 196.0 26203 568.85
## - TS 1 2962.6 29362 576.23
## - lnT 1 2999.0 29398 576.35
## - T2 1 5400.8 31800 584.20
## - lnW 1 6365.0 32764 587.19
##
## Step: AIC=566.77
## o ~ T2 + lnW + lnT + TS + S2
##
## Df Sum of Sq RSS AIC
## + W2 1 555.0 25110 566.59
## + W2T 1 536.4 25129 566.66
## <none> 25665 566.77
## + w 1 331.2 25334 567.47
## + WT 1 329.7 25336 567.48
## - S2 1 733.9 26399 567.59
## + t 1 289.9 25375 567.64
## + s 1 161.2 25504 568.14
## + SW 1 59.8 25606 568.54
## + S2W 1 40.1 25625 568.62
## - TS 1 1541.0 27206 570.60
## - lnT 1 2839.1 28504 575.26
## - T2 1 4122.8 29788 579.67
## - lnW 1 6087.1 31752 586.06
##
## Step: AIC=566.59
## o ~ T2 + lnW + lnT + TS + S2 + W2
##
## Df Sum of Sq RSS AIC
## + w 1 626.8 24484 566.06
## + SW 1 504.5 24606 566.56
## <none> 25110 566.59
## - W2 1 555.0 25665 566.77
## + S2W 1 386.2 24724 567.04
## - S2 1 723.6 25834 567.43
## + t 1 263.3 24847 567.53
## + s 1 161.3 24949 567.94
## + WT 1 50.4 25060 568.39
## + W2T 1 0.2 25110 568.59
## - TS 1 1491.1 26602 570.36
## - lnT 1 2170.7 27281 572.88
## - T2 1 3288.7 28399 576.89
## - lnW 1 3599.6 28710 577.98
##
## Step: AIC=566.06
## o ~ T2 + lnW + lnT + TS + S2 + W2 + w
##
## Df Sum of Sq RSS AIC
## - lnW 1 83.4 24567 564.40
## + SW 1 498.2 23985 566.00
## <none> 24484 566.06
## + S2W 1 446.5 24037 566.22
## - w 1 626.8 25110 566.59
## - S2 1 673.4 25157 566.77
## + t 1 178.6 24305 567.33
## - W2 1 850.6 25334 567.47
## + s 1 133.5 24350 567.51
## + WT 1 123.2 24360 567.55
## + W2T 1 74.7 24409 567.75
## - TS 1 1410.1 25894 569.66
## - lnT 1 2395.8 26879 573.39
## - T2 1 3558.1 28042 577.63
##
## Step: AIC=564.4
## o ~ T2 + lnT + TS + S2 + W2 + w
##
## Df Sum of Sq RSS AIC
## <none> 24567 564.40
## + SW 1 453.0 24114 564.54
## + S2W 1 397.2 24170 564.77
## - S2 1 686.6 25254 565.16
## + t 1 207.7 24359 565.55
## + WT 1 152.3 24415 565.78
## + s 1 144.6 24422 565.81
## + lnW 1 83.4 24484 566.06
## + W2T 1 54.5 24513 566.18
## - TS 1 1430.9 25998 568.06
## - lnT 1 2313.3 26880 571.40
## - W2 1 2450.8 27018 571.91
## - T2 1 3476.4 28043 575.63
## - w 1 4143.0 28710 577.98
# Coefficient table and fit statistics of the model returned by step().
summary(modelo)
##
## Call:
## lm(formula = o ~ T2 + lnT + TS + S2 + W2 + w, data = Set1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -35.545 -8.883 -3.689 6.819 58.346
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.101e+03 3.573e+02 3.082 0.002702 **
## T2 3.067e-02 8.453e-03 3.628 0.000467 ***
## lnT -2.790e+02 9.428e+01 -2.959 0.003911 **
## TS 2.478e-03 1.065e-03 2.327 0.022115 *
## S2 -3.640e-04 2.258e-04 -1.612 0.110320
## W2 3.176e-01 1.043e-01 3.046 0.003018 **
## w -9.298e+00 2.348e+00 -3.960 0.000147 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 16.25 on 93 degrees of freedom
## Multiple R-squared: 0.7274, Adjusted R-squared: 0.7098
## F-statistic: 41.36 on 6 and 93 DF, p-value: < 2.2e-16
# Plot the predictor effects of the stepwise model.
plot(predictorEffects(modelo))
# Emit the model as a LaTeX equation with symbolic coefficients.
extract_eq(modelo)
\[ \operatorname{o} = \alpha + \beta_{1}(\operatorname{T2}) + \beta_{2}(\operatorname{lnT}) + \beta_{3}(\operatorname{TS}) + \beta_{4}(\operatorname{S2}) + \beta_{5}(\operatorname{W2}) + \beta_{6}(\operatorname{w}) + \epsilon \]
# Same equation with the estimated coefficients substituted in.
# The default of two decimals rounds the small TS and S2 coefficients
# (about 2.5e-03 and -3.6e-04) to 0, so four decimals are requested to keep
# those terms visible in the rendered equation.
extract_eq(modelo, wrap = TRUE, use_coefs = TRUE, coef_digits = 4)
\[ \begin{aligned} \operatorname{\widehat{o}} &= 1101.48 + 0.03(\operatorname{T2}) - 279(\operatorname{lnT}) + 0(\operatorname{TS})\ + \\ &\quad 0(\operatorname{S2}) + 0.32(\operatorname{W2}) - 9.3(\operatorname{w}) \end{aligned} \]
# Refit the stepwise model without S2, the only term that was not
# significant, and summarise the refitted model. The previous version
# summarised `modelo` again instead of `modelo1`.
modelo1 <- lm(o ~ T2 + lnT + TS + W2 + w, data = Set1)
summary(modelo1)
##
## Call:
## lm(formula = o ~ T2 + lnT + TS + S2 + W2 + w, data = Set1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -35.545 -8.883 -3.689 6.819 58.346
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.101e+03 3.573e+02 3.082 0.002702 **
## T2 3.067e-02 8.453e-03 3.628 0.000467 ***
## lnT -2.790e+02 9.428e+01 -2.959 0.003911 **
## TS 2.478e-03 1.065e-03 2.327 0.022115 *
## S2 -3.640e-04 2.258e-04 -1.612 0.110320
## W2 3.176e-01 1.043e-01 3.046 0.003018 **
## w -9.298e+00 2.348e+00 -3.960 0.000147 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 16.25 on 93 degrees of freedom
## Multiple R-squared: 0.7274, Adjusted R-squared: 0.7098
## F-statistic: 41.36 on 6 and 93 DF, p-value: < 2.2e-16
# Bonferroni outlier test; cutoff = Inf with n.max = 5 lists up to five
# observations regardless of their p-value.
outlierTest(modelo, cutoff = Inf, n.max = 5)
## rstudent unadjusted p-value Bonferroni p
## 61 3.981687 0.00013658 0.013658
## 24 3.926361 0.00016642 0.016642
## 89 3.245410 0.00163620 0.163620
## 86 2.524825 0.01328800 NA
## 75 -2.436417 0.01675900 NA
# Errors should have constant variance (homoscedasticity).
check_heteroskedasticity(modelo)
## Warning: Heteroscedasticity (non-constant error variance) detected (p < .001).
# Residuals should not be autocorrelated.
check_autocorrelation(modelo)
## OK: Residuals appear to be independent and not autocorrelated (p = 0.424).
# Independent variables (x) should not be correlated with each other.
check_collinearity(modelo)
# Residuals (errors) should be normally distributed.
check_normality(modelo)
## Warning: Non-normality of residuals detected (p < .001).
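Conclusiones: con el método stepwise, los mejores predictores son las variables T2, lnT, TS, S2, W2 y w, con un R-squared ajustado de 0.7098; solo la variable S2 no es estadísticamente significativa (p = 0.110). De los supuestos evaluados, la salida mostrada solo confirma la independencia (no autocorrelación) de los residuales (p = 0.424); se detectó heterocedasticidad y no normalidad de los residuales (p < .001). El resultado de check_collinearity no aparece en la salida, por lo que el supuesto de no multicolinealidad queda por verificar.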
# Candidate formula: response `o` against all 13 predictors.
el_modelo <- o ~ s + w + t + SW + WT + TS + S2 + W2 + T2 +
  S2W + W2T + lnW + lnT
# Automated model selection over the candidate formula with glmulti.
modelo_m2 <- glmulti(
  el_modelo,               # y ~ x
  data = Datos,
  method = "h",            # "h" exhaustive, "g" genetic, "l", "d" simple summary
  crit = aic,              # plain AIC is used here (not the corrected AICc)
  level = 1,               # 1 = no interactions, 2 = with interactions
  family = gaussian,
  fitfunction = glm,       # type of model (lm, glm, glmer, ...)
  confsetsize = 100,       # keep the 100 best models
  includeobjects = TRUE
)
## Initialization...
## TASK: Exhaustive screening of candidate set.
## Fitting...
##
## After 50 models:
## Best model: o~1+s+w+t+WT
## Crit= 863.300975637537
## Mean crit= 895.55931159027
##
## After 100 models:
## Best model: o~1+s+w+t+WT+TS
## Crit= 859.274355740497
## Mean crit= 892.451948952168
##
## After 150 models:
## Best model: o~1+s+w+t+WT+TS
## Crit= 859.274355740497
## Mean crit= 871.787430117434
##
## After 200 models:
## Best model: o~1+w+t+SW+TS+W2
## Crit= 855.042190420282
## Mean crit= 864.658446556444
##
## After 250 models:
## Best model: o~1+w+t+SW+TS+W2
## Crit= 855.042190420282
## Mean crit= 862.562083876287
##
## After 300 models:
## Best model: o~1+w+t+SW+TS+W2
## Crit= 855.042190420282
## Mean crit= 860.606923184467
##
## After 350 models:
## Best model: o~1+w+t+SW+TS+W2
## Crit= 855.042190420282
## Mean crit= 859.183394029442
##
## After 400 models:
## Best model: o~1+w+t+SW+TS+W2
## Crit= 855.042190420282
## Mean crit= 858.388112331377
##
## After 450 models:
## Best model: o~1+w+t+SW+TS+W2+T2
## Crit= 850.062716769946
## Mean crit= 856.89039509677
##
## After 500 models:
## Best model: o~1+w+t+SW+TS+W2+T2
## Crit= 850.062716769946
## Mean crit= 855.699564986598
##
## After 550 models:
## Best model: o~1+w+t+SW+TS+W2+T2
## Crit= 850.062716769946
## Mean crit= 854.804833048639
##
## After 600 models:
## Best model: o~1+w+t+SW+TS+W2+T2
## Crit= 850.062716769946
## Mean crit= 854.804833048639
##
## After 650 models:
## Best model: o~1+w+t+SW+TS+W2+T2
## Crit= 850.062716769946
## Mean crit= 854.804833048639
##
## After 700 models:
## Best model: o~1+w+t+SW+TS+W2+T2
## Crit= 850.062716769946
## Mean crit= 854.770866115894
##
## After 750 models:
## Best model: o~1+w+t+SW+TS+W2+T2
## Crit= 850.062716769946
## Mean crit= 854.622932624369
##
## After 800 models:
## Best model: o~1+w+t+SW+TS+W2+T2
## Crit= 850.062716769946
## Mean crit= 854.601694541883
##
## After 850 models:
## Best model: o~1+w+t+SW+TS+W2+T2
## Crit= 850.062716769946
## Mean crit= 854.601694541883
##
## After 900 models:
## Best model: o~1+w+t+SW+TS+W2+T2
## Crit= 850.062716769946
## Mean crit= 854.601694541883
##
## After 950 models:
## Best model: o~1+w+t+SW+TS+W2+T2
## Crit= 850.062716769946
## Mean crit= 854.441772047856
##
## After 1000 models:
## Best model: o~1+w+t+TS+W2+T2+S2W
## Crit= 848.793480005309
## Mean crit= 853.566475380091
##
## After 1050 models:
## Best model: o~1+w+t+TS+W2+T2+S2W
## Crit= 848.793480005309
## Mean crit= 853.230014543558
##
## After 1100 models:
## Best model: o~1+w+t+TS+W2+T2+S2W
## Crit= 848.793480005309
## Mean crit= 853.184983198234
##
## After 1150 models:
## Best model: o~1+w+t+TS+W2+T2+S2W
## Crit= 848.793480005309
## Mean crit= 853.184983198234
##
## After 1200 models:
## Best model: o~1+w+t+TS+W2+T2+S2W
## Crit= 848.793480005309
## Mean crit= 853.184983198234
##
## After 1250 models:
## Best model: o~1+w+t+TS+W2+T2+S2W
## Crit= 848.793480005309
## Mean crit= 853.184983198234
##
## After 1300 models:
## Best model: o~1+w+t+TS+W2+T2+S2W
## Crit= 848.793480005309
## Mean crit= 853.184983198234
##
## After 1350 models:
## Best model: o~1+w+t+TS+W2+T2+S2W
## Crit= 848.793480005309
## Mean crit= 853.128488824702
##
## After 1400 models:
## Best model: o~1+w+t+TS+W2+T2+S2W
## Crit= 848.793480005309
## Mean crit= 852.794568039007
##
## After 1450 models:
## Best model: o~1+w+t+TS+W2+T2+S2W
## Crit= 848.793480005309
## Mean crit= 852.60465332803
##
## After 1500 models:
## Best model: o~1+w+t+TS+W2+T2+S2W
## Crit= 848.793480005309
## Mean crit= 852.51659388634
##
## After 1550 models:
## Best model: o~1+w+t+TS+W2+T2+S2W
## Crit= 848.793480005309
## Mean crit= 852.453873643748
##
## After 1600 models:
## Best model: o~1+w+t+TS+W2+T2+S2W
## Crit= 848.793480005309
## Mean crit= 852.389457200208
##
## After 1650 models:
## Best model: o~1+w+t+TS+W2+T2+S2W
## Crit= 848.793480005309
## Mean crit= 852.389457200208
##
## After 1700 models:
## Best model: o~1+w+t+TS+W2+T2+S2W
## Crit= 848.793480005309
## Mean crit= 852.389457200208
##
## After 1750 models:
## Best model: o~1+w+t+TS+W2+T2+S2W
## Crit= 848.793480005309
## Mean crit= 852.389457200208
##
## After 1800 models:
## Best model: o~1+w+t+TS+W2+T2+S2W
## Crit= 848.793480005309
## Mean crit= 852.389457200208
##
## After 1850 models:
## Best model: o~1+w+t+TS+W2+T2+S2W
## Crit= 848.793480005309
## Mean crit= 852.389457200208
##
## After 1900 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 852.2403985246
##
## After 1950 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 852.053274489967
##
## After 2000 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.974569233581
##
## After 2050 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.865702869645
##
## After 2100 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.85252901843
##
## After 2150 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.85081336209
##
## After 2200 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.85081336209
##
## After 2250 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.85081336209
##
## After 2300 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.85081336209
##
## After 2350 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.85081336209
##
## After 2400 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.85081336209
##
## After 2450 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.85081336209
##
## After 2500 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.848962285991
##
## After 2550 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.847461374087
##
## After 2600 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.815532734457
##
## After 2650 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.78803490609
##
## After 2700 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.78803490609
##
## After 2750 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.78803490609
##
## After 2800 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.78803490609
##
## After 2850 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.78803490609
##
## After 2900 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.78803490609
##
## After 2950 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.78803490609
##
## After 3000 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.782196588487
##
## After 3050 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.782196588487
##
## After 3100 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.712841095955
##
## After 3150 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.695086935746
##
## After 3200 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.689074576896
##
## After 3250 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.689074576896
##
## After 3300 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.689074576896
##
## After 3350 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.689074576896
##
## After 3400 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.689074576896
##
## After 3450 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.689074576896
##
## After 3500 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.677694615387
##
## After 3550 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.677694615387
##
## After 3600 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.669987421857
##
## After 3650 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.669467812749
##
## After 3700 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.669467812749
##
## After 3750 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.669467812749
##
## After 3800 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.669467812749
##
## After 3850 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.669467812749
##
## After 3900 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.669467812749
##
## After 3950 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.669467812749
##
## After 4000 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.669467812749
##
## After 4050 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.614148259323
##
## After 4100 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.614148259323
##
## After 4150 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.599842901735
##
## After 4200 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.599255716143
##
## After 4250 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.599255716143
##
## After 4300 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.599255716143
##
## After 4350 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.599255716143
##
## After 4400 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.571833342965
##
## After 4450 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.454440510922
##
## After 4500 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.377909560405
##
## After 4550 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.361106337383
##
## After 4600 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.361106337383
##
## After 4650 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.345485714849
##
## After 4700 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.248523733423
##
## After 4750 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.190570256856
##
## After 4800 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.171731792722
##
## After 4850 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.171731792722
##
## After 4900 models:
## Best model: o~1+w+t+TS+T2+S2W+W2T
## Crit= 848.740291832167
## Mean crit= 851.171731792722
##
## After 4950 models:
## Best model: o~1+w+t+TS+W2+S2W+lnT
## Crit= 848.419387632089
## Mean crit= 851.061441511248
##
## After 5000 models:
## Best model: o~1+w+t+TS+W2+S2W+lnT
## Crit= 848.419387632089
## Mean crit= 851.00124191794
##
## After 5050 models:
## Best model: o~1+w+t+TS+W2+S2W+lnT
## Crit= 848.419387632089
## Mean crit= 850.973817688437
##
## After 5100 models:
## Best model: o~1+w+t+TS+W2+S2W+lnT
## Crit= 848.419387632089
## Mean crit= 850.973817688437
##
## After 5150 models:
## Best model: o~1+w+t+TS+W2+S2W+lnT
## Crit= 848.419387632089
## Mean crit= 850.973817688437
##
## After 5200 models:
## Best model: o~1+w+t+TS+W2+S2W+lnT
## Crit= 848.419387632089
## Mean crit= 850.910143361775
##
## After 5250 models:
## Best model: o~1+w+t+TS+W2+S2W+lnT
## Crit= 848.419387632089
## Mean crit= 850.805780264474
##
## After 5300 models:
## Best model: o~1+w+t+TS+W2+S2W+lnT
## Crit= 848.419387632089
## Mean crit= 850.786338028679
##
## After 5350 models:
## Best model: o~1+w+t+TS+W2+S2W+lnT
## Crit= 848.419387632089
## Mean crit= 850.75269545155
##
## After 5400 models:
## Best model: o~1+w+t+TS+W2+S2W+lnT
## Crit= 848.419387632089
## Mean crit= 850.735167777672
##
## After 5450 models:
## Best model: o~1+w+t+TS+W2+S2W+lnT
## Crit= 848.419387632089
## Mean crit= 850.695369029972
##
## After 5500 models:
## Best model: o~1+w+t+TS+W2+S2W+lnT
## Crit= 848.419387632089
## Mean crit= 850.689181388807
##
## After 5550 models:
## Best model: o~1+w+t+TS+W2+S2W+lnT
## Crit= 848.419387632089
## Mean crit= 850.686393740544
##
## After 5600 models:
## Best model: o~1+w+t+TS+W2+S2W+lnT
## Crit= 848.419387632089
## Mean crit= 850.684104561231
##
## After 5650 models:
## Best model: o~1+w+t+TS+W2+S2W+lnT
## Crit= 848.419387632089
## Mean crit= 850.653785422996
##
## After 5700 models:
## Best model: o~1+w+t+TS+W2+S2W+lnT
## Crit= 848.419387632089
## Mean crit= 850.630641695158
##
## After 5750 models:
## Best model: o~1+w+t+TS+W2+S2W+lnT
## Crit= 848.419387632089
## Mean crit= 850.628310420826
##
## After 5800 models:
## Best model: o~1+w+t+TS+W2+S2W+lnT
## Crit= 848.419387632089
## Mean crit= 850.628310420826
##
## After 5850 models:
## Best model: o~1+w+t+TS+W2+S2W+lnT
## Crit= 848.419387632089
## Mean crit= 850.610287293237
##
## After 5900 models:
## Best model: o~1+w+t+TS+S2W+W2T+lnT
## Crit= 848.103833544764
## Mean crit= 850.513635146916
##
## After 5950 models:
## Best model: o~1+w+t+TS+S2W+W2T+lnT
## Crit= 848.103833544764
## Mean crit= 850.500728723815
##
## After 6000 models:
## Best model: o~1+w+t+TS+S2W+W2T+lnT
## Crit= 848.103833544764
## Mean crit= 850.481624475145
##
## After 6050 models:
## Best model: o~1+w+t+TS+S2W+W2T+lnT
## Crit= 848.103833544764
## Mean crit= 850.468926297165
##
## After 6100 models:
## Best model: o~1+w+t+TS+S2W+W2T+lnT
## Crit= 848.103833544764
## Mean crit= 850.455333745191
##
## After 6150 models:
## Best model: o~1+w+t+TS+S2W+W2T+lnT
## Crit= 848.103833544764
## Mean crit= 850.364103110836
##
## After 6200 models:
## Best model: o~1+w+t+TS+S2W+W2T+lnT
## Crit= 848.103833544764
## Mean crit= 850.355684598997
##
## After 6250 models:
## Best model: o~1+w+t+TS+S2W+W2T+lnT
## Crit= 848.103833544764
## Mean crit= 850.355684598997
##
## After 6300 models:
## Best model: o~1+w+t+TS+S2W+W2T+lnT
## Crit= 848.103833544764
## Mean crit= 850.335940880733
##
## After 6350 models:
## Best model: o~1+w+t+TS+S2W+W2T+lnT
## Crit= 848.103833544764
## Mean crit= 850.335940880733
##
## After 6400 models:
## Best model: o~1+w+t+TS+S2W+W2T+lnT
## Crit= 848.103833544764
## Mean crit= 850.335940880733
##
## After 6450 models:
## Best model: o~1+w+t+TS+S2W+W2T+lnT
## Crit= 848.103833544764
## Mean crit= 850.335940880733
##
## After 6500 models:
## Best model: o~1+w+t+TS+S2W+W2T+lnT
## Crit= 848.103833544764
## Mean crit= 850.335940880733
##
## After 6550 models:
## Best model: o~1+w+t+TS+S2W+W2T+lnT
## Crit= 848.103833544764
## Mean crit= 850.334705367098
##
## After 6600 models:
## Best model: o~1+w+t+TS+S2W+W2T+lnT
## Crit= 848.103833544764
## Mean crit= 850.334705367098
##
## After 6650 models:
## Best model: o~1+w+t+TS+S2W+W2T+lnT
## Crit= 848.103833544764
## Mean crit= 850.334705367098
##
## After 6700 models:
## Best model: o~1+w+t+TS+S2W+W2T+lnT
## Crit= 848.103833544764
## Mean crit= 850.334705367098
##
## After 6750 models:
## Best model: o~1+w+t+TS+S2W+W2T+lnT
## Crit= 848.103833544764
## Mean crit= 850.334705367098
##
## After 6800 models:
## Best model: o~1+w+t+TS+S2W+W2T+lnT
## Crit= 848.103833544764
## Mean crit= 850.334705367098
##
## After 6850 models:
## Best model: o~1+w+t+TS+S2W+W2T+lnT
## Crit= 848.103833544764
## Mean crit= 850.334705367098
##
## After 6900 models:
## Best model: o~1+w+t+TS+S2W+W2T+lnT
## Crit= 848.103833544764
## Mean crit= 850.334705367098
##
## After 6950 models:
## Best model: o~1+w+t+TS+S2W+W2T+lnT
## Crit= 848.103833544764
## Mean crit= 850.334705367098
##
## After 7000 models:
## Best model: o~1+w+t+TS+S2W+W2T+lnT
## Crit= 848.103833544764
## Mean crit= 850.334705367098
##
## After 7050 models:
## Best model: o~1+w+t+TS+S2W+W2T+lnT
## Crit= 848.103833544764
## Mean crit= 850.334705367098
##
## After 7100 models:
## Best model: o~1+w+t+TS+S2W+W2T+lnT
## Crit= 848.103833544764
## Mean crit= 850.311818508889
##
## After 7150 models:
## Best model: o~1+w+t+TS+S2W+W2T+lnT
## Crit= 848.103833544764
## Mean crit= 850.311818508889
##
## After 7200 models:
## Best model: o~1+w+t+TS+S2W+W2T+lnT
## Crit= 848.103833544764
## Mean crit= 850.311818508889
##
## After 7250 models:
## Best model: o~1+w+t+TS+S2W+W2T+lnT
## Crit= 848.103833544764
## Mean crit= 850.311818508889
##
## After 7300 models:
## Best model: o~1+w+t+TS+S2W+W2T+lnT
## Crit= 848.103833544764
## Mean crit= 850.311818508889
##
## After 7350 models:
## Best model: o~1+w+t+TS+S2W+W2T+lnT
## Crit= 848.103833544764
## Mean crit= 850.295519610703
##
## After 7400 models:
## Best model: o~1+w+t+TS+S2W+W2T+lnT
## Crit= 848.103833544764
## Mean crit= 850.295519610703
##
## After 7450 models:
## Best model: o~1+w+t+TS+S2W+W2T+lnT
## Crit= 848.103833544764
## Mean crit= 850.295519610703
##
## After 7500 models:
## Best model: o~1+w+t+TS+S2W+W2T+lnT
## Crit= 848.103833544764
## Mean crit= 850.295519610703
##
## After 7550 models:
## Best model: o~1+w+t+TS+S2W+W2T+lnT
## Crit= 848.103833544764
## Mean crit= 850.295519610703
##
## After 7600 models:
## Best model: o~1+w+t+TS+S2W+W2T+lnT
## Crit= 848.103833544764
## Mean crit= 850.295519610703
##
## After 7650 models:
## Best model: o~1+w+t+TS+S2W+W2T+lnT
## Crit= 848.103833544764
## Mean crit= 850.295519610703
##
## After 7700 models:
## Best model: o~1+w+t+TS+S2W+W2T+lnT
## Crit= 848.103833544764
## Mean crit= 850.295519610703
##
## After 7750 models:
## Best model: o~1+w+t+TS+S2W+W2T+lnT
## Crit= 848.103833544764
## Mean crit= 850.295519610703
##
## After 7800 models:
## Best model: o~1+w+t+TS+S2W+W2T+lnT
## Crit= 848.103833544764
## Mean crit= 850.295519610703
##
## After 7850 models:
## Best model: o~1+w+t+TS+S2W+W2T+lnT
## Crit= 848.103833544764
## Mean crit= 850.295519610703
##
## After 7900 models:
## Best model: o~1+w+t+TS+S2W+W2T+lnT
## Crit= 848.103833544764
## Mean crit= 850.295519610703
##
## After 7950 models:
## Best model: o~1+w+t+TS+S2W+W2T+lnT
## Crit= 848.103833544764
## Mean crit= 850.295519610703
##
## After 8000 models:
## Best model: o~1+w+t+TS+S2W+W2T+lnT
## Crit= 848.103833544764
## Mean crit= 850.279068983114
##
## After 8050 models:
## Best model: o~1+w+t+TS+S2W+W2T+lnT
## Crit= 848.103833544764
## Mean crit= 850.279068983114
##
## After 8100 models:
## Best model: o~1+w+t+TS+S2W+W2T+lnT
## Crit= 848.103833544764
## Mean crit= 850.279068983114
##
## After 8150 models:
## Best model: o~1+w+t+TS+S2W+W2T+lnT
## Crit= 848.103833544764
## Mean crit= 850.275957723258
##
## After 8200 models:
## Best model: o~1+w+t+TS+S2W+W2T+lnT
## Crit= 848.103833544764
## Mean crit= 850.275957723258
##
## After 8250 models:
## Best model: o~1+w+t+TS+S2W+W2T+lnT
## Crit= 848.103833544764
## Mean crit= 850.265973598307
##
## After 8300 models:
## Best model: o~1+w+t+TS+S2W+W2T+lnT
## Crit= 848.103833544764
## Mean crit= 850.265973598307
##
## After 8350 models:
## Best model: o~1+w+t+TS+S2W+W2T+lnT
## Crit= 848.103833544764
## Mean crit= 850.265973598307
##
## After 8400 models:
## Best model: o~1+w+t+TS+S2W+W2T+lnT
## Crit= 848.103833544764
## Mean crit= 850.265081336702
##
## After 8450 models:
## Best model: o~1+w+t+TS+S2W+W2T+lnT
## Crit= 848.103833544764
## Mean crit= 850.265081336702
## Completed.
# Summary of the glmulti search: settings, best IC value, IC values and
# weights of the retained models, and the best model formula.
summary(modelo_m2)
## $name
## [1] "glmulti.analysis"
##
## $method
## [1] "h"
##
## $fitting
## [1] "glm"
##
## $crit
## [1] "aic"
##
## $level
## [1] 1
##
## $marginality
## [1] FALSE
##
## $confsetsize
## [1] 100
##
## $bestic
## [1] 848.1038
##
## $icvalues
## [1] 848.1038 848.3992 848.4194 848.6152 848.7403 848.7935 848.9422 849.1497
## [9] 849.4212 849.4862 849.5595 849.5857 849.6352 849.7101 849.7111 849.7632
## [17] 849.7974 849.8054 849.8688 849.8718 849.8743 849.8818 849.9011 849.9198
## [25] 849.9911 850.0104 850.0335 850.0444 850.0627 850.0627 850.0659 850.0876
## [33] 850.0937 850.1571 850.1571 850.1688 850.1819 850.1865 850.1910 850.2460
## [41] 850.2605 850.2748 850.3043 850.3060 850.3254 850.3283 850.3484 850.3607
## [49] 850.3636 850.3824 850.4136 850.4163 850.4472 850.4699 850.4711 850.4727
## [57] 850.4837 850.4856 850.5168 850.5350 850.5463 850.5564 850.5981 850.6010
## [65] 850.6063 850.6122 850.6545 850.6668 850.6759 850.6866 850.7145 850.7247
## [73] 850.7288 850.7333 850.7377 850.7384 850.7447 850.7450 850.7471 850.7739
## [81] 850.7766 850.7806 850.8086 850.8333 850.8563 850.8882 850.8912 850.9007
## [89] 850.9015 850.9044 850.9321 850.9428 850.9525 850.9546 850.9660 850.9775
## [97] 850.9780 850.9879 851.0062 851.0144
##
## $bestmodel
## [1] "o ~ 1 + w + t + TS + S2W + W2T + lnT"
##
## $modelweights
## [1] 0.027829021 0.024008380 0.023767102 0.021550753 0.020243834 0.019712563
## [7] 0.018299586 0.016496762 0.014402533 0.013942103 0.013440252 0.013264984
## [13] 0.012941109 0.012465307 0.012458960 0.012138888 0.011932630 0.011884933
## [19] 0.011514478 0.011497155 0.011482479 0.011439588 0.011329679 0.011224517
## [25] 0.010831168 0.010727446 0.010604272 0.010546382 0.010450547 0.010450374
## [31] 0.010433997 0.010321110 0.010289627 0.009968741 0.009968670 0.009910343
## [37] 0.009845987 0.009823274 0.009801081 0.009535064 0.009466185 0.009398785
## [43] 0.009261095 0.009253391 0.009164302 0.009150653 0.009059114 0.009003973
## [49] 0.008990836 0.008906503 0.008768971 0.008756868 0.008622485 0.008525436
## [55] 0.008520362 0.008513365 0.008466936 0.008458794 0.008327822 0.008252527
## [61] 0.008205857 0.008164541 0.007996097 0.007984577 0.007963378 0.007939943
## [67] 0.007773680 0.007726218 0.007691108 0.007650039 0.007543840 0.007505636
## [73] 0.007490151 0.007473192 0.007456979 0.007454312 0.007430751 0.007429800
## [79] 0.007422131 0.007323074 0.007313138 0.007298669 0.007197238 0.007108816
## [85] 0.007027774 0.006916261 0.006906199 0.006873257 0.006870560 0.006860474
## [91] 0.006766213 0.006730268 0.006697417 0.006690661 0.006652521 0.006614430
## [97] 0.006612811 0.006580139 0.006520149 0.006493616
##
## $includeobjects
## [1] TRUE
# Refit the formula reported as $bestmodel by the glmulti summary as a plain
# linear model. No `data` argument is passed, so the variables are resolved
# from the search path (the script attaches Datos earlier). The fit is only
# printed, not stored.
lm(o ~ 1 + w + t + TS + S2W + W2T + lnT)
##
## Call:
## lm(formula = o ~ 1 + w + t + TS + S2W + W2T + lnT)
##
## Coefficients:
## (Intercept) w t TS S2W W2T
## 2.597e+03 -1.060e+01 1.058e+01 2.248e-03 -2.970e-05 5.744e-03
## lnT
## -7.689e+02
# glmulti plot of type "s": importance of each model term across the
# retained confidence set of models.
plot(modelo_m2, type = "s", col = "deepskyblue1")
Conclusiones: con el método de algoritmo genético, los mejores predictores resultaron ser w, t, TS, S2W, W2T y lnT. Las variables de mayor importancia fueron TS y w.
# Backward subset selection (leaps) tuned with 10-fold cross-validation.
# The seed is fixed before training so the resampling is reproducible.
set.seed(123)
train.control <- trainControl(method = "cv", number = 10)

# Candidate model sizes: 1 to 7 predictors (nvmax).
step.model <- train(
  o ~ (.),
  data = Set1,
  method = "leapBackward",
  tuneGrid = data.frame(nvmax = seq_len(7)),
  trControl = train.control
)

# Which predictors enter the selected subset at each size.
summary(step.model$finalModel)
## Subset selection object
## 13 Variables (and intercept)
## Forced in Forced out
## s FALSE FALSE
## w FALSE FALSE
## t FALSE FALSE
## SW FALSE FALSE
## WT FALSE FALSE
## TS FALSE FALSE
## S2 FALSE FALSE
## W2 FALSE FALSE
## T2 FALSE FALSE
## S2W FALSE FALSE
## W2T FALSE FALSE
## lnW FALSE FALSE
## lnT FALSE FALSE
## 1 subsets of each size up to 5
## Selection Algorithm: backward
## s w t SW WT TS S2 W2 T2 S2W W2T lnW lnT
## 1 ( 1 ) " " " " "*" " " " " " " " " " " " " " " " " " " " "
## 2 ( 1 ) " " " " "*" " " " " " " " " " " " " " " " " " " "*"
## 3 ( 1 ) " " "*" "*" " " " " " " " " " " " " " " " " " " "*"
## 4 ( 1 ) " " "*" "*" " " " " "*" " " " " " " " " " " " " "*"
## 5 ( 1 ) " " "*" "*" " " " " "*" " " "*" " " " " " " " " "*"
# Resampled performance of the trained model across the tuned nvmax values.
plot(step.model)

# Variable importance rescaled to 0-100; stored first, then printed.
RocImp2 <- varImp(step.model, scale = TRUE)
RocImp2
## loess r-squared variable importance
##
## Overall
## T2 100.00
## t 99.83
## lnT 99.71
## W2 60.28
## lnW 60.25
## w 59.97
## W2T 41.73
## S2W 28.56
## S2 20.81
## s 19.97
## WT 19.20
## TS 13.39
## SW 0.00
# Plot of the scaled importance scores.
plot(RocImp2, col = "red")

# Simple regression of o on t; variables come from the search path.
subset1 <- lm(formula = o ~ t)
summary(subset1)
##
## Call:
## lm(formula = o ~ t)
##
## Residuals:
## Min 1Q Median 3Q Max
## -39.170 -15.649 0.101 11.351 79.906
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -138.7801 16.9028 -8.211 9e-13 ***
## t 2.3080 0.2159 10.688 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 20.61 on 98 degrees of freedom
## Multiple R-squared: 0.5382, Adjusted R-squared: 0.5335
## F-statistic: 114.2 on 1 and 98 DF, p-value: < 2.2e-16
# Variable importance
library(viridis)
library(ggplot2)

# Recompute the scaled importance of the trained model and print it.
RocImp2 <- varImp(step.model, scale = TRUE)
RocImp2
## loess r-squared variable importance
##
## Overall
## T2 100.00
## t 99.83
## lnT 99.71
## W2 60.28
## lnW 60.25
## w 59.97
## W2T 41.73
## S2W 28.56
## S2 20.81
## s 19.97
## WT 19.20
## TS 13.39
## SW 0.00
# Default plot of the importance scores.
plot(RocImp2, col = "red")

# ggplot version: bars filled with 13 colours from the viridis "turbo"
# palette (one per predictor listed in the importance table).
p <- ggplot(data = RocImp2, aes(y = Overall)) +
  geom_bar(stat = "identity", fill = turbo(13)) +
  theme_minimal()
p
Conclusiones: con el método de Machine Learning, el mejor predictor para la estimación de Ozone resultó ser t. Las variables de mayor importancia fueron T2 y t.
Variable con la mejor predicción de acuerdo al algoritmo Machine Learning
# Final model: ozone (o) as a linear function of temperature (t).
# Variables are resolved from the search path, not from a `data` argument.
Mod_final <- lm(formula = o ~ t)
summary(Mod_final)
##
## Call:
## lm(formula = o ~ t)
##
## Residuals:
## Min 1Q Median 3Q Max
## -39.170 -15.649 0.101 11.351 79.906
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -138.7801 16.9028 -8.211 9e-13 ***
## t 2.3080 0.2159 10.688 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 20.61 on 98 degrees of freedom
## Multiple R-squared: 0.5382, Adjusted R-squared: 0.5335
## F-statistic: 114.2 on 1 and 98 DF, p-value: < 2.2e-16
# Render the fitted model as a LaTeX equation with symbolic coefficients.
extract_eq(Mod_final)
\[ \operatorname{o} = \alpha + \beta_{1}(\operatorname{t}) + \epsilon \]
# Same equation with the estimated coefficients substituted (use_coefs)
# and wrapped in an aligned environment (wrap).
extract_eq(Mod_final, wrap = TRUE, use_coefs = TRUE)
\[ \begin{aligned} \operatorname{\widehat{o}} &= -138.78 + 2.31(\operatorname{t}) \end{aligned} \]
library(DescTools)
library(tidyr)
library(pastecs)
library(PerformanceAnalytics)
library(nortest)
library(correlation)
library(boot)
library(corrplot)
library(qgraph)
library(psych)
library(colorRamps)
library(corrr)
# Correlation table for the columns of Datos; stored, then printed.
res.cor <- correlate(Datos)
res.cor

# Correlation plot coloured with a heat-colour gradient.
corPlot(Datos, gr = colorRampPalette(heat.colors(10)))

# Global validation of the linear-model assumptions for the final model.
library(gvlma)
gvlma(Mod_final)
##
## Call:
## lm(formula = o ~ t)
##
## Coefficients:
## (Intercept) t
## -138.780 2.308
##
##
## ASSESSMENT OF THE LINEAR MODEL ASSUMPTIONS
## USING THE GLOBAL TEST ON 4 DEGREES-OF-FREEDOM:
## Level of Significance = 0.05
##
## Call:
## gvlma(x = Mod_final)
##
## Value p-value Decision
## Global Stat 5.729e+01 1.078e-11 Assumptions NOT satisfied!
## Skewness 1.897e+01 1.325e-05 Assumptions NOT satisfied!
## Kurtosis 2.311e+01 1.531e-06 Assumptions NOT satisfied!
## Link Function 1.520e+01 9.656e-05 Assumptions NOT satisfied!
## Heteroscedasticity 5.043e-05 9.943e-01 Assumptions acceptable.
library(performance)
library(easystats)
library(car)
library(graphics)
# Errors should show constant variance (homoscedasticity).
check_heteroskedasticity(Mod_final)
## Warning: Heteroscedasticity (non-constant error variance) detected (p = 0.029).
# Residuals should not be autocorrelated.
check_autocorrelation(Mod_final)
## OK: Residuals appear to be independent and not autocorrelated (p = 0.120).
# Independent variables (x) should not be correlated with each other;
# the result is NULL for this one-predictor model.
check_collinearity(Mod_final)
## NULL
# Residuals (errors) should be normally distributed.
check_normality(Mod_final)
## Warning: Non-normality of residuals detected (p < .001).
library(DescTools)
library(tidyr)
library(pastecs)
library(PerformanceAnalytics)
library(nortest)
library(correlation)
library(boot)
library(corrplot)
library(qgraph)
library(dplyr)
library(tidyverse)
library(gvlma)
library(car)
library(performance)
# Studentized residuals of the final model, inspected with a boxplot.
ri <- rstudent(Mod_final)
boxplot(ri)

# Bonferroni outlier test. With cutoff = Inf no observation is filtered out
# by p-value, so the n.max = 5 most extreme residuals are listed.
outlierTest(Mod_final, cutoff = Inf, n.max = 5)
## rstudent unadjusted p-value Bonferroni p
## 24 4.229170 0.00005316 0.005316
## 61 3.704415 0.00035230 0.035230
## 89 2.543380 0.01255900 NA
## 86 2.052478 0.04281700 NA
## 19 2.001837 0.04809500 NA
# Drop observations 24 and 61 and refit the simple model on the reduced data.
Datos1 <- Datos[-c(24, 61), ]
# NOTE(review): attach() masks the columns attached earlier; the fit below
# does not depend on it because it passes `data` explicitly.
attach(Datos1)
Mod_fina2 <- lm(formula = o ~ t, data = Datos1)

# Repeat the Bonferroni outlier test on the refitted model.
outlierTest(Mod_fina2, cutoff = Inf, n.max = 5)
## rstudent unadjusted p-value Bonferroni p
## 87 3.146339 0.0022082 0.2164
## 84 2.569366 0.0117440 NA
## 19 2.526222 0.0131830 NA
## 4 -2.174231 0.0321720 NA
## 65 -1.887232 0.0621820 NA
# Let's see how many observations we have
# (original note; no row count is actually computed in this step).
# Row 87 of Datos1 is removed to build Datos2.
Datos2 <- Datos1[-87, ]
attach(Datos2)

# NOTE(review): the checks below are run on Mod_fina2, which was fitted on
# Datos1 (before row 87 was removed), not on Datos2.
check_heteroskedasticity(Mod_fina2)
## Warning: Heteroscedasticity (non-constant error variance) detected (p = 0.030).
check_autocorrelation(Mod_fina2)
## OK: Residuals appear to be independent and not autocorrelated (p = 0.108).
check_collinearity(Mod_fina2)
## NULL
check_normality(Mod_fina2)
## OK: residuals appear as normally distributed (p = 0.214).
Conclusión: al eliminar los datos atípicos, solo se cumplen dos supuestos: independencia de los residuales y distribución normal de los residuales. No se cumple el supuesto de homocedasticidad.
# Weighted regression of o on t using Datos2, with weights 1/t.
# Candidate weightings noted by the author: 1/x, 1/(x)^2, sqrt(x).
Mod_fina3 <- lm(
  formula = o ~ t,
  data = Datos2,
  weights = 1/(t)
)
summary(Mod_fina3)
##
## Call:
## lm(formula = o ~ t, data = Datos2, weights = 1/(t))
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -4.026 -1.624 0.001 1.335 4.870
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -121.8024 13.2361 -9.202 8.38e-15 ***
## t 2.0622 0.1721 11.983 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.89 on 95 degrees of freedom
## Multiple R-squared: 0.6018, Adjusted R-squared: 0.5976
## F-statistic: 143.6 on 1 and 95 DF, p-value: < 2.2e-16
# Re-run the assumption checks on the weighted model.
# Constant error variance:
check_heteroskedasticity(Mod_fina3)
## OK: Error variance appears to be homoscedastic (p = 0.064).
# Independence of residuals:
check_autocorrelation(Mod_fina3)
## OK: Residuals appear to be independent and not autocorrelated (p = 0.144).
# Collinearity among predictors (NULL for this one-predictor model):
check_collinearity(Mod_fina3)
## NULL
# Normality of residuals:
check_normality(Mod_fina3)
## OK: residuals appear as normally distributed (p = 0.259).
Se realizaron diferentes transformaciones, como Box-Cox, además de eliminar datos atípicos. Finalmente, utilizando la opción de regresión ponderada, se cumplen los supuestos de regresión. El modelo generado es estadísticamente válido para estimar Ozono.
Al utilizar los algoritmos para seleccionar los predictores, el procedimiento de Machine Learning resulta más fácil y práctico. En este caso, el algoritmo Stepwise y el algoritmo genético dieron predictores parecidos, solo que no son significativos o no cumplieron con la importancia de las variables.