y:Conversion del n-heptano en acetileno(%)
x1:Temperatura del reactor
x2:Reaccion(molar)de H2 a heptano
x3:Tiempo de contacto(seg)
#Datos sobre el acetileno(ejemplo 10.1,introduccion al analisis de regresion lineal)
y<-c(49,50.2,50.5,48.5,47.5,44.5,28,31.5,34.5,35,38,38.5,15,17,20.5,29.5) #Variable respuesta
x1<-c(1300,1300,1300,1300,1300,1300,1200,1200,1200,1200,1200,1200,1100,1100,1100,1100)
x2<-c(7.5,9,11,13.5,17,23,5.3,7.5,11,13.5,17,23,5.3,7.5,11,17)
x3<-c(0.012,0.012,0.0115,0.013,0.0135,0.012,0.04,0.038,0.032,0.026,0.034,0.041,0.084,0.098,0.092,0.086)
datos=data.frame(y,x1,x2,x3,x1*x2,x1*x3,x2*x3,x1*x1,x2*x2,x3*x3) #x1,x2,x3, son variables explicativas
datos
## y x1 x2 x3 x1...x2 x1...x3 x2...x3 x1...x1 x2...x2 x3...x3
## 1 49.0 1300 7.5 0.0120 9750 15.60 0.0900 1690000 56.25 0.00014400
## 2 50.2 1300 9.0 0.0120 11700 15.60 0.1080 1690000 81.00 0.00014400
## 3 50.5 1300 11.0 0.0115 14300 14.95 0.1265 1690000 121.00 0.00013225
## 4 48.5 1300 13.5 0.0130 17550 16.90 0.1755 1690000 182.25 0.00016900
## 5 47.5 1300 17.0 0.0135 22100 17.55 0.2295 1690000 289.00 0.00018225
## 6 44.5 1300 23.0 0.0120 29900 15.60 0.2760 1690000 529.00 0.00014400
## 7 28.0 1200 5.3 0.0400 6360 48.00 0.2120 1440000 28.09 0.00160000
## 8 31.5 1200 7.5 0.0380 9000 45.60 0.2850 1440000 56.25 0.00144400
## 9 34.5 1200 11.0 0.0320 13200 38.40 0.3520 1440000 121.00 0.00102400
## 10 35.0 1200 13.5 0.0260 16200 31.20 0.3510 1440000 182.25 0.00067600
## 11 38.0 1200 17.0 0.0340 20400 40.80 0.5780 1440000 289.00 0.00115600
## 12 38.5 1200 23.0 0.0410 27600 49.20 0.9430 1440000 529.00 0.00168100
## 13 15.0 1100 5.3 0.0840 5830 92.40 0.4452 1210000 28.09 0.00705600
## 14 17.0 1100 7.5 0.0980 8250 107.80 0.7350 1210000 56.25 0.00960400
## 15 20.5 1100 11.0 0.0920 12100 101.20 1.0120 1210000 121.00 0.00846400
## 16 29.5 1100 17.0 0.0860 18700 94.60 1.4620 1210000 289.00 0.00739600
#------------------------------------------------------------------------------------------------------------
z<-aov(y~x1+x2+x3+x1:x2+x1:x3+x2:x3+I(x1^2)+I(x2^2)+I(x3^2))#Modelo ajustado(regresion lineal polinomial)
print(coef(z)) # Estimacion de los parametros
## (Intercept) x1 x2 x3 I(x1^2)
## -3.617228e+03 5.324332e+00 1.924396e+01 1.376632e+04 -1.926707e-03
## I(x2^2) I(x3^2) x1:x2 x1:x3 x2:x3
## -3.034201e-02 -1.158168e+04 -1.414447e-02 -1.057731e+01 -2.103478e+01
print(summary(z)) #ANOVA
## Df Sum Sq Mean Sq F value Pr(>F)
## x1 1 1896.7 1896.7 2334.092 5.27e-09 ***
## x2 1 56.3 56.3 69.286 0.000163 ***
## x3 1 0.4 0.4 0.542 0.489203
## I(x1^2) 1 11.5 11.5 14.145 0.009388 **
## I(x2^2) 1 51.8 51.8 63.757 0.000206 ***
## I(x3^2) 1 13.2 13.2 16.285 0.006837 **
## x1:x2 1 83.9 83.9 103.266 5.28e-05 ***
## x1:x3 1 0.8 0.8 0.928 0.372601
## x2:x3 1 4.2 4.2 5.182 0.063116 .
## Residuals 6 4.9 0.8
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Detectar multicolinealidad.Se puede observar que efectivamente hay multicolinealidad,valores de VIF mayores a 10
print(vif(z))
## x1 x2 x3 I(x1^2) I(x2^2) I(x3^2)
## 2.856749e+06 1.095614e+04 2.017163e+06 2.501945e+06 6.573359e+01 1.266710e+04
## x1:x2 x1:x3 x2:x3
## 9.802903e+03 1.428092e+06 2.403594e+02
#Eliminar el intercepto que contiene unos,y se especifica que y,es la variable de respuesta y datos es el data frame creado anteriormente
m<-model.matrix(y~.,data=datos)[,-1]
m
## x1 x2 x3 x1...x2 x1...x3 x2...x3 x1...x1 x2...x2 x3...x3
## 1 1300 7.5 0.0120 9750 15.60 0.0900 1690000 56.25 0.00014400
## 2 1300 9.0 0.0120 11700 15.60 0.1080 1690000 81.00 0.00014400
## 3 1300 11.0 0.0115 14300 14.95 0.1265 1690000 121.00 0.00013225
## 4 1300 13.5 0.0130 17550 16.90 0.1755 1690000 182.25 0.00016900
## 5 1300 17.0 0.0135 22100 17.55 0.2295 1690000 289.00 0.00018225
## 6 1300 23.0 0.0120 29900 15.60 0.2760 1690000 529.00 0.00014400
## 7 1200 5.3 0.0400 6360 48.00 0.2120 1440000 28.09 0.00160000
## 8 1200 7.5 0.0380 9000 45.60 0.2850 1440000 56.25 0.00144400
## 9 1200 11.0 0.0320 13200 38.40 0.3520 1440000 121.00 0.00102400
## 10 1200 13.5 0.0260 16200 31.20 0.3510 1440000 182.25 0.00067600
## 11 1200 17.0 0.0340 20400 40.80 0.5780 1440000 289.00 0.00115600
## 12 1200 23.0 0.0410 27600 49.20 0.9430 1440000 529.00 0.00168100
## 13 1100 5.3 0.0840 5830 92.40 0.4452 1210000 28.09 0.00705600
## 14 1100 7.5 0.0980 8250 107.80 0.7350 1210000 56.25 0.00960400
## 15 1100 11.0 0.0920 12100 101.20 1.0120 1210000 121.00 0.00846400
## 16 1100 17.0 0.0860 18700 94.60 1.4620 1210000 289.00 0.00739600
n<-y#Variable de respuesta
#--------------------------------------------------------------------------------------------------------------
#Regresion ridge
#Como se realiza regresion ridge,alpha debe ser igual a cero.Donde m es una matriz y n es la variable de respuesta
fit.ridge=glmnet(m,n,alpha=0)
fit.ridge#Por defecto encuentra 100 modelos y el lambda asociado a cada uno
##
## Call: glmnet(x = m, y = n, alpha = 0)
##
## Df %Dev Lambda
## 1 9 0.00 10890.0
## 2 9 1.14 9920.0
## 3 9 1.25 9039.0
## 4 9 1.37 8236.0
## 5 9 1.50 7504.0
## 6 9 1.64 6838.0
## 7 9 1.80 6230.0
## 8 9 1.97 5677.0
## 9 9 2.16 5173.0
## 10 9 2.37 4713.0
## 11 9 2.59 4294.0
## 12 9 2.84 3913.0
## 13 9 3.11 3565.0
## 14 9 3.40 3249.0
## 15 9 3.72 2960.0
## 16 9 4.07 2697.0
## 17 9 4.46 2457.0
## 18 9 4.87 2239.0
## 19 9 5.32 2040.0
## 20 9 5.82 1859.0
## 21 9 6.35 1694.0
## 22 9 6.93 1543.0
## 23 9 7.57 1406.0
## 24 9 8.25 1281.0
## 25 9 8.99 1167.0
## 26 9 9.79 1064.0
## 27 9 10.66 969.2
## 28 9 11.59 883.1
## 29 9 12.60 804.7
## 30 9 13.67 733.2
## 31 9 14.83 668.1
## 32 9 16.07 608.7
## 33 9 17.40 554.6
## 34 9 18.81 505.4
## 35 9 20.31 460.5
## 36 9 21.90 419.6
## 37 9 23.58 382.3
## 38 9 25.35 348.3
## 39 9 27.21 317.4
## 40 9 29.15 289.2
## 41 9 31.18 263.5
## 42 9 33.29 240.1
## 43 9 35.46 218.8
## 44 9 37.71 199.3
## 45 9 40.00 181.6
## 46 9 42.34 165.5
## 47 9 44.72 150.8
## 48 9 47.12 137.4
## 49 9 49.52 125.2
## 50 9 51.93 114.1
## 51 9 54.31 103.9
## 52 9 56.66 94.7
## 53 9 58.96 86.3
## 54 9 61.21 78.6
## 55 9 63.39 71.6
## 56 9 65.48 65.3
## 57 9 67.49 59.5
## 58 9 69.40 54.2
## 59 9 71.20 49.4
## 60 9 72.90 45.0
## 61 9 74.49 41.0
## 62 9 75.98 37.4
## 63 9 77.35 34.0
## 64 9 78.62 31.0
## 65 9 79.79 28.2
## 66 9 80.86 25.7
## 67 9 81.85 23.5
## 68 9 82.75 21.4
## 69 9 83.57 19.5
## 70 9 84.33 17.7
## 71 9 85.03 16.2
## 72 9 85.67 14.7
## 73 9 86.26 13.4
## 74 9 86.81 12.2
## 75 9 87.32 11.1
## 76 9 87.81 10.2
## 77 9 88.27 9.3
## 78 9 88.71 8.4
## 79 9 89.14 7.7
## 80 9 89.55 7.0
## 81 9 89.95 6.4
## 82 9 90.35 5.8
## 83 9 90.74 5.3
## 84 9 91.13 4.8
## 85 9 91.51 4.4
## 86 9 91.89 4.0
## 87 9 92.27 3.6
## 88 9 92.64 3.3
## 89 9 93.01 3.0
## 90 9 93.38 2.8
## 91 9 93.74 2.5
## 92 9 94.09 2.3
## 93 9 94.44 2.1
## 94 9 94.78 1.9
## 95 9 95.10 1.7
## 96 9 95.41 1.6
## 97 9 95.71 1.4
## 98 9 95.99 1.3
## 99 9 96.26 1.2
## 100 9 96.52 1.1
plot(fit.ridge,"lambda",label=TRUE)#Grafica los 100 modelos asociados a cada lambda segun las 9 variables explicativas
#Validacion cruzada:Para encontrar el mejor lambda,es decir el mejor K(0<k<1).
cv.ridge=cv.glmnet(m,n,alpha=0)
cv.ridge
##
## Call: cv.glmnet(x = m, y = n, alpha = 0)
##
## Measure: Mean-Squared Error
##
## Lambda Index Measure SE Nonzero
## min 1.089 100 11.54 4.063 9
## 1se 1.903 94 15.01 4.715 9
#Permite observar cual deberia ser el valor de lambda,o en que rango se encuentra.
plot(cv.ridge)
abline(v=log(fit.ridge$lambda[100]),col="blue",lwd=4,lty=3)
# El mejor lambda, es decir donde los coeficientes se estabilizan,el RMSE disminuye, y aumenta el coeficiente de determinacion.
mejorlambda<-cv.ridge$lambda.min
mejorlambda
## [1] 1.088771
log(mejorlambda)
## [1] 0.08504987
coef(fit.ridge)[,which(fit.ridge$lambda==mejorlambda)]#Modelo ajustado asociado con el mejor lambda
## (Intercept) x1 x2 x3 x1...x2
## -5.993398e+01 5.226306e-02 1.571538e-01 -7.068626e+01 7.259847e-05
## x1...x3 x2...x3 x1...x1 x2...x2 x3...x3
## -7.422396e-02 9.540321e+00 2.276365e-05 -9.430590e-03 -1.330913e+02
#Datos de entrenamiento o testing(Para poder predecir con el modelo ridge)
#Del dataframe escojo un subconjunto de las observaciones,sin reemplazo.
split = sample.split(datos, SplitRatio = 0.75)
test1 = subset(datos, split == FALSE)
test1
## y x1 x2 x3 x1...x2 x1...x3 x2...x3 x1...x1 x2...x2 x3...x3
## 6 44.5 1300 23.0 0.012 29900 15.6 0.276 1690000 529.00 0.000144
## 9 34.5 1200 11.0 0.032 13200 38.4 0.352 1440000 121.00 0.001024
## 10 35.0 1200 13.5 0.026 16200 31.2 0.351 1440000 182.25 0.000676
## 16 29.5 1100 17.0 0.086 18700 94.6 1.462 1210000 289.00 0.007396
#Predecir con el modelo ridge
x.test1=model.matrix(y~.,data=test1)[,-1]
pred=predict(fit.ridge, s=mejorlambda,newx = x.test1)
pred #Predicion de la variable de respuesta y
## s1
## 6 47.88286
## 9 35.21699
## 10 36.24535
## 16 26.26618
#--------------------------------------------------------------------------------------------------------------
#RMSE y R^2,del mejor modelo,es decir con el lambda mas pequeño.
data.frame(RMSE=RMSE(pred,test1$y),Rsquare = R2(pred,test1$y))
## RMSE s1
## 1 2.447771 0.9851485