set.seed(1001280374)
df <- rnorm_multi(n = 120,
mu = c(0.5, 300, 30, 35, 0.6),
sd = c(0.2, 20, 5, 8, 0.15),
r = c(0.8, 0.7, 0.5, 0.6, 0.8, 0.4, 0.3, 0.4, 0.4, 0.5),
## Correlación de pares de variables
varnames = c('Compact', 'Labranza', 'Arena', 'Arcilla','Mecanizado'))
df$Compact <- round(df$Compact)
df$Mecanizado <- cut(df$Mecanizado, breaks = 3,labels = c('Baja','Media','Alta'))
df
## Compact Labranza Arena Arcilla Mecanizado
## 1 1 310.3625 37.24430 34.63123 Media
## 2 0 284.7040 26.66142 20.63241 Baja
## 3 0 303.9642 26.34951 36.37896 Media
## 4 0 279.6183 24.51511 32.21826 Baja
## 5 0 287.1838 24.55035 25.99216 Media
## 6 0 263.2712 25.53881 26.83614 Baja
## 7 1 324.3488 32.64756 34.29699 Baja
## 8 1 305.7464 27.74288 35.35863 Baja
## 9 1 353.9027 36.85248 36.16247 Media
## 10 0 295.3648 32.90768 25.64311 Baja
## 11 0 242.8194 17.38874 22.10732 Media
## 12 1 312.1549 30.43462 32.43018 Media
## 13 1 292.3824 30.71607 16.26357 Baja
## 14 0 288.1993 28.96683 37.61803 Baja
## 15 0 296.0060 31.16343 31.56989 Baja
## 16 0 281.2216 26.63362 33.47779 Baja
## 17 1 323.2693 34.73483 36.51840 Media
## 18 0 304.1375 36.42649 48.43564 Baja
## 19 1 301.9910 28.63438 32.22601 Media
## 20 0 296.9307 26.73795 23.74382 Baja
## 21 0 280.6484 33.34617 31.79872 Media
## 22 0 290.2375 23.11632 37.28055 Media
## 23 1 331.8617 32.94716 39.42101 Media
## 24 0 303.8357 31.14634 41.88244 Media
## 25 0 296.1847 31.61951 38.18146 Media
## 26 0 278.5760 23.98710 36.95932 Media
## 27 1 311.8128 35.45679 46.65055 Media
## 28 1 333.2474 43.29243 28.79891 Media
## 29 1 301.9508 35.50521 32.56026 Media
## 30 1 329.2886 40.42508 37.86328 Alta
## 31 1 327.4760 36.32180 27.71496 Media
## 32 0 289.8340 28.81558 38.19235 Baja
## 33 0 283.3483 23.71916 36.44274 Media
## 34 0 274.6979 21.01182 38.22073 Media
## 35 1 287.8062 31.81891 47.46128 Media
## 36 0 307.7893 28.37188 40.21793 Baja
## 37 0 273.6756 22.22065 29.91438 Media
## 38 1 291.7972 30.58108 33.88692 Alta
## 39 1 336.5311 31.13483 33.83266 Baja
## 40 1 315.1476 37.84270 49.10723 Media
## 41 1 298.4448 24.59980 47.83663 Media
## 42 1 328.9906 31.91808 51.66575 Baja
## 43 0 283.1206 21.04917 27.78586 Media
## 44 1 314.4181 28.52251 26.20147 Media
## 45 0 269.6734 27.06122 35.14993 Media
## 46 1 310.6899 31.43399 43.05206 Alta
## 47 0 303.7981 26.10448 32.24616 Media
## 48 0 262.9124 23.68691 32.41881 Media
## 49 0 296.2365 33.68074 35.17137 Media
## 50 1 329.3165 32.95139 45.35547 Media
## 51 0 285.1973 27.26426 30.44442 Media
## 52 1 304.7545 34.66419 37.91065 Media
## 53 0 284.0164 31.92261 29.26771 Media
## 54 1 327.0706 36.99928 38.78380 Media
## 55 0 287.3738 29.13468 40.94839 Media
## 56 1 317.4619 35.28213 49.47617 Alta
## 57 1 312.2436 35.29517 41.04482 Media
## 58 1 337.3517 35.96468 53.21087 Media
## 59 1 310.7279 26.64808 32.94356 Baja
## 60 1 339.9653 41.77788 39.38635 Baja
## 61 1 304.1918 31.45502 33.35425 Media
## 62 0 283.8848 27.36532 18.96011 Baja
## 63 0 310.4142 32.26041 30.52880 Media
## 64 1 293.7472 30.36640 42.03680 Media
## 65 0 293.5730 30.48958 28.88307 Baja
## 66 0 286.3911 26.64320 34.15319 Baja
## 67 0 288.5934 26.58108 27.15257 Baja
## 68 1 312.1951 38.65730 22.25834 Media
## 69 0 293.4375 29.81204 34.49080 Baja
## 70 0 302.6591 24.83251 31.73436 Baja
## 71 0 313.5873 27.74647 44.87413 Baja
## 72 1 293.0282 30.09558 50.16688 Alta
## 73 0 270.1183 26.90207 40.82050 Baja
## 74 0 286.7048 28.09336 32.74538 Media
## 75 0 284.6991 30.94794 42.52065 Media
## 76 0 290.9228 26.09455 24.58793 Baja
## 77 1 319.6417 31.94264 48.66497 Media
## 78 0 290.5703 30.85743 45.86672 Media
## 79 1 301.0215 31.44534 43.79645 Media
## 80 1 306.7977 28.15455 27.01762 Baja
## 81 0 301.3716 33.23957 35.80305 Baja
## 82 0 297.6486 25.32458 35.98468 Baja
## 83 0 300.4910 26.77191 29.47681 Media
## 84 0 293.9080 31.83813 43.44683 Baja
## 85 0 286.6819 25.79073 39.11863 Media
## 86 0 288.0809 29.86088 36.47092 Baja
## 87 1 305.0932 36.38146 41.27370 Baja
## 88 1 327.7489 30.21237 35.77241 Media
## 89 0 280.3278 26.15555 35.19440 Media
## 90 1 325.8989 31.61403 54.38865 Media
## 91 1 300.9724 31.72600 42.97734 Media
## 92 0 288.0473 28.63876 24.27531 Baja
## 93 1 306.0890 27.87184 36.47873 Media
## 94 1 317.3175 33.44703 32.89285 Media
## 95 1 311.3648 33.07997 46.65731 Alta
## 96 1 303.2731 29.58233 38.15898 Baja
## 97 0 278.6194 25.16682 35.98482 Media
## 98 0 275.4959 23.20729 26.86925 Baja
## 99 1 319.5540 36.51456 42.48816 Baja
## 100 1 295.1840 31.72816 40.26944 Media
## 101 1 314.8570 35.63167 49.72083 Media
## 102 1 309.4173 32.26934 33.63780 Baja
## 103 0 288.3631 26.08595 34.60374 Baja
## 104 0 287.7272 24.19852 30.60265 Baja
## 105 0 292.7791 29.52606 36.89440 Media
## 106 1 293.0032 29.37917 35.10479 Baja
## 107 1 315.8785 37.67562 43.02871 Media
## 108 1 333.4215 32.73186 26.45228 Baja
## 109 1 308.6341 34.81995 36.87833 Media
## 110 0 268.3774 22.71233 28.08139 Baja
## 111 1 299.9689 23.31169 29.80181 Baja
## 112 0 292.5778 27.43447 36.55761 Media
## 113 0 268.3028 14.90685 24.30859 Baja
## 114 0 281.4019 28.52473 33.46524 Media
## 115 1 302.1556 28.68541 28.22527 Media
## 116 1 318.4483 34.61201 43.68144 Media
## 117 1 287.0627 27.95638 26.83106 Media
## 118 0 281.7856 29.91227 31.52068 Baja
## 119 0 277.9536 26.89005 43.34091 Media
## 120 1 327.0834 30.05843 37.21308 Baja
univar_lab <- glm(Compact ~ Labranza, family = binomial, data = df)
summary(univar_lab)
##
## Call:
## glm(formula = Compact ~ Labranza, family = binomial, data = df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.20001 -0.50791 -0.06645 0.48238 2.10815
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -50.09803 9.00330 -5.564 2.63e-08 ***
## Labranza 0.16718 0.03012 5.550 2.86e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 166.22 on 119 degrees of freedom
## Residual deviance: 83.04 on 118 degrees of freedom
## AIC: 87.04
##
## Number of Fisher Scoring iterations: 6
univar_arena <- glm(Compact ~ Arena, family = binomial, data = df)
summary(univar_arena)
##
## Call:
## glm(formula = Compact ~ Arena, family = binomial, data = df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.2506 -0.7077 -0.1758 0.8137 2.3419
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -11.78611 2.22749 -5.291 1.22e-07 ***
## Arena 0.39081 0.07391 5.288 1.24e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 166.22 on 119 degrees of freedom
## Residual deviance: 115.85 on 118 degrees of freedom
## AIC: 119.85
##
## Number of Fisher Scoring iterations: 5
univar_arcilla <- glm(Compact ~ Arcilla, family = binomial, data = df)
summary(univar_arcilla)
##
## Call:
## glm(formula = Compact ~ Arcilla, family = binomial, data = df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.6403 -1.0931 -0.6852 1.1271 1.9668
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.20445 1.00282 -3.195 0.00140 **
## Arcilla 0.08770 0.02751 3.188 0.00143 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 166.22 on 119 degrees of freedom
## Residual deviance: 154.60 on 118 degrees of freedom
## AIC: 158.6
##
## Number of Fisher Scoring iterations: 4
univar_mecanizado <- glm(Compact ~ Mecanizado, family = binomial, data = df)
summary(univar_mecanizado)
##
## Call:
## glm(formula = Compact ~ Mecanizado, family = binomial, data = df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.2415 -1.2415 -0.9123 1.1146 1.4680
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.6614 0.3078 -2.149 0.0317 *
## MecanizadoMedia 0.8109 0.3934 2.061 0.0393 *
## MecanizadoAlta 17.2275 979.6101 0.018 0.9860
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 166.22 on 119 degrees of freedom
## Residual deviance: 152.79 on 117 degrees of freedom
## AIC: 158.79
##
## Number of Fisher Scoring iterations: 15
\[\small Los~ resultados~ de~ la~ regresión~ univariable~ muestran~ que~ exceptuando~ la~ alta~ mecanización\\ \small todas~ las~ variables~ explicativas~ parecieran~ influir~ en~ la~ compactación~ en~ base~ a~ sus~ pvalores. \]
model1 <- glm(Compact ~ Labranza + Arena + Arcilla + Mecanizado, family = binomial, data = df)
summary(model1)
##
## Call:
## glm(formula = Compact ~ Labranza + Arena + Arcilla + Mecanizado,
## family = binomial, data = df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.28210 -0.44044 -0.03929 0.26411 2.00286
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -51.97251 10.21775 -5.086 3.65e-07 ***
## Labranza 0.16128 0.03563 4.527 5.98e-06 ***
## Arena 0.13456 0.09658 1.393 0.164
## Arcilla -0.03129 0.04816 -0.650 0.516
## MecanizadoMedia 1.04933 0.63597 1.650 0.099 .
## MecanizadoAlta 19.11002 2110.19003 0.009 0.993
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 166.222 on 119 degrees of freedom
## Residual deviance: 70.915 on 114 degrees of freedom
## AIC: 82.915
##
## Number of Fisher Scoring iterations: 17
\[\small El~ resultado,~ muestra~ el~ pvalor~ de~ MecanizadoAlta~ como~ insignificante,\\ \small~ además~ es~el~ más~ alto,~ por~ lo~ que~ se~ excluye.\]
model2 <- glm(Compact ~ Labranza + Arena + Arcilla, family = binomial, data = df)
summary(model2)
##
## Call:
## glm(formula = Compact ~ Labranza + Arena + Arcilla, family = binomial,
## data = df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.99831 -0.46805 -0.05203 0.36420 2.15975
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -48.43314 9.30253 -5.206 1.92e-07 ***
## Labranza 0.14525 0.03237 4.487 7.21e-06 ***
## Arena 0.15017 0.09488 1.583 0.113
## Arcilla 0.01156 0.04217 0.274 0.784
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 166.222 on 119 degrees of freedom
## Residual deviance: 79.873 on 116 degrees of freedom
## AIC: 87.873
##
## Number of Fisher Scoring iterations: 6
\[\small Ahora~ el~ pvalor~ estadísticicamente~ insignifcante~ más~ alto~ es~ el~ de~ Arcilla,~ por~ lo~ que~ se~ excluye.\]
model3 <- glm(Compact ~ Labranza + Arena, family = binomial, data = df)
summary(model3)
##
## Call:
## glm(formula = Compact ~ Labranza + Arena, family = binomial,
## data = df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.03418 -0.47807 -0.05175 0.36657 2.12567
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -48.43426 9.28786 -5.215 1.84e-07 ***
## Labranza 0.14600 0.03226 4.525 6.03e-06 ***
## Arena 0.15651 0.09232 1.695 0.09 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 166.222 on 119 degrees of freedom
## Residual deviance: 79.948 on 117 degrees of freedom
## AIC: 85.948
##
## Number of Fisher Scoring iterations: 6
\[El~ pvalor~ de~ Arena~ es~ superior~ al~ 5\%~ así~ que~ se~ excluye~ del~ modelo.\]
model_final <- glm(Compact ~ Labranza, family = binomial, data = df)
summary(model_final)
##
## Call:
## glm(formula = Compact ~ Labranza, family = binomial, data = df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.20001 -0.50791 -0.06645 0.48238 2.10815
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -50.09803 9.00330 -5.564 2.63e-08 ***
## Labranza 0.16718 0.03012 5.550 2.86e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 166.22 on 119 degrees of freedom
## Residual deviance: 83.04 on 118 degrees of freedom
## AIC: 87.04
##
## Number of Fisher Scoring iterations: 6
delta.coef <- abs((coef(model2)-coef(model1)[-c(4)][-c(5)])/coef(model1)[-c(4)][-c(5)])
round(delta.coef, 3)
## (Intercept) Labranza Arena Arcilla
## 0.068 0.099 0.116 0.989
\[Eliminar~ la~ variable~ mecanización~ no~ representa~ \triangle\beta>20\%~para~ las~ otras~ variables.\]
delta.coef2 <- abs((coef(model3)-coef(model2)[-c(4)])/coef(model3)[-c(4)])
round(delta.coef2, 3)
## (Intercept) Labranza Arena
## 0.000 0.005 0.040
delta.coef3 <- abs((coef(model_final)-coef(model3)[-c(3)])/coef(model3)[-c(3)])
round(delta.coef3, 3)
## (Intercept) Labranza
## 0.034 0.145
\[Eliminar~ variables~ no~ representa~ \triangle\beta>20\%~para~ el~ modelo~ 3~y~ para~ el~ modelo~ final.\]
library(lmtest)
## Warning: package 'lmtest' was built under R version 4.1.2
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.1.2
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
test1 <- lrtest(model1, model2)
test2 <- lrtest(model2, model3)
test3 <- lrtest(model3, model_final)
test1
## Likelihood ratio test
##
## Model 1: Compact ~ Labranza + Arena + Arcilla + Mecanizado
## Model 2: Compact ~ Labranza + Arena + Arcilla
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 6 -35.457
## 2 4 -39.936 -2 8.9581 0.01134 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
test2
## Likelihood ratio test
##
## Model 1: Compact ~ Labranza + Arena + Arcilla
## Model 2: Compact ~ Labranza + Arena
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 4 -39.936
## 2 3 -39.974 -1 0.0754 0.7837
test3
## Likelihood ratio test
##
## Model 1: Compact ~ Labranza + Arena
## Model 2: Compact ~ Labranza
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 3 -39.974
## 2 2 -41.520 -1 3.0913 0.07871 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(model2, model1, test = 'Chisq')
## Analysis of Deviance Table
##
## Model 1: Compact ~ Labranza + Arena + Arcilla
## Model 2: Compact ~ Labranza + Arena + Arcilla + Mecanizado
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 116 79.873
## 2 114 70.915 2 8.9581 0.01134 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
\[\small El~ modelo~ 1~ y~ 2~ son~ significativamente~ diferentes,~ en~ base~ a ~eso~ no ~se~ podría~ eliminar~ la~ variable~ mecanización.\\ \small Sin~ embargo,~ la~ eliminación~ de~ los~ pvalores~ y~ la~ comparación~ de~ coeficientes~ parecen~ indicar~ que~ sólo~ la~ labranza~ influye~ sobre~ la~ compactación.\]
pred1<- model_final$fitted.values
pred<-ifelse(pred1> 0.5, 1, 0)
hist(pred)
par(mfrow = c(2,2))
scatter.smooth(df$Labranza, pred, cex = 0.5)
pred11<-model1$fitted.values
pred2<-ifelse(pred11> 0.5, 1, 0)
hist(pred2)
par(mfrow = c(2,2))
scatter.smooth(df$Labranza, pred, cex = 0.5)
scatter.smooth(df$Arena, pred, cex = 0.5)
scatter.smooth(df$Arcilla, pred, cex = 0.5)
\[\small El~ modelo~ final~ escogido~ parece~
tener~ valores~ predichos~ con~ cierta~ linearidad~ en~ un~ corto~
intervalo.\\ \small Teniendo~ en~ cuenta~ la~ falta~ de~ significancia~
en~ la~ diferencia~ con~ el~ primer~ modelo,~ se~ corrió~ también~ con~
los~ valores~ predichos~ de~ este,\\ \small dónde~ tanto~ la~ arena~
como~ la~ arcilla~ mostraron~ gráficas~ más~ propias~ de~ una~
curva.\]
media_l <- mean(df$Labranza)
colores_c <- ifelse(df$Labranza < media_l, 'blue', 'green')
plot(model_final$fitted.values, cex = (df$Labranza * 0.003), pch = 19, col = colores_c)
title("Valores ajustados para Labranza")
abline(h = 0.5, cex = 1.2, col = 'red')
\[\small Si~ la~ labranza~ es~ menor~ a~ la~
media,~ el~ modelo~ predice~ una~ mayor~ compactación.\\ \small Según~
el~ gráfico~ la~ labranza~ es~ una~ variable~ totalmente~
discrimantoria,~ esto~ en~ línea~ con~ los~ pvalores~
observados.\]
model_inter1 <- glm(Compact ~ Labranza + Arena + Arcilla + Mecanizado + Labranza:Arena + Labranza:Arcilla+ Labranza:Mecanizado + Arena:Arcilla + Arcilla:Mecanizado+ Labranza:Arcilla:Arena:Mecanizado, family = binomial, data = df)
summary(model_inter1)
##
## Call:
## glm(formula = Compact ~ Labranza + Arena + Arcilla + Mecanizado +
## Labranza:Arena + Labranza:Arcilla + Labranza:Mecanizado +
## Arena:Arcilla + Arcilla:Mecanizado + Labranza:Arcilla:Arena:Mecanizado,
## family = binomial, data = df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.10781 -0.30737 -0.00688 0.14994 2.58188
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.743e+02 5.863e+02 -0.468 0.640
## Labranza 9.300e-01 1.943e+00 0.479 0.632
## Arena 5.422e+00 1.948e+01 0.278 0.781
## Arcilla 6.846e+00 1.666e+01 0.411 0.681
## MecanizadoMedia 2.651e+01 3.224e+01 0.822 0.411
## MecanizadoAlta 7.537e+01 1.288e+05 0.001 1.000
## Labranza:Arena -1.794e-02 6.453e-02 -0.278 0.781
## Labranza:Arcilla -2.366e-02 5.505e-02 -0.430 0.667
## Labranza:MecanizadoMedia -1.164e-01 1.116e-01 -1.042 0.297
## Labranza:MecanizadoAlta -2.137e-01 4.448e+02 0.000 1.000
## Arena:Arcilla -2.000e-01 5.461e-01 -0.366 0.714
## Arcilla:MecanizadoMedia 1.842e-01 2.543e-01 0.724 0.469
## Arcilla:MecanizadoAlta 4.625e-01 8.462e+02 0.001 1.000
## Labranza:Arena:Arcilla:MecanizadoBaja 6.732e-04 1.805e-03 0.373 0.709
## Labranza:Arena:Arcilla:MecanizadoMedia 6.826e-04 1.813e-03 0.376 0.707
## Labranza:Arena:Arcilla:MecanizadoAlta 6.452e-04 9.476e-02 0.007 0.995
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 166.222 on 119 degrees of freedom
## Residual deviance: 63.096 on 104 degrees of freedom
## AIC: 95.096
##
## Number of Fisher Scoring iterations: 16
model_inter2 <- glm(Compact ~ Labranza + Arena + Arcilla + Mecanizado + Arcilla:Mecanizado, family = binomial, data = df)
summary(model_inter2)
##
## Call:
## glm(formula = Compact ~ Labranza + Arena + Arcilla + Mecanizado +
## Arcilla:Mecanizado, family = binomial, data = df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.11383 -0.34039 -0.02505 0.19538 2.63362
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.891e+01 1.265e+01 -4.655 3.23e-06 ***
## Labranza 1.950e-01 4.485e-02 4.349 1.37e-05 ***
## Arena 1.473e-01 9.836e-02 1.498 0.1342
## Arcilla -1.387e-01 6.554e-02 -2.116 0.0344 *
## MecanizadoMedia -7.306e+00 3.770e+00 -1.938 0.0526 .
## MecanizadoAlta 1.554e+01 1.142e+04 0.001 0.9989
## Arcilla:MecanizadoMedia 2.384e-01 1.066e-01 2.236 0.0254 *
## Arcilla:MecanizadoAlta 1.120e-01 2.602e+02 0.000 0.9997
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 166.222 on 119 degrees of freedom
## Residual deviance: 65.087 on 112 degrees of freedom
## AIC: 81.087
##
## Number of Fisher Scoring iterations: 17
\[El~ primer~ modelo~ no~ muestra~ interacciones,~ haciendo~ el~ proceso~ de~ eliminación~ se~ encontró~ un~ pvalor~\\ que~ indica~ que~ el~ contenido~ de~ arcilla~ y~ la~ labranza~ pueden~ tener~ interacción~ en~ su~ efecto~ sobre~ la~ compactación,\\~ por~ lo~ que~ deben~ ser~ analizados~ de~ manera~ conjunta. \]
lrtest(model1,model_inter2)
## Likelihood ratio test
##
## Model 1: Compact ~ Labranza + Arena + Arcilla + Mecanizado
## Model 2: Compact ~ Labranza + Arena + Arcilla + Mecanizado + Arcilla:Mecanizado
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 6 -35.457
## 2 8 -32.544 2 5.8277 0.05427 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
\[\small Entre~ el~ modelo~ con~ interacción~ y~ el~ modelo~ 1~ parece~ que~ el~ pvalor~ se~ encuentra~ ligeramente~ por~ encima~ del~ 5\%\\ \small por~ lo~ que~ se~ puede~ descartar~ el~ modelo~ con~ interacción.\]
rta= model_inter2$fitted.values
prop_com <- rta*100
cat_arc<- cut(df$Arcilla,breaks=4)
cat_mec<- df$Mecanizado
data_2 <- data.frame(cat_arc, cat_mec, prop_com)
tips2 <- data_2 %>%
group_by( cat_arc, cat_mec) %>%
summarise(media_prop_compactacion = mean(prop_com))
## `summarise()` has grouped output by 'cat_arc'. You can override using the
## `.groups` argument.
# Graficando las dos variables
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.1.2
tips2$tip_groups
## Warning: Unknown or uninitialised column: `tip_groups`.
## NULL
ggplot(data = tips2) +
aes(x = cat_mec, y = media_prop_compactacion, color = cat_arc) +
geom_line(aes(group = cat_arc))
\[\small Dado~ que~ el~ modelo~ final~ es~
univariado~ se~ examinó~ la~ interacción~ entre~ el~ contenido~ de~
arcilla~ y~ la~ mecanización,\\ \small observando~ que~ hay~ un~ punto~
en~ el~ que~ la~ compactación~ es~ la~ misma~ usando~ mecanización~ Baja
- Media~ cuándo~ los~ contenidos~ de~ arcilla~ son~ bajos.\] ##
Matriz de confusión valores observados de abortos con valores predichos
de aborto
library(ResourceSelection)
## Warning: package 'ResourceSelection' was built under R version 4.1.3
## ResourceSelection 0.3-5 2019-07-22
cut_prob <- ifelse(fitted(model_final) > 0.5, 1, 0)
table(model_final$y, cut_prob)
## cut_prob
## 0 1
## 0 52 10
## 1 9 49
hoslem.test(model_final$y, fitted(model_final))
##
## Hosmer and Lemeshow goodness of fit (GOF) test
##
## data: model_final$y, fitted(model_final)
## X-squared = 3.2878, df = 8, p-value = 0.915
cut_prob1 <- ifelse(fitted(model1) > 0.5, 1, 0)
table(model1$y, cut_prob1)
## cut_prob1
## 0 1
## 0 53 9
## 1 10 48
hoslem.test(model1$y, fitted(model1))
##
## Hosmer and Lemeshow goodness of fit (GOF) test
##
## data: model1$y, fitted(model1)
## X-squared = 9.2881, df = 8, p-value = 0.3186
Predprob<-predict(model_final,type="response")
plot(Predprob,jitter(as.numeric(df$Compact),0.5), cex=0.5, ylab="Compactación")
abline(v = 0.5, col = 'red')
text(x = 0.8, y = 0.8, 'alta probabilidad de compactación, \n predicha y observada')
text(x = 0.3, y = 0.2, 'alta probabilidad de no compactación, \n predicha y observada')
Predprob1<-predict(model1,type="response")
plot(Predprob1,jitter(as.numeric(df$Compact),0.5), cex=0.5, ylab="Compactación")
abline(v = 0.5, col = 'red')
text(x = 0.8, y = 0.8, 'alta probabilidad de compactación, \n predicha y observada')
text(x = 0.3, y = 0.2, 'alta probabilidad de no compactación, \n predicha y observada')
\[Los~ modelos~ se~ desvían~ en~ 19~ datos~
de~ 120~ (16\%),\\~ el~ pvalor~ indica~ que~ no~ hay~ diferencia~ entre~
los~ valores~ observados~ y~ predichos~.\\ \small Esto~ último~ se~
puede~ comprobar~ con~ lo~ visto~ en~ las~ gráficas.\]