set.seed(888)
edad <- abs(round(rnorm(n = 1000,
                        mean = 67,
                        sd = 2)))
dap <- abs(round(rnorm(n = 1000,
                      mean = 30,
                      sd = 3), 1)) #diámetro a la altura del pecho
hibrido <- factor(rbinom(n = 1000,
                         size = 1,
                         prob = 0.6),
                  labels = c('h1', 'h2'))
rto <- abs(round(rnorm(n = 1000,
                       mean = 80,
                       sd = 5), 1)) #Rendimiento

cloA <- abs(round(rnorm(n = 1000,
                        mean = 320,
                        sd = 10)))

z <- 0.22 * edad - 0.12 * cloA + dap -8 #Variable artificial

pr <- 1/(1+exp(-z)) # Probabilidad de aborto

y = rbinom(1000,1,pr) # Abortos

abortos <- factor(rbinom(1000, 1, pr),
                  labels = c('Si', 'No'))

data <- data.frame(edad,
                   dap,
                   hibrido,
                   rto,
                   cloA,
                   abortos)

Análisis univariado

univariable_edad <- glm(abortos ~ edad, family = binomial, data = data)
summary(univariable_edad)
## 
## Call:
## glm(formula = abortos ~ edad, family = binomial, data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.1503  -0.8605  -0.8170   1.4210   1.8084  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -9.05022    2.39036  -3.786 0.000153 ***
## edad         0.12310    0.03556   3.461 0.000538 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1241.4  on 999  degrees of freedom
## Residual deviance: 1229.2  on 998  degrees of freedom
## AIC: 1233.2
## 
## Number of Fisher Scoring iterations: 4
univariable_dap <- glm(abortos ~ dap, family = binomial, data = data)
summary(univariable_dap)
## 
## Call:
## glm(formula = abortos ~ dap, family = binomial, data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4531  -0.5872  -0.2376   0.4999   2.8432  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -24.8936     1.6185  -15.38   <2e-16 ***
## dap           0.7846     0.0519   15.12   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1241.38  on 999  degrees of freedom
## Residual deviance:  755.79  on 998  degrees of freedom
## AIC: 759.79
## 
## Number of Fisher Scoring iterations: 6
univariable_h <- glm(abortos ~ hibrido, family = binomial, data = data)
summary(univariable_h)
## 
## Call:
## glm(formula = abortos ~ hibrido, family = binomial, data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.8729  -0.8729  -0.8593   1.5162   1.5332  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -0.76837    0.10694  -7.185 6.71e-13 ***
## hibridoh2   -0.03772    0.13892  -0.272    0.786    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1241.4  on 999  degrees of freedom
## Residual deviance: 1241.3  on 998  degrees of freedom
## AIC: 1245.3
## 
## Number of Fisher Scoring iterations: 4
univariable_rto <- glm(abortos ~ rto, family = binomial, data = data)
summary(univariable_rto)
## 
## Call:
## glm(formula = abortos ~ rto, family = binomial, data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.9096  -0.8705  -0.8544   1.5059   1.5949  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.539898   1.082494  -1.423    0.155
## rto          0.009327   0.013443   0.694    0.488
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1241.4  on 999  degrees of freedom
## Residual deviance: 1240.9  on 998  degrees of freedom
## AIC: 1244.9
## 
## Number of Fisher Scoring iterations: 4
univariable_cloA <- glm(abortos ~ cloA, family = binomial, data = data)
summary(univariable_cloA)
## 
## Call:
## glm(formula = abortos ~ cloA, family = binomial, data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.5341  -0.8671  -0.6766   1.2407   2.1413  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) 19.537051   2.405340   8.122 4.57e-16 ***
## cloA        -0.063704   0.007558  -8.428  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1241.4  on 999  degrees of freedom
## Residual deviance: 1161.7  on 998  degrees of freedom
## AIC: 1165.7
## 
## Number of Fisher Scoring iterations: 4

Análisis multivariado

model1 <- glm(abortos ~ edad + dap + hibrido + rto + cloA, family = binomial, data = data)
summary(model1)
## 
## Call:
## glm(formula = abortos ~ edad + dap + hibrido + rto + cloA, family = binomial, 
##     data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.3338  -0.4460  -0.1331   0.2626   3.2746  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -11.79506    5.16762  -2.282   0.0225 *  
## edad          0.26299    0.05460   4.817 1.46e-06 ***
## dap           0.95360    0.06582  14.488  < 2e-16 ***
## hibridoh2     0.05606    0.20878   0.269   0.7883    
## rto           0.03075    0.02006   1.533   0.1252    
## cloA         -0.12056    0.01229  -9.813  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1241.38  on 999  degrees of freedom
## Residual deviance:  607.94  on 994  degrees of freedom
## AIC: 619.94
## 
## Number of Fisher Scoring iterations: 6
model2 <- glm(abortos ~ edad + dap + rto + cloA, family = binomial, data = data) # Elimiando la variable con el pvalue más alto
summary(model2)
## 
## Call:
## glm(formula = abortos ~ edad + dap + rto + cloA, family = binomial, 
##     data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.3245  -0.4480  -0.1327   0.2647   3.2637  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -11.74117    5.16578  -2.273    0.023 *  
## edad          0.26287    0.05459   4.815 1.47e-06 ***
## dap           0.95287    0.06573  14.497  < 2e-16 ***
## rto           0.03092    0.02005   1.542    0.123    
## cloA         -0.12057    0.01229  -9.812  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1241.38  on 999  degrees of freedom
## Residual deviance:  608.01  on 995  degrees of freedom
## AIC: 618.01
## 
## Number of Fisher Scoring iterations: 6

Delta beta

delta.coef <- abs((coef(model2)-coef(model1)[-c(4)])/coef(model1)[-c(4)])
round(delta.coef, 3)
## (Intercept)        edad         dap         rto        cloA 
##       0.005       0.000       0.001       0.005       0.000
model3 <- glm(abortos ~ edad + dap + cloA, family = binomial, data = data) # Elimiando la variable con el pvalue más alto
summary(model3)
## 
## Call:
## glm(formula = abortos ~ edad + dap + cloA, family = binomial, 
##     data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.2637  -0.4490  -0.1377   0.2815   3.2321  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -9.02097    4.83981  -1.864   0.0623 .  
## edad         0.25929    0.05427   4.778 1.77e-06 ***
## dap          0.94669    0.06532  14.494  < 2e-16 ***
## cloA        -0.11997    0.01223  -9.806  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1241.38  on 999  degrees of freedom
## Residual deviance:  610.41  on 996  degrees of freedom
## AIC: 618.41
## 
## Number of Fisher Scoring iterations: 6
delta.coef2 <- abs((coef(model3)-coef(model2)[-c(4)])/coef(model2)[-c(4)])
round(delta.coef2, 3) # Este es el modelo que escogemos
## (Intercept)        edad         dap        cloA 
##       0.232       0.014       0.006       0.005
model_final <- glm(abortos ~ edad + dap + cloA, family = binomial, data = data) # Elimiando la variable con el pvalue más alto
summary(model_final)
## 
## Call:
## glm(formula = abortos ~ edad + dap + cloA, family = binomial, 
##     data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.2637  -0.4490  -0.1377   0.2815   3.2321  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -9.02097    4.83981  -1.864   0.0623 .  
## edad         0.25929    0.05427   4.778 1.77e-06 ***
## dap          0.94669    0.06532  14.494  < 2e-16 ***
## cloA        -0.11997    0.01223  -9.806  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1241.38  on 999  degrees of freedom
## Residual deviance:  610.41  on 996  degrees of freedom
## AIC: 618.41
## 
## Number of Fisher Scoring iterations: 6

Test de comparación (‘Chisq’)

library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
lrtest(model_final, model2)
## Likelihood ratio test
## 
## Model 1: abortos ~ edad + dap + cloA
## Model 2: abortos ~ edad + dap + rto + cloA
##   #Df  LogLik Df  Chisq Pr(>Chisq)
## 1   4 -305.20                     
## 2   5 -304.01  1 2.3958     0.1217
anova(model_final, model2, test = 'Chisq')
## Analysis of Deviance Table
## 
## Model 1: abortos ~ edad + dap + cloA
## Model 2: abortos ~ edad + dap + rto + cloA
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1       996     610.41                     
## 2       995     608.01  1   2.3958   0.1217

Tercer paso: comprobación de linealidad

par(mfrow = c(2,3))
scatter.smooth(edad, log(pr/(1-pr)), cex = 0.5) 
scatter.smooth(dap, log(pr/(1-pr)), cex = 0.5)
scatter.smooth(cloA, log(pr/(1-pr)), cex = 0.5)

Cuarto paso : interacción entre covariables

Asumimos interacción entre cloA y el dap

model.interation <- glm(abortos ~ edad + cloA + dap + cloA:dap, family = binomial, data = data)

summary(model.interation)
## 
## Call:
## glm(formula = abortos ~ edad + cloA + dap + cloA:dap, family = binomial, 
##     data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.3293  -0.4499  -0.1438   0.2811   3.1953  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -55.026082  60.228558  -0.914    0.361    
## edad          0.259535   0.054285   4.781 1.74e-06 ***
## cloA          0.024419   0.188456   0.130    0.897    
## dap           2.433369   1.941952   1.253    0.210    
## cloA:dap     -0.004665   0.006081  -0.767    0.443    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1241.38  on 999  degrees of freedom
## Residual deviance:  609.81  on 995  degrees of freedom
## AIC: 619.81
## 
## Number of Fisher Scoring iterations: 6

La interacción no tiene efecto sobre los abortos florales

lrtest(model_final, model.interation)
## Likelihood ratio test
## 
## Model 1: abortos ~ edad + dap + cloA
## Model 2: abortos ~ edad + cloA + dap + cloA:dap
##   #Df  LogLik Df Chisq Pr(>Chisq)
## 1   4 -305.20                    
## 2   5 -304.91  1 0.594     0.4409
cloA = rep((4:15), length.out = 100, 4)
dap = mean(dap)
edad = rep(c(20, 40, 60, 80), 100)
newdata <- data.frame(cloA, dap, edad)
newdata1 <- cbind(newdata, predict(model.interation, newdata = newdata, type = "link", se = TRUE))
newdata1 <- within(newdata1, {
    edad <- factor(edad)
    predictedProb <- plogis(fit)
    LL <- plogis(fit - (1.96*se.fit))
    UL <- plogis(fit - (1.96*se.fit))
})
library(ggplot2)
ggplot(newdata1, 
       aes (x = cloA, y = predictedProb))+ geom_ribbon(aes(ymin =LL,
                                                           ymax = UL,
                                                           fill = edad),
                                                       alpha = 0.2 ) + geom_line(aes (colour = edad), size = 8)

Entrega

library(faux)
## 
## ************
## Welcome to faux. For support and examples visit:
## https://debruine.github.io/faux/
## - Get and set global package options with: faux_options()
## ************
dfa <- rnorm_multi(n = 1000,
            mu = c(67, 30, 30, 320),
            sd = c(2, 3, 5, 10),
            varnames = c('Edad1', 'dap1', 'rto1', 'clolA'),
            r = c(0.4, 0.6, 0.5, 0.6, 0.7, 0.8))

dfa$hibrido1 <- round(runif(n = 1000, min = 0,max = 1.2))

w <- 0.5 * dfa$clolA - 0.01 * dfa$dap - 0.6 * dfa$rto - 0.02 * dfa$Edad

dfa$abortos1 <- ifelse(w > 140, 1, 0)

Primer paso : análisis Univariado

univariable_clolA <- glm(abortos1~ clolA, family = binomial, data = dfa)
summary(univariable_clolA)
## 
## Call:
## glm(formula = abortos1 ~ clolA, family = binomial, data = dfa)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.8487  -0.5702   0.1166   0.5981   2.6904  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -84.86354    5.45093  -15.57   <2e-16 ***
## clolA         0.26626    0.01707   15.60   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1375.04  on 999  degrees of freedom
## Residual deviance:  777.35  on 998  degrees of freedom
## AIC: 781.35
## 
## Number of Fisher Scoring iterations: 6
univariable_dap1 <- glm(abortos1 ~ dap1, family = binomial, data = dfa) 
summary(univariable_dap1)
## 
## Call:
## glm(formula = abortos1 ~ dap1, family = binomial, data = dfa)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.2312  -1.0204   0.5367   0.9699   2.2198  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -9.64633    0.82487  -11.69   <2e-16 ***
## dap1         0.32866    0.02749   11.96   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1375.0  on 999  degrees of freedom
## Residual deviance: 1189.4  on 998  degrees of freedom
## AIC: 1193.4
## 
## Number of Fisher Scoring iterations: 3
univariable_edad1 <- glm(abortos1 ~ Edad1, family = binomial, data = dfa)
summary(univariable_edad1)
## 
## Call:
## glm(formula = abortos1 ~ Edad1, family = binomial, data = dfa)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.7385  -1.2151   0.8745   1.0827   1.6272  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -12.21490    2.19037  -5.577 2.45e-08 ***
## Edad1         0.18573    0.03274   5.673 1.40e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1375.0  on 999  degrees of freedom
## Residual deviance: 1341.2  on 998  degrees of freedom
## AIC: 1345.2
## 
## Number of Fisher Scoring iterations: 4
univariable_rto1 <- glm(abortos1 ~ rto1, family = binomial, data = dfa)
summary(univariable_rto1)
## 
## Call:
## glm(formula = abortos1 ~ rto1, family = binomial, data = dfa)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.1741  -1.1613   0.7244   1.0396   1.7607  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -3.37199    0.41970  -8.034 9.41e-16 ***
## rto1         0.11990    0.01393   8.606  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1375.0  on 999  degrees of freedom
## Residual deviance: 1291.1  on 998  degrees of freedom
## AIC: 1295.1
## 
## Number of Fisher Scoring iterations: 4
univariable_h1 <- glm(abortos1 ~ hibrido1, family = binomial, data = dfa) 
summary(univariable_h1)
## 
## Call:
## glm(formula = abortos1 ~ hibrido1, family = binomial, data = dfa)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.299  -1.299   1.061   1.061   1.135  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)   0.1009     0.1031   0.978    0.328
## hibrido1      0.1802     0.1311   1.375    0.169
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1375.0  on 999  degrees of freedom
## Residual deviance: 1373.1  on 998  degrees of freedom
## AIC: 1377.1
## 
## Number of Fisher Scoring iterations: 3

Segundo paso : análisis multivariado

mod1 <- glm(abortos1 ~ dfa$Edad1 + dfa$dap1 + dfa$rto1 + dfa$clolA + dfa$hibrido1, data = dfa)
summary(mod1)
## 
## Call:
## glm(formula = abortos1 ~ dfa$Edad1 + dfa$dap1 + dfa$rto1 + dfa$clolA + 
##     dfa$hibrido1, data = dfa)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -0.63505  -0.26185  -0.00653   0.25284   0.58013  
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -17.795239   0.572584 -31.079   <2e-16 ***
## dfa$Edad1     -0.005544   0.005870  -0.945    0.345    
## dfa$dap1      -0.009221   0.004414  -2.089    0.037 *  
## dfa$rto1      -0.075757   0.003462 -21.881   <2e-16 ***
## dfa$clolA      0.066385   0.001803  36.821   <2e-16 ***
## dfa$hibrido1   0.011885   0.019307   0.616    0.538    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.08718899)
## 
##     Null deviance: 247.191  on 999  degrees of freedom
## Residual deviance:  86.666  on 994  degrees of freedom
## AIC: 406.18
## 
## Number of Fisher Scoring iterations: 2
length(dfa$Edad1)
## [1] 1000
length(dfa$dap1)
## [1] 1000
length(dfa$rto1)
## [1] 1000
length(dfa$clolA)
## [1] 1000
length(dfa$hibrido1)
## [1] 1000

De acuerdo al summary la variable que tiene efecto sobre el aborto floral es dap, entonces se procede a eliminar una a una las demás variables, se comoienza con la que mayor p-valor tiene

mod2 <- glm(abortos1 ~ dfa$Edad1 + dfa$dap1 + dfa$rto1 + dfa$clolA, data = dfa)
summary(mod2)
## 
## Call:
## glm(formula = abortos1 ~ dfa$Edad1 + dfa$dap1 + dfa$rto1 + dfa$clolA, 
##     data = dfa)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -0.64319  -0.26180  -0.00848   0.25059   0.57308  
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -17.798756   0.572377 -31.096   <2e-16 ***
## dfa$Edad1    -0.005489   0.005867  -0.936   0.3498    
## dfa$dap1     -0.009228   0.004412  -2.091   0.0367 *  
## dfa$rto1     -0.075738   0.003461 -21.883   <2e-16 ***
## dfa$clolA     0.066407   0.001802  36.851   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.08713457)
## 
##     Null deviance: 247.191  on 999  degrees of freedom
## Residual deviance:  86.699  on 995  degrees of freedom
## AIC: 404.56
## 
## Number of Fisher Scoring iterations: 2
mod3 <- glm(abortos1 ~ dfa$Edad1 + dfa$rto1 + dfa$clolA, data = dfa)
summary(mod3)
## 
## Call:
## glm(formula = abortos1 ~ dfa$Edad1 + dfa$rto1 + dfa$clolA, data = dfa)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -0.65129  -0.26455  -0.00324   0.25073   0.57498  
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -17.498255   0.554987 -31.529   <2e-16 ***
## dfa$Edad1    -0.005835   0.005875  -0.993    0.321    
## dfa$rto1     -0.076320   0.003456 -22.085   <2e-16 ***
## dfa$clolA     0.064728   0.001616  40.052   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.08742976)
## 
##     Null deviance: 247.19  on 999  degrees of freedom
## Residual deviance:  87.08  on 996  degrees of freedom
## AIC: 406.95
## 
## Number of Fisher Scoring iterations: 2
mod4 <- glm(abortos1 ~ dfa$rto1 + dfa$clolA, data = dfa)
summary(mod4)
## 
## Call:
## glm(formula = abortos1 ~ dfa$rto1 + dfa$clolA, data = dfa)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.6556  -0.2635  -0.0068   0.2538   0.5870  
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -17.831349   0.442198  -40.32   <2e-16 ***
## dfa$rto1     -0.077673   0.003176  -24.46   <2e-16 ***
## dfa$clolA     0.064675   0.001615   40.04   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.08742857)
## 
##     Null deviance: 247.191  on 999  degrees of freedom
## Residual deviance:  87.166  on 997  degrees of freedom
## AIC: 405.94
## 
## Number of Fisher Scoring iterations: 2
mod_final <- mod4

Delta coef

del_coef1 <- abs((coef(mod2)-coef(mod1)[-6]))
round(del_coef1, 3)
## (Intercept)   dfa$Edad1    dfa$dap1    dfa$rto1   dfa$clolA 
##       0.004       0.000       0.000       0.000       0.000
del_coef2 <- abs((coef(mod3)-coef(mod2)[-3]))
round(del_coef2, 3)
## (Intercept)   dfa$Edad1    dfa$rto1   dfa$clolA 
##       0.301       0.000       0.001       0.002
del_coef3 <- abs((coef(mod4)-coef(mod3)[-2]))
round(del_coef3, 3)
## (Intercept)    dfa$rto1   dfa$clolA 
##       0.333       0.001       0.000
library(lmtest)

lrtest(mod_final, mod3)
## Likelihood ratio test
## 
## Model 1: abortos1 ~ dfa$rto1 + dfa$clolA
## Model 2: abortos1 ~ dfa$Edad1 + dfa$rto1 + dfa$clolA
##   #Df  LogLik Df Chisq Pr(>Chisq)
## 1   4 -198.97                    
## 2   5 -198.47  1  0.99     0.3197

De acuerdo este test, observamos que la eliminacion de la variable edad no es muy importante sobre los abortos florales, por lo tanto el modelo no difiere cuando se quita la variable.

Prueba de comparación Chi-cuadrado

anova(mod_final, mod3, test = 'Chisq')
## Analysis of Deviance Table
## 
## Model 1: abortos1 ~ dfa$rto1 + dfa$clolA
## Model 2: abortos1 ~ dfa$Edad1 + dfa$rto1 + dfa$clolA
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1       997     87.166                     
## 2       996     87.080  1 0.086249   0.3206

Confirmamos que la eliminación de la variable edad, no cambia el modelo, es decir los abortos no son alterados por esa variable.

Tercer paso : Comprobación de linealidad

pred <- fitted.values(mod_final)
par(mfrow = c(2, 3))
scatter.smooth(dfa$rto1, pred,cex = 0.5)
scatter.smooth(dfa$clolA, pred, cex = 0.5)

Se presenta linealidad entre el rendimiento con la predicción o probabilidad de abortos, al igual que la clorofila con los abortos.