modelo_final.R

setwd("~/Dropbox/Proyecto_Invasión_Neusa/data")
## Librerias necesarias
library("aod", lib.loc="/Library/Frameworks/R.framework/Versions/3.5/Resources/library")
library("arm", lib.loc="/Library/Frameworks/R.framework/Versions/3.5/Resources/library")

## Loading required package: MASS

## Loading required package: Matrix

## Loading required package: lme4

## 
## arm (Version 1.10-1, built: 2018-4-12)

## Working directory is /Users/mazinger/Dropbox/Proyecto_Invasión_Neusa/data

library("ggplot2", lib.loc="/Library/Frameworks/R.framework/Versions/3.5/Resources/library")
library("tidyverse", lib.loc="/Library/Frameworks/R.framework/Versions/3.5/Resources/library")

## ── Attaching packages ──────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──

## ✔ tibble  2.0.1       ✔ purrr   0.3.1  
## ✔ tidyr   0.8.3       ✔ dplyr   0.8.0.1
## ✔ readr   1.3.1       ✔ stringr 1.4.0  
## ✔ tibble  2.0.1       ✔ forcats 0.3.0

## ── Conflicts ─────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ tidyr::expand() masks Matrix::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ✖ dplyr::select() masks MASS::select()

library("caret", lib.loc="/Library/Frameworks/R.framework/Versions/3.5/Resources/library")

## Loading required package: lattice

## 
## Attaching package: 'caret'

## The following object is masked from 'package:purrr':
## 
##     lift

library("rms", lib.loc="/Library/Frameworks/R.framework/Versions/3.5/Resources/library")

## Loading required package: Hmisc

## Loading required package: survival

## 
## Attaching package: 'survival'

## The following object is masked from 'package:caret':
## 
##     cluster

## The following object is masked from 'package:aod':
## 
##     rats

## Loading required package: Formula

## 
## Attaching package: 'Hmisc'

## The following objects are masked from 'package:dplyr':
## 
##     src, summarize

## The following objects are masked from 'package:base':
## 
##     format.pval, units

## Loading required package: SparseM

## 
## Attaching package: 'SparseM'

## The following object is masked from 'package:base':
## 
##     backsolve

library("car", lib.loc="/Library/Frameworks/R.framework/Versions/3.5/Resources/library")

## Loading required package: carData

## 
## Attaching package: 'car'

## The following objects are masked from 'package:rms':
## 
##     Predict, vif

## The following object is masked from 'package:dplyr':
## 
##     recode

## The following object is masked from 'package:purrr':
## 
##     some

## The following object is masked from 'package:arm':
## 
##     logit

library("rcompanion", lib.loc="/Library/Frameworks/R.framework/Versions/3.5/Resources/library")
library("lmtest", lib.loc="/Library/Frameworks/R.framework/Versions/3.5/Resources/library")

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

## 
## Attaching package: 'lmtest'

## The following object is masked from 'package:rms':
## 
##     lrtest

## Lectura base de datos incluyendo las dos especies
invasion<-read.table("basem.csv", sep = ",", header = T)
attach(invasion)
## Conversión a factor para variables categóricas
via<-factor(via)
lado.cam<-factor(lado.cam)
cobertura<-factor(cobertura)
pendiente<-factor(pendiente)
nombre<-factor(nombre)
rep<-factor(rep)
invas.tipo<-factor(invas.tipo)
tipol<-factor(tipol)
## Observando la distribución de los casos por variables convertidas a factor
xtabs(~invas.low+via)

##          via
## invas.low   1   2   3
##         0  42 228  83
##         1  86 307 241

xtabs(~invas.low+lado.cam)

##          lado.cam
## invas.low   1   2
##         0 351   2
##         1 617  17

# Cobertura contiene un gran número de niveles que no permiten agrupar observaciones, se ha desechado ya que genera prediciones perfectas
xtabs(~invas.low+pendiente)

##          pendiente
## invas.low   1   2   3   4   5
##         0 213  40  11  13  76
##         1 395 128  31  18  62

xtabs(~invas.low+nombre)

##          nombre
## invas.low Retamo espinoso Retamo liso
##         0             178         175
##         1             351         283

xtabs(~invas.low+rep) # El nivel 4 de rep (estados reproductivos, frutos) fue desechado ya que tiene un n insuficiente (2 observaciones) para hacer alguna predicción

##          rep
## invas.low   1   2   3   5
##         0  21   3  53 276
##         1   5   3   4 622

xtabs(~invas.low+invas.tipo)

##          invas.tipo
## invas.low   1   2
##         0  20 333
##         1 117 517

xtabs(~invas.low+tipol)

##          tipol
## invas.low   1   2   3
##         0 315  30   8
##         1 485  69  80

## Modelo general
invaslogit.low<-glm(invas.low~alt.prom+borde+msnm.prom+msnm.rango+via+lado.cam+pendiente+nombre+rep+invas.tipo+tipol, family = "binomial")
summary(invaslogit.low)

## 
## Call:
## glm(formula = invas.low ~ alt.prom + borde + msnm.prom + msnm.rango + 
##     via + lado.cam + pendiente + nombre + rep + invas.tipo + 
##     tipol, family = "binomial")
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.0874  -0.8480   0.4400   0.7968   2.6279  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       -3.7427523  1.3035551  -2.871 0.004089 ** 
## alt.prom           1.2699983  0.1660271   7.649 2.02e-14 ***
## borde              0.0350409  0.0248413   1.411 0.158366    
## msnm.prom          0.0006150  0.0004348   1.415 0.157169    
## msnm.rango         0.0062936  0.0031188   2.018 0.043598 *  
## via2              -0.0771173  0.2594589  -0.297 0.766296    
## via3               0.4013411  0.2883210   1.392 0.163924    
## lado.cam2          0.9416924  0.7868155   1.197 0.231369    
## pendiente2         0.3269072  0.2431634   1.344 0.178821    
## pendiente3        -0.0176977  0.4340698  -0.041 0.967478    
## pendiente4        -0.2597689  0.4304679  -0.603 0.546205    
## pendiente5        -0.4385832  0.2298293  -1.908 0.056353 .  
## nombreRetamo liso -0.4961679  0.1651055  -3.005 0.002654 ** 
## rep2               0.4977624  1.1059608   0.450 0.652658    
## rep3              -0.9448714  0.7417504  -1.274 0.202720    
## rep5               1.3634404  0.5400489   2.525 0.011581 *  
## invas.tipo2       -0.7763884  0.2897073  -2.680 0.007364 ** 
## tipol2             0.2964296  0.2827218   1.048 0.294415    
## tipol3             1.5443832  0.4019586   3.842 0.000122 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1287.15  on 986  degrees of freedom
## Residual deviance:  980.36  on 968  degrees of freedom
## AIC: 1018.4
## 
## Number of Fisher Scoring iterations: 6

## Procedimiento stepwise backward selection (automático), seleción de variables que explican la magnitud de la invasión
step.model.invaslogit.low <- invaslogit.low %>% stepAIC(trace = FALSE)
coef(step.model.invaslogit.low)

##       (Intercept)          alt.prom             borde         msnm.prom 
##     -3.8898267459      1.3471745693      0.0478829467      0.0006382136 
##        msnm.rango              via2              via3 nombreRetamo liso 
##      0.0066201232     -0.0544953363      0.4487091674     -0.4899168323 
##              rep2              rep3              rep5       invas.tipo2 
##      0.2900814825     -0.9641517922      1.2883349208     -0.7996233107 
##            tipol2            tipol3 
##      0.2318806003      1.4748870881

## Modelo final
invaslogit.low.reduced<-glm(invas.low~alt.prom+borde+msnm.prom+msnm.rango+via+nombre+rep+invas.tipo+tipol, family = "binomial")
summary(invaslogit.low.reduced)

## 
## Call:
## glm(formula = invas.low ~ alt.prom + borde + msnm.prom + msnm.rango + 
##     via + nombre + rep + invas.tipo + tipol, family = "binomial")
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.1761  -0.8753   0.4499   0.8221   2.4842  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       -3.8898267  1.2913570  -3.012 0.002594 ** 
## alt.prom           1.3471746  0.1639015   8.219  < 2e-16 ***
## borde              0.0478829  0.0281630   1.700 0.089092 .  
## msnm.prom          0.0006382  0.0004303   1.483 0.137981    
## msnm.rango         0.0066201  0.0030087   2.200 0.027785 *  
## via2              -0.0544953  0.2562888  -0.213 0.831614    
## via3               0.4487092  0.2842735   1.578 0.114464    
## nombreRetamo liso -0.4899168  0.1636987  -2.993 0.002764 ** 
## rep2               0.2900815  1.1292617   0.257 0.797274    
## rep3              -0.9641518  0.7395680  -1.304 0.192347    
## rep5               1.2883349  0.5376399   2.396 0.016562 *  
## invas.tipo2       -0.7996233  0.2883851  -2.773 0.005558 ** 
## tipol2             0.2318806  0.2785030   0.833 0.405072    
## tipol3             1.4748871  0.3989689   3.697 0.000218 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1287.15  on 986  degrees of freedom
## Residual deviance:  989.22  on 973  degrees of freedom
## AIC: 1017.2
## 
## Number of Fisher Scoring iterations: 6

Anova(invaslogit.low.reduced, type="II", test="Wald")

## Analysis of Deviance Table (Type II tests)
## 
## Response: invas.low
##            Df   Chisq Pr(>Chisq)    
## alt.prom    1 67.5588  < 2.2e-16 ***
## borde       1  2.8907  0.0890921 .  
## msnm.prom   1  2.2003  0.1379807    
## msnm.rango  1  4.8414  0.0277851 *  
## via         2  7.7531  0.0207225 *  
## nombre      1  8.9568  0.0027643 ** 
## rep         3 22.8158  4.411e-05 ***
## invas.tipo  1  7.6882  0.0055583 ** 
## tipol       2 13.9236  0.0009474 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## Odds ratios
exp(cbind(OR = coef(invaslogit.low.reduced), confint(invaslogit.low.reduced)))

## Waiting for profiling to be done...

##                           OR       2.5 %     97.5 %
## (Intercept)       0.02044889 0.001562754  0.2491235
## alt.prom          3.84654202 2.808494537  5.3422597
## borde             1.04904785 1.005420548  1.1221336
## msnm.prom         1.00063842 0.999797102  1.0014869
## msnm.rango        1.00664208 1.001212523  1.0130825
## via2              0.94696293 0.570644138  1.5614565
## via3              1.56628906 0.894667927  2.7315523
## nombreRetamo liso 0.61267735 0.443569399  0.8430887
## rep2              1.33653639 0.141433634 12.6528631
## rep3              0.38130649 0.083750494  1.6374629
## rep5              3.62674271 1.350892616 11.5668476
## invas.tipo2       0.44949825 0.249400644  0.7765320
## tipol2            1.26096916 0.735243111  2.1972067
## tipol3            4.37054226 2.110775889 10.2781776

## Pruebas de Ajuste y adecuación del modelo
#Pseudo R2
nagelkerke(invaslogit.low.reduced)

## $Models
##                                                                                                                        
## Model: "glm, invas.low ~ alt.prom + borde + msnm.prom + msnm.rango + via + nombre + rep + invas.tipo + tipol, binomial"
## Null:  "glm, invas.low ~ 1, binomial"                                                                                  
## 
## $Pseudo.R.squared.for.model.vs.null
##                              Pseudo.R.squared
## McFadden                             0.231466
## Cox and Snell (ML)                   0.260556
## Nagelkerke (Cragg and Uhler)         0.357619
## 
## $Likelihood.ratio.test
##  Df.diff LogLik.diff  Chisq    p.value
##      -13     -148.97 297.93 6.5136e-56
## 
## $Number.of.observations
##           
## Model: 987
## Null:  987
## 
## $Messages
## [1] "Note: For models fit with REML, these statistics are based on refitting with ML"
## 
## $Warnings
## [1] "None"

# Significancia del modelo (siempre y cuando significativa, la adición de las variables en comparación a un modelo nulo puede explicar la magnitud de la invasión)
lrtest(invaslogit.low.reduced)

## Likelihood ratio test
## 
## Model 1: invas.low ~ alt.prom + borde + msnm.prom + msnm.rango + via + 
##     nombre + rep + invas.tipo + tipol
## Model 2: invas.low ~ 1
##   #Df  LogLik  Df  Chisq Pr(>Chisq)    
## 1  14 -494.61                          
## 2   1 -643.58 -13 297.93  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Sobredispersión (no debe ser superior a 1.5)
summary(invaslogit.low.reduced)$deviance / summary(invaslogit.low.reduced)$df.residual

## [1] 1.016672

# Examen gráfico de los residuos
plot(predict(invaslogit.low.reduced),residuals(invaslogit.low.reduced),col=c("blue","red")[1+invas.low])
abline(h=0,lty=2,col="grey")
lines(lowess(predict(invaslogit.low.reduced),residuals(invaslogit.low.reduced)),col="black",lwd=2)

modelo_final.R

mazinger

Tue Mar 26 23:13:04 2019