setwd("~/Dropbox/Proyecto_Invasión_Neusa/data")
# Librerias necesarias
library("aod", lib.loc="/Library/Frameworks/R.framework/Versions/3.5/Resources/library")
library("arm", lib.loc="/Library/Frameworks/R.framework/Versions/3.5/Resources/library")
## Loading required package: MASS
## Loading required package: Matrix
## Loading required package: lme4
## 
## arm (Version 1.10-1, built: 2018-4-12)
## Working directory is /Users/mazinger/Dropbox/Proyecto_Invasión_Neusa/data
library("ggplot2", lib.loc="/Library/Frameworks/R.framework/Versions/3.5/Resources/library")
library("tidyverse", lib.loc="/Library/Frameworks/R.framework/Versions/3.5/Resources/library")
## ── Attaching packages ──────────────────────────────── tidyverse 1.2.1 ──
## ✔ tibble  2.0.1       ✔ purrr   0.3.1  
## ✔ tidyr   0.8.3       ✔ dplyr   0.8.0.1
## ✔ readr   1.3.1       ✔ stringr 1.4.0  
## ✔ tibble  2.0.1       ✔ forcats 0.3.0
## ── Conflicts ─────────────────────────────────── tidyverse_conflicts() ──
## ✖ tidyr::expand() masks Matrix::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ✖ dplyr::select() masks MASS::select()
library("caret", lib.loc="/Library/Frameworks/R.framework/Versions/3.5/Resources/library")
## Loading required package: lattice
## 
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
## 
##     lift
library("rms", lib.loc="/Library/Frameworks/R.framework/Versions/3.5/Resources/library")
## Loading required package: Hmisc
## Loading required package: survival
## 
## Attaching package: 'survival'
## The following object is masked from 'package:caret':
## 
##     cluster
## The following object is masked from 'package:aod':
## 
##     rats
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## Loading required package: SparseM
## 
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
## 
##     backsolve
library("car", lib.loc="/Library/Frameworks/R.framework/Versions/3.5/Resources/library")
## Loading required package: carData
## 
## Attaching package: 'car'
## The following objects are masked from 'package:rms':
## 
##     Predict, vif
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following object is masked from 'package:purrr':
## 
##     some
## The following object is masked from 'package:arm':
## 
##     logit
library("rcompanion", lib.loc="/Library/Frameworks/R.framework/Versions/3.5/Resources/library")
library("lmtest", lib.loc="/Library/Frameworks/R.framework/Versions/3.5/Resources/library")
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Attaching package: 'lmtest'
## The following object is masked from 'package:rms':
## 
##     lrtest
# Validación interna de las variables que explican la magnitud de la invasión en el modelo general: utilizando 2 subconjuntos de datos que representan las dos especies usadas 
# se corre el modelo con las mismas variables predictoras seleccionadas anteriormente y se elimina la variable categórica "nombre" ya que es la que conforma los 2 subconjuntos de datos    
# Lectura base de datos para Retamo espinoso
invasion2<-read.table("retesp.csv", sep = ",", header = T)
attach(invasion2)
# Conversión a factor para variables categóricas
via<-factor(via)
lado.cam<-factor(lado.cam)
cobertura<-factor(cobertura)
pendiente<-factor(pendiente)
nombre<-factor(nombre)
rep<-factor(rep)
invas.tipo<-factor(invas.tipo)
tipol<-factor(tipol)
# Modelo final
invaslogit.low2<-glm(invas.low~alt.prom+borde+msnm.prom+msnm.rango+via+rep+invas.tipo+tipol, family = "binomial")
summary(invaslogit.low2)
## 
## Call:
## glm(formula = invas.low ~ alt.prom + borde + msnm.prom + msnm.rango + 
##     via + rep + invas.tipo + tipol, family = "binomial")
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.5137  -0.6184   0.3193   0.7169   2.5723  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -5.8012424  1.6651272  -3.484 0.000494 ***
## alt.prom     1.5429742  0.2432828   6.342 2.26e-10 ***
## borde        0.0654247  0.0592832   1.104 0.269769    
## msnm.prom    0.0013450  0.0005679   2.368 0.017863 *  
## msnm.rango   0.0086294  0.0042285   2.041 0.041270 *  
## via2         0.2474929  0.3897508   0.635 0.525427    
## via3         0.5280214  0.4214351   1.253 0.210237    
## rep2         1.0284554  1.7314469   0.594 0.552521    
## rep3        -0.8465774  0.8100291  -1.045 0.295968    
## rep5         1.1478650  0.5553040   2.067 0.038725 *  
## invas.tipo2 -1.3664937  0.5026620  -2.719 0.006558 ** 
## tipol2       0.4929592  0.4022653   1.225 0.220403    
## tipol3       1.9421650  0.5636136   3.446 0.000569 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 675.72  on 528  degrees of freedom
## Residual deviance: 457.83  on 516  degrees of freedom
## AIC: 483.83
## 
## Number of Fisher Scoring iterations: 7
Anova(invaslogit.low2, type="II", test="Wald")
## Analysis of Deviance Table (Type II tests)
## 
## Response: invas.low
##            Df   Chisq Pr(>Chisq)    
## alt.prom    1 40.2249  2.263e-10 ***
## borde       1  1.2179   0.269769    
## msnm.prom   1  5.6095   0.017863 *  
## msnm.rango  1  4.1649   0.041270 *  
## via         2  1.8814   0.390349    
## rep         3 12.9195   0.004814 ** 
## invas.tipo  1  7.3903   0.006558 ** 
## tipol       2 12.6843   0.001761 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Odds ratios
exp(cbind(OR = coef(invaslogit.low2), confint(invaslogit.low2)))
## Waiting for profiling to be done...
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##                      OR        2.5 %       97.5 %
## (Intercept) 0.003023796 0.0001083114   0.07536921
## alt.prom    4.678484326 2.9498976943   7.66594611
## borde       1.067612302 0.9945526093   1.25561957
## msnm.prom   1.001345896 1.0002410123   1.00247598
## msnm.rango  1.008666780 1.0012296193   1.01795619
## via2        1.280810251 0.5939234161   2.75185124
## via3        1.695574149 0.7398535624   3.88001195
## rep2        2.796742526 0.1184081283 111.09502594
## rep3        0.428880327 0.0773725560   2.04340384
## rep5        3.151457229 1.1239229380  10.31789888
## invas.tipo2 0.254999492 0.0868075894   0.64092749
## tipol2      1.637153689 0.7535071057   3.67728056
## tipol3      6.973833204 2.5717092587  24.66783212
# Pseudo R2
nagelkerke(invaslogit.low2)
## $Models
##                                                                                                               
## Model: "glm, invas.low ~ alt.prom + borde + msnm.prom + msnm.rango + via + rep + invas.tipo + tipol, binomial"
## Null:  "glm, invas.low ~ 1, binomial"                                                                         
## 
## $Pseudo.R.squared.for.model.vs.null
##                              Pseudo.R.squared
## McFadden                             0.322453
## Cox and Snell (ML)                   0.337600
## Nagelkerke (Cragg and Uhler)         0.468092
## 
## $Likelihood.ratio.test
##  Df.diff LogLik.diff  Chisq    p.value
##      -12     -108.94 217.89 6.5072e-40
## 
## $Number.of.observations
##           
## Model: 529
## Null:  529
## 
## $Messages
## [1] "Note: For models fit with REML, these statistics are based on refitting with ML"
## 
## $Warnings
## [1] "None"
# Significancia del modelo (siempre y cuando significativa, la adición de las variables en comparación a un modelo nulo puede explicar la magnitud de la invasión)
lrtest(invaslogit.low2)
## Likelihood ratio test
## 
## Model 1: invas.low ~ alt.prom + borde + msnm.prom + msnm.rango + via + 
##     rep + invas.tipo + tipol
## Model 2: invas.low ~ 1
##   #Df  LogLik  Df  Chisq Pr(>Chisq)    
## 1  13 -228.92                          
## 2   1 -337.86 -12 217.89  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Sobredispersión (no debe ser superior a 1.5)
summary(invaslogit.low2)$deviance / summary(invaslogit.low2)$df.residual
## [1] 0.8872704
#examen gráfico de los residuos
plot(predict(invaslogit.low2),residuals(invaslogit.low2),col=c("blue","red")[1+invas.low])
abline(h=0,lty=2,col="grey")
lines(lowess(predict(invaslogit.low2),residuals(invaslogit.low2)),col="black",lwd=2)

# El modelo muestra que las variables seleccionadas en el conjunto de datos completo (con las dos especies incluidas)
#son adecuadas para predecir la invasion de retamo espinoso, el pseudo R2 del Retamo espinoso es ligeramente mayor al R2 del modelo con las dos especies
#no obstante, el modelo de Retamo espinoso muestra predicción perfecta, por tanto se analiza el n disponible en las variables categoricas para hacer la predicción
xtabs(~invas.low+via)
##          via
## invas.low   1   2   3
##         0  22 111  45
##         1  40 180 131
xtabs(~invas.low+rep)
##          rep
## invas.low   1   2   3   5
##         0  20   1  34 123
##         1   5   3   3 340
# Existen muy pocas observaciones para los niveles 1,2 y 3 de rep (estados reproductivos) para hacer una predicción adecuada esta es la razón por la cual el modelo
# muestra una predicción perfecta
xtabs(~invas.low+invas.tipo)
##          invas.tipo
## invas.low   1   2
##         0   6 172
##         1  79 272
xtabs(~invas.low+tipol)
##          tipol
## invas.low   1   2   3
##         0 159  15   4
##         1 255  40  56
#Conclusión: el modelo con las dos especies puede predecir mejor la magnitud de la invasión ya que no presenta prediciones perfectas