setwd("~/Dropbox/Proyecto_Invasión_Neusa/data")
## Librerias necesarias
library("aod", lib.loc="/Library/Frameworks/R.framework/Versions/3.5/Resources/library")
library("arm", lib.loc="/Library/Frameworks/R.framework/Versions/3.5/Resources/library")
## Loading required package: MASS
## Loading required package: Matrix
## Loading required package: lme4
##
## arm (Version 1.10-1, built: 2018-4-12)
## Working directory is /Users/mazinger/Dropbox/Proyecto_Invasión_Neusa/data
library("ggplot2", lib.loc="/Library/Frameworks/R.framework/Versions/3.5/Resources/library")
library("tidyverse", lib.loc="/Library/Frameworks/R.framework/Versions/3.5/Resources/library")
## ── Attaching packages ──────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ tibble 2.0.1 ✔ purrr 0.3.1
## ✔ tidyr 0.8.3 ✔ dplyr 0.8.0.1
## ✔ readr 1.3.1 ✔ stringr 1.4.0
## ✔ tibble 2.0.1 ✔ forcats 0.3.0
## ── Conflicts ─────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ tidyr::expand() masks Matrix::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::select() masks MASS::select()
library("caret", lib.loc="/Library/Frameworks/R.framework/Versions/3.5/Resources/library")
## Loading required package: lattice
##
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
##
## lift
library("rms", lib.loc="/Library/Frameworks/R.framework/Versions/3.5/Resources/library")
## Loading required package: Hmisc
## Loading required package: survival
##
## Attaching package: 'survival'
## The following object is masked from 'package:caret':
##
## cluster
## The following object is masked from 'package:aod':
##
## rats
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following objects are masked from 'package:base':
##
## format.pval, units
## Loading required package: SparseM
##
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
##
## backsolve
library("car", lib.loc="/Library/Frameworks/R.framework/Versions/3.5/Resources/library")
## Loading required package: carData
##
## Attaching package: 'car'
## The following objects are masked from 'package:rms':
##
## Predict, vif
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:purrr':
##
## some
## The following object is masked from 'package:arm':
##
## logit
library("rcompanion", lib.loc="/Library/Frameworks/R.framework/Versions/3.5/Resources/library")
library("lmtest", lib.loc="/Library/Frameworks/R.framework/Versions/3.5/Resources/library")
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'lmtest'
## The following object is masked from 'package:rms':
##
## lrtest
## Lectura base de datos incluyendo las dos especies
invasion<-read.table("basem.csv", sep = ",", header = T)
attach(invasion)
## Conversión a factor para variables categóricas
via<-factor(via)
lado.cam<-factor(lado.cam)
cobertura<-factor(cobertura)
pendiente<-factor(pendiente)
nombre<-factor(nombre)
rep<-factor(rep)
invas.tipo<-factor(invas.tipo)
tipol<-factor(tipol)
## Observando la distribución de los casos por variables convertidas a factor
xtabs(~invas.low+via)
## via
## invas.low 1 2 3
## 0 42 228 83
## 1 86 307 241
xtabs(~invas.low+lado.cam)
## lado.cam
## invas.low 1 2
## 0 351 2
## 1 617 17
# Cobertura contiene un gran número de niveles que no permiten agrupar observaciones, se ha desechado ya que genera prediciones perfectas
xtabs(~invas.low+pendiente)
## pendiente
## invas.low 1 2 3 4 5
## 0 213 40 11 13 76
## 1 395 128 31 18 62
xtabs(~invas.low+nombre)
## nombre
## invas.low Retamo espinoso Retamo liso
## 0 178 175
## 1 351 283
xtabs(~invas.low+rep) # El nivel 4 de rep (estados reproductivos, frutos) fue desechado ya que tiene un n insuficiente (2 observaciones) para hacer alguna predicción
## rep
## invas.low 1 2 3 5
## 0 21 3 53 276
## 1 5 3 4 622
xtabs(~invas.low+invas.tipo)
## invas.tipo
## invas.low 1 2
## 0 20 333
## 1 117 517
xtabs(~invas.low+tipol)
## tipol
## invas.low 1 2 3
## 0 315 30 8
## 1 485 69 80
## Modelo general
invaslogit.low<-glm(invas.low~alt.prom+borde+msnm.prom+msnm.rango+via+lado.cam+pendiente+nombre+rep+invas.tipo+tipol, family = "binomial")
summary(invaslogit.low)
##
## Call:
## glm(formula = invas.low ~ alt.prom + borde + msnm.prom + msnm.rango +
## via + lado.cam + pendiente + nombre + rep + invas.tipo +
## tipol, family = "binomial")
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.0874 -0.8480 0.4400 0.7968 2.6279
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.7427523 1.3035551 -2.871 0.004089 **
## alt.prom 1.2699983 0.1660271 7.649 2.02e-14 ***
## borde 0.0350409 0.0248413 1.411 0.158366
## msnm.prom 0.0006150 0.0004348 1.415 0.157169
## msnm.rango 0.0062936 0.0031188 2.018 0.043598 *
## via2 -0.0771173 0.2594589 -0.297 0.766296
## via3 0.4013411 0.2883210 1.392 0.163924
## lado.cam2 0.9416924 0.7868155 1.197 0.231369
## pendiente2 0.3269072 0.2431634 1.344 0.178821
## pendiente3 -0.0176977 0.4340698 -0.041 0.967478
## pendiente4 -0.2597689 0.4304679 -0.603 0.546205
## pendiente5 -0.4385832 0.2298293 -1.908 0.056353 .
## nombreRetamo liso -0.4961679 0.1651055 -3.005 0.002654 **
## rep2 0.4977624 1.1059608 0.450 0.652658
## rep3 -0.9448714 0.7417504 -1.274 0.202720
## rep5 1.3634404 0.5400489 2.525 0.011581 *
## invas.tipo2 -0.7763884 0.2897073 -2.680 0.007364 **
## tipol2 0.2964296 0.2827218 1.048 0.294415
## tipol3 1.5443832 0.4019586 3.842 0.000122 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1287.15 on 986 degrees of freedom
## Residual deviance: 980.36 on 968 degrees of freedom
## AIC: 1018.4
##
## Number of Fisher Scoring iterations: 6
## Procedimiento stepwise backward selection (automático), seleción de variables que explican la magnitud de la invasión
step.model.invaslogit.low <- invaslogit.low %>% stepAIC(trace = FALSE)
coef(step.model.invaslogit.low)
## (Intercept) alt.prom borde msnm.prom
## -3.8898267459 1.3471745693 0.0478829467 0.0006382136
## msnm.rango via2 via3 nombreRetamo liso
## 0.0066201232 -0.0544953363 0.4487091674 -0.4899168323
## rep2 rep3 rep5 invas.tipo2
## 0.2900814825 -0.9641517922 1.2883349208 -0.7996233107
## tipol2 tipol3
## 0.2318806003 1.4748870881
## Modelo final
invaslogit.low.reduced<-glm(invas.low~alt.prom+borde+msnm.prom+msnm.rango+via+nombre+rep+invas.tipo+tipol, family = "binomial")
summary(invaslogit.low.reduced)
##
## Call:
## glm(formula = invas.low ~ alt.prom + borde + msnm.prom + msnm.rango +
## via + nombre + rep + invas.tipo + tipol, family = "binomial")
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.1761 -0.8753 0.4499 0.8221 2.4842
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.8898267 1.2913570 -3.012 0.002594 **
## alt.prom 1.3471746 0.1639015 8.219 < 2e-16 ***
## borde 0.0478829 0.0281630 1.700 0.089092 .
## msnm.prom 0.0006382 0.0004303 1.483 0.137981
## msnm.rango 0.0066201 0.0030087 2.200 0.027785 *
## via2 -0.0544953 0.2562888 -0.213 0.831614
## via3 0.4487092 0.2842735 1.578 0.114464
## nombreRetamo liso -0.4899168 0.1636987 -2.993 0.002764 **
## rep2 0.2900815 1.1292617 0.257 0.797274
## rep3 -0.9641518 0.7395680 -1.304 0.192347
## rep5 1.2883349 0.5376399 2.396 0.016562 *
## invas.tipo2 -0.7996233 0.2883851 -2.773 0.005558 **
## tipol2 0.2318806 0.2785030 0.833 0.405072
## tipol3 1.4748871 0.3989689 3.697 0.000218 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1287.15 on 986 degrees of freedom
## Residual deviance: 989.22 on 973 degrees of freedom
## AIC: 1017.2
##
## Number of Fisher Scoring iterations: 6
Anova(invaslogit.low.reduced, type="II", test="Wald")
## Analysis of Deviance Table (Type II tests)
##
## Response: invas.low
## Df Chisq Pr(>Chisq)
## alt.prom 1 67.5588 < 2.2e-16 ***
## borde 1 2.8907 0.0890921 .
## msnm.prom 1 2.2003 0.1379807
## msnm.rango 1 4.8414 0.0277851 *
## via 2 7.7531 0.0207225 *
## nombre 1 8.9568 0.0027643 **
## rep 3 22.8158 4.411e-05 ***
## invas.tipo 1 7.6882 0.0055583 **
## tipol 2 13.9236 0.0009474 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Odds ratios
exp(cbind(OR = coef(invaslogit.low.reduced), confint(invaslogit.low.reduced)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.02044889 0.001562754 0.2491235
## alt.prom 3.84654202 2.808494537 5.3422597
## borde 1.04904785 1.005420548 1.1221336
## msnm.prom 1.00063842 0.999797102 1.0014869
## msnm.rango 1.00664208 1.001212523 1.0130825
## via2 0.94696293 0.570644138 1.5614565
## via3 1.56628906 0.894667927 2.7315523
## nombreRetamo liso 0.61267735 0.443569399 0.8430887
## rep2 1.33653639 0.141433634 12.6528631
## rep3 0.38130649 0.083750494 1.6374629
## rep5 3.62674271 1.350892616 11.5668476
## invas.tipo2 0.44949825 0.249400644 0.7765320
## tipol2 1.26096916 0.735243111 2.1972067
## tipol3 4.37054226 2.110775889 10.2781776
## Pruebas de Ajuste y adecuación del modelo
#Pseudo R2
nagelkerke(invaslogit.low.reduced)
## $Models
##
## Model: "glm, invas.low ~ alt.prom + borde + msnm.prom + msnm.rango + via + nombre + rep + invas.tipo + tipol, binomial"
## Null: "glm, invas.low ~ 1, binomial"
##
## $Pseudo.R.squared.for.model.vs.null
## Pseudo.R.squared
## McFadden 0.231466
## Cox and Snell (ML) 0.260556
## Nagelkerke (Cragg and Uhler) 0.357619
##
## $Likelihood.ratio.test
## Df.diff LogLik.diff Chisq p.value
## -13 -148.97 297.93 6.5136e-56
##
## $Number.of.observations
##
## Model: 987
## Null: 987
##
## $Messages
## [1] "Note: For models fit with REML, these statistics are based on refitting with ML"
##
## $Warnings
## [1] "None"
# Significancia del modelo (siempre y cuando significativa, la adición de las variables en comparación a un modelo nulo puede explicar la magnitud de la invasión)
lrtest(invaslogit.low.reduced)
## Likelihood ratio test
##
## Model 1: invas.low ~ alt.prom + borde + msnm.prom + msnm.rango + via +
## nombre + rep + invas.tipo + tipol
## Model 2: invas.low ~ 1
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 14 -494.61
## 2 1 -643.58 -13 297.93 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Sobredispersión (no debe ser superior a 1.5)
summary(invaslogit.low.reduced)$deviance / summary(invaslogit.low.reduced)$df.residual
## [1] 1.016672
# Examen gráfico de los residuos
plot(predict(invaslogit.low.reduced),residuals(invaslogit.low.reduced),col=c("blue","red")[1+invas.low])
abline(h=0,lty=2,col="grey")
lines(lowess(predict(invaslogit.low.reduced),residuals(invaslogit.low.reduced)),col="black",lwd=2)
