#Importar los datos:
bat.df <- read.table('http://www.uib.no/People/nzlkj/statkey/data/bat.csv',header=T)
attach(bat.df)
head(bat.df,15)
## occurrence temp
## 1 0 8.0
## 2 0 8.5
## 3 0 9.0
## 4 0 9.0
## 5 0 9.0
## 6 0 9.0
## 7 0 9.5
## 8 0 9.5
## 9 0 9.5
## 10 0 10.0
## 11 0 10.0
## 12 0 10.0
## 13 0 10.0
## 14 0 10.0
## 15 0 10.5
#Ejecutar el modelo:
bat.glm <- glm(occurrence~temp, binomial, data=bat.df)
summary(bat.glm)
##
## Call:
## glm(formula = occurrence ~ temp, family = binomial, data = bat.df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.60282 -0.78282 -0.01836 0.59162 1.63226
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -28.3926 4.1007 -6.924 4.40e-12 ***
## temp 2.1893 0.3163 6.921 4.49e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 253.25 on 182 degrees of freedom
## Residual deviance: 144.48 on 181 degrees of freedom
## AIC: 148.48
##
## Number of Fisher Scoring iterations: 6
#Observe que si hay diferencias sifnificativas.
anova(bat.glm, test='Chisq')
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: occurrence
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 182 253.25
## temp 1 108.77 181 144.47 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Modelos nulos,
bat.glm1 <- glm(occurrence~1, binomial) #Estimación del intercepto mediante un efecto aleatorio.
summary(bat.glm1, test="Chisq")
##
## Call:
## glm(formula = occurrence ~ 1, family = binomial)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.136 -1.136 -1.136 1.219 1.219
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.09844 0.14802 -0.665 0.506
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 253.25 on 182 degrees of freedom
## Residual deviance: 253.25 on 182 degrees of freedom
## AIC: 255.25
##
## Number of Fisher Scoring iterations: 3
anova(bat.glm,bat.glm1, test='Chi')
## Analysis of Deviance Table
##
## Model 1: occurrence ~ temp
## Model 2: occurrence ~ 1
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 181 144.47
## 2 182 253.25 -1 -108.77 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
bat.glm2 <- glm(occurrence~1 +temp, binomial)#Estimación del intercepto mediante un efecto aleatorio, mas la variable independiente
summary(bat.glm2)
##
## Call:
## glm(formula = occurrence ~ 1 + temp, family = binomial)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.60282 -0.78282 -0.01836 0.59162 1.63226
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -28.3926 4.1007 -6.924 4.40e-12 ***
## temp 2.1893 0.3163 6.921 4.49e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 253.25 on 182 degrees of freedom
## Residual deviance: 144.48 on 181 degrees of freedom
## AIC: 148.48
##
## Number of Fisher Scoring iterations: 6
#Comparamos los modelos
anova(bat.glm,bat.glm2, test='Chis')
## Analysis of Deviance Table
##
## Model 1: occurrence ~ temp
## Model 2: occurrence ~ 1 + temp
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 181 144.47
## 2 181 144.47 0 0
anova(bat.glm2, test='Chis')
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: occurrence
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 182 253.25
## temp 1 108.77 181 144.47 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Mostrar el efecto de forma gráfica
library(effects)
plot(allEffects(bat.glm))
library(popbio)
logi.hist.plot(temp,occurrence,boxp=F,type="hist",col="gray", xlab="temp")
# o sin uso de paquetes
plot(temp, occurrence)
tempvals <- seq(8,15.1, .1)
lines(tempvals, predict(bat.glm, list(temp=tempvals), type='response'))
#Desarrolle modelos de glm
set.seed(2016)
bodysize<-rnorm(20,30,5) # genera 20 valores, con media of 30 & s.d.=2
set.seed(4)
age<-rnorm(20,5,1.5)
bodysize<-sort(bodysize) # ordena de forma ascendente.
survive<-c(0,0,0,0,0,1,0,1,0,0,1,1,0,1,1,1,0,1,1,1) # el vector sobrevivenvia, se supone que la mortalidad ocurre en individuos de bodysize menores
dat<-as.data.frame(cbind(bodysize,survive,age)) # crea un data frame.
head(dat, 8)
## bodysize survive age
## 1 16.04265 0 5.325132
## 2 25.42629 0 4.186261
## 3 25.47205 0 6.336717
## 4 25.61454 0 5.893971
## 5 26.18246 0 7.453427
## 6 26.49067 1 6.033913
## 7 26.50730 0 3.078130
## 8 26.57495 1 4.680283
library(popbio)
logi.hist.plot(age,survive,boxp=F,type="hist",col="gray", xlab="bodysize")
#Modelo glm
glm.s<-glm(survive~bodysize,family=binomial,dat)
summary(glm.s)
##
## Call:
## glm(formula = survive ~ bodysize, family = binomial, data = dat)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.96476 -0.82351 0.08846 0.88483 1.54749
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -8.9608 4.6317 -1.935 0.0530 .
## bodysize 0.3066 0.1583 1.937 0.0527 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 27.726 on 19 degrees of freedom
## Residual deviance: 21.656 on 18 degrees of freedom
## AIC: 25.656
##
## Number of Fisher Scoring iterations: 5
library(effects)
plot(allEffects(glm.s))
#Genere un modelo que incluya la edad "age" y compare contra todos los modelos que puede ejeuctar.
#Piense, en mode,os simples, multiples, de efectos aleatorios
glm.s2<-glm(survive~bodysize+age,family=binomial,dat)
summary(glm.s2)
##
## Call:
## glm(formula = survive ~ bodysize + age, family = binomial, data = dat)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.59619 -0.59853 0.02848 0.76485 1.70778
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.9583 5.3666 -0.924 0.3555
## bodysize 0.3066 0.1675 1.830 0.0672 .
## age -0.7220 0.5045 -1.431 0.1524
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 27.726 on 19 degrees of freedom
## Residual deviance: 19.171 on 17 degrees of freedom
## AIC: 25.171
##
## Number of Fisher Scoring iterations: 5
library(effects)
plot(allEffects(glm.s2))
summary(glm.s2)
##
## Call:
## glm(formula = survive ~ bodysize + age, family = binomial, data = dat)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.59619 -0.59853 0.02848 0.76485 1.70778
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.9583 5.3666 -0.924 0.3555
## bodysize 0.3066 0.1675 1.830 0.0672 .
## age -0.7220 0.5045 -1.431 0.1524
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 27.726 on 19 degrees of freedom
## Residual deviance: 19.171 on 17 degrees of freedom
## AIC: 25.171
##
## Number of Fisher Scoring iterations: 5
#Analice su conclusión
#Comparación de modelos
anova(glm.s,glm.s2, test="Chisq")
## Analysis of Deviance Table
##
## Model 1: survive ~ bodysize
## Model 2: survive ~ bodysize + age
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 18 21.655
## 2 17 19.171 1 2.4846 0.115
glm.s3<-glm(survive~1 +bodysize,family=binomial,dat)
library(effects)
plot(allEffects(glm.s3))
summary(glm.s3)
##
## Call:
## glm(formula = survive ~ 1 + bodysize, family = binomial, data = dat)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.96476 -0.82351 0.08846 0.88483 1.54749
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -8.9608 4.6317 -1.935 0.0530 .
## bodysize 0.3066 0.1583 1.937 0.0527 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 27.726 on 19 degrees of freedom
## Residual deviance: 21.656 on 18 degrees of freedom
## AIC: 25.656
##
## Number of Fisher Scoring iterations: 5
anova(glm.s, glm.s3, test="Chisq")
## Analysis of Deviance Table
##
## Model 1: survive ~ bodysize
## Model 2: survive ~ 1 + bodysize
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 18 21.655
## 2 18 21.655 0 0
seeds.df <-read.table('http://www.uib.no/People/nzlkj/statkey/data/seeds.csv',header=T)
attach(seeds.df)
head(seeds.df)
## Hamster Seed.type Not.eaten Eaten
## 1 1 B 48 2
## 2 2 B 46 4
## 3 3 A 41 9
## 4 4 B 46 4
## 5 5 A 40 10
## 6 6 A 39 11
#Crear plot para cada variable dependiente:
plot(Seed.type, Eaten)
plot(Seed.type, Not.eaten)
#Generar el modelo:
fit1.glm <- glm(cbind(Eaten,Not.eaten)~Seed.type, binomial)
summary(fit1.glm)
##
## Call:
## glm(formula = cbind(Eaten, Not.eaten) ~ Seed.type, family = binomial)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.13219 -0.98042 -0.07752 0.98500 1.85766
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.0460 0.1075 -9.733 < 2e-16 ***
## Seed.typeB -2.0221 0.2527 -8.001 1.24e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 117.193 on 17 degrees of freedom
## Residual deviance: 28.802 on 16 degrees of freedom
## AIC: 88.967
##
## Number of Fisher Scoring iterations: 5
anova(fit1.glm, test='Chisq')
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: cbind(Eaten, Not.eaten)
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 17 117.193
## Seed.type 1 88.391 16 28.802 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
fit2.glm <- glm(cbind(Eaten,Not.eaten)~Seed.type, quasibinomial)
anova(fit2.glm, test='F')
## Analysis of Deviance Table
##
## Model: quasibinomial, link: logit
##
## Response: cbind(Eaten, Not.eaten)
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev F Pr(>F)
## NULL 17 117.193
## Seed.type 1 88.391 16 28.802 56.77 1.196e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(fit2.glm)
##
## Call:
## glm(formula = cbind(Eaten, Not.eaten) ~ Seed.type, family = quasibinomial)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.13219 -0.98042 -0.07752 0.98500 1.85766
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.0460 0.1341 -7.800 7.69e-07 ***
## Seed.typeB -2.0221 0.3154 -6.412 8.60e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasibinomial family taken to be 1.557005)
##
## Null deviance: 117.193 on 17 degrees of freedom
## Residual deviance: 28.802 on 16 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 5
#Para calcular la probabilidad de depredación para la semilla "A"?
exp(-1.0460)/(1+exp(0.1341)) #se obtien el resultado me da la probabilidad.
## [1] 0.1639091
#Para calcular la probabilidad de depredación para la semilla "B"?
exp(-2.0221)/(1+exp(0.3154)) # se obtien el resultado me da la probabilidad de depredación para la semilla "B"
## [1] 0.05583633
library(effects)
plot(allEffects(fit2.glm))
#Visual gráfica de las predicciones
fit.pre <- predict(fit1.glm, type='response', se.fit=T)
plot(Seed.type, fit.pre$fit, xlab='Seed type', ylab='Probability of predation', ylim=c(0,1))
#En la base de datos se está interesado en saber si el nacimiento de niños con bajo peso <2.5kg (low), depende de la edad (age) y la raza (race) de la madre
library(MASS)
head(birthwt)
## low age lwt race smoke ptl ht ui ftv bwt
## 85 0 19 182 2 0 0 0 1 0 2523
## 86 0 33 155 3 0 0 0 0 3 2551
## 87 0 20 105 1 1 0 0 0 1 2557
## 88 0 21 108 1 1 0 0 1 2 2594
## 89 0 18 107 1 1 0 0 1 0 2600
## 91 0 21 124 3 0 0 0 0 0 2622
nacimiento <- glm(low ~ age + race, family = binomial, data = birthwt)
summary(nacimiento) #Interprete el resulatado.
##
## Call:
## glm(formula = low ~ age + race, family = binomial, data = birthwt)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.1054 -0.8820 -0.7607 1.3466 1.8182
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.33286 0.86012 -0.387 0.699
## age -0.04355 0.03219 -1.353 0.176
## race 0.28600 0.17413 1.642 0.101
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 234.67 on 188 degrees of freedom
## Residual deviance: 229.21 on 186 degrees of freedom
## AIC: 235.21
##
## Number of Fisher Scoring iterations: 4
#Para saber si mi modelo presenta sobredispersión, se puede aplicar la siguiente función.
phi <- sum(resid(nacimiento, type = "pearson")^2) / df.residual(nacimiento)
phi
## [1] 1.011612
#Lo que hay que buscar que es que el valor no sobrepase 1.2. Para modelos con pocos datos es preferible que no pase de 1.5, para que no exista sobredispersión.
#valores menores a uno no habría problema.
#Este parámetro nos indica cuántas veces mayor es la varianza de la media.
#dispersión fue menor que uno, resulta que la varianza condicional es en realidad menor que la media condicional, por lo que se interpreta que no hay sobredispersión.
#o aplicar un modelo "quasi()".
nacimiento2 <- glm(low ~ age + race, family = quasibinomial, data = birthwt)
summary(nacimiento2) #Haga lectura de los parámetros de dispersión.
##
## Call:
## glm(formula = low ~ age + race, family = quasibinomial, data = birthwt)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.1054 -0.8820 -0.7607 1.3466 1.8182
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.33286 0.86510 -0.385 0.701
## age -0.04355 0.03237 -1.345 0.180
## race 0.28600 0.17514 1.633 0.104
##
## (Dispersion parameter for quasibinomial family taken to be 1.011613)
##
## Null deviance: 234.67 on 188 degrees of freedom
## Residual deviance: 229.21 on 186 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 4
nacimiento2$coefficients
## (Intercept) age race
## -0.33285936 -0.04354883 0.28599803
pi.hat<- predict.glm(nacimiento2, data.frame(age=25, race=3), type="response", se.fit=TRUE)
pi.hat
## $fit
## 1
## 0.3627181
##
## $se.fit
## 1
## 0.06073225
##
## $residual.scale
## [1] 1.00579
library(effects)
plot(allEffects(nacimiento2))
fit.pre <- predict(nacimiento2, type='response', se.fit=T)
plot(birthwt$age, fit.pre$fit, xlab='age', ylab='Probabilidad de nacimientos <25', ylim=c(0,1))
fit.pre <- predict(nacimiento2, type='response', se.fit=T)
plot(birthwt$race, fit.pre$fit, xlab='raza', ylab='Probabilidad de nacimientos <25', ylim=c(0,1))
nacimiento3<-glm(low ~age+ smoke, data = birthwt, family = binomial)
summary(nacimiento3)
##
## Call:
## glm(formula = low ~ age + smoke, family = binomial, data = birthwt)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.1589 -0.8668 -0.7470 1.2821 1.7925
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.06091 0.75732 0.080 0.9359
## age -0.04978 0.03197 -1.557 0.1195
## smoke 0.69185 0.32181 2.150 0.0316 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 234.67 on 188 degrees of freedom
## Residual deviance: 227.28 on 186 degrees of freedom
## AIC: 233.28
##
## Number of Fisher Scoring iterations: 4
fit.pre <- predict(nacimiento3, type='response', se.fit=T)
plot(birthwt$smoke, fit.pre$fit, xlab='smoke', ylab='Probabilidad de nacimientos <25', ylim=c(0,1))
nacimiento4<-glm(low ~smoke, data = birthwt, family = binomial)
summary(nacimiento4)
##
## Call:
## glm(formula = low ~ smoke, family = binomial, data = birthwt)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.0197 -0.7623 -0.7623 1.3438 1.6599
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.0871 0.2147 -5.062 4.14e-07 ***
## smoke 0.7041 0.3196 2.203 0.0276 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 234.67 on 188 degrees of freedom
## Residual deviance: 229.80 on 187 degrees of freedom
## AIC: 233.8
##
## Number of Fisher Scoring iterations: 4
exp(0.7041)/(1+exp(0.3196))#La prob de tener nacimientos de niños con pesos <25 de mujeres fumadoras es de 85.08%
## [1] 0.8508144
exp(-1.0871)/(1+exp(0.2147))#La prob de tener nacimientos de niños con pesos <25, pára mujeres NO fumadoras es de 15.05%
## [1] 0.1505668
library(effects)
plot(allEffects(nacimiento4))
Observación: Drop1= para diseños no ortogonales, produce resultados (Type III SS). Anova= para comparar modelos, para distribucion de datos binomial, se requiere en la función Test=“Chisq”.