library(faraway)
data(gala)
names(gala)
## [1] "Species" "Endemics" "Area" "Elevation" "Nearest" "Scruz"
## [7] "Adjacent"
head(gala)
## Species Endemics Area Elevation Nearest Scruz Adjacent
## Baltra 58 23 25.09 346 0.6 0.6 1.84
## Bartolome 31 21 1.24 109 0.6 26.3 572.33
## Caldwell 3 3 0.21 114 2.8 58.7 0.78
## Champion 25 9 0.10 46 1.9 47.4 0.18
## Coamano 2 1 0.05 77 1.9 1.9 903.82
## Daphne.Major 18 11 0.34 119 8.0 8.0 1.84
# Fit a poisson model and check for overdispersion
pm1 <- glm(Species ~ Area + Elevation + Nearest + Adjacent, family = poisson,
data = gala)
summary(pm1)
##
## Call:
## glm(formula = Species ~ Area + Elevation + Nearest + Adjacent,
## family = poisson, data = gala)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -11.149 -4.035 -0.978 2.363 9.985
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.00e+00 4.98e-02 60.12 <2e-16 ***
## Area -5.83e-04 2.58e-05 -22.64 <2e-16 ***
## Elevation 3.61e-03 8.60e-05 41.91 <2e-16 ***
## Nearest -3.24e-03 1.44e-03 -2.24 0.025 *
## Adjacent -7.58e-04 2.78e-05 -27.22 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 3510.73 on 29 degrees of freedom
## Residual deviance: 813.62 on 25 degrees of freedom
## AIC: 984.5
##
## Number of Fisher Scoring iterations: 5
# the predicted number of species is:
# log(y)=2.996-.0005*AREA+.0036*ELEVATION-.0032*NEAREST-.0007*ADJACENT
plot(log(fitted(pm1)), log((gala$Species - fitted(pm1))^2), xlab = expression(hat(mu)),
ylab = expression((y - hat(mu))^2))
abline(0, 1)
pm2 <- glm(Species ~ Area + Elevation + Nearest + Adjacent, family = quasipoisson,
data = gala)
summary(pm2)
##
## Call:
## glm(formula = Species ~ Area + Elevation + Nearest + Adjacent,
## family = quasipoisson, data = gala)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -11.149 -4.035 -0.978 2.363 9.985
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.995521 0.277488 10.80 6.7e-11 ***
## Area -0.000583 0.000143 -4.06 0.00042 ***
## Elevation 0.003606 0.000479 7.52 7.1e-08 ***
## Nearest -0.003240 0.008046 -0.40 0.69056
## Adjacent -0.000758 0.000155 -4.89 5.0e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 31.02)
##
## Null deviance: 3510.73 on 29 degrees of freedom
## Residual deviance: 813.62 on 25 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 5
# model fitting
drop1(pm2, test = "F") #use F-test when model is overdispersed
## Single term deletions
##
## Model:
## Species ~ Area + Elevation + Nearest + Adjacent
## Df Deviance F value Pr(>F)
## <none> 814
## Area 1 1324 15.67 0.00055 ***
## Elevation 1 2587 54.50 9.8e-08 ***
## Nearest 1 819 0.16 0.69497
## Adjacent 1 1798 30.24 1.0e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# when overdispersion is a severe problem, fit a negative binomial model
library(MASS)
## Warning: package 'MASS' was built under R version 2.15.3
gala.nb <- glm.nb(glm(Species ~ Area + Elevation + Nearest + Adjacent, data = gala))
summary(gala.nb)
##
## Call:
## glm.nb(formula = glm(Species ~ Area + Elevation + Nearest + Adjacent,
## data = gala), init.theta = 1.651652744, link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.127 -0.997 -0.121 0.541 1.671
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.817638 0.237855 11.85 < 2e-16 ***
## Area -0.000646 0.000288 -2.24 0.0250 *
## Elevation 0.003933 0.000693 5.68 1.4e-08 ***
## Nearest -0.000313 0.010722 -0.03 0.9767
## Adjacent -0.000795 0.000225 -3.54 0.0004 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(1.652) family taken to be 1)
##
## Null deviance: 87.286 on 29 degrees of freedom
## Residual deviance: 33.156 on 25 degrees of freedom
## AIC: 302.6
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 1.652
## Std. Err.: 0.434
##
## 2 x log-likelihood: -290.592
predict(gala.nb)[1] #predicted value for Baltra
## Baltra
## 4.16
exp(4.16) #fit is a little better
## [1] 64.07