Logistic and poisson regression

library(faraway)
data(gala)
names(gala)
## [1] "Species"   "Endemics"  "Area"      "Elevation" "Nearest"   "Scruz"    
## [7] "Adjacent"
head(gala)
##              Species Endemics  Area Elevation Nearest Scruz Adjacent
## Baltra            58       23 25.09       346     0.6   0.6     1.84
## Bartolome         31       21  1.24       109     0.6  26.3   572.33
## Caldwell           3        3  0.21       114     2.8  58.7     0.78
## Champion          25        9  0.10        46     1.9  47.4     0.18
## Coamano            2        1  0.05        77     1.9   1.9   903.82
## Daphne.Major      18       11  0.34       119     8.0   8.0     1.84

# Fit a poisson model and check for overdispersion
pm1 <- glm(Species ~ Area + Elevation + Nearest + Adjacent, family = poisson, 
    data = gala)
summary(pm1)
## 
## Call:
## glm(formula = Species ~ Area + Elevation + Nearest + Adjacent, 
##     family = poisson, data = gala)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -11.149   -4.035   -0.978    2.363    9.985  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  3.00e+00   4.98e-02   60.12   <2e-16 ***
## Area        -5.83e-04   2.58e-05  -22.64   <2e-16 ***
## Elevation    3.61e-03   8.60e-05   41.91   <2e-16 ***
## Nearest     -3.24e-03   1.44e-03   -2.24    0.025 *  
## Adjacent    -7.58e-04   2.78e-05  -27.22   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 3510.73  on 29  degrees of freedom
## Residual deviance:  813.62  on 25  degrees of freedom
## AIC: 984.5
## 
## Number of Fisher Scoring iterations: 5
# the predicted number of species is:
# log(y)=2.996-.0005*AREA+.0036*ELEVATION-.0032*NEAREST-.0007*ADJACENT
plot(log(fitted(pm1)), log((gala$Species - fitted(pm1))^2), xlab = expression(hat(mu)), 
    ylab = expression((y - hat(mu))^2))
abline(0, 1)

plot of chunk unnamed-chunk-1


pm2 <- glm(Species ~ Area + Elevation + Nearest + Adjacent, family = quasipoisson, 
    data = gala)
summary(pm2)
## 
## Call:
## glm(formula = Species ~ Area + Elevation + Nearest + Adjacent, 
##     family = quasipoisson, data = gala)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -11.149   -4.035   -0.978    2.363    9.985  
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.995521   0.277488   10.80  6.7e-11 ***
## Area        -0.000583   0.000143   -4.06  0.00042 ***
## Elevation    0.003606   0.000479    7.52  7.1e-08 ***
## Nearest     -0.003240   0.008046   -0.40  0.69056    
## Adjacent    -0.000758   0.000155   -4.89  5.0e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for quasipoisson family taken to be 31.02)
## 
##     Null deviance: 3510.73  on 29  degrees of freedom
## Residual deviance:  813.62  on 25  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 5

# model fitting
drop1(pm2, test = "F")  #use F-test when model is overdispersed
## Single term deletions
## 
## Model:
## Species ~ Area + Elevation + Nearest + Adjacent
##           Df Deviance F value  Pr(>F)    
## <none>            814                    
## Area       1     1324   15.67 0.00055 ***
## Elevation  1     2587   54.50 9.8e-08 ***
## Nearest    1      819    0.16 0.69497    
## Adjacent   1     1798   30.24 1.0e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# when overdispersion is a severe problem, fit a negative binomial model
library(MASS)
## Warning: package 'MASS' was built under R version 2.15.3
gala.nb <- glm.nb(glm(Species ~ Area + Elevation + Nearest + Adjacent, data = gala))
summary(gala.nb)
## 
## Call:
## glm.nb(formula = glm(Species ~ Area + Elevation + Nearest + Adjacent, 
##     data = gala), init.theta = 1.651652744, link = log)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -2.127  -0.997  -0.121   0.541   1.671  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  2.817638   0.237855   11.85  < 2e-16 ***
## Area        -0.000646   0.000288   -2.24   0.0250 *  
## Elevation    0.003933   0.000693    5.68  1.4e-08 ***
## Nearest     -0.000313   0.010722   -0.03   0.9767    
## Adjacent    -0.000795   0.000225   -3.54   0.0004 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for Negative Binomial(1.652) family taken to be 1)
## 
##     Null deviance: 87.286  on 29  degrees of freedom
## Residual deviance: 33.156  on 25  degrees of freedom
## AIC: 302.6
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  1.652 
##           Std. Err.:  0.434 
## 
##  2 x log-likelihood:  -290.592
predict(gala.nb)[1]  #predicted value for Baltra
## Baltra 
##   4.16
exp(4.16)  #fit is a little better
## [1] 64.07