### Poisson Regression example in Section 3.1 in Faraway's book

library(faraway)
data(gala)
# The data is from Johnson and Raven (1973) and Weisberg (2005). 
# There are 30 Galapagos islands, each with different geological variables.
# Consider the relationship between the number of species (counts) and these geological variables.

# ?gala                                                                 # Data description
gala
##              Species Endemics    Area Elevation Nearest Scruz Adjacent
## Baltra            58       23   25.09       346     0.6   0.6     1.84
## Bartolome         31       21    1.24       109     0.6  26.3   572.33
## Caldwell           3        3    0.21       114     2.8  58.7     0.78
## Champion          25        9    0.10        46     1.9  47.4     0.18
## Coamano            2        1    0.05        77     1.9   1.9   903.82
## Daphne.Major      18       11    0.34       119     8.0   8.0     1.84
## Daphne.Minor      24        0    0.08        93     6.0  12.0     0.34
## Darwin            10        7    2.33       168    34.1 290.2     2.85
## Eden               8        4    0.03        71     0.4   0.4    17.95
## Enderby            2        2    0.18       112     2.6  50.2     0.10
## Espanola          97       26   58.27       198     1.1  88.3     0.57
## Fernandina        93       35  634.49      1494     4.3  95.3  4669.32
## Gardner1          58       17    0.57        49     1.1  93.1    58.27
## Gardner2           5        4    0.78       227     4.6  62.2     0.21
## Genovesa          40       19   17.35        76    47.4  92.2   129.49
## Isabela          347       89 4669.32      1707     0.7  28.1   634.49
## Marchena          51       23  129.49       343    29.1  85.9    59.56
## Onslow             2        2    0.01        25     3.3  45.9     0.10
## Pinta            104       37   59.56       777    29.1 119.6   129.49
## Pinzon           108       33   17.95       458    10.7  10.7     0.03
## Las.Plazas        12        9    0.23        94     0.5   0.6    25.09
## Rabida            70       30    4.89       367     4.4  24.4   572.33
## SanCristobal     280       65  551.62       716    45.2  66.6     0.57
## SanSalvador      237       81  572.33       906     0.2  19.8     4.89
## SantaCruz        444       95  903.82       864     0.6   0.0     0.52
## SantaFe           62       28   24.08       259    16.5  16.5     0.52
## SantaMaria       285       73  170.92       640     2.6  49.2     0.10
## Seymour           44       16    1.84       147     0.6   9.6    25.09
## Tortuga           16        8    1.24       186     6.8  50.9    17.95
## Wolf              21       12    2.85       253    34.1 254.7     2.33
             # Species    Area Elevation Nearest Scruz Adjacent
# Baltra            58   25.09       346     0.6   0.6     1.84
# Bartolome         31    1.24       109     0.6  26.3   572.33
# Caldwell           3    0.21       114     2.8  58.7     0.78
# Champion          25    0.10        46     1.9  47.4     0.18
# Coamano            2    0.05        77     1.9   1.9   903.82
# Daphne.Major      18    0.34       119     8.0   8.0     1.84
# Daphne.Minor      24    0.08        93     6.0  12.0     0.34
# Darwin            10    2.33       168    34.1 290.2     2.85
# Eden               8    0.03        71     0.4   0.4    17.95
# Enderby            2    0.18       112     2.6  50.2     0.10
# Espanola          97   58.27       198     1.1  88.3     0.57
# Fernandina        93  634.49      1494     4.3  95.3  4669.32
# Gardner1          58    0.57        49     1.1  93.1    58.27
# Gardner2           5    0.78       227     4.6  62.2     0.21
# Genovesa          40   17.35        76    47.4  92.2   129.49
# Isabela          347 4669.32      1707     0.7  28.1   634.49
# Marchena          51  129.49       343    29.1  85.9    59.56
# Onslow             2    0.01        25     3.3  45.9     0.10
# Pinta            104   59.56       777    29.1 119.6   129.49
# Pinzon           108   17.95       458    10.7  10.7     0.03
# Las.Plazas        12    0.23        94     0.5   0.6    25.09
# Rabida            70    4.89       367     4.4  24.4   572.33
# SanCristobal     280  551.62       716    45.2  66.6     0.57
# SanSalvador      237  572.33       906     0.2  19.8     4.89
# SantaCruz        444  903.82       864     0.6   0.0     0.52
# SantaFe           62   24.08       259    16.5  16.5     0.52
# SantaMaria       285  170.92       640     2.6  49.2     0.10
# Seymour           44    1.84       147     0.6   9.6    25.09
# Tortuga           16    1.24       186     6.8  50.9    17.95
# Wolf              21    2.85       253    34.1 254.7     2.33   
gala <- gala[,-2]

modl <- lm(Species ~ . , gala)                                      # try linear regression
summary(modl)
## 
## Call:
## lm(formula = Species ~ ., data = gala)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -111.679  -34.898   -7.862   33.460  182.584 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  7.068221  19.154198   0.369 0.715351    
## Area        -0.023938   0.022422  -1.068 0.296318    
## Elevation    0.319465   0.053663   5.953 3.82e-06 ***
## Nearest      0.009144   1.054136   0.009 0.993151    
## Scruz       -0.240524   0.215402  -1.117 0.275208    
## Adjacent    -0.074805   0.017700  -4.226 0.000297 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 60.98 on 24 degrees of freedom
## Multiple R-squared:  0.7658, Adjusted R-squared:  0.7171 
## F-statistic:  15.7 on 5 and 24 DF,  p-value: 6.838e-07
# Call:
# lm(formula = Species ~ ., data = gala)

# Residuals:
     # Min       1Q   Median       3Q      Max 
# -111.679  -34.898   -7.862   33.460  182.584 

# Coefficients:
             # Estimate Std. Error t value Pr(>|t|)    
# (Intercept)  7.068221  19.154198   0.369 0.715351    
# Area        -0.023938   0.022422  -1.068 0.296318    
# Elevation    0.319465   0.053663   5.953 3.82e-06 ***
# Nearest      0.009144   1.054136   0.009 0.993151    
# Scruz       -0.240524   0.215402  -1.117 0.275208    
# Adjacent    -0.074805   0.017700  -4.226 0.000297 ***
# ---
# Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

# Residual standard error: 60.98 on 24 degrees of freedom
# Multiple R-squared:  0.7658,    Adjusted R-squared:  0.7171 
# F-statistic:  15.7 on 5 and 24 DF,  p-value: 6.838e-07

plot(predict(modl),residuals(modl),xlab="Fitted",ylab="Residuals")  # non-constant variance

modt <- lm(sqrt(Species) ~ . , gala)                                # sqrt transform
summary(modt)
## 
## Call:
## lm(formula = sqrt(Species) ~ ., data = gala)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.5572 -1.4969 -0.3031  1.3527  5.2110 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.3919243  0.8712678   3.893 0.000690 ***
## Area        -0.0019718  0.0010199  -1.933 0.065080 .  
## Elevation    0.0164784  0.0024410   6.751 5.55e-07 ***
## Nearest      0.0249326  0.0479495   0.520 0.607844    
## Scruz       -0.0134826  0.0097980  -1.376 0.181509    
## Adjacent    -0.0033669  0.0008051  -4.182 0.000333 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.774 on 24 degrees of freedom
## Multiple R-squared:  0.7827, Adjusted R-squared:  0.7374 
## F-statistic: 17.29 on 5 and 24 DF,  p-value: 2.874e-07
# Call:
# lm(formula = sqrt(Species) ~ ., data = gala)

# Residuals:
    # Min      1Q  Median      3Q     Max 
# -4.5572 -1.4969 -0.3031  1.3527  5.2110 

# Coefficients:
              # Estimate Std. Error t value Pr(>|t|)    
# (Intercept)  3.3919243  0.8712678   3.893 0.000690 ***
# Area        -0.0019718  0.0010199  -1.933 0.065080 .  
# Elevation    0.0164784  0.0024410   6.751 5.55e-07 ***
# Nearest      0.0249326  0.0479495   0.520 0.607844    
# Scruz       -0.0134826  0.0097980  -1.376 0.181509    
# Adjacent    -0.0033669  0.0008051  -4.182 0.000333 ***
# ---
# Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

# Residual standard error: 2.774 on 24 degrees of freedom
# Multiple R-squared:  0.7827,    Adjusted R-squared:  0.7374 
# F-statistic: 17.29 on 5 and 24 DF,  p-value: 2.874e-07

plot(predict(modt),residuals(modt),xlab="Fitted",ylab="Residuals")  # better fit

modp <- glm(Species ~ .,family=poisson, gala)
summary(modp)
## 
## Call:
## glm(formula = Species ~ ., family = poisson, data = gala)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -8.2752  -4.4966  -0.9443   1.9168  10.1849  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  3.155e+00  5.175e-02  60.963  < 2e-16 ***
## Area        -5.799e-04  2.627e-05 -22.074  < 2e-16 ***
## Elevation    3.541e-03  8.741e-05  40.507  < 2e-16 ***
## Nearest      8.826e-03  1.821e-03   4.846 1.26e-06 ***
## Scruz       -5.709e-03  6.256e-04  -9.126  < 2e-16 ***
## Adjacent    -6.630e-04  2.933e-05 -22.608  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 3510.73  on 29  degrees of freedom
## Residual deviance:  716.85  on 24  degrees of freedom
## AIC: 889.68
## 
## Number of Fisher Scoring iterations: 5
# Call:
# glm(formula = Species ~ ., family = poisson, data = gala)

# Deviance Residuals: 
    # Min       1Q   Median       3Q      Max  
# -8.2752  -4.4966  -0.9443   1.9168  10.1849  

# Coefficients:
              # Estimate Std. Error z value Pr(>|z|)    
# (Intercept)  3.155e+00  5.175e-02  60.963  < 2e-16 ***
# Area        -5.799e-04  2.627e-05 -22.074  < 2e-16 ***
# Elevation    3.541e-03  8.741e-05  40.507  < 2e-16 ***
# Nearest      8.826e-03  1.821e-03   4.846 1.26e-06 ***
# Scruz       -5.709e-03  6.256e-04  -9.126  < 2e-16 ***
# Adjacent    -6.630e-04  2.933e-05 -22.608  < 2e-16 ***
# ---
# Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

# (Dispersion parameter for poisson family taken to be 1)

    # Null deviance: 3510.73  on 29  degrees of freedom
# Residual deviance:  716.85  on 24  degrees of freedom
# AIC: 889.68

# Number of Fisher Scoring iterations: 5


plot(log(fitted(modp)),log((gala$Species-fitted(modp))^2),
xlab=expression(hat(mu)),ylab=expression((yhat(mu))^2),xlim=c(0,10),ylim=c(0,10))
abline(0,1)

dp <-sum(residuals(modp,type="pearson")^2)/modp$df.res;dp
## [1] 31.74914
summary(modp, dispersion=dp)                        # No effect on slope coefficients due to 
## 
## Call:
## glm(formula = Species ~ ., family = poisson, data = gala)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -8.2752  -4.4966  -0.9443   1.9168  10.1849  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  3.1548079  0.2915897  10.819  < 2e-16 ***
## Area        -0.0005799  0.0001480  -3.918 8.95e-05 ***
## Elevation    0.0035406  0.0004925   7.189 6.53e-13 ***
## Nearest      0.0088256  0.0102621   0.860    0.390    
## Scruz       -0.0057094  0.0035251  -1.620    0.105    
## Adjacent    -0.0006630  0.0001653  -4.012 6.01e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 31.74914)
## 
##     Null deviance: 3510.73  on 29  degrees of freedom
## Residual deviance:  716.85  on 24  degrees of freedom
## AIC: 889.68
## 
## Number of Fisher Scoring iterations: 5
                                                    # orthogonality but SE estimates are higher
# Call:
# glm(formula = Species ~ ., family = poisson, data = gala)

# Deviance Residuals: 
    # Min       1Q   Median       3Q      Max  
# -8.2752  -4.4966  -0.9443   1.9168  10.1849  

# Coefficients:
              # Estimate Std. Error z value Pr(>|z|)    
# (Intercept)  3.1548079  0.2915897  10.819  < 2e-16 ***
# Area        -0.0005799  0.0001480  -3.918 8.95e-05 ***
# Elevation    0.0035406  0.0004925   7.189 6.53e-13 ***
# Nearest      0.0088256  0.0102621   0.860    0.390    
# Scruz       -0.0057094  0.0035251  -1.620    0.105    
# Adjacent    -0.0006630  0.0001653  -4.012 6.01e-05 ***
# ---
# Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

# (Dispersion parameter for poisson family taken to be 31.74914)

    # Null deviance: 3510.73  on 29  degrees of freedom
# Residual deviance:  716.85  on 24  degrees of freedom
# AIC: 889.68

# Number of Fisher Scoring iterations: 5



### Rate Regression example in Section 3.2 in Faraway's book

library(faraway)
data(dicentric)
# Purott and Reeder (1976) on he effect of gamma radiation 
# on the numbers of chromosomal abnormalities
# ?dicentric
dicentric                           # Note that variable "cells" represents the number of hundreds of cells
##    cells  ca doseamt doserate
## 1    478  25     1.0     0.10
## 2   1907 102     1.0     0.25
## 3   2258 149     1.0     0.50
## 4   2329 160     1.0     1.00
## 5   1238  75     1.0     1.50
## 6   1491 100     1.0     2.00
## 7   1518  99     1.0     2.50
## 8    764  50     1.0     3.00
## 9   1367 100     1.0     4.00
## 10   328  52     2.5     0.10
## 11   185  51     2.5     0.25
## 12   342 100     2.5     0.50
## 13   310 100     2.5     1.00
## 14   278 107     2.5     1.50
## 15   259 107     2.5     2.00
## 16   249 102     2.5     2.50
## 17   298 110     2.5     3.00
## 18   243 107     2.5     4.00
## 19   210 100     5.0     0.10
## 20   138 113     5.0     0.25
## 21   160 144     5.0     0.50
## 22   120 106     5.0     1.00
## 23    90 111     5.0     1.50
## 24   100 132     5.0     2.00
## 25   313 419     5.0     2.50
## 26   182 225     5.0     3.00
## 27   144 206     5.0     4.00
   # cells  ca doseamt doserate
# 1    478  25     1.0     0.10
# 2   1907 102     1.0     0.25
# 3   2258 149     1.0     0.50
# 4   2329 160     1.0     1.00
# 5   1238  75     1.0     1.50
# 6   1491 100     1.0     2.00
# 7   1518  99     1.0     2.50
# 8    764  50     1.0     3.00
# 9   1367 100     1.0     4.00
# 10   328  52     2.5     0.10
# 11   185  51     2.5     0.25
# 12   342 100     2.5     0.50
# 13   310 100     2.5     1.00
# 14   278 107     2.5     1.50
# 15   259 107     2.5     2.00
# 16   249 102     2.5     2.50
# 17   298 110     2.5     3.00
# 18   243 107     2.5     4.00
# 19   210 100     5.0     0.10
# 20   138 113     5.0     0.25
# 21   160 144     5.0     0.50
# 22   120 106     5.0     1.00
# 23    90 111     5.0     1.50
# 24   100 132     5.0     2.00
# 25   313 419     5.0     2.50
# 26   182 225     5.0     3.00
# 27   144 206     5.0     4.00

round(xtabs(ca/cells ~ doseamt+doserate,dicentric),2)
##        doserate
## doseamt  0.1 0.25  0.5    1  1.5    2  2.5    3    4
##     1   0.05 0.05 0.07 0.07 0.06 0.07 0.07 0.07 0.07
##     2.5 0.16 0.28 0.29 0.32 0.38 0.41 0.41 0.37 0.44
##     5   0.48 0.82 0.90 0.88 1.23 1.32 1.34 1.24 1.43
       # doserate
# doseamt  0.1 0.25  0.5    1  1.5    2  2.5    3    4
    # 1   0.05 0.05 0.07 0.07 0.06 0.07 0.07 0.07 0.07
    # 2.5 0.16 0.28 0.29 0.32 0.38 0.41 0.41 0.37 0.44
    # 5   0.48 0.82 0.90 0.88 1.23 1.32 1.34 1.24 1.43
    
lmod <- lm(ca/cells ~ log(doserate)*factor(doseamt),dicentric)
summary(lmod)
## 
## Call:
## lm(formula = ca/cells ~ log(doserate) * factor(doseamt), data = dicentric)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.184275 -0.004212  0.001314  0.021208  0.089076 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      0.063489   0.019528   3.251  0.00382 ** 
## log(doserate)                    0.004573   0.016692   0.274  0.78680    
## factor(doseamt)2.5               0.276315   0.027616  10.005 1.92e-09 ***
## factor(doseamt)5                 1.004119   0.027616  36.359  < 2e-16 ***
## log(doserate):factor(doseamt)2.5 0.063933   0.023606   2.708  0.01317 *  
## log(doserate):factor(doseamt)5   0.239129   0.023606  10.130 1.54e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.05858 on 21 degrees of freedom
## Multiple R-squared:  0.9874, Adjusted R-squared:  0.9844 
## F-statistic:   330 on 5 and 21 DF,  p-value: < 2.2e-16
# Call:
# lm(formula = ca/cells ~ log(doserate) * factor(doseamt), data = dicentric)

# Residuals:
      # Min        1Q    Median        3Q       Max 
# -0.184275 -0.004212  0.001314  0.021208  0.089076 

# Coefficients:
                                 # Estimate Std. Error t value Pr(>|t|)    
# (Intercept)                      0.063489   0.019528   3.251  0.00382 ** 
# log(doserate)                    0.004573   0.016692   0.274  0.78680    
# factor(doseamt)2.5               0.276315   0.027616  10.005 1.92e-09 ***
# factor(doseamt)5                 1.004119   0.027616  36.359  < 2e-16 ***
# log(doserate):factor(doseamt)2.5 0.063933   0.023606   2.708  0.01317 *  
# log(doserate):factor(doseamt)5   0.239129   0.023606  10.130 1.54e-09 ***
# ---
# Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

# Residual standard error: 0.05858 on 21 degrees of freedom
# Multiple R-squared:  0.9874,    Adjusted R-squared:  0.9844 
# F-statistic:   330 on 5 and 21 DF,  p-value: < 2.2e-16

plot(residuals(lmod) ~fitted(lmod),xlab="Fitted",ylab="Residuals")          # non-constant variance
abline(h=0)

dicentric$dosef <- factor(dicentric$doseamt)
pmod <- glm(ca ~ log(cells)+log(doserate)*dosef, family=poisson,dicentric)  # interested in multiplicative effect
summary(pmod)
## 
## Call:
## glm(formula = ca ~ log(cells) + log(doserate) * dosef, family = poisson, 
##     data = dicentric)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.49901  -0.62229  -0.05021   0.76919   1.59525  
## 
## Coefficients:
##                        Estimate Std. Error z value Pr(>|z|)    
## (Intercept)            -2.76534    0.38116  -7.255 4.02e-13 ***
## log(cells)              1.00252    0.05137  19.517  < 2e-16 ***
## log(doserate)           0.07200    0.03547   2.030 0.042403 *  
## dosef2.5                1.62984    0.10273  15.866  < 2e-16 ***
## dosef5                  2.76673    0.12287  22.517  < 2e-16 ***
## log(doserate):dosef2.5  0.16111    0.04837   3.331 0.000866 ***
## log(doserate):dosef5    0.19316    0.04299   4.493 7.03e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 916.127  on 26  degrees of freedom
## Residual deviance:  21.748  on 20  degrees of freedom
## AIC: 211.15
## 
## Number of Fisher Scoring iterations: 4
# Call:
# glm(formula = ca ~ log(cells) + log(doserate) * dosef, family = poisson, 
    # data = dicentric)

# Deviance Residuals: 
     # Min        1Q    Median        3Q       Max  
# -1.49901  -0.62229  -0.05021   0.76919   1.59525  

# Coefficients:
                       # Estimate Std. Error z value Pr(>|z|)    
# (Intercept)            -2.76534    0.38116  -7.255 4.02e-13 ***
# log(cells)              1.00252    0.05137  19.517  < 2e-16 ***
# log(doserate)           0.07200    0.03547   2.030 0.042403 *  
# dosef2.5                1.62984    0.10273  15.866  < 2e-16 ***
# dosef5                  2.76673    0.12287  22.517  < 2e-16 ***
# log(doserate):dosef2.5  0.16111    0.04837   3.331 0.000866 ***
# log(doserate):dosef5    0.19316    0.04299   4.493 7.03e-06 ***
# ---
# Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

# (Dispersion parameter for poisson family taken to be 1)

    # Null deviance: 916.127  on 26  degrees of freedom
# Residual deviance:  21.748  on 20  degrees of freedom
# AIC: 211.15

# Number of Fisher Scoring iterations: 4



rmod <- glm(ca ~ offset(log(cells))+log(doserate)*dosef,    # offset the coefficient of log(cells) to be 1
family=poisson,dicentric)
summary(rmod)
## 
## Call:
## glm(formula = ca ~ offset(log(cells)) + log(doserate) * dosef, 
##     family = poisson, data = dicentric)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.49101  -0.62473  -0.05078   0.76786   1.59115  
## 
## Coefficients:
##                        Estimate Std. Error z value Pr(>|z|)    
## (Intercept)            -2.74671    0.03426 -80.165  < 2e-16 ***
## log(doserate)           0.07178    0.03518   2.041 0.041299 *  
## dosef2.5                1.62542    0.04946  32.863  < 2e-16 ***
## dosef5                  2.76109    0.04349  63.491  < 2e-16 ***
## log(doserate):dosef2.5  0.16122    0.04830   3.338 0.000844 ***
## log(doserate):dosef5    0.19350    0.04243   4.561  5.1e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 4753.00  on 26  degrees of freedom
## Residual deviance:   21.75  on 21  degrees of freedom
## AIC: 209.16
## 
## Number of Fisher Scoring iterations: 4
# Call:
# glm(formula = ca ~ offset(log(cells)) + log(doserate) * dosef, 
    # family = poisson, data = dicentric)

# Deviance Residuals: 
     # Min        1Q    Median        3Q       Max  
# -1.49101  -0.62473  -0.05078   0.76786   1.59115  

# Coefficients:
                       # Estimate Std. Error z value Pr(>|z|)    
# (Intercept)            -2.74671    0.03426 -80.165  < 2e-16 ***
# log(doserate)           0.07178    0.03518   2.041 0.041299 *  
# dosef2.5                1.62542    0.04946  32.863  < 2e-16 ***
# dosef5                  2.76109    0.04349  63.491  < 2e-16 ***
# log(doserate):dosef2.5  0.16122    0.04830   3.338 0.000844 ***
# log(doserate):dosef5    0.19350    0.04243   4.561  5.1e-06 ***
# ---
# Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

# (Dispersion parameter for poisson family taken to be 1)

    # Null deviance: 4753.00  on 26  degrees of freedom
# Residual deviance:   21.75  on 21  degrees of freedom
# AIC: 209.16

# Number of Fisher Scoring iterations: 4

### Negative Binomial Regression example in Section 3.3 in Faraway's book
data(solder)
# five factors relevant to a wave-soldering procedure for mounting components on printed circuit boards. 
# The response variable, skips, is a count of how many solder skips appeared to a visual inspection.
tail(solder)
##     Opening Solder Mask PadType Panel skips
## 895       S   Thin   B6      W9     1    13
## 896       S   Thin   B6      W9     2    21
## 897       S   Thin   B6      W9     3    15
## 898       S   Thin   B6      L9     1    11
## 899       S   Thin   B6      L9     2    33
## 900       S   Thin   B6      L9     3    15
    # Opening Solder Mask PadType Panel skips
# 895       S   Thin   B6      W9     1    13
# 896       S   Thin   B6      W9     2    21
# 897       S   Thin   B6      W9     3    15
# 898       S   Thin   B6      L9     1    11
# 899       S   Thin   B6      L9     2    33
# 900       S   Thin   B6      L9     3    15


modp <- glm(skips ~ . , family=poisson, data=solder)
summary(modp)
## 
## Call:
## glm(formula = skips ~ ., family = poisson, data = solder)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.6413  -1.2562  -0.5286   0.5670   4.7289  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -1.10042    0.09267 -11.874  < 2e-16 ***
## OpeningM     0.57161    0.05707  10.017  < 2e-16 ***
## OpeningS     1.81475    0.05044  35.980  < 2e-16 ***
## SolderThin   0.84682    0.03327  25.453  < 2e-16 ***
## MaskA3       0.51315    0.07098   7.230 4.83e-13 ***
## MaskA6       1.81103    0.06609  27.404  < 2e-16 ***
## MaskB3       1.20225    0.06697  17.953  < 2e-16 ***
## MaskB6       1.86648    0.06310  29.580  < 2e-16 ***
## PadTypeD6   -0.40328    0.06028  -6.690 2.24e-11 ***
## PadTypeD7   -0.08979    0.05521  -1.626 0.103852    
## PadTypeL4    0.22460    0.05117   4.389 1.14e-05 ***
## PadTypeL6   -0.70633    0.06637 -10.642  < 2e-16 ***
## PadTypeL7   -0.48260    0.06176  -7.814 5.52e-15 ***
## PadTypeL8   -0.30778    0.05862  -5.251 1.51e-07 ***
## PadTypeL9   -0.63793    0.06489  -9.831  < 2e-16 ***
## PadTypeW4   -0.19021    0.05671  -3.354 0.000796 ***
## PadTypeW9   -1.56252    0.09165 -17.048  < 2e-16 ***
## Panel        0.14670    0.01745   8.405  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 8788.2  on 899  degrees of freedom
## Residual deviance: 1829.0  on 882  degrees of freedom
## AIC: 3967.6
## 
## Number of Fisher Scoring iterations: 5
# Call:
# glm(formula = skips ~ ., family = poisson, data = solder)

# Deviance Residuals: 
    # Min       1Q   Median       3Q      Max  
# -3.6413  -1.2562  -0.5286   0.5670   4.7289  

# Coefficients:
            # Estimate Std. Error z value Pr(>|z|)    
# (Intercept) -1.10042    0.09267 -11.874  < 2e-16 ***
# OpeningM     0.57161    0.05707  10.017  < 2e-16 ***
# OpeningS     1.81475    0.05044  35.980  < 2e-16 ***
# SolderThin   0.84682    0.03327  25.453  < 2e-16 ***
# MaskA3       0.51315    0.07098   7.230 4.83e-13 ***
# MaskA6       1.81103    0.06609  27.404  < 2e-16 ***
# MaskB3       1.20225    0.06697  17.953  < 2e-16 ***
# MaskB6       1.86648    0.06310  29.580  < 2e-16 ***
# PadTypeD6   -0.40328    0.06028  -6.690 2.24e-11 ***
# PadTypeD7   -0.08979    0.05521  -1.626 0.103852    
# PadTypeL4    0.22460    0.05117   4.389 1.14e-05 ***
# PadTypeL6   -0.70633    0.06637 -10.642  < 2e-16 ***
# PadTypeL7   -0.48260    0.06176  -7.814 5.52e-15 ***
# PadTypeL8   -0.30778    0.05862  -5.251 1.51e-07 ***
# PadTypeL9   -0.63793    0.06489  -9.831  < 2e-16 ***
# PadTypeW4   -0.19021    0.05671  -3.354 0.000796 ***
# PadTypeW9   -1.56252    0.09165 -17.048  < 2e-16 ***
# Panel        0.14670    0.01745   8.405  < 2e-16 ***
# ---
# Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

# (Dispersion parameter for poisson family taken to be 1)

    # Null deviance: 8788.2  on 899  degrees of freedom
# Residual deviance: 1829.0  on 882  degrees of freedom
# AIC: 3967.6

# Number of Fisher Scoring iterations: 5

deviance(modp); df.residual(modp)       
## [1] 1829.002
## [1] 882
pchisq(deviance(modp),df.residual(modp),lower=FALSE) # Overdispersion
## [1] 1.958183e-68
modp2 <- glm(skips ~ (Opening +Solder + Mask + PadType + Panel)^2 , 
family=poisson, data=solder)            

summary(modp2)
## 
## Call:
## glm(formula = skips ~ (Opening + Solder + Mask + PadType + Panel)^2, 
##     family = poisson, data = solder)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.3705  -0.8845  -0.2963   0.5035   3.4497  
## 
## Coefficients: (1 not defined because of singularities)
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          -3.790428   0.439188  -8.631  < 2e-16 ***
## OpeningM              1.256622   0.387231   3.245 0.001174 ** 
## OpeningS              3.409668   0.317714  10.732  < 2e-16 ***
## SolderThin            2.799378   0.255617  10.951  < 2e-16 ***
## MaskA3                1.208574   0.396865   3.045 0.002324 ** 
## MaskA6                3.232144   0.324781   9.952  < 2e-16 ***
## MaskB3                2.401436   0.381673   6.292 3.14e-10 ***
## MaskB6                3.518764   0.361837   9.725  < 2e-16 ***
## PadTypeD6            -0.417397   0.406617  -1.027 0.304650    
## PadTypeD7             0.494889   0.353410   1.400 0.161416    
## PadTypeL4             1.255929   0.326825   3.843 0.000122 ***
## PadTypeL6             0.180790   0.423799   0.427 0.669675    
## PadTypeL7             0.560943   0.400805   1.400 0.161651    
## PadTypeL8             0.684032   0.372300   1.837 0.066163 .  
## PadTypeL9            -0.563980   0.421458  -1.338 0.180843    
## PadTypeW4             0.292149   0.370794   0.788 0.430755    
## PadTypeW9            -0.977399   0.586792  -1.666 0.095780 .  
## Panel                 0.359817   0.111883   3.216 0.001300 ** 
## OpeningM:SolderThin  -0.898370   0.172621  -5.204 1.95e-07 ***
## OpeningS:SolderThin  -0.943802   0.146888  -6.425 1.32e-10 ***
## OpeningM:MaskA3       0.488411   0.307829   1.587 0.112595    
## OpeningS:MaskA3      -0.277080   0.240924  -1.150 0.250113    
## OpeningM:MaskA6       1.211972   0.208135   5.823 5.78e-09 ***
## OpeningS:MaskA6             NA         NA      NA       NA    
## OpeningM:MaskB3      -0.429647   0.310225  -1.385 0.166067    
## OpeningS:MaskB3      -0.286922   0.237220  -1.210 0.226464    
## OpeningM:MaskB6       0.003031   0.286150   0.011 0.991549    
## OpeningS:MaskB6      -0.638078   0.223648  -2.853 0.004330 ** 
## OpeningM:PadTypeD6   -0.296590   0.270427  -1.097 0.272751    
## OpeningS:PadTypeD6    0.293517   0.225890   1.299 0.193815    
## OpeningM:PadTypeD7   -0.046054   0.234077  -0.197 0.844026    
## OpeningS:PadTypeD7    0.072605   0.201275   0.361 0.718305    
## OpeningM:PadTypeL4   -0.053800   0.211326  -0.255 0.799045    
## OpeningS:PadTypeL4   -0.146391   0.181862  -0.805 0.420846    
## OpeningM:PadTypeL6   -0.230838   0.277505  -0.832 0.405502    
## OpeningS:PadTypeL6   -0.069632   0.233476  -0.298 0.765519    
## OpeningM:PadTypeL7   -0.058891   0.274677  -0.214 0.830235    
## OpeningS:PadTypeL7    0.152334   0.235529   0.647 0.517779    
## OpeningM:PadTypeL8   -0.043336   0.243936  -0.178 0.858995    
## OpeningS:PadTypeL8   -0.079820   0.209168  -0.382 0.702753    
## OpeningM:PadTypeL9    0.198778   0.276167   0.720 0.471664    
## OpeningS:PadTypeL9    0.131243   0.241726   0.543 0.587169    
## OpeningM:PadTypeW4   -0.845318   0.232904  -3.629 0.000284 ***
## OpeningS:PadTypeW4   -0.308013   0.187299  -1.644 0.100073    
## OpeningM:PadTypeW9    0.341746   0.410668   0.832 0.405313    
## OpeningS:PadTypeW9    0.410660   0.361239   1.137 0.255618    
## OpeningM:Panel       -0.072105   0.074451  -0.968 0.332805    
## OpeningS:Panel       -0.142014   0.062907  -2.258 0.023975 *  
## SolderThin:MaskA3    -0.524187   0.189994  -2.759 0.005798 ** 
## SolderThin:MaskA6    -1.797891   0.211964  -8.482  < 2e-16 ***
## SolderThin:MaskB3    -0.796132   0.183613  -4.336 1.45e-05 ***
## SolderThin:MaskB6    -0.885348   0.176606  -5.013 5.36e-07 ***
## SolderThin:PadTypeD6 -0.167949   0.154149  -1.090 0.275925    
## SolderThin:PadTypeD7 -0.351283   0.137127  -2.562 0.010415 *  
## SolderThin:PadTypeL4 -0.654798   0.124536  -5.258 1.46e-07 ***
## SolderThin:PadTypeL6 -0.621968   0.156753  -3.968 7.25e-05 ***
## SolderThin:PadTypeL7 -0.845301   0.144681  -5.843 5.14e-09 ***
## SolderThin:PadTypeL8 -0.685421   0.139256  -4.922 8.57e-07 ***
## SolderThin:PadTypeL9 -0.245324   0.160855  -1.525 0.127229    
## SolderThin:PadTypeW4 -0.180558   0.145491  -1.241 0.214596    
## SolderThin:PadTypeW9 -0.299370   0.219428  -1.364 0.172468    
## SolderThin:Panel      0.139539   0.040894   3.412 0.000644 ***
## MaskA3:PadTypeD6      0.116158   0.324330   0.358 0.720232    
## MaskA6:PadTypeD6      0.216038   0.296473   0.729 0.466190    
## MaskB3:PadTypeD6      0.224998   0.300249   0.749 0.453633    
## MaskB6:PadTypeD6      0.245083   0.284969   0.860 0.389770    
## MaskA3:PadTypeD7     -0.091424   0.274262  -0.333 0.738873    
## MaskA6:PadTypeD7     -0.192284   0.251813  -0.764 0.445108    
## MaskB3:PadTypeD7     -0.270055   0.258299  -1.046 0.295786    
## MaskB6:PadTypeD7     -0.217501   0.241330  -0.901 0.367450    
## MaskA3:PadTypeL4      0.022784   0.257184   0.089 0.929409    
## MaskA6:PadTypeL4     -0.279622   0.239522  -1.167 0.243041    
## MaskB3:PadTypeL4     -0.111186   0.242380  -0.459 0.646431    
## MaskB6:PadTypeL4     -0.248325   0.228901  -1.085 0.277984    
## MaskA3:PadTypeL6      0.276448   0.331857   0.833 0.404825    
## MaskA6:PadTypeL6     -0.180375   0.318414  -0.566 0.571069    
## MaskB3:PadTypeL6      0.056605   0.317345   0.178 0.858432    
## MaskB6:PadTypeL6     -0.153403   0.302870  -0.506 0.612507    
## MaskA3:PadTypeL7     -0.085910   0.314366  -0.273 0.784637    
## MaskA6:PadTypeL7     -0.140562   0.291300  -0.483 0.629426    
## MaskB3:PadTypeL7      0.044196   0.291202   0.152 0.879367    
## MaskB6:PadTypeL7     -0.253888   0.278761  -0.911 0.362415    
## MaskA3:PadTypeL8     -0.040412   0.293952  -0.137 0.890654    
## MaskA6:PadTypeL8     -0.344603   0.275354  -1.251 0.210754    
## MaskB3:PadTypeL8      0.021563   0.274125   0.079 0.937301    
## MaskB6:PadTypeL8     -0.239589   0.261266  -0.917 0.359127    
## MaskA3:PadTypeL9      0.155884   0.317129   0.492 0.623038    
## MaskA6:PadTypeL9     -0.171715   0.297031  -0.578 0.563193    
## MaskB3:PadTypeL9     -0.182335   0.304914  -0.598 0.549848    
## MaskB6:PadTypeL9     -0.226337   0.286099  -0.791 0.428877    
## MaskA3:PadTypeW4      0.102471   0.307238   0.334 0.738739    
## MaskA6:PadTypeW4      0.175358   0.283053   0.620 0.535572    
## MaskB3:PadTypeW4      0.098612   0.287371   0.343 0.731484    
## MaskB6:PadTypeW4      0.407102   0.269275   1.512 0.130574    
## MaskA3:PadTypeW9     -0.467682   0.466202  -1.003 0.315776    
## MaskA6:PadTypeW9     -0.720748   0.424011  -1.700 0.089162 .  
## MaskB3:PadTypeW9     -0.592526   0.432584  -1.370 0.170770    
## MaskB6:PadTypeW9      0.126737   0.379424   0.334 0.738362    
## MaskA3:Panel         -0.046212   0.088742  -0.521 0.602545    
## MaskA6:Panel         -0.057908   0.083316  -0.695 0.487031    
## MaskB3:Panel         -0.115932   0.083653  -1.386 0.165785    
## MaskB6:Panel         -0.186039   0.079046  -2.354 0.018594 *  
## PadTypeD6:Panel      -0.101387   0.074753  -1.356 0.175008    
## PadTypeD7:Panel      -0.073970   0.068566  -1.079 0.280670    
## PadTypeL4:Panel      -0.117661   0.063676  -1.848 0.064629 .  
## PadTypeL6:Panel      -0.126999   0.082249  -1.544 0.122569    
## PadTypeL7:Panel      -0.184934   0.076590  -2.415 0.015753 *  
## PadTypeL8:Panel      -0.114420   0.072797  -1.572 0.116005    
## PadTypeL9:Panel       0.055960   0.081316   0.688 0.491338    
## PadTypeW4:Panel      -0.096242   0.070465  -1.366 0.171998    
## PadTypeW9:Panel      -0.227473   0.113050  -2.012 0.044204 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 8788.2  on 899  degrees of freedom
## Residual deviance: 1068.8  on 790  degrees of freedom
## AIC: 3391.4
## 
## Number of Fisher Scoring iterations: 6
deviance(modp2); df.residual(modp2)
## [1] 1068.817
## [1] 790
pchisq(deviance(modp2),df.residual(modp2),lower=FALSE) # Still overdispersion
## [1] 1.130696e-10
library(MASS)
## Warning: package 'MASS' was built under R version 3.1.3
modn <- glm(skips ~ . , negative.binomial(1),solder) # NB with link k=1
summary(modn)                           # Less overdispersion
## 
## Call:
## glm(formula = skips ~ ., family = negative.binomial(1), data = solder)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.9628  -0.8021  -0.2373   0.3204   2.1097  
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.69933    0.16334 -10.404  < 2e-16 ***
## OpeningM     0.50854    0.08797   5.781 1.03e-08 ***
## OpeningS     1.99966    0.08069  24.783  < 2e-16 ***
## SolderThin   1.04894    0.06364  16.483  < 2e-16 ***
## MaskA3       0.65710    0.10463   6.280 5.29e-10 ***
## MaskA6       2.52649    0.12123  20.841  < 2e-16 ***
## MaskB3       1.27261    0.10780  11.806  < 2e-16 ***
## MaskB6       2.08026    0.10482  19.845  < 2e-16 ***
## PadTypeD6   -0.46118    0.13788  -3.345 0.000859 ***
## PadTypeD7    0.01608    0.13350   0.120 0.904185    
## PadTypeL4    0.46883    0.13046   3.594 0.000344 ***
## PadTypeL6   -0.47115    0.13799  -3.414 0.000669 ***
## PadTypeL7   -0.29494    0.13620  -2.165 0.030618 *  
## PadTypeL8   -0.08493    0.13432  -0.632 0.527372    
## PadTypeL9   -0.52125    0.13854  -3.763 0.000179 ***
## PadTypeW4   -0.14250    0.13481  -1.057 0.290781    
## PadTypeW9   -1.48361    0.15317  -9.686  < 2e-16 ***
## Panel        0.16932    0.03811   4.443 1.00e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(1) family taken to be 0.5560534)
## 
##     Null deviance: 1743.01  on 899  degrees of freedom
## Residual deviance:  558.67  on 882  degrees of freedom
## AIC: 3883.7
## 
## Number of Fisher Scoring iterations: 9
modn <- glm.nb(skips ~ .,solder)
summary (modn)
## 
## Call:
## glm.nb(formula = skips ~ ., data = solder, init.theta = 4.397157245, 
##     link = log)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.7376  -1.0068  -0.3834   0.4460   2.7829  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -1.42245    0.14274  -9.965  < 2e-16 ***
## OpeningM     0.50294    0.07976   6.306 2.87e-10 ***
## OpeningS     1.91317    0.07152  26.750  < 2e-16 ***
## SolderThin   0.93932    0.05362  17.517  < 2e-16 ***
## MaskA3       0.58981    0.09651   6.112 9.87e-10 ***
## MaskA6       2.26734    0.10182  22.269  < 2e-16 ***
## MaskB3       1.21101    0.09637  12.566  < 2e-16 ***
## MaskB6       1.99037    0.09223  21.580  < 2e-16 ***
## PadTypeD6   -0.46592    0.11238  -4.146 3.38e-05 ***
## PadTypeD7   -0.03315    0.10673  -0.311 0.756114    
## PadTypeL4    0.38268    0.10265   3.728 0.000193 ***
## PadTypeL6   -0.57844    0.11413  -5.068 4.01e-07 ***
## PadTypeL7   -0.36656    0.11094  -3.304 0.000953 ***
## PadTypeL8   -0.15890    0.10821  -1.468 0.141986    
## PadTypeL9   -0.56600    0.11393  -4.968 6.77e-07 ***
## PadTypeW4   -0.20044    0.10873  -1.844 0.065255 .  
## PadTypeW9   -1.56460    0.13621 -11.486  < 2e-16 ***
## Panel        0.16369    0.03139   5.214 1.85e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(4.3972) family taken to be 1)
## 
##     Null deviance: 4043.3  on 899  degrees of freedom
## Residual deviance: 1008.3  on 882  degrees of freedom
## AIC: 3683.3
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  4.397 
##           Std. Err.:  0.495 
## 
##  2 x log-likelihood:  -3645.309