### Poisson Regression example in Section 3.1 in Faraway's book
library(faraway)
data(gala)
# The data is from Johnson and Raven (1973) and Weisberg (2005).
# There are 30 Galapagos islands, each with different geological variables.
# Consider the relationship between the number of species (counts) and these geological variables.
# ?gala # Data description
gala
## Species Endemics Area Elevation Nearest Scruz Adjacent
## Baltra 58 23 25.09 346 0.6 0.6 1.84
## Bartolome 31 21 1.24 109 0.6 26.3 572.33
## Caldwell 3 3 0.21 114 2.8 58.7 0.78
## Champion 25 9 0.10 46 1.9 47.4 0.18
## Coamano 2 1 0.05 77 1.9 1.9 903.82
## Daphne.Major 18 11 0.34 119 8.0 8.0 1.84
## Daphne.Minor 24 0 0.08 93 6.0 12.0 0.34
## Darwin 10 7 2.33 168 34.1 290.2 2.85
## Eden 8 4 0.03 71 0.4 0.4 17.95
## Enderby 2 2 0.18 112 2.6 50.2 0.10
## Espanola 97 26 58.27 198 1.1 88.3 0.57
## Fernandina 93 35 634.49 1494 4.3 95.3 4669.32
## Gardner1 58 17 0.57 49 1.1 93.1 58.27
## Gardner2 5 4 0.78 227 4.6 62.2 0.21
## Genovesa 40 19 17.35 76 47.4 92.2 129.49
## Isabela 347 89 4669.32 1707 0.7 28.1 634.49
## Marchena 51 23 129.49 343 29.1 85.9 59.56
## Onslow 2 2 0.01 25 3.3 45.9 0.10
## Pinta 104 37 59.56 777 29.1 119.6 129.49
## Pinzon 108 33 17.95 458 10.7 10.7 0.03
## Las.Plazas 12 9 0.23 94 0.5 0.6 25.09
## Rabida 70 30 4.89 367 4.4 24.4 572.33
## SanCristobal 280 65 551.62 716 45.2 66.6 0.57
## SanSalvador 237 81 572.33 906 0.2 19.8 4.89
## SantaCruz 444 95 903.82 864 0.6 0.0 0.52
## SantaFe 62 28 24.08 259 16.5 16.5 0.52
## SantaMaria 285 73 170.92 640 2.6 49.2 0.10
## Seymour 44 16 1.84 147 0.6 9.6 25.09
## Tortuga 16 8 1.24 186 6.8 50.9 17.95
## Wolf 21 12 2.85 253 34.1 254.7 2.33
# Species Area Elevation Nearest Scruz Adjacent
# Baltra 58 25.09 346 0.6 0.6 1.84
# Bartolome 31 1.24 109 0.6 26.3 572.33
# Caldwell 3 0.21 114 2.8 58.7 0.78
# Champion 25 0.10 46 1.9 47.4 0.18
# Coamano 2 0.05 77 1.9 1.9 903.82
# Daphne.Major 18 0.34 119 8.0 8.0 1.84
# Daphne.Minor 24 0.08 93 6.0 12.0 0.34
# Darwin 10 2.33 168 34.1 290.2 2.85
# Eden 8 0.03 71 0.4 0.4 17.95
# Enderby 2 0.18 112 2.6 50.2 0.10
# Espanola 97 58.27 198 1.1 88.3 0.57
# Fernandina 93 634.49 1494 4.3 95.3 4669.32
# Gardner1 58 0.57 49 1.1 93.1 58.27
# Gardner2 5 0.78 227 4.6 62.2 0.21
# Genovesa 40 17.35 76 47.4 92.2 129.49
# Isabela 347 4669.32 1707 0.7 28.1 634.49
# Marchena 51 129.49 343 29.1 85.9 59.56
# Onslow 2 0.01 25 3.3 45.9 0.10
# Pinta 104 59.56 777 29.1 119.6 129.49
# Pinzon 108 17.95 458 10.7 10.7 0.03
# Las.Plazas 12 0.23 94 0.5 0.6 25.09
# Rabida 70 4.89 367 4.4 24.4 572.33
# SanCristobal 280 551.62 716 45.2 66.6 0.57
# SanSalvador 237 572.33 906 0.2 19.8 4.89
# SantaCruz 444 903.82 864 0.6 0.0 0.52
# SantaFe 62 24.08 259 16.5 16.5 0.52
# SantaMaria 285 170.92 640 2.6 49.2 0.10
# Seymour 44 1.84 147 0.6 9.6 25.09
# Tortuga 16 1.24 186 6.8 50.9 17.95
# Wolf 21 2.85 253 34.1 254.7 2.33
gala <- gala[,-2]
modl <- lm(Species ~ . , gala) # try linear regression
summary(modl)
##
## Call:
## lm(formula = Species ~ ., data = gala)
##
## Residuals:
## Min 1Q Median 3Q Max
## -111.679 -34.898 -7.862 33.460 182.584
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.068221 19.154198 0.369 0.715351
## Area -0.023938 0.022422 -1.068 0.296318
## Elevation 0.319465 0.053663 5.953 3.82e-06 ***
## Nearest 0.009144 1.054136 0.009 0.993151
## Scruz -0.240524 0.215402 -1.117 0.275208
## Adjacent -0.074805 0.017700 -4.226 0.000297 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 60.98 on 24 degrees of freedom
## Multiple R-squared: 0.7658, Adjusted R-squared: 0.7171
## F-statistic: 15.7 on 5 and 24 DF, p-value: 6.838e-07
# Call:
# lm(formula = Species ~ ., data = gala)
# Residuals:
# Min 1Q Median 3Q Max
# -111.679 -34.898 -7.862 33.460 182.584
# Coefficients:
# Estimate Std. Error t value Pr(>|t|)
# (Intercept) 7.068221 19.154198 0.369 0.715351
# Area -0.023938 0.022422 -1.068 0.296318
# Elevation 0.319465 0.053663 5.953 3.82e-06 ***
# Nearest 0.009144 1.054136 0.009 0.993151
# Scruz -0.240524 0.215402 -1.117 0.275208
# Adjacent -0.074805 0.017700 -4.226 0.000297 ***
# ---
# Signif. codes: 0 *** 0.001 ** 0.01 * 0.05 . 0.1 1
# Residual standard error: 60.98 on 24 degrees of freedom
# Multiple R-squared: 0.7658, Adjusted R-squared: 0.7171
# F-statistic: 15.7 on 5 and 24 DF, p-value: 6.838e-07
plot(predict(modl),residuals(modl),xlab="Fitted",ylab="Residuals") # non-constant variance

modt <- lm(sqrt(Species) ~ . , gala) # sqrt transform
summary(modt)
##
## Call:
## lm(formula = sqrt(Species) ~ ., data = gala)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.5572 -1.4969 -0.3031 1.3527 5.2110
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.3919243 0.8712678 3.893 0.000690 ***
## Area -0.0019718 0.0010199 -1.933 0.065080 .
## Elevation 0.0164784 0.0024410 6.751 5.55e-07 ***
## Nearest 0.0249326 0.0479495 0.520 0.607844
## Scruz -0.0134826 0.0097980 -1.376 0.181509
## Adjacent -0.0033669 0.0008051 -4.182 0.000333 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.774 on 24 degrees of freedom
## Multiple R-squared: 0.7827, Adjusted R-squared: 0.7374
## F-statistic: 17.29 on 5 and 24 DF, p-value: 2.874e-07
# Call:
# lm(formula = sqrt(Species) ~ ., data = gala)
# Residuals:
# Min 1Q Median 3Q Max
# -4.5572 -1.4969 -0.3031 1.3527 5.2110
# Coefficients:
# Estimate Std. Error t value Pr(>|t|)
# (Intercept) 3.3919243 0.8712678 3.893 0.000690 ***
# Area -0.0019718 0.0010199 -1.933 0.065080 .
# Elevation 0.0164784 0.0024410 6.751 5.55e-07 ***
# Nearest 0.0249326 0.0479495 0.520 0.607844
# Scruz -0.0134826 0.0097980 -1.376 0.181509
# Adjacent -0.0033669 0.0008051 -4.182 0.000333 ***
# ---
# Signif. codes: 0 *** 0.001 ** 0.01 * 0.05 . 0.1 1
# Residual standard error: 2.774 on 24 degrees of freedom
# Multiple R-squared: 0.7827, Adjusted R-squared: 0.7374
# F-statistic: 17.29 on 5 and 24 DF, p-value: 2.874e-07
plot(predict(modt),residuals(modt),xlab="Fitted",ylab="Residuals") # better fit

modp <- glm(Species ~ .,family=poisson, gala)
summary(modp)
##
## Call:
## glm(formula = Species ~ ., family = poisson, data = gala)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -8.2752 -4.4966 -0.9443 1.9168 10.1849
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.155e+00 5.175e-02 60.963 < 2e-16 ***
## Area -5.799e-04 2.627e-05 -22.074 < 2e-16 ***
## Elevation 3.541e-03 8.741e-05 40.507 < 2e-16 ***
## Nearest 8.826e-03 1.821e-03 4.846 1.26e-06 ***
## Scruz -5.709e-03 6.256e-04 -9.126 < 2e-16 ***
## Adjacent -6.630e-04 2.933e-05 -22.608 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 3510.73 on 29 degrees of freedom
## Residual deviance: 716.85 on 24 degrees of freedom
## AIC: 889.68
##
## Number of Fisher Scoring iterations: 5
# Call:
# glm(formula = Species ~ ., family = poisson, data = gala)
# Deviance Residuals:
# Min 1Q Median 3Q Max
# -8.2752 -4.4966 -0.9443 1.9168 10.1849
# Coefficients:
# Estimate Std. Error z value Pr(>|z|)
# (Intercept) 3.155e+00 5.175e-02 60.963 < 2e-16 ***
# Area -5.799e-04 2.627e-05 -22.074 < 2e-16 ***
# Elevation 3.541e-03 8.741e-05 40.507 < 2e-16 ***
# Nearest 8.826e-03 1.821e-03 4.846 1.26e-06 ***
# Scruz -5.709e-03 6.256e-04 -9.126 < 2e-16 ***
# Adjacent -6.630e-04 2.933e-05 -22.608 < 2e-16 ***
# ---
# Signif. codes: 0 *** 0.001 ** 0.01 * 0.05 . 0.1 1
# (Dispersion parameter for poisson family taken to be 1)
# Null deviance: 3510.73 on 29 degrees of freedom
# Residual deviance: 716.85 on 24 degrees of freedom
# AIC: 889.68
# Number of Fisher Scoring iterations: 5
plot(log(fitted(modp)),log((gala$Species-fitted(modp))^2),
xlab=expression(hat(mu)),ylab=expression((yhat(mu))^2),xlim=c(0,10),ylim=c(0,10))
abline(0,1)

dp <-sum(residuals(modp,type="pearson")^2)/modp$df.res;dp
## [1] 31.74914
summary(modp, dispersion=dp) # No effect on slope coefficients due to
##
## Call:
## glm(formula = Species ~ ., family = poisson, data = gala)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -8.2752 -4.4966 -0.9443 1.9168 10.1849
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.1548079 0.2915897 10.819 < 2e-16 ***
## Area -0.0005799 0.0001480 -3.918 8.95e-05 ***
## Elevation 0.0035406 0.0004925 7.189 6.53e-13 ***
## Nearest 0.0088256 0.0102621 0.860 0.390
## Scruz -0.0057094 0.0035251 -1.620 0.105
## Adjacent -0.0006630 0.0001653 -4.012 6.01e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 31.74914)
##
## Null deviance: 3510.73 on 29 degrees of freedom
## Residual deviance: 716.85 on 24 degrees of freedom
## AIC: 889.68
##
## Number of Fisher Scoring iterations: 5
# orthogonality but SE estimates are higher
# Call:
# glm(formula = Species ~ ., family = poisson, data = gala)
# Deviance Residuals:
# Min 1Q Median 3Q Max
# -8.2752 -4.4966 -0.9443 1.9168 10.1849
# Coefficients:
# Estimate Std. Error z value Pr(>|z|)
# (Intercept) 3.1548079 0.2915897 10.819 < 2e-16 ***
# Area -0.0005799 0.0001480 -3.918 8.95e-05 ***
# Elevation 0.0035406 0.0004925 7.189 6.53e-13 ***
# Nearest 0.0088256 0.0102621 0.860 0.390
# Scruz -0.0057094 0.0035251 -1.620 0.105
# Adjacent -0.0006630 0.0001653 -4.012 6.01e-05 ***
# ---
# Signif. codes: 0 *** 0.001 ** 0.01 * 0.05 . 0.1 1
# (Dispersion parameter for poisson family taken to be 31.74914)
# Null deviance: 3510.73 on 29 degrees of freedom
# Residual deviance: 716.85 on 24 degrees of freedom
# AIC: 889.68
# Number of Fisher Scoring iterations: 5
### Rate Regression example in Section 3.2 in Faraway's book
library(faraway)
data(dicentric)
# Purott and Reeder (1976) on he effect of gamma radiation
# on the numbers of chromosomal abnormalities
# ?dicentric
dicentric # Note that variable "cells" represents the number of hundreds of cells
## cells ca doseamt doserate
## 1 478 25 1.0 0.10
## 2 1907 102 1.0 0.25
## 3 2258 149 1.0 0.50
## 4 2329 160 1.0 1.00
## 5 1238 75 1.0 1.50
## 6 1491 100 1.0 2.00
## 7 1518 99 1.0 2.50
## 8 764 50 1.0 3.00
## 9 1367 100 1.0 4.00
## 10 328 52 2.5 0.10
## 11 185 51 2.5 0.25
## 12 342 100 2.5 0.50
## 13 310 100 2.5 1.00
## 14 278 107 2.5 1.50
## 15 259 107 2.5 2.00
## 16 249 102 2.5 2.50
## 17 298 110 2.5 3.00
## 18 243 107 2.5 4.00
## 19 210 100 5.0 0.10
## 20 138 113 5.0 0.25
## 21 160 144 5.0 0.50
## 22 120 106 5.0 1.00
## 23 90 111 5.0 1.50
## 24 100 132 5.0 2.00
## 25 313 419 5.0 2.50
## 26 182 225 5.0 3.00
## 27 144 206 5.0 4.00
# cells ca doseamt doserate
# 1 478 25 1.0 0.10
# 2 1907 102 1.0 0.25
# 3 2258 149 1.0 0.50
# 4 2329 160 1.0 1.00
# 5 1238 75 1.0 1.50
# 6 1491 100 1.0 2.00
# 7 1518 99 1.0 2.50
# 8 764 50 1.0 3.00
# 9 1367 100 1.0 4.00
# 10 328 52 2.5 0.10
# 11 185 51 2.5 0.25
# 12 342 100 2.5 0.50
# 13 310 100 2.5 1.00
# 14 278 107 2.5 1.50
# 15 259 107 2.5 2.00
# 16 249 102 2.5 2.50
# 17 298 110 2.5 3.00
# 18 243 107 2.5 4.00
# 19 210 100 5.0 0.10
# 20 138 113 5.0 0.25
# 21 160 144 5.0 0.50
# 22 120 106 5.0 1.00
# 23 90 111 5.0 1.50
# 24 100 132 5.0 2.00
# 25 313 419 5.0 2.50
# 26 182 225 5.0 3.00
# 27 144 206 5.0 4.00
round(xtabs(ca/cells ~ doseamt+doserate,dicentric),2)
## doserate
## doseamt 0.1 0.25 0.5 1 1.5 2 2.5 3 4
## 1 0.05 0.05 0.07 0.07 0.06 0.07 0.07 0.07 0.07
## 2.5 0.16 0.28 0.29 0.32 0.38 0.41 0.41 0.37 0.44
## 5 0.48 0.82 0.90 0.88 1.23 1.32 1.34 1.24 1.43
# doserate
# doseamt 0.1 0.25 0.5 1 1.5 2 2.5 3 4
# 1 0.05 0.05 0.07 0.07 0.06 0.07 0.07 0.07 0.07
# 2.5 0.16 0.28 0.29 0.32 0.38 0.41 0.41 0.37 0.44
# 5 0.48 0.82 0.90 0.88 1.23 1.32 1.34 1.24 1.43
lmod <- lm(ca/cells ~ log(doserate)*factor(doseamt),dicentric)
summary(lmod)
##
## Call:
## lm(formula = ca/cells ~ log(doserate) * factor(doseamt), data = dicentric)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.184275 -0.004212 0.001314 0.021208 0.089076
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.063489 0.019528 3.251 0.00382 **
## log(doserate) 0.004573 0.016692 0.274 0.78680
## factor(doseamt)2.5 0.276315 0.027616 10.005 1.92e-09 ***
## factor(doseamt)5 1.004119 0.027616 36.359 < 2e-16 ***
## log(doserate):factor(doseamt)2.5 0.063933 0.023606 2.708 0.01317 *
## log(doserate):factor(doseamt)5 0.239129 0.023606 10.130 1.54e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.05858 on 21 degrees of freedom
## Multiple R-squared: 0.9874, Adjusted R-squared: 0.9844
## F-statistic: 330 on 5 and 21 DF, p-value: < 2.2e-16
# Call:
# lm(formula = ca/cells ~ log(doserate) * factor(doseamt), data = dicentric)
# Residuals:
# Min 1Q Median 3Q Max
# -0.184275 -0.004212 0.001314 0.021208 0.089076
# Coefficients:
# Estimate Std. Error t value Pr(>|t|)
# (Intercept) 0.063489 0.019528 3.251 0.00382 **
# log(doserate) 0.004573 0.016692 0.274 0.78680
# factor(doseamt)2.5 0.276315 0.027616 10.005 1.92e-09 ***
# factor(doseamt)5 1.004119 0.027616 36.359 < 2e-16 ***
# log(doserate):factor(doseamt)2.5 0.063933 0.023606 2.708 0.01317 *
# log(doserate):factor(doseamt)5 0.239129 0.023606 10.130 1.54e-09 ***
# ---
# Signif. codes: 0 *** 0.001 ** 0.01 * 0.05 . 0.1 1
# Residual standard error: 0.05858 on 21 degrees of freedom
# Multiple R-squared: 0.9874, Adjusted R-squared: 0.9844
# F-statistic: 330 on 5 and 21 DF, p-value: < 2.2e-16
plot(residuals(lmod) ~fitted(lmod),xlab="Fitted",ylab="Residuals") # non-constant variance
abline(h=0)

dicentric$dosef <- factor(dicentric$doseamt)
pmod <- glm(ca ~ log(cells)+log(doserate)*dosef, family=poisson,dicentric) # interested in multiplicative effect
summary(pmod)
##
## Call:
## glm(formula = ca ~ log(cells) + log(doserate) * dosef, family = poisson,
## data = dicentric)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.49901 -0.62229 -0.05021 0.76919 1.59525
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.76534 0.38116 -7.255 4.02e-13 ***
## log(cells) 1.00252 0.05137 19.517 < 2e-16 ***
## log(doserate) 0.07200 0.03547 2.030 0.042403 *
## dosef2.5 1.62984 0.10273 15.866 < 2e-16 ***
## dosef5 2.76673 0.12287 22.517 < 2e-16 ***
## log(doserate):dosef2.5 0.16111 0.04837 3.331 0.000866 ***
## log(doserate):dosef5 0.19316 0.04299 4.493 7.03e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 916.127 on 26 degrees of freedom
## Residual deviance: 21.748 on 20 degrees of freedom
## AIC: 211.15
##
## Number of Fisher Scoring iterations: 4
# Call:
# glm(formula = ca ~ log(cells) + log(doserate) * dosef, family = poisson,
# data = dicentric)
# Deviance Residuals:
# Min 1Q Median 3Q Max
# -1.49901 -0.62229 -0.05021 0.76919 1.59525
# Coefficients:
# Estimate Std. Error z value Pr(>|z|)
# (Intercept) -2.76534 0.38116 -7.255 4.02e-13 ***
# log(cells) 1.00252 0.05137 19.517 < 2e-16 ***
# log(doserate) 0.07200 0.03547 2.030 0.042403 *
# dosef2.5 1.62984 0.10273 15.866 < 2e-16 ***
# dosef5 2.76673 0.12287 22.517 < 2e-16 ***
# log(doserate):dosef2.5 0.16111 0.04837 3.331 0.000866 ***
# log(doserate):dosef5 0.19316 0.04299 4.493 7.03e-06 ***
# ---
# Signif. codes: 0 *** 0.001 ** 0.01 * 0.05 . 0.1 1
# (Dispersion parameter for poisson family taken to be 1)
# Null deviance: 916.127 on 26 degrees of freedom
# Residual deviance: 21.748 on 20 degrees of freedom
# AIC: 211.15
# Number of Fisher Scoring iterations: 4
rmod <- glm(ca ~ offset(log(cells))+log(doserate)*dosef, # offset the coefficient of log(cells) to be 1
family=poisson,dicentric)
summary(rmod)
##
## Call:
## glm(formula = ca ~ offset(log(cells)) + log(doserate) * dosef,
## family = poisson, data = dicentric)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.49101 -0.62473 -0.05078 0.76786 1.59115
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.74671 0.03426 -80.165 < 2e-16 ***
## log(doserate) 0.07178 0.03518 2.041 0.041299 *
## dosef2.5 1.62542 0.04946 32.863 < 2e-16 ***
## dosef5 2.76109 0.04349 63.491 < 2e-16 ***
## log(doserate):dosef2.5 0.16122 0.04830 3.338 0.000844 ***
## log(doserate):dosef5 0.19350 0.04243 4.561 5.1e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 4753.00 on 26 degrees of freedom
## Residual deviance: 21.75 on 21 degrees of freedom
## AIC: 209.16
##
## Number of Fisher Scoring iterations: 4
# Call:
# glm(formula = ca ~ offset(log(cells)) + log(doserate) * dosef,
# family = poisson, data = dicentric)
# Deviance Residuals:
# Min 1Q Median 3Q Max
# -1.49101 -0.62473 -0.05078 0.76786 1.59115
# Coefficients:
# Estimate Std. Error z value Pr(>|z|)
# (Intercept) -2.74671 0.03426 -80.165 < 2e-16 ***
# log(doserate) 0.07178 0.03518 2.041 0.041299 *
# dosef2.5 1.62542 0.04946 32.863 < 2e-16 ***
# dosef5 2.76109 0.04349 63.491 < 2e-16 ***
# log(doserate):dosef2.5 0.16122 0.04830 3.338 0.000844 ***
# log(doserate):dosef5 0.19350 0.04243 4.561 5.1e-06 ***
# ---
# Signif. codes: 0 *** 0.001 ** 0.01 * 0.05 . 0.1 1
# (Dispersion parameter for poisson family taken to be 1)
# Null deviance: 4753.00 on 26 degrees of freedom
# Residual deviance: 21.75 on 21 degrees of freedom
# AIC: 209.16
# Number of Fisher Scoring iterations: 4
### Negative Binomial Regression example in Section 3.3 in Faraway's book
data(solder)
# five factors relevant to a wave-soldering procedure for mounting components on printed circuit boards.
# The response variable, skips, is a count of how many solder skips appeared to a visual inspection.
tail(solder)
## Opening Solder Mask PadType Panel skips
## 895 S Thin B6 W9 1 13
## 896 S Thin B6 W9 2 21
## 897 S Thin B6 W9 3 15
## 898 S Thin B6 L9 1 11
## 899 S Thin B6 L9 2 33
## 900 S Thin B6 L9 3 15
# Opening Solder Mask PadType Panel skips
# 895 S Thin B6 W9 1 13
# 896 S Thin B6 W9 2 21
# 897 S Thin B6 W9 3 15
# 898 S Thin B6 L9 1 11
# 899 S Thin B6 L9 2 33
# 900 S Thin B6 L9 3 15
modp <- glm(skips ~ . , family=poisson, data=solder)
summary(modp)
##
## Call:
## glm(formula = skips ~ ., family = poisson, data = solder)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.6413 -1.2562 -0.5286 0.5670 4.7289
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.10042 0.09267 -11.874 < 2e-16 ***
## OpeningM 0.57161 0.05707 10.017 < 2e-16 ***
## OpeningS 1.81475 0.05044 35.980 < 2e-16 ***
## SolderThin 0.84682 0.03327 25.453 < 2e-16 ***
## MaskA3 0.51315 0.07098 7.230 4.83e-13 ***
## MaskA6 1.81103 0.06609 27.404 < 2e-16 ***
## MaskB3 1.20225 0.06697 17.953 < 2e-16 ***
## MaskB6 1.86648 0.06310 29.580 < 2e-16 ***
## PadTypeD6 -0.40328 0.06028 -6.690 2.24e-11 ***
## PadTypeD7 -0.08979 0.05521 -1.626 0.103852
## PadTypeL4 0.22460 0.05117 4.389 1.14e-05 ***
## PadTypeL6 -0.70633 0.06637 -10.642 < 2e-16 ***
## PadTypeL7 -0.48260 0.06176 -7.814 5.52e-15 ***
## PadTypeL8 -0.30778 0.05862 -5.251 1.51e-07 ***
## PadTypeL9 -0.63793 0.06489 -9.831 < 2e-16 ***
## PadTypeW4 -0.19021 0.05671 -3.354 0.000796 ***
## PadTypeW9 -1.56252 0.09165 -17.048 < 2e-16 ***
## Panel 0.14670 0.01745 8.405 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 8788.2 on 899 degrees of freedom
## Residual deviance: 1829.0 on 882 degrees of freedom
## AIC: 3967.6
##
## Number of Fisher Scoring iterations: 5
# Call:
# glm(formula = skips ~ ., family = poisson, data = solder)
# Deviance Residuals:
# Min 1Q Median 3Q Max
# -3.6413 -1.2562 -0.5286 0.5670 4.7289
# Coefficients:
# Estimate Std. Error z value Pr(>|z|)
# (Intercept) -1.10042 0.09267 -11.874 < 2e-16 ***
# OpeningM 0.57161 0.05707 10.017 < 2e-16 ***
# OpeningS 1.81475 0.05044 35.980 < 2e-16 ***
# SolderThin 0.84682 0.03327 25.453 < 2e-16 ***
# MaskA3 0.51315 0.07098 7.230 4.83e-13 ***
# MaskA6 1.81103 0.06609 27.404 < 2e-16 ***
# MaskB3 1.20225 0.06697 17.953 < 2e-16 ***
# MaskB6 1.86648 0.06310 29.580 < 2e-16 ***
# PadTypeD6 -0.40328 0.06028 -6.690 2.24e-11 ***
# PadTypeD7 -0.08979 0.05521 -1.626 0.103852
# PadTypeL4 0.22460 0.05117 4.389 1.14e-05 ***
# PadTypeL6 -0.70633 0.06637 -10.642 < 2e-16 ***
# PadTypeL7 -0.48260 0.06176 -7.814 5.52e-15 ***
# PadTypeL8 -0.30778 0.05862 -5.251 1.51e-07 ***
# PadTypeL9 -0.63793 0.06489 -9.831 < 2e-16 ***
# PadTypeW4 -0.19021 0.05671 -3.354 0.000796 ***
# PadTypeW9 -1.56252 0.09165 -17.048 < 2e-16 ***
# Panel 0.14670 0.01745 8.405 < 2e-16 ***
# ---
# Signif. codes: 0 *** 0.001 ** 0.01 * 0.05 . 0.1 1
# (Dispersion parameter for poisson family taken to be 1)
# Null deviance: 8788.2 on 899 degrees of freedom
# Residual deviance: 1829.0 on 882 degrees of freedom
# AIC: 3967.6
# Number of Fisher Scoring iterations: 5
deviance(modp); df.residual(modp)
## [1] 1829.002
## [1] 882
pchisq(deviance(modp),df.residual(modp),lower=FALSE) # Overdispersion
## [1] 1.958183e-68
modp2 <- glm(skips ~ (Opening +Solder + Mask + PadType + Panel)^2 ,
family=poisson, data=solder)
summary(modp2)
##
## Call:
## glm(formula = skips ~ (Opening + Solder + Mask + PadType + Panel)^2,
## family = poisson, data = solder)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.3705 -0.8845 -0.2963 0.5035 3.4497
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.790428 0.439188 -8.631 < 2e-16 ***
## OpeningM 1.256622 0.387231 3.245 0.001174 **
## OpeningS 3.409668 0.317714 10.732 < 2e-16 ***
## SolderThin 2.799378 0.255617 10.951 < 2e-16 ***
## MaskA3 1.208574 0.396865 3.045 0.002324 **
## MaskA6 3.232144 0.324781 9.952 < 2e-16 ***
## MaskB3 2.401436 0.381673 6.292 3.14e-10 ***
## MaskB6 3.518764 0.361837 9.725 < 2e-16 ***
## PadTypeD6 -0.417397 0.406617 -1.027 0.304650
## PadTypeD7 0.494889 0.353410 1.400 0.161416
## PadTypeL4 1.255929 0.326825 3.843 0.000122 ***
## PadTypeL6 0.180790 0.423799 0.427 0.669675
## PadTypeL7 0.560943 0.400805 1.400 0.161651
## PadTypeL8 0.684032 0.372300 1.837 0.066163 .
## PadTypeL9 -0.563980 0.421458 -1.338 0.180843
## PadTypeW4 0.292149 0.370794 0.788 0.430755
## PadTypeW9 -0.977399 0.586792 -1.666 0.095780 .
## Panel 0.359817 0.111883 3.216 0.001300 **
## OpeningM:SolderThin -0.898370 0.172621 -5.204 1.95e-07 ***
## OpeningS:SolderThin -0.943802 0.146888 -6.425 1.32e-10 ***
## OpeningM:MaskA3 0.488411 0.307829 1.587 0.112595
## OpeningS:MaskA3 -0.277080 0.240924 -1.150 0.250113
## OpeningM:MaskA6 1.211972 0.208135 5.823 5.78e-09 ***
## OpeningS:MaskA6 NA NA NA NA
## OpeningM:MaskB3 -0.429647 0.310225 -1.385 0.166067
## OpeningS:MaskB3 -0.286922 0.237220 -1.210 0.226464
## OpeningM:MaskB6 0.003031 0.286150 0.011 0.991549
## OpeningS:MaskB6 -0.638078 0.223648 -2.853 0.004330 **
## OpeningM:PadTypeD6 -0.296590 0.270427 -1.097 0.272751
## OpeningS:PadTypeD6 0.293517 0.225890 1.299 0.193815
## OpeningM:PadTypeD7 -0.046054 0.234077 -0.197 0.844026
## OpeningS:PadTypeD7 0.072605 0.201275 0.361 0.718305
## OpeningM:PadTypeL4 -0.053800 0.211326 -0.255 0.799045
## OpeningS:PadTypeL4 -0.146391 0.181862 -0.805 0.420846
## OpeningM:PadTypeL6 -0.230838 0.277505 -0.832 0.405502
## OpeningS:PadTypeL6 -0.069632 0.233476 -0.298 0.765519
## OpeningM:PadTypeL7 -0.058891 0.274677 -0.214 0.830235
## OpeningS:PadTypeL7 0.152334 0.235529 0.647 0.517779
## OpeningM:PadTypeL8 -0.043336 0.243936 -0.178 0.858995
## OpeningS:PadTypeL8 -0.079820 0.209168 -0.382 0.702753
## OpeningM:PadTypeL9 0.198778 0.276167 0.720 0.471664
## OpeningS:PadTypeL9 0.131243 0.241726 0.543 0.587169
## OpeningM:PadTypeW4 -0.845318 0.232904 -3.629 0.000284 ***
## OpeningS:PadTypeW4 -0.308013 0.187299 -1.644 0.100073
## OpeningM:PadTypeW9 0.341746 0.410668 0.832 0.405313
## OpeningS:PadTypeW9 0.410660 0.361239 1.137 0.255618
## OpeningM:Panel -0.072105 0.074451 -0.968 0.332805
## OpeningS:Panel -0.142014 0.062907 -2.258 0.023975 *
## SolderThin:MaskA3 -0.524187 0.189994 -2.759 0.005798 **
## SolderThin:MaskA6 -1.797891 0.211964 -8.482 < 2e-16 ***
## SolderThin:MaskB3 -0.796132 0.183613 -4.336 1.45e-05 ***
## SolderThin:MaskB6 -0.885348 0.176606 -5.013 5.36e-07 ***
## SolderThin:PadTypeD6 -0.167949 0.154149 -1.090 0.275925
## SolderThin:PadTypeD7 -0.351283 0.137127 -2.562 0.010415 *
## SolderThin:PadTypeL4 -0.654798 0.124536 -5.258 1.46e-07 ***
## SolderThin:PadTypeL6 -0.621968 0.156753 -3.968 7.25e-05 ***
## SolderThin:PadTypeL7 -0.845301 0.144681 -5.843 5.14e-09 ***
## SolderThin:PadTypeL8 -0.685421 0.139256 -4.922 8.57e-07 ***
## SolderThin:PadTypeL9 -0.245324 0.160855 -1.525 0.127229
## SolderThin:PadTypeW4 -0.180558 0.145491 -1.241 0.214596
## SolderThin:PadTypeW9 -0.299370 0.219428 -1.364 0.172468
## SolderThin:Panel 0.139539 0.040894 3.412 0.000644 ***
## MaskA3:PadTypeD6 0.116158 0.324330 0.358 0.720232
## MaskA6:PadTypeD6 0.216038 0.296473 0.729 0.466190
## MaskB3:PadTypeD6 0.224998 0.300249 0.749 0.453633
## MaskB6:PadTypeD6 0.245083 0.284969 0.860 0.389770
## MaskA3:PadTypeD7 -0.091424 0.274262 -0.333 0.738873
## MaskA6:PadTypeD7 -0.192284 0.251813 -0.764 0.445108
## MaskB3:PadTypeD7 -0.270055 0.258299 -1.046 0.295786
## MaskB6:PadTypeD7 -0.217501 0.241330 -0.901 0.367450
## MaskA3:PadTypeL4 0.022784 0.257184 0.089 0.929409
## MaskA6:PadTypeL4 -0.279622 0.239522 -1.167 0.243041
## MaskB3:PadTypeL4 -0.111186 0.242380 -0.459 0.646431
## MaskB6:PadTypeL4 -0.248325 0.228901 -1.085 0.277984
## MaskA3:PadTypeL6 0.276448 0.331857 0.833 0.404825
## MaskA6:PadTypeL6 -0.180375 0.318414 -0.566 0.571069
## MaskB3:PadTypeL6 0.056605 0.317345 0.178 0.858432
## MaskB6:PadTypeL6 -0.153403 0.302870 -0.506 0.612507
## MaskA3:PadTypeL7 -0.085910 0.314366 -0.273 0.784637
## MaskA6:PadTypeL7 -0.140562 0.291300 -0.483 0.629426
## MaskB3:PadTypeL7 0.044196 0.291202 0.152 0.879367
## MaskB6:PadTypeL7 -0.253888 0.278761 -0.911 0.362415
## MaskA3:PadTypeL8 -0.040412 0.293952 -0.137 0.890654
## MaskA6:PadTypeL8 -0.344603 0.275354 -1.251 0.210754
## MaskB3:PadTypeL8 0.021563 0.274125 0.079 0.937301
## MaskB6:PadTypeL8 -0.239589 0.261266 -0.917 0.359127
## MaskA3:PadTypeL9 0.155884 0.317129 0.492 0.623038
## MaskA6:PadTypeL9 -0.171715 0.297031 -0.578 0.563193
## MaskB3:PadTypeL9 -0.182335 0.304914 -0.598 0.549848
## MaskB6:PadTypeL9 -0.226337 0.286099 -0.791 0.428877
## MaskA3:PadTypeW4 0.102471 0.307238 0.334 0.738739
## MaskA6:PadTypeW4 0.175358 0.283053 0.620 0.535572
## MaskB3:PadTypeW4 0.098612 0.287371 0.343 0.731484
## MaskB6:PadTypeW4 0.407102 0.269275 1.512 0.130574
## MaskA3:PadTypeW9 -0.467682 0.466202 -1.003 0.315776
## MaskA6:PadTypeW9 -0.720748 0.424011 -1.700 0.089162 .
## MaskB3:PadTypeW9 -0.592526 0.432584 -1.370 0.170770
## MaskB6:PadTypeW9 0.126737 0.379424 0.334 0.738362
## MaskA3:Panel -0.046212 0.088742 -0.521 0.602545
## MaskA6:Panel -0.057908 0.083316 -0.695 0.487031
## MaskB3:Panel -0.115932 0.083653 -1.386 0.165785
## MaskB6:Panel -0.186039 0.079046 -2.354 0.018594 *
## PadTypeD6:Panel -0.101387 0.074753 -1.356 0.175008
## PadTypeD7:Panel -0.073970 0.068566 -1.079 0.280670
## PadTypeL4:Panel -0.117661 0.063676 -1.848 0.064629 .
## PadTypeL6:Panel -0.126999 0.082249 -1.544 0.122569
## PadTypeL7:Panel -0.184934 0.076590 -2.415 0.015753 *
## PadTypeL8:Panel -0.114420 0.072797 -1.572 0.116005
## PadTypeL9:Panel 0.055960 0.081316 0.688 0.491338
## PadTypeW4:Panel -0.096242 0.070465 -1.366 0.171998
## PadTypeW9:Panel -0.227473 0.113050 -2.012 0.044204 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 8788.2 on 899 degrees of freedom
## Residual deviance: 1068.8 on 790 degrees of freedom
## AIC: 3391.4
##
## Number of Fisher Scoring iterations: 6
deviance(modp2); df.residual(modp2)
## [1] 1068.817
## [1] 790
pchisq(deviance(modp2),df.residual(modp2),lower=FALSE) # Still overdispersion
## [1] 1.130696e-10
library(MASS)
## Warning: package 'MASS' was built under R version 3.1.3
modn <- glm(skips ~ . , negative.binomial(1),solder) # NB with link k=1
summary(modn) # Less overdispersion
##
## Call:
## glm(formula = skips ~ ., family = negative.binomial(1), data = solder)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.9628 -0.8021 -0.2373 0.3204 2.1097
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.69933 0.16334 -10.404 < 2e-16 ***
## OpeningM 0.50854 0.08797 5.781 1.03e-08 ***
## OpeningS 1.99966 0.08069 24.783 < 2e-16 ***
## SolderThin 1.04894 0.06364 16.483 < 2e-16 ***
## MaskA3 0.65710 0.10463 6.280 5.29e-10 ***
## MaskA6 2.52649 0.12123 20.841 < 2e-16 ***
## MaskB3 1.27261 0.10780 11.806 < 2e-16 ***
## MaskB6 2.08026 0.10482 19.845 < 2e-16 ***
## PadTypeD6 -0.46118 0.13788 -3.345 0.000859 ***
## PadTypeD7 0.01608 0.13350 0.120 0.904185
## PadTypeL4 0.46883 0.13046 3.594 0.000344 ***
## PadTypeL6 -0.47115 0.13799 -3.414 0.000669 ***
## PadTypeL7 -0.29494 0.13620 -2.165 0.030618 *
## PadTypeL8 -0.08493 0.13432 -0.632 0.527372
## PadTypeL9 -0.52125 0.13854 -3.763 0.000179 ***
## PadTypeW4 -0.14250 0.13481 -1.057 0.290781
## PadTypeW9 -1.48361 0.15317 -9.686 < 2e-16 ***
## Panel 0.16932 0.03811 4.443 1.00e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(1) family taken to be 0.5560534)
##
## Null deviance: 1743.01 on 899 degrees of freedom
## Residual deviance: 558.67 on 882 degrees of freedom
## AIC: 3883.7
##
## Number of Fisher Scoring iterations: 9
modn <- glm.nb(skips ~ .,solder)
summary (modn)
##
## Call:
## glm.nb(formula = skips ~ ., data = solder, init.theta = 4.397157245,
## link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.7376 -1.0068 -0.3834 0.4460 2.7829
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.42245 0.14274 -9.965 < 2e-16 ***
## OpeningM 0.50294 0.07976 6.306 2.87e-10 ***
## OpeningS 1.91317 0.07152 26.750 < 2e-16 ***
## SolderThin 0.93932 0.05362 17.517 < 2e-16 ***
## MaskA3 0.58981 0.09651 6.112 9.87e-10 ***
## MaskA6 2.26734 0.10182 22.269 < 2e-16 ***
## MaskB3 1.21101 0.09637 12.566 < 2e-16 ***
## MaskB6 1.99037 0.09223 21.580 < 2e-16 ***
## PadTypeD6 -0.46592 0.11238 -4.146 3.38e-05 ***
## PadTypeD7 -0.03315 0.10673 -0.311 0.756114
## PadTypeL4 0.38268 0.10265 3.728 0.000193 ***
## PadTypeL6 -0.57844 0.11413 -5.068 4.01e-07 ***
## PadTypeL7 -0.36656 0.11094 -3.304 0.000953 ***
## PadTypeL8 -0.15890 0.10821 -1.468 0.141986
## PadTypeL9 -0.56600 0.11393 -4.968 6.77e-07 ***
## PadTypeW4 -0.20044 0.10873 -1.844 0.065255 .
## PadTypeW9 -1.56460 0.13621 -11.486 < 2e-16 ***
## Panel 0.16369 0.03139 5.214 1.85e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(4.3972) family taken to be 1)
##
## Null deviance: 4043.3 on 899 degrees of freedom
## Residual deviance: 1008.3 on 882 degrees of freedom
## AIC: 3683.3
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 4.397
## Std. Err.: 0.495
##
## 2 x log-likelihood: -3645.309