### Deviance example in Section 6.3 in Faraway's book.

### IRLS example in Section 6.2 in Faraway's book.

library(faraway)

data(bliss)
# The data is from 5 experiments in Bliss(1935) on the effect of insecticide concentration.
# In each experiment, there are 30 insects. There are five doses of insecticide concentration. 
# The number of death and alive insects are recorded.

# ?bliss

bliss
##   dead alive conc
## 1    2    28    0
## 2    8    22    1
## 3   15    15    2
## 4   23     7    3
## 5   27     3    4
#   dead alive conc
# 1    2    28    0
# 2    8    22    1
# 3   15    15    2
# 4   23     7    3
# 5   27     3    4

modl <- glm(cbind(dead,alive) ~ conc,family=binomial(link=logit), bliss) # logit link is default for binomial family
summary(modl) $coef
##              Estimate Std. Error   z value     Pr(>|z|)
## (Intercept) -2.323790  0.4178878 -5.560798 2.685438e-08
## conc         1.161895  0.1814158  6.404598 1.507665e-10
             # Estimate Std. Error   z value     Pr(>|z|)
# (Intercept) -2.323790  0.4178878 -5.560798 2.685438e-08
# conc         1.161895  0.1814158  6.404598 1.507665e-10

### Obtaining the coefficients from IRLS
y <- bliss$dead/30; mu <- y             # Response is the sample proportion
eta <- logit(mu)                        
z <- eta + (y-mu)/(mu*(1-mu))           # Initial response
w <- 30*mu*(1-mu)                       # Initial weights
lmod <- lm(z ~ conc, weights=w, bliss)  # Initial LS estimate of beta
coef(lmod)
## (Intercept)        conc 
##   -2.302462    1.153587
# (Intercept)        conc 
#   -2.302462    1.153587


### 5 iterations of IRLS

for(i in 1:5){
 eta <- lmod$fit
 mu <- ilogit(eta)                      # g-inverse of eta to update mu
 z <- eta + (y-mu)/(mu*(1-mu))          # Updating the response
 w <- 30*mu*(1-mu)                      # Updating the weights
 lmod <- lm(z ~ bliss$conc, weights=w)  # Updating the estimate of beta
 cat(i,coef(lmod),"\n")
}
## 1 -2.323672 1.161847 
## 2 -2.32379 1.161895 
## 3 -2.32379 1.161895 
## 4 -2.32379 1.161895 
## 5 -2.32379 1.161895
# 1 -2.323672 1.161847 
# 2 -2.32379 1.161895 
# 3 -2.32379 1.161895 
# 4 -2.32379 1.161895 
# 5 -2.32379 1.161895 

summary(lmod)                           # The SE calculation from LS fit is not correct
## 
## Call:
## lm(formula = z ~ bliss$conc, weights = w)
## 
## Weighted Residuals:
##          1          2          3          4          5 
## -4.325e-01  3.644e-01 -4.163e-16  6.415e-02 -2.081e-01 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -2.32379    0.14621  -15.89 0.000542 ***
## bliss$conc   1.16189    0.06348   18.30 0.000356 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3499 on 3 degrees of freedom
## Multiple R-squared:  0.9911, Adjusted R-squared:  0.9882 
## F-statistic: 335.1 on 1 and 3 DF,  p-value: 0.0003557
# Call:
# lm(formula = z ~ bliss$conc, weights = w)

# Weighted Residuals:
         # 1          2          3          4          5 
# -4.325e-01  3.644e-01 -4.163e-16  6.415e-02 -2.081e-01 

# Coefficients:
            # Estimate Std. Error t value Pr(>|t|)    
# (Intercept) -2.32379    0.14621  -15.89 0.000542 ***
# bliss$conc   1.16189    0.06348   18.30 0.000356 ***
# ---
# Signif. codes:  0 ?***? 0.001 ?**? 0.01 ?*? 0.05 ?.? 0.1 ? ? 1

# Residual standard error: 0.3499 on 3 degrees of freedom
# Multiple R-squared:  0.9911,    Adjusted R-squared:  0.9882 
# F-statistic: 335.1 on 1 and 3 DF,  p-value: 0.0003557


xm <- model.matrix(lmod)
wm <- diag(w)
t(xm) %*% wm %*% xm                     # Fisher Information Matrix for beta
##             (Intercept) bliss$conc
## (Intercept)    23.26472   46.52945
## bliss$conc     46.52945  123.44324
            # (Intercept) bliss$conc
# (Intercept)    23.26472   46.52945
# bliss$conc     46.52945  123.44324

sqrt(diag(solve(t(xm) %*% wm %*% xm)))  # Correct estimation of SE from WLS.
## (Intercept)  bliss$conc 
##   0.4178878   0.1814158
# (Intercept)  bliss$conc 
  # 0.4178878   0.1814158 


data(bliss)
# The data is from 5 experiments in Bliss(1935) on the effect of insecticide concentration.
# In each experiment, there are 30 insects. There are five doses of insecticide concentration. 
# The number of death and alive insects are recorded.

# ?bliss                                # Data description

bliss
##   dead alive conc
## 1    2    28    0
## 2    8    22    1
## 3   15    15    2
## 4   23     7    3
## 5   27     3    4
#   dead alive conc
# 1    2    28    0
# 2    8    22    1
# 3   15    15    2
# 4   23     7    3
# 5   27     3    4

modl <- glm(cbind(dead,alive) ~ conc,family=binomial(link=logit), bliss)# logit link is default for binomial family

summary(modl)                               # Deviance at the end of the summary
## 
## Call:
## glm(formula = cbind(dead, alive) ~ conc, family = binomial(link = logit), 
##     data = bliss)
## 
## Deviance Residuals: 
##       1        2        3        4        5  
## -0.4510   0.3597   0.0000   0.0643  -0.2045  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -2.3238     0.4179  -5.561 2.69e-08 ***
## conc          1.1619     0.1814   6.405 1.51e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 64.76327  on 4  degrees of freedom
## Residual deviance:  0.37875  on 3  degrees of freedom
## AIC: 20.854
## 
## Number of Fisher Scoring iterations: 4
# Call:
# glm(formula = cbind(dead, alive) ~ conc, family = binomial(link = logit), 
    # data = bliss)

# Deviance Residuals: 
      # 1        2        3        4        5  
# -0.4510   0.3597   0.0000   0.0643  -0.2045  

# Coefficients:
            # Estimate Std. Error z value Pr(>|z|)    
# (Intercept)  -2.3238     0.4179  -5.561 2.69e-08 ***
# conc          1.1619     0.1814   6.405 1.51e-10 ***
# ---
# Signif. codes:  0 ?***? 0.001 ?**? 0.01 ?*? 0.05 ?.? 0.1 ? ? 1

# (Dispersion parameter for binomial family taken to be 1)

    # Null deviance: 64.76327  on 4  degrees of freedom
# Residual deviance:  0.37875  on 3  degrees of freedom
# AIC: 20.854

# Number of Fisher Scoring iterations: 4

### Goodness-of-fit example in Section 6.3 in Faraway's book.

1-pchisq(deviance(modl),df.residual(modl))  # Chi-square test shows no evidence of a lack of fit
## [1] 0.9445968
# [1] 0.9445968
anova(modl,test="Chi")                      # Analysis of deviance shows again the significance of concentration
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: cbind(dead, alive)
## 
## Terms added sequentially (first to last)
## 
## 
##      Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## NULL                     4     64.763              
## conc  1   64.385         3      0.379 1.024e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Analysis of Deviance Table

# Model: binomial, link: logit

# Response: cbind(dead, alive)

# Terms added sequentially (first to last)

     # Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
# NULL                     4     64.763              
# conc  1   64.385         3      0.379 1.024e-15 ***
# ---
# Signif. codes:  0 ?***? 0.001 ?**? 0.01 ?*? 0.05 ?.? 0.1 ? ? 1

modl2 <- glm(cbind(dead, alive) ~ conc+I(conc^2), family=binomial,bliss)# A more complex model with quadratic terms 
anova(modl,modl2,test="Chi")                # No need of the quadratic term
## Analysis of Deviance Table
## 
## Model 1: cbind(dead, alive) ~ conc
## Model 2: cbind(dead, alive) ~ conc + I(conc^2)
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1         3    0.37875                     
## 2         2    0.19549  1  0.18325   0.6686
# Analysis of Deviance Table

# Model 1: cbind(dead, alive) ~ conc
# Model 2: cbind(dead, alive) ~ conc + I(conc^2)
  # Resid. Df Resid. Dev Df Deviance Pr(>Chi)
# 1         3    0.37875                     
# 2         2    0.19549  1  0.18325   0.6686


### Residual example in Section 6.4 in Faraway's book.
residuals(modl,"response");bliss$dead/30 - fitted(modl)                 # Response residuals
##             1             2             3             4             5 
## -2.250510e-02  2.834353e-02 -3.330669e-16  4.989802e-03 -1.082823e-02
##             1             2             3             4             5 
## -2.250510e-02  2.834353e-02 -3.330669e-16  4.989802e-03 -1.082823e-02
            # 1             2             3             4             5 
# -2.250510e-02  2.834353e-02 -3.330669e-16  4.989802e-03 -1.082823e-02 
            # 1             2             3             4             5 
# -2.250510e-02  2.834353e-02 -3.330669e-16  4.989802e-03 -1.082823e-02

rd <- residuals(modl);rd                    # Deviance residuals 
##           1           2           3           4           5 
## -0.45101510  0.35969607  0.00000000  0.06430235 -0.20449347
          # 1           2           3           4           5 
# -0.45101510  0.35969607  0.00000000  0.06430235 -0.20449347

rp <- residuals(modl,"pearson");rp          # Pearson residuals
##             1             2             3             4             5 
## -4.325234e-01  3.643729e-01 -3.648565e-15  6.414687e-02 -2.081068e-01
            # 1             2             3             4             5 
# -4.325234e-01  3.643729e-01 -3.648565e-15  6.414687e-02 -2.081068e-01
 
library(wle)                                                            # Package on weighted likelihood estimation
## Warning: package 'wle' was built under R version 3.1.3
## Loading required package: circular
## 
## Attaching package: 'circular'
## 
## The following objects are masked from 'package:stats':
## 
##     sd, var
ra <- residualsAnscombe(bliss$dead/30,mu=modl$fitted.values,
 family=binomial());ra                                                  # Anscombe residuals    
##             1             2             3             4             5 
##  3.781873e-01  2.814127e-01 -3.303109e-15 -2.039920e-01 -4.979114e-01
            # 1             2             3             4             5 
 # 3.781873e-01  2.814127e-01 -3.303109e-15 -2.039920e-01 -4.979114e-01