Graphical Model Checks for GLMs

This is a very quick extension of this article to GLM models with gaussian, poisson and binomial families.

#invlogit function for binomial models, code taken from the arm package
invlogit<-function(x){
  1/(1+exp(-x))
}

#the function
glm.test<-function(m){
  require(plyr)
  #the model frame
  dat<-model.frame(m)
  #the model matrix
  f<-formula(m)
  modmat<-model.matrix(f,dat)
  #the standard deviation of the residuals
  sd.resid<-sd(resid(m))
  #sample size
  n<-dim(dat)[1]
  #the family
  fam<-family(m)[1]
  #simulate 8 response vectors from model
  if(fam=="gaussian"){
    ys<-lapply(1:8,function(x) rnorm(n,modmat%*%coef(m),sd.resid))
    #refit the models
    ms<-llply(ys,function(y) glm(y~modmat[,-1],family="gaussian")) #remove first column since GLM automatically adds an intercept terms
  }
  if(fam=="poisson"){
    ys<-lapply(1:8,function(x) rpois(n,exp(modmat%*%coef(m))))
    #refit the models
    ms<-llply(ys,function(y) glm(y~modmat[,-1],family="poisson"))
  }
  if(fam=="binomial"){
    names(dat)[dim(dat)[2]]<-"W"
    ys<-lapply(1:8,function(x) rbinom(n,dat$W,invlogit(modmat%*%coef(m)))/dat$W)
    #refit the models
    ms<-llply(ys,function(y) glm(y~modmat[,-1],family="binomial",weights=dat$W))
  }
  #put the residuals and fitted values in a list
  df<-llply(ms,function(x) data.frame(Fitted=fitted(x),Resid=resid(x,type="pearson")))
  #select a random number from 2 to 8
  rnd<-sample(2:8,1)
  #put the original data into the list
  df<-c(df[1:(rnd-1)],list(data.frame(Fitted=fitted(m),Resid=resid(m,type="pearson"))),df[rnd:8])
  
  #plot 
  par(mfrow=c(3,3))
  l_ply(df,function(x){
    plot(Resid~Fitted,x,xlab="Fitted",ylab="Residuals")
    abline(h=0,lwd=2,lty=2)
  })
  
  l_ply(df,function(x){
    qqnorm(x$Resid)
    qqline(x$Resid)
  })
  
  out<-list(Position=rnd)
  return(out)
}

Let’s put this in action:

#test for a poisson model
m<-glm(cyl~mpg+disp,family="poisson",data=mtcars)
glm.test(m) #seems to be a poor model

## Loading required package: plyr

## $Position
## [1] 4

#test for a binomial model
library(reshape2)
#use the Titanic dataset to model the proportion of survivors to the titanic depending on various parameters
tit<-melt(Titanic[,,,])
P_survi<-tit[17:32,"value"]/(tit[17:32,"value"]+tit[1:16,"value"])
N<-tit[17:32,"value"]+tit[1:16,"value"]
tit_m<-cbind(tit[1:16,1:3],P_survi,N)
tit_m<-tit_m[!is.na(tit_m$P_survi),]
m<-glm(P_survi~Class+Sex+Age,tit_m,family="binomial",weights=tit_m$N)
glm.test(m) #over-dispersed data

## $Position
## [1] 5

So far the function will only work for the canonical link function (ie log for poisson data). I

Graphical Model Checks for GLMs

Lionel

03.08.2015