This is a very quick extension of this article to GLM models with gaussian, poisson and binomial families.
#invlogit function for binomial models, code taken from the arm package
invlogit<-function(x){
1/(1+exp(-x))
}
#the function
glm.test<-function(m){
require(plyr)
#the model frame
dat<-model.frame(m)
#the model matrix
f<-formula(m)
modmat<-model.matrix(f,dat)
#the standard deviation of the residuals
sd.resid<-sd(resid(m))
#sample size
n<-dim(dat)[1]
#the family
fam<-family(m)[1]
#simulate 8 response vectors from model
if(fam=="gaussian"){
ys<-lapply(1:8,function(x) rnorm(n,modmat%*%coef(m),sd.resid))
#refit the models
ms<-llply(ys,function(y) glm(y~modmat[,-1],family="gaussian")) #remove first column since GLM automatically adds an intercept terms
}
if(fam=="poisson"){
ys<-lapply(1:8,function(x) rpois(n,exp(modmat%*%coef(m))))
#refit the models
ms<-llply(ys,function(y) glm(y~modmat[,-1],family="poisson"))
}
if(fam=="binomial"){
names(dat)[dim(dat)[2]]<-"W"
ys<-lapply(1:8,function(x) rbinom(n,dat$W,invlogit(modmat%*%coef(m)))/dat$W)
#refit the models
ms<-llply(ys,function(y) glm(y~modmat[,-1],family="binomial",weights=dat$W))
}
#put the residuals and fitted values in a list
df<-llply(ms,function(x) data.frame(Fitted=fitted(x),Resid=resid(x,type="pearson")))
#select a random number from 2 to 8
rnd<-sample(2:8,1)
#put the original data into the list
df<-c(df[1:(rnd-1)],list(data.frame(Fitted=fitted(m),Resid=resid(m,type="pearson"))),df[rnd:8])
#plot
par(mfrow=c(3,3))
l_ply(df,function(x){
plot(Resid~Fitted,x,xlab="Fitted",ylab="Residuals")
abline(h=0,lwd=2,lty=2)
})
l_ply(df,function(x){
qqnorm(x$Resid)
qqline(x$Resid)
})
out<-list(Position=rnd)
return(out)
}
Let’s put this in action:
#test for a poisson model
m<-glm(cyl~mpg+disp,family="poisson",data=mtcars)
glm.test(m) #seems to be a poor model
## Loading required package: plyr
## $Position
## [1] 4
#test for a binomial model
library(reshape2)
#use the Titanic dataset to model the proportion of survivors to the titanic depending on various parameters
tit<-melt(Titanic[,,,])
P_survi<-tit[17:32,"value"]/(tit[17:32,"value"]+tit[1:16,"value"])
N<-tit[17:32,"value"]+tit[1:16,"value"]
tit_m<-cbind(tit[1:16,1:3],P_survi,N)
tit_m<-tit_m[!is.na(tit_m$P_survi),]
m<-glm(P_survi~Class+Sex+Age,tit_m,family="binomial",weights=tit_m$N)
glm.test(m) #over-dispersed data
## $Position
## [1] 5
So far the function will only work for the canonical link function (ie log for poisson data). I