In this section we illustrate the large-sample distribution of the maximum likelihood estimators of the parameter vector \(\beta\) in generalized linear models (GLMs). We select two subclasses of GLMs for this purpose: the Poisson and the Gamma subclasses.
For the Poisson model we use the canonical link function \(g(\mu) = \log(\mu)\), where \(\mu\) is the mean of the Poisson distribution.
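In symbols, the data-generating model used below is
\[
\log(\mu_i) = \beta_1 x_{i1} + \beta_2 x_{i2} + \beta_3 x_{i3}, \qquad y_i \sim \mathrm{Poisson}(\mu_i), \qquad i = 1,\dots,n,
\]
with no intercept, which matches the \(-1\) in the model formula used later.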
set.seed(1)
n <- 75
X <- round(cbind( runif(n), rnorm(n,mean=2.5), rnorm(n,mean=4,sd=0.7)),3)
colnames(X) <- c("X1","X2","X3")
dim(X)
## [1] 75 3
head(X)
##         X1    X2    X3
## [1,] 0.266 3.738 4.019
## [2,] 0.372 2.221 2.824
## [3,] 0.573 4.258 4.738
## [4,] 0.908 3.061 3.216
## [5,] 0.202 2.047 4.235
## [6,] 0.898 1.668 4.346
t_beta <- c(0.3,-0.85,1.4) # True vector of parameters
t_beta
## [1] 0.30 -0.85 1.40
mus <- exp(X%*%t_beta)
Some generated mean rates:
mus[1]
## [1] 12.54221
mus[5]
## [1] 70.08088
mus[10]
## [1] 4.318681
X <- as.data.frame(X)
y <- vector()
for(i in 1:n){
  y[i] <- rpois(1,lambda=mus[i])
}
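As a side note, rpois() accepts a vector of means, so the loop above can be replaced by a single call that is equivalent in distribution (the individual draws may differ for a given seed):
y <- rpois(n, lambda = mus)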
A look at the simulated data:
head(y)
## [1] 11 12 19 9 73 130
We store the fitted model in the object modelo:
modelo <- glm( y ~ X1 + X2 + X3 - 1, data = X, family=poisson(link="log"))
The estimates of \(\beta\), \((\hat{\beta}_1,\hat{\beta}_2,\hat{\beta}_3)\), that we obtained are:
coefficients(modelo)
##         X1         X2         X3
##  0.3642174 -0.8632765  1.3984768
Not too bad!
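If we also want the estimated standard errors that accompany these estimates, summary() reports them:
summary(modelo)$coefficients # estimate, standard error, z value and p-value per coefficient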
At this point we would like to know how \(\hat{\beta}\) behaves across different samples of size 75, so we run a simulation with 2000 replicates.
R <- 2000
beta_est <- matrix(0,R,length(t_beta))
for(j in 1:R){
  for(i in 1:n){
    y[i] <- rpois(1,lambda=mus[i])
  }
  modelo <- glm( y ~ X1 + X2 + X3 - 1, data = X, family=poisson)
  beta_est[j,] <- coefficients(modelo)
}
dim(beta_est)
## [1] 2000 3
head(beta_est)
##           [,1]       [,2]     [,3]
## [1,] 0.3054600 -0.8300668 1.389878
## [2,] 0.1947542 -0.8598615 1.419746
## [3,] 0.2943365 -0.8388141 1.396366
## [4,] 0.2452309 -0.8403793 1.400544
## [5,] 0.3832833 -0.8286538 1.379890
## [6,] 0.3147097 -0.8435801 1.397779
Now, we build the histograms of the sampling distribution of each estimator.
par(mfrow = c(1,3))
# n = 75
hist(beta_est[,1],probability =TRUE,main="n=75",xlab="beta_1")
abline(v=t_beta[1],col=2)
hist(beta_est[,2],main="",xlab="beta_2")
abline(v=t_beta[2],col=2)
hist(beta_est[,3],main="",xlab="beta_3")
abline(v=t_beta[3],col=2)
A small detail: what happens if we calculate the mean of the estimates?
round(apply(beta_est,2,mean),4)
## [1] 0.2986 -0.8504 1.4003
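A quick way to quantify the agreement is the empirical bias, i.e. the Monte Carlo mean minus the true value:
round(apply(beta_est,2,mean)-t_beta,4) # empirical bias of each estimator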
Note how close these means are to the true parameters. That is not a coincidence: it reflects the asymptotic unbiasedness of the maximum likelihood estimators. Our next step is to increase the sample size from 75 to 150 and run the simulation again. Let's do it!
n <- 150
X <- round(cbind( runif(n),rnorm(n,mean=2.5),rnorm(n,mean=4,sd=0.7)),3)
colnames(X) <- c("X1","X2","X3")
t_beta <- c(0.3,-0.85,1.4)
mus <- exp(X%*%t_beta)
X <- as.data.frame(X)
R <- 2000
beta_est <- matrix(0,R,length(t_beta))
for(j in 1:R){
  for(i in 1:n){
    y[i] <- rpois(1,lambda=mus[i])
  }
  modelo <- glm( y ~ X1 + X2 + X3 - 1, data = X, family=poisson)
  beta_est[j,] <- coefficients(modelo)
}
head(beta_est)
##           [,1]       [,2]     [,3]
## [1,] 0.3112911 -0.8449979 1.396879
## [2,] 0.2797386 -0.8414142 1.400866
## [3,] 0.3418305 -0.8599406 1.397906
## [4,] 0.3134295 -0.8505529 1.396809
## [5,] 0.3199420 -0.8504368 1.399191
## [6,] 0.2889897 -0.8343628 1.394790
par(mfrow = c(1,3))
hist(beta_est[,1],probability =TRUE,main="n=150",xlab="beta_1")
abline(v=t_beta[1],col=3)
hist(beta_est[,2],main="",xlab="beta_2")
abline(v=t_beta[2],col=3)
hist(beta_est[,3],main="",xlab="beta_3")
abline(v=t_beta[3],col=3)
Once again, if we calculate the mean of the estimates:
round(apply(beta_est,2,mean),4)
## [1] 0.3004 -0.8500 1.4000
Almost exactly the true \(\beta\)! Again we see the asymptotic unbiasedness property. Hence, increasing the sample size tends to decrease the bias of the maximum likelihood estimators of the GLM parameters.
For the Gamma model we use the canonical link function \(g(\mu) = 1/\mu\), where \(\mu\) is the mean of the Gamma distribution.
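In symbols, the setup simulated below is
\[
\frac{1}{\mu_i} = \beta_0 + \beta_1 x_{i1}, \qquad y_i \sim \mathrm{Gamma}\!\left(\text{shape} = \mu_i/a,\ \text{scale} = a\right), \qquad i = 1,\dots,n,
\]
so that \(E[y_i] = (\mu_i/a)\,a = \mu_i\), where \(a\) is the scale parameter fixed at 0.5 in the code.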
n <- 75
X <- round( cbind(1,rbinom(n,2,prob=0.5)),3)
colnames(X) <- c("X0","X1")
head(X)
##      X0 X1
## [1,]  1  1
## [2,]  1  2
## [3,]  1  2
## [4,]  1  0
## [5,]  1  2
## [6,]  1  1
t_beta <- c(2,0.5) # True vector of parameters
class(X%*%t_beta)
## [1] "matrix"
mus <- 1/(X%*%t_beta)
head(mus)
##           [,1]
## [1,] 0.4000000
## [2,] 0.3333333
## [3,] 0.3333333
## [4,] 0.5000000
## [5,] 0.3333333
## [6,] 0.4000000
X <- as.data.frame(X)
Now, we generate the response variable:
y <- vector()
a <- 0.5 # scale parameter of the Gamma distribution
for(i in 1:n){
  y[i] <- rgamma(1,shape=mus[i]/a,scale=a)
}
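As with the Poisson case, rgamma() is vectorized over its shape argument, so the loop above can be replaced by a single, distributionally equivalent call:
y <- rgamma(n, shape=mus/a, scale=a)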
A look at the simulated data:
head(y)
## [1] 0.812321854 0.474464491 1.322622983 1.397729753 0.047241528 0.002230431
We save the fitted model in the object modelo:
modelo <- glm( y ~ X1, data = X,family=Gamma(link="inverse"))
The estimates of \(\beta\), \((\hat{\beta}_0,\hat{\beta}_1)\), that we obtained are:
coefficients(modelo)
## (Intercept)        X1
##   1.5324503 0.3194273
At this point we would like to know how \(\hat{\beta}\) behaves across different samples of size 75. Hence, we run a simulation with 2000 replicates.
R <- 2000
beta_est <- matrix(0,R,length(t_beta))
for(j in 1:R){
  for(i in 1:n){
    y[i] <- rgamma(1,shape=mus[i]/a,scale=a)
  }
  modelo <- glm( y ~ X1, data = X, family=Gamma(link="inverse"))
  beta_est[j,] <- coefficients(modelo)
}
dim(beta_est)
## [1] 2000 2
head(beta_est)
##          [,1]        [,2]
## [1,] 2.529832  0.29653244
## [2,] 1.661596  0.61541646
## [3,] 2.845312  0.32871863
## [4,] 2.055302  0.13261503
## [5,] 2.128957 -0.07600624
## [6,] 1.820576  0.61509114
Now, we compute the sample mean of the estimates.
round(apply(beta_est,2,mean),3)
## [1] 2.068 0.533
That is a reasonable result, given that the sample size is not very large. Now, we build the histograms of the sampling distribution of each estimator.
par(mfrow = c(1,2))
hist(beta_est[,1],probability =TRUE,main="n=75",xlab="beta_0")
abline(v=t_beta[1],col=2)
hist(beta_est[,2],main="",xlab="beta_1")
abline(v=t_beta[2],col=2)
Our next step is to increase the sample size from 75 to 250 and run the simulation again. Let's do it!
n <- 250
X <- round( cbind(1,rbinom(n,2,prob=0.5)),3)
colnames(X) <- c("X0","X1")
head(X)
##      X0 X1
## [1,]  1  2
## [2,]  1  1
## [3,]  1  0
## [4,]  1  1
## [5,]  1  1
## [6,]  1  1
t_beta <- c(2,0.5)
mus <- 1/(X%*%t_beta)
X <- as.data.frame(X)
y <- vector()
a <- 0.5
R <- 2000
beta_est <- matrix(0,R,length(t_beta))
for(j in 1:R){
  for(i in 1:n){
    y[i] <- rgamma(1,shape=mus[i]/a,scale=a)
  }
  modelo <- glm( y ~ X1, data = X, family=Gamma(link="inverse"))
  beta_est[j,] <- coefficients(modelo)
}
dim(beta_est)
## [1] 2000 2
head(beta_est)
##          [,1]      [,2]
## [1,] 2.117790 0.4335311
## [2,] 1.906907 0.5667441
## [3,] 2.007842 0.7777121
## [4,] 1.901961 0.6618822
## [5,] 2.147098 0.2727310
## [6,] 2.222859 0.2653439
round(apply(beta_est,2,mean),3)
## [1] 2.029 0.501
par(mfrow = c(1,2))
hist(beta_est[,1],probability =TRUE,main="n=250",xlab="beta_0")
abline(v=t_beta[1],col=2)
hist(beta_est[,2],main="",xlab="beta_1")
abline(v=t_beta[2],col=2)
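Besides the mean, the spread of the Monte Carlo estimates is also informative, since it shrinks as the sample size grows; for the current run it can be computed as:
apply(beta_est,2,sd) # Monte Carlo standard deviation of each estimator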
I would like to invite you to try the other subclasses of GLMs.
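As a starting point, here is a minimal sketch for the binomial subclass (logistic regression) with the canonical logit link, following the same scheme; the sample size, covariates and true parameters are arbitrary choices for illustration:
set.seed(2)
n <- 150
X <- data.frame(X1=runif(n), X2=rnorm(n))
t_beta <- c(-1,0.8,1.5) # true (intercept, beta_1, beta_2), chosen arbitrarily
eta <- t_beta[1] + t_beta[2]*X$X1 + t_beta[3]*X$X2 # linear predictor
p <- 1/(1+exp(-eta)) # inverse of the logit link
R <- 2000
beta_est <- matrix(0,R,length(t_beta))
for(j in 1:R){
  y <- rbinom(n,size=1,prob=p)
  modelo <- glm( y ~ X1 + X2, data = X, family=binomial(link="logit"))
  beta_est[j,] <- coefficients(modelo)
}
round(apply(beta_est,2,mean),4) # compare with t_beta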