Logistic Regression: The Beetle data set

Maximum likelihood estimation in the logistic regression model

This real data example illustrates the method of maximum likelihood estimation for the parameters of the logistic regression model. The R code compares the results using the following methods:

The Newton’s method.
A direct implementation. Implementing the log-likelihood function and maximising it using the command optim().
Using the command glm().

The results are virtually the same, as expected.

# Delete Memory
rm(list=ls())

# Required packages
library(knitr)
library(plotly)

## Loading required package: ggplot2

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

## Warning: package 'knitr' was built under R version 3.4.3
# Beetle data
dat =  cbind(c(1.6907, 1.7242, 1.7552, 1.7842, 1.8113,1.8369, 1.861, 1.8839), 
             c(59, 60, 62, 56, 63, 59, 62, 60), 
             c(6, 13, 18, 28, 52, 53, 61, 60))

# Naming the columns
colnames(dat) <- c("Dose", "N","x")

# Displaying the data
kable(dat, digits = 4)

Dose	N	x
1.6907	59	6
1.7242	60	13
1.7552	62	18
1.7842	56	28
1.8113	63	52
1.8369	59	53
1.8610	62	61
1.8839	60	60

ns = length(dat[,1]) # number of responses
ni <- dat[,2] # numbers of beetles
yi <- dat[,3] # numbers of dead beetles after exposure
xi <- dat[,1] # Dose

###############################################################################################################
# Maximum Likelihood Estimation using three methods
###############################################################################################################

#--------------------------
# Using  Newton's method
#--------------------------
theta = c(0,0) # Initial value

N = 100 # Number of iterations 

iter <- matrix(0,nrow=N,ncol=2)

# Running the iterations
for(i in 1:N){
  iter[i,] <- theta
  pi <- exp(theta[1] + theta[2]*xi)/( 1 + exp(theta[1] + theta[2]*xi) )
  S <- c( sum(yi - ni*pi), sum(xi*(yi - ni*pi)))
  I <- rbind( c(sum(ni*pi*(1-pi)), sum(ni*xi*pi*(1-pi)) ), c(sum(ni*xi*pi*(1-pi)), sum(ni*xi^2*pi*(1-pi)) ) )
  theta <- theta + solve(I)%*%S
  
}

# Illustrating the iterations
plot(iter[1:10,], type = "p")

head(iter)

##           [,1]     [,2]
## [1,]   0.00000  0.00000
## [2,] -37.85638 21.33743
## [3,] -53.85319 30.38351
## [4,] -59.96521 33.84419
## [5,] -60.70778 34.26485
## [6,] -60.71745 34.27032

df <- data.frame(x = iter[,1], y = iter[,2], f = iter[,2])
p <- df %>%
  plot_ly(
    x = ~x,
    y = ~y,
    frame = ~f,
    type = 'scatter',
    mode = 'markers',
    showlegend = F,
    size = 40
  )

p

# MLE after 100 iterations
theta

##           [,1]
## [1,] -60.71745
## [2,]  34.27033

# LD50
-theta[1]/theta[2]

## [1] 1.771721

#--------------------------
# Direct implementation
#--------------------------

# log-likelihood function
lpl = function(par){
  var <- vector()
  for(i in 1:ns) var[i] = dat[i,3]*log(plogis(par[1]+par[2]*dat[i,1])) + (dat[i,2]-dat[i,3])*log(1-plogis(par[1]+par[2]*dat[i,1]))
  return(-sum(var))
}

# Optimisation step using optim
OPT <- optim(c(-60,30),lpl,control=list(maxit=20000,abstol=1e-18,reltol=1e-18))
# MLE
OPT$par

## [1] -60.71746  34.27033

#--------------------------
# Using glm
#--------------------------
glm(cbind(yi,ni-yi) ~ xi, family=binomial(logit), data=data.frame(dat))

## 
## Call:  glm(formula = cbind(yi, ni - yi) ~ xi, family = binomial(logit), 
##     data = data.frame(dat))
## 
## Coefficients:
## (Intercept)           xi  
##      -60.72        34.27  
## 
## Degrees of Freedom: 7 Total (i.e. Null);  6 Residual
## Null Deviance:       284.2 
## Residual Deviance: 11.23     AIC: 41.43

###############################################################################################################
# Visualisation of the fitted dose-response model
###############################################################################################################

# Proportions
propi <- yi/ni

# Fitted logistic model
fit.logis <- Vectorize(function(d)  plogis(theta[1] + theta[2]*d))

curve(fit.logis,1.6,2, xlab = "Dose", ylab = "Proportion", main = "Fitted Model", lwd = 2, cex.axis = 1.5, cex.lab = 1.5)
points(xi,propi,pch=10,col="red",lwd=2)

Logistic Regression: The Beetle data set

F. Javier Rubio

01 February, 2020

Maximum likelihood estimation in the logistic regression model