This real data example illustrates the method of maximum likelihood estimation for the parameters of the logistic regression model. The R code compares the results using the following methods:
optim().glm().The results are virtually the same, as expected.
# Delete Memory
rm(list=ls())
# Required packages
library(knitr)
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
## Warning: package 'knitr' was built under R version 3.4.3
# Beetle data
dat = cbind(c(1.6907, 1.7242, 1.7552, 1.7842, 1.8113,1.8369, 1.861, 1.8839),
c(59, 60, 62, 56, 63, 59, 62, 60),
c(6, 13, 18, 28, 52, 53, 61, 60))
# Naming the columns
colnames(dat) <- c("Dose", "N","x")
# Displaying the data
kable(dat, digits = 4)
| Dose | N | x |
|---|---|---|
| 1.6907 | 59 | 6 |
| 1.7242 | 60 | 13 |
| 1.7552 | 62 | 18 |
| 1.7842 | 56 | 28 |
| 1.8113 | 63 | 52 |
| 1.8369 | 59 | 53 |
| 1.8610 | 62 | 61 |
| 1.8839 | 60 | 60 |
ns = length(dat[,1]) # number of responses
ni <- dat[,2] # numbers of beetles
yi <- dat[,3] # numbers of dead beetles after exposure
xi <- dat[,1] # Dose
###############################################################################################################
# Maximum Likelihood Estimation using three methods
###############################################################################################################
#--------------------------
# Using Newton's method
#--------------------------
theta = c(0,0) # Initial value
N = 100 # Number of iterations
iter <- matrix(0,nrow=N,ncol=2)
# Running the iterations
for(i in 1:N){
iter[i,] <- theta
pi <- exp(theta[1] + theta[2]*xi)/( 1 + exp(theta[1] + theta[2]*xi) )
S <- c( sum(yi - ni*pi), sum(xi*(yi - ni*pi)))
I <- rbind( c(sum(ni*pi*(1-pi)), sum(ni*xi*pi*(1-pi)) ), c(sum(ni*xi*pi*(1-pi)), sum(ni*xi^2*pi*(1-pi)) ) )
theta <- theta + solve(I)%*%S
}
# Illustrating the iterations
plot(iter[1:10,], type = "p")
head(iter)
## [,1] [,2]
## [1,] 0.00000 0.00000
## [2,] -37.85638 21.33743
## [3,] -53.85319 30.38351
## [4,] -59.96521 33.84419
## [5,] -60.70778 34.26485
## [6,] -60.71745 34.27032
df <- data.frame(x = iter[,1], y = iter[,2], f = iter[,2])
p <- df %>%
plot_ly(
x = ~x,
y = ~y,
frame = ~f,
type = 'scatter',
mode = 'markers',
showlegend = F,
size = 40
)
p
# MLE after 100 iterations
theta
## [,1]
## [1,] -60.71745
## [2,] 34.27033
# LD50
-theta[1]/theta[2]
## [1] 1.771721
#--------------------------
# Direct implementation
#--------------------------
# log-likelihood function
lpl = function(par){
var <- vector()
for(i in 1:ns) var[i] = dat[i,3]*log(plogis(par[1]+par[2]*dat[i,1])) + (dat[i,2]-dat[i,3])*log(1-plogis(par[1]+par[2]*dat[i,1]))
return(-sum(var))
}
# Optimisation step using optim
OPT <- optim(c(-60,30),lpl,control=list(maxit=20000,abstol=1e-18,reltol=1e-18))
# MLE
OPT$par
## [1] -60.71746 34.27033
#--------------------------
# Using glm
#--------------------------
glm(cbind(yi,ni-yi) ~ xi, family=binomial(logit), data=data.frame(dat))
##
## Call: glm(formula = cbind(yi, ni - yi) ~ xi, family = binomial(logit),
## data = data.frame(dat))
##
## Coefficients:
## (Intercept) xi
## -60.72 34.27
##
## Degrees of Freedom: 7 Total (i.e. Null); 6 Residual
## Null Deviance: 284.2
## Residual Deviance: 11.23 AIC: 41.43
###############################################################################################################
# Visualisation of the fitted dose-response model
###############################################################################################################
# Proportions
propi <- yi/ni
# Fitted logistic model
fit.logis <- Vectorize(function(d) plogis(theta[1] + theta[2]*d))
curve(fit.logis,1.6,2, xlab = "Dose", ylab = "Proportion", main = "Fitted Model", lwd = 2, cex.axis = 1.5, cex.lab = 1.5)
points(xi,propi,pch=10,col="red",lwd=2)