1 Using simulated data

1.1 Individually randomly sampled

rm(list = ls())
library(maxLik)
library(tidyverse)

xvec1 <- rbinom(5000, 1, 0.3)

xvec2 <- rbinom(5000, 1, 0.7)

yvec <- rbinom(5000, 1, 0.1)

data2 <- tibble(Y = yvec, X1 = xvec1, X2 = xvec2)


1.2 estimates using glm as benchmark

logit = glm(yvec ~ -1 + xvec1 + xvec2,
            data=data2, family=binomial(link='logit'))
summary(logit)
## 
## Call:
## glm(formula = yvec ~ -1 + xvec1 + xvec2, family = binomial(link = "logit"), 
##     data = data2)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.1774  -0.7116  -0.5112  -0.2808   2.5507  
## 
## Coefficients:
##       Estimate Std. Error z value Pr(>|z|)    
## xvec1 -1.24451    0.09423  -13.21   <2e-16 ***
## xvec2 -1.96924    0.05915  -33.29   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 6931.5  on 5000  degrees of freedom
## Residual deviance: 4101.4  on 4998  degrees of freedom
## AIC: 4105.4
## 
## Number of Fisher Scoring iterations: 5


1.3 estimates using optim

BL_LL = function(param, Data, X, Y){
  num = as.matrix(Data[,X]) %*% as.vector(param[1:length(X)])
  prb = exp(num) / (1+exp(num))
  llik = Data[,Y]*log(prb) + (1-Data[,Y])*log(1-prb)
  return(-sum(llik))}

D = data2
X = c('X1', "X2")
Y ='Y'
sv = matrix(0, ncol=length(X))

k = optim(par=sv, fn=BL_LL, Data=D, X=X, Y=Y, method="BFGS")
k
## $par
##          [,1]      [,2]
## [1,] -1.24458 -1.969195
## 
## $value
## [1] 2050.723
## 
## $counts
## function gradient 
##       17        6 
## 
## $convergence
## [1] 0
## 
## $message
## NULL


1.4 estimates using maxLik

BL_LL = function(param, Data, X, Y){
  num = as.matrix(Data[,X]) %*% as.vector(param[1:length(X)])
  prb = exp(num) / (1+exp(num))
  llik = Data[,Y]*log(prb) + (1-Data[,Y])*log(1-prb)
  return(-sum(llik))}

D = data2
X = c('X1', "X2")
Y ='Y'
sv = matrix(0, ncol=length(X))

k = maxLik(BL_LL, start=sv, Data=D, X=X, Y=Y, method="BFGS")
k
## Maximum Likelihood estimation
## BFGS maximization, 52 iterations
## Return code 0: successful convergence 
## Log-Likelihood: 99526.88 (2 free parameter(s))
## Estimate(s): 9.228009 27.58616

something is wrong here???


2 Using real-world data

Example from this site.

data = NULL
data = read.csv("https://stats.idre.ucla.edu/stat/data/binary.csv")

data$rank2 = ifelse(data$rank==2,1,0)
data$rank3 = ifelse(data$rank==3,1,0)
data$rank4 = ifelse(data$rank==4,1,0)
data$cst = 1

2.1 estimates using glm as benchmark

logit = glm(admit ~ -1 + cst + gre + gpa + rank2 + rank3 + rank4,
            data=data, family=binomial(link='logit'))
summary(logit)
## 
## Call:
## glm(formula = admit ~ -1 + cst + gre + gpa + rank2 + rank3 + 
##     rank4, family = binomial(link = "logit"), data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.6268  -0.8662  -0.6388   1.1490   2.0790  
## 
## Coefficients:
##        Estimate Std. Error z value Pr(>|z|)    
## cst   -3.989979   1.139951  -3.500 0.000465 ***
## gre    0.002264   0.001094   2.070 0.038465 *  
## gpa    0.804038   0.331819   2.423 0.015388 *  
## rank2 -0.675443   0.316490  -2.134 0.032829 *  
## rank3 -1.340204   0.345306  -3.881 0.000104 ***
## rank4 -1.551464   0.417832  -3.713 0.000205 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 554.52  on 400  degrees of freedom
## Residual deviance: 458.52  on 394  degrees of freedom
## AIC: 470.52
## 
## Number of Fisher Scoring iterations: 4

2.2 estimates using optim

BL_LL = function(param, Data, X, Y){
  num = as.matrix(Data[,X]) %*% as.vector(param[1:length(X)])
  prb = exp(num) / (1+exp(num))
  llik = Data[,Y]*log(prb) + (1-Data[,Y])*log(1-prb)
  return(-sum(llik))}


D = data
X = c('cst','gre','gpa','rank2','rank3','rank4')
Y ='admit'
sv = matrix(0, ncol=length(X))

k = optim(par=sv, fn=BL_LL, Data=D, X=X, Y=Y, method="BFGS")
k
## $par
##           [,1]        [,2]      [,3]      [,4]      [,5]      [,6]
## [1,] -3.772676 0.001375522 0.8982012 -0.675543 -1.356555 -1.563396
## 
## $value
## [1] 229.5915
## 
## $counts
## function gradient 
##       49       12 
## 
## $convergence
## [1] 0
## 
## $message
## NULL

2.3 estimates using maxLik

k = maxLik(BL_LL, start=sv, Data=D, X=X, Y=Y, method="BFGS")
k
## Maximum Likelihood estimation
## BFGS maximization, 120 iterations
## Return code 0: successful convergence 
## Log-Likelihood: 7220.065 (6 free parameter(s))
## Estimate(s): 0.1401774 0.0442631 0.270924 -0.04615459 0.05586034 0.1174557