## simulate data
n = 500

beta = c(1, .5)

x = rep(0, n)    # 500 empty vectors
y = rep(0, n)
py1 = rep(0, n)

set.seed(34)
for (i in 1:n) {
  x[i] = rnorm(1)
  py1[i] = 1/(1  + exp(-(beta[1] + beta[2] * x[i])))
  y[i] = rbinom(1, 1, py1[i])    
}

ddf = data.frame(x, y = as.factor(y), py1)
(head(ddf))
##            x y       py1
## 1 -0.1388900 0 0.7171879
## 2 -0.7113750 0 0.6557277
## 3 -0.5752482 1 0.6709259
## 4  0.1882496 1 0.7491577
## 5  0.6706200 1 0.7917176
## 6  0.8844074 1 0.8087957
summary(ddf)
##        x            y            py1        
##  Min.   :-3.32039   0:136   Min.   :0.3407  
##  1st Qu.:-0.66455   1:364   1st Qu.:0.6610  
##  Median :-0.06432           Median :0.7247  
##  Mean   : 0.01974           Mean   :0.7222  
##  3rd Qu.: 0.73023           3rd Qu.:0.7966  
##  Max.   : 2.94274           Max.   :0.9221

rbinom(n, size, prob) generates a given number of random values of given probability from a given sample. The size is the total number of trials.

## plot
# x vs p(y=1|x)
(plot(x, py1))

## NULL
## marginal of y
(table(ddf$y)/n)
## 
##     0     1 
## 0.272 0.728
## x|y
boxplot(x ~ y, ddf)

1 is more likely than 0

## fit logit in R
lfit = glm(y ~ x, ddf, family=binomial())
summary(lfit)
## 
## Call:
## glm(formula = y ~ x, family = binomial(), data = ddf)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.2264  -1.1725   0.6333   0.8171   1.5922  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   1.0578     0.1075   9.838  < 2e-16 ***
## x             0.6274     0.1120   5.600 2.15e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 585.24  on 499  degrees of freedom
## Residual deviance: 549.92  on 498  degrees of freedom
## AIC: 553.92
## 
## Number of Fisher Scoring iterations: 4
## plot fit
phat = predict(lfit, type="response")
plot(x, py1, col="blue", xlab="x", ylab="P(Y=1|x)")
points(x, phat, col="red", lwd=2)

glm(formula, family=binomial, data, …) is used to fit generalized linear models, specified by giving a symbolic description of the linear predictor and a description of the error distribution. family) is a description of the error disribution and link function used to be in the model. Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

Intercept: 1.0578, x: 0.6274

Blue: real value Red: predicted value

## - log likelihood
mLL1 = function(x,y,beta){
  n = length(y)
  mll = 0.0
  for (i in 1:n) {
    py1 = 1/(1+ exp(-(beta[1] + beta[2] * x[i])))
    if (y[i] ==1) {
      mll = mll - log(py1)
    } else {
      mll = mll - log(1 - py1)
    }
  }
  return(mll)
}

Let’s compute the likelihood (maximize it later). Typically, you don’t optimize the likelihood, you optimize the log likelihood. You have to multiply the likelihoods, but if you have the log likelihood, you just need to add them, which is more computationally stable.

## get -LL on beta1 grid
nval = 1000
p = 2 # number of parameters
bMat = matrix(0.0, nval, p)    # 1000*2 matrix full of 0s
bMat[,1] = lfit$coef[1]
bMat[,2] = seq(from=0, to=1, length.out=nval)    # generates sequence of numbers with equal intervals
llv = rep(0, nval)    # 1*1000 vector of 0s

# compute the sum of -LLs
for (i in 1:nval){
  llv[i] = mLL1(x, y, bMat[i,])
}

plot(bMat[,2], llv)
abline(v=beta[2], col="blue")
ii = which.min(llv)
abline(v=bMat[ii,2], col="red")    # min(-LL)

## row of bMat at min:
bMat[ii, ]
## [1] 1.0577788 0.6276276
##  check
lfit$coef
## (Intercept)           x 
##   1.0577788   0.6274077

bMat is a matrix; first column is the intercepts. second column is a sequence of numbers from 0 to 1 We can compute the sum of (-)log likelihoods for each row of the matrix.

Blue is the true value of the slope (kept the intercept constant and varied the slope), red is the minimum.

## get -LL on bivariate grid
nval = 1
b1g = seq(from=0, to=2, length.out=nval)
b2g = seq(from=0, to=1, length.out=nval)
bg = expand.grid(b1g, b2g)
nn = nrow(bg)

llv1 = rep(0, nn)
tm1 = system.time({
for (i in 1:nn){
  if ((i%%100)==0) cat("i: ", i, "\n")
  llv1[i] = mLL1(x, y, bg[i,])
  }
})

# takes too long

expand.grid() creates a data frame from all combinations of the supplied vectors or factors.

bgM = as.matrix(bg)
llv2 = rep(0, nn)
tm2 = system.time({
  for (i in 1:nn) {
    if ((i%%1000)==0) cat("i: ", i, "\n")
    llv2[i] = mLL1(x, y, bgM[i, ])
  }
})
bgMM = cbind(bgM[, 1], bgM[, 2])
llv3 = rep(0, nn)
tm3 = system.time({
  for (i in 1:nn){
    if ((i%%1000)==0) cat("i: ", i, "\n")
llv3[i] = mLL1(x, y, bgMM[i, ])
  }
})
mLL = function(x, y, beta){
  py1 = 1/(1+exp(-beta[1] + beta[2]*x))
  return(-sum(ifelse(y==1, log(py1), log(1-py1))))
}

llv4 = rep(0, nn)
tm4 = system.time({
  for(i in 1:nn){
    llv4[i] = mLL(x, y, bgMM[i, ])
  }
})