We will use a version of the example from https://rpubs.com/bbolker/3423 with simulated data. First we created a function that would generate data for an arbitrary number of groups. The data simulates 3 individuals per group, with each individual being measured every two days. We simulate a non-linear response given by the four parameter logistic function (SSfpl). This response will have group-specific noise on asymp.R and residual noise (set to 0.01 for both).

Below is the data-generating function.

library(nlme)
library(ggplot2)
library(lme4)
library(gridExtra)
createDataset <- function(nGroups, totalDaysMeasured, asymp.L,asymp.R,scale,xmid) {
  #EFFECT: Creates a dataset consisting of 3*nGroups individuals sampled over totalDaysMeasured days.  Each response 
  # consists of the four parameter logistic SSfpl with noise associated with group on asymp.R
  residualSD <- 0.01
  groupNoiseSD <- 0.01
  nDays <- totalDaysMeasured
  groups <- rep(paste0("g",1:nGroups),each=3)
  nIndividuals <- length(groups)

  days <- seq(from=0,to=2*(nDays-1),by=2)
  data <- data.frame(Day = rep(days,each=nIndividuals),
                    Group = rep(groups,nDays),
                    Individual = rep(1:nIndividuals,nDays))

  asymp.RNoiseByIndividual <- rnorm(nIndividuals,sd=groupNoiseSD) 
  names(asymp.RNoiseByIndividual) <-paste0("g",1:nGroups)
  data$X <- with(data,SSfpl(Day, asymp.L, asymp.R[Group]+asymp.RNoiseByIndividual[Individual], xmid, scale))+rnorm(nrow(data),mean=0,sd=residualSD)
  data
}

We will create 4 different datasets - from two groups to five groups - in order to compare performance of nlme and nlmer, using 100 days and 4PL parameters as follows:

asymp.L = 1
scale = 5
xmid = 100
asymp.R specific for each group

totalDaysMeasured <- 100
asymp.L <- 1
scale <- 5
xmid <- totalDaysMeasured 
  
asymp.R <- c("g1"=0.3,"g2"=0.4,"g3"=0.5,"g4"=0.6,"g5"=0.7)
data2Groups <- createDataset(2,totalDaysMeasured, asymp.L, asymp.R[1:2],scale,xmid)
data3Groups <- createDataset(3,totalDaysMeasured, asymp.L, asymp.R[1:3],scale,xmid)
data4Groups <- createDataset(4,totalDaysMeasured, asymp.L, asymp.R[1:4], scale,xmid)
data5Groups <- createDataset(5,totalDaysMeasured, asymp.L, asymp.R[1:5], scale,xmid)

grid.arrange(
  ggplot(data2Groups,aes(x=Day,y=X,colour=Group))+geom_point(),
  ggplot(data3Groups,aes(x=Day,y=X,colour=Group))+geom_point(),
  ggplot(data4Groups,aes(x=Day,y=X,colour=Group))+geom_point(),
  ggplot(data5Groups,aes(x=Day,y=X,colour=Group))+geom_point())

We will set the initial values to be the exact values used to generate the data

initialValues2Groups <-  c( asymp.L=asymp.L,
                     asymp.R1=asymp.R[[1]], asymp.R2=asymp.R[[2]],
                     xmid=xmid,
                     scale=scale)

initialValues3Groups <-  c( asymp.L=asymp.L,
                     asymp.R1=asymp.R[[1]], asymp.R2=asymp.R[[2]], asymp.R3=asymp.R[[3]],
                     xmid=xmid,
                     scale=scale)

initialValues4Groups <-  c( asymp.L=asymp.L,
                     asymp.R1=asymp.R[[1]], asymp.R2=asymp.R[[2]], asymp.R3=asymp.R[[3]], asymp.R4=asymp.R[[4]],
                     xmid=xmid,
                     scale=scale)

initialValues5Groups <-  c( asymp.L=asymp.L,
                     asymp.R1=asymp.R[[1]], asymp.R2=asymp.R[[2]], asymp.R3=asymp.R[[3]], asymp.R4=asymp.R[[4]], asymp.R5=asymp.R[[5]],
                     xmid=xmid,
                     scale=scale)

NLME Fit

Below we do the NLME fit for the four cases. The speed is more or less the same on all cases:

fpl <- function(x,A,B,xmid,scale) {
  A+(B-A)/(1+exp((xmid-x)/scale))
}

fitNLME <- function(data, initialValues) {
  labels <- names(initialValues)
  initialValuesReordered <- c(labels[grepl("asymp.R",labels )],"xmid","scale","asymp.L")
  
  startTimeNLME <- Sys.time()
  nlmeFit <- nlme(model  = X ~ fpl(Day, asymp.L, asymp.R, xmid, scale),
       fixed  = list(asymp.R ~ Group, xmid+scale+asymp.L~1),
       random = asymp.R ~ 1 | Individual,
       start =  list(fixed=initialValues[initialValuesReordered]), 
       data=data)
  endTimeNLME <- Sys.time()
  print(paste0("NLME Time Required for ",deparse(substitute(data)),": ",endTimeNLME-startTimeNLME))
  nlmeFit
}

fit2Groups <- fitNLME(data2Groups,initialValues2Groups)

## [1] "NLME Time Required for data2Groups: 0.0458040237426758"

fit3Groups <- fitNLME(data3Groups,initialValues3Groups)

## [1] "NLME Time Required for data3Groups: 0.0375699996948242"

fit4Groups <- fitNLME(data4Groups,initialValues4Groups)

## [1] "NLME Time Required for data4Groups: 0.0526559352874756"

fit5Groups <- fitNLME(data5Groups,initialValues5Groups)

## [1] "NLME Time Required for data5Groups: 0.0502560138702393"

NLMER Fit

And below we do the nlmer fit for the four cases. We followed the methodology described by Ben Bolker (https://rpubs.com/bbolker/3423) to incorporate non-trivial fixed effects in nlmer. The speed increases with the number of fixed effects.

fitNlmer <- function(data, initialValues, env=parent.frame()) {
  nGroups <- length(unique(data$Group))
  mm <- model.matrix(~Group,data=data)
  stringForEquation <- "B0"
  for (i in 1:(nGroups-1)) {
    indicatorVariableName <- paste0("grp",i+1)
    assign(indicatorVariableName,mm[,i+1])
    stringForEquation <- paste0(stringForEquation,"+",
                                      "B",i,"*",
                                      indicatorVariableName)
  }
  nparams <- c("A",paste0("B",0:(nGroups-1)),"xmid","scale")
  fpl <- deriv(as.formula(paste0("~A+((",stringForEquation,")-A)/(1+exp((xmid-x)/scale))")),
               nparams,
               function.arg=c("x",nparams),
               envir=environment())
  attr(fpl,"pnames") <- nparams

  tmpstr <- deparse(fpl)
  L1 <- grep("^ +\\.value +<-",tmpstr)
  L2 <- grep("^ +attr\\(\\.value",tmpstr)
  tmpstr2 <- c(tmpstr[1:L1],
  paste0(".actualArgs <- as.list(match.call()[",
         deparse(nparams),"])"),
  tmpstr[(L1+1):(L2-1)],
   "dimnames(.grad) <- list(NULL, .actualArgs)",
  tmpstr[L2:length(tmpstr)])
  fpl <- eval(parse(text=tmpstr2),envir = environment())
  nlmerString <- paste0("X ~ fpl(Day, asymp.L, ",paste(paste0("asymp.R",1:nGroups),collapse=", "),", xmid, scale) ~ asymp.R1|Individual")
  startTimeNLMER <- Sys.time()
  nlmerFit <- do.call("nlmer",list(
      as.formula(nlmerString),
      start=initialValues,
      data=data)
  )

  endTimeNLMER <- Sys.time()
  print(paste0("Time required for the ",deparse(substitute(data)),": ",endTimeNLMER-startTimeNLMER))
  nlmerFit
}

fitNlmer2Groups <- fitNlmer(data2Groups, initialValues2Groups)

## [1] "Time required for the data2Groups: 0.404773950576782"

fitNlmer3Groups <- fitNlmer(data3Groups, initialValues3Groups)

## [1] "Time required for the data3Groups: 0.579570055007935"

fitNlmer4Groups <- fitNlmer(data4Groups, initialValues4Groups)

## [1] "Time required for the data4Groups: 0.957509994506836"

fitNlmer5Groups <- fitNlmer(data5Groups, initialValues5Groups)

## [1] "Time required for the data5Groups: 1.68412184715271"

QUESTION:

This is a trivial example, but with more of my own complicated cases the differences in performance are huge, something that takes seconds in NLME would take minutes in NLMER.

Is there any way to improve the performance of NLMER when using non-trivial fixed effects? Would NLMER be any better than NLME to make it worth the wait?

Reproducible example of non-trivial fixed effects in nlmer and comparison in speed to nlme

Ramiro Barrantes Reynolds

11 August, 2020

NLME Fit

NLMER Fit

QUESTION: