Coding Progress With A Simple Kennedy-O'Hagan Problem

# Loading libraries to be used later
library(mvtnorm)
library(lhs)

Basic modular approach

In computer model calibration, one standard approach was developed by Kennedy and O'Hagan (KOH). In this approach, the values of field data can be viewed as the sum of a component which can be adequately represented by the computational model, a discrepancy term, and mean zero random noise: \[ y_f(x) = \eta(x,\theta)+\delta(x)+\epsilon \]

where the discrepancy, $ \delta(x) $, is often modeled as a Gaussian process. In addition, as outlined by Kennedy and O'Hagan as well as Higdon, the simulator can be represented by a Gaussian process approximation to the response surface when the number of observations from the simulator is limited.

Below we will carry out this analysis on a simple example where both the underlying reality and the approximating simulator are simple, known functions. For simplicity, in the following we will assume that the scale of the random error is known.

# Function describing underlying reality
reality <- function(x){
  10*dgamma(x+2.2,shape=1.4,scale=3)+dgamma(x+2.2,shape=1.4,scale=3)*sin(2*pi*x/1.5)
}
# Simulator with two "tunable" parameters meant to approximate the above function
simulator <- function(x,alpha,beta){10*dgamma(x+2.2,shape=alpha,scale=beta)}

# Number of field observations and simulator evaluations for the example
nField = 20
nSim = 36

# Generate "field data" with noise
xsField <- seq(-2.0,2.8,length=nField)
fsField <- reality(xsField) + 0.001*rnorm(nField)
fsField <- as.matrix(fsField)

# Generate simulator inputs using a maximin Latin-Hypercube design to cover the parameter space
designpoints <- maximinLHS(nSim,3)
xsSim <- designpoints[,1]*5-2
asSim <- designpoints[,2]*1+1.1
bsSim <- designpoints[,3]*1+2.5

# Obtain simulator evaluations
fsSim <- simulator(xsSim,asSim,bsSim)
fsSim <- as.matrix(fsSim)

# Build input matrix then scale each input dimension to the range [0,1]
inpDat <- cbind(c(xsField,xsSim),c(rep(mean(asSim),nField),asSim),c(rep(mean(bsSim),nField),bsSim))
minInp <- apply(inpDat,2,min)
rangeInp <- apply(inpDat,2,function(a){max(a)-min(a)})
inpDat <- apply(inpDat,2,function(a){(a-min(a))/(max(a)-min(a))})
xsField <- (xsField-minInp[1])/rangeInp[1]

# Build output vector and standardize to have mean 0 and variance 1
outDat <- as.matrix(c(fsField,fsSim))
meanOut <- mean(outDat)
sdOut <- sd(outDat)
outDat <- (outDat - meanOut)/sdOut

To carry out the calibration above, we will use the modular approach discussed by Higdon, Bayarri, and KOH and others. In this approach, we will first model the response surface of the simulator, then fix the GP parameters for this surface to point estimates from their posterior for the estimation of the discrepancy and calibration step. We define the functions for obtaining the parameters for approximating the response surface below:

########################
# Correlation Function for GP Model
#
# Input:
#   x1,x2: two px1 vectors defined on the input space
#   scale: px1 vector dictating the correlation length in each input dimension
#   rough: scalar. Often will be set to fixed value of 2 to make the 
#           GP response infinitely differentiable
#
# Output:
#   scalar, correlation between the two parameter values
#
corrF <- function(x1,x2,scale,rough){exp(-sum(scale*abs(x1-x2)^rough))}

########################
# Build Correlation Matrix for GP model
#
# Input:
#   Xmat: nxp matrix of input values
#   scale: px1 vector dictating the correlation length in each input dimension
#   rough: scalar. Often will be set to fixed value of 2 to make the 
#           GP response infinitely differentiable
#
# Output:
#   chain: samples from the Markov chain generated by the MH algorithm
#
# Requires:
# - corrF()
#   - Correlation Function - corrF()
#       - See above
#
getCorrMat <- function(Xmat,scale,rough=2){
  Xmat <- as.matrix(Xmat)
  n <- dim(Xmat)[1]
  corrMat <- matrix(0,nrow=n,ncol=n)
  for (i in 1:n){
    for (j in 1:i){
      corrMat[i,j] <- corrF(Xmat[i,],Xmat[j,],scale,rough)
      corrMat[j,i] <- corrMat[i,j]
    }
  }
  corrMat <- (corrMat+t(corrMat))/2
  return(corrMat)
}

########################
# Get vector of correlations between new inputs and the current design points
#
# Input:
#   predXvec: n*xp matrix of inputs to predict at
#   Xmat: nxp matrix of input values
#   scale: px1 vector dictating the correlation length in each input dimension
#   rough: scalar. Often will be set to fixed value of 2 to make the 
#           GP response infinitely differentiable
#
# Output:
#   chain: samples from the Markov chain generated by the MH algorithm
#
# Requires:
# - corrF()
#   - Correlation Function - corrF()
#       - See above
#
# 
getCorrVec <- function(predXvec,Xmat,scale,rough=2){
  Xmat <- as.matrix(Xmat); predXvec <- as.matrix(predXvec)
  n <- dim(Xmat)[1]
  corrMat <- matrix(0,nrow=n,ncol=1)
  for (j in 1:n){
    corrMat[j,1] <- corrF(predXvec,Xmat[j,],scale,rough)
  }
  # Returns as a matrix for ease of later calculations
  return(corrMat)
}

########################
# Calculate the likelihood function
#
# Input:
#   params: px1 vector of the model parameters for the GP
#   x: nxp matrix of input values
#   y: nx1 vector of observed response values
#
# Output:
#   Scalar value of the likelihood function
#
# Requires:
# - getCorrMat()
#   - See above
# - mvtnorm R package
# 
likelihoodFunctionSim <- function(params,x,y){
  corrMat <- getCorrMat(x,params[3:5])
  covMat <- corrMat/params[6]
  # Adding to the diagonal of the covariance matrix for numerical stability
  covMat = covMat + 0.0001*diag(dim(covMat)[1])
  # Use dmvnorm presuming it is optimized well
  return(dmvnorm(c(y),mean=rep(0,length(y)),sigma=covMat))
}

########################
# Obtain the value of the prior for the given parameter values
#
# Input:
#   param: given value for the GP parameters
#
# Output:
#   Scalar value of the prior at this point
#
# Requires:
# 
priorFunctionSim <- function(param){
  lprior <- param
  lprior[1] <- dnorm(param[1],0.3,0.25)
  lprior[2] <- dnorm(param[2],0.5,0.25)
  lprior[3:5] <- dbeta(exp(-param[3:5]),1,0.6)
  lprior[6] <- dgamma(param[6],5,5)
  return(sum(log(lprior)))
}

########################
# Obtain new proposal values for Metropolis-Hastings algorithm
#
# Input:
#   location: current value for the GP parameters
#
# Output:
#   list with the new proposed value for the GP scale parameter
#     and the transition probability in each direction
#
# Requires:
# 
proposalFunctionSim <- function(location){
  scale = c(0.05,0.05,0.25,0.25,0.25,0.25)
  prop <- c(rnorm(2,location[1:2],scale[1:2]),rlnorm(4,meanlog = log(location[3:6]), sdlog=scale[3:6]))
  pps <- c(c(dnorm(location[1:2],prop[1:2],scale[1:2]),dlnorm(location[3:6],log(prop[3:6]),scale[3:6])),c(dnorm(prop[1:2],location[1:2],scale[1:2]),dlnorm(log(prop[3:6]),location[3:6],scale[3:6])))
  return(list(proposal=prop,propProbs=pps))
}

########################
# Simple Metropolis Hastings Algorithm
#
# Input:
#   startvalue: parameter vector to initialize the Markov Chain
#   iterations: number of total iterations to run for the chain
#   inputData: inputs or predictors for the probablisitic model
#   outputData: outputs or responses for the probabilitic model
#   diagnostics: Logical dictating if diagnostic output should be displayed
#
# Output:
#   chain: samples from the Markov chain generated by the MH algorithm
#
# Requires:
# - proposalFunctionSim(),priorFunctionSim(),likelihoodFunctionSim()
#   - Proposal function - proposalFunctionSim()
#       - See above
#
#   - Prior function - priorFunctionSim()
#       - See above
#
#   - Likelihood function - likelihoodFunctionSim()
#       - See above
#
metropolisMCMCSim <- function(startvalue, iterations, inputData,outputData,diagnostics=F){
  chain = array(dim = c(iterations+1,length(startvalue)))
  oldprob = array(dim = c(iterations+1))

  chain[1,] = startvalue
  oldprob[1] = -Inf
  tb <- txtProgressBar(min=1,max=iterations,initial=1)
  i=1
  its=1
  while (i <= iterations){
    temp = proposalFunctionSim(chain[i,])

    proposal = temp$proposal
    proposalProbs = temp$propProbs
    priors = priorFunctionSim(proposal)
    probab = priors+log(likelihoodFunctionSim(proposal,inputData,outputData))
    if (max(c(-Inf,(probab - oldprob[i] + log(proposalProbs[1]) - log(proposalProbs[2]))),na.rm=T)> log(runif(1))){
      chain[i+1,] = proposal
      oldprob[i+1] = probab
      i=i+1
      setTxtProgressBar(tb, i)
    } else{
      chain[i+1,] = chain[i,]
      oldprob[i+1] = oldprob[i]
      i=i+1
      setTxtProgressBar(tb, i)
    }
    its = its +1
  }
  close(tb)
  print(length(unique(as.matrix(chain[,1])))/length(as.matrix(chain[,1])))
  if(diagnostics){plot(oldprob,main="Unnormalized Posterior Probability at Each Step",type="l",lwd=2)}
  return(chain)
}

We also define the functions for calibration after the GP response surface approximation to the simulator has been obtained:


########################
# Calculate the likelihood function
#
# Input:
#   params: px1 vector of the model parameters for the GP
#   x: nxp matrix of input values
#   y: nx1 vector of observed response values
#   nField: number of field observations. Used to know which parts of the input matrix and output vector
#             correspond to field and which to simulated data
#
# Output:
#   Scalar value of the likelihood function
#
# Requires:
# - getCorrMat()
#   - See above
# - mvtnorm R package
# 
likelihoodFunctionField <- function(params,x,y,nField){
  corrMat <- getCorrMat(x,params[3:5])
  covMat <- corrMat/params[6]
  #
  covMat_disc <- getCorrMat(x[1:nField],params[7])/params[8]
  temp <- 0*covMat
  # Noise term adjusted from 0.001 to 0.003 because of output scaling
  temp[1:nField,1:nField] <- covMat_disc + 0.003*diag(nField)
  # Added for numerical stability
  covMat = covMat + temp + 0.0001*diag(dim(covMat)[1])
  return(dmvnorm(c(y),mean=rep(0,length(y)),sigma=covMat))
}

########################
# Obtain the value of the prior for the given parameter values
#
# Input:
#   param: given value for the GP parameters
#
# Output:
#   Scalar value of the prior at this point
#
# Requires:
# 
priorFunctionField <- function(param){
  lprior <- rep(1,length(param))
  lprior[1] <- dnorm(param[1],0.3,0.25)
  lprior[2] <- dnorm(param[2],0.5,0.25)
  lprior[7] <- dbeta(exp(-param[7]),1,0.5)
  lprior[8] <- dgamma(param[8],1,0.0001)  
  return(sum(log(lprior)))
}

########################
# Obtain new proposal values for Metropolis-Hastings algorithm
#
# Input:
#   location: current value for the GP parameters
#
# Output:
#   list with the new proposed value for the GP scale parameter
#     and the transition probability in each direction
#
# Requires:
# 
proposalFunctionField <- function(location){
  scale = c(0.1,0.1,0.25,0.25)
  prop <- c(rnorm(2,location[1:2],scale[1:2]),location[3:6],rlnorm(2,meanlog = log(location[7:8]), sdlog=scale[3:4]))
  pps <- c(c(dnorm(location[1:2],prop[1:2],scale[1:2]),rep(1,4),dlnorm(location[7:8],log(prop[7:8]),scale[3:4])),c(dnorm(prop[1:2],location[1:2],scale[1:2]),rep(1,4),dlnorm(log(prop[7:8]),location[7:8],scale[3:4])))
  return(list(proposal=prop,propProbs=pps))
}

########################
# Simple Metropolis Hastings Algorithm
#
# Input:
#   startvalue: parameter vector to initialize the Markov Chain
#   iterations: number of total iterations to run for the chain
#   inputData: inputs or predictors for the probablisitic model
#   outputData: outputs or responses for the probabilitic model
#   nField: scalar used to separate simulated from field data in input record
#   diagnostics: Logical dictating if diagnostic output should be displayed
#
# Output:
#   chain: samples from the Markov chain generated by the MH algorithm
#
# Requires:
# - proposalFunctionSim(),priorFunctionSim(),likelihoodFunctionSim()
#   - Proposal function - proposalFunctionSim()
#       - See above
#
#   - Prior function - priorFunctionSim()
#       - See above
#
#   - Likelihood function - likelihoodFunctionSim()
#       - See above
#
metropolisMCMCField <- function(startvalue, iterations, inputData,outputData,nField,diagnostics=F){
  chain = array(dim = c(iterations+1,length(startvalue)))
  oldprob = array(dim = c(iterations+1))

  chain[1,] = startvalue
  oldprob[1] = -Inf
  tb <- txtProgressBar(min=1,max=iterations,initial=1)
  i=1
  its=1
  while (i <= iterations){
    temp = proposalFunctionField(chain[i,])

    proposal = temp$proposal
    proposalProbs = temp$propProbs
    priors = priorFunctionField(proposal)
    inputData[1:nField,2] <- proposal[1]
    inputData[1:nField,3] <- proposal[2]
    probab = priors+log(likelihoodFunctionField(proposal,inputData,outputData,nField))
    if (max(c(-Inf,(probab - oldprob[i] + log(proposalProbs[1]) - log(proposalProbs[2]))),na.rm=T)> log(runif(1))){
      chain[i+1,] = proposal
      oldprob[i+1] = probab
      i=i+1
      setTxtProgressBar(tb, i)
    } else{
      chain[i+1,] = chain[i,]
      oldprob[i+1] = oldprob[i]
      i=i+1
      setTxtProgressBar(tb, i)
    }
    its = its +1
  }
  close(tb)
  print(length(unique(as.matrix(chain[,7])))/length(as.matrix(chain[,7])))
  if(diagnostics){plot(oldprob,main="Unnormalized Posterior Probability at Each Step",type="l",lwd=2)}
  return(chain)
}

Having defined the functions defined above, we can obtain the posterior median of the GP model parameters for emulating the simulator and use those values to draw samples from the posterior for the calibration parameters, as well as for the GP parameters for the discrepancy function.

# Emulate the computer model first and get point estimates for model GP
# parameters
mcmcSamplesSim <- metropolisMCMCSim(startvalue = c(0.3, 0.5, 20, 1, 1, 0.4), 
    iterations = 10000, inputData = inpDat[-(1:nField), ], outputData = outDat[-(1:nField)])

## ===========================================================================
## [1] 0.3487

goodSamplesSim <- mcmcSamplesSim[-(1:500), ]
medians <- apply(goodSamplesSim, 2, median)
inpDat[1:nField, 2] <- medians[1]
inpDat[1:nField, 3] <- medians[2]

# Redefine model functions for estimating the discrepancy and making full
# model predictions.
mcmcSamples <- metropolisMCMCField(startvalue = c(medians, 1, 1), iterations = 10000, 
    inputData = inpDat, outputData = outDat, nField)

## ===========================================================================
## [1] 0.4126

goodSamples <- mcmcSamples[-(1:500), ]

We'll define a function to take in the input values we would like to predict at, the

predictY <- function(predX, inpDat, outDat, samp, nField, useDiscrep = T, onlyDiscrep = F) {
    if (onlyDiscrep) {
        useDiscrep = T
    }
    inpDat[1:nField, 2] <- samp[1]
    inpDat[1:nField, 3] <- samp[2]
    inpDat <- rbind(c(predX, samp[1], samp[2]), inpDat)

    corrMat_Sim <- getCorrMat(inpDat, samp[3:5])
    covMat_Sim <- corrMat_Sim/samp[6]
    # 
    covMat_disc <- getCorrMat(inpDat[1:(nField + 1), 1], samp[7])/samp[8]
    temp <- 0 * covMat_Sim
    temp[1:(nField + 1), 1:(nField + 1)] <- covMat_disc + 0.03 * diag(nField + 
        1)
    bigcovMat = covMat_Sim + temp + 1e-06 * diag(dim(covMat_Sim)[1])
    covMat <- bigcovMat[-1, -1]
    invCovMat <- solve(covMat)
    predf <- bigcovMat[1, -1] %*% (invCovMat %*% outDat)
    pcs <- getCorrVec(c(predX, samp[1], samp[2]), inpDat[-1, ], scale = samp[3:5])/(samp[6])
    pcd <- getCorrVec(c(predX), inpDat[-1, 1], scale = samp[7])/(samp[8])
    pcd[-(1:nField), 1] <- 0
    if (!useDiscrep) {
        predf <- t(pcs) %*% (invCovMat %*% outDat)
    }
    if (onlyDiscrep) {
        predf <- t(pcd) %*% (invCovMat %*% outDat)
    }
    predVar <- bigcovMat[1, 1] - bigcovMat[1, -1] %*% invCovMat %*% bigcovMat[-1, 
        1]
    predSE <- sqrt(apply(cbind(predVar, rep(0, length(predX))), 1, max))
    return(c(predf, predSE))
}

We can then plot some various predictions based on the posterior samples. First, we can look at a few sample paths from the posterior:

# Plot 4 sample paths from posterior draws
samp4 <- sample(1:dim(goodSamples)[1], size = 4)
samp4 <- goodSamples[samp4, ]
predX <- seq(-2.1, 4, length = 100)
stdpredX <- (predX - minInp[1])/rangeInp[1]
plot(xsField, (fsField - meanOut)/sdOut, xlim = c(min(stdpredX), max(stdpredX)), 
    ylim = c(-2, 2))
points(stdpredX, (reality(stdpredX * rangeInp[1] + minInp[1]) - meanOut)/sdOut, 
    lwd = 2, type = "l")

for (i in 1:4) {
    inpDat[1:nField, 2] <- samp4[i, 1]
    inpDat[1:nField, 3] <- samp4[i, 2]
    preds <- sapply(stdpredX, predictY, inpDat = inpDat, outDat = outDat, samp = samp4[i, 
        ], nField = nField)
    points(stdpredX, preds[1, ], type = "l", col = rgb((i - 1)/3, 0, 1 - (i - 
        1)/3, 0.75))
    points(stdpredX, preds[1, ] - 1.96 * preds[2, ], type = "l", col = rgb((i - 
        1)/3, 0, 1 - (i - 1)/3, 0.75), lty = 2)
    points(stdpredX, preds[1, ] + 1.96 * preds[2, ], type = "l", col = rgb((i - 
        1)/3, 0, 1 - (i - 1)/3, 0.75), lty = 2)
}

plot of chunk unnamed-chunk-7

Alternatively, we can look only at the sample median:

# Plot sample path from posterior median:
medians <- apply(goodSamples, 2, median)
print(medians)

## [1]  0.4007  0.4539  3.8107  0.6743  0.1118  0.2138 44.4269  2.9100

inpDat[1:nField, 2] <- medians[1]
inpDat[1:nField, 3] <- medians[2]
preds <- sapply(stdpredX, predictY, inpDat = inpDat, outDat = outDat, samp = medians, 
    nField = nField)

plot(xsField, (fsField - meanOut)/sdOut, xlim = c(min(stdpredX), max(stdpredX)), 
    ylim = c(-2, 2))
points(stdpredX, (reality(stdpredX * rangeInp[1] + minInp[1]) - meanOut)/sdOut, 
    lwd = 2, type = "l")
points(stdpredX, preds[1, ], type = "l", col = rgb((i - 1)/3, 0, 1 - (i - 1)/3, 
    0.75))
points(stdpredX, preds[1, ] - 1.96 * preds[2, ], type = "l", col = rgb((i - 
    1)/3, 0, 1 - (i - 1)/3, 0.75), lty = 2)
points(stdpredX, preds[1, ] + 1.96 * preds[2, ], type = "l", col = rgb((i - 
    1)/3, 0, 1 - (i - 1)/3, 0.75), lty = 2)

plot of chunk unnamed-chunk-8

This can be broken down into two components, the simulator only component:

# Plot sample path from posterior median WITHOUT discrepancy:
preds <- sapply(stdpredX, predictY, inpDat = inpDat, outDat = outDat, samp = medians, 
    nField = nField, useDiscrep = F)

plot(xsField, (fsField - meanOut)/sdOut, xlim = c(min(stdpredX), max(stdpredX)), 
    ylim = c(-2, 2))
points(stdpredX, (reality(stdpredX * rangeInp[1] + minInp[1]) - meanOut)/sdOut, 
    lwd = 2, type = "l")
points(stdpredX, preds[1, ], type = "l", col = rgb((i - 1)/3, 0, 1 - (i - 1)/3, 
    0.75))
points(stdpredX, preds[1, ] - 1.96 * preds[2, ], type = "l", col = rgb((i - 
    1)/3, 0, 1 - (i - 1)/3, 0.75), lty = 2)
points(stdpredX, preds[1, ] + 1.96 * preds[2, ], type = "l", col = rgb((i - 
    1)/3, 0, 1 - (i - 1)/3, 0.75), lty = 2)

plot of chunk unnamed-chunk-9

and the discrepancy only component:

# Plot sample path from posterior median OF ONLY discrepancy:
preds <- sapply(stdpredX, predictY, inpDat = inpDat, outDat = outDat, samp = medians, 
    nField = nField, onlyDiscrep = T)

plot(xsField, (fsField - meanOut)/sdOut, xlim = c(min(stdpredX), max(stdpredX)), 
    ylim = c(-2, 2))
points(stdpredX, (reality(stdpredX * rangeInp[1] + minInp[1]) - meanOut)/sdOut, 
    lwd = 2, type = "l")
points(stdpredX, preds[1, ], type = "l", col = rgb((i - 1)/3, 0, 1 - (i - 1)/3, 
    0.75))
points(stdpredX, preds[1, ] - 1.96 * preds[2, ], type = "l", col = rgb((i - 
    1)/3, 0, 1 - (i - 1)/3, 0.75), lty = 2)
points(stdpredX, preds[1, ] + 1.96 * preds[2, ], type = "l", col = rgb((i - 
    1)/3, 0, 1 - (i - 1)/3, 0.75), lty = 2)

plot of chunk unnamed-chunk-10

Lastly a couple plots to show the distribution of draws from the posterior and the MCMC traceplots.

par(mfrow = c(2, 2))
hist(goodSamples[, 1], breaks = 20)
hist(goodSamples[, 2], breaks = 20)
plot(goodSamples[, 1], type = "l")
plot(goodSamples[, 2], type = "l")

$plot of chunk unnamed-chunk-11$