GP-simulation-2/10

library(plgp)
library(mvtnorm)
library(laGP)
source('ud.R')

## function to estimate lengthscale parameter d with 10 samples from (0,1)
gpi.d <- function(input){
  method <- input[1] ## method(=1,2,3) of sampling
  d <- input[2] ## d is true parameter
  if(method == 1){ ## method = 1, sample from unifdist with dmax = 0.9
    x <- ud.1(10,1,1,10^5, 0.9)$X
  }
  else if(method == 2){ ## method = 2, sample from unifdist with dmax = 1
    x <- ud.1(10,1,1,10^5, 1)$X
  }
  else if(method == 3){ ## method = 3, sample from maximin
    x <- maximin(10,1)
  }
  else {stop("wrong method")}
  n <- length(x)
  D <- distance(x)
  eps <- sqrt(.Machine$double.eps)
  sigma <- exp(-D/d + diag(eps,n)) ## coverance matrix
  y <- rmvnorm(1, sigma=sigma)
  gpi <- newGP(x, y, d = 0.1, g = 0, dK = T) ## not sure how to assign a value to "d"
  mle <- mleGP(gpi, param = "d",  tmax = max(dist(x)))
  deleteGP(gpi)
  return(cbind(design = method, dtrue = d, dhat = mle$d)) ## returns estimated d parameter
}

## true parameters, focus on small and large values
d <- c(seq(.Machine$double.eps,0.1,length.out = 10), seq(0.2,0.7,length.out = 5), seq(0.8,1,length.out = 20))
A <- expand.grid(m = 1:3, d = d)
## 30 replicates
set <- matrix(rep(t(A),30), ncol=ncol(A), byrow=TRUE)
colnames(set) <- c("method", "dtrue")

library(foreach)
library(doParallel)

cl <- makeCluster(8)
registerDoParallel(cl)
out <- foreach(i = 1:nrow(set), .combine='rbind', .packages=c('plgp', 'mvtnorm', 'laGP' )) %dopar% { gpi.d(set[i,]) }
  
stopCluster(cl)
save.image('GP-1d-1.RData')

library(ggplot2)
load('GP-1d-1.RData')
out1 <- as.data.frame(out)
row.names(out1) <- NULL
outs <- out1[out1$dtrue <=0.1,]
outm <- out1[out1$dtrue >0.1 & out1$dtrue <0.8,]
outl <- out1[out1$dtrue >=0.80,]

out1 <- transform(outs, design = as.factor(design), dtrue = as.factor(round(dtrue,3)))
ggplot(out1, aes(x=dtrue, y=dhat, fill=design)) +
  geom_boxplot() + labs(title="dtrue in (0,0.1). red: unifdist, dmax=0.9; green: unifdist, dmax = 1; blue: maximin")+
  geom_hline(yintercept = d[1:10], linetype = 2)

out1 <- transform(outm, design = as.factor(design), dtrue = as.factor(round(dtrue,2)))
ggplot(out1, aes(x=dtrue, y=dhat, fill=design)) +
  geom_boxplot() + labs(title="dtrue in (0.2,0.7). red: unifdist, dmax=0.9; green: unifdist, dmax = 1; blue: maximin")+
  geom_hline(yintercept = d[11:15], linetype = 2)

out1 <- transform(outl, design = as.factor(design), dtrue = as.factor(round(dtrue,2)))
ggplot(out1, aes(x=dtrue, y=dhat, fill=design)) +
  geom_boxplot() + labs(title="dtrue in (0.8,1). red: unifdist, dmax=0.9; green: unifdist, dmax = 1; blue: maximin")

Comments

The plots are divided by dtrue in (0,0.1), (0.2,0.7) and (0.8,1).

I have no idea why when dtrue is larger than 0.2, the dhat is always around 0.3 for all the three designs.

When dtrue is .Machine$double.eps, two unifdist designs are obviously better than maximin design. However when dtrue is larger than 0.01, the performances are similar. unifdist with dmax = 0.9 is generally slightly better than that with dmax = 1.

So I did more simulations of d $\in$ (0,0.01). The plots are as follows.

unifdist is generally better than maximin over the 30 replicates, especially when dtrue is close to 0.

library(ggplot2)
load('GP-1d-2.RData')
out1 <- as.data.frame(out)
row.names(out1) <- NULL

outs <- out1[out1$dtrue <=0.005,]
outm <- out1[out1$dtrue >0.005,]

out1 <- transform(outs, design = as.factor(design), dtrue = as.factor(round(dtrue,3)))
ggplot(out1, aes(x=dtrue, y=dhat, fill=design)) +
  geom_boxplot() + labs(title="dtrue in (0,0.005). red: unifdist, dmax=0.9; green: unifdist, dmax = 1; blue: maximin")+
  geom_hline(yintercept = d[1:5], linetype = 2)

out1 <- transform(outm, design = as.factor(design), dtrue = as.factor(round(dtrue,3)))
ggplot(out1, aes(x=dtrue, y=dhat, fill=design)) +
  geom_boxplot() + labs(title="dtrue in (0.005,0.01). red: unifdist, dmax=0.9; green: unifdist, dmax = 1; blue: maximin")+
  geom_hline(yintercept = d[6:10], linetype = 2)

GP-simulation-2/10

Boya Zhang

February 10, 2018

Comments