gpi_mse_compare

n=8, 2-d designs comparision.

generate 6 designs
combine all design points and generate reponse
fit GP and estimate d seperately for each design
plot

library(plgp)
library(mvtnorm)
library(laGP)

library(foreach)
library(doParallel)

source('ud.R')
n <- 8
dim <- 2
eps <- sqrt(.Machine$double.eps)

gpi_mse_cp <- function(dtrue, n, dim){
    x1 <- maximin(n,dim)
    x2 <- matrix(runif(n*dim), ncol = dim)
    x3 <- ud.1(n,dim,1,10^5,1)$X
    x4 <- ud(n,dim,1,10^5,1)$X
    x5 <- ud(n,dim,1,10^5,0.9)$X
    x6 <- ud(n,dim,1,10^5,0.8)$X
    x <- rbind(x1,x2,x3,x4,x5,x6)
    D <- distance(x)
    N <- nrow(x)
    sigma <- exp(-D/dtrue + diag(eps, N))
    y <- t(rmvnorm(1, sigma = sigma))
    dhat <- rep(NA,6)
    for (i in 1:6){
    gpi <- newGP(x[(n*(i-1)+1): (i*n), ], y[(n*(i-1)+1): (i*n), ], d = 0.1, g = eps, dK = T)
    dhat[i] <- mleGP(gpi, param = "d", tmax = 10)$d
    deleteGP(gpi)
    }
    out <- matrix(c(dtrue, dhat), nrow = 1)
  return(out)
}

# system.time(test <- gpi_mse_cp(0.01, n, dim)) about 92 seconds per call

dtrue <- c(seq(.Machine$double.eps,0.1,length.out = 20), seq(0.2,1.3,length.out = 30), seq(1.3,sqrt(2),length.out = 20))
A <- expand.grid(dtrue = dtrue, n = 8, dim = 2)
set <- matrix(rep(t(A),30), ncol=ncol(A), byrow=TRUE)
colnames(set) <- c("dtrue","n", "dim")
cl <- makeCluster(8)
registerDoParallel(cl)
out <- foreach(i = 1:nrow(set), .combine='rbind', .packages=c('plgp', 'mvtnorm', 'laGP' )) %dopar% { gpi_mse_cp(set[i,1], set[i,2], set[i,3]) }

stopCluster(cl)
save.image('gpi_mse_cp.RData')

#---------------------------------------------plot--------------------------------------------------------------------------------------------------
setwd("/home/boyazhang/repos/unifdist/code")
load('gpi_mse_cp.RData')
library(ggplot2)
outb <- cbind(out[,c(1,2)], 1)

for(i in 3:7){
  outb <- rbind(outb, cbind(out[,c(1,i)], i-1))
}
outb[,2] <- (outb[,2]-outb[,1])^2
out <- as.data.frame(outb)
colnames(out) <- c("dtrue", "squared_error", "design")

outs <- out[out$dtrue <=0.1,]
outm <- out[out$dtrue >0.1 & out$dtrue <1.3,]
outl <- out[out$dtrue >=1.3,]


out1 <- transform(outs, design = as.factor(design), dtrue = as.factor(round(dtrue,2)))
ggplot(out1, aes(x=dtrue, y=squared_error, fill=design)) +
  geom_boxplot() + labs(title="1: maximin, 2: random, 3: ud_maximin, 4: ud.r_1, 5: ud.r_0.9, 6: ud.r_0.8") +
  ylim(low=0, high=.05)

## Warning: Removed 165 rows containing non-finite values (stat_boxplot).

out1 <- transform(outs, design = as.factor(design), dtrue = as.factor(round(dtrue,2)))
ggplot(out1, aes(x=dtrue, y=squared_error, fill=design)) +
  geom_boxplot() + labs(title="1: maximin, 2: random, 3: ud_maximin, 4: ud.r_1, 5: ud.r_0.9, 6: ud.r_0.8") +
  ylim(low=0, high=.015)

## Warning: Removed 334 rows containing non-finite values (stat_boxplot).

out1 <- transform(outm, design = as.factor(design), dtrue = as.factor(round(dtrue,1)))
ggplot(out1, aes(x=dtrue, y=squared_error, fill=design)) +
  geom_boxplot() + labs(title="1: maximin, 2: random, 3: ud_maximin, 4: ud.r_1, 5: ud.r_0.9, 6: ud.r_0.8") +
  ylim(low=0, high=.5)

## Warning: Removed 469 rows containing non-finite values (stat_boxplot).

out1 <- transform(outl, design = as.factor(design), dtrue = as.factor(round(dtrue,2)))
ggplot(out1, aes(x=dtrue, y=squared_error, fill=design)) +
  geom_boxplot() + labs(title="1: maximin, 2: random, 3: ud_maximin, 4: ud.r_1, 5: ud.r_0.9, 6: ud.r_0.8") +
  ylim(low=0, high=1)

## Warning: Removed 569 rows containing non-finite values (stat_boxplot).

# notes: sometimes squared_error can be larger than 50
large_se <- out[out$squared_error>=10,]

# where large squared error happens:
hist(large_se[,1], xlab = "dtrue", main = "se > 10 frequency")

# which design causes this most often
hist(large_se[,3], xlab = "design", main = "se > 10 frequency")

# overall performance of d in (0, 0.1)
for(i in 1:6){
  mean(outs$squared_error[outs$design==i])
}

# overall performance of d in (0.1, 1.3)
for(i in 1:6){
  mean(outm$squared_error[outs$design==i])
}

# overall performance of d in (1.3, sqrt(2))
for(i in 1:6){
  mean(outl$squared_error[outs$design==i])
}

gpi_mse_compare

Boya Zhang

February 27, 2018

n=8, 2-d designs comparision.