n=8, 2-d designs comparision.
- generate 6 designs
- combine all design points and generate reponse
- fit GP and estimate d seperately for each design
- plot
library(plgp)
library(mvtnorm)
library(laGP)
library(foreach)
library(doParallel)
source('ud.R')
n <- 8
dim <- 2
eps <- sqrt(.Machine$double.eps)
gpi_mse_cp <- function(dtrue, n, dim){
x1 <- maximin(n,dim)
x2 <- matrix(runif(n*dim), ncol = dim)
x3 <- ud.1(n,dim,1,10^5,1)$X
x4 <- ud(n,dim,1,10^5,1)$X
x5 <- ud(n,dim,1,10^5,0.9)$X
x6 <- ud(n,dim,1,10^5,0.8)$X
x <- rbind(x1,x2,x3,x4,x5,x6)
D <- distance(x)
N <- nrow(x)
sigma <- exp(-D/dtrue + diag(eps, N))
y <- t(rmvnorm(1, sigma = sigma))
dhat <- rep(NA,6)
for (i in 1:6){
gpi <- newGP(x[(n*(i-1)+1): (i*n), ], y[(n*(i-1)+1): (i*n), ], d = 0.1, g = eps, dK = T)
dhat[i] <- mleGP(gpi, param = "d", tmax = 10)$d
deleteGP(gpi)
}
out <- matrix(c(dtrue, dhat), nrow = 1)
return(out)
}
# system.time(test <- gpi_mse_cp(0.01, n, dim)) about 92 seconds per call
dtrue <- c(seq(.Machine$double.eps,0.1,length.out = 20), seq(0.2,1.3,length.out = 30), seq(1.3,sqrt(2),length.out = 20))
A <- expand.grid(dtrue = dtrue, n = 8, dim = 2)
set <- matrix(rep(t(A),30), ncol=ncol(A), byrow=TRUE)
colnames(set) <- c("dtrue","n", "dim")
cl <- makeCluster(8)
registerDoParallel(cl)
out <- foreach(i = 1:nrow(set), .combine='rbind', .packages=c('plgp', 'mvtnorm', 'laGP' )) %dopar% { gpi_mse_cp(set[i,1], set[i,2], set[i,3]) }
stopCluster(cl)
save.image('gpi_mse_cp.RData')
#---------------------------------------------plot--------------------------------------------------------------------------------------------------
setwd("/home/boyazhang/repos/unifdist/code")
load('gpi_mse_cp.RData')
library(ggplot2)
outb <- cbind(out[,c(1,2)], 1)
for(i in 3:7){
outb <- rbind(outb, cbind(out[,c(1,i)], i-1))
}
outb[,2] <- (outb[,2]-outb[,1])^2
out <- as.data.frame(outb)
colnames(out) <- c("dtrue", "squared_error", "design")
outs <- out[out$dtrue <=0.1,]
outm <- out[out$dtrue >0.1 & out$dtrue <1.3,]
outl <- out[out$dtrue >=1.3,]
out1 <- transform(outs, design = as.factor(design), dtrue = as.factor(round(dtrue,2)))
ggplot(out1, aes(x=dtrue, y=squared_error, fill=design)) +
geom_boxplot() + labs(title="1: maximin, 2: random, 3: ud_maximin, 4: ud.r_1, 5: ud.r_0.9, 6: ud.r_0.8") +
ylim(low=0, high=.05)
## Warning: Removed 165 rows containing non-finite values (stat_boxplot).

out1 <- transform(outs, design = as.factor(design), dtrue = as.factor(round(dtrue,2)))
ggplot(out1, aes(x=dtrue, y=squared_error, fill=design)) +
geom_boxplot() + labs(title="1: maximin, 2: random, 3: ud_maximin, 4: ud.r_1, 5: ud.r_0.9, 6: ud.r_0.8") +
ylim(low=0, high=.015)
## Warning: Removed 334 rows containing non-finite values (stat_boxplot).

out1 <- transform(outm, design = as.factor(design), dtrue = as.factor(round(dtrue,1)))
ggplot(out1, aes(x=dtrue, y=squared_error, fill=design)) +
geom_boxplot() + labs(title="1: maximin, 2: random, 3: ud_maximin, 4: ud.r_1, 5: ud.r_0.9, 6: ud.r_0.8") +
ylim(low=0, high=.5)
## Warning: Removed 469 rows containing non-finite values (stat_boxplot).

out1 <- transform(outl, design = as.factor(design), dtrue = as.factor(round(dtrue,2)))
ggplot(out1, aes(x=dtrue, y=squared_error, fill=design)) +
geom_boxplot() + labs(title="1: maximin, 2: random, 3: ud_maximin, 4: ud.r_1, 5: ud.r_0.9, 6: ud.r_0.8") +
ylim(low=0, high=1)
## Warning: Removed 569 rows containing non-finite values (stat_boxplot).

# notes: sometimes squared_error can be larger than 50
large_se <- out[out$squared_error>=10,]
# where large squared error happens:
hist(large_se[,1], xlab = "dtrue", main = "se > 10 frequency")

# which design causes this most often
hist(large_se[,3], xlab = "design", main = "se > 10 frequency")

# overall performance of d in (0, 0.1)
for(i in 1:6){
mean(outs$squared_error[outs$design==i])
}
# overall performance of d in (0.1, 1.3)
for(i in 1:6){
mean(outm$squared_error[outs$design==i])
}
# overall performance of d in (1.3, sqrt(2))
for(i in 1:6){
mean(outl$squared_error[outs$design==i])
}