### We have found that random design can have better performance than ud.
### TO figure out why, let's find some non-unifdist design with lower mse.
setwd("/home/boyazhang/repos/unifdist/code")
source('ud.R')
## Loading required package: mvtnorm
## Loading required package: tgp
##
## Attaching package: 'EnvStats'
## The following objects are masked from 'package:stats':
##
## predict, predict.lm
## The following object is masked from 'package:base':
##
## print.default
library(plgp)
library(mvtnorm)
library(laGP)
##
## Attaching package: 'laGP'
## The following object is masked from 'package:plgp':
##
## distance
ud.obj <- list()
n <- 8
dim <- 2
## find the best ud.maximin design with lowest ks distance among 10 random realization
ks <- rep(NA,10)
for(i in 1:10){
test <- ud.1(n, dim, 1)
ud.obj[[i]] <- test
ks[i] <- test$ksopt
}
ud.maximin <-ud.obj[[which.min(ks)]]
ud.maximin.ks <- min(ks)
## find the best ud.rand design with lowest ks distance among 10 random realization
ks <- rep(NA,10)
for(i in 1:10){
test <- ud(n, dim, 1)
ud.obj[[i]] <- test
ks[i] <- test$ksopt
}
ud.rand <-ud.obj[[which.min(ks)]]
ud.rand.ks <- min(ks)
These are outstanding unifdist representatives. They are very “unifdist” with very low ksopt.
designPlot(ud.maximin)
edfPlot(ud.maximin)
designPlot(ud.rand)
edfPlot(ud.rand)
ud.maximin.ks
## [1] 0.036
ud.rand.ks
## [1] 0.035
Calculate mse of d for our unifdist representatives. dtrue is random from uniform(0,1).
## calculate mse of parameter d with input design; dtrue is from uniform distribution
I <- 1000
gpi.mse <- function(x){
D <- distance(x)
eps <- sqrt(.Machine$double.eps)
dtrue <- runif(I)
dhat <- rep(NA, I)
for(i in 1:I){
sigma <- exp(-D/dtrue[i] + diag(eps, n))
y <- rmvnorm(1, sigma = sigma)
gpi <- newGP(x, y, d = 0.1, g = eps, dK = T)
dhat[i] <- mleGP(gpi, param = "d", tmax = 10)$d
deleteGP(gpi)
}
mse <- mean((dtrue - dhat)^2)
return(mse)
}
ud.maximin.mse <- gpi.mse(ud.maximin$X)
ud.rand.mse <- gpi.mse(ud.rand$X)
ud.maximin.mse
## [1] 0.1601145
ud.rand.mse
## [1] 0.3403251
Let’s try to find 50 random designs with lower mse. This seems to be not very difficult, since tot is not very large.
## find 20 random designs with lower mse
rand_design <- list()
mser <- rep(NA, 50)
ks <- rep(NA, 50)
i <- 1
tot <- 0
du <- seq(from = .Machine$double.eps , to = sqrt(dim), length.out = 1000) ## true uniform distances
while(i <= 50){
tot <- tot+1
x <- matrix(runif(n*dim), ncol = dim)
mse1 <- gpi.mse(x)
if(mse1 < ud.maximin.mse){
mser[i] <- mse1
rand_design[[i]] <- x
dx <- dist(x)
ks[i] <- ks.test(dx, du)$statistic
i <- i+1
}
}
tot
## [1] 187
Look at some plots of better random designs:
## plot several random designs with lower mse than ud.maximin
par(mfcol = c(2,2))
for(i in 1:4){
plot(rand_design[[i]], main = "random design with 8 points")
}
par(mfcol = c(1,1))
plot(ks, mser, main = "ksopt and mse of d for a 2-d 8-point design")
## distance edf plot for any design X
edfPlotx <- function(X)
{
n <- nrow(X)
dim <- ncol(X)
dmax <- sqrt(dim)
du <- seq(from = .Machine$double.eps , to = dmax, length.out = 1000) ## true uniform distances
ksopt <- ks.test(X, du)$statistic
d <- dist(X)
par(mfcol = c(1,2))
plot(ecdf(d), xlab="distances", main=paste0("ecdf:", n, ",", dim, ",", ksopt), xlim = c(0, dmax))
segments(0,0,dmax,1) ## add cdf of target uniform distribution
breaks <- seq(0, max(d), length.out = 10)
hist(d, breaks = breaks, freq = FALSE, xlab="distances", main='', xlim = c(0, dmax))
segments(0,1/dmax,dmax,1/dmax)
lines(c(dmax,dmax), c(1/dmax, 0), lty = 2)
}
## plot their distance distributions
for(i in 1:10){
edfPlotx(rand_design[[i]])
}