Better_random

### We have found that random designs can perform better than ud designs.
### To figure out why, let's find some non-unifdist designs with lower mse.
setwd("/home/boyazhang/repos/unifdist/code")
source('ud.R')

## Loading required package: mvtnorm

## Loading required package: tgp

## 
## Attaching package: 'EnvStats'

## The following objects are masked from 'package:stats':
## 
##     predict, predict.lm

## The following object is masked from 'package:base':
## 
##     print.default

library(plgp)
library(mvtnorm)
library(laGP)

## 
## Attaching package: 'laGP'

## The following object is masked from 'package:plgp':
## 
##     distance

## design size: n points in dim input dimensions
n <- 8
dim <- 2
ud.obj <- vector("list", 10)

## ud.maximin: run ud.1 ten times and keep the realization
## with the lowest ks distance
ks <- rep(NA, 10)
for (i in seq_len(10)) {
  cand <- ud.1(n, dim, 1)
  ks[i] <- cand$ksopt
  ud.obj[[i]] <- cand
}
best <- which.min(ks)
ud.maximin.ks <- min(ks)
ud.maximin <- ud.obj[[best]]

## ud.rand: same selection rule, but the ten realizations come from ud
ks <- rep(NA, 10)
for (i in seq_len(10)) {
  cand <- ud(n, dim, 1)
  ks[i] <- cand$ksopt
  ud.obj[[i]] <- cand
}
best <- which.min(ks)
ud.rand.ks <- min(ks)
ud.rand <- ud.obj[[best]]

These are outstanding unifdist representatives: both are very “unifdist”, with very low ksopt values.

## Visual check of the two selected designs. designPlot() and edfPlot() are
## not defined in this file; presumably they come from ud.R (sourced above).
designPlot(ud.maximin)

edfPlot(ud.maximin)

designPlot(ud.rand)

edfPlot(ud.rand)

## smallest ksopt among the ten ud.1 realizations
ud.maximin.ks

## [1] 0.036

## smallest ksopt among the ten ud realizations
ud.rand.ks

## [1] 0.035

Calculate the mse of d for our unifdist representatives. Each dtrue is drawn at random from Uniform(0, 1).

## Monte Carlo mse of the estimated parameter d for an input design.
##
## For each replicate a true value dtrue is drawn from Uniform(0, 1), one
## response vector is simulated from a zero-mean multivariate normal with
## covariance exp(-D/dtrue) + eps * I, a GP is fit to (x, y), and d is
## re-estimated by maximum likelihood. The result is mean((dtrue - dhat)^2).
##
## x:    design matrix, one row per design point
## reps: number of Monte Carlo replicates; defaults to the global I
##
## returns: scalar mse of dhat against dtrue

## default number of Monte Carlo replicates
I <- 1000
gpi.mse <- function(x, reps = I){
  ## size the nugget matrix from the design itself, not from a global n
  n.x <- nrow(x)
  ## pairwise distance matrix of the design (laGP's distance() masks plgp's;
  ## per the laGP docs these are squared Euclidean distances -- TODO confirm)
  D <- distance(x)
  eps <- sqrt(.Machine$double.eps)
  dtrue <- runif(reps)
  dhat <- rep(NA, reps)
  for(i in seq_len(reps)){
    ## the nugget is added to the covariance, not inside the exponent
    sigma <- exp(-D/dtrue[i]) + diag(eps, n.x)
    y <- rmvnorm(1, sigma = sigma)
    gpi <- newGP(x, y, d = 0.1, g = eps, dK = TRUE)
    ## always release the GP object, even if the optimizer errors
    dhat[i] <- tryCatch(
      mleGP(gpi, param = "d", tmax = 10)$d,
      finally = deleteGP(gpi)
    )
  }
  mse <- mean((dtrue - dhat)^2)
  return(mse)
}
## mse of d under each of the two selected unifdist designs
ud.maximin.mse <- gpi.mse(ud.maximin$X)
ud.rand.mse <- gpi.mse(ud.rand$X)
## print the ud.maximin result; it is the acceptance threshold used below
ud.maximin.mse

## [1] 0.1601145

## print the mse of d for the selected ud.rand design
ud.rand.mse

## [1] 0.3403251

Let’s try to find 50 random designs with lower mse than ud.maximin. This turns out not to be very difficult: tot, the number of random designs we had to try, is not very large.

## Rejection sampling: keep drawing uniform random designs until 50 of them
## have a lower mse of d than ud.maximin.
n.keep <- 50
rand_design <- vector("list", n.keep)
mser <- rep(NA, n.keep)
ks <- rep(NA, n.keep)
## reference sample of "true" uniform distances on (0, sqrt(dim)]
du <- seq(from = .Machine$double.eps, to = sqrt(dim), length.out = 1000)
i <- 1    ## slot for the next accepted design
tot <- 0  ## total number of candidate designs tried
repeat {
  if (i > n.keep) break
  tot <- tot + 1
  x <- matrix(runif(n * dim), ncol = dim)
  mse1 <- gpi.mse(x)
  ## reject candidates that do not beat the ud.maximin design
  if (!(mse1 < ud.maximin.mse)) next
  rand_design[[i]] <- x
  mser[i] <- mse1
  ## ks statistic of the pairwise distances against the uniform reference
  dx <- dist(x)
  ks[i] <- ks.test(dx, du)$statistic
  i <- i + 1
}
tot

## [1] 187

Look at some plots of better random designs:

## plot several random designs with lower mse than ud.maximin
## 2x2 panel layout, one panel for each of the first four accepted designs
par(mfcol = c(2,2))
for(i in 1:4){
  plot(rand_design[[i]], main = "random design with 8 points")
}

## back to a single panel: ks statistic against mse of d, one point per
## accepted random design
par(mfcol = c(1,1))
plot(ks, mser, main = "ksopt and mse of d for a 2-d 8-point design")

## distance edf plot for any design X
## Distance edf plot for any design X.
##
## Draws two side-by-side panels for the pairwise distances of X:
##   left:  their ecdf, with the cdf of the target uniform distribution on
##          [0, sqrt(ncol(X))] overlaid as a straight line
##   right: their histogram, with the target uniform density overlaid
## The ecdf title shows nrow(X), ncol(X) and the ks statistic of the
## distances against a 1000-point uniform reference grid.
##
## X: numeric design matrix, one row per design point (the sqrt(ncol(X))
##    upper bound presumably assumes points in the unit hypercube)
##
## returns: the ks statistic, invisibly
edfPlotx <- function(X)
{
  n <- nrow(X)
  p <- ncol(X)
  dmax <- sqrt(p)
  du <- seq(from = .Machine$double.eps, to = dmax, length.out = 1000)  ## true uniform distances
  ## pairwise distances as a plain numeric vector
  d <- as.numeric(dist(X))
  ## compare the distances (not the raw coordinates) with the uniform reference
  ksopt <- ks.test(d, du)$statistic
  ## two-panel layout; the caller's layout is restored on exit
  op <- par(mfcol = c(1, 2))
  on.exit(par(op), add = TRUE)
  plot(ecdf(d), xlab = "distances", main = paste0("ecdf:", n, ",", p, ",", ksopt), xlim = c(0, dmax))
  segments(0, 0, dmax, 1)  ## add cdf of target uniform distribution
  breaks <- seq(0, max(d), length.out = 10)
  hist(d, breaks = breaks, freq = FALSE, xlab = "distances", main = '', xlim = c(0, dmax))
  segments(0, 1/dmax, dmax, 1/dmax)  ## density of target uniform distribution
  lines(c(dmax, dmax), c(1/dmax, 0), lty = 2)
  invisible(ksopt)
}


## draw the distance ecdf and histogram panels for the first ten accepted designs
for (design in rand_design[seq_len(10)]) {
  edfPlotx(design)
}

Better_random_design

Boya Zhang

February 19, 2018