library(ggplot2)
library(plgp)
## Loading required package: mvtnorm
## Loading required package: tgp
load("testn.RData")
row.names(out2) <- NULL
out <- as.data.frame(out2)
out <- transform(out2, n = as.factor(out$n), m = out$m, k = out$k, ksopt = out$ksopt )
ggplot(out[out$m==1,], aes(x=n, y=ksopt)) +
geom_boxplot() + labs(title="dim = 1")
ggplot(out[out$m==2,], aes(x=n, y=ksopt)) +
geom_boxplot() + labs(title="dim = 2")
ggplot(out[out$m==3,], aes(x=n, y=ksopt)) +
geom_boxplot() + labs(title="dim = 3")
When dim = 1, as sample size increases, it’s more and more difficult to make it uniform.
But when dim = 2 and 3, the ksopt, optimized ks-distance decreases first then increases.
The pattern looks very interesting. Probably when n is large like 100, changing only one point at each time is not efficient enough to make it converge to the minimum after 10^5 iterations. But for 1-d I’m still concerned about whether a uniform(0,1) is possible or not.
load("testk1.RData")
tesk <- as.data.frame(testk)
par(mfcol=c(1,3))
for(i in 1:3){
temp <- 10*i
testk1 <- testk[testk[,1]==temp,]
boxplot(ksopt~k, testk1, xlab = "k", ylab = "ks-dist", main = paste0("n=",temp,",dim=1,dmax=0.9"))
}
load('testk.RData')
testk <- as.data.frame(testk)
for(i in 1:3){
temp <- 10*i
testk1 <- testk[testk$n==temp,]
boxplot(ksopt~k, testk1[testk1$m==1,], xlab = "k", ylab = "ks-dist", main = paste0("n=",temp,",dim=1,dmax=1"))
}
for(i in 1:3){
temp <- 10*i
testk1 <- testk[testk$n==temp,]
boxplot(ksopt~k, testk1[testk1$m==2,], xlab = "k", ylab = "ks-dist", main = paste0("n=",temp,",dim=2,dmax=1"))
}
From those boxplots, xlab is k, number of points changed in each iteration; ylab is ksopt, optimized ks-distance after 10^5 iterations.
Maximin initial is used here.
when n=10,20,30, dim=1, dmax=0.9 or 1, with k increases, final ksopt is increasing.
When n=10,20,30, dim=2, dmax=1, with k increases final ksopt is still increasing.
It seems in all the cases, k=1 is better.
library(rbenchmark)
benchmark("k=1" = {t1 <- ud.1(30,1,1,10^5,1)},
"k=2" = {t2 <- ud.1(30,1,2,10^5,1)},
"k=3" = {t3 <- ud.1(30,1,3,10^5,1)},
"k=4" = {t4 <- ud.1(30,1,10,10^5,1)},
"k=5" = {t5 <- ud.1(30,1,15,10^5,1)},
replications = 1,
columns = c("test", "replications", "elapsed",
"relative", "user.self", "sys.self"))
## test replications elapsed relative user.self sys.self
## 1 k=1 1 54.988 1.017 54.948 0.000
## 2 k=2 1 54.303 1.004 54.272 0.004
## 3 k=3 1 56.626 1.047 56.516 0.000
## 4 k=4 1 54.066 1.000 53.312 0.012
## 5 k=5 1 54.187 1.002 53.876 0.004
Just run 1 replication here, I have run more replicates, the result indicate that when n=30, dim=1, k=1,2,3,10,15, the time used to finish 10^5 runs are almost same.
For one time run with 10^5 interations, it takes about 30 seconds.
load('test2ds.RData')
row.names(out1) <- NULL
row.names(out2) <- NULL
out1 <- as.data.frame(out1)
out2 <- as.data.frame(out2)
out1 <- cbind(out1, method = "random initial")# random initial
out2 <- cbind(out2, method = "maximin initial")# maximin initial
out <- rbind(out1,out2)
out <- transform(out, n = as.factor(n), method = as.factor(method))
ggplot(out[out$m==1,], aes(x=n, y=ksopt, fill=method)) +
geom_boxplot() + labs(title="dim = 1, k = 1, 30 replicates")
ggplot(out[out$m==2,], aes(x=n, y=ksopt, fill=method)) +
geom_boxplot()+ labs(title="dim = 2, k = 1, 30 replicates")
I compared unifdist with random initial and maximin initial:
It turns out that when n = 10,20,30, dim=1,2, k=1, final ks distance is not significantly reduced after using maximin initial.
It seems using maximin initial is not very helpful to make the design more uniform.
Although the design is not as uniform as we expect, I started to run some GP to check whether this design helps in prediction.
I know the goal of the project is to show that unifdist works better in estimating the “d”.
So, I fixed the nugget paramter as 0.2. I guess the more exact “d” is, the better prediction it will gives.
load("GP-1d.RData")
boxplot(maximin.rmse, ud.rmse, ud.rmse1, main = "rmse with 10 replications", col = c(2,3,4))
legend("topleft", c("maximin", "unifdist dmax=0.9", "unifdist dmax=1"), col=c(2,3,4), pch = c(1,1,1))
It looks like maximin design is better than unifdist in rmse.
The code I used for “GP-1d.RData” is attached below. Not pretty confident about it.
library(laGP)
source('ud.R')
## true surface with input x from 0 to 1
f <- function(x) (sin(pi*10*x/5) + 0.2*cos(4*pi*10*x/5)) + rnorm(1, sd=0.2)
## return the rmse, with input design x and y(x)
gpi.rmse <- function(x, f){
if(class(x)!="matrix"){ stop("x must be a matrix")}
y <- do.call('f', list(x))
gpi <- newGP(x, y, d = 0.1, g = 0.2, dK = T)
mle <- mleGP(gpi, param = "d", tmax = max(dist(x)))
xx <- matrix(seq(0, 1, length=40), ncol = 1)
yytrue <- do.call('f', list(xx))
p <- predGP(gpi, xx)
deleteGP(gpi)
rmse <- sqrt(mean((yytrue - p$mean)^2))
return(rmse)
}
# 1-d with 10 replicates
R <- 10
ud.rmse <- rep(NA, R)
ud.rmse1 <- rep(NA, R)
maximin.rmse <- rep(NA, R)
for( i in 1:R){
x <- ud.11(10,1,1,0.9)$X ## function with stopping condition: ksopt <=0.07 or T >=10^6
ud.rmse[i] <- gpi.rmse(x,f)
x <- ud.11(10,1,1,1)$X
ud.rmse1[i] <- gpi.rmse(x,f)
x <- maximin(10,1)
maximin.rmse[i] <- gpi.rmse(x,f)
}
Probably this is the correct approach to do the simulation. I generated gp with nugget = 0 and specifying nugget “g” = 0 in function newGP.
I haven’t run this with more iterations and under more settings.
My plan is to run this at d = seq(0,0.1,length.out=10) and seq(0.8,1,length.out=10), at least 30 replicates for each. It’s running right now.
library(plgp)
library(mvtnorm)
library(laGP)
##
## Attaching package: 'laGP'
## The following object is masked from 'package:plgp':
##
## distance
## returns estimated d parameter
gpi.d <- function(x, d){
n <- length(x)
D <- distance(x)
eps <- sqrt(.Machine$double.eps)
sigma <- exp(-D/d + diag(eps,n))
y <- rmvnorm(1, sigma=sigma)
gpi <- newGP(x, y, d = 0.1, g = 0, dK = T)
mle <- mleGP(gpi, param = "d", tmax = max(dist(x)))
deleteGP(gpi)
return(c(dtrue=d, dhat = mle$d))
}
d <- 0.8
ud1 <- ud.1(10,1,1,10^5, 0.9)
edfPlot(ud1)
designPlot(ud1)
x <- ud1$X
gpi.d(x, d)
## dtrue dhat
## 0.8000000 0.3120734
ud2 <- ud.1(10,1,1)
edfPlot(ud2)
designPlot(ud2)
x <- ud2$X
gpi.d(x, d)
## dtrue dhat
## 0.8000000 0.2695604
x <- maximin(10,1)
plot(x, rep(0, length(x)))
gpi.d(x, d)
## dtrue dhat
## 0.800000 0.237609
d <- 0.05
ud1 <- ud.1(10,1,1,10^5, 0.9)
edfPlot(ud1)
designPlot(ud1)
x <- ud1$X
gpi.d(x, d)
## dtrue dhat
## 0.05000000 0.05942989
ud2 <- ud.1(10,1,1)
edfPlot(ud2)
designPlot(ud2)
x <- ud2$X
gpi.d(x, d)
## dtrue dhat
## 0.05000000 0.03430093
x <- maximin(10,1)
plot(x, rep(0, length(x)))
gpi.d(x, d)
## dtrue dhat
## 0.05000000 0.07584913