What factors actually affect the MSE?

K-S distance? A larger proportion of large and small distances?

how to really make it miss the signal?

1-d

setwd("/home/boyazhang/repos/unifdist/code")
source("ud.R")
## Loading required package: mvtnorm
## Loading required package: tgp
## 
## Attaching package: 'EnvStats'
## The following objects are masked from 'package:stats':
## 
##     predict, predict.lm
## The following object is masked from 'package:base':
## 
##     print.default
library(plgp)
library(mvtnorm)
library(laGP)
## 
## Attaching package: 'laGP'
## The following object is masked from 'package:plgp':
## 
##     distance
# function to plot signal and design
# Draw one Gaussian-process sample path and show where a uniformly random
# design of size n samples it.
#
# The design points and a 100-point grid on [0, 1] are simulated jointly
# from a zero-mean Gaussian with covariance exp(-D/d) + eps * I, where
# D = distance(x) (laGP's pairwise squared distances) and eps is a small
# nugget for numerical stability.
#
# Args:
#   n: number of random design points (may be 0).
#   d: lengthscale of the covariance.
#
# Side effect: draws the path on the grid as a line and the design points
# as symbols on the current graphics device.
signalmiss_r <- function(n, d){
  xx <- runif(n)
  z <- seq(0, 1, length.out = 100)
  x <- c(xx, z)
  l <- length(x)
  # The first length(xx) entries of x are the design; the rest are the grid.
  # A logical mask (rather than 1:length(xx)) stays correct when n is 0.
  is_design <- seq_len(l) <= length(xx)
  D <- distance(x)
  eps <- sqrt(.Machine$double.eps)
  # Nugget belongs outside exp(): adding it to the exponent only worked
  # because exp(eps) is approximately 1 + eps.
  sigma <- exp(-D/d) + diag(eps, l) ## covariance matrix
  y <- rmvnorm(1, sigma = sigma)
  # ylim spans design and grid values so no design point is clipped.
  plot(z, y[!is_design], type = "l", ylab = "y", ylim = range(y),
       main = paste("random, d =", d))
  points(xx, y[is_design])
}

# Draw one Gaussian-process sample path and show where a design produced
# by ud() samples it.
#
# Same construction as the random-design version, except the design comes
# from ud(n, 1, 1, 10^5, 1)$X (ud() is defined in the sourced ud.R; the
# meaning of its remaining arguments is not visible here -- TODO confirm).
# Design and a 100-point grid on [0, 1] are simulated jointly from a
# zero-mean Gaussian with covariance exp(-D/d) + eps * I.
#
# Args:
#   n: number of design points requested from ud().
#   d: lengthscale of the covariance.
#
# Side effect: draws the path on the grid as a line and the design points
# as symbols on the current graphics device.
signalmiss_ud <- function(n, d){
  xx <- ud(n,1,1,10^5,1)$X
  z <- seq(0, 1, length.out = 100)
  x <- c(xx, z)
  l <- length(x)
  # The first length(xx) entries of x are the design; the rest are the grid.
  # A logical mask (rather than 1:length(xx)) stays correct for an empty design.
  is_design <- seq_len(l) <= length(xx)
  D <- distance(x)
  eps <- sqrt(.Machine$double.eps)
  # Nugget belongs outside exp(): adding it to the exponent only worked
  # because exp(eps) is approximately 1 + eps.
  sigma <- exp(-D/d) + diag(eps, l) ## covariance matrix
  y <- rmvnorm(1, sigma = sigma)
  # ylim spans design and grid values so no design point is clipped.
  plot(z, y[!is_design], type = "l", ylab = "y", ylim = range(y),
       main = paste("unifdist, d =", d))
  points(xx, y[is_design])
}

# Six panels per page, filled column by column.
par(mfcol = c(3,2))
n <- 7

# Short lengthscales: six values from machine epsilon up to 0.1.
# For each one, draw the random design first, then the ud() design.
d <- seq(from = .Machine$double.eps, to = 0.1, length.out = 6)
for(i in seq_along(d)){
  signalmiss_r(n, d[i])
  signalmiss_ud(n, d[i])
}

# Longer lengthscales: six values from 0.1 up to 1, same plotting order.
d <- seq(from = 0.1, to = 1, length.out = 6)
for(i in seq_along(d)){
  signalmiss_r(n, d[i])
  signalmiss_ud(n, d[i])
}

## when d is larger than 0.1, it's hard to miss the signal
## calculate mse of parameter d with input design
# Simulation settings, read as globals by the code that follows.
I <- 1000  # Monte Carlo replicates per design (loop bound inside gpi.mse)
n <- 8     # number of design points (rows of each random design)
dim <- 2   # input dimension (columns of each random design)

### relationship of k-s distance and mse 
# Monte Carlo estimate of the MSE of the lengthscale MLE for a fixed design.
#
# For each of `I` replicates (`I` is read from the calling environment):
#   1. draw a true lengthscale uniformly on (0, 1);
#   2. simulate one response vector at the design from a zero-mean Gaussian
#      with covariance exp(-D/d_true) + eps * I, where D = distance(x);
#   3. fit a laGP GP (initial d = 0.1, nugget eps) and estimate d with
#      mleGP(), then free the GP object.
#
# Args:
#   x:     design matrix, one row per design point.
#   drate: unused; kept so existing calls keep working.
#
# Returns: the mean squared difference between true and estimated
#   lengthscales over the `I` replicates.
gpi.mse <- function(x, drate){
  D <- distance(x)
  # Size the nugget from the design actually passed in, not a global `n`.
  n_pts <- nrow(D)
  eps <- sqrt(.Machine$double.eps)
  nugget <- diag(eps, n_pts)
  dtrue <- runif(I)
  dhat <- rep(NA_real_, I)
  for(i in seq_len(I)){
    # Nugget is added to the covariance, not to the exponent.
    sigma <- exp(-D/dtrue[i]) + nugget
    y <- rmvnorm(1, sigma = sigma)
    # Spell out TRUE: the shorthand `T` can be (and in this script is)
    # reassigned to a number before this function runs.
    gpi <- newGP(x, y, d = 0.1, g = eps, dK = TRUE)
    # Always release the GP object, even if the optimiser errors.
    dhat[i] <- tryCatch(
      mleGP(gpi, param = "d", tmax = 10)$d,
      finally = deleteGP(gpi)
    )
  }
  mean((dtrue - dhat)^2)
}

# Number of random designs to evaluate.
# NOTE(review): `T` shadows R's shorthand for TRUE from here on; any code
# evaluated later that writes `T` meaning TRUE will see 300 -- consider
# renaming once all readers of this global are known.
T <- 300
ksd <- rep(NA_real_, T)
rand.mse <- rep(NA_real_, T)

## reference sample for the K-S comparison: an evenly spaced grid of 1000
## values from machine epsilon to sqrt(dim); it does not change between
## iterations, so it is built once
du <- seq(from = .Machine$double.eps , to = sqrt(dim), length.out = 1000)

for(i in seq_len(T)){
  # fresh random design: n points in `dim` dimensions
  x <- matrix(runif(n*dim), ncol = dim)
  # pairwise distances of the design, compared against the reference grid
  dx <- dist(x)
  ksd[i] <- ks.test(dx, du)$statistic
  # Monte Carlo MSE of the lengthscale estimate for this design
  rand.mse[i] <- gpi.mse(x)
}

# correlation between K-S statistic and MSE across designs
cor(ksd, rand.mse)
## [1] -0.03533505
plot(ksd,rand.mse)