library(gdata)
## gdata: read.xls support for 'XLS' (Excel 97-2004) files ENABLED.
## 
## gdata: read.xls support for 'XLSX' (Excel 2007+) files ENABLED.
## 
## Attaching package: 'gdata'
## The following object is masked from 'package:stats':
## 
##     nobs
## The following object is masked from 'package:utils':
## 
##     object.size
## The following object is masked from 'package:base':
## 
##     startsWith
# update only the changed row and column of distance matrix
# use pbeta instead of ks.test
bd <- function(n, m, T = 10^5, shape1 = 3, shape2 = 6){
  k <- 1
  dmax <- 1
  X1 <- matrix(runif(n*m), ncol=m)
  dX1 <- as.matrix(dist(X1, diag = T, upper = T))
  f1 <- cumsum(rep(1,n*(n-1)/2))/(n*(n-1)/2)
  ks1 <- myks_beta(upperTriangle(dX1), f1, dim=m, shape1, shape2) ## k-s test
  dprime <- dX1
  for(t in 1:T) {
    row <- sample(1:n, k)
    xold <- X1[row,]       ## random row selection
    X1[row,] <- matrix(runif(m*k), ncol = m)   ## random new row
    olddist <- dprime[row,]
    XR <- matrix(rep(X1[row,],n), nrow = n, byrow = T)
    newdist <- sqrt(rowSums((XR-X1)^2))
    dprime[row,] <- newdist
    dprime[,row] <- newdist
    ksprime <- myks_beta(upperTriangle(dprime), f1, dim=m, shape1, shape2)
    if(ksprime < ks1) { ks1 <- ksprime  ## accept
    } else { X1[row,] <- xold; dprime[,row] <- olddist; 
    dprime[row,] <- olddist}         ## reject
  }
  return(list(n = n, dim = m, dmax = dmax, X = X1, ksopt = ks1))
}


myks_beta <- function(d, f1, dim, shape1, shape2){
  d <- sort(d)/sqrt(dim)
  f2 <- pbeta(d, shape1 = shape1, shape2 = shape2)
  return(max(abs(f2-f1)))
}

# calculate the whole distance matrix but use pbeta instead of ks.test
bd1 <- function(n, m, T = 10^5, shape1 = 3, shape2 = 6){
  k <- 1
  dmax <- 1
  X1 <- matrix(runif(n*m), ncol=m)
  dX1 <- dist(X1)
  f1 <- cumsum(rep(1,n*(n-1)/2))/(n*(n-1)/2)
  ks1 <- myks_beta(dX1, f1, dim=m, shape1, shape2) ## k-s test
  for(t in 1:T) {
    row <- sample(1:n, k)
    xold <- X1[row,]       ## random row selection
    X1[row,] <- matrix(runif(m*k), ncol = m)   ## random new row
    dprime <- dist(X1)
    ksprime <- myks_beta(dprime, f1, dim=m, shape1, shape2)
    if(ksprime < ks1) { ks1 <- ksprime  ## accept
    } else { X1[row,] <- xold}         ## reject
  }
  return(list(n = n, dim = m, dmax = dmax, X = X1, ksopt = ks1))
}
#system.time(test <- bd(8,2,10^5))

bd_old <- function(n, m, T = 10^5, shape1 = 3, shape2 = 6){
  k <- 1
  dmax <- 1
  X1 <- matrix(runif(n*m), ncol=m)
  dX1 <- dist(X1)
  du <- dmax*rbeta(1000, shape1, shape2)*sqrt(m)  ## true uniform distances
  kst <- ks.test(dX1, du) ## k-s test
  ks1 <- kst$statistic ## k-s distance of two distributions
  for(t in 1:T) {
    row <- sample(1:n, k)
    xold <- X1[row,]       ## random row selection
    X1[row,] <- matrix(runif(m*k), ncol = m)   ## random new row
    dprime <- dist(X1)
    kstprime <- ks.test(dprime, du)
    ksprime <- kstprime$statistic
    if(ksprime < ks1) { ks1 <- ksprime  ## accept
    } else { X1[row,] <- xold   }         ## reject
  }
  return(list(n = n, dim = m, dmax = dmax, X = X1, ksopt = ks1))
}
#system.time(bd_old(8,2))


maximin <- function (n, m, T = 1e+05, Xorig = NULL) 
{
  X <- matrix(runif(n * m), ncol = m)
  X <- rbind(X, Xorig)
  d <- distance(X)
  d <- as.numeric(d[upper.tri(d)])
  md <- min(d)
  for (t in 1:T) {
    row <- sample(1:n, 1)
    xold <- X[row, ]
    X[row, ] <- runif(m)
    dprime <- distance(X)
    dprime <- as.numeric(dprime[upper.tri(dprime)])
    mdprime <- min(dprime)
    if (mdprime > md) {
      md <- mdprime
    }
    else {
      X[row, ] <- xold
    }
  }
  return(X)
}
## compare ks.test and myks_beta
# myks_beta
myks_beta <- function(d, f1, dim, shape1, shape2){
  d <- sort(d)/sqrt(dim)
  f2 <- pbeta(d, shape1 = shape1, shape2 = shape2)
  return(max(abs(f2-f1)))
}
# compare the two functions for different n
n <- 8
x <- runif(choose(n,2))
y <- rbeta(1000,3,6)
f1 <- cumsum(rep(1,n*(n-1)/2))/(n*(n-1)/2)
system.time(for(i in 1:10^4){myks_beta(x, f1, 2, 3, 6)})
##    user  system elapsed 
##    0.35    0.00    0.35
system.time(for(i in 1:10^4){ks.test(x, y)})
##    user  system elapsed 
##   1.882   0.000   1.883
n <- 32
x <- runif(choose(n,2))
y <- rbeta(1000,3,6)
f1 <- cumsum(rep(1,n*(n-1)/2))/(n*(n-1)/2)
system.time(for(i in 1:10^4){myks_beta(x, f1, 2, 3, 6)})
##    user  system elapsed 
##   1.888   0.000   1.888
system.time(for(i in 1:10^4){ks.test(x, y)})
##    user  system elapsed 
##   2.588   0.000   2.588
n <- 48
x <- runif(choose(n,2))
y <- rbeta(1000,3,6)
f1 <- cumsum(rep(1,n*(n-1)/2))/(n*(n-1)/2)
system.time(for(i in 1:10^4){myks_beta(x, f1, 2, 3, 6)})
##    user  system elapsed 
##   3.955   0.000   3.955
system.time(for(i in 1:10^4){ks.test(x, y)})
##    user  system elapsed 
##    3.54    0.00    3.54
n <- 60
x <- runif(choose(n,2))
y <- rbeta(1000,3,6)
f1 <- cumsum(rep(1,n*(n-1)/2))/(n*(n-1)/2)
system.time(for(i in 1:10^4){myks_beta(x, f1, 2, 3, 6)})
##    user  system elapsed 
##   6.074   0.000   6.074
system.time(for(i in 1:10^4){ks.test(x, y)})
##    user  system elapsed 
##   4.479   0.000   4.478
Rprof("Rprof1", memory.profiling = T)
for( i in 1:10^4){myks_beta(x, f1, 2, 3, 6)}
Rprof(NULL)
summaryRprof("Rprof1", memory = "both")
## $by.self
##                self.time self.pct total.time total.pct mem.total
## "pbeta"             4.76    78.03       4.76     78.03     362.8
## "order"             0.76    12.46       0.90     14.75      74.7
## "sort.int"          0.16     2.62       1.10     18.03      85.8
## "myks_beta"         0.12     1.97       6.10    100.00     474.5
## "formals"           0.08     1.31       0.08      1.31       2.9
## "sort"              0.06     0.98       1.20     19.67      94.8
## "eval"              0.04     0.66       6.10    100.00     474.5
## "sort.default"      0.04     0.66       1.14     18.69      88.8
## "match.arg"         0.04     0.66       0.18      2.95       8.8
## "abs"               0.02     0.33       0.02      0.33       2.4
## "c"                 0.02     0.33       0.02      0.33       0.0
## 
## $by.total
##                       total.time total.pct mem.total self.time self.pct
## "myks_beta"                 6.10    100.00     474.5      0.12     1.97
## "eval"                      6.10    100.00     474.5      0.04     0.66
## "block_exec"                6.10    100.00     474.5      0.00     0.00
## "call_block"                6.10    100.00     474.5      0.00     0.00
## "evaluate"                  6.10    100.00     474.5      0.00     0.00
## "evaluate_call"             6.10    100.00     474.5      0.00     0.00
## "evaluate::evaluate"        6.10    100.00     474.5      0.00     0.00
## "handle"                    6.10    100.00     474.5      0.00     0.00
## "in_dir"                    6.10    100.00     474.5      0.00     0.00
## "knitr::knit"               6.10    100.00     474.5      0.00     0.00
## "process_file"              6.10    100.00     474.5      0.00     0.00
## "process_group"             6.10    100.00     474.5      0.00     0.00
## "process_group.block"       6.10    100.00     474.5      0.00     0.00
## "rmarkdown::render"         6.10    100.00     474.5      0.00     0.00
## "timing_fn"                 6.10    100.00     474.5      0.00     0.00
## "withCallingHandlers"       6.10    100.00     474.5      0.00     0.00
## "withVisible"               6.10    100.00     474.5      0.00     0.00
## "pbeta"                     4.76     78.03     362.8      4.76    78.03
## "sort"                      1.20     19.67      94.8      0.06     0.98
## "sort.default"              1.14     18.69      88.8      0.04     0.66
## "sort.int"                  1.10     18.03      85.8      0.16     2.62
## "order"                     0.90     14.75      74.7      0.76    12.46
## "match.arg"                 0.18      2.95       8.8      0.04     0.66
## "formals"                   0.08      1.31       2.9      0.08     1.31
## "abs"                       0.02      0.33       2.4      0.02     0.33
## "c"                         0.02      0.33       0.0      0.02     0.33
## 
## $sample.interval
## [1] 0.02
## 
## $sampling.time
## [1] 6.1
Rprof("Rprof1", memory.profiling = T)
for( i in 1:10^4){ks.test(x,y)}
Rprof(NULL)
summaryRprof("Rprof1", memory = "both")
## $by.self
##                  self.time self.pct total.time total.pct mem.total
## "ifelse"              1.62    34.18       2.88     60.76     141.6
## "order"               1.02    21.52       1.22     25.74      59.8
## "unique.default"      0.62    13.08       0.62     13.08      34.4
## "cumsum"              0.54    11.39       0.54     11.39      28.5
## "ks.test"             0.26     5.49       4.74    100.00     225.9
## "match.arg"           0.10     2.11       0.18      3.80       9.6
## "c"                   0.06     1.27       0.06      1.27       2.1
## "max"                 0.06     1.27       0.06      1.27       1.4
## "unique"              0.04     0.84       0.66     13.92      34.4
## "deparse"             0.04     0.84       0.18      3.80       2.0
## ".deparseOpts"        0.04     0.84       0.08      1.69       1.9
## "abs"                 0.04     0.84       0.04      0.84       0.2
## "pkstwo"              0.04     0.84       0.04      0.84       0.0
## "pmatch"              0.04     0.84       0.04      0.84       1.9
## "eval"                0.02     0.42       4.74    100.00     225.9
## "paste"               0.02     0.42       0.08      1.69       0.2
## "formals"             0.02     0.42       0.04      0.84       0.0
## "lapply"              0.02     0.42       0.04      0.84       1.0
## "vapply"              0.02     0.42       0.04      0.84       3.2
## "-"                   0.02     0.42       0.02      0.42       0.1
## "baseenv"             0.02     0.42       0.02      0.42       2.7
## "FUN"                 0.02     0.42       0.02      0.42       1.0
## "match.fun"           0.02     0.42       0.02      0.42       0.1
## "mode"                0.02     0.42       0.02      0.42       0.0
## "sys.function"        0.02     0.42       0.02      0.42       0.0
## 
## $by.total
##                       total.time total.pct mem.total self.time self.pct
## "ks.test"                   4.74    100.00     225.9      0.26     5.49
## "eval"                      4.74    100.00     225.9      0.02     0.42
## "block_exec"                4.74    100.00     225.9      0.00     0.00
## "call_block"                4.74    100.00     225.9      0.00     0.00
## "evaluate"                  4.74    100.00     225.9      0.00     0.00
## "evaluate_call"             4.74    100.00     225.9      0.00     0.00
## "evaluate::evaluate"        4.74    100.00     225.9      0.00     0.00
## "handle"                    4.74    100.00     225.9      0.00     0.00
## "in_dir"                    4.74    100.00     225.9      0.00     0.00
## "knitr::knit"               4.74    100.00     225.9      0.00     0.00
## "process_file"              4.74    100.00     225.9      0.00     0.00
## "process_group"             4.74    100.00     225.9      0.00     0.00
## "process_group.block"       4.74    100.00     225.9      0.00     0.00
## "rmarkdown::render"         4.74    100.00     225.9      0.00     0.00
## "timing_fn"                 4.74    100.00     225.9      0.00     0.00
## "withCallingHandlers"       4.74    100.00     225.9      0.00     0.00
## "withVisible"               4.74    100.00     225.9      0.00     0.00
## "ifelse"                    2.88     60.76     141.6      1.62    34.18
## "order"                     1.22     25.74      59.8      1.02    21.52
## "unique"                    0.66     13.92      34.4      0.04     0.84
## "unique.default"            0.62     13.08      34.4      0.62    13.08
## "cumsum"                    0.54     11.39      28.5      0.54    11.39
## "match.arg"                 0.18      3.80       9.6      0.10     2.11
## "deparse"                   0.18      3.80       2.0      0.04     0.84
## ".deparseOpts"              0.08      1.69       1.9      0.04     0.84
## "paste"                     0.08      1.69       0.2      0.02     0.42
## "c"                         0.06      1.27       2.1      0.06     1.27
## "max"                       0.06      1.27       1.4      0.06     1.27
## "abs"                       0.04      0.84       0.2      0.04     0.84
## "pkstwo"                    0.04      0.84       0.0      0.04     0.84
## "pmatch"                    0.04      0.84       1.9      0.04     0.84
## "formals"                   0.04      0.84       0.0      0.02     0.42
## "lapply"                    0.04      0.84       1.0      0.02     0.42
## "vapply"                    0.04      0.84       3.2      0.02     0.42
## "%in%"                      0.04      0.84       0.0      0.00     0.00
## "unlist"                    0.04      0.84       1.0      0.00     0.00
## "-"                         0.02      0.42       0.1      0.02     0.42
## "baseenv"                   0.02      0.42       2.7      0.02     0.42
## "FUN"                       0.02      0.42       1.0      0.02     0.42
## "match.fun"                 0.02      0.42       0.1      0.02     0.42
## "mode"                      0.02      0.42       0.0      0.02     0.42
## "sys.function"              0.02      0.42       0.0      0.02     0.42
## 
## $sample.interval
## [1] 0.02
## 
## $sampling.time
## [1] 4.74
## Time-consuming part of the two functions
### ks.test statistic calculation, no-tie case
n.x <- length(x)
n.y <- length(y)
w <- c(x,y)
z <- cumsum(ifelse(order(w) <= n.x, 1/n.x, -1/n.y))
STATISTIC <- max(abs(z))
  
# myks_beta: sort and pbeta are both O(n^2) while the latter is slower
myks_beta <- function(d, f1, dim, shape1, shape2){
  d <- sort(d)/sqrt(dim)
  f2 <- pbeta(d, shape1 = shape1, shape2 = shape2) # O(n^2)
  return(max(abs(f2-f1)))
}

system.time(for(i in 1:10^4){pbeta(x, 3, 6)})
##    user  system elapsed 
##   4.697   0.000   4.698
system.time(for(i in 1:10^4){sort(x)})
##    user  system elapsed 
##   0.915   0.000   0.915