library(gdata)
## gdata: read.xls support for 'XLS' (Excel 97-2004) files ENABLED.
##
## gdata: read.xls support for 'XLSX' (Excel 2007+) files ENABLED.
##
## Attaching package: 'gdata'
## The following object is masked from 'package:stats':
##
## nobs
## The following object is masked from 'package:utils':
##
## object.size
## The following object is masked from 'package:base':
##
## startsWith
# update only the changed row and column of distance matrix
# use pbeta instead of ks.test
bd <- function(n, m, T = 10^5, shape1 = 3, shape2 = 6){
k <- 1
dmax <- 1
X1 <- matrix(runif(n*m), ncol=m)
dX1 <- as.matrix(dist(X1, diag = T, upper = T))
f1 <- cumsum(rep(1,n*(n-1)/2))/(n*(n-1)/2)
ks1 <- myks_beta(upperTriangle(dX1), f1, dim=m, shape1, shape2) ## k-s test
dprime <- dX1
for(t in 1:T) {
row <- sample(1:n, k)
xold <- X1[row,] ## random row selection
X1[row,] <- matrix(runif(m*k), ncol = m) ## random new row
olddist <- dprime[row,]
XR <- matrix(rep(X1[row,],n), nrow = n, byrow = T)
newdist <- sqrt(rowSums((XR-X1)^2))
dprime[row,] <- newdist
dprime[,row] <- newdist
ksprime <- myks_beta(upperTriangle(dprime), f1, dim=m, shape1, shape2)
if(ksprime < ks1) { ks1 <- ksprime ## accept
} else { X1[row,] <- xold; dprime[,row] <- olddist;
dprime[row,] <- olddist} ## reject
}
return(list(n = n, dim = m, dmax = dmax, X = X1, ksopt = ks1))
}
myks_beta <- function(d, f1, dim, shape1, shape2){
d <- sort(d)/sqrt(dim)
f2 <- pbeta(d, shape1 = shape1, shape2 = shape2)
return(max(abs(f2-f1)))
}
# calculate the whole distance matrix but use pbeta instead of ks.test
bd1 <- function(n, m, T = 10^5, shape1 = 3, shape2 = 6){
k <- 1
dmax <- 1
X1 <- matrix(runif(n*m), ncol=m)
dX1 <- dist(X1)
f1 <- cumsum(rep(1,n*(n-1)/2))/(n*(n-1)/2)
ks1 <- myks_beta(dX1, f1, dim=m, shape1, shape2) ## k-s test
for(t in 1:T) {
row <- sample(1:n, k)
xold <- X1[row,] ## random row selection
X1[row,] <- matrix(runif(m*k), ncol = m) ## random new row
dprime <- dist(X1)
ksprime <- myks_beta(dprime, f1, dim=m, shape1, shape2)
if(ksprime < ks1) { ks1 <- ksprime ## accept
} else { X1[row,] <- xold} ## reject
}
return(list(n = n, dim = m, dmax = dmax, X = X1, ksopt = ks1))
}
#system.time(test <- bd(8,2,10^5))
bd_old <- function(n, m, T = 10^5, shape1 = 3, shape2 = 6){
k <- 1
dmax <- 1
X1 <- matrix(runif(n*m), ncol=m)
dX1 <- dist(X1)
du <- dmax*rbeta(1000, shape1, shape2)*sqrt(m) ## true uniform distances
kst <- ks.test(dX1, du) ## k-s test
ks1 <- kst$statistic ## k-s distance of two distributions
for(t in 1:T) {
row <- sample(1:n, k)
xold <- X1[row,] ## random row selection
X1[row,] <- matrix(runif(m*k), ncol = m) ## random new row
dprime <- dist(X1)
kstprime <- ks.test(dprime, du)
ksprime <- kstprime$statistic
if(ksprime < ks1) { ks1 <- ksprime ## accept
} else { X1[row,] <- xold } ## reject
}
return(list(n = n, dim = m, dmax = dmax, X = X1, ksopt = ks1))
}
#system.time(bd_old(8,2))
maximin <- function (n, m, T = 1e+05, Xorig = NULL)
{
X <- matrix(runif(n * m), ncol = m)
X <- rbind(X, Xorig)
d <- distance(X)
d <- as.numeric(d[upper.tri(d)])
md <- min(d)
for (t in 1:T) {
row <- sample(1:n, 1)
xold <- X[row, ]
X[row, ] <- runif(m)
dprime <- distance(X)
dprime <- as.numeric(dprime[upper.tri(dprime)])
mdprime <- min(dprime)
if (mdprime > md) {
md <- mdprime
}
else {
X[row, ] <- xold
}
}
return(X)
}
## compare ks.test and myks_beta
# myks_beta
myks_beta <- function(d, f1, dim, shape1, shape2){
d <- sort(d)/sqrt(dim)
f2 <- pbeta(d, shape1 = shape1, shape2 = shape2)
return(max(abs(f2-f1)))
}
# compare the two functions for different n
n <- 8
x <- runif(choose(n,2))
y <- rbeta(1000,3,6)
f1 <- cumsum(rep(1,n*(n-1)/2))/(n*(n-1)/2)
system.time(for(i in 1:10^4){myks_beta(x, f1, 2, 3, 6)})
## user system elapsed
## 0.35 0.00 0.35
system.time(for(i in 1:10^4){ks.test(x, y)})
## user system elapsed
## 1.882 0.000 1.883
n <- 32
x <- runif(choose(n,2))
y <- rbeta(1000,3,6)
f1 <- cumsum(rep(1,n*(n-1)/2))/(n*(n-1)/2)
system.time(for(i in 1:10^4){myks_beta(x, f1, 2, 3, 6)})
## user system elapsed
## 1.888 0.000 1.888
system.time(for(i in 1:10^4){ks.test(x, y)})
## user system elapsed
## 2.588 0.000 2.588
n <- 48
x <- runif(choose(n,2))
y <- rbeta(1000,3,6)
f1 <- cumsum(rep(1,n*(n-1)/2))/(n*(n-1)/2)
system.time(for(i in 1:10^4){myks_beta(x, f1, 2, 3, 6)})
## user system elapsed
## 3.955 0.000 3.955
system.time(for(i in 1:10^4){ks.test(x, y)})
## user system elapsed
## 3.54 0.00 3.54
n <- 60
x <- runif(choose(n,2))
y <- rbeta(1000,3,6)
f1 <- cumsum(rep(1,n*(n-1)/2))/(n*(n-1)/2)
system.time(for(i in 1:10^4){myks_beta(x, f1, 2, 3, 6)})
## user system elapsed
## 6.074 0.000 6.074
system.time(for(i in 1:10^4){ks.test(x, y)})
## user system elapsed
## 4.479 0.000 4.478
Rprof("Rprof1", memory.profiling = T)
for( i in 1:10^4){myks_beta(x, f1, 2, 3, 6)}
Rprof(NULL)
summaryRprof("Rprof1", memory = "both")
## $by.self
## self.time self.pct total.time total.pct mem.total
## "pbeta" 4.76 78.03 4.76 78.03 362.8
## "order" 0.76 12.46 0.90 14.75 74.7
## "sort.int" 0.16 2.62 1.10 18.03 85.8
## "myks_beta" 0.12 1.97 6.10 100.00 474.5
## "formals" 0.08 1.31 0.08 1.31 2.9
## "sort" 0.06 0.98 1.20 19.67 94.8
## "eval" 0.04 0.66 6.10 100.00 474.5
## "sort.default" 0.04 0.66 1.14 18.69 88.8
## "match.arg" 0.04 0.66 0.18 2.95 8.8
## "abs" 0.02 0.33 0.02 0.33 2.4
## "c" 0.02 0.33 0.02 0.33 0.0
##
## $by.total
## total.time total.pct mem.total self.time self.pct
## "myks_beta" 6.10 100.00 474.5 0.12 1.97
## "eval" 6.10 100.00 474.5 0.04 0.66
## "block_exec" 6.10 100.00 474.5 0.00 0.00
## "call_block" 6.10 100.00 474.5 0.00 0.00
## "evaluate" 6.10 100.00 474.5 0.00 0.00
## "evaluate_call" 6.10 100.00 474.5 0.00 0.00
## "evaluate::evaluate" 6.10 100.00 474.5 0.00 0.00
## "handle" 6.10 100.00 474.5 0.00 0.00
## "in_dir" 6.10 100.00 474.5 0.00 0.00
## "knitr::knit" 6.10 100.00 474.5 0.00 0.00
## "process_file" 6.10 100.00 474.5 0.00 0.00
## "process_group" 6.10 100.00 474.5 0.00 0.00
## "process_group.block" 6.10 100.00 474.5 0.00 0.00
## "rmarkdown::render" 6.10 100.00 474.5 0.00 0.00
## "timing_fn" 6.10 100.00 474.5 0.00 0.00
## "withCallingHandlers" 6.10 100.00 474.5 0.00 0.00
## "withVisible" 6.10 100.00 474.5 0.00 0.00
## "pbeta" 4.76 78.03 362.8 4.76 78.03
## "sort" 1.20 19.67 94.8 0.06 0.98
## "sort.default" 1.14 18.69 88.8 0.04 0.66
## "sort.int" 1.10 18.03 85.8 0.16 2.62
## "order" 0.90 14.75 74.7 0.76 12.46
## "match.arg" 0.18 2.95 8.8 0.04 0.66
## "formals" 0.08 1.31 2.9 0.08 1.31
## "abs" 0.02 0.33 2.4 0.02 0.33
## "c" 0.02 0.33 0.0 0.02 0.33
##
## $sample.interval
## [1] 0.02
##
## $sampling.time
## [1] 6.1
Rprof("Rprof1", memory.profiling = T)
for( i in 1:10^4){ks.test(x,y)}
Rprof(NULL)
summaryRprof("Rprof1", memory = "both")
## $by.self
## self.time self.pct total.time total.pct mem.total
## "ifelse" 1.62 34.18 2.88 60.76 141.6
## "order" 1.02 21.52 1.22 25.74 59.8
## "unique.default" 0.62 13.08 0.62 13.08 34.4
## "cumsum" 0.54 11.39 0.54 11.39 28.5
## "ks.test" 0.26 5.49 4.74 100.00 225.9
## "match.arg" 0.10 2.11 0.18 3.80 9.6
## "c" 0.06 1.27 0.06 1.27 2.1
## "max" 0.06 1.27 0.06 1.27 1.4
## "unique" 0.04 0.84 0.66 13.92 34.4
## "deparse" 0.04 0.84 0.18 3.80 2.0
## ".deparseOpts" 0.04 0.84 0.08 1.69 1.9
## "abs" 0.04 0.84 0.04 0.84 0.2
## "pkstwo" 0.04 0.84 0.04 0.84 0.0
## "pmatch" 0.04 0.84 0.04 0.84 1.9
## "eval" 0.02 0.42 4.74 100.00 225.9
## "paste" 0.02 0.42 0.08 1.69 0.2
## "formals" 0.02 0.42 0.04 0.84 0.0
## "lapply" 0.02 0.42 0.04 0.84 1.0
## "vapply" 0.02 0.42 0.04 0.84 3.2
## "-" 0.02 0.42 0.02 0.42 0.1
## "baseenv" 0.02 0.42 0.02 0.42 2.7
## "FUN" 0.02 0.42 0.02 0.42 1.0
## "match.fun" 0.02 0.42 0.02 0.42 0.1
## "mode" 0.02 0.42 0.02 0.42 0.0
## "sys.function" 0.02 0.42 0.02 0.42 0.0
##
## $by.total
## total.time total.pct mem.total self.time self.pct
## "ks.test" 4.74 100.00 225.9 0.26 5.49
## "eval" 4.74 100.00 225.9 0.02 0.42
## "block_exec" 4.74 100.00 225.9 0.00 0.00
## "call_block" 4.74 100.00 225.9 0.00 0.00
## "evaluate" 4.74 100.00 225.9 0.00 0.00
## "evaluate_call" 4.74 100.00 225.9 0.00 0.00
## "evaluate::evaluate" 4.74 100.00 225.9 0.00 0.00
## "handle" 4.74 100.00 225.9 0.00 0.00
## "in_dir" 4.74 100.00 225.9 0.00 0.00
## "knitr::knit" 4.74 100.00 225.9 0.00 0.00
## "process_file" 4.74 100.00 225.9 0.00 0.00
## "process_group" 4.74 100.00 225.9 0.00 0.00
## "process_group.block" 4.74 100.00 225.9 0.00 0.00
## "rmarkdown::render" 4.74 100.00 225.9 0.00 0.00
## "timing_fn" 4.74 100.00 225.9 0.00 0.00
## "withCallingHandlers" 4.74 100.00 225.9 0.00 0.00
## "withVisible" 4.74 100.00 225.9 0.00 0.00
## "ifelse" 2.88 60.76 141.6 1.62 34.18
## "order" 1.22 25.74 59.8 1.02 21.52
## "unique" 0.66 13.92 34.4 0.04 0.84
## "unique.default" 0.62 13.08 34.4 0.62 13.08
## "cumsum" 0.54 11.39 28.5 0.54 11.39
## "match.arg" 0.18 3.80 9.6 0.10 2.11
## "deparse" 0.18 3.80 2.0 0.04 0.84
## ".deparseOpts" 0.08 1.69 1.9 0.04 0.84
## "paste" 0.08 1.69 0.2 0.02 0.42
## "c" 0.06 1.27 2.1 0.06 1.27
## "max" 0.06 1.27 1.4 0.06 1.27
## "abs" 0.04 0.84 0.2 0.04 0.84
## "pkstwo" 0.04 0.84 0.0 0.04 0.84
## "pmatch" 0.04 0.84 1.9 0.04 0.84
## "formals" 0.04 0.84 0.0 0.02 0.42
## "lapply" 0.04 0.84 1.0 0.02 0.42
## "vapply" 0.04 0.84 3.2 0.02 0.42
## "%in%" 0.04 0.84 0.0 0.00 0.00
## "unlist" 0.04 0.84 1.0 0.00 0.00
## "-" 0.02 0.42 0.1 0.02 0.42
## "baseenv" 0.02 0.42 2.7 0.02 0.42
## "FUN" 0.02 0.42 1.0 0.02 0.42
## "match.fun" 0.02 0.42 0.1 0.02 0.42
## "mode" 0.02 0.42 0.0 0.02 0.42
## "sys.function" 0.02 0.42 0.0 0.02 0.42
##
## $sample.interval
## [1] 0.02
##
## $sampling.time
## [1] 4.74
## Time-consuming part of the two functions
### ks.test statistic calculation, no-tie case
n.x <- length(x)
n.y <- length(y)
w <- c(x,y)
z <- cumsum(ifelse(order(w) <= n.x, 1/n.x, -1/n.y))
STATISTIC <- max(abs(z))
# myks_beta: sort and pbeta are both O(n^2) while the latter is slower
myks_beta <- function(d, f1, dim, shape1, shape2){
d <- sort(d)/sqrt(dim)
f2 <- pbeta(d, shape1 = shape1, shape2 = shape2) # O(n^2)
return(max(abs(f2-f1)))
}
system.time(for(i in 1:10^4){pbeta(x, 3, 6)})
## user system elapsed
## 4.697 0.000 4.698
system.time(for(i in 1:10^4){sort(x)})
## user system elapsed
## 0.915 0.000 0.915