R Markdown
read.ms.output <- function( file.ms.output=NA ) {
txt=NA
if( !is.na(file.ms.output) ) txt <- scan(file=file.ms.output,
what="character", sep="\n", quiet=TRUE)
if( is.na(txt[1]) ){
print("Usage: read.ms.output(txt), or read.ms.output(file=filename)")
return()
}
nsam <- as.integer( strsplit(txt[1], split=" ")[[1]][2] )
ndraws <- as.integer( strsplit( txt[1], split=" ")[[1]][3] )
h <- numeric()
result <- list()
gamlist <- list()
positions <- list()
marker <- grep("prob",txt)
probs <- sapply(strsplit(txt[marker], split=":"), function(vec) as.numeric(vec[2]))
marker <- grep("time",txt)
times <- sapply(strsplit(txt[marker], split="\t"), function(vec){ as.numeric(vec[2:3])} )
## THE OUTPUT TEXT FOR EACH DRAW SHOULD CONTAIN THE WORD "segsites"
marker <- grep("segsites", txt)
if( length(marker) != ndraws){
stop( paste("length: ", length(marker), " ndraws: ", ndraws) )
stopifnot(length(marker) == ndraws)
}
## GET NUMBERS OF SEGREGATING SITES IN EACH DRAW
segsites <- sapply(strsplit(txt[marker], split=" "), function(vec) as.integer(vec[2]) )
for(draw in seq(along=marker)) {
if(!(draw %% 100)) cat(draw, " ")
if(segsites[draw] > 0) {
tpos <- strsplit(txt[marker[draw]+1], split=" ")
positions[[draw]] <- as.numeric( tpos[[1]][ 2:(segsites[draw]+1) ] )
haplotypes <- txt[(marker[draw] + 2):(marker[draw] + 2 + nsam - 1)]
haplotypes <- strsplit(haplotypes, split="")
h <- sapply(haplotypes, function(el) c(as.integer(el)))
## IF THERE'S 1 SEGREGATING SITE, THIS WON'T BE A MATRIX
if(segsites[draw] == 1) h <- as.matrix(h)
## OTHERWISE, IT NEEDS TO BE TRANSPOSED
else h <- t(h)
}
else {
h <- matrix(nrow=nsam, ncol=0)
positions[[draw]]<- NA
}
gamlist[[draw]] <- h
stopifnot(all(dim(h) == c(nsam, segsites[draw])))
}
cat("\n")
list(segsites=segsites, gametes=gamlist, probs=probs, times=t(times), positions=positions, nsam=nsam, nreps=ndraws )
}
system("ms 10 1 -t 10 -G 10 > ms.out")
ms <- read.ms.output("ms.out")
data = ms$gametes[[1]]
ms$gametes
observation = ncol(ms$gametes[[1]])
observation
pairdif = function(data){
dif = 0
for (i in 1:(nrow(data)-1)){
for(j in (i+1):(nrow(data))){
dif = dif + sum(data[i,] != data[j,])
}
}
return(2*dif/(nrow(data)*(nrow(data)-1)))
}
stats = c(ncol(ms$gametes[[1]]), pairdif(ms$gametes[[1]]))
stats
reps = 10000
all.sim.stats = matrix(0, nrow=reps, ncol=length(stats))
all.sim.stats
all.theta = vector(mode = "numeric", reps)
for(i in 1:reps){
## G to kseroume
## t oxi
t = runif(1, 2, 100)
all.theta[i] = t
system(paste("ms 10 1 -t ", t, " -G 10 > ms.out", sep=" "))
simdata = read.ms.output("ms.out")
sim.stats = c(ncol(simdata$gametes[[1]]), pairdif((simdata$gametes[[1]])))
all.sim.stats[i,] = sim.stats
print(i)
}
all.sim.stats
eucl = function(m, obs){
difs = apply(m, 1, function(x){ sqrt(sum(obs - x)**2)})
return(difs)
}
difs = eucl(all.sim.stats, stats)
accepted.dif.indexes = which(difs < 5)
mean(all.theta[accepted.dif.indexes])
plot(density(all.theta), col='blue', ylim=c(0, 0.1))
points(density(all.theta[accepted.dif.indexes]), col='red', type='l')