library("Rcpp")
setwd("/media/ekaterina/Data/Study/Magistracy/10term/NIR/Prog/")
sourceCpp("Rccp/ssw_R.cpp")
source("se.R")
SubstituteSymbol <- function(x,k, len_alphabet)
{
symbol <- sample(c("A", "C", "G", "T"), size = 1)
substr(x,k,k) <- symbol
return(x)
}
InsertWithLeftShift <- function(x,k, len_alphabet)
{
substr(x, 1, k-1) <- substr(x, 2, k)
res <- SubstituteSymbol(x,k, len_alphabet)
return(res)
}
InsertWithRightShift <- function(x,k, len_alphabet)
{
substr(x, k+1,nchar(x)) <- substr(x, k, nchar(x) - 1)
res <- SubstituteSymbol(x,k, len_alphabet)
return(res)
}
DelAndLeftShift <- function(x,k, len_alphabet)
{
substr(x, 2, k) <- substr(x, 1, k-1)
res <- SubstituteSymbol(x,1, len_alphabet)
return(res)
}
DelAndRightShift <- function(x,k, len_alphabet)
{
substr(x, k,nchar(x)-1) <- substr(x, k+1, nchar(x))
res <- SubstituteSymbol(x,nchar(x), len_alphabet)
return(res)
}
Neighbor <- function(x, len_alphabet = 4)
{
x <- toString(x)
k = sample(1:len_alphabet, 1)
roll <- sample(1:5, 1, prob = c(0.5, 1/8,1/8,1/8,1/8))
switch(roll,
SubstituteSymbol(x,k, len_alphabet),
InsertWithLeftShift(x,k, len_alphabet),
InsertWithRightShift(x,k, len_alphabet),
DelAndLeftShift(x,k, len_alphabet),
DelAndRightShift(x,k, len_alphabet))
}
MetropolisUpdate <- function(y,s,w, query, len_alphabet = 4)
{
x <- query
y_new <- Neighbor(y)
s_new <- pairwiseAlignmentSSW(x, y_new)
print(w)
#alpha <- w[s_new]-w[s]
alpha <- min(1,w[s_new]/w[s])
print(alpha)
if(runif(1)< alpha)
return(list(y_new, s_new))
else
return(list(y,s))
}
isflat <- function(H, Smax, Smin)
{
H1 <- H[Smin:Smax]
m <- mean(H1)
all(H1>20) & all(H1 > 0.75*m) & all(H1 < 1.25*m)
}
WangLandau <- function(w, phi, phiFinal,Smax, Smin, Nsim, query, subject, len_alphabet = 4)
{
x <- query
y <- subject
s <- pairwiseAlignmentSSW(x,y)
while(phi > phiFinal)
{
H <- rep(0, Smax)
while(!isflat(H, Smax, Smin))
{
res <- MetropolisUpdate(y, s, w, query)
y <- res[[1]]
s <- res[[2]]
H[s] <- H[s] + 1
w[s] <- w[s]/ phi
}
phi <- sqrt(phi)
print(H)
}
w <- w - max(w)
return(w)
}
get.string <- function(l)
{
letters <- c("A", "T", "C", "G")
ssubject <- sample(x = letters, size = l, replace = TRUE)
subject <- paste(ssubject, collapse = '')
}
######## Параметры ########
l = 5 # длина строки
query <- "ACGTG"
len_alphabet <- 4
Smin <- 1 # мин значение score
Smax <- l # макс значение score
Nsim <- 10^6
weight<- rep(1, Smax)
weight_log <- rep(0, Smax)
phi <- exp(0.1)
phi_log <- 0.1
phiFinal <- exp(0.0002)
phiFinal_log <- 0.0002
########## Sampling ############
########## Wang-Landau #########
#weight.wl <- WangLandau(w = weight, phi, phiFinal, Smax, Smin, Nsim, query, get.string(l))
# s <- numeric(Nsim)
# subject <- get.string(l)
# s[1] <- pairwiseAlignmentSSW(query, subject)
# for(j in 2:Nsim)
# {
#
# res <- MetropolisUpdate(y, s[j-1], weight.wl, query)
# y <- res[[1]]
# s[j] <- res[[2]]
# print(j)
# }
# write(s, "WL_new", ncolumns = 1)
# write(weight.wl, "weight.wl")
#
# # ##### Monte-Carlo ######
# x <- query
# resSample = rep(0, Nsim)
# for (i in 1:Nsim)
# {
# y <- get.string(l)
# resSample[i] <- pairwiseAlignmentSSW(x,y)
# print(i)
# }
#
# write(resSample, "MC_new", ncolumns = 1)
resWL <- read.table("WL_new")
#resWL <- read.table("ScoreWL")
weight.wl <- c(2.822286e-18, 1.666932e-18, 9.286511e-18, 7.171526e-17, 1.288682e-15)
#mh1 <- read.table("ScoreMH1_2")
resSample <- read.table("MC_new")
hist <- table(resWL)
#const.wl <- sum(exp(-weight.wl[resWL]))/Nsim
const.wl <- sum(1/weight.wl[resWL$V1])/Nsim
#prob.wl <- (1/const.wl)*(hist*exp(-weight.wl))
prob2 <- 1/weight.wl/sum(1/weight.wl)
#barplot(prob.wl, main = "Scores with weights", col=rgb(0,0,1,1/4))
hist.sample <- table(resSample)
prob.sample <- hist.sample/sum(hist.sample)
#barplot(prob.sample,add = TRUE, col=rgb(0,1,0,1/4))
######## Conf intervals ########
#MC
conf.mc <- function(z, alpha = 0.05)
{
confUp <- z + qnorm(1-alpha/2)*sqrt(z*(1-z)/Nsim)
conftDown <- z - qnorm(1-alpha/2)*sqrt(z*(1-z)/Nsim)
c(z,conftDown, confUp)
}
#WL
conf.w <- function(x, w, const)
{
df.conf <- matrix(data = 0,nrow = 5, ncol = 3)
for (i in 1:l)
{
tmp <- se.bm(t(x), g <- function(x) (x == i)*(1/(w[x]*const)))
se <- tmp$se.mean
m <-tmp$mu
z <- qnorm((1.95) / 2)
df.conf[i,] <- c(m, m - z*se, m + z*se)
}
return(df.conf)
}
conf.prob.sample <- lapply(prob.sample, conf.mc)
ci.sample <- cbind(x= c(rep("Sample",5)), as.data.frame(t(as.data.frame(conf.prob.sample)), row.names = 1))
ci.wl <-cbind(x=c(rep("WL",5)), as.data.frame(conf.w(resWL, weight.wl, const.wl)))
ci <- rbind(ci.sample, ci.wl)
lengthCI <- ci[,4] - ci[,3]
ci <- cbind(ci, lengthCI)
colnames(ci) <- c("x","PropScore", "LowerCI", "UpperCI", "lengthCI")
####### Plots #######
require(ggplot2)
## Loading required package: ggplot2
plot.CI <- function(p1, p2,num_score)
{
ggplot(ci[c(p1, p2),], aes(x = x,y = PropScore)) +
geom_point(size = 4) +
geom_errorbar(aes(ymax = UpperCI, ymin = LowerCI))+
ggtitle(paste("Score = ", num_score))
}
for (i in 1:l)
{
print(plot.CI(i, i+l,i))
print(ci[c(i, i+l),])
}

## x PropScore LowerCI UpperCI lengthCI
## 1 Sample 0.3167130 0.3158012 0.3176248 0.001823529
## 6 WL 0.3172508 0.3102537 0.3242479 0.013994231

## x PropScore LowerCI UpperCI lengthCI
## 2 Sample 0.5590660 0.5580929 0.5600391 0.00194624
## 7 WL 0.5600546 0.5495526 0.5705565 0.02100389

## x PropScore LowerCI UpperCI lengthCI
## 3 Sample 0.1095240 0.1089119 0.1101361 0.001224176
## 8 WL 0.1078056 0.1059827 0.1096285 0.003645858

## x PropScore LowerCI UpperCI lengthCI
## 4 Sample 0.01372400 0.01349597 0.01395203 0.0004560555
## 9 WL 0.01392169 0.01363701 0.01420636 0.0005693434

## x PropScore LowerCI UpperCI lengthCI
## 5 Sample 0.0009730000 0.0009118927 0.001034107 1.222146e-04
## 10 WL 0.0009673733 0.0009332804 0.001001466 6.818579e-05