library("Rcpp")
setwd("/media/ekaterina/Data/Study/Magistracy/10term/NIR/Prog/")
sourceCpp("Rccp/ssw_R.cpp")
source("se.R")


SubstituteSymbol <- function(x,k, len_alphabet)
{
  symbol <- sample(c("A", "C", "G", "T"), size = 1)
  substr(x,k,k) <- symbol
  return(x)
}

InsertWithLeftShift <- function(x,k, len_alphabet)
{
  substr(x, 1, k-1) <- substr(x, 2, k)
  res <- SubstituteSymbol(x,k, len_alphabet)
  return(res)
}

InsertWithRightShift <- function(x,k, len_alphabet)
{
  substr(x, k+1,nchar(x)) <- substr(x, k, nchar(x) - 1)
  res <- SubstituteSymbol(x,k, len_alphabet)
  return(res)
}

DelAndLeftShift <- function(x,k, len_alphabet)
{
  substr(x, 2, k) <- substr(x, 1, k-1)
  res <- SubstituteSymbol(x,1, len_alphabet)
  return(res)
}

DelAndRightShift <- function(x,k, len_alphabet)
{
  substr(x, k,nchar(x)-1) <- substr(x, k+1, nchar(x))
  res <- SubstituteSymbol(x,nchar(x), len_alphabet)
  return(res)
}

Neighbor <- function(x, len_alphabet = 4)
{
  x <- toString(x)
  k = sample(1:len_alphabet, 1)
  roll <- sample(1:5, 1, prob = c(0.5, 1/8,1/8,1/8,1/8))
  switch(roll,
         SubstituteSymbol(x,k, len_alphabet),
         InsertWithLeftShift(x,k, len_alphabet),
         InsertWithRightShift(x,k, len_alphabet),
         DelAndLeftShift(x,k, len_alphabet),
         DelAndRightShift(x,k, len_alphabet))
}



MetropolisUpdate <- function(y,s,w, query, len_alphabet = 4)
{
  x <- query
  y_new <- Neighbor(y)
  s_new <- pairwiseAlignmentSSW(x, y_new)
  print(w)
  #alpha <- w[s_new]-w[s]
  alpha <- min(1,w[s_new]/w[s])
  print(alpha)
  if(runif(1)< alpha)
    return(list(y_new, s_new))
  else
    return(list(y,s))
}


isflat <- function(H, Smax, Smin)
{
  H1 <- H[Smin:Smax]
  m <- mean(H1)
  all(H1>20) & all(H1 > 0.75*m) & all(H1 < 1.25*m)
}

WangLandau <- function(w, phi, phiFinal,Smax, Smin, Nsim, query, subject, len_alphabet = 4)
{
  x <- query
  y <- subject
  s <- pairwiseAlignmentSSW(x,y)
  while(phi > phiFinal)
  {
    H <- rep(0, Smax)
    while(!isflat(H, Smax, Smin))
    {
      res <- MetropolisUpdate(y, s, w, query)
      y <- res[[1]]
      s <- res[[2]]
      H[s] <- H[s] + 1
      w[s] <- w[s]/ phi
      
    }
    phi <- sqrt(phi)
    print(H)
  }
  w <- w - max(w)
  return(w)
}   



get.string <- function(l)
{
  letters <- c("A", "T", "C", "G")
  ssubject <- sample(x = letters, size = l, replace = TRUE)
  subject <- paste(ssubject, collapse = '')
}

######## Параметры ########
l = 8 # длина строки
query <- "ACGTGACG"
len_alphabet <- 4
Smin <- 1  # мин значение score
Smax <- l  # макс значение score
Nsim <- 10^6 
weight<- rep(1, Smax)
weight_log <- rep(0, Smax)
phi <- exp(0.1) 
phi_log <- 0.1
phiFinal <- exp(0.0002)
phiFinal_log <- 0.0002


########## Sampling ############

########## Wang-Landau #########
#weight.wl <- WangLandau(w = weight, phi, phiFinal, Smax, Smin, Nsim, query, get.string(l))

# s <- numeric(Nsim)
# subject <- get.string(l)
# s[1] <- pairwiseAlignmentSSW(query, subject)
# for(j in 2:Nsim)
# {
# 
#       res <- MetropolisUpdate(y, s[j-1], weight.wl, query)
#       y <- res[[1]]
#       s[j] <- res[[2]]
#       print(j)
# }

# write(s, "WL_new", ncolumns = 1)
# write(weight.wl, "weight.wl")
# 
# # ##### Monte-Carlo ######
# x <- query
# resSample = rep(0, Nsim)
# for (i in 1:Nsim)
# {
#       y <- get.string(l)
#       resSample[i] <- pairwiseAlignmentSSW(x,y)
#       print(i)
# }
# 
# write(resSample, "MC_new", ncolumns = 1)





# resWL <- read.table("WL_new")
# #resWL <- read.table("ScoreWL")
# weight.wl <- c(2.822286e-18, 1.666932e-18, 9.286511e-18, 7.171526e-17, 1.288682e-15)
# 
# #mh1 <- read.table("ScoreMH1_2")
# resSample <- read.table("MC_new")

load("Score8.Rdata")
resWL <- s
hist <- table(resWL)
#const.wl <- sum(exp(-weight.wl[resWL]))/Nsim
const.wl <- sum(1/weight.wl[resWL])/Nsim
#prob.wl <- (1/const.wl)*(hist*exp(-weight.wl))
prob2 <- 1/weight.wl/sum(1/weight.wl)
#barplot(prob.wl, main = "Scores with weights", col=rgb(0,0,1,1/4))


hist.sample <- table(resSample)
prob.sample <- hist.sample/sum(hist.sample)
#barplot(prob.sample,add = TRUE, col=rgb(0,1,0,1/4))



######## Conf intervals ########

#MC
conf.mc <- function(z, alpha = 0.05)
{ 
      confUp <- z + qnorm(1-alpha/2)*sqrt(z*(1-z)/Nsim)
      conftDown <- z - qnorm(1-alpha/2)*sqrt(z*(1-z)/Nsim)
      c(z,conftDown, confUp)
}


#WL
conf.w <- function(x, w, const)
{
      df.conf <- matrix(data = 0,nrow = l, ncol = 3)
      for (i in 1:l)
      {
            tmp <- se.bm(t(x), g <- function(x) (x == i)*(1/(w[x]*const)))
            se <- tmp$se.mean
            m <-tmp$mu
            z <- qnorm((1.95) / 2)
            df.conf[i,] <- c(m, m - z*se, m + z*se)
      }
      return(df.conf)
}

conf.prob.sample <- lapply(prob.sample, conf.mc)
ci.sample <- cbind(x= c(rep("Sample",l)), as.data.frame(t(as.data.frame(conf.prob.sample)), row.names = 1))

ci.wl <-cbind(x=c(rep("WL",l)), as.data.frame(conf.w(resWL, weight.wl, const.wl)))

ci <- rbind(ci.sample, ci.wl)
lengthCI <- ci[,4] - ci[,3]
ci <- cbind(ci, lengthCI)
colnames(ci) <- c("x","PropScore", "LowerCI", "UpperCI", "lengthCI")


####### Plots #######
require(ggplot2)
## Loading required package: ggplot2
plot.CI <- function(p1, p2,num_score)
{
      ggplot(ci[c(p1, p2),], aes(x = x,y = PropScore)) +
            geom_point(size = 4) +
            geom_errorbar(aes(ymax = UpperCI, ymin = LowerCI))+
            ggtitle(paste("Score = ", num_score))
}

for (i in 1:l)
{
      
      print(plot.CI(i, i+l,i))
      print(ci[c(i, i+l),])
      
}

##        x  PropScore    LowerCI    UpperCI    lengthCI
## 1 Sample 0.08707600 0.08652340 0.08762860 0.001105210
## 9     WL 0.09029463 0.08584821 0.09474106 0.008892848

##         x PropScore   LowerCI   UpperCI    lengthCI
## 2  Sample 0.5718790 0.5709092 0.5728488 0.001939606
## 10     WL 0.5646383 0.5453133 0.5839634 0.038650125

##         x PropScore   LowerCI   UpperCI    lengthCI
## 3  Sample 0.2634610 0.2625976 0.2643244 0.001726769
## 11     WL 0.2647573 0.2566033 0.2729113 0.016307995

##         x  PropScore    LowerCI    UpperCI     lengthCI
## 4  Sample 0.06438600 0.06390495 0.06486705 0.0009621043
## 12     WL 0.06663724 0.06469915 0.06857533 0.0038761772

##         x  PropScore    LowerCI    UpperCI     lengthCI
## 5  Sample 0.01129400 0.01108689 0.01150111 0.0004142243
## 13     WL 0.01164804 0.01126654 0.01202955 0.0007630100

##         x   PropScore     LowerCI     UpperCI     lengthCI
## 6  Sample 0.001671000 0.001590948 0.001751052 0.0001601044
## 14     WL 0.001786172 0.001716338 0.001856006 0.0001396683

##         x    PropScore      LowerCI      UpperCI     lengthCI
## 7  Sample 0.0002120000 0.0001834655 0.0002405345 5.706896e-05
## 15     WL 0.0002229628 0.0002119345 0.0002339911 2.205661e-05

##         x    PropScore      LowerCI      UpperCI     lengthCI
## 8  Sample 2.100000e-05 1.201841e-05 2.998159e-05 1.796318e-05
## 16     WL 1.536842e-05 1.428155e-05 1.645530e-05 2.173758e-06