Do the Burrows-Wheeler Transformation

Make the rotation matrix

rotation_matrix <- function(t){
  tv <- strsplit(t, "")[[1]] # split string
  tvtv <- rep(tv, 2) # repeated vector for rotations
  bwm <- matrix(ncol = length(tv), nrow = length(tv)) # empty matrix
  
  for(i in seq(tv)){
    bwm[i,] <- tvtv[i:(i-1+length(tv))] # make rotations
  }
  bwm <- bwm[do.call(order, as.data.frame(bwm)),] # order starting from first column
  return(bwm)
}

Example call rotation_matrix

rotation_matrix("TAATA$")
##      [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] "$"  "T"  "A"  "A"  "T"  "A" 
## [2,] "A"  "$"  "T"  "A"  "A"  "T" 
## [3,] "A"  "A"  "T"  "A"  "$"  "T" 
## [4,] "A"  "T"  "A"  "$"  "T"  "A" 
## [5,] "T"  "A"  "$"  "T"  "A"  "A" 
## [6,] "T"  "A"  "A"  "T"  "A"  "$"

Burrows Wheeler transformation

bwt <- function(t){
  bwm <- rotation_matrix(t) # make rotation matrix
  Last <- bwm[,ncol(bwm)] # get last column
  return(paste(Last, collapse = "")) # return in string
}

Example call bwt

bwt("TAATA$")
## [1] "ATTAA$"

Reverse BWT

rank_bw <- function(bw){
  bwv <- strsplit(bw, "")[[1]] # split string in vector
  totals <- c() # empty totals vector
  rank <- rep(NA, length(bwv)) # empty rank vector with predefined size
  for(i in seq(bwv)){
    if(!(bwv[i] %in% names(totals))){ # add to item to totals if doesn't exist
      totals[bwv[i]] <- 1 # add make it 1
    } else {
      totals[bwv[i]] <- totals[bwv[i]] + 1 # add 1 to totals for that character
    }
    rank[i] <- totals[bwv[i]] # fill the rank vector with rank
  }
  return(list(totals = totals, rank = rank)) # return a list of with totals and rank
}

get_first <- function(totals){
  totals <- totals[order(names(totals))] # order alpabetically on character
  FirstL <- list() # empty list
  for(char in names(totals)){
    FirstL[[char]] <- data.frame(c = rep(char, totals[char]),
                 n = 1:totals[char]) # make a data.frame for each character with ascending rank
  }
  return(do.call(rbind, FirstL)) # return concatenated data.frame
}

reverse_bwt <- function(bw){
  bwv <- strsplit(bw, "")[[1]] # split string in vector
  rank_list <- rank_bw(bw) # call rank_bw
  totals <- rank_list$totals # get totals vector
  rank <- rank_list$rank # get rank vector
  First <- get_first(totals) # get data.frame of First with rank
  i <- 1
  out <- "$" # start building from last character, so with the dollarsign (eof)
  while(bwv[i] != "$"){ # if dollarsign is found again, you're finished
    appchar <- bwv[i] # get character that appends before last found character
    out <- paste0(appchar, out) # append character 
    i <- which(First$c == appchar)[1] + rank[i] - 1 # find character with rank in First based on rank of character in last
  }
  return(paste(out, collapse = ""))
}

Example call reverse_bwt

bw_transformed <- bwt("TAATA$")
bw_transformed
## [1] "ATTAA$"
reverse_bwt(bw_transformed)
## [1] "TAATA$"

Some BWT examples

bw_transf <- bwt("Ich_hätti_gärn_es_Glas_Rivella$")
bw_transf
## [1] "anishsllghIv___c_tR$lGerä_eatäi"
reverse_bwt(bw_transf)
## [1] "Ich_hätti_gärn_es_Glas_Rivella$"
bw_transf <- bwt("Gruezi_mitenand$")
bw_transf
## [1] "dinntu$zm_eaGire"
reverse_bwt(bw_transf)
## [1] "Gruezi_mitenand$"
bw_transf <- bwt("Today_I_milked_the_cows_and_after_that_I_washed_the_caquelon$")
bw_transf
## [1] "ndseeytIrddI__cwhd__neeohhhkutatttsm__lie_oaTlcaewaaf___$q_oa"
reverse_bwt(bw_transf)
## [1] "Today_I_milked_the_cows_and_after_that_I_washed_the_caquelon$"
bw_transf <- bwt("Den_typischen_Aareböötle-Captain_oder_die_typische_Aareböötle-Kapitänin_gibt_es_nicht$")
bw_transf
## [1] "eetnertnsnentKCAA__teeissi-o_$llhirrhDd__cccgndnappp-tteiie_ä_bbööyyaaeaaeiihbpiöö__tt"
reverse_bwt(bw_transf)
## [1] "Den_typischen_Aareböötle$"
bw_transf <- bwt("ATAAGATAAGATAAGATAAGCCTTAA$")
bw_transf
## [1] "AATTTTTAAAA$GGGGCAAAATAAAAC"
reverse_bwt(bw_transf)
## [1] "ATAAGATAAGATAAGATAAGCCTTAA$"