Reverse complementary problem @rosalind

rev_comp<- function(sequ){
  #split & convert the sequence into a reverse character vector 
  rev_seq <- rev(strsplit(sequ, "")[[1]])
  #empty vector to store the reverse complementary sequences
  revcom<-c() 
  for (i in 1:length(rev_seq)) {
    if (rev_seq[i] == "A") {
      revcom[i] <- "T"
      } else if (rev_seq[i] == "T") {
      revcom[i] <- "A"
      }else if (rev_seq[i] == "G") {
      revcom[i] <- "C"
      }else {
      revcom[i] <- "G"
      } 
    }  
  #collapse to get rid of "" spaces in the vector
  return(paste(revcom, collapse=""))
  }
#Example:

sequence<-c("AAAACCCGGT")

rev_comp(sequence)
## [1] "ACCGGGTTTT"
abc<-c("GATATC")
rev_comp(abc)
## [1] "GATATC"

For the palindrome:

To find_reverse_palindromes function takes a DNA sequence as input, and iterates over all possible subsequences of length between 4 and 12. For each subsequence, it checks if the subsequence is a reverse palindrome by comparing it to its own reverse complementary sequence with the code above. When a reverse palindrome is found: its start positions and length are stored in a dataframe.

rev_pal<- function(seq) {
  #empty list to store palindrome locations and length
  pal_loc<-data.frame(matrix(ncol=2))
  colnames(pal_loc)<-c("Position","Length")
  for (i in 1:(nchar(seq)-4)) {
    for (j in 4:12){
      #To prevent end of the sequence to be checked:
      if(i+j <=nchar(seq)) {
      #Get a substring starting from i, ending at i+j 
      split_seq<-substr(seq, i, i+j)
      # check if its palindrome
      if ( rev_comp(split_seq) == split_seq) {
        #add the positin and length to dataframe:
        subdf<-data.frame(i,nchar(split_seq))
        colnames(subdf)<-c("Position","Length")
        #Add to the final df
        pal_loc<-rbind(pal_loc, subdf)
      }
      j<- j+1
    }
    i<-i+1
    }
    }
  #First row has NA values due to first rbind(pal_loc,subdf)
  #Remove it
  pal_loc<-pal_loc[-1,]
  return(pal_loc)
}
example<-c("GCGCATGCGGTAGAGGGTTGGCCACAAAGAGCTCCCAATATGGTAATTTCCGCCTCAGCTAGGTGCCCAAATCATAACCTGTGTCTGCGGGATTCCCTATACTTGAAAGTTGGCGGAAACCGTATGTTGTGGTGTCCTGCTCGTACCAAATATAATATAGTTAGCTGTGCTGTAGTAGCGAGGGTTTAGTAATGCCCAGTAGAGCGACGAGCTATTTGCAAAAGTGCCCTGCGGGTGCCATACCACTTCAATGGGGATAATAACTTTGGCCCGGAAACAAGGCTCTTAGGAACAGCTCCTCCAACCCGACCCTTCGAGAACGAGAGATTGAAATCCGCGCTGTAGGAGATATACTTTCGTGGAATTACCAGCAACGGGACCGCTGGAACGAACGGGGGTTATCTAACGGTTTCTGATGATCTGGTTACTCACGTACTTCCTCCTGAGAGCACACATATATGTGACTCTGCACCGTCGCTAACTTCTTGTCTTCTATAACGTCTAATACTTTATTGTTGTCGATAAGCCAATGAGCAGGCACCTCTAAGCGTTCAATTCACTGGCCCTTAGGTCTCGTACTAACTTATGCTACCTGGTTGTGTCTCCGAACAAGGTTAGGACCGTCGTAAACTACCCACTGTTACTCGTAGTCGGATGGGTTCTCAAGGCGGGGCCAGGAAAACACGAAGTATAGGACACTGATGGCGTCTCCTGTTCACTTATCGGTTAACGCGAGTGAAGAGCCTCACGACGTTATATCGTTTACCGTACTAAGCGTGTCCGGCAAGCACAAACGCCAGAATTAGAATCAACTTTTGGCTCAAGATAGGCTAGTATAACGCCAGCTCACCCCTTTGCGTTTGTGCTGTAACAACGTTTTTCTCGATGACTCCCCGTGCATGTACGATCCATAT")
rev_pal(example)
##    Position Length
## 2         3      6
## 3        19      6
## 4        29      6
## 5       215      8
## 6       216      6
## 7       452     12
## 8       453     10
## 9       454      8
## 10      455      6
## 11      726      6
## 12      873      6

Another example with multiple GCATGC in the sequence:

random<-c("AGCATGCAGCATGCTGCATGCCTGCATGCGGGCATGCTTAGCATGC")
rev_pal(random)
##   Position Length
## 2        2      6
## 3        7     10
## 4        8      8
## 5        9      6
## 6       16      6
## 7       24      6
## 8       32      6
## 9       41      6