#Load Necessary Library
library(tidyverse)
#Construction of function
# Print the base composition of a DNA sequence.
#
# Counts the occurrences of "A", "G", "C" and "T" (in that order) with
# str_count() and prints each count on its own line.
#
# dna: character vector holding the sequence. Defaults to the global `dnaseq`;
#      the default is only evaluated when BC() is called, so the existing
#      zero-argument call BC() keeps working as long as `dnaseq` exists then.
#
# Returns, invisibly, the count of "T" (the last value printed), which is what
# the previous implementation returned.
#
# For the `dnaseq` used in this file the original author recorded the expected
# counts as A = 520, G = 652, C = 596, T = 541.
BC <- function(dna = dnaseq) {
  counts <- lapply(c("A", "G", "C", "T"), function(base) str_count(dna, base))
  for (count in counts) {
    print(count)
  }
  invisible(counts[[4L]])
}
#Display of base composition
# NOTE: BC() reads the global `dnaseq`, which this file only assigns further
# down; it must already exist in the session when this call runs.
BC()
[1] 520
[1] 652
[1] 596
[1] 541
#Load Necessary Library
library(devtools)
#Define the DNA sequence
dnaseq <- "TGCGAGTGTTGAAGTTCGGCGGTACATCAGTGGCAAATGCAGAACGTTTTCTGCGTGTTGCCGATATTCTGGAAAGCAATGCCAGGCAGGGGCAGGTGGCCACCGTCCTCTCTGCCCCCGCCAAAATCACCAACCACCTGGTGGCGATGATTGAAAAAACCATTAGCGGCCAGGATGCTTTACCCAATATCAGCGATGCCGAACGTATTTTTGCCGAACTTTTGACGGGACTCGCCGCCGCCCAGCCGGGGTTCCCGCTGGCGCAATTGAAAACTTTCGTCGATCAGGAATTTGCCCAAATAAAACATGTCCTGCATGGCATTAGTTTGTTGGGGCAGTGCCCGGATAGCATCAACGCTGCGCTGATTTGCCGTGGCGAGAAAATGTCGATCGCCATTATGGCCGGCGTATTAGAAGCGCGCGGTCACAACGTTACTGTTATCGATCCGGTCGAAAAACTGCTGGCAGTGGGGCATTACCTCGAATCTACCGTCGATATTGCTGAGTCCACCCGCCGTATTGCGGCAAGCCGCATTCCGGCTGATCACATGGTGCTGATGGCAGGTTTCACCGCCGGTAATGAAAAAGGCGAACTGGTGGTGCTTGGACGCAACGGTTCCGACTACTCTGCTGCGGTGCTGGCTGCCTGTTTACGCGCCGATTGTTGCGAGATTTGGACGGACGTTGACGGGGTCTATACCTGCGACCCGCGTCAGGTGCCCGATGCGAGGTTGTTGAAGTCGATGTCCTACCAGGAAGCGATGGAGCTTTCCTACTTCGGCGCTAAAGTTCTTCACCCCCGCAATGACGATGCCAACGAATCAATGCCCGTGGCGGATGCAGGTTCATCACATTACGCAAGAAACGCCGGATGTGTGGACGATTTCCCTGATTTGCCACGATTACTACCCATATCGCGCCGGGCAATATGCACTGGTCAGCGTGCGAAACTCAGCGGAAACGCTGCGTGCTTACACCATTTCCTCCACGCCAGGCGTGAGTGAATATATCACCCTGACCGTGCGGCGGATTGATGACGGTGTCGGCTCCCAGTGGCTGACGCGCGATGTAAAACGCGGTGATTATCTCTGGCTTTCGGACGCGATGGGGGAATTTACCTGCGACGATAAAGCAGAAGATAAATTCCTGTTGCTGGCGGCAGGCTGCGGCGTCACGCCGATTATGTCGATGCGTCGCTGGCTTGCGAAGAACCGTCCACAGGCCGATGTGCGGGTGATCTACAACGTGCGTACGCCGCAGGATGTTATTTTCGCCGATGAGTGGCGTAACTATCCGGTAACGCTGGTGGCAGAAAATAACGTTACCGAAGGCTTTATCGCTGGTCGTCTCACTCGCGAACTGCTGGCAGGTGTACCTGACTTAGCTTCACGTACCGTGATGACCTGCGGCCCGGCTCCGTATATGGATTGGGTAGAGCAGGAAGTGAAAGCGCTCGGCGTGACGCGTTTCTTTAAAGAGAAATTCTTCACCCCAGTAGCAGAAGCGGCGACCAGCGGTCTGAAATTCACCAAACTGCAACCGGCACGAGAATTTTACGCCCCGGTTGGCACCACGCTACTGGAGGCGCTGGAAAGCAATAACGTTCCGGTTGTCGCTGCCTGCCGTGCGGGTGTTTGCGGCTGCTGTAAGACAAAAGTGGTTTCCGGTGAATATACGGTGAGCAGCACAATGACGCTGACCGACGCCGAAATCGCTGAAGGTTACGTACTGGCCTGCTCCTGCCATCCGCAGGGGGATTTGGTTCTCGCATAACGTTGGCGGTGCGCTGCTGGAGCAACTGAAGCGTCAGCAAAGCTGGCTGAAGAATAAACATATCGACTTACGTGTCTGCGGTGTTGCCAACTCGAAGGCTCTGCTCACCAATGTACATGGCCTTAATCTGGAAAACTGGCAGGAAGAACTGGCGCAAGCCAAAGAGCCGTTTAATCTCGGGCGCTTAATTCGCCTCGTGAAAGAATATCATCTGCT
GAACCCGGTCATTGTTGACTGCACTTCCAGCCAGGCAGTGGCGGATCAATATGCCGACTTCCTGCGCGAAGGTTTCCACGTTGTCACGCCGAACAAAAAGGCCAACACCTCGTCGATGGATTACTACCATCAGTTGCGTTATGCGGCGGAAAAATCGCGGCGTAAATTCCTCTATGACACCAACGTTGGGGCTGGATTACCGGTTATTGAGAACCTGCAAAATCTGCTCAATGCAGGTGATGAATTGATGAAGTTCTCCGGCATTCTTTCTGGTTCGCTTTCTTATATCTTCGGCAAGTTAGACGAAGGCATGAGTTTCT"
#Perform the Reverse Complement of the DNA Sequence
# revcomp() is called through the tktools namespace (`::`), so tktools must be
# installed but does not have to be attached with library().
tktools::revcomp(dnaseq)
[1] "AGAAACTCATGCCTTCGTCTAACTTGCCGAAGATATAAGAAAGCGAACCAGAAAGAATGCCGGAGAACTTCATCAATTCATCACCTGCATTGAGCAGATTTTGCAGGTTCTCAATAACCGGTAATCCAGCCCCAACGTTGGTGTCATAGAGGAATTTACGCCGCGATTTTTCCGCCGCATAACGCAACTGATGGTAGTAATCCATCGACGAGGTGTTGGCCTTTTTGTTCGGCGTGACAACGTGGAAACCTTCGCGCAGGAAGTCGGCATATTGATCCGCCACTGCCTGGCTGGAAGTGCAGTCAACAATGACCGGGTTCAGCAGATGATATTCTTTCACGAGGCGAATTAAGCGCCCGAGATTAAACGGCTCTTTGGCTTGCGCCAGTTCTTCCTGCCAGTTTTCCAGATTAAGGCCATGTACATTGGTGAGCAGAGCCTTCGAGTTGGCAACACCGCAGACACGTAAGTCGATATGTTTATTCTTCAGCCAGCTTTGCTGACGCTTCAGTTGCTCCAGCAGCGCACCGCCAACGTTATGCGAGAACCAAATCCCCCTGCGGATGGCAGGAGCAGGCCAGTACGTAACCTTCAGCGATTTCGGCGTCGGTCAGCGTCATTGTGCTGCTCACCGTATATTCACCGGAAACCACTTTTGTCTTACAGCAGCCGCAAACACCCGCACGGCAGGCAGCGACAACCGGAACGTTATTGCTTTCCAGCGCCTCCAGTAGCGTGGTGCCAACCGGGGCGTAAAATTCTCGTGCCGGTTGCAGTTTGGTGAATTTCAGACCGCTGGTCGCCGCTTCTGCTACTGGGGTGAAGAATTTCTCTTTAAAGAAACGCGTCACGCCGAGCGCTTTCACTTCCTGCTCTACCCAATCCATATACGGAGCCGGGCCGCAGGTCATCACGGTACGTGAAGCTAAGTCAGGTACACCTGCCAGCAGTTCGCGAGTGAGACGACCAGCGATAAAGCCTTCGGTAACGTTATTTTCTGCCACCAGCGTTACCGGATAGTTACGCCACTCATCGGCGAAAATAACATCCTGCGGCGTACGCACGTTGTAGATCACCCGCACATCGGCCTGTGGACGGTTCTTCGCAAGCCAGCGACGCATCGACATAATCGGCGTGACGCCGCAGCCTGCCGCCAGCAACAGGAATTTATCTTCTGCTTTATCGTCGCAGGTAAATTCCCCCATCGCGTCCGAAAGCCAGAGATAATCACCGCGTTTTACATCGCGCGTCAGCCACTGGGAGCCGACACCGTCATCAATCCGCCGCACGGTCAGGGTGATATATTCACTCACGCCTGGCGTGGAGGAAATGGTGTAAGCACGCAGCGTTTCCGCTGAGTTTCGCACGCTGACCAGTGCATATTGCCCGGCGCGATATGGGTAGTAATCGTGGCAAATCAGGGAAATCGTCCACACATCCGGCGTTTCTTGCGTAATGTGATGAACCTGCATCCGCCACGGGCATTGATTCGTTGGCATCGTCATTGCGGGGGTGAAGAACTTTAGCGCCGAAGTAGGAAAGCTCCATCGCTTCCTGGTAGGACATCGACTTCAACAACCTCGCATCGGGCACCTGACGCGGGTCGCAGGTATAGACCCCGTCAACGTCCGTCCAAATCTCGCAACAATCGGCGCGTAAACAGGCAGCCAGCACCGCAGCAGAGTAGTCGGAACCGTTGCGTCCAAGCACCACCAGTTCGCCTTTTTCATTACCGGCGGTGAAACCTGCCATCAGCACCATGTGATCAGCCGGAATGCGGCTTGCCGCAATACGGCGGGTGGACTCAGCAATATCGACGGTAGATTCGAGGTAATGCCCCACTGCCAGCAGTTTTTCGACCGGATCGATAACAGTAACGTTGTGACCGCGCGCTTCTAATACGCCGGCCATAATGGCGATCGACATTTTCTCGCCACGGCAAATCAGCGCAGCGTTGATGCTATCCGGGCACTGCCCCAACAAACTAATGCCATG
CAGGACATGTTTTATTTGGGCAAATTCCTGATCGACGAAAGTTTTCAATTGCGCCAGCGGGAACCCCGGCTGGGCGGCGGCGAGTCCCGTCAAAAGTTCGGCAAAAATACGTTCGGCATCGCTGATATTGGGTAAAGCATCCTGGCCGCTAATGGTTTTTTCAATCATCGCCACCAGGTGGTTGGTGATTTTGGCGGGGGCAGAGAGGACGGTGGCCACCTGCCCCTGCCTGGCATTGCTTTCCAGAATATCGGCAACACGCAGAAAACGTTCTGCATTTGCCACTGATGTACCGCCGAACTTCAACACTCGCA"
Write a program to translate an open reading frame (ORF) into an amino acid sequence.
Assuming the prompt is instructing the student to choose ONE ORF, I choose to translate the longest ORF in the first reading frame (bases 805 - 1773).
#Load Necessary Library
library(seqinr)
#Selection of what ORF I want to Translate
# substring() is 1-based and inclusive at both ends, so this extracts
# 1773 - 805 + 1 = 969 bases, i.e. exactly 323 codons.
substring(dnaseq,805,1773)
[1] "ATGACGATGCCAACGAATCAATGCCCGTGGCGGATGCAGGTTCATCACATTACGCAAGAAACGCCGGATGTGTGGACGATTTCCCTGATTTGCCACGATTACTACCCATATCGCGCCGGGCAATATGCACTGGTCAGCGTGCGAAACTCAGCGGAAACGCTGCGTGCTTACACCATTTCCTCCACGCCAGGCGTGAGTGAATATATCACCCTGACCGTGCGGCGGATTGATGACGGTGTCGGCTCCCAGTGGCTGACGCGCGATGTAAAACGCGGTGATTATCTCTGGCTTTCGGACGCGATGGGGGAATTTACCTGCGACGATAAAGCAGAAGATAAATTCCTGTTGCTGGCGGCAGGCTGCGGCGTCACGCCGATTATGTCGATGCGTCGCTGGCTTGCGAAGAACCGTCCACAGGCCGATGTGCGGGTGATCTACAACGTGCGTACGCCGCAGGATGTTATTTTCGCCGATGAGTGGCGTAACTATCCGGTAACGCTGGTGGCAGAAAATAACGTTACCGAAGGCTTTATCGCTGGTCGTCTCACTCGCGAACTGCTGGCAGGTGTACCTGACTTAGCTTCACGTACCGTGATGACCTGCGGCCCGGCTCCGTATATGGATTGGGTAGAGCAGGAAGTGAAAGCGCTCGGCGTGACGCGTTTCTTTAAAGAGAAATTCTTCACCCCAGTAGCAGAAGCGGCGACCAGCGGTCTGAAATTCACCAAACTGCAACCGGCACGAGAATTTTACGCCCCGGTTGGCACCACGCTACTGGAGGCGCTGGAAAGCAATAACGTTCCGGTTGTCGCTGCCTGCCGTGCGGGTGTTTGCGGCTGCTGTAAGACAAAAGTGGTTTCCGGTGAATATACGGTGAGCAGCACAATGACGCTGACCGACGCCGAAATCGCTGAAGGTTACGTACTGGCCTGCTCCTGCCATCCGCAGGGGGATTTGGTTCTCGCATAA"
#Preparing Selected ORF for Translation
# Derive the ORF (bases 805-1773, inclusive) directly from `dnaseq` instead of
# pasting a copy of the printed substring, so the two cannot drift apart.
# s2c() splits the string into a vector of single characters, the form that
# translate() is given below.
TRANSORF <- s2c(substring(dnaseq, 805, 1773))
#Translating the ORF
translate(seq=TRANSORF)
[1] "M" "T" "M" "P" "T" "N" "Q" "C" "P" "W" "R" "M" "Q" "V" "H" "H" "I" "T" "Q" "E" "T" "P" "D" "V" "W" "T" "I" "S"
[29] "L" "I" "C" "H" "D" "Y" "Y" "P" "Y" "R" "A" "G" "Q" "Y" "A" "L" "V" "S" "V" "R" "N" "S" "A" "E" "T" "L" "R" "A"
[57] "Y" "T" "I" "S" "S" "T" "P" "G" "V" "S" "E" "Y" "I" "T" "L" "T" "V" "R" "R" "I" "D" "D" "G" "V" "G" "S" "Q" "W"
[85] "L" "T" "R" "D" "V" "K" "R" "G" "D" "Y" "L" "W" "L" "S" "D" "A" "M" "G" "E" "F" "T" "C" "D" "D" "K" "A" "E" "D"
[113] "K" "F" "L" "L" "L" "A" "A" "G" "C" "G" "V" "T" "P" "I" "M" "S" "M" "R" "R" "W" "L" "A" "K" "N" "R" "P" "Q" "A"
[141] "D" "V" "R" "V" "I" "Y" "N" "V" "R" "T" "P" "Q" "D" "V" "I" "F" "A" "D" "E" "W" "R" "N" "Y" "P" "V" "T" "L" "V"
[169] "A" "E" "N" "N" "V" "T" "E" "G" "F" "I" "A" "G" "R" "L" "T" "R" "E" "L" "L" "A" "G" "V" "P" "D" "L" "A" "S" "R"
[197] "T" "V" "M" "T" "C" "G" "P" "A" "P" "Y" "M" "D" "W" "V" "E" "Q" "E" "V" "K" "A" "L" "G" "V" "T" "R" "F" "F" "K"
[225] "E" "K" "F" "F" "T" "P" "V" "A" "E" "A" "A" "T" "S" "G" "L" "K" "F" "T" "K" "L" "Q" "P" "A" "R" "E" "F" "Y" "A"
[253] "P" "V" "G" "T" "T" "L" "L" "E" "A" "L" "E" "S" "N" "N" "V" "P" "V" "V" "A" "A" "C" "R" "A" "G" "V" "C" "G" "C"
[281] "C" "K" "T" "K" "V" "V" "S" "G" "E" "Y" "T" "V" "S" "S" "T" "M" "T" "L" "T" "D" "A" "E" "I" "A" "E" "G" "Y" "V"
[309] "L" "A" "C" "S" "C" "H" "P" "Q" "G" "D" "L" "V" "L" "A" "*"
#Load Necessary Library
library(stringr)
#Convert DNA Sequence to String
# dnaseq is assigned above as a single character string, so toString() (which
# collapses a vector with ", ") returns it unchanged here.
DNA__1 <- toString(dnaseq)
#Search and Report EcoRI Site
# Count occurrences of a restriction site in a DNA sequence.
#
# The previous version tested `DNA__1 == "GAATTC"`, which is only TRUE when the
# entire sequence is exactly the site, so it reported 0 for any longer
# sequence regardless of its content. This version searches for the site as a
# substring.
#
# dna:  a single character string holding the sequence. Defaults to the global
#       `DNA__1`, so the existing zero-argument call hitcounter() still works.
# site: the recognition sequence to look for; defaults to the EcoRI site
#       "GAATTC". It is matched literally, not as a regular expression.
#
# Whitespace (for example a line break inside the sequence literal) is removed
# and both strings are upper-cased before searching, so a site that straddles
# a line break is still found.
#
# Returns the number of non-overlapping occurrences of `site` in `dna`
# (0 when there are none).
hitcounter <- function(dna = DNA__1, site = "GAATTC") {
  stopifnot(is.character(dna), length(dna) == 1L, !is.na(dna))
  stopifnot(is.character(site), length(site) == 1L, nzchar(site))

  cleaned <- toupper(gsub("[[:space:]]", "", dna))
  starts <- gregexpr(toupper(site), cleaned, fixed = TRUE)[[1L]]

  # gregexpr() signals "no match" with a single -1 rather than an empty vector.
  if (starts[1L] == -1L) {
    return(0L)
  }
  length(starts)
}
#Print Results
print(hitcounter())
[1] 0
According to my code, there are zero EcoRI sites present in the data sequence.