Write R code to do each of the following tasks:
Retrieve the sequence SARS coronavirus MA15 ExoN1 () with accession number FJ882953 from the ACNUC “genbank”.
# Choose the ACNUC
choosebank("genbank")
My_Que <- query("My_Que", "AC=FJ882953")
# write to fasta file
write.fasta(getSequence(My_Que[['req']][[1]]),getName(My_Que[['req']][[1]]),"SARS.fasta")
Find and plot all potential start and stop codons in the first 1000 bases in the sequence. Use a blue color for the start codons and orange for the stop codons your plot.
######################################################
# Find all potential start and stop sequences in a
# DNA sequences
######################################################
findPotentialStartsAndStops2 <- function(MySeq)
{
# Define a vector with the sequences of potential start and stop codons
codons <- c("atg", "taa", "tag", "tga")
# Find the start positions of all occurrences of "atg" in sequence "sequence"
positions <- start(matchPattern("atg", MySeq ))
# Find the total number of potential start and stop codons in sequence "sequence"
numoccurrences <- length(positions)
# Make a vector "types" containing "numoccurrences" copies of "codon"
types <- rep("atg", numoccurrences)
# Find the number of occurrences of each type of potential start or stop codon
for (i in 2:4)
{
codon <- codons[i]
# Find the start positions of all occurrences of "codon" in sequence "sequence"
codonpositions <- start(matchPattern(codon,MySeq ))
# Find the total number of potential start and stop codons in sequence "sequence"
numoccurrences <- length(codonpositions)
# Add the vector "codonpositions" to the end of vector "positions":
positions <- append(positions, codonpositions, after=length(positions))
# Add the vector "rep(codon, numoccurrences)" to the end of vector "types":
types <- append(types, rep(codon, numoccurrences), after=length(types))
}
# Sort the vectors "positions" and "types" in order of position along the input sequence:
indices <- order(positions)
positions <- positions[indices]
types <- types[indices]
# Return a list variable including vectors "positions" and "types":
mylist <- list(positions,types)
names(mylist) <- c("positions", "types")
return(mylist)
}
Function to plot potential start and stop sequences.
######################################################
# Plot potential start and stop sequences.
######################################################
plotPotentialStartsAndStops <- function(sequence)
{
# Make a plot showing the positions of the start and stop codons in the input sequence:
# Draw a line at y=0 from 1 to the length of the sequence:
x <- c(1, nchar(sequence))
print(x)
y <- c(0, 0)
y
plot(
x,
y,
ylim = c(0, 3),
type = "l",
axes = FALSE,
xlab = "Nucleotide",
ylab = "Reading frame",
main = "Predicted start (red) and stop (blue) codons"
)
segments(1, 1, nchar(sequence), 1)
segments(1, 2, nchar(sequence), 2)
# Add the x-axis at y=0:
axis(1, pos = 0)
# Add the y-axis labels:
text(0.9, 0.5, "+1")
text(0.9, 1.5, "+2")
text(0.9, 2.5, "+3")
# Draw in each predicted start/stop codon:
mylist <- findPotentialStartsAndStops2(sequence)
positions = mylist[["positions"]]
types = mylist[["types"]]
numcodons <- length(positions)
for (i in 1:numcodons)
{
position <- positions[i]
type <- types[i]
remainder <- (position - 1) %% 3
if (remainder == 0)
# +1 reading frame
{
if (type == "atg") {
segments(position, 0, position, 1, lwd = 1,col = "red")}
else {
segments(position, 0, position, 1, lwd = 1,col = "blue")}
}
else if (remainder == 1){
if (type == "atg") {
segments(position, 1, position, 2, lwd = 1, col = "red")}
else {
segments(position, 1, position, 2, lwd = 1, col = "blue")}
}
else if (remainder == 2){
if (type == "atg") {
segments(position, 2, position, 3, lwd = 1, col = "red")}
else {
segments(position, 2, position, 3, lwd = 1, col = "blue")}
}
}
}
# Get the sequence to vector of characters
My_Seq <- read.fasta("SARS.fasta", as.string = TRUE)
# Convert vector of characters to string
My_SeqS <- My_Seq$FJ882953[1:1000]
# measure length
length(My_SeqS)
## [1] 1000
# conversion of a vector of chars into a string
My_SeqS <- c2s(My_SeqS)
# find potential start and stop
findPotentialStartsAndStops2(My_SeqS)
## $positions
## [1] 13 26 37 48 66 71 89 96 99 115 129 195
## [13] 212 223 227 295 306 355 369 384 387 418 429 439
## [25] 448 450 456 484 500 523 531 555 585 598 607 610
## [37] 628 633 645 654 655 660 664 679 688 696 697 715
## [49] 720 739 751 759 780 807 820 860 883 922 927 928
## [61] 933 934 952 961 964 997 1015 1033 1057 1090 1105 1115
## [73] 1162 1169 1185 1187 1188 1192 1195 1206 1207 1216 1236 1249
## [85] 1264 1272 1279 1294 1312 1314 1320 1323 1328 1333 1366 1372
## [97] 1405 1429 1435 1438 1464 1477 1480 1507 1513 1546 1551 1563
## [109] 1566 1567 1579 1587 1594 1603 1609 1632 1635 1636 1690 1698
## [121] 1708 1735 1747 1753 1776 1815 1882 1884 1906 1936 1938 1948
## [133] 1976 2018 2025 2028 2068 2089 2110 2113 2119 2131 2143 2157
## [145] 2197 2233 2250 2254 2263 2265 2272 2288 2296 2340 2344 2417
## [157] 2425 2442 2452 2458 2466 2467 2485 2509 2536 2539 2550 2580
## [169] 2583 2591 2598 2605 2658 2670 2689 2697 2742 2745 2758 2764
## [181] 2766 2767 2776 2788 2790 2791 2815 2836 2845 2853 2868 2890
## [193] 2906 2914 2920 2922 2923 2934 2953 2955 2956 2958 2965 2987
## [205] 3012 3030 3040 3055 3057 3058 3067 3072 3073 3090 3091 3130
## [217] 3145 3174 3175 3184 3199 3223 3235 3255 3265 3270 3280 3283
## [229] 3289 3316 3323 3327 3333 3336 3343 3354 3360 3372 3399 3407
## [241] 3421 3423 3424 3433 3438 3441 3456 3484 3493 3507 3520 3525
## [253] 3528 3535 3553 3564 3565 3628 3693 3694 3708 3709 3722 3733
## [265] 3736 3741 3748 3763 3802 3834 3837 3850 3862 3864 3865 3892
## [277] 3907 3925 3933 3937 3948 3949 3962 3967 3973 3980 3991 3999
## [289] 4010 4014 4018 4020 4030 4036 4053 4084 4088 4104 4120 4122
## [301] 4123 4131 4153 4171 4180 4192 4198 4215 4237 4239 4243 4257
## [313] 4278 4286 4296 4300 4317 4319 4327 4331 4335 4339 4347 4349
## [325] 4366 4372 4381 4402 4407 4432 4435 4446 4467 4476 4479 4480
## [337] 4496 4509 4512 4518 4525 4531 4550 4561 4575 4596 4612 4614
## [349] 4645 4659 4722 4729 4738 4747 4795 4801 4816 4824 4852 4860
## [361] 4888 4913 4919 4920 4926 4956 4963 4965 4978 4986 4989 4995
## [373] 4996 5002 5023 5026 5028 5029 5041 5044 5071 5073 5074 5089
## [385] 5096 5106 5122 5148 5158 5161 5167 5170 5188 5199 5215 5229
## [397] 5257 5272 5274 5281 5311 5314 5335 5337 5351 5352 5370 5374
## [409] 5401 5403 5413 5442 5451 5460 5462 5468 5484 5485 5488 5494
## [421] 5515 5530 5532 5547 5573 5574 5576 5593 5599 5623 5631 5632
## [433] 5644 5673 5680 5701 5723 5745 5749 5751 5803 5811 5833 5847
## [445] 5857 5866 5868 5892 5922 5932 5935 5950 5956 5971 5973 5974
## [457] 5979 5987 5988 6045 6048 6054 6057 6067 6073 6106 6118 6139
## [469] 6213 6232 6246 6263 6280 6301 6309 6342 6349 6354 6372 6378
## [481] 6388 6399 6400 6409 6414 6426 6432 6433 6443 6453 6478 6487
## [493] 6489 6490 6501 6507 6513 6531 6547 6550 6565 6579 6594 6628
## [505] 6636 6652 6662 6669 6706 6721 6727 6754 6760 6766 6778 6784
## [517] 6802 6807 6810 6829 6848 6864 6876 6885 6894 6922 6946 6948
## [529] 6955 6970 6979 6992 7003 7032 7041 7051 7069 7083 7104 7110
## [541] 7119 7132 7151 7179 7194 7196 7222 7250 7264 7277 7295 7300
## [553] 7304 7336 7347 7358 7362 7385 7386 7388 7396 7420 7438 7440
## [565] 7445 7446 7461 7467 7473 7522 7540 7552 7555 7557 7558 7570
## [577] 7588 7609 7630 7633 7644 7650 7672 7695 7696 7728 7737 7744
## [589] 7753 7771 7773 7779 7786 7788 7801 7819 7848 7850 7873 7903
## [601] 7909 7921 7925 7930 7932 7938 7963 7973 7984 8016 8028 8034
## [613] 8037 8083 8091 8095 8106 8113 8116 8140 8145 8151 8158 8167
## [625] 8177 8188 8191 8197 8204 8205 8227 8233 8239 8241 8256 8265
## [637] 8280 8298 8301 8312 8323 8335 8347 8377 8382 8398 8412 8418
## [649] 8425 8446 8455 8467 8474 8479 8528 8553 8554 8556 8568 8569
## [661] 8598 8611 8626 8628 8629 8644 8649 8659 8665 8671 8697 8698
## [673] 8715 8718 8751 8778 8790 8794 8824 8866 8872 8875 8908 8923
## [685] 8928 8933 8958 8959 8965 8973 8992 8995 9018 9026 9030 9052
## [697] 9076 9081 9084 9094 9096 9100 9109 9114 9121 9138 9163 9166
## [709] 9175 9178 9180 9181 9220 9222 9227 9228 9237 9241 9279 9282
## [721] 9297 9327 9350 9351 9373 9384 9397 9399 9414 9416 9492 9510
## [733] 9511 9513 9538 9548 9576 9588 9606 9628 9640 9653 9658 9660
## [745] 9673 9722 9729 9739 9769 9790 9819 9838 9858 9870 9873 9874
## [757] 9880 9895 9897 9955 9962 9985 9995 10005 10027 10029 10044 10045
## [769] 10068 10091 10096 10102 10107 10108 10138 10161 10190 10207 10213 10219
## [781] 10228 10234 10249 10290 10294 10302 10334 10335 10342 10354 10369 10371
## [793] 10378 10384 10396 10402 10407 10408 10430 10439 10471 10476 10483 10491
## [805] 10504 10545 10551 10554 10564 10572 10587 10591 10594 10606 10609 10626
## [817] 10629 10630 10636 10649 10650 10662 10663 10671 10683 10687 10731 10736
## [829] 10749 10767 10772 10773 10776 10795 10806 10812 10813 10828 10830 10837
## [841] 10843 10867 10885 10904 10911 10920 10932 10983 11012 11026 11033 11044
## [853] 11050 11107 11111 11120 11128 11136 11138 11147 11148 11152 11158 11167
## [865] 11173 11188 11194 11207 11211 11223 11240 11241 11259 11260 11262 11263
## [877] 11265 11272 11289 11291 11292 11295 11325 11329 11331 11337 11354 11364
## [889] 11376 11383 11411 11418 11422 11430 11446 11495 11514 11589 11590 11617
## [901] 11624 11625 11650 11656 11662 11664 11677 11683 11704 11710 11737 11741
## [913] 11746 11751 11794 11799 11812 11827 11844 11845 11872 11888 11918 11931
## [925] 11938 11957 11965 12001 12018 12027 12048 12049 12060 12064 12066 12070
## [937] 12076 12096 12108 12111 12118 12124 12130 12136 12138 12146 12167 12182
## [949] 12183 12191 12211 12231 12235 12242 12251 12263 12268 12277 12280 12282
## [961] 12283 12285 12292 12309 12316 12318 12348 12368 12382 12387 12409 12411
## [973] 12415 12429 12463 12465 12472 12490 12493 12499 12503 12549 12568 12580
## [985] 12582 12583 12588 12597 12611 12649 12651 12652 12657 12673 12691 12733
## [997] 12739 12748 12754 12756 12796 12826 12831 12858 12867 12871 12878 12894
## [1009] 12924 12978 12988 12997 13008 13041 13046 13086 13099 13103 13132 13147
## [1021] 13150 13159 13174 13186 13191 13198 13228 13230 13231 13252 13280 13293
## [1033] 13300 13306 13326 13328 13341 13356 13373 13410 13416 13418 13437 13478
## [1045] 13485 13511 13512 13529 13544 13548 13561 13566 13577 13578 13593 13602
## [1057] 13628 13629 13644 13649 13652 13656 13660 13688 13692 13702 13707 13712
## [1069] 13718 13734 13736 13737 13743 13749 13757 13788 13790 13791 13793 13794
## [1081] 13806 13820 13821 13829 13839 13856 13860 13865 13869 13892 13913 13918
## [1093] 13923 13925 13937 13946 13952 13956 13968 13970 13992 14006 14054 14056
## [1105] 14072 14076 14091 14101 14105 14109 14130 14144 14150 14151 14193 14229
## [1117] 14240 14241 14244 14271 14273 14318 14321 14336 14339 14379 14384 14397
## [1129] 14405 14408 14418 14447 14453 14460 14470 14474 14498 14502 14514 14525
## [1141] 14534 14543 14571 14577 14580 14591 14592 14607 14619 14637 14642 14666
## [1153] 14685 14690 14691 14706 14719 14724 14747 14751 14760 14763 14772 14783
## [1165] 14796 14798 14810 14817 14829 14850 14853 14856 14862 14868 14879 14880
## [1177] 14887 14888 14894 14895 14906 14925 14931 14933 14948 14956 14957 14964
## [1189] 14969 14976 14988 15002 15021 15028 15029 15036 15059 15068 15078 15095
## [1201] 15129 15133 15137 15153 15155 15158 15175 15195 15198 15208 15213 15217
## [1213] 15222 15227 15229 15258 15270 15296 15300 15320 15324 15328 15334 15353
## [1225] 15357 15381 15383 15398 15402 15405 15414 15440 15443 15446 15462 15464
## [1237] 15468 15471 15476 15480 15488 15515 15516 15528 15534 15539 15543 15548
## [1249] 15549 15560 15561 15582 15595 15596 15598 15599 15609 15611 15612 15614
## [1261] 15630 15636 15641 15656 15659 15663 15669 15678 15704 15712 15717 15726
## [1273] 15735 15741 15750 15784 15788 15792 15803 15804 15845 15866 15867 15884
## [1285] 15895 15896 15900 15924 15926 15951 15962 15966 15968 15996 16007 16008
## [1297] 16010 16011 16027 16036 16046 16048 16052 16056 16058 16059 16062 16086
## [1309] 16094 16095 16102 16133 16188 16203 16220 16221 16226 16250 16263 16271
## [1321] 16280 16293 16295 16302 16304 16307 16322 16330 16331 16353 16365 16377
## [1333] 16383 16385 16416 16421 16428 16433 16440 16448 16454 16461 16464 16473
## [1345] 16475 16509 16551 16563 16574 16605 16626 16644 16655 16667 16688 16692
## [1357] 16698 16701 16728 16737 16742 16746 16748 16784 16787 16794 16808 16823
## [1369] 16825 16833 16847 16865 16868 16907 16908 16917 16922 16948 16989 17039
## [1381] 17052 17060 17070 17072 17079 17082 17093 17108 17112 17115 17118 17121
## [1393] 17147 17157 17160 17171 17183 17192 17207 17210 17232 17240 17247 17249
## [1405] 17250 17260 17268 17273 17274 17279 17291 17295 17363 17367 17378 17394
## [1417] 17413 17414 17423 17434 17466 17475 17483 17492 17498 17499 17505 17510
## [1429] 17526 17538 17548 17573 17574 17576 17606 17615 17673 17690 17705 17727
## [1441] 17745 17750 17751 17756 17778 17796 17798 17813 17852 17854 17859 17862
## [1453] 17873 17874 17897 17915 17939 17942 17955 17964 17967 18018 18023 18036
## [1465] 18045 18051 18076 18077 18087 18100 18101 18103 18115 18116 18131 18141
## [1477] 18145 18203 18215 18222 18227 18240 18257 18276 18281 18284 18302 18306
## [1489] 18312 18318 18339 18341 18360 18369 18388 18393 18410 18413 18423 18428
## [1501] 18436 18444 18452 18494 18501 18514 18515 18540 18561 18602 18630 18635
## [1513] 18642 18652 18653 18657 18659 18684 18696 18701 18702 18722 18728 18734
## [1525] 18741 18747 18749 18757 18758 18762 18765 18770 18779 18780 18792 18801
## [1537] 18813 18827 18833 18834 18839 18846 18874 18881 18900 18903 18920 18921
## [1549] 18963 18968 18972 18986 18996 18999 19002 19019 19040 19056 19065 19067
## [1561] 19092 19098 19115 19128 19134 19157 19173 19175 19182 19190 19193 19197
## [1573] 19202 19227 19242 19247 19275 19278 19287 19295 19307 19320 19325 19359
## [1585] 19367 19391 19397 19398 19418 19425 19429 19430 19432 19433 19449 19455
## [1597] 19473 19482 19517 19523 19533 19535 19542 19545 19557 19559 19593 19596
## [1609] 19616 19619 19626 19628 19641 19647 19665 19667 19677 19689 19695 19701
## [1621] 19719 19731 19743 19755 19763 19776 19796 19808 19822 19823 19827 19848
## [1633] 19881 19883 19887 19904 19914 19926 19928 19937 19940 19964 19992 20000
## [1645] 20012 20027 20040 20049 20057 20082 20111 20121 20137 20145 20164 20168
## [1657] 20169 20187 20201 20222 20237 20261 20263 20264 20267 20273 20301 20306
## [1669] 20323 20336 20351 20357 20379 20393 20397 20409 20411 20412 20426 20429
## [1681] 20450 20478 20483 20487 20500 20511 20522 20526 20581 20586 20599 20608
## [1693] 20616 20625 20639 20643 20648 20669 20671 20672 20674 20675 20678 20711
## [1705] 20726 20743 20744 20772 20775 20811 20850 20852 20853 20882 20913 20916
## [1717] 20919 20934 20941 20945 20946 20952 20963 20966 20978 20979 20985 21020
## [1729] 21032 21050 21056 21062 21080 21084 21100 21114 21137 21140 21143 21167
## [1741] 21177 21207 21209 21220 21224 21228 21285 21289 21290 21306 21311 21326
## [1753] 21328 21336 21353 21354 21358 21359 21384 21396 21426 21438 21445 21454
## [1765] 21486 21492 21495 21501 21519 21521 21522 21524 21537 21562 21563 21582
## [1777] 21584 21585 21594 21614 21645 21647 21669 21702 21707 21740 21772 21773
## [1789] 21794 21804 21816 21818 21834 21837 21843 21852 21876 21883 21904 21905
## [1801] 21915 21917 21924 21948 21950 21963 21965 21984 21990 22014 22020 22025
## [1813] 22037 22044 22061 22064 22067 22074 22092 22100 22113 22131 22146 22223
## [1825] 22240 22250 22251 22253 22254 22259 22274 22281 22305 22314 22323 22332
## [1837] 22338 22362 22388 22394 22404 22440 22442 22449 22466 22470 22491 22503
## [1849] 22545 22553 22569 22574 22577 22578 22595 22601 22616 22628 22629 22631
## [1861] 22634 22643 22671 22677 22683 22694 22695 22702 22728 22737 22739 22755
## [1873] 22761 22767 22773 22782 22787 22797 22806 22821 22823 22839 22841 22869
## [1885] 22886 22889 22890 22895 22943 22956 22964 22967 23004 23013 23034 23040
## [1897] 23042 23066 23112 23114 23121 23130 23148 23157 23165 23195 23201 23219
## [1909] 23229 23252 23256 23265 23267 23285 23333 23363 23370 23375 23390 23391
## [1921] 23427 23454 23460 23488 23495 23502 23505 23523 23526 23547 23559 23576
## [1933] 23578 23590 23595 23606 23613 23617 23640 23643 23649 23666 23670 23687
## [1945] 23715 23736 23761 23780 23799 23820 23828 23838 23853 23868 23871 23876
## [1957] 23886 23888 23899 23900 23909 23916 23921 23925 23931 23933 23937 23966
## [1969] 23997 23999 24000 24004 24005 24029 24033 24054 24097 24103 24111 24119
## [1981] 24140 24149 24150 24180 24192 24255 24263 24275 24288 24297 24303 24329
## [1993] 24332 24333 24351 24354 24378 24386 24419 24422 24434 24447 24465 24480
## [2005] 24484 24489 24519 24547 24572 24590 24599 24647 24648 24672 24690 24692
## [2017] 24740 24777 24779 24798 24812 24813 24828 24887 24891 24915 24948 24959
## [2029] 24960 24969 24977 24980 24981 24993 25016 25017 25029 25032 25043 25070
## [2041] 25084 25108 25109 25113 25140 25167 25169 25170 25175 25176 25182 25219
## [2053] 25230 25242 25243 25282 25289 25294 25318 25361 25414 25427 25433 25441
## [2065] 25451 25530 25555 25561 25577 25589 25592 25602 25603 25607 25651 25652
## [2077] 25654 25685 25690 25691 25709 25739 25745 25799 25805 25820 25828 25840
## [2089] 25871 25904 25909 25928 25940 25954 25994 26004 26017 26018 26020 26021
## [2101] 26039 26052 26066 26079 26113 26116 26120 26123 26161 26170 26219 26224
## [2113] 26230 26233 26264 26272 26282 26294 26307 26316 26346 26360 26386 26395
## [2125] 26413 26421 26424 26427 26442 26453 26476 26499 26502 26535 26575 26583
## [2137] 26606 26619 26625 26627 26635 26681 26718 26742 26756 26764 26784 26807
## [2149] 26823 26833 26839 26898 26916 26923 26968 26973 26992 27015 27023 27027
## [2161] 27036 27050 27067 27079 27090 27091 27122 27130 27133 27142 27145 27158
## [2173] 27166 27170 27190 27193 27194 27196 27197 27207 27214 27225 27235 27236
## [2185] 27243 27254 27260 27306 27326 27384 27390 27401 27411 27438 27557 27566
## [2197] 27585 27600 27601 27604 27605 27616 27620 27640 27664 27667 27669 27709
## [2209] 27732 27741 27742 27760 27776 27788 27796 27815 27818 27826 27833 27845
## [2221] 27858 27864 27873 27896 27918 27921 27931 27942 27951 27953 27995 27999
## [2233] 28016 28017 28041 28061 28065 28078 28082 28087 28090 28092 28114 28156
## [2245] 28162 28170 28182 28225 28260 28276 28315 28326 28327 28375 28385 28386
## [2257] 28405 28422 28459 28477 28489 28500 28537 28540 28545 28651 28660 28690
## [2269] 28712 28717 28729 28755 28764 28774 28792 28825 28840 28849 28956 28972
## [2281] 29033 29048 29071 29076 29085 29095 29103 29104 29142 29155 29188 29212
## [2293] 29214 29215 29259 29278 29282 29286 29287 29315 29316 29332 29348 29357
## [2305] 29358 29360 29361 29377 29384 29387 29418 29437 29438 29448 29452 29465
## [2317] 29471 29475 29481 29491 29501 29507 29512 29518 29529 29584 29588 29590
## [2329] 29594 29611 29623 29625 29630 29636 29642 29645
##
## $types
## [1] "tag" "taa" "taa" "tag" "atg" "tag" "taa" "taa" "taa" "tga" "taa" "tag"
## [13] "tga" "taa" "atg" "tag" "tag" "tga" "atg" "tag" "tag" "tga" "atg" "taa"
## [25] "tga" "atg" "taa" "tga" "atg" "tag" "taa" "atg" "atg" "taa" "taa" "taa"
## [37] "tag" "atg" "taa" "atg" "tga" "tag" "tga" "tga" "tga" "atg" "tga" "taa"
## [49] "atg" "tga" "tga" "atg" "atg" "atg" "tga" "atg" "tga" "tga" "atg" "tga"
## [61] "atg" "tga" "tga" "tga" "taa" "taa" "tga" "atg" "taa" "tga" "tga" "atg"
## [73] "taa" "atg" "tga" "atg" "tga" "atg" "taa" "atg" "tga" "atg" "tga" "tga"
## [85] "tga" "tag" "tga" "atg" "taa" "atg" "tag" "tga" "atg" "atg" "tga" "tag"
## [97] "tga" "tag" "tag" "atg" "atg" "taa" "taa" "tag" "tga" "tga" "atg" "tga"
## [109] "atg" "tga" "tga" "tga" "tga" "taa" "taa" "tga" "atg" "tga" "tga" "taa"
## [121] "tga" "tga" "taa" "taa" "taa" "taa" "tga" "atg" "tga" "tga" "atg" "tga"
## [133] "atg" "atg" "atg" "taa" "taa" "tga" "tga" "atg" "tga" "tag" "tga" "atg"
## [145] "tga" "taa" "taa" "atg" "tga" "atg" "taa" "atg" "tga" "tag" "tga" "atg"
## [157] "taa" "taa" "tga" "tga" "atg" "tga" "tga" "tga" "tga" "tag" "atg" "taa"
## [169] "atg" "atg" "tag" "taa" "atg" "taa" "taa" "taa" "atg" "tga" "tga" "tga"
## [181] "atg" "tga" "tga" "taa" "atg" "tga" "tga" "tga" "atg" "tag" "tga" "tga"
## [193] "atg" "tga" "tga" "atg" "tga" "tag" "tga" "atg" "tga" "atg" "tga" "atg"
## [205] "atg" "atg" "tga" "tga" "atg" "tga" "tga" "atg" "tga" "atg" "tga" "tga"
## [217] "tga" "atg" "tga" "tga" "tga" "tga" "taa" "taa" "tga" "atg" "taa" "atg"
## [229] "tga" "taa" "atg" "tga" "taa" "atg" "taa" "tga" "atg" "tag" "atg" "atg"
## [241] "tga" "atg" "tga" "taa" "taa" "atg" "tag" "taa" "taa" "atg" "taa" "taa"
## [253] "atg" "tga" "taa" "atg" "tga" "taa" "atg" "tga" "atg" "tga" "atg" "tga"
## [265] "taa" "tga" "tag" "taa" "tga" "atg" "tga" "taa" "tga" "atg" "tga" "taa"
## [277] "taa" "tga" "atg" "taa" "atg" "tga" "atg" "tag" "tga" "atg" "tga" "atg"
## [289] "atg" "tag" "tga" "atg" "tag" "tga" "taa" "tga" "atg" "tga" "tga" "atg"
## [301] "tga" "taa" "atg" "tga" "taa" "taa" "atg" "atg" "taa" "atg" "taa" "tag"
## [313] "tga" "atg" "atg" "tga" "taa" "atg" "atg" "atg" "atg" "tag" "taa" "atg"
## [325] "taa" "taa" "taa" "tga" "atg" "tag" "taa" "tag" "tga" "taa" "atg" "tga"
## [337] "atg" "atg" "tga" "atg" "taa" "tga" "atg" "taa" "tag" "atg" "taa" "atg"
## [349] "tga" "tag" "tag" "tga" "taa" "tga" "tga" "tga" "tga" "taa" "taa" "taa"
## [361] "taa" "atg" "atg" "tga" "atg" "atg" "tga" "atg" "taa" "atg" "taa" "atg"
## [373] "tga" "taa" "tag" "tga" "atg" "tga" "tag" "tga" "tga" "atg" "tga" "tag"
## [385] "atg" "taa" "atg" "taa" "taa" "atg" "tga" "taa" "tag" "tag" "tga" "atg"
## [397] "tag" "tga" "atg" "taa" "taa" "taa" "tga" "atg" "atg" "tga" "atg" "taa"
## [409] "taa" "atg" "taa" "taa" "tag" "tga" "atg" "atg" "atg" "tga" "taa" "taa"
## [421] "atg" "tga" "atg" "tag" "atg" "tga" "atg" "tga" "taa" "atg" "atg" "tga"
## [433] "taa" "taa" "taa" "tga" "atg" "tga" "tga" "atg" "taa" "atg" "tga" "atg"
## [445] "taa" "taa" "atg" "tag" "atg" "tga" "taa" "atg" "taa" "tga" "atg" "tga"
## [457] "taa" "atg" "tga" "tga" "atg" "atg" "tag" "tga" "tag" "taa" "taa" "taa"
## [469] "tag" "tga" "tag" "atg" "tga" "tga" "tag" "tag" "tga" "tga" "tag" "atg"
## [481] "taa" "atg" "tga" "taa" "taa" "tag" "atg" "tga" "atg" "atg" "taa" "taa"
## [493] "atg" "tga" "tag" "tag" "taa" "atg" "taa" "tag" "taa" "atg" "tag" "taa"
## [505] "tag" "taa" "atg" "atg" "taa" "tag" "tag" "taa" "tag" "taa" "taa" "atg"
## [517] "taa" "atg" "tga" "taa" "atg" "taa" "tag" "taa" "taa" "taa" "taa" "atg"
## [529] "tag" "taa" "taa" "atg" "tga" "taa" "tag" "tga" "tga" "tga" "tag" "tga"
## [541] "tag" "tga" "atg" "tag" "taa" "atg" "tag" "atg" "tag" "atg" "atg" "tag"
## [553] "atg" "atg" "atg" "atg" "atg" "atg" "tga" "atg" "taa" "tga" "taa" "atg"
## [565] "atg" "tga" "atg" "atg" "atg" "tga" "tag" "tag" "tga" "atg" "tga" "tga"
## [577] "taa" "tga" "tga" "tag" "tga" "atg" "tga" "atg" "tga" "tag" "tga" "taa"
## [589] "taa" "taa" "atg" "tag" "tga" "atg" "atg" "taa" "tga" "atg" "tga" "tag"
## [601] "tga" "taa" "atg" "tga" "atg" "atg" "tag" "atg" "taa" "tag" "tag" "tag"
## [613] "atg" "tga" "atg" "tga" "atg" "tga" "atg" "tga" "tag" "tga" "tga" "taa"
## [625] "atg" "taa" "taa" "tga" "atg" "tga" "atg" "tga" "taa" "atg" "atg" "tag"
## [637] "atg" "atg" "taa" "atg" "tga" "taa" "tag" "tag" "taa" "tag" "atg" "taa"
## [649] "taa" "taa" "tag" "taa" "atg" "taa" "atg" "atg" "tga" "atg" "atg" "tga"
## [661] "atg" "tga" "tga" "atg" "tga" "taa" "atg" "tga" "atg" "tag" "atg" "tga"
## [673] "tag" "tag" "tag" "tga" "atg" "tga" "tag" "tga" "tag" "tga" "tga" "taa"
## [685] "atg" "atg" "atg" "tga" "taa" "tag" "tag" "tga" "atg" "atg" "atg" "taa"
## [697] "tag" "tag" "taa" "tga" "atg" "tga" "tag" "atg" "atg" "tag" "tag" "atg"
## [709] "taa" "taa" "atg" "tga" "tga" "atg" "atg" "tga" "tag" "taa" "tag" "atg"
## [721] "tag" "tga" "atg" "tga" "tga" "atg" "taa" "atg" "tga" "atg" "tga" "atg"
## [733] "tga" "atg" "atg" "atg" "taa" "atg" "tga" "taa" "tag" "atg" "taa" "atg"
## [745] "tag" "atg" "taa" "tag" "taa" "taa" "tag" "tga" "tag" "taa" "atg" "tga"
## [757] "tag" "tga" "atg" "tag" "atg" "tga" "atg" "taa" "taa" "atg" "atg" "tga"
## [769] "atg" "atg" "taa" "taa" "atg" "tga" "tag" "atg" "atg" "tag" "taa" "tga"
## [781] "taa" "taa" "taa" "tag" "atg" "atg" "atg" "tga" "taa" "taa" "taa" "atg"
## [793] "atg" "tag" "taa" "tga" "atg" "tga" "atg" "atg" "tga" "tag" "taa" "atg"
## [805] "tga" "taa" "taa" "atg" "atg" "atg" "atg" "tga" "tag" "taa" "tag" "tga"
## [817] "atg" "tga" "taa" "atg" "tga" "atg" "tga" "tga" "atg" "tga" "tag" "atg"
## [829] "tga" "atg" "atg" "tga" "atg" "tag" "tag" "atg" "tga" "tga" "atg" "tag"
## [841] "atg" "taa" "taa" "atg" "taa" "tga" "tga" "atg" "atg" "atg" "atg" "taa"
## [853] "taa" "taa" "atg" "atg" "tag" "tga" "atg" "atg" "tga" "atg" "tga" "tga"
## [865] "tag" "tag" "taa" "atg" "atg" "tag" "atg" "tga" "atg" "tga" "atg" "tga"
## [877] "atg" "tag" "tga" "atg" "tga" "atg" "atg" "taa" "atg" "tag" "atg" "tag"
## [889] "taa" "taa" "atg" "tag" "tag" "tag" "tga" "atg" "tag" "atg" "tga" "tag"
## [901] "atg" "tga" "taa" "tag" "tga" "atg" "taa" "taa" "taa" "atg" "taa" "atg"
## [913] "tga" "taa" "tag" "tag" "taa" "atg" "atg" "tga" "tga" "atg" "atg" "tag"
## [925] "taa" "atg" "taa" "tag" "atg" "atg" "atg" "tga" "tag" "taa" "atg" "tga"
## [937] "tga" "taa" "tga" "atg" "taa" "tga" "tga" "tga" "atg" "atg" "atg" "atg"
## [949] "tga" "atg" "tga" "taa" "tag" "atg" "atg" "atg" "tag" "tga" "taa" "atg"
## [961] "tga" "atg" "taa" "atg" "tga" "atg" "tga" "atg" "tga" "atg" "tga" "atg"
## [973] "taa" "atg" "tga" "atg" "tag" "tag" "tga" "taa" "atg" "taa" "taa" "taa"
## [985] "atg" "tga" "tga" "tag" "atg" "tga" "atg" "tga" "atg" "taa" "tag" "atg"
## [997] "tag" "taa" "tga" "atg" "tag" "taa" "tga" "taa" "taa" "tag" "atg" "tag"
## [1009] "atg" "tag" "taa" "taa" "tag" "tga" "atg" "taa" "taa" "atg" "atg" "tag"
## [1021] "atg" "tga" "taa" "tga" "tga" "taa" "taa" "atg" "tga" "tag" "atg" "atg"
## [1033] "tag" "tga" "tga" "atg" "atg" "taa" "taa" "tag" "tga" "atg" "tga" "taa"
## [1045] "taa" "atg" "tga" "tag" "tag" "taa" "atg" "taa" "atg" "tga" "taa" "taa"
## [1057] "atg" "tga" "tag" "tag" "atg" "tga" "atg" "taa" "taa" "atg" "tga" "tag"
## [1069] "atg" "tga" "atg" "tga" "taa" "tga" "taa" "tga" "atg" "tga" "atg" "tga"
## [1081] "taa" "atg" "tga" "tag" "tga" "atg" "taa" "tag" "tga" "taa" "atg" "atg"
## [1093] "tga" "atg" "tag" "tga" "tag" "taa" "taa" "atg" "tga" "tag" "tga" "atg"
## [1105] "tga" "tag" "tga" "atg" "atg" "tga" "taa" "tga" "atg" "tga" "taa" "taa"
## [1117] "atg" "tga" "tag" "taa" "atg" "tag" "taa" "tag" "atg" "tga" "tag" "taa"
## [1129] "atg" "taa" "tag" "tag" "atg" "tga" "atg" "atg" "tag" "taa" "atg" "tag"
## [1141] "taa" "atg" "taa" "taa" "taa" "atg" "tga" "taa" "taa" "tga" "taa" "atg"
## [1153] "tga" "atg" "tga" "taa" "atg" "tga" "tag" "tga" "tga" "taa" "tga" "atg"
## [1165] "taa" "atg" "taa" "taa" "taa" "taa" "taa" "atg" "taa" "tag" "atg" "tga"
## [1177] "atg" "tga" "atg" "tga" "atg" "taa" "taa" "atg" "taa" "atg" "tga" "taa"
## [1189] "atg" "tag" "tag" "tag" "tag" "atg" "tga" "tag" "tga" "tag" "tag" "taa"
## [1201] "taa" "atg" "taa" "tga" "atg" "tag" "atg" "atg" "tga" "atg" "taa" "atg"
## [1213] "tag" "taa" "atg" "taa" "taa" "tag" "taa" "taa" "tga" "atg" "atg" "atg"
## [1225] "taa" "tga" "atg" "atg" "taa" "tag" "taa" "atg" "taa" "atg" "tga" "atg"
## [1237] "taa" "taa" "tag" "tga" "atg" "atg" "tga" "tag" "tag" "atg" "tga" "atg"
## [1249] "tga" "atg" "tga" "taa" "atg" "tga" "atg" "tga" "tga" "atg" "tga" "atg"
## [1261] "taa" "taa" "atg" "tag" "tag" "tag" "taa" "taa" "atg" "atg" "tga" "atg"
## [1273] "tga" "tga" "taa" "atg" "tag" "taa" "atg" "tga" "tag" "atg" "tga" "atg"
## [1285] "atg" "tga" "tga" "tga" "atg" "taa" "atg" "tga" "atg" "tag" "atg" "tga"
## [1297] "atg" "tga" "atg" "atg" "taa" "atg" "taa" "taa" "atg" "tga" "taa" "tga"
## [1309] "atg" "tga" "atg" "tag" "tag" "atg" "atg" "tga" "atg" "tag" "taa" "atg"
## [1321] "atg" "tga" "atg" "tga" "atg" "tga" "tag" "atg" "tga" "taa" "tag" "atg"
## [1333] "taa" "atg" "atg" "tag" "tga" "atg" "tga" "atg" "tag" "atg" "tga" "taa"
## [1345] "atg" "tga" "tga" "taa" "atg" "tga" "atg" "tag" "tga" "atg" "taa" "taa"
## [1357] "tag" "taa" "tga" "tga" "atg" "tga" "atg" "tga" "atg" "tga" "tga" "taa"
## [1369] "atg" "tag" "tag" "atg" "tga" "atg" "tga" "tag" "atg" "atg" "taa" "tag"
## [1381] "atg" "atg" "tga" "atg" "atg" "tga" "taa" "tag" "taa" "atg" "tag" "tag"
## [1393] "tag" "tga" "taa" "tga" "tag" "atg" "taa" "atg" "tga" "tag" "tga" "atg"
## [1405] "tga" "atg" "taa" "atg" "tga" "tga" "atg" "tag" "tga" "taa" "tag" "taa"
## [1417] "atg" "tga" "tag" "atg" "tga" "tga" "tga" "tag" "atg" "tga" "taa" "taa"
## [1429] "taa" "atg" "atg" "atg" "tga" "atg" "tag" "taa" "taa" "tag" "tag" "tga"
## [1441] "tga" "atg" "tga" "atg" "tga" "taa" "atg" "atg" "taa" "atg" "tga" "tag"
## [1453] "atg" "tga" "tag" "atg" "atg" "taa" "taa" "tag" "taa" "tga" "taa" "tga"
## [1465] "atg" "tga" "atg" "tga" "tag" "atg" "tga" "atg" "atg" "tga" "atg" "taa"
## [1477] "atg" "tag" "atg" "tag" "atg" "taa" "tag" "taa" "tag" "tag" "atg" "tga"
## [1489] "tga" "taa" "taa" "atg" "tga" "taa" "atg" "taa" "atg" "tag" "taa" "tag"
## [1501] "atg" "tga" "tga" "atg" "tga" "atg" "tga" "tga" "tga" "atg" "tga" "atg"
## [1513] "taa" "atg" "tga" "tga" "atg" "taa" "taa" "atg" "tga" "atg" "atg" "atg"
## [1525] "tag" "tga" "atg" "atg" "tga" "tag" "atg" "tag" "atg" "tga" "taa" "tga"
## [1537] "tga" "tag" "atg" "tga" "tga" "taa" "atg" "tga" "tga" "taa" "atg" "tga"
## [1549] "tga" "tag" "atg" "atg" "atg" "tag" "tga" "tag" "atg" "taa" "tga" "atg"
## [1561] "taa" "tga" "atg" "tag" "tga" "tga" "tga" "atg" "tag" "atg" "tga" "taa"
## [1573] "atg" "taa" "taa" "taa" "tga" "tag" "tga" "atg" "tag" "tga" "atg" "atg"
## [1585] "tag" "atg" "atg" "tga" "atg" "taa" "atg" "tga" "atg" "tga" "tag" "atg"
## [1597] "tga" "taa" "tag" "atg" "taa" "atg" "taa" "taa" "tga" "atg" "taa" "taa"
## [1609] "tag" "atg" "tga" "atg" "tga" "taa" "taa" "atg" "tga" "taa" "taa" "taa"
## [1621] "taa" "taa" "tga" "taa" "taa" "taa" "atg" "tag" "atg" "tga" "tga" "tga"
## [1633] "tga" "atg" "tag" "tag" "tag" "taa" "atg" "taa" "taa" "taa" "tag" "atg"
## [1645] "taa" "taa" "taa" "taa" "tag" "tga" "tag" "taa" "atg" "tga" "atg" "atg"
## [1657] "tga" "taa" "atg" "atg" "atg" "taa" "atg" "tga" "tag" "tag" "taa" "tag"
## [1669] "atg" "tga" "taa" "atg" "atg" "tga" "tga" "tga" "atg" "tga" "taa" "taa"
## [1681] "tga" "tga" "atg" "tga" "atg" "taa" "atg" "tga" "atg" "taa" "atg" "atg"
## [1693] "tga" "tga" "atg" "tga" "atg" "taa" "atg" "tga" "atg" "tga" "atg" "taa"
## [1705] "tag" "atg" "tga" "tga" "taa" "atg" "taa" "atg" "tga" "taa" "taa" "taa"
## [1717] "atg" "tag" "atg" "atg" "tga" "tag" "atg" "tga" "atg" "tga" "taa" "taa"
## [1729] "tag" "tag" "taa" "taa" "atg" "tga" "atg" "atg" "atg" "taa" "atg" "taa"
## [1741] "taa" "tga" "atg" "atg" "atg" "taa" "tga" "atg" "tga" "taa" "taa" "taa"
## [1753] "atg" "taa" "atg" "tga" "atg" "tga" "tag" "tag" "tga" "taa" "taa" "atg"
## [1765] "tag" "tag" "tga" "tga" "tga" "atg" "tga" "atg" "taa" "atg" "tga" "tga"
## [1777] "atg" "tga" "tag" "taa" "taa" "atg" "taa" "taa" "atg" "atg" "atg" "tga"
## [1789] "tga" "taa" "taa" "atg" "atg" "taa" "tga" "tga" "taa" "atg" "atg" "tga"
## [1801] "taa" "atg" "taa" "tga" "atg" "tga" "atg" "taa" "taa" "taa" "taa" "atg"
## [1813] "atg" "taa" "tag" "atg" "tag" "tga" "taa" "tga" "taa" "taa" "tag" "taa"
## [1825] "atg" "atg" "tga" "atg" "tga" "atg" "atg" "tga" "tga" "atg" "taa" "tga"
## [1837] "tga" "taa" "atg" "tga" "taa" "taa" "atg" "taa" "atg" "atg" "taa" "tga"
## [1849] "taa" "atg" "taa" "tga" "atg" "tga" "atg" "atg" "tag" "atg" "tga" "atg"
## [1861] "taa" "tag" "tga" "taa" "taa" "atg" "tga" "atg" "tag" "tga" "atg" "taa"
## [1873] "taa" "taa" "tag" "tag" "atg" "tag" "tga" "taa" "atg" "tga" "atg" "taa"
## [1885] "taa" "atg" "tga" "atg" "tag" "tga" "taa" "atg" "tga" "taa" "taa" "taa"
## [1897] "atg" "taa" "tga" "atg" "tga" "tga" "taa" "tga" "tag" "taa" "taa" "atg"
## [1909] "tga" "atg" "taa" "tga" "atg" "atg" "atg" "tag" "tga" "atg" "atg" "tga"
## [1921] "tag" "tag" "tag" "atg" "tag" "tga" "tag" "taa" "taa" "taa" "tag" "taa"
## [1933] "atg" "atg" "taa" "tag" "taa" "atg" "tga" "atg" "taa" "atg" "tag" "taa"
## [1945] "tga" "tga" "atg" "tga" "taa" "tga" "taa" "taa" "tga" "taa" "taa" "tga"
## [1957] "tga" "atg" "atg" "tga" "atg" "atg" "tag" "tga" "taa" "atg" "tag" "atg"
## [1969] "tga" "atg" "tga" "atg" "tga" "tag" "tag" "atg" "atg" "atg" "tag" "atg"
## [1981] "atg" "atg" "tga" "taa" "tag" "taa" "atg" "taa" "taa" "tag" "taa" "taa"
## [1993] "atg" "tga" "tga" "taa" "tga" "taa" "atg" "taa" "taa" "tga" "taa" "taa"
## [2005] "atg" "tga" "tga" "atg" "atg" "atg" "atg" "atg" "tga" "tga" "taa" "atg"
## [2017] "taa" "tga" "atg" "taa" "atg" "tga" "tga" "atg" "tga" "taa" "tga" "atg"
## [2029] "tga" "taa" "taa" "atg" "tga" "tga" "atg" "tga" "taa" "atg" "atg" "taa"
## [2041] "atg" "atg" "tga" "tag" "atg" "tga" "atg" "tga" "atg" "tga" "tga" "taa"
## [2053] "atg" "atg" "tga" "taa" "tga" "atg" "atg" "atg" "taa" "taa" "atg" "tag"
## [2065] "taa" "atg" "atg" "tga" "atg" "atg" "tag" "atg" "tga" "atg" "atg" "tga"
## [2077] "atg" "taa" "atg" "tga" "taa" "tga" "tga" "tga" "tag" "taa" "atg" "atg"
## [2089] "tga" "tga" "atg" "taa" "taa" "atg" "taa" "atg" "atg" "tga" "atg" "tga"
## [2101] "tag" "taa" "tga" "atg" "taa" "tag" "taa" "tag" "tag" "tag" "taa" "tga"
## [2113] "tag" "taa" "taa" "tga" "tga" "tga" "taa" "taa" "taa" "atg" "tga" "taa"
## [2125] "atg" "tag" "taa" "tag" "tag" "atg" "taa" "taa" "taa" "taa" "taa" "tga"
## [2137] "atg" "tag" "tga" "atg" "tag" "atg" "atg" "tga" "atg" "tga" "tga" "atg"
## [2149] "tag" "tga" "taa" "tag" "tag" "tga" "taa" "taa" "tag" "tag" "taa" "tga"
## [2161] "atg" "tga" "tag" "tga" "atg" "tga" "tga" "taa" "taa" "tag" "tga" "taa"
## [2173] "taa" "taa" "tag" "atg" "tga" "atg" "tga" "atg" "tag" "taa" "atg" "tga"
## [2185] "tag" "tga" "tga" "tag" "taa" "tga" "taa" "taa" "tag" "tga" "tag" "taa"
## [2197] "taa" "atg" "tga" "atg" "tga" "taa" "tga" "tag" "taa" "taa" "atg" "tag"
## [2209] "taa" "atg" "tga" "tga" "atg" "atg" "tag" "taa" "taa" "atg" "tga" "taa"
## [2221] "tag" "taa" "tag" "tag" "tag" "atg" "atg" "atg" "taa" "atg" "tag" "tag"
## [2233] "atg" "tga" "tag" "taa" "taa" "taa" "atg" "tga" "taa" "atg" "tag" "tga"
## [2245] "taa" "atg" "atg" "taa" "atg" "tag" "tag" "atg" "tga" "tga" "atg" "tga"
## [2257] "atg" "tag" "taa" "atg" "tga" "tga" "taa" "taa" "atg" "tag" "taa" "tag"
## [2269] "atg" "tag" "tga" "tag" "tga" "tga" "taa" "taa" "tga" "taa" "taa" "tga"
## [2281] "atg" "atg" "atg" "tga" "atg" "taa" "atg" "tga" "tga" "tga" "taa" "tga"
## [2293] "atg" "tga" "tga" "tga" "atg" "atg" "tga" "atg" "tga" "tga" "taa" "atg"
## [2305] "tga" "atg" "tga" "atg" "atg" "taa" "tag" "atg" "tga" "taa" "taa" "tag"
## [2317] "tag" "taa" "taa" "tag" "taa" "atg" "taa" "tag" "tga" "tga" "taa" "atg"
## [2329] "tag" "atg" "taa" "atg" "taa" "taa" "tag" "tag"
plotPotentialStartsAndStops(My_SeqS)
## [1] 1 31646
Find and Plot the potential ORF in the reverse complement of the last 1000 nucleotides in the sequence. Use a green shade in your plot.
######################################################
# Find ORF in sequences find the possible ORF in
# DNA sequences and return the RRF start and stop index
# and its length
######################################################
findORFsinSeq <- function(sequence)
{
require(Biostrings)
# Make vectors "positions" and "types" containing information on the positions of ATGs in the sequence:
mylist <- findPotentialStartsAndStops2(sequence)
positions <- mylist[[1]]
types <- mylist[[2]]
# Make vectors "orfstarts" and "orfstops" to store the predicted start and stop codons of ORFs
orfstarts <- numeric()
orfstops <- numeric()
# Make a vector "orflengths" to store the lengths of the ORFs
orflengths <- numeric()
# Print out the positions of ORFs in the sequence:
# Find the length of vector "positions"
numpositions <- length(positions)
# There must be at least one start codon and one stop codon to have an ORF.
if (numpositions >= 2)
{
for (i in 1:(numpositions-1))
{
posi <- positions[i]
typei <- types[i]
found <- 0
while (found == 0)
{
for (j in (i+1):numpositions)
{
posj <- positions[j]
typej <- types[j]
posdiff <- posj - posi
posdiffmod3 <- posdiff %% 3
# Add in the length of the stop codon
orflength <- posj - posi + 3
if (typei == "atg" && (typej == "taa" || typej == "tag" || typej == "tga") && posdiffmod3 == 0)
{
# Check if we have already used the stop codon at posj+2 in an ORF
numorfs <- length(orfstops)
usedstop <- -1
if (numorfs > 0)
{
for (k in 1:numorfs)
{
orfstopk <- orfstops[k]
if (orfstopk == (posj + 2)) { usedstop <- 1 }
}
}
if (usedstop == -1)
{
orfstarts <- append(orfstarts, posi, after=length(orfstarts))
orfstops <- append(orfstops, posj+2, after=length(orfstops)) # Including the stop codon.
orflengths <- append(orflengths, orflength, after=length(orflengths))
}
found <- 1
break
}
if (j == numpositions) { found <- 1 }
}
}
}
}
# Sort the final ORFs by start position:
indices <- order(orfstarts)
orfstarts <- orfstarts[indices]
orfstops <- orfstops[indices]
# Find the lengths of the ORFs that we have
orflengths <- numeric()
numorfs <- length(orfstarts)
for (i in 1:numorfs)
{
orfstart <- orfstarts[i]
orfstop <- orfstops[i]
orflength <- orfstop - orfstart + 1
orflengths <- append(orflengths,orflength,after=length(orflengths))
}
mylist <- list(orfstarts, orfstops, orflengths)
return(mylist)
}
######################################################
# Plot ORF in sequences
######################################################
plotORFsinSeq <- function(sequence)
{
# Make vectors "positions" and "types" containing information on the positions of ATGs in the sequence:
mylist <- findPotentialStartsAndStops2(sequence)
positions <- mylist[[1]]
types <- mylist[[2]]
# Make vectors "orfstarts" and "orfstops" to store the predicted start and stop codons of ORFs
orfstarts <- numeric()
orfstops <- numeric()
# Make a vector "orflengths" to store the lengths of the ORFs
orflengths <- numeric()
# Print out the positions of ORFs in the sequence:
numpositions <- length(positions) # Find the length of vector "positions"
# There must be at least one start codon and one stop codon to have an ORF.
if (numpositions >= 2)
{
for (i in 1:(numpositions-1))
{
posi <- positions[i]
typei <- types[i]
found <- 0
while (found == 0)
{
for (j in (i+1):numpositions)
{
posj <- positions[j]
typej <- types[j]
posdiff <- posj - posi
posdiffmod3 <- posdiff %% 3
orflength <- posj - posi + 3 # Add in the length of the stop codon
if (typei == "atg" && (typej == "taa" || typej == "tag" || typej == "tga") && posdiffmod3 == 0)
{
# Check if we have already used the stop codon at posj+2 in an ORF
numorfs <- length(orfstops)
usedstop <- -1
if (numorfs > 0)
{
for (k in 1:numorfs)
{
orfstopk <- orfstops[k]
if (orfstopk == (posj + 2)) { usedstop <- 1 }
}
}
if (usedstop == -1)
{
orfstarts <- append(orfstarts, posi, after=length(orfstarts))
orfstops <- append(orfstops, posj+2, after=length(orfstops)) # Including the stop codon.
orflengths <- append(orflengths, orflength, after=length(orflengths))
}
found <- 1
break
}
if (j == numpositions) { found <- 1 }
}
}
}
}
# Sort the final ORFs by start position:
indices <- order(orfstarts)
orfstarts <- orfstarts[indices]
orfstops <- orfstops[indices]
# Make a plot showing the positions of ORFs in the input sequence:
# Draw a line at y=0 from 1 to the length of the sequence:
x <- c(1,nchar(sequence))
y <- c(0,0)
plot(x, y, ylim=c(0,3), type="l", axes=FALSE, xlab="Nucleotide", ylab=
?????"Reading frame", main="Predicted ORFs")
segments(1,1,nchar(sequence),1)
segments(1,2,nchar(sequence),2)
# Add the x-axis at y=0:
axis(1, pos=0)
# Add the y-axis labels:
text(0.9,0.5,"+1")
text(0.9,1.5,"+2")
text(0.9,2.5,"+3")
# Make a plot of the ORFs in the sequence:
numorfs <- length(orfstarts)
for (i in 1:numorfs)
{
orfstart <- orfstarts[i]
orfstop <- orfstops[i]
remainder <- (orfstart-1) %% 3
if (remainder == 0) # +1 reading frame
{
rect(orfstart,0,orfstop,1,col="cyan",border="black")
}
else if (remainder == 1)
{
rect(orfstart,1,orfstop,2,col="cyan",border="black")
}
else if (remainder == 2)
{
rect(orfstart,2,orfstop,3,col="cyan",border="black")
}
}
}
findORFsinSeq(My_SeqS)
## [[1]]
## [1] 66 227 369 429 555 654 696 1033 1192 1206 1216 1294
## [13] 1314 1333 1438 1464 1566 1635 1884 2113 2157 2254 2265 2466
## [25] 2583 2658 2742 2766 2845 2922 2955 3270 3283 3336 3360 3399
## [37] 3441 3507 3528 3834 3864 4020 4122 4153 4198 4215 4296 4327
## [49] 4335 4407 4479 4518 4596 4926 4995 5122 5161 5229 5370 5484
## [61] 5515 5623 5631 5751 5922 5950 6048 6378 6432 6531 6669 6784
## [73] 6948 7336 7347 7440 7461 7650 7773 7788 7801 7932 8037 8116
## [85] 8227 8241 8280 8412 8553 8665 8790 9018 9096 9121 9166 9180
## [97] 9282 9384 9510 9538 9588 9660 9873 10029 10294 10302 10371 10378
## [109] 10491 10554 10564 10629 10662 10683 10767 10776 10812 10843 10983 11026
## [121] 11152 11211 11259 11295 11589 11664 11710 11827 11844 12018 12066 12111
## [133] 12282 12387 12582 12651 12733 12924 13132 13150 13230 13341 13418 13511
## [145] 13561 13577 13652 13718 13790 13856 13913 13970 14105 14150 14339 14405
## [157] 14453 14514 14543 14666 14783 14856 14879 14894 14969 15155 15195 15353
## [169] 15446 15488 15611 15704 15726 15803 15866 15926 16058 16203 16220 16271
## [181] 16377 16385 16416 16433 16461 16475 16626 16667 16742 16787 16865 16907
## [193] 17052 17060 17079 17115 17192 17210 17249 17291 17498 17538 17573 17750
## [205] 17873 17915 18045 18131 18215 18302 18494 18602 18659 18765 18779 18833
## [217] 18920 18972 18986 19040 19175 19202 19295 19325 19359 19391 19455 19523
## [229] 19619 19796 19883 19928 20000 20168 20357 20379 20411 20483 20678 20811
## [241] 20852 20919 20945 20978 21080 21114 21143 21209 21353 21454 21521 21584
## [253] 21647 21818 21834 21917 22064 22250 22314 22442 22470 22577 22628 22694
## [265] 22889 22967 23114 23219 23375 23643 23666 23888 23909 23916 23933 24054
## [277] 24119 24332 24419 24572 24779 24980 25032 25140 25169 25230 25294 25361
## [289] 25433 25555 25577 25607 25651 26079 26360 26413 26718 27036 27193 27235
## [301] 27600 27604 27741 27776 27826 27921 27953 28016 28082 28092 28405 28477
## [313] 28545 29071 29085 29214 29286 29357 29377 29507 29590
##
## [[2]]
## [1] 98 13375 386 458 647 662 1187 1059 1197 1238 1251 1314
## [13] 1322 1368 1479 1565 1589 1700 2030 2121 2252 2265 2342 2582
## [25] 2600 2672 2747 2855 2892 2936 3257 3329 3291 3356 3374 3440
## [37] 3458 3527 3743 3839 4016 4055 4133 4173 4239 4259 4319 4341
## [49] 4349 4448 4514 4577 4661 4991 5108 5160 5169 5354 5444 5549
## [61] 5532 5634 5675 5894 5981 5958 6059 6416 6503 6596 6812 6804
## [73] 7034 7398 7388 7448 7646 7730 7781 7850 7821 8018 8147 8142
## [85] 8235 8267 8303 8420 8717 8673 8975 9083 9140 9165 9177 9230
## [97] 9299 9416 9578 9630 9608 9731 10007 10292 10344 10337 10478 10386
## [109] 10547 10628 10593 10652 10673 10733 10775 10808 10913 10869 11138 11046
## [121] 11160 11225 11291 11339 11627 11753 11739 11847 11933 12062 12098 12185
## [133] 12350 12551 12590 12833 12741 12980 13149 13161 13328 13358 13480 13531
## [145] 21447 13651 13690 13759 13831 13867 13939 14008 14146 14320 14386 14410
## [157] 14500 14573 14644 14749 14812 14864 14890 14950 15004 15160 15200 15445
## [169] 15478 15598 15658 15790 15737 15847 15898 16048 16135 16223 16252 16309
## [181] 16385 16423 16430 16456 16466 16657 16646 16690 16786 16810 16870 17041
## [193] 17072 17095 17084 17120 17209 17242 17281 17365 17512 17576 17608 17854
## [205] 17899 17944 18053 18205 18259 18415 18517 18655 18760 18782 18829 18841
## [217] 18970 19001 19021 19159 19195 19249 19309 19369 19400 19432 19475 19618
## [229] 19765 19810 19906 19939 20014 20263 20395 20399 20428 20671 20713 20852
## [241] 20884 20936 20968 21022 21142 21179 21169 21292 21361 25221 21565 21616
## [253] 21775 21907 21839 22063 22069 22396 22325 22576 22493 22618 22636 22888
## [265] 22945 23068 23167 23365 23497 23651 23689 23902 23923 23927 24007 24113
## [277] 24277 24388 24424 24742 24979 25072 25115 25169 25291 26054 25416 25429
## [289] 25453 25563 25594 25654 26115 26309 27025 26478 26744 27227 27216 27603
## [301] 27734 27618 27860 27817 28080 27953 27997 28063 29350 28388 28461 28491
## [313] 28757 29097 29144 29261 29318 29389 29454 29590 29625
##
## [[3]]
## [1] 33 13149 18 30 93 9 492 27 6 33 36 21
## [13] 9 36 42 102 24 66 147 9 96 12 78 117
## [25] 18 15 6 90 48 15 303 60 9 21 15 42
## [37] 18 21 216 6 153 36 12 21 42 45 24 15
## [49] 15 42 36 60 66 66 114 39 9 126 75 66
## [61] 18 12 45 144 60 9 12 39 72 66 144 21
## [73] 87 63 42 9 186 81 9 63 21 87 111 27
## [85] 9 27 24 9 165 9 186 66 45 45 12 51
## [97] 18 33 69 93 21 72 135 264 51 36 108 9
## [109] 57 75 30 24 12 51 9 33 102 27 156 21
## [121] 9 15 33 45 39 90 30 21 90 45 33 75
## [133] 69 165 9 183 9 57 18 12 99 18 63 21
## [145] 7887 75 39 42 42 12 27 39 42 171 48 6
## [157] 48 60 102 84 30 9 12 57 36 6 6 93
## [169] 33 111 48 87 12 45 33 123 78 21 33 39
## [181] 9 39 15 24 6 183 21 24 45 24 6 135
## [193] 21 36 6 6 18 33 33 75 15 39 36 105
## [205] 27 30 9 75 45 114 24 54 102 18 51 9
## [217] 51 30 36 120 21 48 15 45 42 42 21 96
## [229] 147 15 24 12 15 96 39 21 18 189 36 42
## [241] 33 18 24 45 63 66 27 84 9 3768 45 33
## [253] 129 90 6 147 6 147 12 135 24 42 9 195
## [265] 57 102 54 147 123 9 24 15 15 12 75 60
## [277] 159 57 6 171 201 93 84 30 123 825 123 69
## [289] 21 9 18 48 465 231 666 66 27 192 24 369
## [301] 135 15 120 42 255 33 45 48 1269 297 57 15
## [313] 213 27 60 48 33 33 78 84 36
plotORFsinSeq(My_SeqS)
## Contacting Delphi...the oracle is unavailable.
## We apologize for any inconvenience.
Extract and translate one potential gene. What is the length of the resultant protein sequence?
Seq_T <- seqinr::translate(s2c(substring(My_SeqS,66,98)))
# get the Length of the potential sequences
length(Seq_T)
## [1] 11
For the whole sequence, identify the significant ORFs. Use at least 20 random sequences. Justify your answer.
# sequences is pass a a DNA sequences
generateSeqsWithMultinomialModel <- function(sequence, index)
{
My_Base1 <- count(sequence, 1)
# Calculate the percentage in sequences
Pro_Seq <-
c((My_Base1["a"] / sum(My_Base1) * 100),
(My_Base1["g"] / sum(My_Base1) * 100),
(My_Base1["c"] / sum(My_Base1) * 100),
(My_Base1["t"] / sum(My_Base1) * 100)
)
My_Rad_Seq = {
}
for (i in 1:index) {
My_Rad_Seq <- c (My_Rad_Seq,
c2s(sample(
c('a', 'c', 'g', 't'),
length(sequence),
replace = TRUE,
prob = Pro_Seq
)))
}
return (My_Rad_Seq)
}
#generate random sequences
randseqs <- generateSeqsWithMultinomialModel(getSequence(My_Que$req[[1]]), 20)
# find ORF
randseqorflengths <- numeric() # Tell R that we want to make a new vector of numbers
for (i in 1:20)
{
# print(i) # Test the loop travelse
randseq <- randseqs[i] # Get the ith random sequence
mylist <- findORFsinSeq(randseq) # Find ORFs in "randseq"
lengths <- mylist[[3]] # Find the lengths of ORFs in "randseq"
randseqorflengths <- append(randseqorflengths, lengths, after=length(randseqorflengths))
}
# plot a histogram of the lengths of the ORFs real vs. random
par(mfrow = c(1,2)) # Make a picture with two plots side-by-side (one row, two columns)
bins <- seq(0,11000,50) # Set the bins for the histogram
hist(randseqorflengths, breaks=bins, col="red", xlim=c(0,1000))
#find the longest random gene
x = max(randseqorflengths)
#use it as a threshold, and discard all ORFs found in the real sequence that are shorter than this
summary(randseqorflengths > x)
## Mode FALSE
## logical 8020
#find and use the 99th quantile as a threshold
quantile(randseqorflengths, probs=c(0.99))
## 99%
## 222
define and use a function generateSeqsWithMultinomialModel