Objectives

Description

Write R code to do each of the following tasks:

Question 1:

Retrieve the sequence of SARS coronavirus MA15 ExoN1 (accession number FJ882953) from the ACNUC “genbank” database.

# Open a connection to the ACNUC "genbank" database
choosebank("genbank")

# Look up the entry by its accession number; the first element of the
# query's "req" component is the sequence that is written out below
My_Que <- query("My_Que", "AC=FJ882953")
sars_entry <- My_Que[["req"]][[1]]

# Write the sequence to a FASTA file, labelled with the entry's name
write.fasta(getSequence(sars_entry), getName(sars_entry), "SARS.fasta")

# Close the ACNUC connection once the sequence has been saved, so the
# remote session opened by choosebank() is not left open
closebank()

Question 2:

Find and plot all potential start and stop codons in the first 1000 bases of the sequence. Use a blue color for the start codons and orange for the stop codons in your plot.

######################################################
# Find all potential start and stop codons in a
# DNA sequence
######################################################

# Locate every potential start codon ("atg") and stop codon ("taa", "tag",
# "tga") in a DNA sequence.
#
# Args:
#   MySeq: the sequence to search; it is passed unchanged to matchPattern()
#          as the subject.
#
# Returns:
#   A list of two parallel vectors, sorted by position along the sequence:
#   "positions" (the start position of each codon found) and "types" (the
#   codon found at that position).
findPotentialStartsAndStops2 <- function(MySeq) {
  codons <- c("atg", "taa", "tag", "tga")

  # One vector of match start positions per codon, in the order of "codons"
  hits_by_codon <- lapply(codons, function(codon) {
    start(matchPattern(codon, MySeq))
  })

  # Flatten the hits and label each one with the codon that produced it
  positions <- unlist(hits_by_codon)
  types <- rep(codons, times = lengths(hits_by_codon))

  # Put both vectors in order of position along the input sequence
  by_position <- order(positions)
  list(
    positions = positions[by_position],
    types = types[by_position]
  )
}

Function to plot potential start and stop sequences.

######################################################
# Plot potential start and stop sequences.
######################################################

# Plot the potential start and stop codons of a DNA sequence, one horizontal
# band per forward reading frame.
#
# Args:
#   sequence:  the DNA sequence as a single string; its nchar() sets the
#              extent of the x-axis and it is passed on to
#              findPotentialStartsAndStops2().
#   start_col: colour of the tick drawn for each potential start codon
#              ("atg"). Defaults to "blue".
#   stop_col:  colour of the tick drawn for every other codon reported by
#              findPotentialStartsAndStops2() (the stop codons). Defaults to
#              "orange".
#
# Returns:
#   NULL, invisibly; the function is called for the plot it draws.
plotPotentialStartsAndStops <- function(sequence,
                                        start_col = "blue",
                                        stop_col = "orange") {
  seq_length <- nchar(sequence)

  # Base line at y = 0 spanning the whole sequence; axes are added by hand
  plot(
    c(1, seq_length),
    c(0, 0),
    ylim = c(0, 3),
    type = "l",
    axes = FALSE,
    xlab = "Nucleotide",
    ylab = "Reading frame",
    main = paste0(
      "Predicted start (", start_col, ") and stop (", stop_col, ") codons"
    )
  )
  # Lines separating the three reading-frame bands
  segments(1, 1, seq_length, 1)
  segments(1, 2, seq_length, 2)
  # Add the x-axis at y = 0:
  axis(1, pos = 0)
  # Label each band with its reading frame:
  text(0.9, 0.5, "+1")
  text(0.9, 1.5, "+2")
  text(0.9, 2.5, "+3")

  # Draw in each predicted start/stop codon:
  codons <- findPotentialStartsAndStops2(sequence)
  positions <- codons[["positions"]]
  types <- codons[["types"]]

  # Nothing to draw when no codon was found (a 1:0 loop would fail here)
  if (length(positions) > 0) {
    # (position - 1) %% 3 is 0, 1 or 2 for reading frames +1, +2 and +3; the
    # tick for a codon spans the band from y = frame to y = frame + 1
    frame <- (positions - 1) %% 3
    codon_col <- ifelse(types == "atg", start_col, stop_col)
    segments(positions, frame, positions, frame + 1, lwd = 1, col = codon_col)
  }

  invisible(NULL)
}
# Read the sequence back as a vector of single characters (one per base).
# With as.string = TRUE the sequence is one long string, so `[1:1000]` would
# return that whole string followed by 999 NAs rather than the first 1000
# bases, and the whole sequence would end up being searched.
My_Seq <- read.fasta("SARS.fasta", as.string = FALSE)

# Keep only the first 1000 bases
My_SeqS <- My_Seq$FJ882953[1:1000]

# measure length
length(My_SeqS)
## [1] 1000
# conversion of a vector of chars into a string
My_SeqS <- c2s(My_SeqS)

# find potential start and stop codons in the first 1000 bases
# (recorded output that lists positions beyond 1000 predates this fix)
findPotentialStartsAndStops2(My_SeqS)
## $positions
##    [1]    13    26    37    48    66    71    89    96    99   115   129   195
##   [13]   212   223   227   295   306   355   369   384   387   418   429   439
##   [25]   448   450   456   484   500   523   531   555   585   598   607   610
##   [37]   628   633   645   654   655   660   664   679   688   696   697   715
##   [49]   720   739   751   759   780   807   820   860   883   922   927   928
##   [61]   933   934   952   961   964   997  1015  1033  1057  1090  1105  1115
##   [73]  1162  1169  1185  1187  1188  1192  1195  1206  1207  1216  1236  1249
##   [85]  1264  1272  1279  1294  1312  1314  1320  1323  1328  1333  1366  1372
##   [97]  1405  1429  1435  1438  1464  1477  1480  1507  1513  1546  1551  1563
##  [109]  1566  1567  1579  1587  1594  1603  1609  1632  1635  1636  1690  1698
##  [121]  1708  1735  1747  1753  1776  1815  1882  1884  1906  1936  1938  1948
##  [133]  1976  2018  2025  2028  2068  2089  2110  2113  2119  2131  2143  2157
##  [145]  2197  2233  2250  2254  2263  2265  2272  2288  2296  2340  2344  2417
##  [157]  2425  2442  2452  2458  2466  2467  2485  2509  2536  2539  2550  2580
##  [169]  2583  2591  2598  2605  2658  2670  2689  2697  2742  2745  2758  2764
##  [181]  2766  2767  2776  2788  2790  2791  2815  2836  2845  2853  2868  2890
##  [193]  2906  2914  2920  2922  2923  2934  2953  2955  2956  2958  2965  2987
##  [205]  3012  3030  3040  3055  3057  3058  3067  3072  3073  3090  3091  3130
##  [217]  3145  3174  3175  3184  3199  3223  3235  3255  3265  3270  3280  3283
##  [229]  3289  3316  3323  3327  3333  3336  3343  3354  3360  3372  3399  3407
##  [241]  3421  3423  3424  3433  3438  3441  3456  3484  3493  3507  3520  3525
##  [253]  3528  3535  3553  3564  3565  3628  3693  3694  3708  3709  3722  3733
##  [265]  3736  3741  3748  3763  3802  3834  3837  3850  3862  3864  3865  3892
##  [277]  3907  3925  3933  3937  3948  3949  3962  3967  3973  3980  3991  3999
##  [289]  4010  4014  4018  4020  4030  4036  4053  4084  4088  4104  4120  4122
##  [301]  4123  4131  4153  4171  4180  4192  4198  4215  4237  4239  4243  4257
##  [313]  4278  4286  4296  4300  4317  4319  4327  4331  4335  4339  4347  4349
##  [325]  4366  4372  4381  4402  4407  4432  4435  4446  4467  4476  4479  4480
##  [337]  4496  4509  4512  4518  4525  4531  4550  4561  4575  4596  4612  4614
##  [349]  4645  4659  4722  4729  4738  4747  4795  4801  4816  4824  4852  4860
##  [361]  4888  4913  4919  4920  4926  4956  4963  4965  4978  4986  4989  4995
##  [373]  4996  5002  5023  5026  5028  5029  5041  5044  5071  5073  5074  5089
##  [385]  5096  5106  5122  5148  5158  5161  5167  5170  5188  5199  5215  5229
##  [397]  5257  5272  5274  5281  5311  5314  5335  5337  5351  5352  5370  5374
##  [409]  5401  5403  5413  5442  5451  5460  5462  5468  5484  5485  5488  5494
##  [421]  5515  5530  5532  5547  5573  5574  5576  5593  5599  5623  5631  5632
##  [433]  5644  5673  5680  5701  5723  5745  5749  5751  5803  5811  5833  5847
##  [445]  5857  5866  5868  5892  5922  5932  5935  5950  5956  5971  5973  5974
##  [457]  5979  5987  5988  6045  6048  6054  6057  6067  6073  6106  6118  6139
##  [469]  6213  6232  6246  6263  6280  6301  6309  6342  6349  6354  6372  6378
##  [481]  6388  6399  6400  6409  6414  6426  6432  6433  6443  6453  6478  6487
##  [493]  6489  6490  6501  6507  6513  6531  6547  6550  6565  6579  6594  6628
##  [505]  6636  6652  6662  6669  6706  6721  6727  6754  6760  6766  6778  6784
##  [517]  6802  6807  6810  6829  6848  6864  6876  6885  6894  6922  6946  6948
##  [529]  6955  6970  6979  6992  7003  7032  7041  7051  7069  7083  7104  7110
##  [541]  7119  7132  7151  7179  7194  7196  7222  7250  7264  7277  7295  7300
##  [553]  7304  7336  7347  7358  7362  7385  7386  7388  7396  7420  7438  7440
##  [565]  7445  7446  7461  7467  7473  7522  7540  7552  7555  7557  7558  7570
##  [577]  7588  7609  7630  7633  7644  7650  7672  7695  7696  7728  7737  7744
##  [589]  7753  7771  7773  7779  7786  7788  7801  7819  7848  7850  7873  7903
##  [601]  7909  7921  7925  7930  7932  7938  7963  7973  7984  8016  8028  8034
##  [613]  8037  8083  8091  8095  8106  8113  8116  8140  8145  8151  8158  8167
##  [625]  8177  8188  8191  8197  8204  8205  8227  8233  8239  8241  8256  8265
##  [637]  8280  8298  8301  8312  8323  8335  8347  8377  8382  8398  8412  8418
##  [649]  8425  8446  8455  8467  8474  8479  8528  8553  8554  8556  8568  8569
##  [661]  8598  8611  8626  8628  8629  8644  8649  8659  8665  8671  8697  8698
##  [673]  8715  8718  8751  8778  8790  8794  8824  8866  8872  8875  8908  8923
##  [685]  8928  8933  8958  8959  8965  8973  8992  8995  9018  9026  9030  9052
##  [697]  9076  9081  9084  9094  9096  9100  9109  9114  9121  9138  9163  9166
##  [709]  9175  9178  9180  9181  9220  9222  9227  9228  9237  9241  9279  9282
##  [721]  9297  9327  9350  9351  9373  9384  9397  9399  9414  9416  9492  9510
##  [733]  9511  9513  9538  9548  9576  9588  9606  9628  9640  9653  9658  9660
##  [745]  9673  9722  9729  9739  9769  9790  9819  9838  9858  9870  9873  9874
##  [757]  9880  9895  9897  9955  9962  9985  9995 10005 10027 10029 10044 10045
##  [769] 10068 10091 10096 10102 10107 10108 10138 10161 10190 10207 10213 10219
##  [781] 10228 10234 10249 10290 10294 10302 10334 10335 10342 10354 10369 10371
##  [793] 10378 10384 10396 10402 10407 10408 10430 10439 10471 10476 10483 10491
##  [805] 10504 10545 10551 10554 10564 10572 10587 10591 10594 10606 10609 10626
##  [817] 10629 10630 10636 10649 10650 10662 10663 10671 10683 10687 10731 10736
##  [829] 10749 10767 10772 10773 10776 10795 10806 10812 10813 10828 10830 10837
##  [841] 10843 10867 10885 10904 10911 10920 10932 10983 11012 11026 11033 11044
##  [853] 11050 11107 11111 11120 11128 11136 11138 11147 11148 11152 11158 11167
##  [865] 11173 11188 11194 11207 11211 11223 11240 11241 11259 11260 11262 11263
##  [877] 11265 11272 11289 11291 11292 11295 11325 11329 11331 11337 11354 11364
##  [889] 11376 11383 11411 11418 11422 11430 11446 11495 11514 11589 11590 11617
##  [901] 11624 11625 11650 11656 11662 11664 11677 11683 11704 11710 11737 11741
##  [913] 11746 11751 11794 11799 11812 11827 11844 11845 11872 11888 11918 11931
##  [925] 11938 11957 11965 12001 12018 12027 12048 12049 12060 12064 12066 12070
##  [937] 12076 12096 12108 12111 12118 12124 12130 12136 12138 12146 12167 12182
##  [949] 12183 12191 12211 12231 12235 12242 12251 12263 12268 12277 12280 12282
##  [961] 12283 12285 12292 12309 12316 12318 12348 12368 12382 12387 12409 12411
##  [973] 12415 12429 12463 12465 12472 12490 12493 12499 12503 12549 12568 12580
##  [985] 12582 12583 12588 12597 12611 12649 12651 12652 12657 12673 12691 12733
##  [997] 12739 12748 12754 12756 12796 12826 12831 12858 12867 12871 12878 12894
## [1009] 12924 12978 12988 12997 13008 13041 13046 13086 13099 13103 13132 13147
## [1021] 13150 13159 13174 13186 13191 13198 13228 13230 13231 13252 13280 13293
## [1033] 13300 13306 13326 13328 13341 13356 13373 13410 13416 13418 13437 13478
## [1045] 13485 13511 13512 13529 13544 13548 13561 13566 13577 13578 13593 13602
## [1057] 13628 13629 13644 13649 13652 13656 13660 13688 13692 13702 13707 13712
## [1069] 13718 13734 13736 13737 13743 13749 13757 13788 13790 13791 13793 13794
## [1081] 13806 13820 13821 13829 13839 13856 13860 13865 13869 13892 13913 13918
## [1093] 13923 13925 13937 13946 13952 13956 13968 13970 13992 14006 14054 14056
## [1105] 14072 14076 14091 14101 14105 14109 14130 14144 14150 14151 14193 14229
## [1117] 14240 14241 14244 14271 14273 14318 14321 14336 14339 14379 14384 14397
## [1129] 14405 14408 14418 14447 14453 14460 14470 14474 14498 14502 14514 14525
## [1141] 14534 14543 14571 14577 14580 14591 14592 14607 14619 14637 14642 14666
## [1153] 14685 14690 14691 14706 14719 14724 14747 14751 14760 14763 14772 14783
## [1165] 14796 14798 14810 14817 14829 14850 14853 14856 14862 14868 14879 14880
## [1177] 14887 14888 14894 14895 14906 14925 14931 14933 14948 14956 14957 14964
## [1189] 14969 14976 14988 15002 15021 15028 15029 15036 15059 15068 15078 15095
## [1201] 15129 15133 15137 15153 15155 15158 15175 15195 15198 15208 15213 15217
## [1213] 15222 15227 15229 15258 15270 15296 15300 15320 15324 15328 15334 15353
## [1225] 15357 15381 15383 15398 15402 15405 15414 15440 15443 15446 15462 15464
## [1237] 15468 15471 15476 15480 15488 15515 15516 15528 15534 15539 15543 15548
## [1249] 15549 15560 15561 15582 15595 15596 15598 15599 15609 15611 15612 15614
## [1261] 15630 15636 15641 15656 15659 15663 15669 15678 15704 15712 15717 15726
## [1273] 15735 15741 15750 15784 15788 15792 15803 15804 15845 15866 15867 15884
## [1285] 15895 15896 15900 15924 15926 15951 15962 15966 15968 15996 16007 16008
## [1297] 16010 16011 16027 16036 16046 16048 16052 16056 16058 16059 16062 16086
## [1309] 16094 16095 16102 16133 16188 16203 16220 16221 16226 16250 16263 16271
## [1321] 16280 16293 16295 16302 16304 16307 16322 16330 16331 16353 16365 16377
## [1333] 16383 16385 16416 16421 16428 16433 16440 16448 16454 16461 16464 16473
## [1345] 16475 16509 16551 16563 16574 16605 16626 16644 16655 16667 16688 16692
## [1357] 16698 16701 16728 16737 16742 16746 16748 16784 16787 16794 16808 16823
## [1369] 16825 16833 16847 16865 16868 16907 16908 16917 16922 16948 16989 17039
## [1381] 17052 17060 17070 17072 17079 17082 17093 17108 17112 17115 17118 17121
## [1393] 17147 17157 17160 17171 17183 17192 17207 17210 17232 17240 17247 17249
## [1405] 17250 17260 17268 17273 17274 17279 17291 17295 17363 17367 17378 17394
## [1417] 17413 17414 17423 17434 17466 17475 17483 17492 17498 17499 17505 17510
## [1429] 17526 17538 17548 17573 17574 17576 17606 17615 17673 17690 17705 17727
## [1441] 17745 17750 17751 17756 17778 17796 17798 17813 17852 17854 17859 17862
## [1453] 17873 17874 17897 17915 17939 17942 17955 17964 17967 18018 18023 18036
## [1465] 18045 18051 18076 18077 18087 18100 18101 18103 18115 18116 18131 18141
## [1477] 18145 18203 18215 18222 18227 18240 18257 18276 18281 18284 18302 18306
## [1489] 18312 18318 18339 18341 18360 18369 18388 18393 18410 18413 18423 18428
## [1501] 18436 18444 18452 18494 18501 18514 18515 18540 18561 18602 18630 18635
## [1513] 18642 18652 18653 18657 18659 18684 18696 18701 18702 18722 18728 18734
## [1525] 18741 18747 18749 18757 18758 18762 18765 18770 18779 18780 18792 18801
## [1537] 18813 18827 18833 18834 18839 18846 18874 18881 18900 18903 18920 18921
## [1549] 18963 18968 18972 18986 18996 18999 19002 19019 19040 19056 19065 19067
## [1561] 19092 19098 19115 19128 19134 19157 19173 19175 19182 19190 19193 19197
## [1573] 19202 19227 19242 19247 19275 19278 19287 19295 19307 19320 19325 19359
## [1585] 19367 19391 19397 19398 19418 19425 19429 19430 19432 19433 19449 19455
## [1597] 19473 19482 19517 19523 19533 19535 19542 19545 19557 19559 19593 19596
## [1609] 19616 19619 19626 19628 19641 19647 19665 19667 19677 19689 19695 19701
## [1621] 19719 19731 19743 19755 19763 19776 19796 19808 19822 19823 19827 19848
## [1633] 19881 19883 19887 19904 19914 19926 19928 19937 19940 19964 19992 20000
## [1645] 20012 20027 20040 20049 20057 20082 20111 20121 20137 20145 20164 20168
## [1657] 20169 20187 20201 20222 20237 20261 20263 20264 20267 20273 20301 20306
## [1669] 20323 20336 20351 20357 20379 20393 20397 20409 20411 20412 20426 20429
## [1681] 20450 20478 20483 20487 20500 20511 20522 20526 20581 20586 20599 20608
## [1693] 20616 20625 20639 20643 20648 20669 20671 20672 20674 20675 20678 20711
## [1705] 20726 20743 20744 20772 20775 20811 20850 20852 20853 20882 20913 20916
## [1717] 20919 20934 20941 20945 20946 20952 20963 20966 20978 20979 20985 21020
## [1729] 21032 21050 21056 21062 21080 21084 21100 21114 21137 21140 21143 21167
## [1741] 21177 21207 21209 21220 21224 21228 21285 21289 21290 21306 21311 21326
## [1753] 21328 21336 21353 21354 21358 21359 21384 21396 21426 21438 21445 21454
## [1765] 21486 21492 21495 21501 21519 21521 21522 21524 21537 21562 21563 21582
## [1777] 21584 21585 21594 21614 21645 21647 21669 21702 21707 21740 21772 21773
## [1789] 21794 21804 21816 21818 21834 21837 21843 21852 21876 21883 21904 21905
## [1801] 21915 21917 21924 21948 21950 21963 21965 21984 21990 22014 22020 22025
## [1813] 22037 22044 22061 22064 22067 22074 22092 22100 22113 22131 22146 22223
## [1825] 22240 22250 22251 22253 22254 22259 22274 22281 22305 22314 22323 22332
## [1837] 22338 22362 22388 22394 22404 22440 22442 22449 22466 22470 22491 22503
## [1849] 22545 22553 22569 22574 22577 22578 22595 22601 22616 22628 22629 22631
## [1861] 22634 22643 22671 22677 22683 22694 22695 22702 22728 22737 22739 22755
## [1873] 22761 22767 22773 22782 22787 22797 22806 22821 22823 22839 22841 22869
## [1885] 22886 22889 22890 22895 22943 22956 22964 22967 23004 23013 23034 23040
## [1897] 23042 23066 23112 23114 23121 23130 23148 23157 23165 23195 23201 23219
## [1909] 23229 23252 23256 23265 23267 23285 23333 23363 23370 23375 23390 23391
## [1921] 23427 23454 23460 23488 23495 23502 23505 23523 23526 23547 23559 23576
## [1933] 23578 23590 23595 23606 23613 23617 23640 23643 23649 23666 23670 23687
## [1945] 23715 23736 23761 23780 23799 23820 23828 23838 23853 23868 23871 23876
## [1957] 23886 23888 23899 23900 23909 23916 23921 23925 23931 23933 23937 23966
## [1969] 23997 23999 24000 24004 24005 24029 24033 24054 24097 24103 24111 24119
## [1981] 24140 24149 24150 24180 24192 24255 24263 24275 24288 24297 24303 24329
## [1993] 24332 24333 24351 24354 24378 24386 24419 24422 24434 24447 24465 24480
## [2005] 24484 24489 24519 24547 24572 24590 24599 24647 24648 24672 24690 24692
## [2017] 24740 24777 24779 24798 24812 24813 24828 24887 24891 24915 24948 24959
## [2029] 24960 24969 24977 24980 24981 24993 25016 25017 25029 25032 25043 25070
## [2041] 25084 25108 25109 25113 25140 25167 25169 25170 25175 25176 25182 25219
## [2053] 25230 25242 25243 25282 25289 25294 25318 25361 25414 25427 25433 25441
## [2065] 25451 25530 25555 25561 25577 25589 25592 25602 25603 25607 25651 25652
## [2077] 25654 25685 25690 25691 25709 25739 25745 25799 25805 25820 25828 25840
## [2089] 25871 25904 25909 25928 25940 25954 25994 26004 26017 26018 26020 26021
## [2101] 26039 26052 26066 26079 26113 26116 26120 26123 26161 26170 26219 26224
## [2113] 26230 26233 26264 26272 26282 26294 26307 26316 26346 26360 26386 26395
## [2125] 26413 26421 26424 26427 26442 26453 26476 26499 26502 26535 26575 26583
## [2137] 26606 26619 26625 26627 26635 26681 26718 26742 26756 26764 26784 26807
## [2149] 26823 26833 26839 26898 26916 26923 26968 26973 26992 27015 27023 27027
## [2161] 27036 27050 27067 27079 27090 27091 27122 27130 27133 27142 27145 27158
## [2173] 27166 27170 27190 27193 27194 27196 27197 27207 27214 27225 27235 27236
## [2185] 27243 27254 27260 27306 27326 27384 27390 27401 27411 27438 27557 27566
## [2197] 27585 27600 27601 27604 27605 27616 27620 27640 27664 27667 27669 27709
## [2209] 27732 27741 27742 27760 27776 27788 27796 27815 27818 27826 27833 27845
## [2221] 27858 27864 27873 27896 27918 27921 27931 27942 27951 27953 27995 27999
## [2233] 28016 28017 28041 28061 28065 28078 28082 28087 28090 28092 28114 28156
## [2245] 28162 28170 28182 28225 28260 28276 28315 28326 28327 28375 28385 28386
## [2257] 28405 28422 28459 28477 28489 28500 28537 28540 28545 28651 28660 28690
## [2269] 28712 28717 28729 28755 28764 28774 28792 28825 28840 28849 28956 28972
## [2281] 29033 29048 29071 29076 29085 29095 29103 29104 29142 29155 29188 29212
## [2293] 29214 29215 29259 29278 29282 29286 29287 29315 29316 29332 29348 29357
## [2305] 29358 29360 29361 29377 29384 29387 29418 29437 29438 29448 29452 29465
## [2317] 29471 29475 29481 29491 29501 29507 29512 29518 29529 29584 29588 29590
## [2329] 29594 29611 29623 29625 29630 29636 29642 29645
## 
## $types
##    [1] "tag" "taa" "taa" "tag" "atg" "tag" "taa" "taa" "taa" "tga" "taa" "tag"
##   [13] "tga" "taa" "atg" "tag" "tag" "tga" "atg" "tag" "tag" "tga" "atg" "taa"
##   [25] "tga" "atg" "taa" "tga" "atg" "tag" "taa" "atg" "atg" "taa" "taa" "taa"
##   [37] "tag" "atg" "taa" "atg" "tga" "tag" "tga" "tga" "tga" "atg" "tga" "taa"
##   [49] "atg" "tga" "tga" "atg" "atg" "atg" "tga" "atg" "tga" "tga" "atg" "tga"
##   [61] "atg" "tga" "tga" "tga" "taa" "taa" "tga" "atg" "taa" "tga" "tga" "atg"
##   [73] "taa" "atg" "tga" "atg" "tga" "atg" "taa" "atg" "tga" "atg" "tga" "tga"
##   [85] "tga" "tag" "tga" "atg" "taa" "atg" "tag" "tga" "atg" "atg" "tga" "tag"
##   [97] "tga" "tag" "tag" "atg" "atg" "taa" "taa" "tag" "tga" "tga" "atg" "tga"
##  [109] "atg" "tga" "tga" "tga" "tga" "taa" "taa" "tga" "atg" "tga" "tga" "taa"
##  [121] "tga" "tga" "taa" "taa" "taa" "taa" "tga" "atg" "tga" "tga" "atg" "tga"
##  [133] "atg" "atg" "atg" "taa" "taa" "tga" "tga" "atg" "tga" "tag" "tga" "atg"
##  [145] "tga" "taa" "taa" "atg" "tga" "atg" "taa" "atg" "tga" "tag" "tga" "atg"
##  [157] "taa" "taa" "tga" "tga" "atg" "tga" "tga" "tga" "tga" "tag" "atg" "taa"
##  [169] "atg" "atg" "tag" "taa" "atg" "taa" "taa" "taa" "atg" "tga" "tga" "tga"
##  [181] "atg" "tga" "tga" "taa" "atg" "tga" "tga" "tga" "atg" "tag" "tga" "tga"
##  [193] "atg" "tga" "tga" "atg" "tga" "tag" "tga" "atg" "tga" "atg" "tga" "atg"
##  [205] "atg" "atg" "tga" "tga" "atg" "tga" "tga" "atg" "tga" "atg" "tga" "tga"
##  [217] "tga" "atg" "tga" "tga" "tga" "tga" "taa" "taa" "tga" "atg" "taa" "atg"
##  [229] "tga" "taa" "atg" "tga" "taa" "atg" "taa" "tga" "atg" "tag" "atg" "atg"
##  [241] "tga" "atg" "tga" "taa" "taa" "atg" "tag" "taa" "taa" "atg" "taa" "taa"
##  [253] "atg" "tga" "taa" "atg" "tga" "taa" "atg" "tga" "atg" "tga" "atg" "tga"
##  [265] "taa" "tga" "tag" "taa" "tga" "atg" "tga" "taa" "tga" "atg" "tga" "taa"
##  [277] "taa" "tga" "atg" "taa" "atg" "tga" "atg" "tag" "tga" "atg" "tga" "atg"
##  [289] "atg" "tag" "tga" "atg" "tag" "tga" "taa" "tga" "atg" "tga" "tga" "atg"
##  [301] "tga" "taa" "atg" "tga" "taa" "taa" "atg" "atg" "taa" "atg" "taa" "tag"
##  [313] "tga" "atg" "atg" "tga" "taa" "atg" "atg" "atg" "atg" "tag" "taa" "atg"
##  [325] "taa" "taa" "taa" "tga" "atg" "tag" "taa" "tag" "tga" "taa" "atg" "tga"
##  [337] "atg" "atg" "tga" "atg" "taa" "tga" "atg" "taa" "tag" "atg" "taa" "atg"
##  [349] "tga" "tag" "tag" "tga" "taa" "tga" "tga" "tga" "tga" "taa" "taa" "taa"
##  [361] "taa" "atg" "atg" "tga" "atg" "atg" "tga" "atg" "taa" "atg" "taa" "atg"
##  [373] "tga" "taa" "tag" "tga" "atg" "tga" "tag" "tga" "tga" "atg" "tga" "tag"
##  [385] "atg" "taa" "atg" "taa" "taa" "atg" "tga" "taa" "tag" "tag" "tga" "atg"
##  [397] "tag" "tga" "atg" "taa" "taa" "taa" "tga" "atg" "atg" "tga" "atg" "taa"
##  [409] "taa" "atg" "taa" "taa" "tag" "tga" "atg" "atg" "atg" "tga" "taa" "taa"
##  [421] "atg" "tga" "atg" "tag" "atg" "tga" "atg" "tga" "taa" "atg" "atg" "tga"
##  [433] "taa" "taa" "taa" "tga" "atg" "tga" "tga" "atg" "taa" "atg" "tga" "atg"
##  [445] "taa" "taa" "atg" "tag" "atg" "tga" "taa" "atg" "taa" "tga" "atg" "tga"
##  [457] "taa" "atg" "tga" "tga" "atg" "atg" "tag" "tga" "tag" "taa" "taa" "taa"
##  [469] "tag" "tga" "tag" "atg" "tga" "tga" "tag" "tag" "tga" "tga" "tag" "atg"
##  [481] "taa" "atg" "tga" "taa" "taa" "tag" "atg" "tga" "atg" "atg" "taa" "taa"
##  [493] "atg" "tga" "tag" "tag" "taa" "atg" "taa" "tag" "taa" "atg" "tag" "taa"
##  [505] "tag" "taa" "atg" "atg" "taa" "tag" "tag" "taa" "tag" "taa" "taa" "atg"
##  [517] "taa" "atg" "tga" "taa" "atg" "taa" "tag" "taa" "taa" "taa" "taa" "atg"
##  [529] "tag" "taa" "taa" "atg" "tga" "taa" "tag" "tga" "tga" "tga" "tag" "tga"
##  [541] "tag" "tga" "atg" "tag" "taa" "atg" "tag" "atg" "tag" "atg" "atg" "tag"
##  [553] "atg" "atg" "atg" "atg" "atg" "atg" "tga" "atg" "taa" "tga" "taa" "atg"
##  [565] "atg" "tga" "atg" "atg" "atg" "tga" "tag" "tag" "tga" "atg" "tga" "tga"
##  [577] "taa" "tga" "tga" "tag" "tga" "atg" "tga" "atg" "tga" "tag" "tga" "taa"
##  [589] "taa" "taa" "atg" "tag" "tga" "atg" "atg" "taa" "tga" "atg" "tga" "tag"
##  [601] "tga" "taa" "atg" "tga" "atg" "atg" "tag" "atg" "taa" "tag" "tag" "tag"
##  [613] "atg" "tga" "atg" "tga" "atg" "tga" "atg" "tga" "tag" "tga" "tga" "taa"
##  [625] "atg" "taa" "taa" "tga" "atg" "tga" "atg" "tga" "taa" "atg" "atg" "tag"
##  [637] "atg" "atg" "taa" "atg" "tga" "taa" "tag" "tag" "taa" "tag" "atg" "taa"
##  [649] "taa" "taa" "tag" "taa" "atg" "taa" "atg" "atg" "tga" "atg" "atg" "tga"
##  [661] "atg" "tga" "tga" "atg" "tga" "taa" "atg" "tga" "atg" "tag" "atg" "tga"
##  [673] "tag" "tag" "tag" "tga" "atg" "tga" "tag" "tga" "tag" "tga" "tga" "taa"
##  [685] "atg" "atg" "atg" "tga" "taa" "tag" "tag" "tga" "atg" "atg" "atg" "taa"
##  [697] "tag" "tag" "taa" "tga" "atg" "tga" "tag" "atg" "atg" "tag" "tag" "atg"
##  [709] "taa" "taa" "atg" "tga" "tga" "atg" "atg" "tga" "tag" "taa" "tag" "atg"
##  [721] "tag" "tga" "atg" "tga" "tga" "atg" "taa" "atg" "tga" "atg" "tga" "atg"
##  [733] "tga" "atg" "atg" "atg" "taa" "atg" "tga" "taa" "tag" "atg" "taa" "atg"
##  [745] "tag" "atg" "taa" "tag" "taa" "taa" "tag" "tga" "tag" "taa" "atg" "tga"
##  [757] "tag" "tga" "atg" "tag" "atg" "tga" "atg" "taa" "taa" "atg" "atg" "tga"
##  [769] "atg" "atg" "taa" "taa" "atg" "tga" "tag" "atg" "atg" "tag" "taa" "tga"
##  [781] "taa" "taa" "taa" "tag" "atg" "atg" "atg" "tga" "taa" "taa" "taa" "atg"
##  [793] "atg" "tag" "taa" "tga" "atg" "tga" "atg" "atg" "tga" "tag" "taa" "atg"
##  [805] "tga" "taa" "taa" "atg" "atg" "atg" "atg" "tga" "tag" "taa" "tag" "tga"
##  [817] "atg" "tga" "taa" "atg" "tga" "atg" "tga" "tga" "atg" "tga" "tag" "atg"
##  [829] "tga" "atg" "atg" "tga" "atg" "tag" "tag" "atg" "tga" "tga" "atg" "tag"
##  [841] "atg" "taa" "taa" "atg" "taa" "tga" "tga" "atg" "atg" "atg" "atg" "taa"
##  [853] "taa" "taa" "atg" "atg" "tag" "tga" "atg" "atg" "tga" "atg" "tga" "tga"
##  [865] "tag" "tag" "taa" "atg" "atg" "tag" "atg" "tga" "atg" "tga" "atg" "tga"
##  [877] "atg" "tag" "tga" "atg" "tga" "atg" "atg" "taa" "atg" "tag" "atg" "tag"
##  [889] "taa" "taa" "atg" "tag" "tag" "tag" "tga" "atg" "tag" "atg" "tga" "tag"
##  [901] "atg" "tga" "taa" "tag" "tga" "atg" "taa" "taa" "taa" "atg" "taa" "atg"
##  [913] "tga" "taa" "tag" "tag" "taa" "atg" "atg" "tga" "tga" "atg" "atg" "tag"
##  [925] "taa" "atg" "taa" "tag" "atg" "atg" "atg" "tga" "tag" "taa" "atg" "tga"
##  [937] "tga" "taa" "tga" "atg" "taa" "tga" "tga" "tga" "atg" "atg" "atg" "atg"
##  [949] "tga" "atg" "tga" "taa" "tag" "atg" "atg" "atg" "tag" "tga" "taa" "atg"
##  [961] "tga" "atg" "taa" "atg" "tga" "atg" "tga" "atg" "tga" "atg" "tga" "atg"
##  [973] "taa" "atg" "tga" "atg" "tag" "tag" "tga" "taa" "atg" "taa" "taa" "taa"
##  [985] "atg" "tga" "tga" "tag" "atg" "tga" "atg" "tga" "atg" "taa" "tag" "atg"
##  [997] "tag" "taa" "tga" "atg" "tag" "taa" "tga" "taa" "taa" "tag" "atg" "tag"
## [1009] "atg" "tag" "taa" "taa" "tag" "tga" "atg" "taa" "taa" "atg" "atg" "tag"
## [1021] "atg" "tga" "taa" "tga" "tga" "taa" "taa" "atg" "tga" "tag" "atg" "atg"
## [1033] "tag" "tga" "tga" "atg" "atg" "taa" "taa" "tag" "tga" "atg" "tga" "taa"
## [1045] "taa" "atg" "tga" "tag" "tag" "taa" "atg" "taa" "atg" "tga" "taa" "taa"
## [1057] "atg" "tga" "tag" "tag" "atg" "tga" "atg" "taa" "taa" "atg" "tga" "tag"
## [1069] "atg" "tga" "atg" "tga" "taa" "tga" "taa" "tga" "atg" "tga" "atg" "tga"
## [1081] "taa" "atg" "tga" "tag" "tga" "atg" "taa" "tag" "tga" "taa" "atg" "atg"
## [1093] "tga" "atg" "tag" "tga" "tag" "taa" "taa" "atg" "tga" "tag" "tga" "atg"
## [1105] "tga" "tag" "tga" "atg" "atg" "tga" "taa" "tga" "atg" "tga" "taa" "taa"
## [1117] "atg" "tga" "tag" "taa" "atg" "tag" "taa" "tag" "atg" "tga" "tag" "taa"
## [1129] "atg" "taa" "tag" "tag" "atg" "tga" "atg" "atg" "tag" "taa" "atg" "tag"
## [1141] "taa" "atg" "taa" "taa" "taa" "atg" "tga" "taa" "taa" "tga" "taa" "atg"
## [1153] "tga" "atg" "tga" "taa" "atg" "tga" "tag" "tga" "tga" "taa" "tga" "atg"
## [1165] "taa" "atg" "taa" "taa" "taa" "taa" "taa" "atg" "taa" "tag" "atg" "tga"
## [1177] "atg" "tga" "atg" "tga" "atg" "taa" "taa" "atg" "taa" "atg" "tga" "taa"
## [1189] "atg" "tag" "tag" "tag" "tag" "atg" "tga" "tag" "tga" "tag" "tag" "taa"
## [1201] "taa" "atg" "taa" "tga" "atg" "tag" "atg" "atg" "tga" "atg" "taa" "atg"
## [1213] "tag" "taa" "atg" "taa" "taa" "tag" "taa" "taa" "tga" "atg" "atg" "atg"
## [1225] "taa" "tga" "atg" "atg" "taa" "tag" "taa" "atg" "taa" "atg" "tga" "atg"
## [1237] "taa" "taa" "tag" "tga" "atg" "atg" "tga" "tag" "tag" "atg" "tga" "atg"
## [1249] "tga" "atg" "tga" "taa" "atg" "tga" "atg" "tga" "tga" "atg" "tga" "atg"
## [1261] "taa" "taa" "atg" "tag" "tag" "tag" "taa" "taa" "atg" "atg" "tga" "atg"
## [1273] "tga" "tga" "taa" "atg" "tag" "taa" "atg" "tga" "tag" "atg" "tga" "atg"
## [1285] "atg" "tga" "tga" "tga" "atg" "taa" "atg" "tga" "atg" "tag" "atg" "tga"
## [1297] "atg" "tga" "atg" "atg" "taa" "atg" "taa" "taa" "atg" "tga" "taa" "tga"
## [1309] "atg" "tga" "atg" "tag" "tag" "atg" "atg" "tga" "atg" "tag" "taa" "atg"
## [1321] "atg" "tga" "atg" "tga" "atg" "tga" "tag" "atg" "tga" "taa" "tag" "atg"
## [1333] "taa" "atg" "atg" "tag" "tga" "atg" "tga" "atg" "tag" "atg" "tga" "taa"
## [1345] "atg" "tga" "tga" "taa" "atg" "tga" "atg" "tag" "tga" "atg" "taa" "taa"
## [1357] "tag" "taa" "tga" "tga" "atg" "tga" "atg" "tga" "atg" "tga" "tga" "taa"
## [1369] "atg" "tag" "tag" "atg" "tga" "atg" "tga" "tag" "atg" "atg" "taa" "tag"
## [1381] "atg" "atg" "tga" "atg" "atg" "tga" "taa" "tag" "taa" "atg" "tag" "tag"
## [1393] "tag" "tga" "taa" "tga" "tag" "atg" "taa" "atg" "tga" "tag" "tga" "atg"
## [1405] "tga" "atg" "taa" "atg" "tga" "tga" "atg" "tag" "tga" "taa" "tag" "taa"
## [1417] "atg" "tga" "tag" "atg" "tga" "tga" "tga" "tag" "atg" "tga" "taa" "taa"
## [1429] "taa" "atg" "atg" "atg" "tga" "atg" "tag" "taa" "taa" "tag" "tag" "tga"
## [1441] "tga" "atg" "tga" "atg" "tga" "taa" "atg" "atg" "taa" "atg" "tga" "tag"
## [1453] "atg" "tga" "tag" "atg" "atg" "taa" "taa" "tag" "taa" "tga" "taa" "tga"
## [1465] "atg" "tga" "atg" "tga" "tag" "atg" "tga" "atg" "atg" "tga" "atg" "taa"
## [1477] "atg" "tag" "atg" "tag" "atg" "taa" "tag" "taa" "tag" "tag" "atg" "tga"
## [1489] "tga" "taa" "taa" "atg" "tga" "taa" "atg" "taa" "atg" "tag" "taa" "tag"
## [1501] "atg" "tga" "tga" "atg" "tga" "atg" "tga" "tga" "tga" "atg" "tga" "atg"
## [1513] "taa" "atg" "tga" "tga" "atg" "taa" "taa" "atg" "tga" "atg" "atg" "atg"
## [1525] "tag" "tga" "atg" "atg" "tga" "tag" "atg" "tag" "atg" "tga" "taa" "tga"
## [1537] "tga" "tag" "atg" "tga" "tga" "taa" "atg" "tga" "tga" "taa" "atg" "tga"
## [1549] "tga" "tag" "atg" "atg" "atg" "tag" "tga" "tag" "atg" "taa" "tga" "atg"
## [1561] "taa" "tga" "atg" "tag" "tga" "tga" "tga" "atg" "tag" "atg" "tga" "taa"
## [1573] "atg" "taa" "taa" "taa" "tga" "tag" "tga" "atg" "tag" "tga" "atg" "atg"
## [1585] "tag" "atg" "atg" "tga" "atg" "taa" "atg" "tga" "atg" "tga" "tag" "atg"
## [1597] "tga" "taa" "tag" "atg" "taa" "atg" "taa" "taa" "tga" "atg" "taa" "taa"
## [1609] "tag" "atg" "tga" "atg" "tga" "taa" "taa" "atg" "tga" "taa" "taa" "taa"
## [1621] "taa" "taa" "tga" "taa" "taa" "taa" "atg" "tag" "atg" "tga" "tga" "tga"
## [1633] "tga" "atg" "tag" "tag" "tag" "taa" "atg" "taa" "taa" "taa" "tag" "atg"
## [1645] "taa" "taa" "taa" "taa" "tag" "tga" "tag" "taa" "atg" "tga" "atg" "atg"
## [1657] "tga" "taa" "atg" "atg" "atg" "taa" "atg" "tga" "tag" "tag" "taa" "tag"
## [1669] "atg" "tga" "taa" "atg" "atg" "tga" "tga" "tga" "atg" "tga" "taa" "taa"
## [1681] "tga" "tga" "atg" "tga" "atg" "taa" "atg" "tga" "atg" "taa" "atg" "atg"
## [1693] "tga" "tga" "atg" "tga" "atg" "taa" "atg" "tga" "atg" "tga" "atg" "taa"
## [1705] "tag" "atg" "tga" "tga" "taa" "atg" "taa" "atg" "tga" "taa" "taa" "taa"
## [1717] "atg" "tag" "atg" "atg" "tga" "tag" "atg" "tga" "atg" "tga" "taa" "taa"
## [1729] "tag" "tag" "taa" "taa" "atg" "tga" "atg" "atg" "atg" "taa" "atg" "taa"
## [1741] "taa" "tga" "atg" "atg" "atg" "taa" "tga" "atg" "tga" "taa" "taa" "taa"
## [1753] "atg" "taa" "atg" "tga" "atg" "tga" "tag" "tag" "tga" "taa" "taa" "atg"
## [1765] "tag" "tag" "tga" "tga" "tga" "atg" "tga" "atg" "taa" "atg" "tga" "tga"
## [1777] "atg" "tga" "tag" "taa" "taa" "atg" "taa" "taa" "atg" "atg" "atg" "tga"
## [1789] "tga" "taa" "taa" "atg" "atg" "taa" "tga" "tga" "taa" "atg" "atg" "tga"
## [1801] "taa" "atg" "taa" "tga" "atg" "tga" "atg" "taa" "taa" "taa" "taa" "atg"
## [1813] "atg" "taa" "tag" "atg" "tag" "tga" "taa" "tga" "taa" "taa" "tag" "taa"
## [1825] "atg" "atg" "tga" "atg" "tga" "atg" "atg" "tga" "tga" "atg" "taa" "tga"
## [1837] "tga" "taa" "atg" "tga" "taa" "taa" "atg" "taa" "atg" "atg" "taa" "tga"
## [1849] "taa" "atg" "taa" "tga" "atg" "tga" "atg" "atg" "tag" "atg" "tga" "atg"
## [1861] "taa" "tag" "tga" "taa" "taa" "atg" "tga" "atg" "tag" "tga" "atg" "taa"
## [1873] "taa" "taa" "tag" "tag" "atg" "tag" "tga" "taa" "atg" "tga" "atg" "taa"
## [1885] "taa" "atg" "tga" "atg" "tag" "tga" "taa" "atg" "tga" "taa" "taa" "taa"
## [1897] "atg" "taa" "tga" "atg" "tga" "tga" "taa" "tga" "tag" "taa" "taa" "atg"
## [1909] "tga" "atg" "taa" "tga" "atg" "atg" "atg" "tag" "tga" "atg" "atg" "tga"
## [1921] "tag" "tag" "tag" "atg" "tag" "tga" "tag" "taa" "taa" "taa" "tag" "taa"
## [1933] "atg" "atg" "taa" "tag" "taa" "atg" "tga" "atg" "taa" "atg" "tag" "taa"
## [1945] "tga" "tga" "atg" "tga" "taa" "tga" "taa" "taa" "tga" "taa" "taa" "tga"
## [1957] "tga" "atg" "atg" "tga" "atg" "atg" "tag" "tga" "taa" "atg" "tag" "atg"
## [1969] "tga" "atg" "tga" "atg" "tga" "tag" "tag" "atg" "atg" "atg" "tag" "atg"
## [1981] "atg" "atg" "tga" "taa" "tag" "taa" "atg" "taa" "taa" "tag" "taa" "taa"
## [1993] "atg" "tga" "tga" "taa" "tga" "taa" "atg" "taa" "taa" "tga" "taa" "taa"
## [2005] "atg" "tga" "tga" "atg" "atg" "atg" "atg" "atg" "tga" "tga" "taa" "atg"
## [2017] "taa" "tga" "atg" "taa" "atg" "tga" "tga" "atg" "tga" "taa" "tga" "atg"
## [2029] "tga" "taa" "taa" "atg" "tga" "tga" "atg" "tga" "taa" "atg" "atg" "taa"
## [2041] "atg" "atg" "tga" "tag" "atg" "tga" "atg" "tga" "atg" "tga" "tga" "taa"
## [2053] "atg" "atg" "tga" "taa" "tga" "atg" "atg" "atg" "taa" "taa" "atg" "tag"
## [2065] "taa" "atg" "atg" "tga" "atg" "atg" "tag" "atg" "tga" "atg" "atg" "tga"
## [2077] "atg" "taa" "atg" "tga" "taa" "tga" "tga" "tga" "tag" "taa" "atg" "atg"
## [2089] "tga" "tga" "atg" "taa" "taa" "atg" "taa" "atg" "atg" "tga" "atg" "tga"
## [2101] "tag" "taa" "tga" "atg" "taa" "tag" "taa" "tag" "tag" "tag" "taa" "tga"
## [2113] "tag" "taa" "taa" "tga" "tga" "tga" "taa" "taa" "taa" "atg" "tga" "taa"
## [2125] "atg" "tag" "taa" "tag" "tag" "atg" "taa" "taa" "taa" "taa" "taa" "tga"
## [2137] "atg" "tag" "tga" "atg" "tag" "atg" "atg" "tga" "atg" "tga" "tga" "atg"
## [2149] "tag" "tga" "taa" "tag" "tag" "tga" "taa" "taa" "tag" "tag" "taa" "tga"
## [2161] "atg" "tga" "tag" "tga" "atg" "tga" "tga" "taa" "taa" "tag" "tga" "taa"
## [2173] "taa" "taa" "tag" "atg" "tga" "atg" "tga" "atg" "tag" "taa" "atg" "tga"
## [2185] "tag" "tga" "tga" "tag" "taa" "tga" "taa" "taa" "tag" "tga" "tag" "taa"
## [2197] "taa" "atg" "tga" "atg" "tga" "taa" "tga" "tag" "taa" "taa" "atg" "tag"
## [2209] "taa" "atg" "tga" "tga" "atg" "atg" "tag" "taa" "taa" "atg" "tga" "taa"
## [2221] "tag" "taa" "tag" "tag" "tag" "atg" "atg" "atg" "taa" "atg" "tag" "tag"
## [2233] "atg" "tga" "tag" "taa" "taa" "taa" "atg" "tga" "taa" "atg" "tag" "tga"
## [2245] "taa" "atg" "atg" "taa" "atg" "tag" "tag" "atg" "tga" "tga" "atg" "tga"
## [2257] "atg" "tag" "taa" "atg" "tga" "tga" "taa" "taa" "atg" "tag" "taa" "tag"
## [2269] "atg" "tag" "tga" "tag" "tga" "tga" "taa" "taa" "tga" "taa" "taa" "tga"
## [2281] "atg" "atg" "atg" "tga" "atg" "taa" "atg" "tga" "tga" "tga" "taa" "tga"
## [2293] "atg" "tga" "tga" "tga" "atg" "atg" "tga" "atg" "tga" "tga" "taa" "atg"
## [2305] "tga" "atg" "tga" "atg" "atg" "taa" "tag" "atg" "tga" "taa" "taa" "tag"
## [2317] "tag" "taa" "taa" "tag" "taa" "atg" "taa" "tag" "tga" "tga" "taa" "atg"
## [2329] "tag" "atg" "taa" "atg" "taa" "taa" "tag" "tag"
# Call plotPotentialStartsAndStops() (defined outside this excerpt) on My_SeqS.
# NOTE(review): Question 2 asks for the first 1000 bases only - confirm that
# My_SeqS holds the intended slice of the sequence.
plotPotentialStartsAndStops(My_SeqS)
## [1]     1 31646

Question 3:

Find and plot the potential ORFs in the reverse complement of the last 1000 nucleotides in the sequence. Use a green shade in your plot.

######################################################
# Find the open reading frames (ORFs) in a DNA
# sequence and return the start index, stop index
# and length of each ORF
######################################################

findORFsinSeq <- function(sequence)
{
  # Find the open reading frames (ORFs) on the forward strand of a DNA sequence.
  #
  # Every "atg" is paired with the first later stop codon ("taa", "tag" or
  # "tga") that lies in the same reading frame. A stop codon is used by at
  # most one ORF: once it has been claimed, later start codons that reach the
  # same stop are not reported.
  #
  # Args:
  #   sequence: the DNA sequence, in whatever form
  #             findPotentialStartsAndStops2() accepts.
  #
  # Returns:
  #   An unnamed list of three numeric vectors, ordered by ORF start:
  #     [[1]] start positions (first base of the "atg"),
  #     [[2]] stop positions (last base of the stop codon),
  #     [[3]] lengths in bases, stop codon included.
  #   All three vectors are empty when no ORF is found.

  # library() fails loudly when Biostrings is missing; require() would only
  # return FALSE and let the failure surface later.
  library(Biostrings)

  # Positions and codon types of every potential start and stop codon
  mylist <- findPotentialStartsAndStops2(sequence)
  positions <- mylist[[1]]
  types <- mylist[[2]]
  stopcodons <- c("taa", "tag", "tga")

  orfstarts <- numeric()
  orfstops <- numeric()
  numpositions <- length(positions)
  # There must be at least one start codon and one stop codon to have an ORF.
  if (numpositions >= 2)
  {
    for (i in 1:(numpositions - 1))
    {
      # Only a start codon can open an ORF, so skip everything else at once
      if (types[i] != "atg") { next }
      posi <- positions[i]
      later <- (i + 1):numpositions
      # Stop codons after position i that are in frame with this start codon
      isstop <- types[later] %in% stopcodons & (positions[later] - posi) %% 3 == 0
      if (any(isstop))
      {
        # Last base of the first in-frame stop codon
        orfstop <- positions[later[which(isstop)[1]]] + 2
        # Skip this ORF if the stop codon already closes an earlier one
        if (!(orfstop %in% orfstops))
        {
          orfstarts <- c(orfstarts, posi)
          orfstops <- c(orfstops, orfstop)
        }
      }
    }
  }
  # Sort the final ORFs by start position:
  indices <- order(orfstarts)
  orfstarts <- orfstarts[indices]
  orfstops <- orfstops[indices]
  # Vectorised, so zero ORFs gives an empty vector rather than NA
  orflengths <- orfstops - orfstarts + 1
  return(list(orfstarts, orfstops, orflengths))
}
######################################################
# Plot ORF in sequences
######################################################

plotORFsinSeq <- function(sequence, col = "cyan")
{
  # Plot the predicted ORFs of a DNA sequence, one track per reading frame.
  #
  # The ORFs come from findORFsinSeq(). Three horizontal tracks are drawn
  # (y in [0,1], [1,2] and [2,3], labelled "+1", "+2" and "+3") and each ORF
  # is shown as a filled rectangle from its start to its stop position in the
  # track given by (start - 1) %% 3.
  #
  # Args:
  #   sequence: the DNA sequence; nchar(sequence) is used as its length.
  #   col:      fill colour of the ORF rectangles. Defaults to "cyan"; pass
  #             e.g. "darkseagreen" for a green shade.
  #
  # Returns:
  #   NULL, invisibly. Called for its plot.

  # Reuse the ORF search instead of repeating it here
  orfs <- findORFsinSeq(sequence)
  orfstarts <- orfs[[1]]
  orfstops <- orfs[[2]]

  # Draw a line at y=0 from 1 to the length of the sequence:
  seqlength <- nchar(sequence)
  plot(c(1, seqlength), c(0, 0), ylim = c(0, 3), type = "l", axes = FALSE,
       xlab = "Nucleotide", ylab = "Reading frame", main = "Predicted ORFs")
  # Separators between the three reading-frame tracks
  segments(1, 1, seqlength, 1)
  segments(1, 2, seqlength, 2)
  # Add the x-axis at y=0:
  axis(1, pos = 0)
  # Add the y-axis labels:
  text(0.9, 0.5, "+1")
  text(0.9, 1.5, "+2")
  text(0.9, 2.5, "+3")

  # Draw the ORFs; nothing is drawn when none were found
  if (length(orfstarts) > 0)
  {
    # 0, 1 or 2: lower edge of the track for reading frame +1, +2 or +3
    frame <- (orfstarts - 1) %% 3
    rect(orfstarts, frame, orfstops, frame + 1, col = col, border = "black")
  }
  invisible(NULL)
}
# Print the ORF start positions, stop positions and lengths found in My_SeqS.
# NOTE(review): Question 3 asks for the reverse complement of the last 1000
# nucleotides; My_SeqS is passed here unchanged - confirm the intended input.
findORFsinSeq(My_SeqS)
## [[1]]
##   [1]    66   227   369   429   555   654   696  1033  1192  1206  1216  1294
##  [13]  1314  1333  1438  1464  1566  1635  1884  2113  2157  2254  2265  2466
##  [25]  2583  2658  2742  2766  2845  2922  2955  3270  3283  3336  3360  3399
##  [37]  3441  3507  3528  3834  3864  4020  4122  4153  4198  4215  4296  4327
##  [49]  4335  4407  4479  4518  4596  4926  4995  5122  5161  5229  5370  5484
##  [61]  5515  5623  5631  5751  5922  5950  6048  6378  6432  6531  6669  6784
##  [73]  6948  7336  7347  7440  7461  7650  7773  7788  7801  7932  8037  8116
##  [85]  8227  8241  8280  8412  8553  8665  8790  9018  9096  9121  9166  9180
##  [97]  9282  9384  9510  9538  9588  9660  9873 10029 10294 10302 10371 10378
## [109] 10491 10554 10564 10629 10662 10683 10767 10776 10812 10843 10983 11026
## [121] 11152 11211 11259 11295 11589 11664 11710 11827 11844 12018 12066 12111
## [133] 12282 12387 12582 12651 12733 12924 13132 13150 13230 13341 13418 13511
## [145] 13561 13577 13652 13718 13790 13856 13913 13970 14105 14150 14339 14405
## [157] 14453 14514 14543 14666 14783 14856 14879 14894 14969 15155 15195 15353
## [169] 15446 15488 15611 15704 15726 15803 15866 15926 16058 16203 16220 16271
## [181] 16377 16385 16416 16433 16461 16475 16626 16667 16742 16787 16865 16907
## [193] 17052 17060 17079 17115 17192 17210 17249 17291 17498 17538 17573 17750
## [205] 17873 17915 18045 18131 18215 18302 18494 18602 18659 18765 18779 18833
## [217] 18920 18972 18986 19040 19175 19202 19295 19325 19359 19391 19455 19523
## [229] 19619 19796 19883 19928 20000 20168 20357 20379 20411 20483 20678 20811
## [241] 20852 20919 20945 20978 21080 21114 21143 21209 21353 21454 21521 21584
## [253] 21647 21818 21834 21917 22064 22250 22314 22442 22470 22577 22628 22694
## [265] 22889 22967 23114 23219 23375 23643 23666 23888 23909 23916 23933 24054
## [277] 24119 24332 24419 24572 24779 24980 25032 25140 25169 25230 25294 25361
## [289] 25433 25555 25577 25607 25651 26079 26360 26413 26718 27036 27193 27235
## [301] 27600 27604 27741 27776 27826 27921 27953 28016 28082 28092 28405 28477
## [313] 28545 29071 29085 29214 29286 29357 29377 29507 29590
## 
## [[2]]
##   [1]    98 13375   386   458   647   662  1187  1059  1197  1238  1251  1314
##  [13]  1322  1368  1479  1565  1589  1700  2030  2121  2252  2265  2342  2582
##  [25]  2600  2672  2747  2855  2892  2936  3257  3329  3291  3356  3374  3440
##  [37]  3458  3527  3743  3839  4016  4055  4133  4173  4239  4259  4319  4341
##  [49]  4349  4448  4514  4577  4661  4991  5108  5160  5169  5354  5444  5549
##  [61]  5532  5634  5675  5894  5981  5958  6059  6416  6503  6596  6812  6804
##  [73]  7034  7398  7388  7448  7646  7730  7781  7850  7821  8018  8147  8142
##  [85]  8235  8267  8303  8420  8717  8673  8975  9083  9140  9165  9177  9230
##  [97]  9299  9416  9578  9630  9608  9731 10007 10292 10344 10337 10478 10386
## [109] 10547 10628 10593 10652 10673 10733 10775 10808 10913 10869 11138 11046
## [121] 11160 11225 11291 11339 11627 11753 11739 11847 11933 12062 12098 12185
## [133] 12350 12551 12590 12833 12741 12980 13149 13161 13328 13358 13480 13531
## [145] 21447 13651 13690 13759 13831 13867 13939 14008 14146 14320 14386 14410
## [157] 14500 14573 14644 14749 14812 14864 14890 14950 15004 15160 15200 15445
## [169] 15478 15598 15658 15790 15737 15847 15898 16048 16135 16223 16252 16309
## [181] 16385 16423 16430 16456 16466 16657 16646 16690 16786 16810 16870 17041
## [193] 17072 17095 17084 17120 17209 17242 17281 17365 17512 17576 17608 17854
## [205] 17899 17944 18053 18205 18259 18415 18517 18655 18760 18782 18829 18841
## [217] 18970 19001 19021 19159 19195 19249 19309 19369 19400 19432 19475 19618
## [229] 19765 19810 19906 19939 20014 20263 20395 20399 20428 20671 20713 20852
## [241] 20884 20936 20968 21022 21142 21179 21169 21292 21361 25221 21565 21616
## [253] 21775 21907 21839 22063 22069 22396 22325 22576 22493 22618 22636 22888
## [265] 22945 23068 23167 23365 23497 23651 23689 23902 23923 23927 24007 24113
## [277] 24277 24388 24424 24742 24979 25072 25115 25169 25291 26054 25416 25429
## [289] 25453 25563 25594 25654 26115 26309 27025 26478 26744 27227 27216 27603
## [301] 27734 27618 27860 27817 28080 27953 27997 28063 29350 28388 28461 28491
## [313] 28757 29097 29144 29261 29318 29389 29454 29590 29625
## 
## [[3]]
##   [1]    33 13149    18    30    93     9   492    27     6    33    36    21
##  [13]     9    36    42   102    24    66   147     9    96    12    78   117
##  [25]    18    15     6    90    48    15   303    60     9    21    15    42
##  [37]    18    21   216     6   153    36    12    21    42    45    24    15
##  [49]    15    42    36    60    66    66   114    39     9   126    75    66
##  [61]    18    12    45   144    60     9    12    39    72    66   144    21
##  [73]    87    63    42     9   186    81     9    63    21    87   111    27
##  [85]     9    27    24     9   165     9   186    66    45    45    12    51
##  [97]    18    33    69    93    21    72   135   264    51    36   108     9
## [109]    57    75    30    24    12    51     9    33   102    27   156    21
## [121]     9    15    33    45    39    90    30    21    90    45    33    75
## [133]    69   165     9   183     9    57    18    12    99    18    63    21
## [145]  7887    75    39    42    42    12    27    39    42   171    48     6
## [157]    48    60   102    84    30     9    12    57    36     6     6    93
## [169]    33   111    48    87    12    45    33   123    78    21    33    39
## [181]     9    39    15    24     6   183    21    24    45    24     6   135
## [193]    21    36     6     6    18    33    33    75    15    39    36   105
## [205]    27    30     9    75    45   114    24    54   102    18    51     9
## [217]    51    30    36   120    21    48    15    45    42    42    21    96
## [229]   147    15    24    12    15    96    39    21    18   189    36    42
## [241]    33    18    24    45    63    66    27    84     9  3768    45    33
## [253]   129    90     6   147     6   147    12   135    24    42     9   195
## [265]    57   102    54   147   123     9    24    15    15    12    75    60
## [277]   159    57     6   171   201    93    84    30   123   825   123    69
## [289]    21     9    18    48   465   231   666    66    27   192    24   369
## [301]   135    15   120    42   255    33    45    48  1269   297    57    15
## [313]   213    27    60    48    33    33    78    84    36
# Plot the ORFs found in My_SeqS, one track per reading frame.
# NOTE(review): Question 3 asks for the reverse complement of the last 1000
# nucleotides drawn in a green shade; this call passes My_SeqS unchanged and
# the ORFs are filled in cyan - confirm against the task.
plotORFsinSeq(My_SeqS)
## Contacting Delphi...the oracle is unavailable.
## We apologize for any inconvenience.

Question 4:

Extract and translate one potential gene. What is the length of the resultant protein sequence?

 # Bases 66-98 of My_SeqS, split into single characters
 orf_bases <- s2c(substring(My_SeqS, 66, 98))
 Seq_T <- seqinr::translate(orf_bases)

# Number of translated symbols.
# NOTE(review): 66-98 matches the first ORF listed by findORFsinSeq, whose stop
# position includes the stop codon, so this count presumably includes the
# terminating stop symbol - confirm before reporting it as the protein length.
length(Seq_T)
## [1] 11

Question 5:

For the whole sequence, identify the significant ORFs. Use at least 20 random sequences. Justify your answer.

# `sequence` is a DNA sequence given as a vector of single characters; `index` is the number of random sequences to generate
generateSeqsWithMultinomialModel <- function(sequence, index)
{
  # Generate random DNA sequences with the same base composition as `sequence`.
  #
  # Each base of each random sequence is drawn independently from
  # "a", "c", "g", "t" with probability equal to that base's frequency in
  # `sequence` (a multinomial model).
  #
  # Args:
  #   sequence: the DNA sequence as a vector of single lower-case characters.
  #             Characters other than a/c/g/t are ignored when the base
  #             frequencies are computed.
  #   index:    number of random sequences to generate (0 gives an empty
  #             result).
  #
  # Returns:
  #   A character vector of length `index`; each element is one random
  #   sequence of length(sequence) characters.
  bases <- c("a", "c", "g", "t")

  # Count the bases in the order of `bases`, so that every probability below
  # lines up with the letter it belongs to.
  basecounts <- table(factor(sequence, levels = bases))
  if (sum(basecounts) == 0)
  {
    stop("'sequence' contains none of the bases a, c, g, t", call. = FALSE)
  }
  Pro_Seq <- as.numeric(basecounts) / sum(basecounts)

  # One draw of length(sequence) bases per requested sequence
  My_Rad_Seq <- vapply(
    seq_len(index),
    function(i)
    {
      paste(sample(bases, length(sequence), replace = TRUE, prob = Pro_Seq),
            collapse = "")
    },
    character(1)
  )
  return(My_Rad_Seq)
}
# Generate 20 random sequences with the base composition of the real sequence
n_random <- 20
randseqs <- generateSeqsWithMultinomialModel(getSequence(My_Que$req[[1]]), n_random)

# Collect the ORF lengths (third element of the findORFsinSeq result) from
# every random sequence
randseqorflengths <- numeric()
for (randseq in randseqs)
{
  randseqorflengths <- c(randseqorflengths, findORFsinSeq(randseq)[[3]])
}

# ORF lengths in the real sequence, for comparison with the random ones
# (My_SeqS is the sequence analysed in the earlier questions)
realorflengths <- findORFsinSeq(My_SeqS)[[3]]

# plot a histogram of the lengths of the ORFs real vs. random
par(mfrow = c(1, 2)) # Make a picture with two plots side-by-side (one row, two columns)
# The breaks must span every observed length, otherwise hist() stops with an error
longest <- max(c(randseqorflengths, realorflengths), na.rm = TRUE)
bins <- seq(0, max(11000, ceiling(longest / 50) * 50), 50)
hist(realorflengths, breaks = bins, col = "blue", xlim = c(0, 1000))
hist(randseqorflengths, breaks = bins, col = "red", xlim = c(0, 1000))

#find the longest random gene
x <- max(randseqorflengths, na.rm = TRUE)

#use it as a threshold: real ORFs longer than the longest random ORF count as
#significant, all shorter ones are discarded
summary(realorflengths > x)
##    Mode   FALSE 
## logical    8020
# Alternative threshold: the 99th percentile of the random ORF lengths
quantile(randseqorflengths, probs = 0.99)
## 99% 
## 222

Hint:

Define and use a function named generateSeqsWithMultinomialModel.

Notes: