DNA Sequence Analysis: OOP Python code + R Markdown under RStudio + Miniconda Python 3.7 (backend)
DNA Sequence Analysis: OOP Python code + R Markdown under RStudio + Miniconda Python 3.7 (backend)
- 1 Create another instance of the Sequence class,
- 2 Define a method of a class, returning numbers of DNA bases
- 3 Define an is_dna() method of a class,
- 4 Define a method of a class for the complement of the Sequence
- 5 Create a method to find the first pair of non-matching bases,
- 6 Add a function (not a class method) to read a file
- 7 Create a method of the class to split the sequence into genes,
- 8 To create a list of Sequence instances from genome_01.dat,
- 9 Create a method to compare the DNA sequence with another,
- 10 Find the line of best fit for the data from Task 9,
- 11 Create a visualization of the genes:
- 12 Create a plot to indicate swap mutations-bases
1 Create another instance of the Sequence class,
- and call its first_base() method.
1.1 A method of the class: first_base()
import numpy as np
import Dksequence as seq
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
# Task 1 demo: wrap a four-base string in a Dksequence and read its first base.
any_seq = seq.Dksequence("ACGT")
any_seq_first_base = any_seq.first_base()
print(f"The first base of any given sequence is: {any_seq_first_base}")
## The first base of any given sequence is: A
2 Define a method of a class, returning numbers of DNA bases
- (i.e. characters) in the sequence.
2.1 Methods of the class :
- number_of_base()
- element_decompose()
- getACGT()
- ACGT_decompose()
# Task 2 demo: count the characters of a sequence that mixes DNA bases with
# other letters. Lines starting with "##" are output recorded when the report
# was rendered.
any_seq = "ARTJATCGAAATCGAGGHJKGDJGCCGT"
input_seq = seq.Dksequence(any_seq)
no_of_input_seq = input_seq.number_of_base()
# Element 0 of the number_of_base() result prints as the list of characters.
print(f"The input sequence is: {no_of_input_seq[0]}")
## The input sequence is: ['A', 'R', 'T', 'J', 'A', 'T', 'C', 'G', 'A', 'A', 'A', 'T', 'C', 'G', 'A', 'G', 'G', 'H', 'J', 'K', 'G', 'D', 'J', 'G', 'C', 'C', 'G', 'T']
## Its sequence length is: 28
# Same sequence again, this time echoing the raw input string.
any_seq = "ARTJATCGAAATCGAGGHJKGDJGCCGT"
input_seq = seq.Dksequence(any_seq)
no_of_input_seq = input_seq.number_of_base()
print(f"The input sequence is: {any_seq}")
## The input sequence is: ARTJATCGAAATCGAGGHJKGDJGCCGT
## Its sequence length is: 28
# Per the recorded output, element 0 is the sorted distinct characters.
dna_4_element_decompose = input_seq.element_decompose()
print(f"DNA sequence element decompose is: {dna_4_element_decompose[0]}")
## DNA sequence element decompose is: ['A', 'C', 'D', 'G', 'H', 'J', 'K', 'R', 'T']
## count total number of each element is: [6, 4, 1, 7, 1, 3, 1, 1, 4]
## The input DNA sequence is: ARTJATCGAAATCGAGGHJKGDJGCCGT
## no. of [ATCG]: 2
# ACGT_decompose() result: [0] is used as the 4-letter words, [1] as their counts.
genome_ACGT_decompose = input_seq.ACGT_decompose()
nameList00 = genome_ACGT_decompose[0]
nameList11 = genome_ACGT_decompose[1]
fmt = '{:<8}{:<12}{}'
print(fmt.format('', 'DNABase4', 'counts'))
## DNABase4 counts
for i, (name00, name11) in enumerate(zip(nameList00, nameList11)):
    # One table row per 4-letter word: row index, word, count.
    print(fmt.format(i, name00, name11))
## 0 AAAA 0
## 1 AAAC 0
## 2 AAAG 0
## 3 AAAT 1
## 4 AACA 0
## 5 AACC 0
## 6 AACG 0
## 7 AACT 0
## 8 AAGA 0
## 9 AAGC 0
## 10 AAGG 0
## 11 AAGT 0
## 12 AATA 0
## 13 AATC 1
## 14 AATG 0
## 15 AATT 0
## 16 ACAA 0
## 17 ACAC 0
## 18 ACAG 0
## 19 ACAT 0
## 20 ACCA 0
## 21 ACCC 0
## 22 ACCG 0
## 23 ACCT 0
## 24 ACGA 0
## 25 ACGC 0
## 26 ACGG 0
## 27 ACGT 0
## 28 ACTA 0
## 29 ACTC 0
## 30 ACTG 0
## 31 ACTT 0
## 32 AGAA 0
## 33 AGAC 0
## 34 AGAG 0
## 35 AGAT 0
## 36 AGCA 0
## 37 AGCC 0
## 38 AGCG 0
## 39 AGCT 0
## 40 AGGA 0
## 41 AGGC 0
## 42 AGGG 0
## 43 AGGT 0
## 44 AGTA 0
## 45 AGTC 0
## 46 AGTG 0
## 47 AGTT 0
## 48 ATAA 0
## 49 ATAC 0
## 50 ATAG 0
## 51 ATAT 0
## 52 ATCA 0
## 53 ATCC 0
## 54 ATCG 2
## 55 ATCT 0
## 56 ATGA 0
## 57 ATGC 0
## 58 ATGG 0
## 59 ATGT 0
## 60 ATTA 0
## 61 ATTC 0
## 62 ATTG 0
## 63 ATTT 0
## 64 CAAA 0
## 65 CAAC 0
## 66 CAAG 0
## 67 CAAT 0
## 68 CACA 0
## 69 CACC 0
## 70 CACG 0
## 71 CACT 0
## 72 CAGA 0
## 73 CAGC 0
## 74 CAGG 0
## 75 CAGT 0
## 76 CATA 0
## 77 CATC 0
## 78 CATG 0
## 79 CATT 0
## 80 CCAA 0
## 81 CCAC 0
## 82 CCAG 0
## 83 CCAT 0
## 84 CCCA 0
## 85 CCCC 0
## 86 CCCG 0
## 87 CCCT 0
## 88 CCGA 0
## 89 CCGC 0
## 90 CCGG 0
## 91 CCGT 1
## 92 CCTA 0
## 93 CCTC 0
## 94 CCTG 0
## 95 CCTT 0
## 96 CGAA 1
## 97 CGAC 0
## 98 CGAG 1
## 99 CGAT 0
## 100 CGCA 0
## 101 CGCC 0
## 102 CGCG 0
## 103 CGCT 0
## 104 CGGA 0
## 105 CGGC 0
## 106 CGGG 0
## 107 CGGT 0
## 108 CGTA 0
## 109 CGTC 0
## 110 CGTG 0
## 111 CGTT 0
## 112 CTAA 0
## 113 CTAC 0
## 114 CTAG 0
## 115 CTAT 0
## 116 CTCA 0
## 117 CTCC 0
## 118 CTCG 0
## 119 CTCT 0
## 120 CTGA 0
## 121 CTGC 0
## 122 CTGG 0
## 123 CTGT 0
## 124 CTTA 0
## 125 CTTC 0
## 126 CTTG 0
## 127 CTTT 0
## 128 GAAA 1
## 129 GAAC 0
## 130 GAAG 0
## 131 GAAT 0
## 132 GACA 0
## 133 GACC 0
## 134 GACG 0
## 135 GACT 0
## 136 GAGA 0
## 137 GAGC 0
## 138 GAGG 1
## 139 GAGT 0
## 140 GATA 0
## 141 GATC 0
## 142 GATG 0
## 143 GATT 0
## 144 GCAA 0
## 145 GCAC 0
## 146 GCAG 0
## 147 GCAT 0
## 148 GCCA 0
## 149 GCCC 0
## 150 GCCG 1
## 151 GCCT 0
## 152 GCGA 0
## 153 GCGC 0
## 154 GCGG 0
## 155 GCGT 0
## 156 GCTA 0
## 157 GCTC 0
## 158 GCTG 0
## 159 GCTT 0
## 160 GGAA 0
## 161 GGAC 0
## 162 GGAG 0
## 163 GGAT 0
## 164 GGCA 0
## 165 GGCC 0
## 166 GGCG 0
## 167 GGCT 0
## 168 GGGA 0
## 169 GGGC 0
## 170 GGGG 0
## 171 GGGT 0
## 172 GGTA 0
## 173 GGTC 0
## 174 GGTG 0
## 175 GGTT 0
## 176 GTAA 0
## 177 GTAC 0
## 178 GTAG 0
## 179 GTAT 0
## 180 GTCA 0
## 181 GTCC 0
## 182 GTCG 0
## 183 GTCT 0
## 184 GTGA 0
## 185 GTGC 0
## 186 GTGG 0
## 187 GTGT 0
## 188 GTTA 0
## 189 GTTC 0
## 190 GTTG 0
## 191 GTTT 0
## 192 TAAA 0
## 193 TAAC 0
## 194 TAAG 0
## 195 TAAT 0
## 196 TACA 0
## 197 TACC 0
## 198 TACG 0
## 199 TACT 0
## 200 TAGA 0
## 201 TAGC 0
## 202 TAGG 0
## 203 TAGT 0
## 204 TATA 0
## 205 TATC 0
## 206 TATG 0
## 207 TATT 0
## 208 TCAA 0
## 209 TCAC 0
## 210 TCAG 0
## 211 TCAT 0
## 212 TCCA 0
## 213 TCCC 0
## 214 TCCG 0
## 215 TCCT 0
## 216 TCGA 2
## 217 TCGC 0
## 218 TCGG 0
## 219 TCGT 0
## 220 TCTA 0
## 221 TCTC 0
## 222 TCTG 0
## 223 TCTT 0
## 224 TGAA 0
## 225 TGAC 0
## 226 TGAG 0
## 227 TGAT 0
## 228 TGCA 0
## 229 TGCC 0
## 230 TGCG 0
## 231 TGCT 0
## 232 TGGA 0
## 233 TGGC 0
## 234 TGGG 0
## 235 TGGT 0
## 236 TGTA 0
## 237 TGTC 0
## 238 TGTG 0
## 239 TGTT 0
## 240 TTAA 0
## 241 TTAC 0
## 242 TTAG 0
## 243 TTAT 0
## 244 TTCA 0
## 245 TTCC 0
## 246 TTCG 0
## 247 TTCT 0
## 248 TTGA 0
## 249 TTGC 0
## 250 TTGG 0
## 251 TTGT 0
## 252 TTTA 0
## 253 TTTC 0
## 254 TTTG 0
## 255 TTTT 0
3 Define an is_dna() method of a class,
- which validates whether all the characters are valid DNA bases: A, T, C or G. It should return True or False. If the sequence has zero length, it should return False.
3.1 A method of the class: is_dna()
# Task 3 demo inputs: valid DNA, a string with non-DNA letters, and the empty string.
input_01_seq = seq.Dksequence('ATAGCGGCAT')
input_02_seq = seq.Dksequence('ATBCFGHJUJAGCGGCAT')
input_03_seq = seq.Dksequence('')
# In the recorded output is_dna() yields [sequence_text, bool].
valid_01_seq = input_01_seq.is_dna()
# NOTE(review): the recorded output below also covers input_02_seq and
# input_03_seq, but the statements producing those lines are not visible here.
print(f"valid DNA sequence ?: {valid_01_seq}")
## valid DNA sequence ?: ['ATAGCGGCAT', True]
## valid DNA sequence ?: ['ATBCFGHJUJAGCGGCAT', False]
## valid DNA sequence ?: ['', False]
4 Define a method of a class for the complement of the Sequence
- as a new Sequence instance (i.e. every A becomes a T, and every C a G – and vice versa). Don’t reverse the complement sequence.
# Task 4 demo inputs: valid DNA, a string with non-DNA letters, the empty
# string, and a mixed-case string.
input_01_seq = seq.Dksequence('ATAGCGGCAT')
input_02_seq = seq.Dksequence('ATBCFGHJUJAGCGGCAT')
input_03_seq = seq.Dksequence('')
input_04_seq = seq.Dksequence('AtCgTGaAT')
input_01_seq_complement = input_01_seq.complement()
print(f" the complement DNA sequence is : {input_01_seq_complement}")
## the complement DNA sequence is : TATCGCCGTA
# In the recorded output, characters other than A/C/G/T pass through unchanged.
input_02_seq_complement = input_02_seq.complement()
print(f" the complement DNA sequence is : {input_02_seq_complement}")
## the complement DNA sequence is : TABGFCHJUJTCGCCGTA
input_03_seq_complement = input_03_seq.complement()
print(f" the complement DNA sequence is empty: {input_03_seq_complement}")
## the complement DNA sequence is empty:
# In the recorded output, lowercase letters are also left unchanged.
input_04_seq_complement = input_04_seq.complement()
print(f" the complement DNA sequence is : {input_04_seq_complement}")
## the complement DNA sequence is : TtGgACaTA
5 Create a method to find the first pair of non-matching bases,
- when comparing with another Sequence instance. Return the zero-based index of the first non-matching base. Return -1 if the sequences are identical (or empty). The sequences are assumed to be the same length.
5.1 A method of the class: seq_not_match()
# Task 5 demo. In the recorded output seq_not_match() yields
# [this_sequence, other_sequence, all mismatch indices joined by '_', first
# mismatch index], with '-1' in the last two slots when nothing is reported.
input_05_seq = seq.Dksequence('ATAGCGGCAT')
input_06_seq = seq.Dksequence('ATCGTGabAF')
# A sequence compared with itself: no mismatches.
comparing_two_seq = input_05_seq.seq_not_match(input_05_seq)
print(f"comparing two sequences: {comparing_two_seq}")
## comparing two sequences: ['ATAGCGGCAT', 'ATAGCGGCAT', '-1', '-1']
comparing_two_seq = input_05_seq.seq_not_match(input_06_seq)
print(f"comparing two sequences: {comparing_two_seq}")
## comparing two sequences: ['ATAGCGGCAT', 'ATCGTGabAF', '_2_4_6_7_9', '2']
## the first pair of non-matching bases: ['ATAGCGGCAT', 'ATCGTGabAF', '2']
input_07_seq = seq.Dksequence('ACGTGCG')
input_08_seq = seq.Dksequence('ACGTTCG')
input_03_seq = seq.Dksequence('')
comparing_two_seq = input_07_seq.seq_not_match(input_08_seq)
print(f"comparing two sequences: {comparing_two_seq}")
## comparing two sequences: ['ACGTGCG', 'ACGTTCG', '_4', '4']
## the first pair of non-matching bases: ['ACGTGCG', 'ACGTTCG', '4']
# NOTE(review): valid_03_seq is assigned but not used in the visible code.
valid_03_seq = input_03_seq.is_dna()
# Comparing against the empty sequence reports '-1' in the recorded output.
comparing_two_seq = input_07_seq.seq_not_match(input_03_seq)
print(f"comparing two sequences: {comparing_two_seq}")
## comparing two sequences: ['ACGTGCG', '', '-1', '-1']
6 Add a function (not a class method) to read a file
- in the format of those provided, and return a new instance of Sequence. Call the function to read the file “genome_01.dat”, and print the total number of bases.
6.1 A function call to read a new instance of Sequence : load_dat_file()
6.1.1 genome_01.dat
def load_dat_file(infile_name):
    """Read a genome ``.dat`` file and wrap its sequence in a ``Dksequence``.

    The first line of the file is treated as a header and skipped; the line
    immediately after it is taken as the sequence text and passed to
    ``seq.Dksequence`` exactly as read (nothing is stripped here).

    Args:
        infile_name: Path of the file to read.

    Returns:
        A new ``seq.Dksequence`` built from the second line of the file.

    Raises:
        OSError: If the file cannot be opened.
        IndexError: If the file has no line after the header.
    """
    # The context manager closes the handle even if reading fails; the
    # original opened the file and never closed it.
    with open(infile_name) as infile:
        # skip first headline
        genome_dat = infile.readlines()[1:]
    genome_dat = genome_dat[0]
    input_genome_seq = seq.Dksequence(genome_dat)
    return input_genome_seq
# Task 6.1.1: load genome_01.dat and print its summary statistics. Lines
# starting with "##" are output recorded when the report was rendered.
input_genome_seq = load_dat_file(infile_name = "genome_01.dat")
valid_G01_seq = input_genome_seq.is_dna()
print(f" valid DNA sequence ?: {valid_G01_seq[1] }")
## valid DNA sequence ?: True
length_G01_seq = input_genome_seq.number_of_base()
print(f" length of DNA sequence : {length_G01_seq[1] }")
## length of DNA sequence : 100000
element_decompose_G01_seq = input_genome_seq.element_decompose()
print(f" element decompose of DNA sequence : {element_decompose_G01_seq }")
## element decompose of DNA sequence : [['A', 'C', 'G', 'T'], [25502, 24521, 24608, 25369]]
# NOTE(review): the recorded value (382) equals the ATCG row of the table
# below, not the ACGT row - confirm which word getACGT() counts.
getACGT_G01_seq = input_genome_seq.getACGT()
print(f" no. of DNA base (ACGT) : {getACGT_G01_seq[1] }")
## no. of DNA base (ACGT) : 382
# ACGT_decompose() result: [0] is used as the 4-letter words, [1] as their counts.
genome_ACGT_decompose = input_genome_seq.ACGT_decompose()
nameList00 = genome_ACGT_decompose[0]
nameList11 = genome_ACGT_decompose[1]
fmt = '{:<8}{:<12}{}'
print(fmt.format('', 'DNABase4', 'counts'))
## DNABase4 counts
for ii, (name00, name11) in enumerate(zip(nameList00, nameList11)):
    # One table row per 4-letter word: row index, word, count.
    print(fmt.format(ii, name00, name11))
## 0 AAAA 466
## 1 AAAC 371
## 2 AAAG 359
## 3 AAAT 483
## 4 AACA 358
## 5 AACC 349
## 6 AACG 383
## 7 AACT 412
## 8 AAGA 403
## 9 AAGC 350
## 10 AAGG 379
## 11 AAGT 347
## 12 AATA 375
## 13 AATC 392
## 14 AATG 375
## 15 AATT 501
## 16 ACAA 381
## 17 ACAC 351
## 18 ACAG 394
## 19 ACAT 358
## 20 ACCA 404
## 21 ACCC 391
## 22 ACCG 377
## 23 ACCT 391
## 24 ACGA 377
## 25 ACGC 397
## 26 ACGG 404
## 27 ACGT 427
## 28 ACTA 380
## 29 ACTC 385
## 30 ACTG 388
## 31 ACTT 397
## 32 AGAA 373
## 33 AGAC 419
## 34 AGAG 373
## 35 AGAT 377
## 36 AGCA 366
## 37 AGCC 392
## 38 AGCG 339
## 39 AGCT 393
## 40 AGGA 379
## 41 AGGC 422
## 42 AGGG 385
## 43 AGGT 376
## 44 AGTA 377
## 45 AGTC 367
## 46 AGTG 366
## 47 AGTT 397
## 48 ATAA 383
## 49 ATAC 389
## 50 ATAG 403
## 51 ATAT 343
## 52 ATCA 390
## 53 ATCC 369
## 54 ATCG 382
## 55 ATCT 384
## 56 ATGA 380
## 57 ATGC 392
## 58 ATGG 371
## 59 ATGT 365
## 60 ATTA 355
## 61 ATTC 370
## 62 ATTG 377
## 63 ATTT 465
## 64 CAAA 364
## 65 CAAC 376
## 66 CAAG 394
## 67 CAAT 393
## 68 CACA 320
## 69 CACC 390
## 70 CACG 408
## 71 CACT 402
## 72 CAGA 401
## 73 CAGC 361
## 74 CAGG 399
## 75 CAGT 379
## 76 CATA 403
## 77 CATC 362
## 78 CATG 357
## 79 CATT 366
## 80 CCAA 423
## 81 CCAC 403
## 82 CCAG 371
## 83 CCAT 382
## 84 CCCA 366
## 85 CCCC 275
## 86 CCCG 385
## 87 CCCT 360
## 88 CCGA 397
## 89 CCGC 342
## 90 CCGG 385
## 91 CCGT 374
## 92 CCTA 384
## 93 CCTC 375
## 94 CCTG 421
## 95 CCTT 367
## 96 CGAA 410
## 97 CGAC 400
## 98 CGAG 381
## 99 CGAT 399
## 100 CGCA 373
## 101 CGCC 350
## 102 CGCG 332
## 103 CGCT 360
## 104 CGGA 363
## 105 CGGC 383
## 106 CGGG 409
## 107 CGGT 370
## 108 CGTA 398
## 109 CGTC 370
## 110 CGTG 378
## 111 CGTT 407
## 112 CTAA 390
## 113 CTAC 394
## 114 CTAG 385
## 115 CTAT 350
## 116 CTCA 369
## 117 CTCC 407
## 118 CTCG 402
## 119 CTCT 359
## 120 CTGA 397
## 121 CTGC 358
## 122 CTGG 403
## 123 CTGT 409
## 124 CTTA 413
## 125 CTTC 347
## 126 CTTG 359
## 127 CTTT 369
## 128 GAAA 408
## 129 GAAC 378
## 130 GAAG 382
## 131 GAAT 389
## 132 GACA 380
## 133 GACC 432
## 134 GACG 395
## 135 GACT 375
## 136 GAGA 370
## 137 GAGC 391
## 138 GAGG 389
## 139 GAGT 397
## 140 GATA 381
## 141 GATC 390
## 142 GATG 381
## 143 GATT 352
## 144 GCAA 373
## 145 GCAC 391
## 146 GCAG 389
## 147 GCAT 364
## 148 GCCA 406
## 149 GCCC 358
## 150 GCCG 360
## 151 GCCT 400
## 152 GCGA 412
## 153 GCGC 345
## 154 GCGG 343
## 155 GCGT 373
## 156 GCTA 364
## 157 GCTC 399
## 158 GCTG 387
## 159 GCTT 366
## 160 GGAA 367
## 161 GGAC 379
## 162 GGAG 415
## 163 GGAT 339
## 164 GGCA 423
## 165 GGCC 405
## 166 GGCG 406
## 167 GGCT 376
## 168 GGGA 390
## 169 GGGC 412
## 170 GGGG 271
## 171 GGGT 394
## 172 GGTA 377
## 173 GGTC 383
## 174 GGTG 374
## 175 GGTT 369
## 176 GTAA 378
## 177 GTAC 432
## 178 GTAG 377
## 179 GTAT 374
## 180 GTCA 373
## 181 GTCC 398
## 182 GTCG 362
## 183 GTCT 372
## 184 GTGA 384
## 185 GTGC 354
## 186 GTGG 374
## 187 GTGT 338
## 188 GTTA 386
## 189 GTTC 380
## 190 GTTG 395
## 191 GTTT 394
## 192 TAAA 441
## 193 TAAC 374
## 194 TAAG 347
## 195 TAAT 376
## 196 TACA 425
## 197 TACC 396
## 198 TACG 417
## 199 TACT 362
## 200 TAGA 372
## 201 TAGC 390
## 202 TAGG 393
## 203 TAGT 384
## 204 TATA 363
## 205 TATC 379
## 206 TATG 394
## 207 TATT 346
## 208 TCAA 353
## 209 TCAC 377
## 210 TCAG 383
## 211 TCAT 386
## 212 TCCA 398
## 213 TCCC 362
## 214 TCCG 381
## 215 TCCT 400
## 216 TCGA 409
## 217 TCGC 335
## 218 TCGG 395
## 219 TCGT 379
## 220 TCTA 390
## 221 TCTC 372
## 222 TCTG 371
## 223 TCTT 355
## 224 TGAA 405
## 225 TGAC 381
## 226 TGAG 374
## 227 TGAT 386
## 228 TGCA 356
## 229 TGCC 372
## 230 TGCG 391
## 231 TGCT 386
## 232 TGGA 369
## 233 TGGC 392
## 234 TGGG 402
## 235 TGGT 365
## 236 TGTA 411
## 237 TGTC 386
## 238 TGTG 334
## 239 TGTT 384
## 240 TTAA 391
## 241 TTAC 385
## 242 TTAG 372
## 243 TTAT 406
## 244 TTCA 367
## 245 TTCC 370
## 246 TTCG 372
## 247 TTCT 370
## 248 TTGA 388
## 249 TTGC 402
## 250 TTGG 378
## 251 TTGT 398
## 252 TTTA 405
## 253 TTTC 387
## 254 TTTG 436
## 255 TTTT 477
6.1.2 genome_02.dat
# Task 6.1.2: the same summary for genome_02.dat. Lines starting with "##"
# are output recorded when the report was rendered.
# NOTE(review): the *_G01_* variable names are reused here although they now
# hold genome_02 results.
input_genome_seq = load_dat_file(infile_name = "genome_02.dat")
valid_G01_seq = input_genome_seq.is_dna()
print(f" valid DNA sequence ?: {valid_G01_seq[1] }")
## valid DNA sequence ?: True
length_G01_seq = input_genome_seq.number_of_base()
print(f" length of DNA sequence : {length_G01_seq[1] }")
## length of DNA sequence : 100000
element_decompose_G01_seq = input_genome_seq.element_decompose()
print(f" element decompose of DNA sequence : {element_decompose_G01_seq }")
## element decompose of DNA sequence : [['A', 'C', 'G', 'T'], [25493, 24544, 24620, 25343]]
# NOTE(review): the recorded value (381) equals the ATCG row of the table
# below, not the ACGT row - confirm which word getACGT() counts.
getACGT_G01_seq = input_genome_seq.getACGT()
print(f" no. of DNA base (ACGT) : {getACGT_G01_seq[1] }")
## no. of DNA base (ACGT) : 381
# ACGT_decompose() result: [0] is used as the 4-letter words, [1] as their counts.
genome_ACGT_decompose = input_genome_seq.ACGT_decompose()
nameList00 = genome_ACGT_decompose[0]
nameList11 = genome_ACGT_decompose[1]
fmt = '{:<8}{:<12}{}'
print(fmt.format('', 'DNABase4', 'counts'))
## DNABase4 counts
for ii, (name00, name11) in enumerate(zip(nameList00, nameList11)):
    # One table row per 4-letter word: row index, word, count.
    print(fmt.format(ii, name00, name11))
## 0 AAAA 466
## 1 AAAC 369
## 2 AAAG 360
## 3 AAAT 483
## 4 AACA 358
## 5 AACC 346
## 6 AACG 378
## 7 AACT 415
## 8 AAGA 402
## 9 AAGC 350
## 10 AAGG 382
## 11 AAGT 345
## 12 AATA 378
## 13 AATC 391
## 14 AATG 373
## 15 AATT 501
## 16 ACAA 381
## 17 ACAC 353
## 18 ACAG 394
## 19 ACAT 357
## 20 ACCA 409
## 21 ACCC 389
## 22 ACCG 379
## 23 ACCT 387
## 24 ACGA 378
## 25 ACGC 399
## 26 ACGG 404
## 27 ACGT 424
## 28 ACTA 379
## 29 ACTC 385
## 30 ACTG 389
## 31 ACTT 396
## 32 AGAA 373
## 33 AGAC 420
## 34 AGAG 376
## 35 AGAT 379
## 36 AGCA 363
## 37 AGCC 393
## 38 AGCG 333
## 39 AGCT 395
## 40 AGGA 377
## 41 AGGC 425
## 42 AGGG 385
## 43 AGGT 378
## 44 AGTA 375
## 45 AGTC 365
## 46 AGTG 370
## 47 AGTT 396
## 48 ATAA 382
## 49 ATAC 386
## 50 ATAG 402
## 51 ATAT 347
## 52 ATCA 389
## 53 ATCC 368
## 54 ATCG 381
## 55 ATCT 380
## 56 ATGA 379
## 57 ATGC 390
## 58 ATGG 369
## 59 ATGT 370
## 60 ATTA 358
## 61 ATTC 372
## 62 ATTG 375
## 63 ATTT 463
## 64 CAAA 366
## 65 CAAC 378
## 66 CAAG 393
## 67 CAAT 395
## 68 CACA 322
## 69 CACC 390
## 70 CACG 412
## 71 CACT 405
## 72 CAGA 402
## 73 CAGC 360
## 74 CAGG 398
## 75 CAGT 381
## 76 CATA 399
## 77 CATC 359
## 78 CATG 359
## 79 CATT 359
## 80 CCAA 421
## 81 CCAC 406
## 82 CCAG 373
## 83 CCAT 378
## 84 CCCA 368
## 85 CCCC 277
## 86 CCCG 385
## 87 CCCT 361
## 88 CCGA 404
## 89 CCGC 340
## 90 CCGG 392
## 91 CCGT 371
## 92 CCTA 381
## 93 CCTC 377
## 94 CCTG 415
## 95 CCTT 369
## 96 CGAA 413
## 97 CGAC 400
## 98 CGAG 382
## 99 CGAT 401
## 100 CGCA 376
## 101 CGCC 348
## 102 CGCG 335
## 103 CGCT 357
## 104 CGGA 366
## 105 CGGC 388
## 106 CGGG 406
## 107 CGGT 373
## 108 CGTA 394
## 109 CGTC 378
## 110 CGTG 372
## 111 CGTT 403
## 112 CTAA 390
## 113 CTAC 393
## 114 CTAG 389
## 115 CTAT 347
## 116 CTCA 369
## 117 CTCC 407
## 118 CTCG 400
## 119 CTCT 362
## 120 CTGA 396
## 121 CTGC 359
## 122 CTGG 407
## 123 CTGT 407
## 124 CTTA 411
## 125 CTTC 346
## 126 CTTG 360
## 127 CTTT 370
## 128 GAAA 405
## 129 GAAC 377
## 130 GAAG 384
## 131 GAAT 386
## 132 GACA 381
## 133 GACC 436
## 134 GACG 397
## 135 GACT 371
## 136 GAGA 372
## 137 GAGC 389
## 138 GAGG 388
## 139 GAGT 398
## 140 GATA 383
## 141 GATC 391
## 142 GATG 381
## 143 GATT 356
## 144 GCAA 376
## 145 GCAC 390
## 146 GCAG 385
## 147 GCAT 360
## 148 GCCA 404
## 149 GCCC 362
## 150 GCCG 360
## 151 GCCT 398
## 152 GCGA 412
## 153 GCGC 345
## 154 GCGG 342
## 155 GCGT 375
## 156 GCTA 366
## 157 GCTC 400
## 158 GCTG 393
## 159 GCTT 366
## 160 GGAA 367
## 161 GGAC 380
## 162 GGAG 416
## 163 GGAT 339
## 164 GGCA 418
## 165 GGCC 405
## 166 GGCG 408
## 167 GGCT 384
## 168 GGGA 389
## 169 GGGC 411
## 170 GGGG 273
## 171 GGGT 393
## 172 GGTA 377
## 173 GGTC 382
## 174 GGTG 373
## 175 GGTT 371
## 176 GTAA 375
## 177 GTAC 430
## 178 GTAG 375
## 179 GTAT 377
## 180 GTCA 378
## 181 GTCC 401
## 182 GTCG 363
## 183 GTCT 370
## 184 GTGA 380
## 185 GTGC 354
## 186 GTGG 371
## 187 GTGT 341
## 188 GTTA 384
## 189 GTTC 383
## 190 GTTG 392
## 191 GTTT 391
## 192 TAAA 441
## 193 TAAC 370
## 194 TAAG 345
## 195 TAAT 377
## 196 TACA 424
## 197 TACC 396
## 198 TACG 417
## 199 TACT 359
## 200 TAGA 374
## 201 TAGC 387
## 202 TAGG 395
## 203 TAGT 383
## 204 TATA 361
## 205 TATC 375
## 206 TATG 394
## 207 TATT 349
## 208 TCAA 357
## 209 TCAC 380
## 210 TCAG 386
## 211 TCAT 383
## 212 TCCA 393
## 213 TCCC 363
## 214 TCCG 388
## 215 TCCT 400
## 216 TCGA 406
## 217 TCGC 337
## 218 TCGG 397
## 219 TCGT 378
## 220 TCTA 392
## 221 TCTC 369
## 222 TCTG 372
## 223 TCTT 353
## 224 TGAA 398
## 225 TGAC 382
## 226 TGAG 370
## 227 TGAT 389
## 228 TGCA 355
## 229 TGCC 373
## 230 TGCG 392
## 231 TGCT 388
## 232 TGGA 371
## 233 TGGC 390
## 234 TGGG 402
## 235 TGGT 361
## 236 TGTA 413
## 237 TGTC 388
## 238 TGTG 333
## 239 TGTT 383
## 240 TTAA 390
## 241 TTAC 387
## 242 TTAG 370
## 243 TTAT 398
## 244 TTCA 370
## 245 TTCC 371
## 246 TTCG 373
## 247 TTCT 373
## 248 TTGA 387
## 249 TTGC 406
## 250 TTGG 375
## 251 TTGT 394
## 252 TTTA 399
## 253 TTTC 390
## 254 TTTG 435
## 255 TTTT 474
7 Create a method of the class to split the sequence into genes,
- returning each as a new Sequence instance (in a list) discarding the separators.
7.1 A method of the class : seq_split()
7.1.1 Example 1
# Task 7 example 1: the separator (10 A's followed by 10 T's) sits at the very
# end of the input, so the recorded result ends with an empty gene.
any_seq = "CCGATCGAAAAAAAAAAATTTTTTTTTT"
input_seq = seq.Dksequence(any_seq)
separator_seq = ("AAAAAAAAAATTTTTTTTTT")
input_seq_split = input_seq.seq_split(separator_seq)
print(input_seq_split)
## ['CCGATCGA', '']
7.1.2 Example 2
# Task 7 example 2: split genome_01.dat into genes and collect each gene's length.
input_genome_seq = load_dat_file(infile_name = "genome_01.dat")
separator_seq = ("AAAAAAAAAATTTTTTTTTT")
genome_split = input_genome_seq.seq_split(separator_seq)
num_list = list([])
for pp in range(0, len(genome_split)):
    # Element 1 of the number_of_base() result is used as the gene length.
    num = seq.Dksequence(genome_split[pp]).number_of_base()
    num_list.append(num[1])
# Printed once, after the loop: the recorded output shows a single list.
print(f" each gene length listed as : \n { num_list}")
## each gene length listed as :
## [639, 1733, 1296, 915, 843, 1811, 1463, 1113, 1827, 357, 1858, 1908, 679, 1174, 1576, 680, 1500, 394, 785, 1590, 631, 167, 254, 1704, 929, 1781, 617, 925, 152, 857, 195, 1056, 835, 1813, 1951, 1535, 528, 930, 179, 1281, 835, 877, 739, 1251, 990, 503, 1392, 1369, 777, 719, 624, 623, 794, 324, 231, 755, 590, 1563, 962, 1287, 108, 882, 208, 208, 1157, 1654, 1592, 836, 353, 415, 468, 1721, 1146, 1546, 623, 968, 337, 1425, 137, 371, 1974, 1223, 859, 510, 1186, 171, 478, 691, 1669, 988, 713, 164, 283, 404, 854, 1044, 1151, 719, 1235, 5883]
8 To create a list of Sequence instances from genome_01.dat,
- each containing a single gene. Plot the gene lengths as a histogram using matplotlib.
8.1 Plot the gene lengths as a histogram using matplotlib.
# Task 8: histogram of the per-gene lengths collected in num_list.
gene_length = num_list
# NOTE(review): no_of_gene_splitted is not used in the visible code.
no_of_gene_splitted = len(genome_split)
plt.figure(figsize=[10,8])
# density=False keeps the y axis as raw counts per bin.
plt.hist(gene_length, bins=50, density = False)
## (array([10., 5., 6., 5., 9., 9., 10., 7., 4., 7., 4., 3., 7.,
## 4., 5., 3., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
## 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
## 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]), array([ 108. , 223.5, 339. , 454.5, 570. , 685.5, 801. , 916.5,
## 1032. , 1147.5, 1263. , 1378.5, 1494. , 1609.5, 1725. , 1840.5,
## 1956. , 2071.5, 2187. , 2302.5, 2418. , 2533.5, 2649. , 2764.5,
## 2880. , 2995.5, 3111. , 3226.5, 3342. , 3457.5, 3573. , 3688.5,
## 3804. , 3919.5, 4035. , 4150.5, 4266. , 4381.5, 4497. , 4612.5,
## 4728. , 4843.5, 4959. , 5074.5, 5190. , 5305.5, 5421. , 5536.5,
## 5652. , 5767.5, 5883. ]), <a list of 50 Patch objects>)
## (array([-1000., 0., 1000., 2000., 3000., 4000., 5000., 6000.,
## 7000.]), <a list of 9 Text xticklabel objects>)
## (array([ 0., 2., 4., 6., 8., 10., 12.]), <a list of 7 Text yticklabel objects>)
# Label the histogram's y axis and give it a title.
# NOTE(review): the recorded output just above looks like the results of
# xticks()/yticks() calls that are not visible here, and no x label is set
# in the visible code.
plt.ylabel('Frequency Count',fontsize=15)
plt.title(' Gene Length Histogram from Genome_01.dat ',fontsize=15)
plt.show()
9 Create a method to compare the DNA sequence with another,
- and returns the number of “swap” mutations – bases which do not match. You don’t need to worry about insertion/deletion mutations. There are 3 swap mutations between these two sequences: ACGTAGCTGCAT and ACGTCCCTTCAT
9.1 A method of the class : swap_mutation_check()
9.2 Extra : A function call to tabulate and print: position_of_mutation()
# Task 9 setup: load both genomes and split each into genes on the same separator.
separator_seq = ("AAAAAAAAAATTTTTTTTTT")
input_genome_01_seq = load_dat_file(infile_name = "genome_01.dat")
genome_01_split = input_genome_01_seq.seq_split(separator_seq)
input_genome_02_seq = load_dat_file(infile_name = "genome_02.dat")
genome_02_split = input_genome_02_seq.seq_split(separator_seq)
def number_of_mutation(genome_01_split, genome_02_split, gene_length):
    """Tabulate swap mutations gene by gene and print the table.

    Gene ``i`` of ``genome_01_split`` is compared with gene ``i`` of
    ``genome_02_split`` via ``Dksequence.swap_mutation_check``. Element 2 of
    each comparison result is used as the mutation-position string; in the
    recorded output it is the mismatch indices joined by ``_``, or ``-1``
    when the two genes match.

    Args:
        genome_01_split: Gene strings of the first genome.
        genome_02_split: Gene strings of the second genome; must have at
            least as many entries as ``genome_01_split``.
        gene_length: Per-gene lengths, printed alongside the counts and
            returned unchanged.

    Returns:
        ``[gene_length, gene_no_of_swap_list, swap_list,
        removing_first_underline]``: the lengths as given, the mutation count
        per gene, the NumPy array of per-gene position lists, and the NumPy
        array of position strings with surrounding underscores stripped.

    Raises:
        IndexError: If ``genome_02_split`` is shorter than ``genome_01_split``.
    """
    gene_swap_list = []
    for tt, gene_text in enumerate(genome_01_split):
        gene_from_01_dat = seq.Dksequence(gene_text)
        # Indexed rather than zipped so that a shorter second genome still
        # fails loudly instead of silently dropping genes.
        gene_from_02_dat = seq.Dksequence(genome_02_split[tt])
        comparing_two_gene = gene_from_01_dat.swap_mutation_check(gene_from_02_dat)
        gene_swap_list.append(comparing_two_gene[2])

    # Drop leading/trailing underscores, then split into individual positions.
    removing_first_underline = np.char.strip(gene_swap_list, '_')
    swap_list = np.char.split(removing_first_underline, '_')

    # A lone '-1' marks "no mutation"; otherwise each position is one mutation.
    gene_no_of_swap_list = [
        0 if positions[0] == '-1' else len(positions)
        for positions in swap_list
    ]

    fmt = '{:<12}{:<15}{:<15}{}'
    print(fmt.format('gene_ID', 'gene_length', 'mutation_no ', "mutation_position"))
    rows = zip(gene_length, gene_no_of_swap_list, removing_first_underline)
    for jj, (name00, name11, name22) in enumerate(rows):
        print(fmt.format(jj, name00, name11, name22))
    return [gene_length, gene_no_of_swap_list, swap_list, removing_first_underline]
# Print the per-gene mutation table and keep the returned list
# ([0] gene lengths, [1] mutation counts) for the plots that follow.
print_swap_mutation_gene = number_of_mutation(genome_01_split, genome_02_split, gene_length)
## gene_ID gene_length mutation_no mutation_position
## 0 639 0 -1
## 1 1733 12 256_816_973_1119_1221_1316_1329_1472_1578_1605_1630_1634
## 2 1296 0 -1
## 3 915 4 370_512_555_906
## 4 843 2 23_383
## 5 1811 1 377
## 6 1463 3 123_594_734
## 7 1113 2 562_782
## 8 1827 8 11_86_705_778_872_1060_1067_1082
## 9 357 0 -1
## 10 1858 0 -1
## 11 1908 0 -1
## 12 679 2 371_554
## 13 1174 6 94_286_444_488_637_770
## 14 1576 4 323_450_1004_1272
## 15 680 1 621
## 16 1500 9 25_149_696_886_925_956_1134_1176_1488
## 17 394 3 161_228_360
## 18 785 3 121_326_763
## 19 1590 5 41_287_525_591_1176
## 20 631 1 552
## 21 167 0 -1
## 22 254 0 -1
## 23 1704 5 111_216_735_812_1153
## 24 929 7 67_234_291_709_716_726_771
## 25 1781 9 32_212_623_1162_1445_1512_1640_1661_1718
## 26 617 2 217_539
## 27 925 6 174_307_541_547_579_821
## 28 152 0 -1
## 29 857 4 730_771_799_814
## 30 195 0 -1
## 31 1056 5 33_232_341_645_802
## 32 835 3 36_254_768
## 33 1813 8 129_168_189_209_291_1371_1392_1400
## 34 1951 4 176_323_1561_1895
## 35 1535 10 69_146_221_509_514_803_1053_1138_1297_1313
## 36 528 3 91_322_341
## 37 930 7 43_274_414_657_662_710_860
## 38 179 0 -1
## 39 1281 2 93_433
## 40 835 2 329_498
## 41 877 2 209_575
## 42 739 0 -1
## 43 1251 6 136_606_628_859_1215_1219
## 44 990 6 87_147_160_314_455_584
## 45 503 1 120
## 46 1392 4 715_732_832_951
## 47 1369 3 470_800_1308
## 48 777 1 324
## 49 719 0 -1
## 50 624 4 136_256_463_517
## 51 623 5 4_139_282_348_423
## 52 794 2 219_433
## 53 324 1 274
## 54 231 0 -1
## 55 755 1 437
## 56 590 3 53_326_449
## 57 1563 0 -1
## 58 962 0 -1
## 59 1287 8 163_534_550_903_945_1016_1037_1130
## 60 108 0 -1
## 61 882 8 209_455_468_473_739_774_776_847
## 62 208 0 -1
## 63 208 0 -1
## 64 1157 0 -1
## 65 1654 7 305_343_612_663_882_1004_1028
## 66 1592 9 357_407_502_697_929_1377_1395_1435_1590
## 67 836 3 137_376_617
## 68 353 1 259
## 69 415 0 -1
## 70 468 0 -1
## 71 1721 3 112_650_976
## 72 1146 4 454_603_623_990
## 73 1546 2 369_1152
## 74 623 2 292_355
## 75 968 4 3_134_226_680
## 76 337 0 -1
## 77 1425 1 1308
## 78 137 0 -1
## 79 371 0 -1
## 80 1974 0 -1
## 81 1223 5 197_246_365_637_929
## 82 859 4 135_213_475_707
## 83 510 4 102_223_323_327
## 84 1186 0 -1
## 85 171 0 -1
## 86 478 0 -1
## 87 691 2 149_519
## 88 1669 5 0_691_1113_1384_1574
## 89 988 2 343_635
## 90 713 0 -1
## 91 164 0 -1
## 92 283 2 32_206
## 93 404 2 313_353
## 94 854 0 -1
## 95 1044 3 93_419_941
## 96 1151 6 460_585_702_738_975_1046
## 97 719 1 51
## 98 1235 2 327_640
## 99 5883 0 -1
9.3 A scatter plot: the number of swap mutations per gene against gene length
9.3.1 Observations from the plot:
- there exists a threshold of gene length at 2000.
- for 0 < gene length < 2000, the number of swap mutations empirically follows a negatively skewed distribution.
- for 2000 < gene length < 5883, no swap mutations occurred.
# Task 9.3: scatter of mutation count (y) against gene length (x), using
# elements 0 and 1 of the list returned by number_of_mutation().
plt.figure(figsize=[10,8])
plt.scatter( x= print_swap_mutation_gene[0], y = print_swap_mutation_gene[1])
plt.grid( alpha=0.45)
plt.xlabel('gene length ',fontsize=15)
plt.ylabel('the number of swap mutations per gene',fontsize=15)
# NOTE(review): the recorded output also shows a yticks result, but no
# yticks() or show() call is visible here.
plt.xticks(fontsize=15)
## (array([-1000., 0., 1000., 2000., 3000., 4000., 5000., 6000.,
## 7000.]), <a list of 9 Text xticklabel objects>)
## (array([-2., 0., 2., 4., 6., 8., 10., 12., 14.]), <a list of 9 Text yticklabel objects>)
10 Find the line of best fit for the data from Task 9,
- using the LinearRegression class from sklearn.linear_model. Plot it on the figure from Task 9.
10.1 \[y = x\beta + \epsilon \]
- X : gene length
- y : the number of swap mutations per gene
- \(\beta\) : coefficients
- \(\epsilon\) : observation noise
import numpy
import matplotlib.pyplot as plot
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
# Predictor: gene lengths as a single-feature column of shape (n_genes, 1);
# scikit-learn estimators expect a 2-D feature matrix.
x = np.array(gene_length).reshape((-1, 1))
# Response: the number of swap mutations per gene.
y = np.array(print_swap_mutation_gene[1])
# Hold out 20% of the genes for testing; the fixed seed keeps the split
# reproducible.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)
# A single fit is enough: refitting a fresh LinearRegression on the same data
# yields the same model.
model = LinearRegression()
model.fit(x_train, y_train)## LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)
# R^2 measured on the training split.
r_sq = model.score(x_train, y_train)
print('coefficient of determination:', r_sq)## coefficient of determination: 0.05401658809538667
# Fitted parameters of y = intercept + slope * x; these statements produce the
# intercept/slope output echoed beside them.
print('intercept:', model.intercept_)## intercept: 1.8059538397262882
print('slope:', model.coef_)## slope: [0.00086925]
# Predict the response for both splits with the fitted model; the training
# predictions are printed here, and y_test_pred is used for the regression
# line plotted further down.
y_train_pred = model.predict(x_train)
y_test_pred = model.predict(x_test)
print('predicted response:', y_train_pred, sep='\n')## predicted response:
## [2.89338099 1.98675708 2.60131423 3.30192676 2.24318474 2.48135823
## 3.07766121 6.91972997 2.55263643 2.09888985 1.89983244 3.52184608
## 2.42572647 2.11279779 2.3474943 2.61000669 2.4883122 2.31880917
## 1.98675708 2.3474943 3.31235772 2.5726291 2.44832687 2.56828287
## 2.53872849 2.39704134 2.14843689 2.53177452 1.96154893 3.38015893
## 1.94851024 2.92467386 2.36140224 3.50185342 1.93807928 2.34836355
## 3.46447583 3.14024695 3.28714958 2.49613542 3.42101351 2.72387796
## 3.18979399 3.16458585 2.12844423 1.95459496 2.53177452 2.83687998
## 3.17588605 2.66476921 3.1880555 2.55089794 2.43094195 2.43094195
## 2.87947305 2.16669106 2.35444827 2.5482902 2.80211013 3.04462985
## 3.35408154 2.61435292 2.86904209 3.01594472 2.91945838 3.24368726
## 2.6421688 2.39617209 3.25672596 2.21276112 2.40660305 2.2649159
## 1.95111798 2.24926947 2.11627478 2.80645636 2.53264377 2.81167184
## 2.99595206 2.6665077 ]
# Same predictions computed by hand from the fitted parameters
# (intercept + slope * x). x_train is a column produced by reshape((-1, 1)),
# so the result is 2-D (one row per training gene) and it overwrites the 1-D
# y_train_pred returned by model.predict.
y_train_pred = model.intercept_ + model.coef_ * x_train
print('predicted response:', y_train_pred, sep='\n')## predicted response:
## [[2.89338099]
## [1.98675708]
## [2.60131423]
## [3.30192676]
## [2.24318474]
## [2.48135823]
## [3.07766121]
## [6.91972997]
## [2.55263643]
## [2.09888985]
## [1.89983244]
## [3.52184608]
## [2.42572647]
## [2.11279779]
## [2.3474943 ]
## [2.61000669]
## [2.4883122 ]
## [2.31880917]
## [1.98675708]
## [2.3474943 ]
## [3.31235772]
## [2.5726291 ]
## [2.44832687]
## [2.56828287]
## [2.53872849]
## [2.39704134]
## [2.14843689]
## [2.53177452]
## [1.96154893]
## [3.38015893]
## [1.94851024]
## [2.92467386]
## [2.36140224]
## [3.50185342]
## [1.93807928]
## [2.34836355]
## [3.46447583]
## [3.14024695]
## [3.28714958]
## [2.49613542]
## [3.42101351]
## [2.72387796]
## [3.18979399]
## [3.16458585]
## [2.12844423]
## [1.95459496]
## [2.53177452]
## [2.83687998]
## [3.17588605]
## [2.66476921]
## [3.1880555 ]
## [2.55089794]
## [2.43094195]
## [2.43094195]
## [2.87947305]
## [2.16669106]
## [2.35444827]
## [2.5482902 ]
## [2.80211013]
## [3.04462985]
## [3.35408154]
## [2.61435292]
## [2.86904209]
## [3.01594472]
## [2.91945838]
## [3.24368726]
## [2.6421688 ]
## [2.39617209]
## [3.25672596]
## [2.21276112]
## [2.40660305]
## [2.2649159 ]
## [1.95111798]
## [2.24926947]
## [2.11627478]
## [2.80645636]
## [2.53264377]
## [2.81167184]
## [2.99595206]
## [2.6665077 ]]
# Held-out genes (gray points) with the fitted regression line (red) drawn
# from the model's predictions on the same test inputs.
plt.figure(figsize=[10, 8])
plt.scatter(x_test, y_test, color='gray')
plt.plot(x_test, y_test_pred, color='red', linewidth=2)
plt.grid(alpha=0.45)
plt.xlabel('gene length ', fontsize=15)
plt.ylabel('the number of swap mutations per gene', fontsize=15)
plt.xticks(fontsize=15)## (array([ 0., 250., 500., 750., 1000., 1250., 1500., 1750., 2000.]), <a list of 9 Text xticklabel objects>)
# The y-tick output echoed on the next line needs a producing call, so it is
# listed here next to its output.
# NOTE(review): fontsize=15 is assumed to mirror the x-axis call — confirm
# against the original .Rmd chunk.
plt.yticks(fontsize=15)## (array([-2., 0., 2., 4., 6., 8., 10.]), <a list of 7 Text yticklabel objects>)
11 Create a visualization of the genes:
- Decoding Gene with Color
## [[4 1 2 3 4 4 2 2 4 1 4 1 4 4 2 2 4 1 3 1 2 3 3 1 4 2 4 4 1 2 2 3 3 3 1 4
## 3 3 1 1 2 1 4 3 4 4 2 2 3 1]
## [1 4 4 4 3 4 3 3 2 1 4 1 1 1 4 1 4 4 2 3 3 2 2 4 3 3 4 4 1 1 2 1 3 3 4 2
## 1 1 2 4 1 1 1 1 3 1 4 1 2 4]
## [4 3 2 1 4 2 3 4 4 1 2 1 2 4 1 4 4 3 3 1 1 1 1 4 2 1 1 1 3 4 3 1 4 3 1 2
## 3 3 4 4 3 2 4 4 1 1 1 4 3 4]
## [1 4 2 4 3 1 4 3 1 1 1 2 3 3 2 3 2 4 4 4 1 3 1 2 2 3 3 2 1 1 1 1 3 4 1 1
## 4 3 2 2 2 3 1 4 4 3 2 1 3 4]
## [1 1 2 1 2 2 1 4 2 3 3 3 4 2 2 1 2 4 3 1 2 2 4 2 4 1 2 4 4 4 2 4 2 1 1 2
## 3 1 3 2 2 1 1 2 2 3 2 4 1 1]
## [3 2 4 4 2 3 2 3 3 1 2 2 4 3 4 4 1 4 1 2 4 3 2 2 2 2 2 1 3 4 3 1 2 2 4 3
## 1 4 4 3 2 2 4 3 4 3 2 2 2 1]
## [2 2 2 4 4 1 3 2 3 2 1 1 2 1 1 1 2 3 3 4 4 4 1 4 4 2 1 1 2 3 3 1 3 1 1 3
## 4 1 1 3 3 4 1 3 3 2 2 3 4 2]
## [3 4 4 3 3 2 2 2 2 3 1 1 3 1 1 1 3 1 1 4 4 2 1 4 4 4 1 3 4 2 4 4 3 1 1 4
## 4 2 4 1 4 4 2 2 2 1 1 2 4 3]
## [2 3 1 1 3 1 3 4 1 4 4 4 4 1 1 1 2 4 4 1 4 4 2 2 2 3 1 1 4 3 4 1 3 4 3 4
## 1 2 1 2 1 2 2 3 1 3 3 1 3 2]
## [1 2 4 1 4 1 3 4 3 2 2 3 3 3 2 1 2 3 1 4 1 3 2 4 4 2 1 1 3 4 1 1 3 3 4 3
## 2 3 2 4 4 3 3 4 4 3 1 2 1 1]
## [2 3 4 3 2 3 3 3 1 3 1 4 3 3 1 1 3 4 1 3 3 4 3 4 1 3 3 4 4 2 4 4 4 4 3 1
## 4 1 3 1 2 1 2 4 4 2 3 4 2 2]
## [2 3 3 3 1 2 2 2 2 1 1 4 4 1 2 2 3 4 2 3 3 3 2 2 2 1 2 3 3 3 1 3 2 1 4 4
## 1 2 4 3 4 3 2 2 1 1 4 3 2 1]
## [4 1 1 2 4 2 1 4 4 4 2 1 1 2 1 4 3 4 1 2 4 2 1 3 4 4 3 1 2 3 2 2 2 2 4 4
## 3 2 4 2 1 1 2 3 2 3 4 3 1 1]]
## [['T' 'A' 'C' 'G' 'T' 'T' 'C' 'C' 'T' 'A' 'T' 'A' 'T' 'T' 'C' 'C' 'T' 'A'
## 'G' 'A' 'C' 'G' 'G' 'A' 'T' 'C' 'T' 'T' 'A' 'C' 'C' 'G' 'G' 'G' 'A' 'T'
## 'G' 'G' 'A' 'A' 'C' 'A' 'T' 'G' 'T' 'T' 'C' 'C' 'G' 'A']
## ['A' 'T' 'T' 'T' 'G' 'T' 'G' 'G' 'C' 'A' 'T' 'A' 'A' 'A' 'T' 'A' 'T' 'T'
## 'C' 'G' 'G' 'C' 'C' 'T' 'G' 'G' 'T' 'T' 'A' 'A' 'C' 'A' 'G' 'G' 'T' 'C'
## 'A' 'A' 'C' 'T' 'A' 'A' 'A' 'A' 'G' 'A' 'T' 'A' 'C' 'T']
## ['T' 'G' 'C' 'A' 'T' 'C' 'G' 'T' 'T' 'A' 'C' 'A' 'C' 'T' 'A' 'T' 'T' 'G'
## 'G' 'A' 'A' 'A' 'A' 'T' 'C' 'A' 'A' 'A' 'G' 'T' 'G' 'A' 'T' 'G' 'A' 'C'
## 'G' 'G' 'T' 'T' 'G' 'C' 'T' 'T' 'A' 'A' 'A' 'T' 'G' 'T']
## ['A' 'T' 'C' 'T' 'G' 'A' 'T' 'G' 'A' 'A' 'A' 'C' 'G' 'G' 'C' 'G' 'C' 'T'
## 'T' 'T' 'A' 'G' 'A' 'C' 'C' 'G' 'G' 'C' 'A' 'A' 'A' 'A' 'G' 'T' 'A' 'A'
## 'T' 'G' 'C' 'C' 'C' 'G' 'A' 'T' 'T' 'G' 'C' 'A' 'G' 'T']
## ['A' 'A' 'C' 'A' 'C' 'C' 'A' 'T' 'C' 'G' 'G' 'G' 'T' 'C' 'C' 'A' 'C' 'T'
## 'G' 'A' 'C' 'C' 'T' 'C' 'T' 'A' 'C' 'T' 'T' 'T' 'C' 'T' 'C' 'A' 'A' 'C'
## 'G' 'A' 'G' 'C' 'C' 'A' 'A' 'C' 'C' 'G' 'C' 'T' 'A' 'A']
## ['G' 'C' 'T' 'T' 'C' 'G' 'C' 'G' 'G' 'A' 'C' 'C' 'T' 'G' 'T' 'T' 'A' 'T'
## 'A' 'C' 'T' 'G' 'C' 'C' 'C' 'C' 'C' 'A' 'G' 'T' 'G' 'A' 'C' 'C' 'T' 'G'
## 'A' 'T' 'T' 'G' 'C' 'C' 'T' 'G' 'T' 'G' 'C' 'C' 'C' 'A']
## ['C' 'C' 'C' 'T' 'T' 'A' 'G' 'C' 'G' 'C' 'A' 'A' 'C' 'A' 'A' 'A' 'C' 'G'
## 'G' 'T' 'T' 'T' 'A' 'T' 'T' 'C' 'A' 'A' 'C' 'G' 'G' 'A' 'G' 'A' 'A' 'G'
## 'T' 'A' 'A' 'G' 'G' 'T' 'A' 'G' 'G' 'C' 'C' 'G' 'T' 'C']
## ['G' 'T' 'T' 'G' 'G' 'C' 'C' 'C' 'C' 'G' 'A' 'A' 'G' 'A' 'A' 'A' 'G' 'A'
## 'A' 'T' 'T' 'C' 'A' 'T' 'T' 'T' 'A' 'G' 'T' 'C' 'T' 'T' 'G' 'A' 'A' 'T'
## 'T' 'C' 'T' 'A' 'T' 'T' 'C' 'C' 'C' 'A' 'A' 'C' 'T' 'G']
## ['C' 'G' 'A' 'A' 'G' 'A' 'G' 'T' 'A' 'T' 'T' 'T' 'T' 'A' 'A' 'A' 'C' 'T'
## 'T' 'A' 'T' 'T' 'C' 'C' 'C' 'G' 'A' 'A' 'T' 'G' 'T' 'A' 'G' 'T' 'G' 'T'
## 'A' 'C' 'A' 'C' 'A' 'C' 'C' 'G' 'A' 'G' 'G' 'A' 'G' 'C']
## ['A' 'C' 'T' 'A' 'T' 'A' 'G' 'T' 'G' 'C' 'C' 'G' 'G' 'G' 'C' 'A' 'C' 'G'
## 'A' 'T' 'A' 'G' 'C' 'T' 'T' 'C' 'A' 'A' 'G' 'T' 'A' 'A' 'G' 'G' 'T' 'G'
## 'C' 'G' 'C' 'T' 'T' 'G' 'G' 'T' 'T' 'G' 'A' 'C' 'A' 'A']
## ['C' 'G' 'T' 'G' 'C' 'G' 'G' 'G' 'A' 'G' 'A' 'T' 'G' 'G' 'A' 'A' 'G' 'T'
## 'A' 'G' 'G' 'T' 'G' 'T' 'A' 'G' 'G' 'T' 'T' 'C' 'T' 'T' 'T' 'T' 'G' 'A'
## 'T' 'A' 'G' 'A' 'C' 'A' 'C' 'T' 'T' 'C' 'G' 'T' 'C' 'C']
## ['C' 'G' 'G' 'G' 'A' 'C' 'C' 'C' 'C' 'A' 'A' 'T' 'T' 'A' 'C' 'C' 'G' 'T'
## 'C' 'G' 'G' 'G' 'C' 'C' 'C' 'A' 'C' 'G' 'G' 'G' 'A' 'G' 'C' 'A' 'T' 'T'
## 'A' 'C' 'T' 'G' 'T' 'G' 'C' 'C' 'A' 'A' 'T' 'G' 'C' 'A']
## ['T' 'A' 'A' 'C' 'T' 'C' 'A' 'T' 'T' 'T' 'C' 'A' 'A' 'C' 'A' 'T' 'G' 'T'
## 'A' 'C' 'T' 'C' 'A' 'G' 'T' 'T' 'G' 'A' 'C' 'G' 'C' 'C' 'C' 'C' 'T' 'T'
## 'G' 'C' 'T' 'C' 'A' 'A' 'C' 'G' 'C' 'G' 'T' 'G' 'A' 'A']]
## [array([[4, 1, 2, 3, 4, 4, 2, 2, 4, 1, 4, 1, 4, 4, 2, 2, 4, 1, 3, 1, 2, 3,
## 3, 1, 4, 2, 4, 4, 1, 2, 2, 3, 3, 3, 1, 4, 3, 3, 1, 1, 2, 1, 4, 3,
## 4, 4, 2, 2, 3, 1],
## [1, 4, 4, 4, 3, 4, 3, 3, 2, 1, 4, 1, 1, 1, 4, 1, 4, 4, 2, 3, 3, 2,
## 2, 4, 3, 3, 4, 4, 1, 1, 2, 1, 3, 3, 4, 2, 1, 1, 2, 4, 1, 1, 1, 1,
## 3, 1, 4, 1, 2, 4],
## [4, 3, 2, 1, 4, 2, 3, 4, 4, 1, 2, 1, 2, 4, 1, 4, 4, 3, 3, 1, 1, 1,
## 1, 4, 2, 1, 1, 1, 3, 4, 3, 1, 4, 3, 1, 2, 3, 3, 4, 4, 3, 2, 4, 4,
## 1, 1, 1, 4, 3, 4],
## [1, 4, 2, 4, 3, 1, 4, 3, 1, 1, 1, 2, 3, 3, 2, 3, 2, 4, 4, 4, 1, 3,
## 1, 2, 2, 3, 3, 2, 1, 1, 1, 1, 3, 4, 1, 1, 4, 3, 2, 2, 2, 3, 1, 4,
## 4, 3, 2, 1, 3, 4],
## [1, 1, 2, 1, 2, 2, 1, 4, 2, 3, 3, 3, 4, 2, 2, 1, 2, 4, 3, 1, 2, 2,
## 4, 2, 4, 1, 2, 4, 4, 4, 2, 4, 2, 1, 1, 2, 3, 1, 3, 2, 2, 1, 1, 2,
## 2, 3, 2, 4, 1, 1],
## [3, 2, 4, 4, 2, 3, 2, 3, 3, 1, 2, 2, 4, 3, 4, 4, 1, 4, 1, 2, 4, 3,
## 2, 2, 2, 2, 2, 1, 3, 4, 3, 1, 2, 2, 4, 3, 1, 4, 4, 3, 2, 2, 4, 3,
## 4, 3, 2, 2, 2, 1],
## [2, 2, 2, 4, 4, 1, 3, 2, 3, 2, 1, 1, 2, 1, 1, 1, 2, 3, 3, 4, 4, 4,
## 1, 4, 4, 2, 1, 1, 2, 3, 3, 1, 3, 1, 1, 3, 4, 1, 1, 3, 3, 4, 1, 3,
## 3, 2, 2, 3, 4, 2],
## [3, 4, 4, 3, 3, 2, 2, 2, 2, 3, 1, 1, 3, 1, 1, 1, 3, 1, 1, 4, 4, 2,
## 1, 4, 4, 4, 1, 3, 4, 2, 4, 4, 3, 1, 1, 4, 4, 2, 4, 1, 4, 4, 2, 2,
## 2, 1, 1, 2, 4, 3],
## [2, 3, 1, 1, 3, 1, 3, 4, 1, 4, 4, 4, 4, 1, 1, 1, 2, 4, 4, 1, 4, 4,
## 2, 2, 2, 3, 1, 1, 4, 3, 4, 1, 3, 4, 3, 4, 1, 2, 1, 2, 1, 2, 2, 3,
## 1, 3, 3, 1, 3, 2],
## [1, 2, 4, 1, 4, 1, 3, 4, 3, 2, 2, 3, 3, 3, 2, 1, 2, 3, 1, 4, 1, 3,
## 2, 4, 4, 2, 1, 1, 3, 4, 1, 1, 3, 3, 4, 3, 2, 3, 2, 4, 4, 3, 3, 4,
## 4, 3, 1, 2, 1, 1],
## [2, 3, 4, 3, 2, 3, 3, 3, 1, 3, 1, 4, 3, 3, 1, 1, 3, 4, 1, 3, 3, 4,
## 3, 4, 1, 3, 3, 4, 4, 2, 4, 4, 4, 4, 3, 1, 4, 1, 3, 1, 2, 1, 2, 4,
## 4, 2, 3, 4, 2, 2],
## [2, 3, 3, 3, 1, 2, 2, 2, 2, 1, 1, 4, 4, 1, 2, 2, 3, 4, 2, 3, 3, 3,
## 2, 2, 2, 1, 2, 3, 3, 3, 1, 3, 2, 1, 4, 4, 1, 2, 4, 3, 4, 3, 2, 2,
## 1, 1, 4, 3, 2, 1],
## [4, 1, 1, 2, 4, 2, 1, 4, 4, 4, 2, 1, 1, 2, 1, 4, 3, 4, 1, 2, 4, 2,
## 1, 3, 4, 4, 3, 1, 2, 3, 2, 2, 2, 2, 4, 4, 3, 2, 4, 2, 1, 1, 2, 3,
## 2, 3, 4, 3, 1, 1]]), array([['T', 'A', 'C', 'G', 'T', 'T', 'C', 'C', 'T', 'A', 'T', 'A', 'T',
## 'T', 'C', 'C', 'T', 'A', 'G', 'A', 'C', 'G', 'G', 'A', 'T', 'C',
## 'T', 'T', 'A', 'C', 'C', 'G', 'G', 'G', 'A', 'T', 'G', 'G', 'A',
## 'A', 'C', 'A', 'T', 'G', 'T', 'T', 'C', 'C', 'G', 'A'],
## ['A', 'T', 'T', 'T', 'G', 'T', 'G', 'G', 'C', 'A', 'T', 'A', 'A',
## 'A', 'T', 'A', 'T', 'T', 'C', 'G', 'G', 'C', 'C', 'T', 'G', 'G',
## 'T', 'T', 'A', 'A', 'C', 'A', 'G', 'G', 'T', 'C', 'A', 'A', 'C',
## 'T', 'A', 'A', 'A', 'A', 'G', 'A', 'T', 'A', 'C', 'T'],
## ['T', 'G', 'C', 'A', 'T', 'C', 'G', 'T', 'T', 'A', 'C', 'A', 'C',
## 'T', 'A', 'T', 'T', 'G', 'G', 'A', 'A', 'A', 'A', 'T', 'C', 'A',
## 'A', 'A', 'G', 'T', 'G', 'A', 'T', 'G', 'A', 'C', 'G', 'G', 'T',
## 'T', 'G', 'C', 'T', 'T', 'A', 'A', 'A', 'T', 'G', 'T'],
## ['A', 'T', 'C', 'T', 'G', 'A', 'T', 'G', 'A', 'A', 'A', 'C', 'G',
## 'G', 'C', 'G', 'C', 'T', 'T', 'T', 'A', 'G', 'A', 'C', 'C', 'G',
## 'G', 'C', 'A', 'A', 'A', 'A', 'G', 'T', 'A', 'A', 'T', 'G', 'C',
## 'C', 'C', 'G', 'A', 'T', 'T', 'G', 'C', 'A', 'G', 'T'],
## ['A', 'A', 'C', 'A', 'C', 'C', 'A', 'T', 'C', 'G', 'G', 'G', 'T',
## 'C', 'C', 'A', 'C', 'T', 'G', 'A', 'C', 'C', 'T', 'C', 'T', 'A',
## 'C', 'T', 'T', 'T', 'C', 'T', 'C', 'A', 'A', 'C', 'G', 'A', 'G',
## 'C', 'C', 'A', 'A', 'C', 'C', 'G', 'C', 'T', 'A', 'A'],
## ['G', 'C', 'T', 'T', 'C', 'G', 'C', 'G', 'G', 'A', 'C', 'C', 'T',
## 'G', 'T', 'T', 'A', 'T', 'A', 'C', 'T', 'G', 'C', 'C', 'C', 'C',
## 'C', 'A', 'G', 'T', 'G', 'A', 'C', 'C', 'T', 'G', 'A', 'T', 'T',
## 'G', 'C', 'C', 'T', 'G', 'T', 'G', 'C', 'C', 'C', 'A'],
## ['C', 'C', 'C', 'T', 'T', 'A', 'G', 'C', 'G', 'C', 'A', 'A', 'C',
## 'A', 'A', 'A', 'C', 'G', 'G', 'T', 'T', 'T', 'A', 'T', 'T', 'C',
## 'A', 'A', 'C', 'G', 'G', 'A', 'G', 'A', 'A', 'G', 'T', 'A', 'A',
## 'G', 'G', 'T', 'A', 'G', 'G', 'C', 'C', 'G', 'T', 'C'],
## ['G', 'T', 'T', 'G', 'G', 'C', 'C', 'C', 'C', 'G', 'A', 'A', 'G',
## 'A', 'A', 'A', 'G', 'A', 'A', 'T', 'T', 'C', 'A', 'T', 'T', 'T',
## 'A', 'G', 'T', 'C', 'T', 'T', 'G', 'A', 'A', 'T', 'T', 'C', 'T',
## 'A', 'T', 'T', 'C', 'C', 'C', 'A', 'A', 'C', 'T', 'G'],
## ['C', 'G', 'A', 'A', 'G', 'A', 'G', 'T', 'A', 'T', 'T', 'T', 'T',
## 'A', 'A', 'A', 'C', 'T', 'T', 'A', 'T', 'T', 'C', 'C', 'C', 'G',
## 'A', 'A', 'T', 'G', 'T', 'A', 'G', 'T', 'G', 'T', 'A', 'C', 'A',
## 'C', 'A', 'C', 'C', 'G', 'A', 'G', 'G', 'A', 'G', 'C'],
## ['A', 'C', 'T', 'A', 'T', 'A', 'G', 'T', 'G', 'C', 'C', 'G', 'G',
## 'G', 'C', 'A', 'C', 'G', 'A', 'T', 'A', 'G', 'C', 'T', 'T', 'C',
## 'A', 'A', 'G', 'T', 'A', 'A', 'G', 'G', 'T', 'G', 'C', 'G', 'C',
## 'T', 'T', 'G', 'G', 'T', 'T', 'G', 'A', 'C', 'A', 'A'],
## ['C', 'G', 'T', 'G', 'C', 'G', 'G', 'G', 'A', 'G', 'A', 'T', 'G',
## 'G', 'A', 'A', 'G', 'T', 'A', 'G', 'G', 'T', 'G', 'T', 'A', 'G',
## 'G', 'T', 'T', 'C', 'T', 'T', 'T', 'T', 'G', 'A', 'T', 'A', 'G',
## 'A', 'C', 'A', 'C', 'T', 'T', 'C', 'G', 'T', 'C', 'C'],
## ['C', 'G', 'G', 'G', 'A', 'C', 'C', 'C', 'C', 'A', 'A', 'T', 'T',
## 'A', 'C', 'C', 'G', 'T', 'C', 'G', 'G', 'G', 'C', 'C', 'C', 'A',
## 'C', 'G', 'G', 'G', 'A', 'G', 'C', 'A', 'T', 'T', 'A', 'C', 'T',
## 'G', 'T', 'G', 'C', 'C', 'A', 'A', 'T', 'G', 'C', 'A'],
## ['T', 'A', 'A', 'C', 'T', 'C', 'A', 'T', 'T', 'T', 'C', 'A', 'A',
## 'C', 'A', 'T', 'G', 'T', 'A', 'C', 'T', 'C', 'A', 'G', 'T', 'T',
## 'G', 'A', 'C', 'G', 'C', 'C', 'C', 'C', 'T', 'T', 'G', 'C', 'T',
## 'C', 'A', 'A', 'C', 'G', 'C', 'G', 'T', 'G', 'A', 'A']],
## dtype='<U1')]
12 Create a plot to indicate swap mutations-bases
- which don’t match between genome_01 and genome_02.
12.1 Example 3
# Index of the gene to display (into genome_01_split and the mutation lists).
genID = 12
# NOTE(review): element [2] presumably holds the per-gene swap-mutation
# positions — confirm where print_swap_mutation_gene is built.
where_mutation = print_swap_mutation_gene[2]
where_mut = where_mutation[genID]
# Convert each stored position to int before handing it to the plotting method.
where_mut = list(map(int, where_mut))
# Draw the gene at 50 bases per row. In the echoed numeric matrix the two
# flagged positions appear as 0 instead of a base code 1-4.
seq.Dksequence(genome_01_split[genID]).show_mutation_genes_with_color(max_width = 50, where_mutation= where_mut)## [[4 1 2 3 4 4 2 2 4 1 4 1 4 4 2 2 4 1 3 1 2 3 3 1 4 2 4 4 1 2 2 3 3 3 1 4
## 3 3 1 1 2 1 4 3 4 4 2 2 3 1]
## [1 4 4 4 3 4 3 3 2 1 4 1 1 1 4 1 4 4 2 3 3 2 2 4 3 3 4 4 1 1 2 1 3 3 4 2
## 1 1 2 4 1 1 1 1 3 1 4 1 2 4]
## [4 3 2 1 4 2 3 4 4 1 2 1 2 4 1 4 4 3 3 1 1 1 1 4 2 1 1 1 3 4 3 1 4 3 1 2
## 3 3 4 4 3 2 4 4 1 1 1 4 3 4]
## [1 4 2 4 3 1 4 3 1 1 1 2 3 3 2 3 2 4 4 4 1 3 1 2 2 3 3 2 1 1 1 1 3 4 1 1
## 4 3 2 2 2 3 1 4 4 3 2 1 3 4]
## [1 1 2 1 2 2 1 4 2 3 3 3 4 2 2 1 2 4 3 1 2 2 4 2 4 1 2 4 4 4 2 4 2 1 1 2
## 3 1 3 2 2 1 1 2 2 3 2 4 1 1]
## [3 2 4 4 2 3 2 3 3 1 2 2 4 3 4 4 1 4 1 2 4 3 2 2 2 2 2 1 3 4 3 1 2 2 4 3
## 1 4 4 3 2 2 4 3 4 3 2 2 2 1]
## [2 2 2 4 4 1 3 2 3 2 1 1 2 1 1 1 2 3 3 4 4 4 1 4 4 2 1 1 2 3 3 1 3 1 1 3
## 4 1 1 3 3 4 1 3 3 2 2 3 4 2]
## [3 4 4 3 3 2 2 2 2 3 1 1 3 1 1 1 3 1 1 4 4 0 1 4 4 4 1 3 4 2 4 4 3 1 1 4
## 4 2 4 1 4 4 2 2 2 1 1 2 4 3]
## [2 3 1 1 3 1 3 4 1 4 4 4 4 1 1 1 2 4 4 1 4 4 2 2 2 3 1 1 4 3 4 1 3 4 3 4
## 1 2 1 2 1 2 2 3 1 3 3 1 3 2]
## [1 2 4 1 4 1 3 4 3 2 2 3 3 3 2 1 2 3 1 4 1 3 2 4 4 2 1 1 3 4 1 1 3 3 4 3
## 2 3 2 4 4 3 3 4 4 3 1 2 1 1]
## [2 3 4 3 2 3 3 3 1 3 1 4 3 3 1 1 3 4 1 3 3 4 3 4 1 3 3 4 4 2 4 4 4 4 3 1
## 4 1 3 1 2 1 2 4 4 2 3 4 2 2]
## [2 3 3 3 0 2 2 2 2 1 1 4 4 1 2 2 3 4 2 3 3 3 2 2 2 1 2 3 3 3 1 3 2 1 4 4
## 1 2 4 3 4 3 2 2 1 1 4 3 2 1]
## [4 1 1 2 4 2 1 4 4 4 2 1 1 2 1 4 3 4 1 2 4 2 1 3 4 4 3 1 2 3 2 2 2 2 4 4
## 3 2 4 2 1 1 2 3 2 3 4 3 1 1]]
## [['T' 'A' 'C' 'G' 'T' 'T' 'C' 'C' 'T' 'A' 'T' 'A' 'T' 'T' 'C' 'C' 'T' 'A'
## 'G' 'A' 'C' 'G' 'G' 'A' 'T' 'C' 'T' 'T' 'A' 'C' 'C' 'G' 'G' 'G' 'A' 'T'
## 'G' 'G' 'A' 'A' 'C' 'A' 'T' 'G' 'T' 'T' 'C' 'C' 'G' 'A']
## ['A' 'T' 'T' 'T' 'G' 'T' 'G' 'G' 'C' 'A' 'T' 'A' 'A' 'A' 'T' 'A' 'T' 'T'
## 'C' 'G' 'G' 'C' 'C' 'T' 'G' 'G' 'T' 'T' 'A' 'A' 'C' 'A' 'G' 'G' 'T' 'C'
## 'A' 'A' 'C' 'T' 'A' 'A' 'A' 'A' 'G' 'A' 'T' 'A' 'C' 'T']
## ['T' 'G' 'C' 'A' 'T' 'C' 'G' 'T' 'T' 'A' 'C' 'A' 'C' 'T' 'A' 'T' 'T' 'G'
## 'G' 'A' 'A' 'A' 'A' 'T' 'C' 'A' 'A' 'A' 'G' 'T' 'G' 'A' 'T' 'G' 'A' 'C'
## 'G' 'G' 'T' 'T' 'G' 'C' 'T' 'T' 'A' 'A' 'A' 'T' 'G' 'T']
## ['A' 'T' 'C' 'T' 'G' 'A' 'T' 'G' 'A' 'A' 'A' 'C' 'G' 'G' 'C' 'G' 'C' 'T'
## 'T' 'T' 'A' 'G' 'A' 'C' 'C' 'G' 'G' 'C' 'A' 'A' 'A' 'A' 'G' 'T' 'A' 'A'
## 'T' 'G' 'C' 'C' 'C' 'G' 'A' 'T' 'T' 'G' 'C' 'A' 'G' 'T']
## ['A' 'A' 'C' 'A' 'C' 'C' 'A' 'T' 'C' 'G' 'G' 'G' 'T' 'C' 'C' 'A' 'C' 'T'
## 'G' 'A' 'C' 'C' 'T' 'C' 'T' 'A' 'C' 'T' 'T' 'T' 'C' 'T' 'C' 'A' 'A' 'C'
## 'G' 'A' 'G' 'C' 'C' 'A' 'A' 'C' 'C' 'G' 'C' 'T' 'A' 'A']
## ['G' 'C' 'T' 'T' 'C' 'G' 'C' 'G' 'G' 'A' 'C' 'C' 'T' 'G' 'T' 'T' 'A' 'T'
## 'A' 'C' 'T' 'G' 'C' 'C' 'C' 'C' 'C' 'A' 'G' 'T' 'G' 'A' 'C' 'C' 'T' 'G'
## 'A' 'T' 'T' 'G' 'C' 'C' 'T' 'G' 'T' 'G' 'C' 'C' 'C' 'A']
## ['C' 'C' 'C' 'T' 'T' 'A' 'G' 'C' 'G' 'C' 'A' 'A' 'C' 'A' 'A' 'A' 'C' 'G'
## 'G' 'T' 'T' 'T' 'A' 'T' 'T' 'C' 'A' 'A' 'C' 'G' 'G' 'A' 'G' 'A' 'A' 'G'
## 'T' 'A' 'A' 'G' 'G' 'T' 'A' 'G' 'G' 'C' 'C' 'G' 'T' 'C']
## ['G' 'T' 'T' 'G' 'G' 'C' 'C' 'C' 'C' 'G' 'A' 'A' 'G' 'A' 'A' 'A' 'G' 'A'
## 'A' 'T' 'T' 'C' 'A' 'T' 'T' 'T' 'A' 'G' 'T' 'C' 'T' 'T' 'G' 'A' 'A' 'T'
## 'T' 'C' 'T' 'A' 'T' 'T' 'C' 'C' 'C' 'A' 'A' 'C' 'T' 'G']
## ['C' 'G' 'A' 'A' 'G' 'A' 'G' 'T' 'A' 'T' 'T' 'T' 'T' 'A' 'A' 'A' 'C' 'T'
## 'T' 'A' 'T' 'T' 'C' 'C' 'C' 'G' 'A' 'A' 'T' 'G' 'T' 'A' 'G' 'T' 'G' 'T'
## 'A' 'C' 'A' 'C' 'A' 'C' 'C' 'G' 'A' 'G' 'G' 'A' 'G' 'C']
## ['A' 'C' 'T' 'A' 'T' 'A' 'G' 'T' 'G' 'C' 'C' 'G' 'G' 'G' 'C' 'A' 'C' 'G'
## 'A' 'T' 'A' 'G' 'C' 'T' 'T' 'C' 'A' 'A' 'G' 'T' 'A' 'A' 'G' 'G' 'T' 'G'
## 'C' 'G' 'C' 'T' 'T' 'G' 'G' 'T' 'T' 'G' 'A' 'C' 'A' 'A']
## ['C' 'G' 'T' 'G' 'C' 'G' 'G' 'G' 'A' 'G' 'A' 'T' 'G' 'G' 'A' 'A' 'G' 'T'
## 'A' 'G' 'G' 'T' 'G' 'T' 'A' 'G' 'G' 'T' 'T' 'C' 'T' 'T' 'T' 'T' 'G' 'A'
## 'T' 'A' 'G' 'A' 'C' 'A' 'C' 'T' 'T' 'C' 'G' 'T' 'C' 'C']
## ['C' 'G' 'G' 'G' 'A' 'C' 'C' 'C' 'C' 'A' 'A' 'T' 'T' 'A' 'C' 'C' 'G' 'T'
## 'C' 'G' 'G' 'G' 'C' 'C' 'C' 'A' 'C' 'G' 'G' 'G' 'A' 'G' 'C' 'A' 'T' 'T'
## 'A' 'C' 'T' 'G' 'T' 'G' 'C' 'C' 'A' 'A' 'T' 'G' 'C' 'A']
## ['T' 'A' 'A' 'C' 'T' 'C' 'A' 'T' 'T' 'T' 'C' 'A' 'A' 'C' 'A' 'T' 'G' 'T'
## 'A' 'C' 'T' 'C' 'A' 'G' 'T' 'T' 'G' 'A' 'C' 'G' 'C' 'C' 'C' 'C' 'T' 'T'
## 'G' 'C' 'T' 'C' 'A' 'A' 'C' 'G' 'C' 'G' 'T' 'G' 'A' 'A']]
## [array([[4, 1, 2, 3, 4, 4, 2, 2, 4, 1, 4, 1, 4, 4, 2, 2, 4, 1, 3, 1, 2, 3,
## 3, 1, 4, 2, 4, 4, 1, 2, 2, 3, 3, 3, 1, 4, 3, 3, 1, 1, 2, 1, 4, 3,
## 4, 4, 2, 2, 3, 1],
## [1, 4, 4, 4, 3, 4, 3, 3, 2, 1, 4, 1, 1, 1, 4, 1, 4, 4, 2, 3, 3, 2,
## 2, 4, 3, 3, 4, 4, 1, 1, 2, 1, 3, 3, 4, 2, 1, 1, 2, 4, 1, 1, 1, 1,
## 3, 1, 4, 1, 2, 4],
## [4, 3, 2, 1, 4, 2, 3, 4, 4, 1, 2, 1, 2, 4, 1, 4, 4, 3, 3, 1, 1, 1,
## 1, 4, 2, 1, 1, 1, 3, 4, 3, 1, 4, 3, 1, 2, 3, 3, 4, 4, 3, 2, 4, 4,
## 1, 1, 1, 4, 3, 4],
## [1, 4, 2, 4, 3, 1, 4, 3, 1, 1, 1, 2, 3, 3, 2, 3, 2, 4, 4, 4, 1, 3,
## 1, 2, 2, 3, 3, 2, 1, 1, 1, 1, 3, 4, 1, 1, 4, 3, 2, 2, 2, 3, 1, 4,
## 4, 3, 2, 1, 3, 4],
## [1, 1, 2, 1, 2, 2, 1, 4, 2, 3, 3, 3, 4, 2, 2, 1, 2, 4, 3, 1, 2, 2,
## 4, 2, 4, 1, 2, 4, 4, 4, 2, 4, 2, 1, 1, 2, 3, 1, 3, 2, 2, 1, 1, 2,
## 2, 3, 2, 4, 1, 1],
## [3, 2, 4, 4, 2, 3, 2, 3, 3, 1, 2, 2, 4, 3, 4, 4, 1, 4, 1, 2, 4, 3,
## 2, 2, 2, 2, 2, 1, 3, 4, 3, 1, 2, 2, 4, 3, 1, 4, 4, 3, 2, 2, 4, 3,
## 4, 3, 2, 2, 2, 1],
## [2, 2, 2, 4, 4, 1, 3, 2, 3, 2, 1, 1, 2, 1, 1, 1, 2, 3, 3, 4, 4, 4,
## 1, 4, 4, 2, 1, 1, 2, 3, 3, 1, 3, 1, 1, 3, 4, 1, 1, 3, 3, 4, 1, 3,
## 3, 2, 2, 3, 4, 2],
## [3, 4, 4, 3, 3, 2, 2, 2, 2, 3, 1, 1, 3, 1, 1, 1, 3, 1, 1, 4, 4, 0,
## 1, 4, 4, 4, 1, 3, 4, 2, 4, 4, 3, 1, 1, 4, 4, 2, 4, 1, 4, 4, 2, 2,
## 2, 1, 1, 2, 4, 3],
## [2, 3, 1, 1, 3, 1, 3, 4, 1, 4, 4, 4, 4, 1, 1, 1, 2, 4, 4, 1, 4, 4,
## 2, 2, 2, 3, 1, 1, 4, 3, 4, 1, 3, 4, 3, 4, 1, 2, 1, 2, 1, 2, 2, 3,
## 1, 3, 3, 1, 3, 2],
## [1, 2, 4, 1, 4, 1, 3, 4, 3, 2, 2, 3, 3, 3, 2, 1, 2, 3, 1, 4, 1, 3,
## 2, 4, 4, 2, 1, 1, 3, 4, 1, 1, 3, 3, 4, 3, 2, 3, 2, 4, 4, 3, 3, 4,
## 4, 3, 1, 2, 1, 1],
## [2, 3, 4, 3, 2, 3, 3, 3, 1, 3, 1, 4, 3, 3, 1, 1, 3, 4, 1, 3, 3, 4,
## 3, 4, 1, 3, 3, 4, 4, 2, 4, 4, 4, 4, 3, 1, 4, 1, 3, 1, 2, 1, 2, 4,
## 4, 2, 3, 4, 2, 2],
## [2, 3, 3, 3, 0, 2, 2, 2, 2, 1, 1, 4, 4, 1, 2, 2, 3, 4, 2, 3, 3, 3,
## 2, 2, 2, 1, 2, 3, 3, 3, 1, 3, 2, 1, 4, 4, 1, 2, 4, 3, 4, 3, 2, 2,
## 1, 1, 4, 3, 2, 1],
## [4, 1, 1, 2, 4, 2, 1, 4, 4, 4, 2, 1, 1, 2, 1, 4, 3, 4, 1, 2, 4, 2,
## 1, 3, 4, 4, 3, 1, 2, 3, 2, 2, 2, 2, 4, 4, 3, 2, 4, 2, 1, 1, 2, 3,
## 2, 3, 4, 3, 1, 1]]), array([['T', 'A', 'C', 'G', 'T', 'T', 'C', 'C', 'T', 'A', 'T', 'A', 'T',
## 'T', 'C', 'C', 'T', 'A', 'G', 'A', 'C', 'G', 'G', 'A', 'T', 'C',
## 'T', 'T', 'A', 'C', 'C', 'G', 'G', 'G', 'A', 'T', 'G', 'G', 'A',
## 'A', 'C', 'A', 'T', 'G', 'T', 'T', 'C', 'C', 'G', 'A'],
## ['A', 'T', 'T', 'T', 'G', 'T', 'G', 'G', 'C', 'A', 'T', 'A', 'A',
## 'A', 'T', 'A', 'T', 'T', 'C', 'G', 'G', 'C', 'C', 'T', 'G', 'G',
## 'T', 'T', 'A', 'A', 'C', 'A', 'G', 'G', 'T', 'C', 'A', 'A', 'C',
## 'T', 'A', 'A', 'A', 'A', 'G', 'A', 'T', 'A', 'C', 'T'],
## ['T', 'G', 'C', 'A', 'T', 'C', 'G', 'T', 'T', 'A', 'C', 'A', 'C',
## 'T', 'A', 'T', 'T', 'G', 'G', 'A', 'A', 'A', 'A', 'T', 'C', 'A',
## 'A', 'A', 'G', 'T', 'G', 'A', 'T', 'G', 'A', 'C', 'G', 'G', 'T',
## 'T', 'G', 'C', 'T', 'T', 'A', 'A', 'A', 'T', 'G', 'T'],
## ['A', 'T', 'C', 'T', 'G', 'A', 'T', 'G', 'A', 'A', 'A', 'C', 'G',
## 'G', 'C', 'G', 'C', 'T', 'T', 'T', 'A', 'G', 'A', 'C', 'C', 'G',
## 'G', 'C', 'A', 'A', 'A', 'A', 'G', 'T', 'A', 'A', 'T', 'G', 'C',
## 'C', 'C', 'G', 'A', 'T', 'T', 'G', 'C', 'A', 'G', 'T'],
## ['A', 'A', 'C', 'A', 'C', 'C', 'A', 'T', 'C', 'G', 'G', 'G', 'T',
## 'C', 'C', 'A', 'C', 'T', 'G', 'A', 'C', 'C', 'T', 'C', 'T', 'A',
## 'C', 'T', 'T', 'T', 'C', 'T', 'C', 'A', 'A', 'C', 'G', 'A', 'G',
## 'C', 'C', 'A', 'A', 'C', 'C', 'G', 'C', 'T', 'A', 'A'],
## ['G', 'C', 'T', 'T', 'C', 'G', 'C', 'G', 'G', 'A', 'C', 'C', 'T',
## 'G', 'T', 'T', 'A', 'T', 'A', 'C', 'T', 'G', 'C', 'C', 'C', 'C',
## 'C', 'A', 'G', 'T', 'G', 'A', 'C', 'C', 'T', 'G', 'A', 'T', 'T',
## 'G', 'C', 'C', 'T', 'G', 'T', 'G', 'C', 'C', 'C', 'A'],
## ['C', 'C', 'C', 'T', 'T', 'A', 'G', 'C', 'G', 'C', 'A', 'A', 'C',
## 'A', 'A', 'A', 'C', 'G', 'G', 'T', 'T', 'T', 'A', 'T', 'T', 'C',
## 'A', 'A', 'C', 'G', 'G', 'A', 'G', 'A', 'A', 'G', 'T', 'A', 'A',
## 'G', 'G', 'T', 'A', 'G', 'G', 'C', 'C', 'G', 'T', 'C'],
## ['G', 'T', 'T', 'G', 'G', 'C', 'C', 'C', 'C', 'G', 'A', 'A', 'G',
## 'A', 'A', 'A', 'G', 'A', 'A', 'T', 'T', 'C', 'A', 'T', 'T', 'T',
## 'A', 'G', 'T', 'C', 'T', 'T', 'G', 'A', 'A', 'T', 'T', 'C', 'T',
## 'A', 'T', 'T', 'C', 'C', 'C', 'A', 'A', 'C', 'T', 'G'],
## ['C', 'G', 'A', 'A', 'G', 'A', 'G', 'T', 'A', 'T', 'T', 'T', 'T',
## 'A', 'A', 'A', 'C', 'T', 'T', 'A', 'T', 'T', 'C', 'C', 'C', 'G',
## 'A', 'A', 'T', 'G', 'T', 'A', 'G', 'T', 'G', 'T', 'A', 'C', 'A',
## 'C', 'A', 'C', 'C', 'G', 'A', 'G', 'G', 'A', 'G', 'C'],
## ['A', 'C', 'T', 'A', 'T', 'A', 'G', 'T', 'G', 'C', 'C', 'G', 'G',
## 'G', 'C', 'A', 'C', 'G', 'A', 'T', 'A', 'G', 'C', 'T', 'T', 'C',
## 'A', 'A', 'G', 'T', 'A', 'A', 'G', 'G', 'T', 'G', 'C', 'G', 'C',
## 'T', 'T', 'G', 'G', 'T', 'T', 'G', 'A', 'C', 'A', 'A'],
## ['C', 'G', 'T', 'G', 'C', 'G', 'G', 'G', 'A', 'G', 'A', 'T', 'G',
## 'G', 'A', 'A', 'G', 'T', 'A', 'G', 'G', 'T', 'G', 'T', 'A', 'G',
## 'G', 'T', 'T', 'C', 'T', 'T', 'T', 'T', 'G', 'A', 'T', 'A', 'G',
## 'A', 'C', 'A', 'C', 'T', 'T', 'C', 'G', 'T', 'C', 'C'],
## ['C', 'G', 'G', 'G', 'A', 'C', 'C', 'C', 'C', 'A', 'A', 'T', 'T',
## 'A', 'C', 'C', 'G', 'T', 'C', 'G', 'G', 'G', 'C', 'C', 'C', 'A',
## 'C', 'G', 'G', 'G', 'A', 'G', 'C', 'A', 'T', 'T', 'A', 'C', 'T',
## 'G', 'T', 'G', 'C', 'C', 'A', 'A', 'T', 'G', 'C', 'A'],
## ['T', 'A', 'A', 'C', 'T', 'C', 'A', 'T', 'T', 'T', 'C', 'A', 'A',
## 'C', 'A', 'T', 'G', 'T', 'A', 'C', 'T', 'C', 'A', 'G', 'T', 'T',
## 'G', 'A', 'C', 'G', 'C', 'C', 'C', 'C', 'T', 'T', 'G', 'C', 'T',
## 'C', 'A', 'A', 'C', 'G', 'C', 'G', 'T', 'G', 'A', 'A']],
## dtype='<U1')]
12.2 Example 4
# Index of the gene to display (into genome_01_split and the mutation lists).
genID = 59
# NOTE(review): element [2] presumably holds the per-gene swap-mutation
# positions — confirm where print_swap_mutation_gene is built.
where_mutation = print_swap_mutation_gene[2]
where_mut = where_mutation[genID]
# Convert each stored position to int before handing it to the plotting method.
where_mut = list(map(int, where_mut))
# Draw the gene with max_width = 50; the echoed matrices are abbreviated by
# NumPy ("...") because this gene is long.
seq.Dksequence(genome_01_split[genID]).show_mutation_genes_with_color(max_width = 50, where_mutation= where_mut)## [[4 2 2 ... 4 2 3]
## [4 1 2 ... 1 1 2]
## [4 1 2 ... 3 4 2]
## ...
## [3 2 1 ... 1 1 1]
## [2 2 4 ... 2 4 4]
## [2 3 2 ... 2 4 4]]
## [['T' 'C' 'C' ... 'T' 'C' 'G']
## ['T' 'A' 'C' ... 'A' 'A' 'C']
## ['T' 'A' 'C' ... 'G' 'T' 'C']
## ...
## ['G' 'C' 'A' ... 'A' 'A' 'A']
## ['C' 'C' 'T' ... 'C' 'T' 'T']
## ['C' 'G' 'C' ... 'C' 'T' 'T']]
## [array([[4, 2, 2, ..., 4, 2, 3],
## [4, 1, 2, ..., 1, 1, 2],
## [4, 1, 2, ..., 3, 4, 2],
## ...,
## [3, 2, 1, ..., 1, 1, 1],
## [2, 2, 4, ..., 2, 4, 4],
## [2, 3, 2, ..., 2, 4, 4]]), array([['T', 'C', 'C', ..., 'T', 'C', 'G'],
## ['T', 'A', 'C', ..., 'A', 'A', 'C'],
## ['T', 'A', 'C', ..., 'G', 'T', 'C'],
## ...,
## ['G', 'C', 'A', ..., 'A', 'A', 'A'],
## ['C', 'C', 'T', ..., 'C', 'T', 'T'],
## ['C', 'G', 'C', ..., 'C', 'T', 'T']], dtype='<U1')]
12.3 Example 5
# Index of the gene to display (into genome_01_split and the mutation lists).
genID = 1
# NOTE(review): element [2] presumably holds the per-gene swap-mutation
# positions — confirm where print_swap_mutation_gene is built.
where_mutation = print_swap_mutation_gene[2]
where_mut = where_mutation[genID]
# Convert each stored position to int before handing it to the plotting method.
where_mut = list(map(int, where_mut))
# Draw the gene with max_width = 50; the echoed matrices are abbreviated by
# NumPy ("...") because this gene is long.
seq.Dksequence(genome_01_split[genID]).show_mutation_genes_with_color(max_width = 50, where_mutation= where_mut)## [[3 3 3 ... 2 3 3]
## [1 2 1 ... 1 4 2]
## [4 3 4 ... 1 1 1]
## ...
## [1 1 4 ... 4 4 2]
## [1 1 3 ... 2 1 3]
## [3 1 4 ... 4 3 2]]
## [['G' 'G' 'G' ... 'C' 'G' 'G']
## ['A' 'C' 'A' ... 'A' 'T' 'C']
## ['T' 'G' 'T' ... 'A' 'A' 'A']
## ...
## ['A' 'A' 'T' ... 'T' 'T' 'C']
## ['A' 'A' 'G' ... 'C' 'A' 'G']
## ['G' 'A' 'T' ... 'T' 'G' 'C']]
## [array([[3, 3, 3, ..., 2, 3, 3],
## [1, 2, 1, ..., 1, 4, 2],
## [4, 3, 4, ..., 1, 1, 1],
## ...,
## [1, 1, 4, ..., 4, 4, 2],
## [1, 1, 3, ..., 2, 1, 3],
## [3, 1, 4, ..., 4, 3, 2]]), array([['G', 'G', 'G', ..., 'C', 'G', 'G'],
## ['A', 'C', 'A', ..., 'A', 'T', 'C'],
## ['T', 'G', 'T', ..., 'A', 'A', 'A'],
## ...,
## ['A', 'A', 'T', ..., 'T', 'T', 'C'],
## ['A', 'A', 'G', ..., 'C', 'A', 'G'],
## ['G', 'A', 'T', ..., 'T', 'G', 'C']], dtype='<U1')]
12.4 Example 6 :
- mutation position = -1: no mutation occurs between the two genes
# Index of the gene to display (into genome_01_split and the mutation lists).
genID = 0
# NOTE(review): element [2] presumably holds the per-gene swap-mutation
# positions — confirm where print_swap_mutation_gene is built.
where_mutation = print_swap_mutation_gene[2]
where_mut = where_mutation[genID]
# Convert each stored position to int. This example illustrates the
# no-mutation case, which this document marks with the position -1.
# NOTE(review): verify that show_mutation_genes_with_color treats -1 as
# "nothing to highlight" rather than as a negative index.
where_mut = list(map(int, where_mut))
# Draw the gene at 50 bases per row.
seq.Dksequence(genome_01_split[genID]).show_mutation_genes_with_color(max_width = 50, where_mutation= where_mut)## [[4 3 1 4 2 3 3 1 2 2 2 2 1 1 4 2 4 4 3 4 1 2 3 3 1 3 4 3 3 4 3 4 1 2 2 1
## 1 3 4 1 1 4 1 1 4 4 1 3 3 3]
## [2 3 1 4 4 1 3 2 1 2 2 4 2 1 4 2 3 2 3 1 1 2 2 2 2 2 3 1 4 4 1 3 4 2 1 2
## 4 3 3 1 4 3 3 3 4 4 3 2 3 1]
## [4 4 3 2 3 4 2 4 1 1 4 1 4 1 3 1 4 4 4 1 3 2 1 2 1 4 4 1 3 1 4 2 3 2 4 4
## 2 3 2 1 4 2 3 1 2 1 3 3 2 1]
## [1 4 4 1 3 1 4 1 4 1 1 1 4 1 3 4 4 2 3 2 3 1 1 4 3 1 3 3 2 3 3 2 2 4 4 4
## 2 2 1 2 1 4 1 1 2 3 1 1 2 4]
## [3 4 3 4 4 4 2 1 1 2 4 1 1 1 3 2 1 2 1 4 2 4 4 1 3 3 3 4 1 2 3 1 4 1 2 3
## 4 3 1 3 3 1 1 1 3 4 3 1 3 1]
## [3 3 3 4 4 1 1 1 2 1 4 2 2 4 1 3 2 3 1 4 1 1 2 4 4 1 2 2 4 1 2 4 3 1 3 1
## 2 3 4 1 1 3 2 1 2 1 2 4 2 3]
## [2 1 2 2 1 4 1 1 3 2 1 1 1 4 2 3 3 3 1 1 1 4 2 1 3 2 3 4 1 2 4 2 4 2 2 4
## 2 1 4 4 2 2 1 4 3 1 3 1 2 1]
## [2 2 2 1 4 1 4 2 3 2 2 1 1 4 3 4 2 3 2 3 4 2 4 2 4 2 4 4 2 2 2 1 4 1 2 2
## 1 4 1 3 1 2 2 1 3 2 3 3 2 4]
## [3 3 2 1 3 2 4 2 2 3 1 3 4 2 2 1 2 4 2 1 2 2 4 2 3 3 3 4 4 3 4 4 4 1 2 2
## 2 3 1 3 2 2 4 2 2 1 3 3 3 1]
## [4 1 1 2 4 3 2 3 4 1 4 4 2 2 1 1 2 1 1 4 2 2 4 3 3 2 2 2 3 4 1 2 2 4 1 3
## 3 1 4 1 3 4 1 1 3 4 2 1 3 3]
## [1 1 2 3 2 2 3 1 4 2 2 4 3 1 3 4 1 1 1 4 1 2 2 1 1 4 2 4 3 1 4 2 1 1 2 2
## 4 3 2 2 2 2 3 3 1 3 4 2 4 2]
## [3 3 4 2 3 3 3 4 4 2 2 3 1 1 1 1 3 3 2 2 3 1 3 1 2 4 4 2 3 1 1 4 4 4 2 1
## 3 4 3 4 1 4 3 4 3 3 1 3 1 4]]
## [['T' 'G' 'A' 'T' 'C' 'G' 'G' 'A' 'C' 'C' 'C' 'C' 'A' 'A' 'T' 'C' 'T' 'T'
## 'G' 'T' 'A' 'C' 'G' 'G' 'A' 'G' 'T' 'G' 'G' 'T' 'G' 'T' 'A' 'C' 'C' 'A'
## 'A' 'G' 'T' 'A' 'A' 'T' 'A' 'A' 'T' 'T' 'A' 'G' 'G' 'G']
## ['C' 'G' 'A' 'T' 'T' 'A' 'G' 'C' 'A' 'C' 'C' 'T' 'C' 'A' 'T' 'C' 'G' 'C'
## 'G' 'A' 'A' 'C' 'C' 'C' 'C' 'C' 'G' 'A' 'T' 'T' 'A' 'G' 'T' 'C' 'A' 'C'
## 'T' 'G' 'G' 'A' 'T' 'G' 'G' 'G' 'T' 'T' 'G' 'C' 'G' 'A']
## ['T' 'T' 'G' 'C' 'G' 'T' 'C' 'T' 'A' 'A' 'T' 'A' 'T' 'A' 'G' 'A' 'T' 'T'
## 'T' 'A' 'G' 'C' 'A' 'C' 'A' 'T' 'T' 'A' 'G' 'A' 'T' 'C' 'G' 'C' 'T' 'T'
## 'C' 'G' 'C' 'A' 'T' 'C' 'G' 'A' 'C' 'A' 'G' 'G' 'C' 'A']
## ['A' 'T' 'T' 'A' 'G' 'A' 'T' 'A' 'T' 'A' 'A' 'A' 'T' 'A' 'G' 'T' 'T' 'C'
## 'G' 'C' 'G' 'A' 'A' 'T' 'G' 'A' 'G' 'G' 'C' 'G' 'G' 'C' 'C' 'T' 'T' 'T'
## 'C' 'C' 'A' 'C' 'A' 'T' 'A' 'A' 'C' 'G' 'A' 'A' 'C' 'T']
## ['G' 'T' 'G' 'T' 'T' 'T' 'C' 'A' 'A' 'C' 'T' 'A' 'A' 'A' 'G' 'C' 'A' 'C'
## 'A' 'T' 'C' 'T' 'T' 'A' 'G' 'G' 'G' 'T' 'A' 'C' 'G' 'A' 'T' 'A' 'C' 'G'
## 'T' 'G' 'A' 'G' 'G' 'A' 'A' 'A' 'G' 'T' 'G' 'A' 'G' 'A']
## ['G' 'G' 'G' 'T' 'T' 'A' 'A' 'A' 'C' 'A' 'T' 'C' 'C' 'T' 'A' 'G' 'C' 'G'
## 'A' 'T' 'A' 'A' 'C' 'T' 'T' 'A' 'C' 'C' 'T' 'A' 'C' 'T' 'G' 'A' 'G' 'A'
## 'C' 'G' 'T' 'A' 'A' 'G' 'C' 'A' 'C' 'A' 'C' 'T' 'C' 'G']
## ['C' 'A' 'C' 'C' 'A' 'T' 'A' 'A' 'G' 'C' 'A' 'A' 'A' 'T' 'C' 'G' 'G' 'G'
## 'A' 'A' 'A' 'T' 'C' 'A' 'G' 'C' 'G' 'T' 'A' 'C' 'T' 'C' 'T' 'C' 'C' 'T'
## 'C' 'A' 'T' 'T' 'C' 'C' 'A' 'T' 'G' 'A' 'G' 'A' 'C' 'A']
## ['C' 'C' 'C' 'A' 'T' 'A' 'T' 'C' 'G' 'C' 'C' 'A' 'A' 'T' 'G' 'T' 'C' 'G'
## 'C' 'G' 'T' 'C' 'T' 'C' 'T' 'C' 'T' 'T' 'C' 'C' 'C' 'A' 'T' 'A' 'C' 'C'
## 'A' 'T' 'A' 'G' 'A' 'C' 'C' 'A' 'G' 'C' 'G' 'G' 'C' 'T']
## ['G' 'G' 'C' 'A' 'G' 'C' 'T' 'C' 'C' 'G' 'A' 'G' 'T' 'C' 'C' 'A' 'C' 'T'
## 'C' 'A' 'C' 'C' 'T' 'C' 'G' 'G' 'G' 'T' 'T' 'G' 'T' 'T' 'T' 'A' 'C' 'C'
## 'C' 'G' 'A' 'G' 'C' 'C' 'T' 'C' 'C' 'A' 'G' 'G' 'G' 'A']
## ['T' 'A' 'A' 'C' 'T' 'G' 'C' 'G' 'T' 'A' 'T' 'T' 'C' 'C' 'A' 'A' 'C' 'A'
## 'A' 'T' 'C' 'C' 'T' 'G' 'G' 'C' 'C' 'C' 'G' 'T' 'A' 'C' 'C' 'T' 'A' 'G'
## 'G' 'A' 'T' 'A' 'G' 'T' 'A' 'A' 'G' 'T' 'C' 'A' 'G' 'G']
## ['A' 'A' 'C' 'G' 'C' 'C' 'G' 'A' 'T' 'C' 'C' 'T' 'G' 'A' 'G' 'T' 'A' 'A'
## 'A' 'T' 'A' 'C' 'C' 'A' 'A' 'T' 'C' 'T' 'G' 'A' 'T' 'C' 'A' 'A' 'C' 'C'
## 'T' 'G' 'C' 'C' 'C' 'C' 'G' 'G' 'A' 'G' 'T' 'C' 'T' 'C']
## ['G' 'G' 'T' 'C' 'G' 'G' 'G' 'T' 'T' 'C' 'C' 'G' 'A' 'A' 'A' 'A' 'G' 'G'
## 'C' 'C' 'G' 'A' 'G' 'A' 'C' 'T' 'T' 'C' 'G' 'A' 'A' 'T' 'T' 'T' 'C' 'A'
## 'G' 'T' 'G' 'T' 'A' 'T' 'G' 'T' 'G' 'G' 'A' 'G' 'A' 'T']]
## [array([[4, 3, 1, 4, 2, 3, 3, 1, 2, 2, 2, 2, 1, 1, 4, 2, 4, 4, 3, 4, 1, 2,
## 3, 3, 1, 3, 4, 3, 3, 4, 3, 4, 1, 2, 2, 1, 1, 3, 4, 1, 1, 4, 1, 1,
## 4, 4, 1, 3, 3, 3],
## [2, 3, 1, 4, 4, 1, 3, 2, 1, 2, 2, 4, 2, 1, 4, 2, 3, 2, 3, 1, 1, 2,
## 2, 2, 2, 2, 3, 1, 4, 4, 1, 3, 4, 2, 1, 2, 4, 3, 3, 1, 4, 3, 3, 3,
## 4, 4, 3, 2, 3, 1],
## [4, 4, 3, 2, 3, 4, 2, 4, 1, 1, 4, 1, 4, 1, 3, 1, 4, 4, 4, 1, 3, 2,
## 1, 2, 1, 4, 4, 1, 3, 1, 4, 2, 3, 2, 4, 4, 2, 3, 2, 1, 4, 2, 3, 1,
## 2, 1, 3, 3, 2, 1],
## [1, 4, 4, 1, 3, 1, 4, 1, 4, 1, 1, 1, 4, 1, 3, 4, 4, 2, 3, 2, 3, 1,
## 1, 4, 3, 1, 3, 3, 2, 3, 3, 2, 2, 4, 4, 4, 2, 2, 1, 2, 1, 4, 1, 1,
## 2, 3, 1, 1, 2, 4],
## [3, 4, 3, 4, 4, 4, 2, 1, 1, 2, 4, 1, 1, 1, 3, 2, 1, 2, 1, 4, 2, 4,
## 4, 1, 3, 3, 3, 4, 1, 2, 3, 1, 4, 1, 2, 3, 4, 3, 1, 3, 3, 1, 1, 1,
## 3, 4, 3, 1, 3, 1],
## [3, 3, 3, 4, 4, 1, 1, 1, 2, 1, 4, 2, 2, 4, 1, 3, 2, 3, 1, 4, 1, 1,
## 2, 4, 4, 1, 2, 2, 4, 1, 2, 4, 3, 1, 3, 1, 2, 3, 4, 1, 1, 3, 2, 1,
## 2, 1, 2, 4, 2, 3],
## [2, 1, 2, 2, 1, 4, 1, 1, 3, 2, 1, 1, 1, 4, 2, 3, 3, 3, 1, 1, 1, 4,
## 2, 1, 3, 2, 3, 4, 1, 2, 4, 2, 4, 2, 2, 4, 2, 1, 4, 4, 2, 2, 1, 4,
## 3, 1, 3, 1, 2, 1],
## [2, 2, 2, 1, 4, 1, 4, 2, 3, 2, 2, 1, 1, 4, 3, 4, 2, 3, 2, 3, 4, 2,
## 4, 2, 4, 2, 4, 4, 2, 2, 2, 1, 4, 1, 2, 2, 1, 4, 1, 3, 1, 2, 2, 1,
## 3, 2, 3, 3, 2, 4],
## [3, 3, 2, 1, 3, 2, 4, 2, 2, 3, 1, 3, 4, 2, 2, 1, 2, 4, 2, 1, 2, 2,
## 4, 2, 3, 3, 3, 4, 4, 3, 4, 4, 4, 1, 2, 2, 2, 3, 1, 3, 2, 2, 4, 2,
## 2, 1, 3, 3, 3, 1],
## [4, 1, 1, 2, 4, 3, 2, 3, 4, 1, 4, 4, 2, 2, 1, 1, 2, 1, 1, 4, 2, 2,
## 4, 3, 3, 2, 2, 2, 3, 4, 1, 2, 2, 4, 1, 3, 3, 1, 4, 1, 3, 4, 1, 1,
## 3, 4, 2, 1, 3, 3],
## [1, 1, 2, 3, 2, 2, 3, 1, 4, 2, 2, 4, 3, 1, 3, 4, 1, 1, 1, 4, 1, 2,
## 2, 1, 1, 4, 2, 4, 3, 1, 4, 2, 1, 1, 2, 2, 4, 3, 2, 2, 2, 2, 3, 3,
## 1, 3, 4, 2, 4, 2],
## [3, 3, 4, 2, 3, 3, 3, 4, 4, 2, 2, 3, 1, 1, 1, 1, 3, 3, 2, 2, 3, 1,
## 3, 1, 2, 4, 4, 2, 3, 1, 1, 4, 4, 4, 2, 1, 3, 4, 3, 4, 1, 4, 3, 4,
## 3, 3, 1, 3, 1, 4]]), array([['T', 'G', 'A', 'T', 'C', 'G', 'G', 'A', 'C', 'C', 'C', 'C', 'A',
## 'A', 'T', 'C', 'T', 'T', 'G', 'T', 'A', 'C', 'G', 'G', 'A', 'G',
## 'T', 'G', 'G', 'T', 'G', 'T', 'A', 'C', 'C', 'A', 'A', 'G', 'T',
## 'A', 'A', 'T', 'A', 'A', 'T', 'T', 'A', 'G', 'G', 'G'],
## ['C', 'G', 'A', 'T', 'T', 'A', 'G', 'C', 'A', 'C', 'C', 'T', 'C',
## 'A', 'T', 'C', 'G', 'C', 'G', 'A', 'A', 'C', 'C', 'C', 'C', 'C',
## 'G', 'A', 'T', 'T', 'A', 'G', 'T', 'C', 'A', 'C', 'T', 'G', 'G',
## 'A', 'T', 'G', 'G', 'G', 'T', 'T', 'G', 'C', 'G', 'A'],
## ['T', 'T', 'G', 'C', 'G', 'T', 'C', 'T', 'A', 'A', 'T', 'A', 'T',
## 'A', 'G', 'A', 'T', 'T', 'T', 'A', 'G', 'C', 'A', 'C', 'A', 'T',
## 'T', 'A', 'G', 'A', 'T', 'C', 'G', 'C', 'T', 'T', 'C', 'G', 'C',
## 'A', 'T', 'C', 'G', 'A', 'C', 'A', 'G', 'G', 'C', 'A'],
## ['A', 'T', 'T', 'A', 'G', 'A', 'T', 'A', 'T', 'A', 'A', 'A', 'T',
## 'A', 'G', 'T', 'T', 'C', 'G', 'C', 'G', 'A', 'A', 'T', 'G', 'A',
## 'G', 'G', 'C', 'G', 'G', 'C', 'C', 'T', 'T', 'T', 'C', 'C', 'A',
## 'C', 'A', 'T', 'A', 'A', 'C', 'G', 'A', 'A', 'C', 'T'],
## ['G', 'T', 'G', 'T', 'T', 'T', 'C', 'A', 'A', 'C', 'T', 'A', 'A',
## 'A', 'G', 'C', 'A', 'C', 'A', 'T', 'C', 'T', 'T', 'A', 'G', 'G',
## 'G', 'T', 'A', 'C', 'G', 'A', 'T', 'A', 'C', 'G', 'T', 'G', 'A',
## 'G', 'G', 'A', 'A', 'A', 'G', 'T', 'G', 'A', 'G', 'A'],
## ['G', 'G', 'G', 'T', 'T', 'A', 'A', 'A', 'C', 'A', 'T', 'C', 'C',
## 'T', 'A', 'G', 'C', 'G', 'A', 'T', 'A', 'A', 'C', 'T', 'T', 'A',
## 'C', 'C', 'T', 'A', 'C', 'T', 'G', 'A', 'G', 'A', 'C', 'G', 'T',
## 'A', 'A', 'G', 'C', 'A', 'C', 'A', 'C', 'T', 'C', 'G'],
## ['C', 'A', 'C', 'C', 'A', 'T', 'A', 'A', 'G', 'C', 'A', 'A', 'A',
## 'T', 'C', 'G', 'G', 'G', 'A', 'A', 'A', 'T', 'C', 'A', 'G', 'C',
## 'G', 'T', 'A', 'C', 'T', 'C', 'T', 'C', 'C', 'T', 'C', 'A', 'T',
## 'T', 'C', 'C', 'A', 'T', 'G', 'A', 'G', 'A', 'C', 'A'],
## ['C', 'C', 'C', 'A', 'T', 'A', 'T', 'C', 'G', 'C', 'C', 'A', 'A',
## 'T', 'G', 'T', 'C', 'G', 'C', 'G', 'T', 'C', 'T', 'C', 'T', 'C',
## 'T', 'T', 'C', 'C', 'C', 'A', 'T', 'A', 'C', 'C', 'A', 'T', 'A',
## 'G', 'A', 'C', 'C', 'A', 'G', 'C', 'G', 'G', 'C', 'T'],
## ['G', 'G', 'C', 'A', 'G', 'C', 'T', 'C', 'C', 'G', 'A', 'G', 'T',
## 'C', 'C', 'A', 'C', 'T', 'C', 'A', 'C', 'C', 'T', 'C', 'G', 'G',
## 'G', 'T', 'T', 'G', 'T', 'T', 'T', 'A', 'C', 'C', 'C', 'G', 'A',
## 'G', 'C', 'C', 'T', 'C', 'C', 'A', 'G', 'G', 'G', 'A'],
## ['T', 'A', 'A', 'C', 'T', 'G', 'C', 'G', 'T', 'A', 'T', 'T', 'C',
## 'C', 'A', 'A', 'C', 'A', 'A', 'T', 'C', 'C', 'T', 'G', 'G', 'C',
## 'C', 'C', 'G', 'T', 'A', 'C', 'C', 'T', 'A', 'G', 'G', 'A', 'T',
## 'A', 'G', 'T', 'A', 'A', 'G', 'T', 'C', 'A', 'G', 'G'],
## ['A', 'A', 'C', 'G', 'C', 'C', 'G', 'A', 'T', 'C', 'C', 'T', 'G',
## 'A', 'G', 'T', 'A', 'A', 'A', 'T', 'A', 'C', 'C', 'A', 'A', 'T',
## 'C', 'T', 'G', 'A', 'T', 'C', 'A', 'A', 'C', 'C', 'T', 'G', 'C',
## 'C', 'C', 'C', 'G', 'G', 'A', 'G', 'T', 'C', 'T', 'C'],
## ['G', 'G', 'T', 'C', 'G', 'G', 'G', 'T', 'T', 'C', 'C', 'G', 'A',
## 'A', 'A', 'A', 'G', 'G', 'C', 'C', 'G', 'A', 'G', 'A', 'C', 'T',
## 'T', 'C', 'G', 'A', 'A', 'T', 'T', 'T', 'C', 'A', 'G', 'T', 'G',
## 'T', 'A', 'T', 'G', 'T', 'G', 'G', 'A', 'G', 'A', 'T']],
## dtype='<U1')]
12.5 Example 7
- A mutation position of -1 means that no mutation occurs between the two genes.
# Index of the gene to display in this example.
genID = 99
# Element 2 of print_swap_mutation_gene is indexed per gene below
# (presumably the recorded swap-mutation positions -- TODO confirm where it is built).
where_mutation = print_swap_mutation_gene[2]
# Coerce each entry for this gene to a plain int before passing it on.
where_mut = [int(position) for position in where_mutation[genID]]
seq.Dksequence(genome_01_split[genID]).show_mutation_genes_with_color(
    max_width=100,
    where_mutation=where_mut,
)
## [[2 4 1 ... 4 4 3]
## [3 4 4 ... 2 2 3]
## [3 1 4 ... 4 1 3]
## ...
## [4 3 3 ... 4 3 3]
## [2 3 2 ... 4 4 4]
## [3 2 4 ... 3 2 4]]
## [['C' 'T' 'A' ... 'T' 'T' 'G']
## ['G' 'T' 'T' ... 'C' 'C' 'G']
## ['G' 'A' 'T' ... 'T' 'A' 'G']
## ...
## ['T' 'G' 'G' ... 'T' 'G' 'G']
## ['C' 'G' 'C' ... 'T' 'T' 'T']
## ['G' 'C' 'T' ... 'G' 'C' 'T']]
## [array([[2, 4, 1, ..., 4, 4, 3],
## [3, 4, 4, ..., 2, 2, 3],
## [3, 1, 4, ..., 4, 1, 3],
## ...,
## [4, 3, 3, ..., 4, 3, 3],
## [2, 3, 2, ..., 4, 4, 4],
## [3, 2, 4, ..., 3, 2, 4]]), array([['C', 'T', 'A', ..., 'T', 'T', 'G'],
## ['G', 'T', 'T', ..., 'C', 'C', 'G'],
## ['G', 'A', 'T', ..., 'T', 'A', 'G'],
## ...,
## ['T', 'G', 'G', ..., 'T', 'G', 'G'],
## ['C', 'G', 'C', ..., 'T', 'T', 'T'],
## ['G', 'C', 'T', ..., 'G', 'C', 'T']], dtype='<U1')]
2019-10-27