# Build a list whose i-th element is the vector 1..20 multiplied by i
# (i = 1..10), then display the list.
v <- 1:20
l <- lapply(1:10, function(k) v * k)
l
## [[1]]
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
##
## [[2]]
## [1] 2 4 6 8 10 12 14 16 18 20 22 24 26 28 30 32 34 36 38 40
##
## [[3]]
## [1] 3 6 9 12 15 18 21 24 27 30 33 36 39 42 45 48 51 54 57 60
##
## [[4]]
## [1] 4 8 12 16 20 24 28 32 36 40 44 48 52 56 60 64 68 72 76 80
##
## [[5]]
## [1] 5 10 15 20 25 30 35 40 45 50 55 60 65 70 75 80 85
## [18] 90 95 100
##
## [[6]]
## [1] 6 12 18 24 30 36 42 48 54 60 66 72 78 84 90 96 102
## [18] 108 114 120
##
## [[7]]
## [1] 7 14 21 28 35 42 49 56 63 70 77 84 91 98 105 112 119
## [18] 126 133 140
##
## [[8]]
## [1] 8 16 24 32 40 48 56 64 72 80 88 96 104 112 120 128 136
## [18] 144 152 160
##
## [[9]]
## [1] 9 18 27 36 45 54 63 72 81 90 99 108 117 126 135 144 153
## [18] 162 171 180
##
## [[10]]
## [1] 10 20 30 40 50 60 70 80 90 100 110 120 130 140 150 160 170
## [18] 180 190 200
# Stack the vectors held in l as the rows of a matrix, then display it.
M <- do.call(rbind, l)
M
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
## [1,] 1 2 3 4 5 6 7 8 9 10 11 12 13
## [2,] 2 4 6 8 10 12 14 16 18 20 22 24 26
## [3,] 3 6 9 12 15 18 21 24 27 30 33 36 39
## [4,] 4 8 12 16 20 24 28 32 36 40 44 48 52
## [5,] 5 10 15 20 25 30 35 40 45 50 55 60 65
## [6,] 6 12 18 24 30 36 42 48 54 60 66 72 78
## [7,] 7 14 21 28 35 42 49 56 63 70 77 84 91
## [8,] 8 16 24 32 40 48 56 64 72 80 88 96 104
## [9,] 9 18 27 36 45 54 63 72 81 90 99 108 117
## [10,] 10 20 30 40 50 60 70 80 90 100 110 120 130
## [,14] [,15] [,16] [,17] [,18] [,19] [,20]
## [1,] 14 15 16 17 18 19 20
## [2,] 28 30 32 34 36 38 40
## [3,] 42 45 48 51 54 57 60
## [4,] 56 60 64 68 72 76 80
## [5,] 70 75 80 85 90 95 100
## [6,] 84 90 96 102 108 114 120
## [7,] 98 105 112 119 126 133 140
## [8,] 112 120 128 136 144 152 160
## [9,] 126 135 144 153 162 171 180
## [10,] 140 150 160 170 180 190 200
Type M or print(M) to check that the conversion was done.
# Three column vectors that are combined, column by column, into a 3-row
# matrix whose rows are labelled a, b and c.
v1 <- 1:3
v2 <- 4:6
v3 <- 7:9
A <- matrix(
  c(v1, v2, v3),
  nrow = 3,
  dimnames = list(c('a', 'b', 'c'), NULL)
)
A
## [,1] [,2] [,3]
## a 1 4 7
## b 2 5 8
## c 3 6 9
# The integers 1 to 12, used as the matrix data in the steps that follow.
v <- seq_len(12L)
Are the elements of v of integer type?
# v is an atomic vector, so a single type check on the whole vector answers
# the question; there is no need to inspect the class of every element.
is.integer(v)
## [1] TRUE
# Arrange v into a matrix with 4 rows (filled column by column) and show it.
B <- matrix(data = v, nrow = 4)
B
## [,1] [,2] [,3]
## [1,] 1 5 9
## [2,] 2 6 10
## [3,] 3 7 11
## [4,] 4 8 12
# Label the rows a-d and the columns x-z in one step, then show the matrix.
dimnames(B) <- list(c("a", "b", "c", "d"), c("x", "y", "z"))
B
## x y z
## a 1 5 9
## b 2 6 10
## c 3 7 11
## d 4 8 12
# Rebuild B from its own values as a 4-row matrix, this time filling row by
# row, and print the result.
B <- matrix(data = B, nrow = 4, byrow = TRUE)
print(B)
## [,1] [,2] [,3]
## [1,] 1 2 3
## [2,] 4 5 6
## [3,] 7 8 9
## [4,] 10 11 12
# Transpose B and display the transposed matrix.
tB <- t(B)
tB
## [,1] [,2] [,3] [,4]
## [1,] 1 4 7 10
## [2,] 2 5 8 11
## [3,] 3 6 9 12
We also printed it on screen by typing its name (tB).
# Number of rows and number of columns of tB.
c(nrow(tB), ncol(tB))
## [1] 3 4
By the rule of matrix multiplication in algebra, we cannot perform a tB*tB multiplication.
If we wish to multiply two matrices, e.g. A(m,n) with B(u,v), where m, n and u, v are the numbers of rows and columns of A and B respectively, then we can multiply A*B only if n==u.
In other words, we could multiply two matrices only if the number of the columns of the first operand was equal to the number of the rows of the second operand.
However, the operation tB*tB is allowed in R: it produces an element-wise product (for matrices of the same size, each element is multiplied by the element in the same position), so we expect an output with the same dimensions as tB.
We perform tB*tB in R and see what result we will get:
# Element-wise product of tB with itself.
tB * tB
## [,1] [,2] [,3] [,4]
## [1,] 1 16 49 100
## [2,] 4 25 64 121
## [3,] 9 36 81 144
To check if dimensions are the same:
# Compare the dimensions of the element-wise product with those of tB,
# one dimension at a time.
dim(tB * tB) == dim(tB)
## [1] TRUE TRUE
Or, if we wanted to see more detail and get only one TRUE as output, we could do:
# Collapse the per-dimension comparison into a single TRUE/FALSE: TRUE only
# when both the row counts and the column counts agree.
all(dim(tB) == dim(tB * tB))
## [1] TRUE
In R, tB%*%tB is matrix multiplication in the algebraic sense:
# The matrix product of tB with itself is not defined (the column count of
# the left operand differs from the row count of the right one), so %*%
# raises an error. try() reports that error without aborting the script.
try(tB %*% tB)
It returns the error “Error in tB %*% tB : non-conformable arguments”.
I have set the working directory to the folder where seq.txt is located:
# Load the contents of seq.txt, convert the resulting table to a matrix and
# print it.
data <- as.matrix(read.table(file = 'seq.txt'))
print(data)
## V1
## [1,] "ATGGGCTGTGTGTTCTGCAAGAAATTGGAGCCGGTGGCCACGGCCAAGGAGGATGCTGGCCTGGAAGGGGACTTCAGAAGCTACGGGGCAGCAGACCACTATGGGCCTGACCCCACTAAGGCCCGGCCTGCATCCTCATTTGCCCACATCCCCAACTACAGCAACTTCTCCTCTCAGGCCATCAACCCTGGCTTCCTTGATAGTGGCACCATCAGGGGTGTGTCAGTATAA"
## [2,] "ATGATCACCGGGGAACCTTTGATCCGTCGATCAGATGATAATGAAAAGGCCTTGAAAATCCGCCTGCAAGCCTACCACACTCAAACCACCCCACTCATAGAGTACTACAGGAAACGGGGGATCCACTCCGCCATCGATGCATCCCAGACCCCCGATGTCGTGTTCGCAAGCATCCTAGCAGCCTTCTCCAAAGCCACATGTAAAGACTTGGTTATGTTTATCTAA"
# Pull the first two entries out of the matrix.
string1 <- data[1]
string2 <- data[2]

# Complement every base: A <-> T and C <-> G.
comp1 <- chartr('ACTG', 'TGAC', string1)
comp2 <- chartr('ACTG', 'TGAC', string2)

# Break each complement into single characters ...
c1 <- unlist(strsplit(comp1, ''))
c2 <- unlist(strsplit(comp2, ''))

# ... and join the characters again in reverse order.
revComp1 <- paste(rev(c1), collapse = '')
revComp2 <- paste(rev(c2), collapse = '')

print(revComp1)
cat('\n')
print(revComp2)
## [1] "TTATACTGACACACCCCTGATGGTGCCACTATCAAGGAAGCCAGGGTTGATGGCCTGAGAGGAGAAGTTGCTGTAGTTGGGGATGTGGGCAAATGAGGATGCAGGCCGGGCCTTAGTGGGGTCAGGCCCATAGTGGTCTGCTGCCCCGTAGCTTCTGAAGTCCCCTTCCAGGCCAGCATCCTCCTTGGCCGTGGCCACCGGCTCCAATTTCTTGCAGAACACACAGCCCAT"
## [1] "TTAGATAAACATAACCAAGTCTTTACATGTGGCTTTGGAGAAGGCTGCTAGGATGCTTGCGAACACGACATCGGGGGTCTGGGATGCATCGATGGCGGAGTGGATCCCCCGTTTCCTGTAGTACTCTATGAGTGGGGTGGTTTGAGTGTGGTAGGCTTGCAGGCGGATTTTCAAGGCCTTTTCATTATCATCTGATCGACGGATCAAAGGTTCCCCGGTGATCAT"
Alternatively, we can do what I call the Slacker’s Method:
# install.packages('seqRFLP')
library(seqRFLP)

# Let seqRFLP's revComp() do the work for both sequences
# (presumably the reverse complement -- the printed results match the
# manually computed ones).
revComp1 <- revComp(string1)
revComp2 <- revComp(string2)

print(revComp1)
cat('\n')
print(revComp2)
## [1] "TTATACTGACACACCCCTGATGGTGCCACTATCAAGGAAGCCAGGGTTGATGGCCTGAGAGGAGAAGTTGCTGTAGTTGGGGATGTGGGCAAATGAGGATGCAGGCCGGGCCTTAGTGGGGTCAGGCCCATAGTGGTCTGCTGCCCCGTAGCTTCTGAAGTCCCCTTCCAGGCCAGCATCCTCCTTGGCCGTGGCCACCGGCTCCAATTTCTTGCAGAACACACAGCCCAT"
## [1] "TTAGATAAACATAACCAAGTCTTTACATGTGGCTTTGGAGAAGGCTGCTAGGATGCTTGCGAACACGACATCGGGGGTCTGGGATGCATCGATGGCGGAGTGGATCCCCCGTTTCCTGTAGTACTCTATGAGTGGGGTGGTTTGAGTGTGGTAGGCTTGCAGGCGGATTTTCAAGGCCTTTTCATTATCATCTGATCGACGGATCAAAGGTTCCCCGGTGATCAT"
# Map each base of the complement strands to its pairing RNA base
# (A -> U, C -> G, T -> A, G -> C) and print both results.
RNA1 <- chartr('ACTG', 'UGAC', comp1)
RNA2 <- chartr('ACTG', 'UGAC', comp2)

print(RNA1)
cat('\n')
print(RNA2)
## [1] "AUGGGCUGUGUGUUCUGCAAGAAAUUGGAGCCGGUGGCCACGGCCAAGGAGGAUGCUGGCCUGGAAGGGGACUUCAGAAGCUACGGGGCAGCAGACCACUAUGGGCCUGACCCCACUAAGGCCCGGCCUGCAUCCUCAUUUGCCCACAUCCCCAACUACAGCAACUUCUCCUCUCAGGCCAUCAACCCUGGCUUCCUUGAUAGUGGCACCAUCAGGGGUGUGUCAGUAUAA"
## [1] "AUGAUCACCGGGGAACCUUUGAUCCGUCGAUCAGAUGAUAAUGAAAAGGCCUUGAAAAUCCGCCUGCAAGCCUACCACACUCAAACCACCCCACUCAUAGAGUACUACAGGAAACGGGGGAUCCACUCCGCCAUCGAUGCAUCCCAGACCCCCGAUGUCGUGUUCGCAAGCAUCCUAGCAGCCUUCUCCAAAGCCACAUGUAAAGACUUGGUUAUGUUUAUCUAA"
Checking if their lengths are multiples of 3:
# TRUE when the number of characters in comp1 is divisible by 3.
nchar(comp1) %% 3L == 0L
## [1] TRUE
# Same check for comp2.
nchar(comp2) %% 3L == 0L
## [1] TRUE
Getting the sequences as triplets:
# Insert a space after every run of three characters ...
triplets1 <- gsub("(.{3})", "\\1 ", string1)
triplets2 <- gsub("(.{3})", "\\1 ", string2)

# ... then cut at the spaces to obtain one vector element per triplet.
tri1 <- unlist(strsplit(triplets1, ' '))
tri2 <- unlist(strsplit(triplets2, ' '))
Using the codon.txt file to get the DNA codons and the corresponding amino acids:
# Read the codon table, taking the column names from the first line of the
# file. The argument is spelled out as header = TRUE: `h` relies on partial
# argument matching and `T` is an ordinary variable that can be reassigned.
gencode <- read.table('codon.txt', header = TRUE)
# NOTE(review): attach() is discouraged, but it is kept here because code
# elsewhere in the file may refer to the columns by their bare names.
attach(gencode)
head(gencode)
## Codon AminoAcid Letter FullName
## 1 AAA Lys K Lysine
## 2 AAC Asn N Asparagine
## 3 AAG Lys K Lysine
## 4 AAT Asn N Asparagine
## 5 ACA Thr T Threonine
## 6 ACC Thr T Threonine
# Take the codon and amino-acid-name columns directly from gencode as plain
# vectors, instead of depending on the columns being visible by bare name.
Codon <- as.vector(gencode[["Codon"]])
FullName <- as.vector(gencode[["FullName"]])
Dictionaries were used for the conversion to proteins (source: here).
# Build a codon -> amino-acid-name lookup with the dict package, then turn
# each codon vector into a single "->"-separated string of names.
library(dict)
d <- dict()
# seq_along() stays empty when FullName is empty, whereas 1:length(FullName)
# would iterate over c(1, 0).
for (i in seq_along(FullName)) d[[Codon[i]]] <- FullName[i]

# Look up every codon in one pass rather than growing a vector inside a loop.
# unlist() drops NULL results, just as appending NULL to a vector did.
protein1 <- unlist(lapply(tri1, function(codon) d[[codon]]))
protein1 <- paste(protein1, collapse = '->')

protein2 <- unlist(lapply(tri2, function(codon) d[[codon]]))
protein2 <- paste(protein2, collapse = '->')
Printing the proteins on screen:
# Show both translated sequences, separated by an empty line.
print(protein1)
cat('\n')
print(protein2)
## [1] "Methionine->Glycine->Cysteine->Valine->Phenylalanine->Cysteine->Lysine->Lysine->Leucine->Glutamic_acid->Proline->Valine->Alanine->Threonine->Alanine->Lysine->Glutamic_acid->Aspartic_acid->Alanine->Glycine->Leucine->Glutamic_acid->Glycine->Aspartic_acid->Phenylalanine->Arginine->Serine->Tyrosine->Glycine->Alanine->Alanine->Aspartic_acid->Histidine->Tyrosine->Glycine->Proline->Aspartic_acid->Proline->Threonine->Lysine->Alanine->Arginine->Proline->Alanine->Serine->Serine->Phenylalanine->Alanine->Histidine->Isoleucine->Proline->Asparagine->Tyrosine->Serine->Asparagine->Phenylalanine->Serine->Serine->Glutamine->Alanine->Isoleucine->Asparagine->Proline->Glycine->Phenylalanine->Leucine->Aspartic_acid->Serine->Glycine->Threonine->Isoleucine->Arginine->Glycine->Valine->Serine->Valine->Stop"
## [1] "Methionine->Isoleucine->Threonine->Glycine->Glutamic_acid->Proline->Leucine->Isoleucine->Arginine->Arginine->Serine->Aspartic_acid->Aspartic_acid->Asparagine->Glutamic_acid->Lysine->Alanine->Leucine->Lysine->Isoleucine->Arginine->Leucine->Glutamine->Alanine->Tyrosine->Histidine->Threonine->Glutamine->Threonine->Threonine->Proline->Leucine->Isoleucine->Glutamic_acid->Tyrosine->Tyrosine->Arginine->Lysine->Arginine->Glycine->Isoleucine->Histidine->Serine->Alanine->Isoleucine->Aspartic_acid->Alanine->Serine->Glutamine->Threonine->Proline->Aspartic_acid->Valine->Valine->Phenylalanine->Alanine->Serine->Isoleucine->Leucine->Alanine->Alanine->Phenylalanine->Serine->Lysine->Alanine->Threonine->Cysteine->Lysine->Aspartic_acid->Leucine->Valine->Methionine->Phenylalanine->Isoleucine->Stop"