String

Pattern Matching


# Sequence to search and the motif to look for.
DNA_seq = "ATCGATCGATCGATCGATCGATCGATCGATCGATCG"
pattern = "ATC"

# grepl() gives a logical result: does the pattern occur in the string?
matches = grepl(pattern = pattern, x = DNA_seq)
print(matches)
[1] TRUE
matches = grepl('ABCD', DNA_seq)
print(matches)
[1] FALSE

Split

# Comma-separated gene names packed into a single string.
genes = "gene1,gene2,gene3"
# strsplit() returns a list holding one character vector per input string.
split_genes = strsplit(x = genes, split = ",")
print(split_genes)
[[1]]
[1] "gene1" "gene2" "gene3"
genes = 'gene1 gene2 gene3'
split_genes = strsplit(genes, ' ')
print(split_genes)
[[1]]
[1] "gene1" "gene2" "gene3"
paragraph = "This is an example. We are going to split. And show the output. "
# The split argument is a regular expression, so the period is escaped to
# match a literal "." followed by a space.
p_split = strsplit(x = paragraph, split = "\\. ")
print(p_split)
[[1]]
[1] "This is an example"    "We are going to split" "And show the output"  

Reverse

# stringi supplies stri_reverse(), used here to reverse the characters of a
# string.
library(stringi)
s1 = 'ABCDEFG'
rev_s1 = stri_reverse(s1)
print(rev_s1)
[1] "GFEDCBA"

Number of Occurrences

DNA_seq = "ATCGATCGATCGATCGATCGATCGATCGATCGATCG"
pattern = "ATC"

# gregexpr() returns a list with the start position of every match in the
# string; unlist() flattens that list into a plain integer vector.
locations = unlist(gregexpr(pattern = pattern, text = DNA_seq))
print(locations)
[1]  1  5  9 13 17 21 25 29 33
num_occ = sum(locations != -1)
print(num_occ)
[1] 9

Vector

Create

ages = c(20, 25, 65, 18, 45)
print(ages)
[1] 20 25 65 18 45

Indexing

ages[-1]
[1] 25 65 18 45

Append

ages_extended = c(ages, 100)
ages_extended
[1]  20  25  65  18  45 100
ages_extended = c(ages, c(100, 50, 20, 10))
ages_extended
[1]  20  25  65  18  45 100  50  20  10

Append (updating the original vector)

ages = c(ages, 100)
ages
[1]  20  25  65  18  45 100
ages = c(ages, c(100, 50, 20, 10))
ages
 [1]  20  25  65  18  45 100 100  50  20  10

Slice or subset

print(ages)
 [1]  20  25  65  18  45 100 100  50  20  10
print(ages[3:6])
[1]  65  18  45 100
print(ages[2:7])
[1]  25  65  18  45 100 100
print(ages[2:20])
 [1]  25  65  18  45 100 100  50  20  10  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA

Length

print(length(ages))
[1] 10

Insert in a specific position

print(ages)
 [1]  20  25  65  18  45 100 100  50  20  10
print(ages[1:2])
[1] 20 25
print(ages[3:10])
[1]  65  18  45 100 100  50  20  10
ages_extended = c(ages[1:2], 99, ages[3:10])
ages_extended
 [1]  20  25  99  65  18  45 100 100  50  20  10

Vector conditional

print(ages)
 [1]  20  25  65  18  45 100 100  50  20  10
eligible_participant = ages[ages < 50]
eligible_participant
[1] 20 25 18 45 20 10
eligible_participant = ages[ages <= 50]
eligible_participant
[1] 20 25 18 45 50 20 10
eligible_participant = ages[ages > 50]
eligible_participant
[1]  65 100 100
age_25 = ages[ages >= 25]
age_25
[1]  25  65  45 100 100  50
age_25_50 = age_25[age_25 <= 50]
age_25_50
[1] 25 45 50
ages[ages >= 25 & ages <= 50]
[1] 25 45 50

Sort

sort(ages)
 [1]  10  18  20  20  25  45  50  65 100 100
# Sort from largest to smallest; naming the argument makes the intent of the
# bare TRUE explicit.
sort(ages, decreasing = TRUE)
 [1] 100 100  65  50  45  25  20  20  18  10

Simple statistics

print(max(ages))
[1] 100
print(min(ages))
[1] 10
print(mean(ages))
[1] 45.3
print(median(ages))
[1] 35
print(sum(ages))
[1] 453
print(sd(ages))
[1] 33.46325
print(var(ages))
[1] 1119.789
print(quantile(ages))
    0%    25%    50%    75%   100% 
 10.00  20.00  35.00  61.25 100.00 

Correlation

length(ages)
[1] 10
scores = c(10, 30, 40, 50, 39, 10, 30, 40, 50, 39)
length(scores)
[1] 10
cor(ages, scores)
[1] -0.4010329
ages
 [1]  20  25  65  18  45 100 100  50  20  10
max(ages)
[1] 100
max(ages[2:5])
[1] 65

Data Frame

# Exam results table: one row per student, one column per attribute.
exam_score = data.frame(
  ID    = c(1, 2, 3, 4, 5),
  Name  = c("Alice", "Bob", "David", "John", "Jenny"),
  Age   = c(20, 25, 30, 22, 18),
  score = c(100, 78, 90, 55, 81)
)

print(exam_score)

Row select

#vector[location]
#data[row,col]

exam_score[2:4, ]

exam_score[2, ]

Column select

exam_score[ , 3:4 ]
exam_score[ , 4 ]
[1] 100  78  90  55  81

Row and column select

exam_score[2:4, 3:4 ]

Column select

exam_score$Name
[1] "Alice" "Bob"   "David" "John"  "Jenny"
exam_score$Age
[1] 20 25 30 22 18

Row and column select

exam_score$Name
[1] "Alice" "Bob"   "David" "John"  "Jenny"
exam_score$Age
[1] 20 25 30 22 18
# Keep every row but only the Name and Age columns.
selected_segment = exam_score[, c("Name", "Age")]
selected_segment
exam_score[c(2, 3, 4), c(1, 4) ]

Conditional selection

exam_score[ exam_score$score > 80 & exam_score$score <= 90, ]
selected_ID = c(1, 3, 4)
exam_score[ exam_score$ID %in% selected_ID, ]

Add a new row

exam_score
# Build the new row as a one-row data frame so each value keeps its own type.
# A plain c(10, 'Sam', 55, 89) is coerced to a character vector, and rbind()
# would then turn the ID, Age and score columns into text.
new_row = data.frame(ID = 10, Name = 'Sam', Age = 55, score = 89)

exam_score2 = rbind(exam_score, new_row)
exam_score2

Add a new column

# One location per row of exam_score2; cbind() uses the variable name as the
# name of the new column.
Location = c('Dhaka', "Raj", "Barishal", "Dhaka", "Barishal", "Dhaka")
exam_score2 = cbind(exam_score2, Location)
exam_score2

Condition

x = 90

# A value of 80 or more passes; anything lower fails.
if (x >= 80) {
  print("Pass")
  print("you are good")
} else {
  print("Fail")
  print("Please improve")
}
[1] "Pass"
[1] "you are good"

Loop

# Print the integers 1 through 10, one per line.
for (i in seq_len(10)) {
  print(i)
}
[1] 1
[1] 2
[1] 3
[1] 4
[1] 5
[1] 6
[1] 7
[1] 8
[1] 9
[1] 10
score = c(100, 78, 90, 55, 81)

for(s in score) {
  print(s)
}
[1] 100
[1] 78
[1] 90
[1] 55
[1] 81

Loop and condition

score = c(100, 78, 90, 55, 81)

# Label each score: 80 or above is a pass, anything lower is a fail.
for (s in score) {
  cat(s, if (s >= 80) "Pass \n" else "Fail \n")
}
100 Pass 
78 Fail 
90 Pass 
55 Fail 
81 Pass 

Loop example

genes = c("AGTC", "AGTCGA", "AAGTACAT")

# Print each gene followed by its length in characters.
for (gene in genes) {
  cat(gene, nchar(gene), "\n")
}
AGTC 4 
AGTCGA 6 
AAGTACAT 8 

Add a serial number

genes = c("AGTC", "AGTCGA", "AAGTACAT")

# seq_along() supplies the serial number directly, so no counter has to be
# initialised and incremented by hand.
for (i in seq_along(genes)) {
  gene = genes[i]
  cat(i, gene, nchar(gene), '\n')
}
1 AGTC 4 
2 AGTCGA 6 
3 AAGTACAT 8 

Add letters as serial


# A, B, C ... 

for(i in LETTERS) {
  print(i)
}
[1] "A"
[1] "B"
[1] "C"
[1] "D"
[1] "E"
[1] "F"
[1] "G"
[1] "H"
[1] "I"
[1] "J"
[1] "K"
[1] "L"
[1] "M"
[1] "N"
[1] "O"
[1] "P"
[1] "Q"
[1] "R"
[1] "S"
[1] "T"
[1] "U"
[1] "V"
[1] "W"
[1] "X"
[1] "Y"
[1] "Z"
LETTERS[2]
[1] "B"
genes = c("AGTC", "AGTCGA", "AAGTACAT")

# Walk the positions of genes so one index selects both the serial letter
# and the gene, with no hand-maintained counter.
for (i in seq_along(genes)) {
  gene = genes[i]
  cat(i, LETTERS[i], gene, nchar(gene), '\n')
}
1 A AGTC 4 
2 B AGTCGA 6 
3 C AAGTACAT 8 
LS0tCnRpdGxlOiAiQ2xhc3MgMiIKb3V0cHV0OgogIGh0bWxfbm90ZWJvb2s6IGRlZmF1bHQKICBwZGZfZG9jdW1lbnQ6IGRlZmF1bHQKLS0tCgojIFN0cmluZwoKIyMgUGF0dGVybiBNYXRjaGluZwpgYGB7cn0KCkROQV9zZXEgPSAnQVRDR0FUQ0dBVENHQVRDR0FUQ0dBVENHQVRDR0FUQ0dBVENHJwpwYXR0ZXJuID0gJ0FUQycKCm1hdGNoZXMgPSBncmVwbChwYXR0ZXJuLCBETkFfc2VxKQpwcmludChtYXRjaGVzKQoKbWF0Y2hlcyA9IGdyZXBsKCdBQkNEJywgRE5BX3NlcSkKcHJpbnQobWF0Y2hlcykKYGBgCgojIyBTcGxpdApgYGB7cn0KZ2VuZXMgPSAnZ2VuZTEsZ2VuZTIsZ2VuZTMnCnNwbGl0X2dlbmVzID0gc3Ryc3BsaXQoZ2VuZXMsICcsJykKcHJpbnQoc3BsaXRfZ2VuZXMpCgoKZ2VuZXMgPSAnZ2VuZTEgZ2VuZTIgZ2VuZTMnCnNwbGl0X2dlbmVzID0gc3Ryc3BsaXQoZ2VuZXMsICcgJykKcHJpbnQoc3BsaXRfZ2VuZXMpCgpwYXJhZ3JhcGggPSAiVGhpcyBpcyBhbiBleGFtcGxlLiBXZSBhcmUgZ29pbmcgdG8gc3BsaXQuIEFuZCBzaG93IHRoZSBvdXRwdXQuICIKcF9zcGxpdCA9IHN0cnNwbGl0KHBhcmFncmFwaCwgIlxcLiAiKQpwcmludChwX3NwbGl0KQpgYGAKCiMjIFJldmVyc2UgCmBgYHtyfQpsaWJyYXJ5KHN0cmluZ2kpCnMxID0gJ0FCQ0RFRkcnCnJldl9zMSA9IHN0cmlfcmV2ZXJzZShzMSkKcHJpbnQocmV2X3MxKQpgYGAKCgojIyBOdW1iZXIgb2YgT2NjdXJhbmNlcwpgYGB7cn0KRE5BX3NlcSA9ICdBVENHQVRDR0FUQ0dBVENHQVRDR0FUQ0dBVENHQVRDR0FUQ0cnCnBhdHRlcm4gPSAnQVRDJwoKbG9jYXRpb25zID0gZ3JlZ2V4cHIocGF0dGVybiwgRE5BX3NlcSkKbG9jYXRpb25zID0gdW5saXN0KGxvY2F0aW9ucykKcHJpbnQobG9jYXRpb25zKQoKbnVtX29jYyA9IHN1bShsb2NhdGlvbnMgIT0gLTEpCnByaW50KG51bV9vY2MpCmBgYAoKCgojIFZlY3RvcgoKIyMgQ3JlYXRlCmBgYHtyfQphZ2VzID0gYygyMCwgMjUsIDY1LCAxOCwgNDUpCnByaW50KGFnZXMpCmBgYAoKCiMjIEluZGV4aW5nCmBgYHtyfQphZ2VzWy0xXQpgYGAKCgojIyBBcHBlbmQKYGBge3J9CmFnZXNfZXh0ZW5kZWQgPSBjKGFnZXMsIDEwMCkKYWdlc19leHRlbmRlZAoKCmFnZXNfZXh0ZW5kZWQgPSBjKGFnZXMsIGMoMTAwLCA1MCwgMjAsIDEwKSkKYWdlc19leHRlbmRlZApgYGAKCgojIyBBcHBlbmQKYGBge3J9CmFnZXMgPSBjKGFnZXMsIDEwMCkKYWdlcwoKCmFnZXMgPSBjKGFnZXMsIGMoMTAwLCA1MCwgMjAsIDEwKSkKYWdlcwpgYGAKCiMjIHNsaWNlIG9yIHN1YnNldApgYGB7cn0KcHJpbnQoYWdlcykKcHJpbnQoYWdlc1szOjZdKQpwcmludChhZ2VzWzI6N10pCgpwcmludChhZ2VzWzI6MjBdKQpgYGAKIyMgTGVuZ3RoCmBgYHtyfQpwcmludChsZW5ndGgoYWdlcykpCmBgYAoKCiMjIEluc2VydCBpbiBhIHNwZWNpZmljIHBvc2l0aW9uCmBgYHtyfQpwcmludChhZ2VzKQoKcHJpbnQoYWdlc1sxOjJdKQpwcmludChhZ2VzWzM6MTBdKQoKYWdlc19l
eHRlbmRlZCA9IGMoYWdlc1sxOjJdLCA5OSwgYWdlc1szOjEwXSkKYWdlc19leHRlbmRlZApgYGAKCgojIyBWZWN0b3IgY29uZGl0aW9uYWwKYGBge3J9CnByaW50KGFnZXMpCgplbGlnaWJsZV9wYXJ0aWNpcGFudCA9IGFnZXNbYWdlcyA8IDUwXQplbGlnaWJsZV9wYXJ0aWNpcGFudAoKZWxpZ2libGVfcGFydGljaXBhbnQgPSBhZ2VzW2FnZXMgPD0gNTBdCmVsaWdpYmxlX3BhcnRpY2lwYW50CgplbGlnaWJsZV9wYXJ0aWNpcGFudCA9IGFnZXNbYWdlcyA+IDUwXQplbGlnaWJsZV9wYXJ0aWNpcGFudAoKCmFnZV8yNSA9IGFnZXNbYWdlcyA+PSAyNV0KYWdlXzI1CgphZ2VfMjVfNTAgPSBhZ2VfMjVbYWdlXzI1IDw9IDUwXQphZ2VfMjVfNTAKYGBgCgpgYGB7cn0KYWdlc1thZ2VzID49IDI1ICYgYWdlcyA8PSA1MF0KYGBgCgoKCiMjIFNvcnQKYGBge3J9CnNvcnQoYWdlcykKc29ydChhZ2VzLCBUUlVFKQpgYGAKCiMgU2ltcGxlIHN0YXRpc3RpY3MKYGBge3J9CnByaW50KG1heChhZ2VzKSkKcHJpbnQobWluKGFnZXMpKQpwcmludChtZWFuKGFnZXMpKQpwcmludChtZWRpYW4oYWdlcykpCnByaW50KHN1bShhZ2VzKSkKcHJpbnQoc2QoYWdlcykpCnByaW50KHZhcihhZ2VzKSkKcHJpbnQocXVhbnRpbGUoYWdlcykpCmBgYAoKIyMgQ29ycmVsYXRpb24KYGBge3J9Cmxlbmd0aChhZ2VzKQoKc2NvcmVzID0gYygxMCwgMzAsIDQwLCA1MCwgMzksIDEwLCAzMCwgNDAsIDUwLCAzOSkKbGVuZ3RoKHNjb3JlcykKCmNvcihhZ2VzLCBzY29yZXMpCmBgYAoKCmBgYHtyfQphZ2VzCm1heChhZ2VzKQptYXgoYWdlc1syOjVdKQpgYGAKCgojIERhdGEgRnJhbWUKCmBgYHtyfQpleGFtX3Njb3JlID0gZGF0YS5mcmFtZSgKICBJRCA9IGMoMSwgMiwgMywgNCwgNSksCiAgTmFtZSA9IGMoIkFsaWNlIiwgIkJvYiIsICJEYXZpZCIsICJKb2huIiwgIkplbm55IiksCiAgQWdlID0gYygyMCwgMjUsIDMwLCAyMiwgMTgpLAogIHNjb3JlID0gYygxMDAsIDc4LCA5MCwgNTUsIDgxKQopCgoKcHJpbnQoZXhhbV9zY29yZSkKYGBgCgoKCiMjIFJvdyBzZWxlY3QKYGBge3J9CiN2ZWN0b3JbbG9jYXRpb25dCiNkYXRhW3Jvdyxjb2xdCgpleGFtX3Njb3JlWzI6NCwgXQoKZXhhbV9zY29yZVsyLCBdCmBgYAoKIyMgQ29sdW5tIHNlbGVjdApgYGB7cn0KZXhhbV9zY29yZVsgLCAzOjQgXQpgYGAKCgpgYGB7cn0KZXhhbV9zY29yZVsgLCA0IF0KYGBgCgoKIyMgUm93IGFuZCBjb2x1bW4gc2VsZWN0CmBgYHtyfQpleGFtX3Njb3JlWzI6NCwgMzo0IF0KYGBgCgojIyBDb2x1bW4gc2VsZWN0CmBgYHtyfQpleGFtX3Njb3JlJE5hbWUKZXhhbV9zY29yZSRBZ2UKYGBgCgojIyBSb3cgYW5kIGNvbHVtbiBzZWxlY3QKYGBge3J9CgpzZWxlY3RlZF9zZWdtZW50ID0gZGF0YS5mcmFtZSgKICBOYW1lID0gZXhhbV9zY29yZSROYW1lLAogIEFnZSA9IGV4YW1fc2NvcmUkQWdlCikKc2VsZWN0ZWRfc2VnbWVudAoKYGBgCgoKYGBge3J9CmV4YW1fc2NvcmVbYygyLCAzLCA0KSwgYygxLCA0KV0KYGBg
CgoKIyMgQ29uZGl0aW9uYWwgc2VsZWN0aW9uCgpgYGB7cn0KZXhhbV9zY29yZVsgZXhhbV9zY29yZSRzY29yZSA+IDgwICYgZXhhbV9zY29yZSRzY29yZSA8PSA5MCwgXQpgYGAKCmBgYHtyfQpzZWxlY3RlZF9JRCA9IGMoMSwgMywgNCkKZXhhbV9zY29yZVsgZXhhbV9zY29yZSRJRCAlaW4lIHNlbGVjdGVkX0lELCBdCmBgYAoKCiMjIEFkZCBhIG5ldyByb3cKYGBge3J9CmV4YW1fc2NvcmUKbmV3X3JvdyA9IGMoMTAsICdTYW0nLCA1NSwgODkpCgpleGFtX3Njb3JlMiA9IHJiaW5kKGV4YW1fc2NvcmUsIG5ld19yb3cpCmV4YW1fc2NvcmUyCmBgYAoKIyMgQWRkIGEgbmV3IGNvbHVtbgpgYGB7cn0KTG9hY3Rpb24gPSBjKCdEaGFrYScsICJSYWoiLCAiQmFyaXNoYWwiLCAiRGhha2EiLCAiQmFyaXNoYWwiLCAiRGhha2EiICkKZXhhbV9zY29yZTIgPSBjYmluZChleGFtX3Njb3JlMiwgTG9hY3Rpb24pCmV4YW1fc2NvcmUyCmBgYAoKIyBDb25kaXRpb24KYGBge3J9CnggPSA5MAoKaWYoeCA8IDgwKSB7CiAgcHJpbnQoIkZhaWwiKQogIHByaW50KCJQbGVhc2UgaW1wcm92ZSIpCiAgCiAgCn0gZWxzZSB7CiAgcHJpbnQoIlBhc3MiKQogIHByaW50KCJ5b3UgYXJlIGdvb2QiKQp9CgoKYGBgCgoKCiMgTG9vcApgYGB7cn0KZm9yKGkgaW4gMToxMCkgewogIHByaW50KGkpCn0KCmBgYAoKCmBgYHtyfQpzY29yZSA9IGMoMTAwLCA3OCwgOTAsIDU1LCA4MSkKCmZvcihzIGluIHNjb3JlKSB7CiAgcHJpbnQocykKfQpgYGAKCiMjIExvb3AgYW5kIGNvbmRpdGlvbgpgYGB7cn0Kc2NvcmUgPSBjKDEwMCwgNzgsIDkwLCA1NSwgODEpCgpmb3IocyBpbiBzY29yZSkgewogIAogIGlmKHMgPj0gODApIHsKICAgIGNhdChzLCAiUGFzcyBcbiIpCiAgfSAKICBlbHNlIHsKICAgIGNhdChzLCAiRmFpbCBcbiIpCiAgfQogIAogIAp9CmBgYAoKIyMgTG9vcCBleGFtcGxlCmBgYHtyfQpnZW5lcyA9IGMoIkFHVEMiLCAiQUdUQ0dBIiwgIkFBR1RBQ0FUIikKCmZvcihnZW5lIGluIGdlbmVzKSB7CiAgY2F0KGdlbmUsIG5jaGFyKGdlbmUpLCAnXG4nKQp9CmBgYAoKCiMjIEFkZCBhIHNlcmlhbCBudW1iZXIKYGBge3J9CmdlbmVzID0gYygiQUdUQyIsICJBR1RDR0EiLCAiQUFHVEFDQVQiKQoKaSA9IDEKZm9yKGdlbmUgaW4gZ2VuZXMpIHsKICBjYXQoaSwgZ2VuZSwgbmNoYXIoZ2VuZSksICdcbicpCiAgaSA9IGkrMQp9CmBgYAoKCiMjIEFkZCBsZXR0ZXJzIGFzIHNlcmlhbApgYGB7cn0KCiMgQSwgQiwgQyAuLi4gCgpmb3IoaSBpbiBMRVRURVJTKSB7CiAgcHJpbnQoaSkKfQoKYGBgCgpgYGB7cn0KTEVUVEVSU1syXQpgYGAKCgpgYGB7cn0KZ2VuZXMgPSBjKCJBR1RDIiwgIkFHVENHQSIsICJBQUdUQUNBVCIpCgppID0gMQpmb3IoZ2VuZSBpbiBnZW5lcykgewogIGNhdChpLCBMRVRURVJTW2ldLCBnZW5lLCBuY2hhcihnZW5lKSwgJ1xuJykKICBpID0gaSsxCn0KYGBgCgoKCgoKCgoKCg==