String
Pattern Matching
# Test whether a short motif occurs anywhere in a DNA sequence.
DNA_seq = 'ATCGATCGATCGATCGATCGATCGATCGATCGATCG'
pattern = 'ATC'
# The motif is a plain string, so match it literally rather than as a regex.
matches = grepl(pattern, DNA_seq, fixed = TRUE)
print(matches)
[1] TRUE
matches = grepl('ABCD', DNA_seq)
print(matches)
[1] FALSE
Split
# Break a comma-separated gene list into its individual names.
genes = 'gene1,gene2,gene3'
# strsplit() returns a list with one character vector per input string.
split_genes = strsplit(genes, ',', fixed = TRUE)
print(split_genes)
[[1]]
[1] "gene1" "gene2" "gene3"
# Same split as before, but the names are separated by single spaces.
genes = 'gene1 gene2 gene3'
split_genes = strsplit(genes, ' ', fixed = TRUE)
print(split_genes)
[[1]]
[1] "gene1" "gene2" "gene3"
# Split a paragraph into sentences on the "period + space" separator.
paragraph = "This is an example. We are going to split. And show the output. "
# A fixed-string separator avoids having to escape the dot as a regex.
p_split = strsplit(paragraph, ". ", fixed = TRUE)
print(p_split)
[[1]]
[1] "This is an example" "We are going to split" "And show the output"
Reverse
library(stringi)
s1 = 'ABCDEFG'
rev_s1 = stri_reverse(s1)
print(rev_s1)
[1] "GFEDCBA"
Number of Occurrences
# Find the start position of every occurrence of the motif in the sequence.
DNA_seq = 'ATCGATCGATCGATCGATCGATCGATCGATCGATCG'
pattern = 'ATC'

# gregexpr() returns a list (one entry per input string); flatten it to a
# plain vector of start positions.
locations = unlist(gregexpr(pattern, DNA_seq))
print(locations)
[1] 1 5 9 13 17 21 25 29 33
# gregexpr() reports "no match" as -1, so count only the real positions.
num_occ = length(which(locations != -1))
print(num_occ)
[1] 9
Vector
Create
ages = c(20, 25, 65, 18, 45)
print(ages)
[1] 20 25 65 18 45
Indexing
ages[-1]
[1] 25 65 18 45
Append
ages_extended = c(ages, 100)
ages_extended
[1] 20 25 65 18 45 100
ages_extended = c(ages, c(100, 50, 20, 10))
ages_extended
[1] 20 25 65 18 45 100 50 20 10
Append (overwriting the original vector)
ages = c(ages, 100)
ages
[1] 20 25 65 18 45 100
ages = c(ages, c(100, 50, 20, 10))
ages
[1] 20 25 65 18 45 100 100 50 20 10
Slice or subset
print(ages)
[1] 20 25 65 18 45 100 100 50 20 10
print(ages[3:6])
[1] 65 18 45 100
print(ages[2:7])
[1] 25 65 18 45 100 100
print(ages[2:20])
[1] 25 65 18 45 100 100 50 20 10 NA NA NA NA NA NA NA NA NA NA
Length
print(length(ages))
[1] 10
Insert in a specific position
print(ages)
[1] 20 25 65 18 45 100 100 50 20 10
print(ages[1:2])
[1] 20 25
print(ages[3:10])
[1] 65 18 45 100 100 50 20 10
# Insert 99 after the second element. append() takes the insertion point
# directly, so the vector length (10) is no longer hard-coded and the line
# keeps working if `ages` grows or shrinks.
ages_extended = append(ages, 99, after = 2)
ages_extended
[1] 20 25 99 65 18 45 100 100 50 20 10
Vector conditional
print(ages)
[1] 20 25 65 18 45 100 100 50 20 10
eligible_participant = ages[ages < 50]
eligible_participant
[1] 20 25 18 45 20 10
eligible_participant = ages[ages <= 50]
eligible_participant
[1] 20 25 18 45 50 20 10
eligible_participant = ages[ages > 50]
eligible_participant
[1] 65 100 100
age_25 = ages[ages >= 25]
age_25
[1] 25 65 45 100 100 50
age_25_50 = age_25[age_25 <= 50]
age_25_50
[1] 25 45 50
ages[ages >= 25 & ages <= 50]
[1] 25 45 50
Sort
sort(ages)
[1] 10 18 20 20 25 45 50 65 100 100
# Sort from largest to smallest; naming the argument makes the intent explicit
# instead of relying on the position of a bare TRUE.
sort(ages, decreasing = TRUE)
[1] 100 100 65 50 45 25 20 20 18 10
Simple statistics
print(max(ages))
[1] 100
print(min(ages))
[1] 10
print(mean(ages))
[1] 45.3
print(median(ages))
[1] 35
print(sum(ages))
[1] 453
print(sd(ages))
[1] 33.46325
print(var(ages))
[1] 1119.789
print(quantile(ages))
0% 25% 50% 75% 100%
10.00 20.00 35.00 61.25 100.00
Correlation
length(ages)
[1] 10
scores = c(10, 30, 40, 50, 39, 10, 30, 40, 50, 39)
length(scores)
[1] 10
cor(ages, scores)
[1] -0.4010329
ages
[1] 20 25 65 18 45 100 100 50 20 10
max(ages)
[1] 100
max(ages[2:5])
[1] 65
Data Frame
# Exam results table: one row per student, one named column per attribute.
exam_score = data.frame(
  ID    = c(1, 2, 3, 4, 5),                             # student identifier
  Name  = c("Alice", "Bob", "David", "John", "Jenny"),  # student name
  Age   = c(20, 25, 30, 22, 18),                        # age in years
  score = c(100, 78, 90, 55, 81)                        # exam score
)

print(exam_score)
Row select
#vector[location]
#data[row,col]
exam_score[2:4, ]
exam_score[2, ]
Column select
exam_score[ , 3:4 ]
exam_score[ , 4 ]
[1] 100 78 90 55 81
Row and column select
exam_score[2:4, 3:4 ]
Column select
exam_score$Name
[1] "Alice" "Bob" "David" "John" "Jenny"
exam_score$Age
[1] 20 25 30 22 18
Row and column select
exam_score$Name
[1] "Alice" "Bob" "David" "John" "Jenny"
exam_score$Age
[1] 20 25 30 22 18
# Keep every row but only the Name and Age columns, selected by name.
selected_segment = exam_score[ , c("Name", "Age")]
selected_segment
exam_score[c(2, 3, 4), c(1, 4) ]
Conditional selection
exam_score[ exam_score$score > 80 & exam_score$score <= 90, ]
selected_ID = c(1, 3, 4)
exam_score[ exam_score$ID %in% selected_ID, ]
Add a new row
exam_score
# Build the new row as a one-row data frame so every column keeps its type.
# c(10, 'Sam', 55, 89) would be a character vector, and rbind() would then
# turn the numeric ID, Age and score columns into character columns.
new_row = data.frame(ID = 10, Name = 'Sam', Age = 55, score = 89)
exam_score2 = rbind(exam_score, new_row)
exam_score2
Add a new column
# One city per row of exam_score2 (six rows once the new row has been added).
# The vector name becomes the column name, so it is spelled "Location".
Location = c('Dhaka', "Raj", "Barishal", "Dhaka", "Barishal", "Dhaka" )
exam_score2 = cbind(exam_score2, Location)
exam_score2
Condition
# Report pass/fail for a single score against the 80-point cutoff.
x = 90

if (x >= 80) {
  # At or above the cutoff
  print("Pass")
  print("you are good")
} else {
  # Below the cutoff
  print("Fail")
  print("Please improve")
}
[1] "Pass"
[1] "you are good"
Loop
# Print the integers 1 through 10, one per line.
for (i in seq_len(10)) {
  print(i)
}
[1] 1
[1] 2
[1] 3
[1] 4
[1] 5
[1] 6
[1] 7
[1] 8
[1] 9
[1] 10
score = c(100, 78, 90, 55, 81)
for(s in score) {
print(s)
}
[1] 100
[1] 78
[1] 90
[1] 55
[1] 81
Loop and condition
# Label every score as Pass (80 or more) or Fail (below 80).
score = c(100, 78, 90, 55, 81)

for (s in score) {
  # if/else used as an expression picks the label; each label already ends
  # with the newline that terminates the report line.
  cat(s, if (s >= 80) "Pass \n" else "Fail \n")
}
100 Pass
78 Fail
90 Pass
55 Fail
81 Pass
Loop example
# Print each gene sequence followed by its length in characters.
genes = c("AGTC", "AGTCGA", "AAGTACAT")

for (gene in genes) {
  # paste() joins the sequence and its length with a single space, matching
  # the separator cat() puts before the trailing newline argument.
  cat(paste(gene, nchar(gene)), '\n')
}
AGTC 4
AGTCGA 6
AAGTACAT 8
Add a serial number
# Print each gene with a running serial number and its length.
genes = c("AGTC", "AGTCGA", "AAGTACAT")

# seq_along() supplies the serial number directly, so there is no separate
# counter to initialise and increment by hand (and it is safe for an empty
# vector, where the loop body simply never runs).
for (i in seq_along(genes)) {
  cat(i, genes[i], nchar(genes[i]), '\n')
}
1 AGTC 4
2 AGTCGA 6
3 AAGTACAT 8
Add letters as serial
# A, B, C ...
for(i in LETTERS) {
print(i)
}
[1] "A"
[1] "B"
[1] "C"
[1] "D"
[1] "E"
[1] "F"
[1] "G"
[1] "H"
[1] "I"
[1] "J"
[1] "K"
[1] "L"
[1] "M"
[1] "N"
[1] "O"
[1] "P"
[1] "Q"
[1] "R"
[1] "S"
[1] "T"
[1] "U"
[1] "V"
[1] "W"
[1] "X"
[1] "Y"
[1] "Z"
LETTERS[2]
[1] "B"
# Print each gene with a numeric serial, a letter serial (A, B, C, ...) and
# its length.
genes = c("AGTC", "AGTCGA", "AAGTACAT")

# The loop index doubles as the serial number and as the position in the
# built-in LETTERS vector, so no hand-maintained counter is needed.
for (i in seq_along(genes)) {
  cat(i, LETTERS[i], genes[i], nchar(genes[i]), '\n')
}
1 A AGTC 4
2 B AGTCGA 6
3 C AAGTACAT 8
LS0tCnRpdGxlOiAiQ2xhc3MgMiIKb3V0cHV0OgogIGh0bWxfbm90ZWJvb2s6IGRlZmF1bHQKICBwZGZfZG9jdW1lbnQ6IGRlZmF1bHQKLS0tCgojIFN0cmluZwoKIyMgUGF0dGVybiBNYXRjaGluZwpgYGB7cn0KCkROQV9zZXEgPSAnQVRDR0FUQ0dBVENHQVRDR0FUQ0dBVENHQVRDR0FUQ0dBVENHJwpwYXR0ZXJuID0gJ0FUQycKCm1hdGNoZXMgPSBncmVwbChwYXR0ZXJuLCBETkFfc2VxKQpwcmludChtYXRjaGVzKQoKbWF0Y2hlcyA9IGdyZXBsKCdBQkNEJywgRE5BX3NlcSkKcHJpbnQobWF0Y2hlcykKYGBgCgojIyBTcGxpdApgYGB7cn0KZ2VuZXMgPSAnZ2VuZTEsZ2VuZTIsZ2VuZTMnCnNwbGl0X2dlbmVzID0gc3Ryc3BsaXQoZ2VuZXMsICcsJykKcHJpbnQoc3BsaXRfZ2VuZXMpCgoKZ2VuZXMgPSAnZ2VuZTEgZ2VuZTIgZ2VuZTMnCnNwbGl0X2dlbmVzID0gc3Ryc3BsaXQoZ2VuZXMsICcgJykKcHJpbnQoc3BsaXRfZ2VuZXMpCgpwYXJhZ3JhcGggPSAiVGhpcyBpcyBhbiBleGFtcGxlLiBXZSBhcmUgZ29pbmcgdG8gc3BsaXQuIEFuZCBzaG93IHRoZSBvdXRwdXQuICIKcF9zcGxpdCA9IHN0cnNwbGl0KHBhcmFncmFwaCwgIlxcLiAiKQpwcmludChwX3NwbGl0KQpgYGAKCiMjIFJldmVyc2UgCmBgYHtyfQpsaWJyYXJ5KHN0cmluZ2kpCnMxID0gJ0FCQ0RFRkcnCnJldl9zMSA9IHN0cmlfcmV2ZXJzZShzMSkKcHJpbnQocmV2X3MxKQpgYGAKCgojIyBOdW1iZXIgb2YgT2NjdXJhbmNlcwpgYGB7cn0KRE5BX3NlcSA9ICdBVENHQVRDR0FUQ0dBVENHQVRDR0FUQ0dBVENHQVRDR0FUQ0cnCnBhdHRlcm4gPSAnQVRDJwoKbG9jYXRpb25zID0gZ3JlZ2V4cHIocGF0dGVybiwgRE5BX3NlcSkKbG9jYXRpb25zID0gdW5saXN0KGxvY2F0aW9ucykKcHJpbnQobG9jYXRpb25zKQoKbnVtX29jYyA9IHN1bShsb2NhdGlvbnMgIT0gLTEpCnByaW50KG51bV9vY2MpCmBgYAoKCgojIFZlY3RvcgoKIyMgQ3JlYXRlCmBgYHtyfQphZ2VzID0gYygyMCwgMjUsIDY1LCAxOCwgNDUpCnByaW50KGFnZXMpCmBgYAoKCiMjIEluZGV4aW5nCmBgYHtyfQphZ2VzWy0xXQpgYGAKCgojIyBBcHBlbmQKYGBge3J9CmFnZXNfZXh0ZW5kZWQgPSBjKGFnZXMsIDEwMCkKYWdlc19leHRlbmRlZAoKCmFnZXNfZXh0ZW5kZWQgPSBjKGFnZXMsIGMoMTAwLCA1MCwgMjAsIDEwKSkKYWdlc19leHRlbmRlZApgYGAKCgojIyBBcHBlbmQKYGBge3J9CmFnZXMgPSBjKGFnZXMsIDEwMCkKYWdlcwoKCmFnZXMgPSBjKGFnZXMsIGMoMTAwLCA1MCwgMjAsIDEwKSkKYWdlcwpgYGAKCiMjIHNsaWNlIG9yIHN1YnNldApgYGB7cn0KcHJpbnQoYWdlcykKcHJpbnQoYWdlc1szOjZdKQpwcmludChhZ2VzWzI6N10pCgpwcmludChhZ2VzWzI6MjBdKQpgYGAKIyMgTGVuZ3RoCmBgYHtyfQpwcmludChsZW5ndGgoYWdlcykpCmBgYAoKCiMjIEluc2VydCBpbiBhIHNwZWNpZmljIHBvc2l0aW9uCmBgYHtyfQpwcmludChhZ2VzKQoKcHJpbnQoYWdlc1sxOjJdKQpwcmludChhZ2VzWzM6MTBdKQoKYWdlc19l
eHRlbmRlZCA9IGMoYWdlc1sxOjJdLCA5OSwgYWdlc1szOjEwXSkKYWdlc19leHRlbmRlZApgYGAKCgojIyBWZWN0b3IgY29uZGl0aW9uYWwKYGBge3J9CnByaW50KGFnZXMpCgplbGlnaWJsZV9wYXJ0aWNpcGFudCA9IGFnZXNbYWdlcyA8IDUwXQplbGlnaWJsZV9wYXJ0aWNpcGFudAoKZWxpZ2libGVfcGFydGljaXBhbnQgPSBhZ2VzW2FnZXMgPD0gNTBdCmVsaWdpYmxlX3BhcnRpY2lwYW50CgplbGlnaWJsZV9wYXJ0aWNpcGFudCA9IGFnZXNbYWdlcyA+IDUwXQplbGlnaWJsZV9wYXJ0aWNpcGFudAoKCmFnZV8yNSA9IGFnZXNbYWdlcyA+PSAyNV0KYWdlXzI1CgphZ2VfMjVfNTAgPSBhZ2VfMjVbYWdlXzI1IDw9IDUwXQphZ2VfMjVfNTAKYGBgCgpgYGB7cn0KYWdlc1thZ2VzID49IDI1ICYgYWdlcyA8PSA1MF0KYGBgCgoKCiMjIFNvcnQKYGBge3J9CnNvcnQoYWdlcykKc29ydChhZ2VzLCBUUlVFKQpgYGAKCiMgU2ltcGxlIHN0YXRpc3RpY3MKYGBge3J9CnByaW50KG1heChhZ2VzKSkKcHJpbnQobWluKGFnZXMpKQpwcmludChtZWFuKGFnZXMpKQpwcmludChtZWRpYW4oYWdlcykpCnByaW50KHN1bShhZ2VzKSkKcHJpbnQoc2QoYWdlcykpCnByaW50KHZhcihhZ2VzKSkKcHJpbnQocXVhbnRpbGUoYWdlcykpCmBgYAoKIyMgQ29ycmVsYXRpb24KYGBge3J9Cmxlbmd0aChhZ2VzKQoKc2NvcmVzID0gYygxMCwgMzAsIDQwLCA1MCwgMzksIDEwLCAzMCwgNDAsIDUwLCAzOSkKbGVuZ3RoKHNjb3JlcykKCmNvcihhZ2VzLCBzY29yZXMpCmBgYAoKCmBgYHtyfQphZ2VzCm1heChhZ2VzKQptYXgoYWdlc1syOjVdKQpgYGAKCgojIERhdGEgRnJhbWUKCmBgYHtyfQpleGFtX3Njb3JlID0gZGF0YS5mcmFtZSgKICBJRCA9IGMoMSwgMiwgMywgNCwgNSksCiAgTmFtZSA9IGMoIkFsaWNlIiwgIkJvYiIsICJEYXZpZCIsICJKb2huIiwgIkplbm55IiksCiAgQWdlID0gYygyMCwgMjUsIDMwLCAyMiwgMTgpLAogIHNjb3JlID0gYygxMDAsIDc4LCA5MCwgNTUsIDgxKQopCgoKcHJpbnQoZXhhbV9zY29yZSkKYGBgCgoKCiMjIFJvdyBzZWxlY3QKYGBge3J9CiN2ZWN0b3JbbG9jYXRpb25dCiNkYXRhW3Jvdyxjb2xdCgpleGFtX3Njb3JlWzI6NCwgXQoKZXhhbV9zY29yZVsyLCBdCmBgYAoKIyMgQ29sdW5tIHNlbGVjdApgYGB7cn0KZXhhbV9zY29yZVsgLCAzOjQgXQpgYGAKCgpgYGB7cn0KZXhhbV9zY29yZVsgLCA0IF0KYGBgCgoKIyMgUm93IGFuZCBjb2x1bW4gc2VsZWN0CmBgYHtyfQpleGFtX3Njb3JlWzI6NCwgMzo0IF0KYGBgCgojIyBDb2x1bW4gc2VsZWN0CmBgYHtyfQpleGFtX3Njb3JlJE5hbWUKZXhhbV9zY29yZSRBZ2UKYGBgCgojIyBSb3cgYW5kIGNvbHVtbiBzZWxlY3QKYGBge3J9CgpzZWxlY3RlZF9zZWdtZW50ID0gZGF0YS5mcmFtZSgKICBOYW1lID0gZXhhbV9zY29yZSROYW1lLAogIEFnZSA9IGV4YW1fc2NvcmUkQWdlCikKc2VsZWN0ZWRfc2VnbWVudAoKYGBgCgoKYGBge3J9CmV4YW1fc2NvcmVbYygyLCAzLCA0KSwgYygxLCA0KV0KYGBg
CgoKIyMgQ29uZGl0aW9uYWwgc2VsZWN0aW9uCgpgYGB7cn0KZXhhbV9zY29yZVsgZXhhbV9zY29yZSRzY29yZSA+IDgwICYgZXhhbV9zY29yZSRzY29yZSA8PSA5MCwgXQpgYGAKCmBgYHtyfQpzZWxlY3RlZF9JRCA9IGMoMSwgMywgNCkKZXhhbV9zY29yZVsgZXhhbV9zY29yZSRJRCAlaW4lIHNlbGVjdGVkX0lELCBdCmBgYAoKCiMjIEFkZCBhIG5ldyByb3cKYGBge3J9CmV4YW1fc2NvcmUKbmV3X3JvdyA9IGMoMTAsICdTYW0nLCA1NSwgODkpCgpleGFtX3Njb3JlMiA9IHJiaW5kKGV4YW1fc2NvcmUsIG5ld19yb3cpCmV4YW1fc2NvcmUyCmBgYAoKIyMgQWRkIGEgbmV3IGNvbHVtbgpgYGB7cn0KTG9hY3Rpb24gPSBjKCdEaGFrYScsICJSYWoiLCAiQmFyaXNoYWwiLCAiRGhha2EiLCAiQmFyaXNoYWwiLCAiRGhha2EiICkKZXhhbV9zY29yZTIgPSBjYmluZChleGFtX3Njb3JlMiwgTG9hY3Rpb24pCmV4YW1fc2NvcmUyCmBgYAoKIyBDb25kaXRpb24KYGBge3J9CnggPSA5MAoKaWYoeCA8IDgwKSB7CiAgcHJpbnQoIkZhaWwiKQogIHByaW50KCJQbGVhc2UgaW1wcm92ZSIpCiAgCiAgCn0gZWxzZSB7CiAgcHJpbnQoIlBhc3MiKQogIHByaW50KCJ5b3UgYXJlIGdvb2QiKQp9CgoKYGBgCgoKCiMgTG9vcApgYGB7cn0KZm9yKGkgaW4gMToxMCkgewogIHByaW50KGkpCn0KCmBgYAoKCmBgYHtyfQpzY29yZSA9IGMoMTAwLCA3OCwgOTAsIDU1LCA4MSkKCmZvcihzIGluIHNjb3JlKSB7CiAgcHJpbnQocykKfQpgYGAKCiMjIExvb3AgYW5kIGNvbmRpdGlvbgpgYGB7cn0Kc2NvcmUgPSBjKDEwMCwgNzgsIDkwLCA1NSwgODEpCgpmb3IocyBpbiBzY29yZSkgewogIAogIGlmKHMgPj0gODApIHsKICAgIGNhdChzLCAiUGFzcyBcbiIpCiAgfSAKICBlbHNlIHsKICAgIGNhdChzLCAiRmFpbCBcbiIpCiAgfQogIAogIAp9CmBgYAoKIyMgTG9vcCBleGFtcGxlCmBgYHtyfQpnZW5lcyA9IGMoIkFHVEMiLCAiQUdUQ0dBIiwgIkFBR1RBQ0FUIikKCmZvcihnZW5lIGluIGdlbmVzKSB7CiAgY2F0KGdlbmUsIG5jaGFyKGdlbmUpLCAnXG4nKQp9CmBgYAoKCiMjIEFkZCBhIHNlcmlhbCBudW1iZXIKYGBge3J9CmdlbmVzID0gYygiQUdUQyIsICJBR1RDR0EiLCAiQUFHVEFDQVQiKQoKaSA9IDEKZm9yKGdlbmUgaW4gZ2VuZXMpIHsKICBjYXQoaSwgZ2VuZSwgbmNoYXIoZ2VuZSksICdcbicpCiAgaSA9IGkrMQp9CmBgYAoKCiMjIEFkZCBsZXR0ZXJzIGFzIHNlcmlhbApgYGB7cn0KCiMgQSwgQiwgQyAuLi4gCgpmb3IoaSBpbiBMRVRURVJTKSB7CiAgcHJpbnQoaSkKfQoKYGBgCgpgYGB7cn0KTEVUVEVSU1syXQpgYGAKCgpgYGB7cn0KZ2VuZXMgPSBjKCJBR1RDIiwgIkFHVENHQSIsICJBQUdUQUNBVCIpCgppID0gMQpmb3IoZ2VuZSBpbiBnZW5lcykgewogIGNhdChpLCBMRVRURVJTW2ldLCBnZW5lLCBuY2hhcihnZW5lKSwgJ1xuJykKICBpID0gaSsxCn0KYGBgCgoKCgoKCgoKCg==