string

pattern matching

print(matches)
[1] FALSE

split

# Comma-separated gene names held in a single string.
genes <- "gene1,gene2,gene3"

# strsplit() returns a list with one character vector per input string.
gene_split <- strsplit(genes, split = ",")

print(gene_split)
[[1]]
[1] "gene1" "gene2" "gene3"

reverse string

library(stringi)
# Reverse the characters of a string with stri_reverse() from stringi.
s1 <- "abvhfsnjg"
rev_string <- stri_reverse(s1)
print(rev_string)
[1] "gjnsfhvba"

number of occurrences

# Find every start position of a motif inside a DNA sequence.
DNA <- "ATGCATGCGCATGTTCG"
pattern <- "ATG"

# gregexpr() returns a list (one element per input string); flatten it to
# a plain integer vector of match start positions.
locations <- unlist(gregexpr(pattern, DNA))
print(locations)
[1]  1  5 11
# gregexpr() reports "no match" as -1, so count only the real positions.
num_OF_occ <- sum(locations != -1)
print(num_OF_occ)
[1] 3

Vector (list of information)

Create

# A numeric vector of ages used throughout the vector examples.
ages <- c(10, 25, 30, 40, 45)
print(ages)
[1] 10 25 30 40 45

Indexing

Square brackets [ ] select by location: a positive (+) value returns the element at that location

ages[3]
[1] 30
A negative (-) value means the element at that location is dropped
ages[-3]
[1] 10 25 40 45

Append

To add new values to the existing vector

ages_extended = c(ages, 100, 200)
ages_extended
[1]  10  25  30  40  45 100 200
ages_extended = c(ages, c(15, 25, 20, 30))
ages_extended
[1] 10 25 30 40 45 15 25 20 30

Slice or Subset

(:) means a range of locations, from the first index to the second

print(ages)
[1] 10 25 30 40 45
print(ages[2:3])
[1] 25 30

Length

print(length(ages))
[1] 5

Insert in a specific position

print(ages)
[1] 10 25 30 40 45
print(ages[1:2])
[1] 10 25
print(ages[3:5])
[1] 30 40 45
# Insert 99 between the second and third elements.
# head(ages, 2) keeps the first two values and tail(ages, -2) keeps
# everything after them, so no element is lost if ages is longer than
# five values (a hard-coded ages[3:5] would silently drop the rest).
ages_extended <- c(head(ages, 2), 99, tail(ages, -2))
ages_extended
[1] 10 25 99 30 40 45

Vector conditional

ages
[1] 10 25 30 40 45
eligible_participant = ages[ages < 40]
eligible_participant
[1] 10 25 30
eligible_participant = ages[ages <= 40]
eligible_participant
[1] 10 25 30 40
# If we want to keep the ages from 20 to 40
ages_20 = ages[ages >= 20]
ages_20
[1] 25 30 40 45
ages20_40 = ages_20[ages_20 <= 40]
ages20_40
[1] 25 30 40
# same result as before,
# using a combined condition: & means AND (both conditions must hold)

ages[ages >= 20 & ages <= 40]
[1] 25 30 40

Sorting

# by default, increasing order
sort(ages)
[1] 10 25 30 40 45
# for decreasing order
sort(ages, decreasing = TRUE)
[1] 45 40 30 25 10

Simple Statistics

print(ages)
[1] 10 25 30 40 45
print(max(ages))
[1] 45
print(min(ages))
[1] 10
print(mean(ages))
[1] 30
# median
print(median(ages))
[1] 30
#sum
print(sum(ages))
[1] 150
# standard deviation
print(sd(ages))
[1] 13.69306
# variance
print(var(ages))
[1] 187.5
# percentile
print(quantile(ages))
  0%  25%  50%  75% 100% 
  10   25   30   40   45 

Correlation

ages
[1] 10 25 30 40 45
score = c(10, 20, 30, 40, 50)
length(score)
[1] 5
cor(ages, score) 
[1] 0.9814955

portion

ages
[1] 10 25 30 40 45
max(ages[2:5])
[1] 45

Data Frame

# Build a small exam-results table: one named vector per column, with
# the columns separated by commas inside data.frame().
Exam_score <- data.frame(
  ID    = c(1, 2, 3, 4, 5),
  Name  = c("Era", "Ema", "Ali", "Jon", "Jia"),
  Age   = c(22, 23, 24, 25, 26),
  Score = c(15, 20, 25, 35, 40)
)
# The i-th value of each c(...) vector becomes row i of the table.
print(Exam_score)

Split from data set

# vector[location]
# data frame[row, column]
Exam_score[2:4, ]
Exam_score[2, ]
Exam_score[ ,3:4]

Split from data set another way

using vector

Exam_score[c(2,3,4), c(2,4)]

For selecting two non-adjacent columns

# Here $ is the column selector
Exam_score$Name
[1] "Era" "Ema" "Ali" "Jon" "Jia"
Exam_score$Score
[1] 15 20 25 35 40
# Assemble a new two-column table from the Name and Score columns,
# extracting each column by its exact name.
Selected_column <- data.frame(
  Name  = Exam_score[["Name"]],
  Score = Exam_score[["Score"]]
)
Selected_column

Row selection using range

Exam_score[Exam_score$Score > 20, ]
Exam_score[Exam_score$Score > 20 & Exam_score$Score < 40, ]

Sublist

selected_ID = c(1, 3)
Exam_score[Exam_score$ID %in% selected_ID, ]

New Row add

Exam_score
# Build the new row as a one-row data frame so each column keeps its type.
# c(5, "Zen", 55, 89) would coerce every value to character, and rbind()
# would then turn ID, Age and Score into character columns.
# NOTE(review): ID 5 is already used by "Jia" -- confirm whether 6 was intended.
New_row <- data.frame(ID = 5, Name = "Zen", Age = 55, Score = 89)
Exam_score2 <- rbind(Exam_score, New_row)
Exam_score2

New Column

# One value per existing row; cbind() appends the vector as a new column
# named New_col.
New_col <- c("dhk", "Ctg", "dhk", "Ctg", "dhk")
cbind(Exam_score, New_col = New_col)
LS0tDQp0aXRsZTogIkNsYXNzIDIiDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KIyBzdHJpbmcNCiMjIHBhdHRlcm4gbWF0Y2hpbmcNCg0KDQpgYGB7cn0NCkROQSA9ICdBVEdDQVRHQ0dDQVRUVENHJw0KcGF0dGVybiA9ICdBVEcnDQoNCm1hdGNoZXMgPSBncmVwbCgnQWNjZicsRE5BKQ0KcHJpbnQobWF0Y2hlcykNCmBgYA0KDQojIyBzcGxpdA0KDQpgYGB7cn0NCmdlbmVzID0gJ2dlbmUxLGdlbmUyLGdlbmUzJw0KDQpnZW5lX3NwbGl0ID0gc3Ryc3BsaXQoZ2VuZXMsICcsJykNCg0KcHJpbnQgKGdlbmVfc3BsaXQpDQpgYGANCiMjIHJldmVyc2Ugc3RyaW5nDQoNCmBgYHtyfQ0KbGlicmFyeShzdHJpbmdpKQ0KczEgPSAnYWJ2aGZzbmpnJw0KcmV2X3N0cmluZyA9IHN0cmlfcmV2ZXJzZShzMSkNCnByaW50IChyZXZfc3RyaW5nKQ0KYGBgDQoNCiMjIG51bWJlciBvZiBvY2N1cmFuY2VzDQpgYGB7cn0NCkROQSA9ICdBVEdDQVRHQ0dDQVRHVFRDRycNCnBhdHRlcm4gPSAnQVRHJw0KDQpsb2NhdGlvbnMgPSBncmVnZXhwcihwYXR0ZXJuLCBETkEpDQpsb2NhdGlvbnMgPSB1bmxpc3QobG9jYXRpb25zKQ0KcHJpbnQgKGxvY2F0aW9ucykNCm51bV9PRl9vY2MgPSBzdW0gKGxvY2F0aW9ucyAhPSAtMSkNCnByaW50IChudW1fT0Zfb2NjKQ0KYGBgDQoNCg0KDQoNCg0KDQoNCg0KDQojIFZlY3RvciAobGlzdCBvZiBpbmZvcm1hdGlvbikNCiMjIENyZWF0ZQ0KYGBge3J9DQphZ2VzID0gYygxMCwgMjUsIDMwLCA0MCwgNDUpDQpwcmludChhZ2VzKQ0KYGBgDQojIyBJbmRleGluZw0KVGhpcmQgYnJhY2tldCBtZWFucyBsb2NhdGlvbg0KKyB2YWx1ZSBtZWFucyBvaSBsb2NhdGlvbg0KYGBge3J9DQphZ2VzWzNdDQpgYGANCi0gdmFsdWUgbWVhbnMgb2l0YSBiYWQNCmBgYHtyfQ0KYWdlc1stM10NCmBgYA0KIyMgQXBwZW5kDQpuZXcgaW5mbyBhZ2VyIHRheSBhZGQga29ydGUgY2hhaWxlDQpgYGB7cn0NCmFnZXNfZXh0ZW5kZWQgPSBjKGFnZXMsIDEwMCwgMjAwKQ0KYWdlc19leHRlbmRlZA0KDQphZ2VzX2V4dGVuZGVkID0gYyhhZ2VzLCBjKDE1LCAyNSwgMjAsIDMwKSkNCmFnZXNfZXh0ZW5kZWQNCk5ldyBhcmVrdGF5IGNoYW5nZWQgdXNlIGtvcnRlIGNoYWlsZQ0KYGBgDQojIyBTbGljZSBvciBTdWJzZXQNCig6KSB0aGlzIG1lYW5zIGxvY2F0aW9uIGZyb20NCmBgYHtyfQ0KcHJpbnQoYWdlcykNCnByaW50KGFnZXNbMjozXSkNCmBgYA0KIyMgTGVuZ3RoDQpgYGB7cn0NCnByaW50KGxlbmd0aChhZ2VzKSkNCmBgYA0KIyMgSW5zZXJ0IGluIGEgc3BlY2lmaWMgcG9zaXRpb24NCmBgYHtyfQ0KcHJpbnQoYWdlcykNCnByaW50KGFnZXNbMToyXSkNCnByaW50KGFnZXNbMzo1XSkNCmFnZXNfZXh0ZW5kZWQgPSBjKGFnZXNbMToyXSwgOTksIGFnZXNbMzo1XSkNCmFnZXNfZXh0ZW5kZWQNCmBgYA0KIyMgVmVjdG9yIGNvbmRpdGlvbmFsDQpgYGB7cn0NCmFnZXMNCmVsaWdpYmxlX3BhcnRpY2lwYW50ID0gYWdlc1thZ2VzIDwgNDBdDQplbGlnaWJsZV9w
YXJ0aWNpcGFudA0KDQplbGlnaWJsZV9wYXJ0aWNpcGFudCA9IGFnZXNbYWdlcyA8PSA0MF0NCmVsaWdpYmxlX3BhcnRpY2lwYW50DQoNCiNJZiB3ZSB3YW50IHRvIHRha2UgMjAgdG8gNDANCmFnZXNfMjAgPSBhZ2VzW2FnZXMgPj0gMjBdDQphZ2VzXzIwDQphZ2VzMjBfNDAgPSBhZ2VzXzIwW2FnZXNfMjAgPD0gNDBdDQphZ2VzMjBfNDANCg0KIyBzYW1lIGxpa2UgYmVmb3JlDQojIyBvciBjb25kaXRpb25hbA0KDQphZ2VzW2FnZXMgPj0gMjAgJiBhZ2VzIDw9IDQwXQ0KYGBgDQojIyBTb3J0aW5nDQpgYGB7cn0NCiMgYnUgZGVmYXVsdCBpbmNyZWFzaW5nIG9yZGVyDQpzb3J0KGFnZXMpDQojIGZvciBkZWNyYXNpbmcgb3JkZXINCnNvcnQoYWdlcywgVFJVRSkNCg0KYGBgDQoNCiMjIFNpbXBsZSBTdGF0aXN0aWNzDQpgYGB7cn0NCnByaW50KGFnZXMpDQpwcmludChtYXgoYWdlcykpDQpwcmludChtaW4oYWdlcykpDQpwcmludChtZWFuKGFnZXMpKQ0KIyBtZWRpYW4NCnByaW50KG1lZGlhbihhZ2VzKSkNCiNzdW0NCnByaW50KHN1bShhZ2VzKSkNCiMgc3RhbmRhcmQgZGV2aWF0aW9uDQpwcmludChzZChhZ2VzKSkNCiMgdmFyaWFuY2UNCnByaW50KHZhcihhZ2VzKSkNCiMgcGVyY2VudGlsZQ0KcHJpbnQocXVhbnRpbGUoYWdlcykpDQpgYGANCg0KIyMgQ29yZWxhdGlvbg0KYGBge3J9DQphZ2VzDQoNCnNjb3JlID0gYygxMCwgMjAsIDMwLCA0MCwgNTApDQpsZW5ndGgoc2NvcmUpDQoNCmNvcihhZ2VzLCBzY29yZSkgDQpgYGANCiMjIHBvcnRpb24NCmBgYHtyfQ0KYWdlcw0KbWF4KGFnZXNbMjo1XSkNCmBgYA0KDQojIERhdGEgRnJhbWUNCg0KYGBge3J9DQpFeGFtX3Njb3JlID0gZGF0YS5mcmFtZSgNCiAgSUQgPSBjKDEsIDIsIDMsIDQsIDUpLA0KICBOYW1lID0gYygiRXJhIiwgIkVtYSIsICJBbGkiLCAiSm9uIiwgIkppYSIpLA0KICBBZ2UgPSBjKDIyLCAyMywgMjQsIDI1LCAyNiksDQogIFNjb3JlID0gYygxNSwgMjAsIDI1LCAzNSwgNDApDQopDQpwcmludChFeGFtX3Njb3JlKQ0KI2NvbW1hIG5lZWQgYWZ0ZXIgZXZlcnkgY29sdW1uDQojYyBlciB1bmRlciBlciBndWxvIHJvdw0KYGBgDQojIyBTcGxpdCBmcm9tIGRhdGEgc2V0DQpgYGB7cn0NCiMgdmVjdG9yIFtsb2NhdGlvbl0NCiMgZGF0YSBzZXQgW3Jvdyxjb2xtXQ0KRXhhbV9zY29yZVsyOjQsIF0NCkV4YW1fc2NvcmVbMiwgXQ0KRXhhbV9zY29yZVsgLDM6NF0NCmBgYA0KIyMgU3BsaXQgZnJvbSBkYXRhIHNldCBhbm90aGVyIHdheQ0KIyB1c2luZyB2ZWN0b3INCmBgYHtyfQ0KRXhhbV9zY29yZVtjKDIsMyw0KSwgYygyLDQpXQ0KYGBgDQoNCiMjIEZvciBzZWxlY3RpbmcgdHdvIGRpc3RhbnQgY29sdW1uDQpgYGB7cn0NCiNIRVJFICQgbWVhbnMgY29sdW0gc2VsZWN0b3INCkV4YW1fc2NvcmUkTmFtZQ0KRXhhbV9zY29yZSRTY29yZQ0KDQpTZWxlY3RlZF9jb2x1bW4gPSBkYXRhLmZyYW1lKA0KICBOYW1lID0gRXhhbV9zY29yZSROYW1lLA0KICBTY29yZSA9IEV4YW1fc2NvcmUk
U2NvcmUNCikNClNlbGVjdGVkX2NvbHVtbg0KYGBgDQojIyBSb3cgc2VsZWN0aW9uIHVzaW5nIHJhbmdlDQpgYGB7cn0NCkV4YW1fc2NvcmVbRXhhbV9zY29yZSRTY29yZSA+IDIwLCBdDQpFeGFtX3Njb3JlW0V4YW1fc2NvcmUkU2NvcmUgPiAyMCAmIEV4YW1fc2NvcmUkU2NvcmUgPCA0MCwgXQ0KYGBgDQojIyBTdWJsaXN0DQoNCmBgYHtyfQ0KIyB3aGVuIGkgbmVlZCB0byBzZWxlY3QgcmFuZG9tbHkNCnNlbGVjdGVkX0lEID0gYygxLCAzKQ0KRXhhbV9zY29yZVtFeGFtX3Njb3JlJElEICVpbiUgc2VsZWN0ZWRfSUQsIF0NCmBgYA0KIyBOZXcgUm93IGFkZA0KYGBge3J9DQpFeGFtX3Njb3JlDQpOZXdfcm93ID0gYyg1LCAiWmVuIiwgNTUsIDg5KQ0KRXhhbV9zY29yZTIgPSByYmluZChFeGFtX3Njb3JlLCBOZXdfcm93KQ0KRXhhbV9zY29yZTINCmBgYA0KIyBOZXcgQ29sdW1uDQpgYGB7cn0NCk5ld19jb2wgPSBjKCJkaGsiLCAiQ3RnIiwiZGhrIiwgIkN0ZyIsImRoayIpDQpjYmluZChFeGFtX3Njb3JlLE5ld19jb2wpDQpgYGANCg0K